]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
Add markup support to wxOSX/Cocoa wxStaticText and wxButton.
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
105993f7 27 #include "wx/intl.h"
ba7e7253 28 #include "wx/log.h"
6b769f3d 29#endif
c801d85f
KB
30
31#include <ctype.h>
92df97b8
WS
32
33#ifndef __WXWINCE__
34 #include <errno.h>
35#endif
36
c801d85f
KB
37#include <string.h>
38#include <stdlib.h>
9a08c20e 39
8116a0c5 40#include "wx/hashmap.h"
072682ce 41#include "wx/vector.h"
529e491c 42#include "wx/xlocale.h"
8f93a29f 43
825d69c1
VZ
44#ifdef __WXMSW__
45 #include "wx/msw/wrapwin.h"
46#endif // __WXMSW__
47
951201d8
VZ
48#if wxUSE_STD_IOSTREAM
49 #include <sstream>
50#endif
51
8f93a29f
VS
52// string handling functions used by wxString:
53#if wxUSE_UNICODE_UTF8
54 #define wxStringMemcpy memcpy
55 #define wxStringMemcmp memcmp
56 #define wxStringMemchr memchr
57 #define wxStringStrlen strlen
58#else
59 #define wxStringMemcpy wxTmemcpy
60 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
61 #define wxStringMemchr wxTmemchr
62 #define wxStringStrlen wxStrlen
63#endif
8f93a29f 64
b96a56e6
VZ
65// define a function declared in wx/buffer.h here as we don't have buffer.cpp
66// and don't want to add it just because of this simple function
4e79262f
VZ
67namespace wxPrivate
68{
69
b96a56e6
VZ
70// wxXXXBuffer classes can be (implicitly) used during global statics
71// initialization so wrap the status UntypedBufferData variable in a function
72// to make it safe to access it even before all global statics are initialized
73UntypedBufferData *GetUntypedNullData()
74{
75 static UntypedBufferData s_untypedNullData(NULL, 0);
4e79262f 76
b96a56e6
VZ
77 return &s_untypedNullData;
78}
4e79262f
VZ
79
80} // namespace wxPrivate
e87b7833 81
a7ea63e2
VS
82// ---------------------------------------------------------------------------
83// static class variables definition
84// ---------------------------------------------------------------------------
e87b7833 85
a7ea63e2
VS
86//According to STL _must_ be a -1 size_t
87const size_t wxString::npos = (size_t) -1;
8f93a29f 88
68482dc5 89#if wxUSE_STRING_POS_CACHE
68482dc5 90
e810df36
VZ
91#ifdef wxHAS_COMPILER_TLS
92
93wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
94
95#else // !wxHAS_COMPILER_TLS
96
ad8ae788
VZ
97struct wxStrCacheInitializer
98{
99 wxStrCacheInitializer()
100 {
101 // calling this function triggers s_cache initialization in it, and
102 // from now on it becomes safe to call from multiple threads
103 wxString::GetCache();
104 }
105};
106
e317bd3f
SC
107/*
108wxString::Cache& wxString::GetCache()
109{
110 static wxTLS_TYPE(Cache) s_cache;
111
112 return wxTLS_VALUE(s_cache);
113}
114*/
115
ad8ae788
VZ
116static wxStrCacheInitializer gs_stringCacheInit;
117
e810df36
VZ
118#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
119
68482dc5
VZ
120// gdb seems to be unable to display thread-local variables correctly, at least
121// not my 6.4.98 version under amd64, so provide this debugging helper to do it
4b6a582b 122#if wxDEBUG_LEVEL >= 2
68482dc5
VZ
123
124struct wxStrCacheDumper
125{
126 static void ShowAll()
127 {
128 puts("*** wxString cache dump:");
129 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
130 {
131 const wxString::Cache::Element&
8b73c531 132 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
133
134 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
135 n,
8b73c531 136 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
137 c.str,
138 (unsigned long)c.pos,
139 (unsigned long)c.impl,
140 (long)c.len);
141 }
142 }
143};
144
145void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
146
4b6a582b 147#endif // wxDEBUG_LEVEL >= 2
68482dc5
VZ
148
149#ifdef wxPROFILE_STRING_CACHE
150
151wxString::CacheStats wxString::ms_cacheStats;
152
8c3b65d9 153struct wxStrCacheStatsDumper
68482dc5 154{
8c3b65d9 155 ~wxStrCacheStatsDumper()
68482dc5
VZ
156 {
157 const wxString::CacheStats& stats = wxString::ms_cacheStats;
158
159 if ( stats.postot )
160 {
161 puts("*** wxString cache statistics:");
162 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
163 stats.postot);
164 printf("\tHits %u (of which %u not used) or %.2f%%\n",
165 stats.poshits,
166 stats.mishits,
167 100.*float(stats.poshits - stats.mishits)/stats.postot);
168 printf("\tAverage position requested: %.2f\n",
169 float(stats.sumpos) / stats.postot);
170 printf("\tAverage offset after cached hint: %.2f\n",
171 float(stats.sumofs) / stats.postot);
172 }
173
174 if ( stats.lentot )
175 {
176 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
177 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
178 }
179 }
8c3b65d9 180};
68482dc5 181
8c3b65d9 182static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
183
184#endif // wxPROFILE_STRING_CACHE
185
186#endif // wxUSE_STRING_POS_CACHE
187
a7ea63e2
VS
188// ----------------------------------------------------------------------------
189// global functions
190// ----------------------------------------------------------------------------
e87b7833 191
a7ea63e2 192#if wxUSE_STD_IOSTREAM
8f93a29f 193
a7ea63e2 194#include <iostream>
8f93a29f 195
a7ea63e2 196wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 197{
7a906e1a 198#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
de4983f3 199 const wxScopedCharBuffer buf(str.AsCharBuf());
ddf01bdb
VZ
200 if ( !buf )
201 os.clear(wxSTD ios_base::failbit);
202 else
203 os << buf.data();
204
205 return os;
a7ea63e2 206#else
7a906e1a 207 return os << str.AsInternal();
a7ea63e2 208#endif
8f93a29f
VS
209}
210
04abe4bc
VS
211wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
212{
213 return os << str.c_str();
214}
215
de4983f3 216wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
04abe4bc
VS
217{
218 return os << str.data();
219}
220
221#ifndef __BORLANDC__
de4983f3 222wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
04abe4bc
VS
223{
224 return os << str.data();
225}
226#endif
227
6a6ea041 228#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
229
230wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
231{
232 return wos << str.wc_str();
233}
234
235wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
236{
237 return wos << str.AsWChar();
238}
239
de4983f3 240wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
6b61b594
VZ
241{
242 return wos << str.data();
243}
244
6a6ea041 245#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 246
a7ea63e2 247#endif // wxUSE_STD_IOSTREAM
e87b7833 248
81727065
VS
249// ===========================================================================
250// wxString class core
251// ===========================================================================
252
253#if wxUSE_UNICODE_UTF8
254
81727065
VS
255void wxString::PosLenToImpl(size_t pos, size_t len,
256 size_t *implPos, size_t *implLen) const
257{
258 if ( pos == npos )
68482dc5 259 {
81727065 260 *implPos = npos;
68482dc5
VZ
261 }
262 else // have valid start position
81727065 263 {
68482dc5
VZ
264 const const_iterator b = GetIterForNthChar(pos);
265 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 266 if ( len == npos )
68482dc5 267 {
81727065 268 *implLen = npos;
68482dc5
VZ
269 }
270 else // have valid length too
81727065 271 {
68482dc5
VZ
272 // we need to handle the case of length specifying a substring
273 // going beyond the end of the string, just as std::string does
274 const const_iterator e(end());
275 const_iterator i(b);
276 while ( len && i <= e )
277 {
278 ++i;
279 --len;
280 }
281
282 *implLen = i.impl() - b.impl();
81727065
VS
283 }
284 }
285}
286
287#endif // wxUSE_UNICODE_UTF8
288
11aac4ba
VS
289// ----------------------------------------------------------------------------
290// wxCStrData converted strings caching
291// ----------------------------------------------------------------------------
292
132276cf
VS
293// FIXME-UTF8: temporarily disabled because it doesn't work with global
294// string objects; re-enable after fixing this bug and benchmarking
295// performance to see if using a hash is a good idea at all
296#if 0
297
11aac4ba
VS
298// For backward compatibility reasons, it must be possible to assign the value
299// returned by wxString::c_str() to a char* or wchar_t* variable and work with
300// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
301// because the memory would be freed immediately, but it has to be valid as long
302// as the string is not modified, so that code like this still works:
303//
304// const wxChar *s = str.c_str();
305// while ( s ) { ... }
306
307// FIXME-UTF8: not thread safe!
308// FIXME-UTF8: we currently clear the cached conversion only when the string is
309// destroyed, but we should do it when the string is modified, to
310// keep memory usage down
311// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
312// invalidated the cache on every change, we could keep the previous
313// conversion
314// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
315// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
316
317template<typename T>
318static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
319{
6c4ebcda 320 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
321 if ( i != hash.end() )
322 {
323 free(i->second);
324 hash.erase(i);
325 }
326}
327
328#if wxUSE_UNICODE
6c4ebcda
VS
329// NB: non-STL implementation doesn't compile with "const wxString*" key type,
330// so we have to use wxString* here and const-cast when used
11aac4ba
VS
331WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
332 wxStringCharConversionCache);
333static wxStringCharConversionCache gs_stringsCharCache;
334
335const char* wxCStrData::AsChar() const
336{
337 // remove previously cached value, if any (see FIXMEs above):
338 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
339
340 // convert the string and keep it:
6c4ebcda
VS
341 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
342 m_str->mb_str().release();
11aac4ba
VS
343
344 return s + m_offset;
345}
346#endif // wxUSE_UNICODE
347
348#if !wxUSE_UNICODE_WCHAR
349WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
350 wxStringWCharConversionCache);
351static wxStringWCharConversionCache gs_stringsWCharCache;
352
353const wchar_t* wxCStrData::AsWChar() const
354{
355 // remove previously cached value, if any (see FIXMEs above):
356 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
357
358 // convert the string and keep it:
6c4ebcda
VS
359 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
360 m_str->wc_str().release();
11aac4ba
VS
361
362 return s + m_offset;
363}
364#endif // !wxUSE_UNICODE_WCHAR
365
11aac4ba
VS
366wxString::~wxString()
367{
368#if wxUSE_UNICODE
369 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
370 DeleteStringFromConversionCache(gs_stringsCharCache, this);
371#endif
372#if !wxUSE_UNICODE_WCHAR
373 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
374#endif
375}
132276cf
VS
376#endif
377
132276cf
VS
378// ===========================================================================
379// wxString class core
380// ===========================================================================
381
382// ---------------------------------------------------------------------------
383// construction and conversion
384// ---------------------------------------------------------------------------
11aac4ba 385
81727065 386#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
387/* static */
388wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 389 const wxMBConv& conv)
8f93a29f
VS
390{
391 // anything to do?
392 if ( !psz || nLength == 0 )
de4983f3 393 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
394
395 if ( nLength == npos )
396 nLength = wxNO_LEN;
397
398 size_t wcLen;
de4983f3 399 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
8f93a29f 400 if ( !wcLen )
de4983f3 401 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
402 else
403 return SubstrBufFromMB(wcBuf, wcLen);
404}
81727065
VS
405#endif // wxUSE_UNICODE_WCHAR
406
407#if wxUSE_UNICODE_UTF8
408/* static */
409wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
410 const wxMBConv& conv)
411{
81727065
VS
412 // anything to do?
413 if ( !psz || nLength == 0 )
de4983f3 414 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065 415
111d9948
VS
416 // if psz is already in UTF-8, we don't have to do the roundtrip to
417 // wchar_t* and back:
418 if ( conv.IsUTF8() )
419 {
420 // we need to validate the input because UTF8 iterators assume valid
421 // UTF-8 sequence and psz may be invalid:
422 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
423 {
9ef1ad0d
VZ
424 // we must pass the real string length to SubstrBufFromMB ctor
425 if ( nLength == npos )
426 nLength = psz ? strlen(psz) : 0;
38d26d60 427 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
6df09f32 428 nLength);
111d9948
VS
429 }
430 // else: do the roundtrip through wchar_t*
431 }
432
81727065
VS
433 if ( nLength == npos )
434 nLength = wxNO_LEN;
435
436 // first convert to wide string:
437 size_t wcLen;
de4983f3 438 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
81727065 439 if ( !wcLen )
de4983f3 440 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065
VS
441
442 // and then to UTF-8:
4fdfe2f3 443 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065 444 // widechar -> UTF-8 conversion isn't supposed to ever fail:
9a83f860 445 wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
81727065
VS
446
447 return buf;
448}
449#endif // wxUSE_UNICODE_UTF8
450
451#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
452/* static */
453wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 454 const wxMBConv& conv)
8f93a29f
VS
455{
456 // anything to do?
457 if ( !pwz || nLength == 0 )
de4983f3 458 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
459
460 if ( nLength == npos )
461 nLength = wxNO_LEN;
462
463 size_t mbLen;
de4983f3 464 wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
8f93a29f 465 if ( !mbLen )
de4983f3 466 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
467 else
468 return SubstrBufFromWC(mbBuf, mbLen);
469}
81727065 470#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f 471
f54cb154
VZ
472// This std::string::c_str()-like method returns a wide char pointer to string
473// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
474// a pointer to the internal representation. Otherwise a conversion is required
475// and it returns a temporary buffer.
476//
477// However for compatibility with c_str() and to avoid breaking existing code
478// doing
479//
480// for ( const wchar_t *p = s.wc_str(); *p; p++ )
481// ... use *p...
482//
483// we actually need to ensure that the returned buffer is _not_ temporary and
484// so we use wxString::m_convertedToWChar to store the returned data
485#if !wxUSE_UNICODE_WCHAR
8f93a29f 486
f54cb154 487const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
265d5cce 488{
f54cb154
VZ
489 const char * const strMB = m_impl.c_str();
490 const size_t lenMB = m_impl.length();
491
492 // find out the size of the buffer needed
493 const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
494 if ( lenWC == wxCONV_FAILED )
495 return NULL;
496
497 // keep the same buffer if the string size didn't change: this is not only
498 // an optimization but also ensure that code which modifies string
499 // character by character (without changing its length) can continue to use
500 // the pointer returned by a previous wc_str() call even after changing the
501 // string
502
503 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
504 // allow to save on buffer reallocations but at the cost of
505 // consuming (even) more memory, we should benchmark this to
506 // determine if it's worth doing
507 if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
508 {
509 if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
510 return NULL;
511 }
e87b7833 512
f54cb154
VZ
513 // finally do convert
514 m_convertedToWChar.m_str[lenWC] = L'\0';
515 if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
516 strMB, lenMB) == wxCONV_FAILED )
517 return NULL;
e87b7833 518
f54cb154 519 return m_convertedToWChar.m_str;
81727065
VS
520}
521
f54cb154
VZ
522#endif // !wxUSE_UNICODE_WCHAR
523
524
525// Same thing for mb_str() which returns a normal char pointer to string
526// contents: this always requires converting it to the specified encoding in
527// non-ANSI build except if we need to convert to UTF-8 and this is what we
528// already use internally.
529#if wxUSE_UNICODE
530
531const char *wxString::AsChar(const wxMBConv& conv) const
81727065 532{
f54cb154 533#if wxUSE_UNICODE_UTF8
111d9948 534 if ( conv.IsUTF8() )
f54cb154 535 return m_impl.c_str();
111d9948 536
f54cb154
VZ
537 const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
538 const size_t lenWC = m_convertedToWChar.m_len;
539#else // wxUSE_UNICODE_WCHAR
540 const wchar_t * const strWC = m_impl.c_str();
541 const size_t lenWC = m_impl.length();
542#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
81727065 543
f54cb154
VZ
544 const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
545 if ( lenMB == wxCONV_FAILED )
546 return NULL;
547
548 if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
549 {
550 if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
551 return NULL;
552 }
81727065 553
f54cb154
VZ
554 m_convertedToChar.m_str[lenMB] = '\0';
555 if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
556 strWC, lenWC) == wxCONV_FAILED )
557 return NULL;
eec47cc6 558
f54cb154 559 return m_convertedToChar.m_str;
265d5cce 560}
7663d0d4 561
f54cb154 562#endif // wxUSE_UNICODE
e87b7833
MB
563
564// shrink to minimal size (releasing extra memory)
565bool wxString::Shrink()
566{
567 wxString tmp(begin(), end());
568 swap(tmp);
569 return tmp.length() == length();
570}
571
d8a4b666 572// deprecated compatibility code:
a7ea63e2 573#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 574wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
575{
576 return DoGetWriteBuf(nLen);
577}
578
579void wxString::UngetWriteBuf()
580{
581 DoUngetWriteBuf();
582}
583
584void wxString::UngetWriteBuf(size_t nLen)
585{
586 DoUngetWriteBuf(nLen);
587}
a7ea63e2 588#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 589
d8a4b666 590
e87b7833
MB
591// ---------------------------------------------------------------------------
592// data access
593// ---------------------------------------------------------------------------
594
595// all functions are inline in string.h
596
597// ---------------------------------------------------------------------------
e8f59039 598// concatenation operators
e87b7833
MB
599// ---------------------------------------------------------------------------
600
c801d85f 601/*
c801d85f
KB
602 * concatenation functions come in 5 flavours:
603 * string + string
604 * char + string and string + char
605 * C str + string and string + C str
606 */
607
b1801e0e 608wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 609{
992527a5 610#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
611 wxASSERT( str1.IsValid() );
612 wxASSERT( str2.IsValid() );
e87b7833 613#endif
097c080b 614
3458e408
WS
615 wxString s = str1;
616 s += str2;
3168a13f 617
3458e408 618 return s;
c801d85f
KB
619}
620
c9f78968 621wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 622{
992527a5 623#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 624 wxASSERT( str.IsValid() );
e87b7833 625#endif
3168a13f 626
3458e408
WS
627 wxString s = str;
628 s += ch;
097c080b 629
3458e408 630 return s;
c801d85f
KB
631}
632
c9f78968 633wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 634{
992527a5 635#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 636 wxASSERT( str.IsValid() );
e87b7833 637#endif
097c080b 638
3458e408
WS
639 wxString s = ch;
640 s += str;
3168a13f 641
3458e408 642 return s;
c801d85f
KB
643}
644
8f93a29f 645wxString operator+(const wxString& str, const char *psz)
c801d85f 646{
992527a5 647#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 648 wxASSERT( str.IsValid() );
e87b7833 649#endif
097c080b 650
3458e408 651 wxString s;
8f93a29f 652 if ( !s.Alloc(strlen(psz) + str.length()) ) {
9a83f860 653 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
3458e408
WS
654 }
655 s += str;
656 s += psz;
3168a13f 657
3458e408 658 return s;
c801d85f
KB
659}
660
8f93a29f 661wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 662{
992527a5 663#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
664 wxASSERT( str.IsValid() );
665#endif
666
667 wxString s;
668 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
9a83f860 669 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
8f93a29f
VS
670 }
671 s += str;
672 s += pwz;
673
674 return s;
675}
676
677wxString operator+(const char *psz, const wxString& str)
678{
a7ea63e2
VS
679#if !wxUSE_STL_BASED_WXSTRING
680 wxASSERT( str.IsValid() );
681#endif
682
683 wxString s;
684 if ( !s.Alloc(strlen(psz) + str.length()) ) {
9a83f860 685 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
a7ea63e2
VS
686 }
687 s = psz;
688 s += str;
689
690 return s;
691}
692
693wxString operator+(const wchar_t *pwz, const wxString& str)
694{
695#if !wxUSE_STL_BASED_WXSTRING
696 wxASSERT( str.IsValid() );
697#endif
698
699 wxString s;
700 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
9a83f860 701 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
a7ea63e2
VS
702 }
703 s = pwz;
704 s += str;
705
706 return s;
707}
708
709// ---------------------------------------------------------------------------
710// string comparison
711// ---------------------------------------------------------------------------
712
52de37c7
VS
713bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
714{
715 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
716 : wxToupper(GetChar(0u)) == wxToupper(c));
717}
718
a7ea63e2
VS
719#ifdef HAVE_STD_STRING_COMPARE
720
721// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
722// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
723// sort strings in characters code point order by sorting the byte sequence
724// in byte values order (i.e. what strcmp() and memcmp() do).
725
726int wxString::compare(const wxString& str) const
727{
728 return m_impl.compare(str.m_impl);
729}
730
731int wxString::compare(size_t nStart, size_t nLen,
732 const wxString& str) const
733{
734 size_t pos, len;
735 PosLenToImpl(nStart, nLen, &pos, &len);
736 return m_impl.compare(pos, len, str.m_impl);
737}
738
739int wxString::compare(size_t nStart, size_t nLen,
740 const wxString& str,
741 size_t nStart2, size_t nLen2) const
742{
743 size_t pos, len;
744 PosLenToImpl(nStart, nLen, &pos, &len);
745
746 size_t pos2, len2;
747 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
748
749 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
750}
751
752int wxString::compare(const char* sz) const
753{
754 return m_impl.compare(ImplStr(sz));
755}
756
757int wxString::compare(const wchar_t* sz) const
758{
759 return m_impl.compare(ImplStr(sz));
760}
761
762int wxString::compare(size_t nStart, size_t nLen,
763 const char* sz, size_t nCount) const
764{
765 size_t pos, len;
766 PosLenToImpl(nStart, nLen, &pos, &len);
767
768 SubstrBufFromMB str(ImplStr(sz, nCount));
769
770 return m_impl.compare(pos, len, str.data, str.len);
771}
772
773int wxString::compare(size_t nStart, size_t nLen,
774 const wchar_t* sz, size_t nCount) const
775{
776 size_t pos, len;
777 PosLenToImpl(nStart, nLen, &pos, &len);
778
779 SubstrBufFromWC str(ImplStr(sz, nCount));
780
781 return m_impl.compare(pos, len, str.data, str.len);
782}
783
784#else // !HAVE_STD_STRING_COMPARE
785
786static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
787 const wxStringCharType* s2, size_t l2)
788{
789 if( l1 == l2 )
790 return wxStringMemcmp(s1, s2, l1);
791 else if( l1 < l2 )
792 {
793 int ret = wxStringMemcmp(s1, s2, l1);
794 return ret == 0 ? -1 : ret;
795 }
796 else
797 {
798 int ret = wxStringMemcmp(s1, s2, l2);
799 return ret == 0 ? +1 : ret;
800 }
801}
802
803int wxString::compare(const wxString& str) const
804{
805 return ::wxDoCmp(m_impl.data(), m_impl.length(),
806 str.m_impl.data(), str.m_impl.length());
807}
808
809int wxString::compare(size_t nStart, size_t nLen,
810 const wxString& str) const
811{
812 wxASSERT(nStart <= length());
813 size_type strLen = length() - nStart;
814 nLen = strLen < nLen ? strLen : nLen;
815
816 size_t pos, len;
817 PosLenToImpl(nStart, nLen, &pos, &len);
818
819 return ::wxDoCmp(m_impl.data() + pos, len,
820 str.m_impl.data(), str.m_impl.length());
821}
822
823int wxString::compare(size_t nStart, size_t nLen,
824 const wxString& str,
825 size_t nStart2, size_t nLen2) const
826{
827 wxASSERT(nStart <= length());
828 wxASSERT(nStart2 <= str.length());
829 size_type strLen = length() - nStart,
830 strLen2 = str.length() - nStart2;
831 nLen = strLen < nLen ? strLen : nLen;
832 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
833
834 size_t pos, len;
835 PosLenToImpl(nStart, nLen, &pos, &len);
836 size_t pos2, len2;
837 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
838
839 return ::wxDoCmp(m_impl.data() + pos, len,
840 str.m_impl.data() + pos2, len2);
841}
842
843int wxString::compare(const char* sz) const
844{
845 SubstrBufFromMB str(ImplStr(sz, npos));
846 if ( str.len == npos )
847 str.len = wxStringStrlen(str.data);
848 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
849}
850
851int wxString::compare(const wchar_t* sz) const
852{
853 SubstrBufFromWC str(ImplStr(sz, npos));
854 if ( str.len == npos )
855 str.len = wxStringStrlen(str.data);
856 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
857}
858
859int wxString::compare(size_t nStart, size_t nLen,
860 const char* sz, size_t nCount) const
861{
862 wxASSERT(nStart <= length());
863 size_type strLen = length() - nStart;
864 nLen = strLen < nLen ? strLen : nLen;
097c080b 865
a7ea63e2
VS
866 size_t pos, len;
867 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 868
a7ea63e2
VS
869 SubstrBufFromMB str(ImplStr(sz, nCount));
870 if ( str.len == npos )
871 str.len = wxStringStrlen(str.data);
872
873 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
874}
875
a7ea63e2
VS
876int wxString::compare(size_t nStart, size_t nLen,
877 const wchar_t* sz, size_t nCount) const
8f93a29f 878{
a7ea63e2
VS
879 wxASSERT(nStart <= length());
880 size_type strLen = length() - nStart;
881 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 882
a7ea63e2
VS
883 size_t pos, len;
884 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 885
a7ea63e2
VS
886 SubstrBufFromWC str(ImplStr(sz, nCount));
887 if ( str.len == npos )
888 str.len = wxStringStrlen(str.data);
889
890 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
891}
892
a7ea63e2
VS
893#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
894
895
8f93a29f
VS
896// ---------------------------------------------------------------------------
897// find_{first,last}_[not]_of functions
898// ---------------------------------------------------------------------------
899
900#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 901
8f93a29f
VS
902// NB: All these functions are implemented with the argument being wxChar*,
903// i.e. widechar string in any Unicode build, even though native string
904// representation is char* in the UTF-8 build. This is because we couldn't
905// use memchr() to determine if a character is in a set encoded as UTF-8.
906
907size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 908{
8f93a29f 909 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
910}
911
8f93a29f 912size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 913{
8f93a29f 914 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
915}
916
8f93a29f 917size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 918{
9a83f860 919 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
dcb68102 920
8f93a29f
VS
921 size_t idx = nStart;
922 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 923 {
8f93a29f
VS
924 if ( wxTmemchr(sz, *i, n) )
925 return idx;
dcb68102 926 }
8f93a29f
VS
927
928 return npos;
929}
930
931size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
932{
9a83f860 933 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
8f93a29f
VS
934
935 size_t idx = nStart;
936 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 937 {
8f93a29f
VS
938 if ( !wxTmemchr(sz, *i, n) )
939 return idx;
940 }
941
942 return npos;
943}
944
945
946size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
947{
948 return find_last_of(sz, nStart, wxStrlen(sz));
949}
950
951size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
952{
953 return find_last_not_of(sz, nStart, wxStrlen(sz));
954}
955
956size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
957{
958 size_t len = length();
959
960 if ( nStart == npos )
961 {
962 nStart = len - 1;
dcb68102 963 }
2c09fb3b 964 else
dcb68102 965 {
9a83f860 966 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
dcb68102 967 }
8f93a29f
VS
968
969 size_t idx = nStart;
970 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
971 i != rend(); --idx, ++i )
972 {
973 if ( wxTmemchr(sz, *i, n) )
974 return idx;
975 }
976
977 return npos;
dcb68102
RN
978}
979
8f93a29f 980size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 981{
8f93a29f
VS
982 size_t len = length();
983
984 if ( nStart == npos )
985 {
986 nStart = len - 1;
987 }
988 else
989 {
9a83f860 990 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
8f93a29f
VS
991 }
992
993 size_t idx = nStart;
994 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
995 i != rend(); --idx, ++i )
996 {
997 if ( !wxTmemchr(sz, *i, n) )
998 return idx;
999 }
1000
1001 return npos;
dcb68102
RN
1002}
1003
8f93a29f 1004size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1005{
9a83f860 1006 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
8f93a29f
VS
1007
1008 size_t idx = nStart;
1009 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1010 {
1011 if ( *i != ch )
1012 return idx;
1013 }
1014
1015 return npos;
1016}
1017
1018size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1019{
1020 size_t len = length();
1021
1022 if ( nStart == npos )
1023 {
1024 nStart = len - 1;
1025 }
1026 else
1027 {
9a83f860 1028 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
8f93a29f
VS
1029 }
1030
1031 size_t idx = nStart;
1032 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1033 i != rend(); --idx, ++i )
1034 {
1035 if ( *i != ch )
1036 return idx;
1037 }
1038
1039 return npos;
1040}
1041
1042// the functions above were implemented for wchar_t* arguments in Unicode
1043// build and char* in ANSI build; below are implementations for the other
1044// version:
1045#if wxUSE_UNICODE
1046 #define wxOtherCharType char
1047 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1048#else
1049 #define wxOtherCharType wchar_t
1050 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1051#endif
1052
1053size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1054 { return find_first_of(STRCONV(sz), nStart); }
1055
1056size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1057 size_t n) const
1058 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1059size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1060 { return find_last_of(STRCONV(sz), nStart); }
1061size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1062 size_t n) const
1063 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1064size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1065 { return find_first_not_of(STRCONV(sz), nStart); }
1066size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1067 size_t n) const
1068 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1069size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1070 { return find_last_not_of(STRCONV(sz), nStart); }
1071size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1072 size_t n) const
1073 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1074
1075#undef wxOtherCharType
1076#undef STRCONV
1077
1078#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1079
1080// ===========================================================================
1081// other common string functions
1082// ===========================================================================
1083
1084int wxString::CmpNoCase(const wxString& s) const
1085{
5858fe68
VZ
1086#if !wxUSE_UNICODE_UTF8
1087 // We compare NUL-delimited chunks of the strings inside the loop. We will
1088 // do as many iterations as there are embedded NULs in the string, i.e.
1089 // usually we will run it just once.
1090
1091 typedef const wxStringImpl::value_type *pchar_type;
1092 const pchar_type thisBegin = m_impl.c_str();
1093 const pchar_type thatBegin = s.m_impl.c_str();
1094
1095 const pchar_type thisEnd = thisBegin + m_impl.length();
1096 const pchar_type thatEnd = thatBegin + s.m_impl.length();
825d69c1 1097
5858fe68
VZ
1098 pchar_type thisCur = thisBegin;
1099 pchar_type thatCur = thatBegin;
825d69c1 1100
5858fe68
VZ
1101 int rc;
1102 for ( ;; )
1103 {
1104 // Compare until the next NUL, if the strings differ this is the final
1105 // result.
1106 rc = wxStricmp(thisCur, thatCur);
1107 if ( rc )
1108 break;
1109
1110 const size_t lenChunk = wxStrlen(thisCur);
1111 thisCur += lenChunk;
1112 thatCur += lenChunk;
1113
1114 // Skip all the NULs as wxStricmp() doesn't handle them.
1115 for ( ; !*thisCur; thisCur++, thatCur++ )
1116 {
1117 // Check if we exhausted either of the strings.
1118 if ( thisCur == thisEnd )
1119 {
1120 // This one is exhausted, is the other one too?
1121 return thatCur == thatEnd ? 0 : -1;
1122 }
8f93a29f 1123
5858fe68
VZ
1124 if ( thatCur == thatEnd )
1125 {
1126 // Because of the test above we know that this one is not
1127 // exhausted yet so it's greater than the other one that is.
1128 return 1;
1129 }
825d69c1 1130
5858fe68
VZ
1131 if ( *thatCur )
1132 {
1133 // Anything non-NUL is greater than NUL.
1134 return -1;
1135 }
1136 }
825d69c1 1137 }
825d69c1 1138
5858fe68
VZ
1139 return rc;
1140#else // wxUSE_UNICODE_UTF8
1141 // CRT functions can't be used for case-insensitive comparison of UTF-8
1142 // strings so do it in the naive, simple and inefficient way.
825d69c1
VZ
1143
1144 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
8f93a29f
VS
1145 const_iterator i1 = begin();
1146 const_iterator end1 = end();
1147 const_iterator i2 = s.begin();
1148 const_iterator end2 = s.end();
1149
0d8b0f94 1150 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1151 {
1152 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1153 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1154 if ( lower1 != lower2 )
1155 return lower1 < lower2 ? -1 : 1;
1156 }
1157
1158 size_t len1 = length();
1159 size_t len2 = s.length();
dcb68102 1160
8f93a29f
VS
1161 if ( len1 < len2 )
1162 return -1;
1163 else if ( len1 > len2 )
1164 return 1;
1165 return 0;
5858fe68 1166#endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
dcb68102
RN
1167}
1168
1169
b1ac3b56 1170#if wxUSE_UNICODE
e015c2a3 1171
cf6bedce
SC
1172#ifdef __MWERKS__
1173#ifndef __SCHAR_MAX__
1174#define __SCHAR_MAX__ 127
1175#endif
1176#endif
1177
e6310bbc 1178wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1179{
e6310bbc 1180 if (!ascii || len == 0)
b1ac3b56 1181 return wxEmptyString;
e015c2a3 1182
b1ac3b56 1183 wxString res;
e015c2a3 1184
e6310bbc 1185 {
6798451b 1186 wxStringInternalBuffer buf(res, len);
602a857b 1187 wxStringCharType *dest = buf;
c1eada83 1188
602a857b
VS
1189 for ( ; len > 0; --len )
1190 {
1191 unsigned char c = (unsigned char)*ascii++;
1192 wxASSERT_MSG( c < 0x80,
9a83f860 1193 wxT("Non-ASCII value passed to FromAscii().") );
c1eada83 1194
602a857b
VS
1195 *dest++ = (wchar_t)c;
1196 }
e015c2a3
VZ
1197 }
1198
b1ac3b56
RR
1199 return res;
1200}
1201
e6310bbc
VS
1202wxString wxString::FromAscii(const char *ascii)
1203{
0081dd72 1204 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1205}
1206
c5288c5c 1207wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1208{
1209 // What do we do with '\0' ?
1210
c1eada83 1211 unsigned char c = (unsigned char)ascii;
8760bc65 1212
9a83f860 1213 wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
c1eada83
VS
1214
1215 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1216 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1217}
1218
de4983f3 1219const wxScopedCharBuffer wxString::ToAscii() const
b1ac3b56 1220{
e015c2a3
VZ
1221 // this will allocate enough space for the terminating NUL too
1222 wxCharBuffer buffer(length());
6e394fc6 1223 char *dest = buffer.data();
e015c2a3 1224
c1eada83 1225 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1226 {
c1eada83
VS
1227 wxUniChar c(*i);
1228 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1229 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1230
1231 // the output string can't have embedded NULs anyhow, so we can safely
1232 // stop at first of them even if we do have any
c1eada83 1233 if ( !c )
e015c2a3 1234 break;
b1ac3b56 1235 }
e015c2a3 1236
b1ac3b56
RR
1237 return buffer;
1238}
e015c2a3 1239
c1eada83 1240#endif // wxUSE_UNICODE
b1ac3b56 1241
c801d85f 1242// extract string of length nCount starting at nFirst
c801d85f
KB
1243wxString wxString::Mid(size_t nFirst, size_t nCount) const
1244{
73f507f5 1245 size_t nLen = length();
30d9011f 1246
73f507f5
WS
1247 // default value of nCount is npos and means "till the end"
1248 if ( nCount == npos )
1249 {
1250 nCount = nLen - nFirst;
1251 }
30d9011f 1252
73f507f5
WS
1253 // out-of-bounds requests return sensible things
1254 if ( nFirst + nCount > nLen )
1255 {
1256 nCount = nLen - nFirst;
1257 }
c801d85f 1258
73f507f5
WS
1259 if ( nFirst > nLen )
1260 {
1261 // AllocCopy() will return empty string
1262 return wxEmptyString;
1263 }
c801d85f 1264
73f507f5
WS
1265 wxString dest(*this, nFirst, nCount);
1266 if ( dest.length() != nCount )
1267 {
9a83f860 1268 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
73f507f5 1269 }
30d9011f 1270
73f507f5 1271 return dest;
c801d85f
KB
1272}
1273
e87b7833 1274// check that the string starts with prefix and return the rest of the string
d775fa82 1275// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1276bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1277{
c5e7a7d7
VS
1278 if ( compare(0, prefix.length(), prefix) != 0 )
1279 return false;
f6bcfd97
BP
1280
1281 if ( rest )
1282 {
1283 // put the rest of the string into provided pointer
c5e7a7d7 1284 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1285 }
1286
d775fa82 1287 return true;
f6bcfd97
BP
1288}
1289
3affcd07
VZ
1290
1291// check that the string ends with suffix and return the rest of it in the
1292// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1293bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1294{
c5e7a7d7 1295 int start = length() - suffix.length();
81727065
VS
1296
1297 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1298 return false;
1299
1300 if ( rest )
1301 {
1302 // put the rest of the string into provided pointer
1303 rest->assign(*this, 0, start);
1304 }
1305
1306 return true;
1307}
1308
1309
c801d85f
KB
1310// extract nCount last (rightmost) characters
1311wxString wxString::Right(size_t nCount) const
1312{
e87b7833
MB
1313 if ( nCount > length() )
1314 nCount = length();
c801d85f 1315
e87b7833
MB
1316 wxString dest(*this, length() - nCount, nCount);
1317 if ( dest.length() != nCount ) {
9a83f860 1318 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
b1801e0e 1319 }
c801d85f
KB
1320 return dest;
1321}
1322
7929902d 1323// get all characters after the last occurrence of ch
c801d85f 1324// (returns the whole string if ch not found)
c9f78968 1325wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1326{
1327 wxString str;
d775fa82 1328 int iPos = Find(ch, true);
3c67202d 1329 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1330 str = *this;
1331 else
c565abe1 1332 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1333
1334 return str;
1335}
1336
1337// extract nCount first (leftmost) characters
1338wxString wxString::Left(size_t nCount) const
1339{
e87b7833
MB
1340 if ( nCount > length() )
1341 nCount = length();
c801d85f 1342
e87b7833
MB
1343 wxString dest(*this, 0, nCount);
1344 if ( dest.length() != nCount ) {
9a83f860 1345 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
b1801e0e 1346 }
c801d85f
KB
1347 return dest;
1348}
1349
7929902d 1350// get all characters before the first occurrence of ch
c801d85f 1351// (returns the whole string if ch not found)
6becc1e6 1352wxString wxString::BeforeFirst(wxUniChar ch, wxString *rest) const
c801d85f 1353{
e87b7833 1354 int iPos = Find(ch);
c565abe1 1355 if ( iPos == wxNOT_FOUND )
6becc1e6
VZ
1356 {
1357 iPos = length();
1358 if ( rest )
1359 rest->clear();
1360 }
1361 else
1362 {
1363 if ( rest )
1364 rest->assign(*this, iPos + 1, npos);
1365 }
1366
e87b7833 1367 return wxString(*this, 0, iPos);
c801d85f
KB
1368}
1369
7929902d 1370/// get all characters before the last occurrence of ch
c801d85f 1371/// (returns empty string if ch not found)
6becc1e6 1372wxString wxString::BeforeLast(wxUniChar ch, wxString *rest) const
c801d85f
KB
1373{
1374 wxString str;
d775fa82 1375 int iPos = Find(ch, true);
6becc1e6
VZ
1376 if ( iPos != wxNOT_FOUND )
1377 {
1378 if ( iPos != 0 )
1379 str.assign(*this, 0, iPos);
1380
1381 if ( rest )
1382 rest->assign(*this, iPos + 1, npos);
1383 }
1384 else
1385 {
1386 if ( rest )
1387 *rest = *this;
1388 }
c801d85f
KB
1389
1390 return str;
1391}
1392
7929902d 1393/// get all characters after the first occurrence of ch
c801d85f 1394/// (returns empty string if ch not found)
c9f78968 1395wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1396{
1397 wxString str;
1398 int iPos = Find(ch);
3c67202d 1399 if ( iPos != wxNOT_FOUND )
c565abe1 1400 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1401
1402 return str;
1403}
1404
7929902d 1405// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1406size_t wxString::Replace(const wxString& strOld,
1407 const wxString& strNew, bool bReplaceAll)
c801d85f 1408{
a8f1f1b2 1409 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1410 wxCHECK_MSG( !strOld.empty(), 0,
9a83f860 1411 wxT("wxString::Replace(): invalid parameter") );
a8f1f1b2 1412
68482dc5
VZ
1413 wxSTRING_INVALIDATE_CACHE();
1414
510bb748 1415 size_t uiCount = 0; // count of replacements made
c801d85f 1416
8a627032
VZ
1417 // optimize the special common case: replacement of one character by
1418 // another one (in UTF-8 case we can only do this for ASCII characters)
1419 //
1420 // benchmarks show that this special version is around 3 times faster
1421 // (depending on the proportion of matching characters and UTF-8/wchar_t
1422 // build)
1423 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1424 {
1425 const wxStringCharType chOld = strOld.m_impl[0],
1426 chNew = strNew.m_impl[0];
1427
1428 // this loop is the simplified version of the one below
1429 for ( size_t pos = 0; ; )
1430 {
1431 pos = m_impl.find(chOld, pos);
1432 if ( pos == npos )
1433 break;
c801d85f 1434
8a627032
VZ
1435 m_impl[pos++] = chNew;
1436
1437 uiCount++;
1438
1439 if ( !bReplaceAll )
1440 break;
1441 }
1442 }
072682ce
VZ
1443 else if ( !bReplaceAll)
1444 {
1445 size_t pos = m_impl.find(strOld, 0);
1446 if ( pos != npos )
1447 {
1448 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1449 uiCount = 1;
1450 }
1451 }
1452 else // replace all occurrences
510bb748 1453 {
8a627032
VZ
1454 const size_t uiOldLen = strOld.m_impl.length();
1455 const size_t uiNewLen = strNew.m_impl.length();
1456
072682ce
VZ
1457 // first scan the string to find all positions at which the replacement
1458 // should be made
1459 wxVector<size_t> replacePositions;
1460
1461 size_t pos;
1462 for ( pos = m_impl.find(strOld.m_impl, 0);
1463 pos != npos;
1464 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
8a627032 1465 {
072682ce
VZ
1466 replacePositions.push_back(pos);
1467 ++uiCount;
1468 }
510bb748 1469
072682ce
VZ
1470 if ( !uiCount )
1471 return 0;
510bb748 1472
072682ce
VZ
1473 // allocate enough memory for the whole new string
1474 wxString tmp;
1475 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
ad5bb7d6 1476
072682ce
VZ
1477 // copy this string to tmp doing replacements on the fly
1478 size_t replNum = 0;
1479 for ( pos = 0; replNum < uiCount; replNum++ )
1480 {
1481 const size_t nextReplPos = replacePositions[replNum];
394b2900 1482
072682ce
VZ
1483 if ( pos != nextReplPos )
1484 {
1485 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1486 }
1487
1488 tmp.m_impl.append(strNew.m_impl);
1489 pos = nextReplPos + uiOldLen;
8a627032 1490 }
072682ce
VZ
1491
1492 if ( pos != m_impl.length() )
1493 {
1494 // append the rest of the string unchanged
1495 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1496 }
1497
1498 swap(tmp);
c801d85f 1499 }
c801d85f 1500
510bb748 1501 return uiCount;
c801d85f
KB
1502}
1503
1504bool wxString::IsAscii() const
1505{
a4a44612
VS
1506 for ( const_iterator i = begin(); i != end(); ++i )
1507 {
1508 if ( !(*i).IsAscii() )
1509 return false;
1510 }
1511
1512 return true;
c801d85f 1513}
dd1eaa89 1514
c801d85f
KB
1515bool wxString::IsWord() const
1516{
a4a44612
VS
1517 for ( const_iterator i = begin(); i != end(); ++i )
1518 {
1519 if ( !wxIsalpha(*i) )
1520 return false;
1521 }
1522
1523 return true;
c801d85f 1524}
dd1eaa89 1525
c801d85f
KB
1526bool wxString::IsNumber() const
1527{
a4a44612
VS
1528 if ( empty() )
1529 return true;
1530
1531 const_iterator i = begin();
1532
9a83f860 1533 if ( *i == wxT('-') || *i == wxT('+') )
a4a44612
VS
1534 ++i;
1535
1536 for ( ; i != end(); ++i )
1537 {
1538 if ( !wxIsdigit(*i) )
1539 return false;
1540 }
1541
1542 return true;
c801d85f
KB
1543}
1544
c801d85f
KB
1545wxString wxString::Strip(stripType w) const
1546{
1547 wxString s = *this;
d775fa82
WS
1548 if ( w & leading ) s.Trim(false);
1549 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1550 return s;
1551}
1552
c801d85f
KB
1553// ---------------------------------------------------------------------------
1554// case conversion
1555// ---------------------------------------------------------------------------
1556
1557wxString& wxString::MakeUpper()
1558{
e87b7833
MB
1559 for ( iterator it = begin(), en = end(); it != en; ++it )
1560 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1561
1562 return *this;
1563}
1564
1565wxString& wxString::MakeLower()
1566{
e87b7833
MB
1567 for ( iterator it = begin(), en = end(); it != en; ++it )
1568 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1569
1570 return *this;
1571}
1572
0c7db140
VZ
1573wxString& wxString::MakeCapitalized()
1574{
1575 const iterator en = end();
1576 iterator it = begin();
1577 if ( it != en )
1578 {
1579 *it = (wxChar)wxToupper(*it);
1580 for ( ++it; it != en; ++it )
1581 *it = (wxChar)wxTolower(*it);
1582 }
1583
1584 return *this;
1585}
1586
c801d85f
KB
1587// ---------------------------------------------------------------------------
1588// trimming and padding
1589// ---------------------------------------------------------------------------
1590
d775fa82 1591// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1592// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1593// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1594// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1595// space-like symbols somewhere except in the first 128 chars), it is arguably
1596// still better than trimming away accented letters
1597inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1598
c801d85f
KB
1599// trims spaces (in the sense of isspace) from left or right side
1600wxString& wxString::Trim(bool bFromRight)
1601{
3458e408
WS
1602 // first check if we're going to modify the string at all
1603 if ( !empty() &&
1604 (
1605 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1606 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1607 )
2c3b684c 1608 )
2c3b684c 1609 {
3458e408
WS
1610 if ( bFromRight )
1611 {
1612 // find last non-space character
d4d02bd5 1613 reverse_iterator psz = rbegin();
32c62191 1614 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1615 ++psz;
92df97b8 1616
3458e408 1617 // truncate at trailing space start
d4d02bd5 1618 erase(psz.base(), end());
3458e408
WS
1619 }
1620 else
1621 {
1622 // find first non-space character
1623 iterator psz = begin();
32c62191 1624 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1625 ++psz;
2c3b684c 1626
3458e408
WS
1627 // fix up data and length
1628 erase(begin(), psz);
1629 }
2c3b684c 1630 }
c801d85f 1631
3458e408 1632 return *this;
c801d85f
KB
1633}
1634
1635// adds nCount characters chPad to the string from either side
c9f78968 1636wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1637{
3458e408 1638 wxString s(chPad, nCount);
c801d85f 1639
3458e408
WS
1640 if ( bFromRight )
1641 *this += s;
1642 else
1643 {
1644 s += *this;
1645 swap(s);
1646 }
c801d85f 1647
3458e408 1648 return *this;
c801d85f
KB
1649}
1650
1651// truncate the string
1652wxString& wxString::Truncate(size_t uiLen)
1653{
3458e408
WS
1654 if ( uiLen < length() )
1655 {
1656 erase(begin() + uiLen, end());
1657 }
1658 //else: nothing to do, string is already short enough
c801d85f 1659
3458e408 1660 return *this;
c801d85f
KB
1661}
1662
1663// ---------------------------------------------------------------------------
3c67202d 1664// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1665// ---------------------------------------------------------------------------
1666
1667// find a character
c9f78968 1668int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1669{
3458e408 1670 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1671
3458e408 1672 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1673}
1674
cd0b1709
VZ
1675// ----------------------------------------------------------------------------
1676// conversion to numbers
1677// ----------------------------------------------------------------------------
1678
52de37c7
VS
1679// The implementation of all the functions below is exactly the same so factor
1680// it out. Note that number extraction works correctly on UTF-8 strings, so
1681// we can use wxStringCharType and wx_str() for maximum efficiency.
122f3c5d 1682
92df97b8 1683#ifndef __WXWINCE__
941a4e62
VS
1684 #define DO_IF_NOT_WINCE(x) x
1685#else
1686 #define DO_IF_NOT_WINCE(x)
92df97b8 1687#endif
4ea4767e 1688
529e491c 1689#define WX_STRING_TO_X_TYPE_START \
9a83f860 1690 wxCHECK_MSG( pVal, false, wxT("NULL output pointer") ); \
941a4e62 1691 DO_IF_NOT_WINCE( errno = 0; ) \
941a4e62 1692 const wxStringCharType *start = wx_str(); \
529e491c
FM
1693 wxStringCharType *end;
1694
69d31e31
VZ
1695// notice that we return false without modifying the output parameter at all if
1696// nothing could be parsed but we do modify it and return false then if we did
1697// parse something successfully but not the entire string
529e491c 1698#define WX_STRING_TO_X_TYPE_END \
69d31e31 1699 if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
c95e653c 1700 return false; \
529e491c 1701 *pVal = val; \
69d31e31 1702 return !*end;
cd0b1709 1703
c95e653c 1704bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1705{
9a83f860 1706 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1707
1708 WX_STRING_TO_X_TYPE_START
1709 long val = wxStrtol(start, &end, base);
1710 WX_STRING_TO_X_TYPE_END
619dcb09 1711}
cd0b1709 1712
c95e653c 1713bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1714{
9a83f860 1715 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1716
1717 WX_STRING_TO_X_TYPE_START
1718 unsigned long val = wxStrtoul(start, &end, base);
1719 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1720}
1721
c95e653c 1722bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1723{
9a83f860 1724 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1725
1726 WX_STRING_TO_X_TYPE_START
1727 wxLongLong_t val = wxStrtoll(start, &end, base);
1728 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1729}
1730
c95e653c 1731bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1732{
9a83f860 1733 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1734
1735 WX_STRING_TO_X_TYPE_START
1736 wxULongLong_t val = wxStrtoull(start, &end, base);
1737 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1738}
1739
c95e653c 1740bool wxString::ToDouble(double *pVal) const
cd0b1709 1741{
529e491c
FM
1742 WX_STRING_TO_X_TYPE_START
1743 double val = wxStrtod(start, &end);
1744 WX_STRING_TO_X_TYPE_END
1745}
cd0b1709 1746
529e491c 1747#if wxUSE_XLOCALE
e71e5b37 1748
529e491c
FM
1749bool wxString::ToCLong(long *pVal, int base) const
1750{
9a83f860 1751 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
cd0b1709 1752
529e491c 1753 WX_STRING_TO_X_TYPE_START
a51fdf81 1754#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1755 long val = wxStrtol_lA(start, &end, base, wxCLocale);
1756#else
1757 long val = wxStrtol_l(start, &end, base, wxCLocale);
1758#endif
1759 WX_STRING_TO_X_TYPE_END
1760}
c95e653c 1761
529e491c
FM
1762bool wxString::ToCULong(unsigned long *pVal, int base) const
1763{
9a83f860 1764 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
c95e653c 1765
529e491c 1766 WX_STRING_TO_X_TYPE_START
a51fdf81 1767#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1768 unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
1769#else
1770 unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
1771#endif
1772 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1773}
1774
529e491c
FM
1775bool wxString::ToCDouble(double *pVal) const
1776{
1777 WX_STRING_TO_X_TYPE_START
a51fdf81 1778#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1779 double val = wxStrtod_lA(start, &end, wxCLocale);
1780#else
1781 double val = wxStrtod_l(start, &end, wxCLocale);
1782#endif
1783 WX_STRING_TO_X_TYPE_END
1784}
1785
105993f7
VZ
1786#else // wxUSE_XLOCALE
1787
1788// Provide implementation of these functions even when wxUSE_XLOCALE is
1789// disabled, we still need them in wxWidgets internal code.
1790
1791// For integers we just assume the current locale uses the same number
1792// representation as the C one as there is nothing else we can do.
1793bool wxString::ToCLong(long *pVal, int base) const
1794{
1795 return ToLong(pVal, base);
1796}
1797
1798bool wxString::ToCULong(unsigned long *pVal, int base) const
1799{
1800 return ToULong(pVal, base);
1801}
1802
1803// For floating point numbers we have to handle the problem of the decimal
1804// point which is different in different locales.
1805bool wxString::ToCDouble(double *pVal) const
1806{
1807 // Create a copy of this string using the decimal point instead of whatever
1808 // separator the current locale uses.
1809#if wxUSE_INTL
1810 wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1811 wxLOCALE_CAT_NUMBER);
1812 if ( sep == "." )
1813 {
1814 // We can avoid an unnecessary string copy in this case.
1815 return ToDouble(pVal);
1816 }
1817#else // !wxUSE_INTL
1818 // We don't know what the current separator is so it might even be a point
1819 // already, try to parse the string as a double:
1820 if ( ToDouble(pVal) )
1821 {
1822 // It must have been the point, nothing else to do.
1823 return true;
1824 }
1825
1826 // Try to guess the separator, using the most common alternative value.
1827 wxString sep(",");
1828#endif // wxUSE_INTL/!wxUSE_INTL
1829 wxString cstr(*this);
1830 cstr.Replace(".", sep);
1831
1832 return cstr.ToDouble(pVal);
1833}
1834
1835#endif // wxUSE_XLOCALE/!wxUSE_XLOCALE
529e491c 1836
951201d8
VZ
1837// ----------------------------------------------------------------------------
1838// number to string conversion
1839// ----------------------------------------------------------------------------
1840
1841/* static */
1842wxString wxString::FromCDouble(double val)
1843{
1844#if wxUSE_STD_IOSTREAM && wxUSE_STD_STRING
1845 // We assume that we can use the ostream and not wstream for numbers.
1846 wxSTD ostringstream os;
1847 os << val;
1848 return os.str();
1849#else // wxUSE_STD_IOSTREAM
1850 // Can't use iostream locale support, fall back to the manual method
1851 // instead.
1852 wxString s = FromDouble(val);
1853#if wxUSE_INTL
1854 wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1855 wxLOCALE_CAT_NUMBER);
1856#else // !wxUSE_INTL
1857 // As above, this is the most common alternative value. Notice that here it
1858 // doesn't matter if we guess wrongly and the current separator is already
1859 // ".": we'll just waste a call to Replace() in this case.
1860 wxString sep(",");
1861#endif // wxUSE_INTL/!wxUSE_INTL
1862
1863 s.Replace(sep, ".");
1864 return s;
1865#endif // wxUSE_STD_IOSTREAM/!wxUSE_STD_IOSTREAM
1866}
1867
c801d85f 1868// ---------------------------------------------------------------------------
9efd3367 1869// formatted output
c801d85f 1870// ---------------------------------------------------------------------------
378b05f7 1871
d1f6e2cf 1872#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1873/* static */
c9f78968 1874#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1875wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1876#else
d1f6e2cf 1877wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1878#endif
341e7d28 1879{
77c3e48a 1880 va_list argptr;
c9f78968 1881 va_start(argptr, format);
341e7d28 1882
77c3e48a 1883 wxString s;
c9f78968 1884 s.PrintfV(format, argptr);
341e7d28 1885
77c3e48a 1886 va_end(argptr);
341e7d28 1887
77c3e48a 1888 return s;
341e7d28 1889}
d1f6e2cf
VS
1890#endif // !wxUSE_UTF8_LOCALE_ONLY
1891
#if wxUSE_UNICODE_UTF8
/* static */
// Return a new string holding the result of formatting the variable
// arguments according to the (narrow character) format string.
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1907
1908/* static */
c9f78968 1909wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1910{
1911 wxString s;
c9f78968 1912 s.PrintfV(format, argptr);
341e7d28
VZ
1913 return s;
1914}
1915
d1f6e2cf 1916#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1917#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1918int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1919#else
d1f6e2cf 1920int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1921#endif
c801d85f 1922{
ba9bbf13 1923 va_list argptr;
c9f78968 1924 va_start(argptr, format);
c801d85f 1925
c9f78968
VS
1926#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1927 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1928 // because it's the only cast that works safely for downcasting when
1929 // multiple inheritance is used:
1930 wxString *str = static_cast<wxString*>(this);
1931#else
1932 wxString *str = this;
1933#endif
1934
1935 int iLen = str->PrintfV(format, argptr);
c801d85f 1936
ba9bbf13 1937 va_end(argptr);
c801d85f 1938
ba9bbf13 1939 return iLen;
c801d85f 1940}
d1f6e2cf
VS
1941#endif // !wxUSE_UTF8_LOCALE_ONLY
1942
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the (narrow character) format string and
// return the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int len = PrintfV(format, args);

    va_end(args);

    return len;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1956
67612ff1
DE
1957/*
1958 Uses wxVsnprintf and places the result into the this string.
1959
1960 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1961 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1962 the ISO C99 (and thus SUSv3) standard the return value for the case of
1963 an undersized buffer is inconsistent. For conforming vsnprintf
1964 implementations the function must return the number of characters that
1965 would have been printed had the buffer been large enough. For conforming
1966 vswprintf implementations the function must return a negative number
1967 and set errno.
1968
1969 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1970 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1971 those are defined in the standard and backed up by several conformance
1972 statements. Note that ENOMEM mentioned in the manual page does not
1973 apply to swprintf, only wprintf and fwprintf.
1974
1975 Official manual page:
1976 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1977
1978 Some conformance statements (AIX, Solaris):
1979 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1980 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1981
1982 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1983 EILSEQ and EINVAL are specifically defined to mean the error is other than
1984 an undersized buffer and no other errno are defined we treat those two
1985 as meaning hard errors and everything else gets the old behavior which
1986 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1987
67612ff1
DE
1988 In practice it's impossible to determine before compilation which behavior
1989 may be used. The vswprintf function may have vsnprintf-like behavior or
1990 vice-versa. Behavior detected on one release can theoretically change
1991 with an updated release. Not to mention that configure testing for it
1992 would require the test to be run on the host system, not the build system
1993 which makes cross compilation difficult. Therefore, we make no assumptions
1994 about behavior and try our best to handle every known case, including the
1995 case where wxVsnprintf returns a negative number and fails to set errno.
1996
1997 There is yet one more non-standard implementation and that is our own.
1998 Fortunately, that can be detected at compile-time.
1999
2000 On top of all that, ISO C99 explicitly defines snprintf to write a null
2001 character to the last position of the specified buffer. That would be at
2002 the given buffer size minus 1. It is supposed to do this even if it
2003 turns out that the buffer is sized too small.
2004
2005 Darwin (tested on 10.5) follows the C99 behavior exactly.
2006
2007 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
2008 errno even when it fails. However, it only seems to ever fail due
2009 to an undersized buffer.
2010*/
2523e9b7
VS
2011#if wxUSE_UNICODE_UTF8
2012template<typename BufferType>
2013#else
2014// we only need one version in non-UTF8 builds and at least two Windows
2015// compilers have problems with this function template, so use just one
2016// normal function here
2017#endif
2018static int DoStringPrintfV(wxString& str,
2019 const wxString& format, va_list argptr)
c801d85f 2020{
f6f5941b 2021 int size = 1024;
e87b7833 2022
f6f5941b
VZ
2023 for ( ;; )
2024 {
2523e9b7
VS
2025#if wxUSE_UNICODE_UTF8
2026 BufferType tmp(str, size + 1);
2027 typename BufferType::CharType *buf = tmp;
2028#else
2029 wxStringBuffer tmp(str, size + 1);
de2589be 2030 wxChar *buf = tmp;
2523e9b7 2031#endif
2bb67b80 2032
ba9bbf13
WS
2033 if ( !buf )
2034 {
2035 // out of memory
2036 return -1;
e87b7833 2037 }
f6f5941b 2038
ba9bbf13
WS
2039 // wxVsnprintf() may modify the original arg pointer, so pass it
2040 // only a copy
2041 va_list argptrcopy;
2042 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
2043
2044#ifndef __WXWINCE__
2045 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
2046 errno = 0;
2047#endif
2523e9b7 2048 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
2049 va_end(argptrcopy);
2050
2051 // some implementations of vsnprintf() don't NUL terminate
2052 // the string if there is not enough space for it so
2053 // always do it manually
67612ff1
DE
2054 // FIXME: This really seems to be the wrong and would be an off-by-one
2055 // bug except the code above allocates an extra character.
9a83f860 2056 buf[size] = wxT('\0');
ba9bbf13 2057
caff62f2
VZ
2058 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
2059 // total number of characters which would have been written if the
b1727cfe 2060 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
2061 if ( len < 0 )
2062 {
52de37c7
VS
2063 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
2064 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
2065 // is true if *both* of them use our own implementation,
2066 // otherwise we can't be sure
f2bbe5b6
VZ
2067#if wxUSE_WXVSNPRINTF
2068 // we know that our own implementation of wxVsnprintf() returns -1
2069 // only for a format error - thus there's something wrong with
2070 // the user's format string
a33c7045 2071 buf[0] = '\0';
f2bbe5b6 2072 return -1;
52de37c7
VS
2073#else // possibly using system version
2074 // assume it only returns error if there is not enough space, but
2075 // as we don't know how much we need, double the current size of
2076 // the buffer
67612ff1 2077#ifndef __WXWINCE__
a9a854d7
DE
2078 if( (errno == EILSEQ) || (errno == EINVAL) )
2079 // If errno was set to one of the two well-known hard errors
2080 // then fail immediately to avoid an infinite loop.
2081 return -1;
2082 else
2083#endif // __WXWINCE__
67612ff1
DE
2084 // still not enough, as we don't know how much we need, double the
2085 // current size of the buffer
2086 size *= 2;
f2bbe5b6 2087#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 2088 }
64f8f94c 2089 else if ( len >= size )
de2589be 2090 {
f2bbe5b6 2091#if wxUSE_WXVSNPRINTF
c95e653c 2092 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
2093 // size+1 when there's not enough space but that's not the size
2094 // of the required buffer!
2095 size *= 2; // so we just double the current size of the buffer
2096#else
64f8f94c
VZ
2097 // some vsnprintf() implementations NUL-terminate the buffer and
2098 // some don't in len == size case, to be safe always add 1
67612ff1
DE
2099 // FIXME: I don't quite understand this comment. The vsnprintf
2100 // function is specifically defined to return the number of
2101 // characters printed not including the null terminator.
2102 // So OF COURSE you need to add 1 to get the right buffer size.
2103 // The following line is definitely correct, no question.
64f8f94c 2104 size = len + 1;
f2bbe5b6 2105#endif
de2589be
VZ
2106 }
2107 else // ok, there was enough space
f6f5941b 2108 {
f6f5941b
VZ
2109 break;
2110 }
f6f5941b
VZ
2111 }
2112
2113 // we could have overshot
2523e9b7
VS
2114 str.Shrink();
2115
2116 return str.length();
2117}
c801d85f 2118
2523e9b7
VS
2119int wxString::PrintfV(const wxString& format, va_list argptr)
2120{
2523e9b7
VS
2121#if wxUSE_UNICODE_UTF8
2122 #if wxUSE_STL_BASED_WXSTRING
2123 typedef wxStringTypeBuffer<char> Utf8Buffer;
2124 #else
6798451b 2125 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
2126 #endif
2127#endif
2128
2129#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 2130 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2131#else
2132 #if wxUSE_UNICODE_UTF8
2133 if ( wxLocaleIsUtf8 )
c6255a6e 2134 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2135 else
2136 // wxChar* version
c6255a6e 2137 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 2138 #else
c6255a6e 2139 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
2140 #endif // UTF8/WCHAR
2141#endif
c801d85f
KB
2142}
2143
097c080b
VZ
2144// ----------------------------------------------------------------------------
2145// misc other operations
2146// ----------------------------------------------------------------------------
0c5d3e1c 2147
d775fa82 2148// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
2149// '?' metacharacters (as usual, '?' matches any character and '*' any number
2150// of them)
8a540c88 2151bool wxString::Matches(const wxString& mask) const
097c080b 2152{
d6044f58
VZ
2153 // I disable this code as it doesn't seem to be faster (in fact, it seems
2154 // to be much slower) than the old, hand-written code below and using it
2155 // here requires always linking with libregex even if the user code doesn't
2156 // use it
2157#if 0 // wxUSE_REGEX
706c2ac9
VZ
2158 // first translate the shell-like mask into a regex
2159 wxString pattern;
2160 pattern.reserve(wxStrlen(pszMask));
2161
9a83f860 2162 pattern += wxT('^');
706c2ac9
VZ
2163 while ( *pszMask )
2164 {
2165 switch ( *pszMask )
2166 {
9a83f860
VZ
2167 case wxT('?'):
2168 pattern += wxT('.');
706c2ac9
VZ
2169 break;
2170
9a83f860
VZ
2171 case wxT('*'):
2172 pattern += wxT(".*");
706c2ac9
VZ
2173 break;
2174
9a83f860
VZ
2175 case wxT('^'):
2176 case wxT('.'):
2177 case wxT('$'):
2178 case wxT('('):
2179 case wxT(')'):
2180 case wxT('|'):
2181 case wxT('+'):
2182 case wxT('\\'):
706c2ac9
VZ
2183 // these characters are special in a RE, quote them
2184 // (however note that we don't quote '[' and ']' to allow
2185 // using them for Unix shell like matching)
9a83f860 2186 pattern += wxT('\\');
706c2ac9
VZ
2187 // fall through
2188
2189 default:
2190 pattern += *pszMask;
2191 }
2192
2193 pszMask++;
2194 }
9a83f860 2195 pattern += wxT('$');
706c2ac9
VZ
2196
2197 // and now use it
2198 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2199#else // !wxUSE_REGEX
9a4232dc
VZ
2200 // TODO: this is, of course, awfully inefficient...
2201
8a540c88
VS
2202 // FIXME-UTF8: implement using iterators, remove #if
2203#if wxUSE_UNICODE_UTF8
de4983f3
VS
2204 const wxScopedWCharBuffer maskBuf = mask.wc_str();
2205 const wxScopedWCharBuffer txtBuf = wc_str();
8a540c88
VS
2206 const wxChar *pszMask = maskBuf.data();
2207 const wxChar *pszTxt = txtBuf.data();
2208#else
2209 const wxChar *pszMask = mask.wx_str();
9a4232dc 2210 // the char currently being checked
8a540c88
VS
2211 const wxChar *pszTxt = wx_str();
2212#endif
9a4232dc
VZ
2213
2214 // the last location where '*' matched
2215 const wxChar *pszLastStarInText = NULL;
2216 const wxChar *pszLastStarInMask = NULL;
2217
2218match:
2219 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2220 switch ( *pszMask ) {
223d09f6
KB
2221 case wxT('?'):
2222 if ( *pszTxt == wxT('\0') )
d775fa82 2223 return false;
097c080b 2224
9a4232dc 2225 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2226
097c080b
VZ
2227 break;
2228
223d09f6 2229 case wxT('*'):
097c080b 2230 {
9a4232dc
VZ
2231 // remember where we started to be able to backtrack later
2232 pszLastStarInText = pszTxt;
2233 pszLastStarInMask = pszMask;
2234
097c080b 2235 // ignore special chars immediately following this one
9a4232dc 2236 // (should this be an error?)
223d09f6 2237 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2238 pszMask++;
2239
2240 // if there is nothing more, match
223d09f6 2241 if ( *pszMask == wxT('\0') )
d775fa82 2242 return true;
097c080b
VZ
2243
2244 // are there any other metacharacters in the mask?
c86f1403 2245 size_t uiLenMask;
223d09f6 2246 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2247
2248 if ( pEndMask != NULL ) {
2249 // we have to match the string between two metachars
2250 uiLenMask = pEndMask - pszMask;
2251 }
2252 else {
2253 // we have to match the remainder of the string
2bb67b80 2254 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2255 }
2256
2257 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2258 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2259 if ( pMatch == NULL )
d775fa82 2260 return false;
097c080b
VZ
2261
2262 // -1 to compensate "++" in the loop
2263 pszTxt = pMatch + uiLenMask - 1;
2264 pszMask += uiLenMask - 1;
2265 }
2266 break;
2267
2268 default:
2269 if ( *pszMask != *pszTxt )
d775fa82 2270 return false;
097c080b
VZ
2271 break;
2272 }
2273 }
2274
2275 // match only if nothing left
9a4232dc 2276 if ( *pszTxt == wxT('\0') )
d775fa82 2277 return true;
9a4232dc
VZ
2278
2279 // if we failed to match, backtrack if we can
2280 if ( pszLastStarInText ) {
2281 pszTxt = pszLastStarInText + 1;
2282 pszMask = pszLastStarInMask;
2283
2284 pszLastStarInText = NULL;
2285
2286 // don't bother resetting pszLastStarInMask, it's unnecessary
2287
2288 goto match;
2289 }
2290
d775fa82 2291 return false;
706c2ac9 2292#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2293}
2294
1fc5dd6f 2295// Count the number of chars
c9f78968 2296int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2297{
2298 int count = 0;
8f93a29f 2299 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2300 {
8f93a29f 2301 if ( *i == ch )
1fc5dd6f
JS
2302 count ++;
2303 }
2304 return count;
2305}
4e79262f 2306