]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
add a simple benchmark for wx and iconv converters benchmarking
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
6b769f3d 27#endif
c801d85f
KB
28
29#include <ctype.h>
92df97b8
WS
30
31#ifndef __WXWINCE__
32 #include <errno.h>
33#endif
34
c801d85f
KB
35#include <string.h>
36#include <stdlib.h>
9a08c20e 37
8116a0c5 38#include "wx/hashmap.h"
8f93a29f
VS
39
40// string handling functions used by wxString:
41#if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46#else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51#endif
8f93a29f 52
e87b7833 53
a7ea63e2
VS
54// ---------------------------------------------------------------------------
55// static class variables definition
56// ---------------------------------------------------------------------------
e87b7833 57
a7ea63e2
VS
// According to the STL, npos _must_ be (size_t)-1
59const size_t wxString::npos = (size_t) -1;
8f93a29f 60
68482dc5 61#if wxUSE_STRING_POS_CACHE
68482dc5 62
e810df36
VZ
63#ifdef wxHAS_COMPILER_TLS
64
65wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
66
67#else // !wxHAS_COMPILER_TLS
68
ad8ae788
VZ
69struct wxStrCacheInitializer
70{
71 wxStrCacheInitializer()
72 {
73 // calling this function triggers s_cache initialization in it, and
74 // from now on it becomes safe to call from multiple threads
75 wxString::GetCache();
76 }
77};
78
e317bd3f
SC
79/*
80wxString::Cache& wxString::GetCache()
81{
82 static wxTLS_TYPE(Cache) s_cache;
83
84 return wxTLS_VALUE(s_cache);
85}
86*/
87
ad8ae788
VZ
88static wxStrCacheInitializer gs_stringCacheInit;
89
e810df36
VZ
90#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
91
68482dc5
VZ
92// gdb seems to be unable to display thread-local variables correctly, at least
93// not my 6.4.98 version under amd64, so provide this debugging helper to do it
94#ifdef __WXDEBUG__
95
96struct wxStrCacheDumper
97{
98 static void ShowAll()
99 {
100 puts("*** wxString cache dump:");
101 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
102 {
103 const wxString::Cache::Element&
8b73c531 104 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
105
106 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
107 n,
8b73c531 108 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
109 c.str,
110 (unsigned long)c.pos,
111 (unsigned long)c.impl,
112 (long)c.len);
113 }
114 }
115};
116
117void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
118
119#endif // __WXDEBUG__
120
121#ifdef wxPROFILE_STRING_CACHE
122
123wxString::CacheStats wxString::ms_cacheStats;
124
8c3b65d9 125struct wxStrCacheStatsDumper
68482dc5 126{
8c3b65d9 127 ~wxStrCacheStatsDumper()
68482dc5
VZ
128 {
129 const wxString::CacheStats& stats = wxString::ms_cacheStats;
130
131 if ( stats.postot )
132 {
133 puts("*** wxString cache statistics:");
134 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
135 stats.postot);
136 printf("\tHits %u (of which %u not used) or %.2f%%\n",
137 stats.poshits,
138 stats.mishits,
139 100.*float(stats.poshits - stats.mishits)/stats.postot);
140 printf("\tAverage position requested: %.2f\n",
141 float(stats.sumpos) / stats.postot);
142 printf("\tAverage offset after cached hint: %.2f\n",
143 float(stats.sumofs) / stats.postot);
144 }
145
146 if ( stats.lentot )
147 {
148 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
149 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
150 }
151 }
8c3b65d9 152};
68482dc5 153
8c3b65d9 154static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
155
156#endif // wxPROFILE_STRING_CACHE
157
158#endif // wxUSE_STRING_POS_CACHE
159
a7ea63e2
VS
160// ----------------------------------------------------------------------------
161// global functions
162// ----------------------------------------------------------------------------
e87b7833 163
a7ea63e2 164#if wxUSE_STD_IOSTREAM
8f93a29f 165
a7ea63e2 166#include <iostream>
8f93a29f 167
a7ea63e2 168wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 169{
7a906e1a 170#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
ddf01bdb
VZ
171 const wxCharBuffer buf(str.AsCharBuf());
172 if ( !buf )
173 os.clear(wxSTD ios_base::failbit);
174 else
175 os << buf.data();
176
177 return os;
a7ea63e2 178#else
7a906e1a 179 return os << str.AsInternal();
a7ea63e2 180#endif
8f93a29f
VS
181}
182
04abe4bc
VS
183wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
184{
185 return os << str.c_str();
186}
187
188wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
189{
190 return os << str.data();
191}
192
193#ifndef __BORLANDC__
194wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
195{
196 return os << str.data();
197}
198#endif
199
6a6ea041 200#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
201
202wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
203{
204 return wos << str.wc_str();
205}
206
207wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
208{
209 return wos << str.AsWChar();
210}
211
212wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
213{
214 return wos << str.data();
215}
216
6a6ea041 217#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 218
a7ea63e2 219#endif // wxUSE_STD_IOSTREAM
e87b7833 220
81727065
VS
221// ===========================================================================
222// wxString class core
223// ===========================================================================
224
225#if wxUSE_UNICODE_UTF8
226
81727065
VS
227void wxString::PosLenToImpl(size_t pos, size_t len,
228 size_t *implPos, size_t *implLen) const
229{
230 if ( pos == npos )
68482dc5 231 {
81727065 232 *implPos = npos;
68482dc5
VZ
233 }
234 else // have valid start position
81727065 235 {
68482dc5
VZ
236 const const_iterator b = GetIterForNthChar(pos);
237 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 238 if ( len == npos )
68482dc5 239 {
81727065 240 *implLen = npos;
68482dc5
VZ
241 }
242 else // have valid length too
81727065 243 {
68482dc5
VZ
244 // we need to handle the case of length specifying a substring
245 // going beyond the end of the string, just as std::string does
246 const const_iterator e(end());
247 const_iterator i(b);
248 while ( len && i <= e )
249 {
250 ++i;
251 --len;
252 }
253
254 *implLen = i.impl() - b.impl();
81727065
VS
255 }
256 }
257}
258
259#endif // wxUSE_UNICODE_UTF8
260
11aac4ba
VS
261// ----------------------------------------------------------------------------
262// wxCStrData converted strings caching
263// ----------------------------------------------------------------------------
264
132276cf
VS
265// FIXME-UTF8: temporarily disabled because it doesn't work with global
266// string objects; re-enable after fixing this bug and benchmarking
267// performance to see if using a hash is a good idea at all
268#if 0
269
11aac4ba
VS
270// For backward compatibility reasons, it must be possible to assign the value
271// returned by wxString::c_str() to a char* or wchar_t* variable and work with
272// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
273// because the memory would be freed immediately, but it has to be valid as long
274// as the string is not modified, so that code like this still works:
275//
276// const wxChar *s = str.c_str();
277// while ( s ) { ... }
278
279// FIXME-UTF8: not thread safe!
280// FIXME-UTF8: we currently clear the cached conversion only when the string is
281// destroyed, but we should do it when the string is modified, to
282// keep memory usage down
283// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
284// invalidated the cache on every change, we could keep the previous
285// conversion
286// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
287// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
288
289template<typename T>
290static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
291{
6c4ebcda 292 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
293 if ( i != hash.end() )
294 {
295 free(i->second);
296 hash.erase(i);
297 }
298}
299
300#if wxUSE_UNICODE
6c4ebcda
VS
301// NB: non-STL implementation doesn't compile with "const wxString*" key type,
302// so we have to use wxString* here and const-cast when used
11aac4ba
VS
303WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
304 wxStringCharConversionCache);
305static wxStringCharConversionCache gs_stringsCharCache;
306
307const char* wxCStrData::AsChar() const
308{
    // remove the previously cached value, if any (see FIXMEs above):
310 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
311
312 // convert the string and keep it:
6c4ebcda
VS
313 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
314 m_str->mb_str().release();
11aac4ba
VS
315
316 return s + m_offset;
317}
318#endif // wxUSE_UNICODE
319
320#if !wxUSE_UNICODE_WCHAR
321WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
322 wxStringWCharConversionCache);
323static wxStringWCharConversionCache gs_stringsWCharCache;
324
325const wchar_t* wxCStrData::AsWChar() const
326{
    // remove the previously cached value, if any (see FIXMEs above):
328 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
329
330 // convert the string and keep it:
6c4ebcda
VS
331 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
332 m_str->wc_str().release();
11aac4ba
VS
333
334 return s + m_offset;
335}
336#endif // !wxUSE_UNICODE_WCHAR
337
11aac4ba
VS
338wxString::~wxString()
339{
340#if wxUSE_UNICODE
341 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
342 DeleteStringFromConversionCache(gs_stringsCharCache, this);
343#endif
344#if !wxUSE_UNICODE_WCHAR
345 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
346#endif
347}
132276cf
VS
348#endif
349
111d9948 350#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
351const char* wxCStrData::AsChar() const
352{
111d9948
VS
353#if wxUSE_UNICODE_UTF8
354 if ( wxLocaleIsUtf8 )
355 return AsInternal();
356#endif
357 // under non-UTF8 locales, we have to convert the internal UTF-8
358 // representation using wxConvLibc and cache the result
359
132276cf 360 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
361
362 // convert the string:
2a7431e1
VZ
363 //
364 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
365 // have it) but it's unfortunately not obvious to implement
366 // because we don't know how big buffer do we need for the
367 // given string length (in case of multibyte encodings, e.g.
368 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
369 //
370 // One idea would be to store more than just m_convertedToChar
371 // in wxString: then we could record the length of the string
372 // which was converted the last time and try to reuse the same
373 // buffer if the current length is not greater than it (this
374 // could still fail because string could have been modified in
375 // place but it would work most of the time, so we'd do it and
376 // only allocate the new buffer if in-place conversion returned
377 // an error). We could also store a bit saying if the string
378 // was modified since the last conversion (and update it in all
379 // operation modifying the string, of course) to avoid unneeded
380 // consequential conversions. But both of these ideas require
381 // adding more fields to wxString and require profiling results
382 // to be sure that we really gain enough from them to justify
383 // doing it.
05f32fc3
VS
384 wxCharBuffer buf(str->mb_str());
385
28be59b4
VZ
386 // if it failed, return empty string and not NULL to avoid crashes in code
387 // written with either wxWidgets 2 wxString or std::string behaviour in
388 // mind: neither of them ever returns NULL and so we shouldn't neither
389 if ( !buf )
390 return "";
391
05f32fc3
VS
392 if ( str->m_convertedToChar &&
393 strlen(buf) == strlen(str->m_convertedToChar) )
394 {
395 // keep the same buffer for as long as possible, so that several calls
396 // to c_str() in a row still work:
397 strcpy(str->m_convertedToChar, buf);
398 }
399 else
400 {
401 str->m_convertedToChar = buf.release();
402 }
403
404 // and keep it:
132276cf
VS
405 return str->m_convertedToChar + m_offset;
406}
111d9948 407#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
408
409#if !wxUSE_UNICODE_WCHAR
410const wchar_t* wxCStrData::AsWChar() const
411{
412 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
413
414 // convert the string:
415 wxWCharBuffer buf(str->wc_str());
416
28be59b4
VZ
417 // notice that here, unlike above in AsChar(), conversion can't fail as our
418 // internal UTF-8 is always well-formed -- or the string was corrupted and
419 // all bets are off anyhow
420
05f32fc3
VS
421 // FIXME-UTF8: do the conversion in-place in the existing buffer
422 if ( str->m_convertedToWChar &&
423 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
424 {
425 // keep the same buffer for as long as possible, so that several calls
426 // to c_str() in a row still work:
427 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
428 }
429 else
430 {
431 str->m_convertedToWChar = buf.release();
432 }
433
434 // and keep it:
132276cf
VS
435 return str->m_convertedToWChar + m_offset;
436}
437#endif // !wxUSE_UNICODE_WCHAR
438
439// ===========================================================================
440// wxString class core
441// ===========================================================================
442
443// ---------------------------------------------------------------------------
444// construction and conversion
445// ---------------------------------------------------------------------------
11aac4ba 446
81727065 447#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
448/* static */
449wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 450 const wxMBConv& conv)
8f93a29f
VS
451{
452 // anything to do?
453 if ( !psz || nLength == 0 )
81727065 454 return SubstrBufFromMB(L"", 0);
8f93a29f
VS
455
456 if ( nLength == npos )
457 nLength = wxNO_LEN;
458
459 size_t wcLen;
460 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
461 if ( !wcLen )
81727065 462 return SubstrBufFromMB(_T(""), 0);
8f93a29f
VS
463 else
464 return SubstrBufFromMB(wcBuf, wcLen);
465}
81727065
VS
466#endif // wxUSE_UNICODE_WCHAR
467
468#if wxUSE_UNICODE_UTF8
469/* static */
470wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
471 const wxMBConv& conv)
472{
81727065
VS
473 // anything to do?
474 if ( !psz || nLength == 0 )
475 return SubstrBufFromMB("", 0);
476
111d9948
VS
477 // if psz is already in UTF-8, we don't have to do the roundtrip to
478 // wchar_t* and back:
479 if ( conv.IsUTF8() )
480 {
481 // we need to validate the input because UTF8 iterators assume valid
482 // UTF-8 sequence and psz may be invalid:
483 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
484 {
9ef1ad0d
VZ
485 // we must pass the real string length to SubstrBufFromMB ctor
486 if ( nLength == npos )
487 nLength = psz ? strlen(psz) : 0;
111d9948
VS
488 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
489 }
490 // else: do the roundtrip through wchar_t*
491 }
492
81727065
VS
493 if ( nLength == npos )
494 nLength = wxNO_LEN;
495
496 // first convert to wide string:
497 size_t wcLen;
498 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
499 if ( !wcLen )
500 return SubstrBufFromMB("", 0);
501
502 // and then to UTF-8:
4fdfe2f3 503 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065
VS
504 // widechar -> UTF-8 conversion isn't supposed to ever fail:
505 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
506
507 return buf;
508}
509#endif // wxUSE_UNICODE_UTF8
510
511#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
512/* static */
513wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 514 const wxMBConv& conv)
8f93a29f
VS
515{
516 // anything to do?
517 if ( !pwz || nLength == 0 )
81727065 518 return SubstrBufFromWC("", 0);
8f93a29f
VS
519
520 if ( nLength == npos )
521 nLength = wxNO_LEN;
522
523 size_t mbLen;
524 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
525 if ( !mbLen )
81727065 526 return SubstrBufFromWC("", 0);
8f93a29f
VS
527 else
528 return SubstrBufFromWC(mbBuf, mbLen);
529}
81727065 530#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
531
532
81727065 533#if wxUSE_UNICODE_WCHAR
e87b7833 534
06386448 535//Convert wxString in Unicode mode to a multi-byte string
830f8f11 536const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 537{
81727065 538 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
e87b7833
MB
539}
540
81727065 541#elif wxUSE_UNICODE_UTF8
e87b7833 542
81727065
VS
543const wxWCharBuffer wxString::wc_str() const
544{
4fdfe2f3
VZ
545 return wxMBConvStrictUTF8().cMB2WC
546 (
547 m_impl.c_str(),
548 m_impl.length() + 1, // size, not length
549 NULL
550 );
81727065
VS
551}
552
553const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
554{
111d9948
VS
555 if ( conv.IsUTF8() )
556 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
557
81727065
VS
558 // FIXME-UTF8: use wc_str() here once we have buffers with length
559
560 size_t wcLen;
4fdfe2f3
VZ
561 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
562 (
563 m_impl.c_str(),
564 m_impl.length() + 1, // size
565 &wcLen
566 ));
81727065
VS
567 if ( !wcLen )
568 return wxCharBuffer("");
569
4f696af8 570 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
81727065
VS
571}
572
573#else // ANSI
eec47cc6 574
7663d0d4 575//Converts this string to a wide character string if unicode
06386448 576//mode is not enabled and wxUSE_WCHAR_T is enabled
830f8f11 577const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 578{
81727065 579 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
265d5cce 580}
7663d0d4 581
e87b7833
MB
582#endif // Unicode/ANSI
583
584// shrink to minimal size (releasing extra memory)
585bool wxString::Shrink()
586{
587 wxString tmp(begin(), end());
588 swap(tmp);
589 return tmp.length() == length();
590}
591
d8a4b666 592// deprecated compatibility code:
a7ea63e2 593#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 594wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
595{
596 return DoGetWriteBuf(nLen);
597}
598
599void wxString::UngetWriteBuf()
600{
601 DoUngetWriteBuf();
602}
603
604void wxString::UngetWriteBuf(size_t nLen)
605{
606 DoUngetWriteBuf(nLen);
607}
a7ea63e2 608#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 609
d8a4b666 610
e87b7833
MB
611// ---------------------------------------------------------------------------
612// data access
613// ---------------------------------------------------------------------------
614
615// all functions are inline in string.h
616
617// ---------------------------------------------------------------------------
e8f59039 618// concatenation operators
e87b7833
MB
619// ---------------------------------------------------------------------------
620
c801d85f 621/*
c801d85f
KB
622 * concatenation functions come in 5 flavours:
623 * string + string
624 * char + string and string + char
625 * C str + string and string + C str
626 */
627
b1801e0e 628wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 629{
992527a5 630#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
631 wxASSERT( str1.IsValid() );
632 wxASSERT( str2.IsValid() );
e87b7833 633#endif
097c080b 634
3458e408
WS
635 wxString s = str1;
636 s += str2;
3168a13f 637
3458e408 638 return s;
c801d85f
KB
639}
640
c9f78968 641wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 642{
992527a5 643#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 644 wxASSERT( str.IsValid() );
e87b7833 645#endif
3168a13f 646
3458e408
WS
647 wxString s = str;
648 s += ch;
097c080b 649
3458e408 650 return s;
c801d85f
KB
651}
652
c9f78968 653wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 654{
992527a5 655#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 656 wxASSERT( str.IsValid() );
e87b7833 657#endif
097c080b 658
3458e408
WS
659 wxString s = ch;
660 s += str;
3168a13f 661
3458e408 662 return s;
c801d85f
KB
663}
664
8f93a29f 665wxString operator+(const wxString& str, const char *psz)
c801d85f 666{
992527a5 667#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 668 wxASSERT( str.IsValid() );
e87b7833 669#endif
097c080b 670
3458e408 671 wxString s;
8f93a29f 672 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
673 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
674 }
675 s += str;
676 s += psz;
3168a13f 677
3458e408 678 return s;
c801d85f
KB
679}
680
8f93a29f 681wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 682{
992527a5 683#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
684 wxASSERT( str.IsValid() );
685#endif
686
687 wxString s;
688 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
689 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
690 }
691 s += str;
692 s += pwz;
693
694 return s;
695}
696
697wxString operator+(const char *psz, const wxString& str)
698{
a7ea63e2
VS
699#if !wxUSE_STL_BASED_WXSTRING
700 wxASSERT( str.IsValid() );
701#endif
702
703 wxString s;
704 if ( !s.Alloc(strlen(psz) + str.length()) ) {
705 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
706 }
707 s = psz;
708 s += str;
709
710 return s;
711}
712
713wxString operator+(const wchar_t *pwz, const wxString& str)
714{
715#if !wxUSE_STL_BASED_WXSTRING
716 wxASSERT( str.IsValid() );
717#endif
718
719 wxString s;
720 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
721 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
722 }
723 s = pwz;
724 s += str;
725
726 return s;
727}
728
729// ---------------------------------------------------------------------------
730// string comparison
731// ---------------------------------------------------------------------------
732
52de37c7
VS
733bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
734{
735 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
736 : wxToupper(GetChar(0u)) == wxToupper(c));
737}
738
a7ea63e2
VS
739#ifdef HAVE_STD_STRING_COMPARE
740
741// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
742// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
743// sort strings in characters code point order by sorting the byte sequence
744// in byte values order (i.e. what strcmp() and memcmp() do).
745
746int wxString::compare(const wxString& str) const
747{
748 return m_impl.compare(str.m_impl);
749}
750
751int wxString::compare(size_t nStart, size_t nLen,
752 const wxString& str) const
753{
754 size_t pos, len;
755 PosLenToImpl(nStart, nLen, &pos, &len);
756 return m_impl.compare(pos, len, str.m_impl);
757}
758
759int wxString::compare(size_t nStart, size_t nLen,
760 const wxString& str,
761 size_t nStart2, size_t nLen2) const
762{
763 size_t pos, len;
764 PosLenToImpl(nStart, nLen, &pos, &len);
765
766 size_t pos2, len2;
767 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
768
769 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
770}
771
772int wxString::compare(const char* sz) const
773{
774 return m_impl.compare(ImplStr(sz));
775}
776
777int wxString::compare(const wchar_t* sz) const
778{
779 return m_impl.compare(ImplStr(sz));
780}
781
782int wxString::compare(size_t nStart, size_t nLen,
783 const char* sz, size_t nCount) const
784{
785 size_t pos, len;
786 PosLenToImpl(nStart, nLen, &pos, &len);
787
788 SubstrBufFromMB str(ImplStr(sz, nCount));
789
790 return m_impl.compare(pos, len, str.data, str.len);
791}
792
793int wxString::compare(size_t nStart, size_t nLen,
794 const wchar_t* sz, size_t nCount) const
795{
796 size_t pos, len;
797 PosLenToImpl(nStart, nLen, &pos, &len);
798
799 SubstrBufFromWC str(ImplStr(sz, nCount));
800
801 return m_impl.compare(pos, len, str.data, str.len);
802}
803
804#else // !HAVE_STD_STRING_COMPARE
805
806static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
807 const wxStringCharType* s2, size_t l2)
808{
809 if( l1 == l2 )
810 return wxStringMemcmp(s1, s2, l1);
811 else if( l1 < l2 )
812 {
813 int ret = wxStringMemcmp(s1, s2, l1);
814 return ret == 0 ? -1 : ret;
815 }
816 else
817 {
818 int ret = wxStringMemcmp(s1, s2, l2);
819 return ret == 0 ? +1 : ret;
820 }
821}
822
823int wxString::compare(const wxString& str) const
824{
825 return ::wxDoCmp(m_impl.data(), m_impl.length(),
826 str.m_impl.data(), str.m_impl.length());
827}
828
829int wxString::compare(size_t nStart, size_t nLen,
830 const wxString& str) const
831{
832 wxASSERT(nStart <= length());
833 size_type strLen = length() - nStart;
834 nLen = strLen < nLen ? strLen : nLen;
835
836 size_t pos, len;
837 PosLenToImpl(nStart, nLen, &pos, &len);
838
839 return ::wxDoCmp(m_impl.data() + pos, len,
840 str.m_impl.data(), str.m_impl.length());
841}
842
843int wxString::compare(size_t nStart, size_t nLen,
844 const wxString& str,
845 size_t nStart2, size_t nLen2) const
846{
847 wxASSERT(nStart <= length());
848 wxASSERT(nStart2 <= str.length());
849 size_type strLen = length() - nStart,
850 strLen2 = str.length() - nStart2;
851 nLen = strLen < nLen ? strLen : nLen;
852 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
853
854 size_t pos, len;
855 PosLenToImpl(nStart, nLen, &pos, &len);
856 size_t pos2, len2;
857 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
858
859 return ::wxDoCmp(m_impl.data() + pos, len,
860 str.m_impl.data() + pos2, len2);
861}
862
863int wxString::compare(const char* sz) const
864{
865 SubstrBufFromMB str(ImplStr(sz, npos));
866 if ( str.len == npos )
867 str.len = wxStringStrlen(str.data);
868 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
869}
870
871int wxString::compare(const wchar_t* sz) const
872{
873 SubstrBufFromWC str(ImplStr(sz, npos));
874 if ( str.len == npos )
875 str.len = wxStringStrlen(str.data);
876 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
877}
878
879int wxString::compare(size_t nStart, size_t nLen,
880 const char* sz, size_t nCount) const
881{
882 wxASSERT(nStart <= length());
883 size_type strLen = length() - nStart;
884 nLen = strLen < nLen ? strLen : nLen;
097c080b 885
a7ea63e2
VS
886 size_t pos, len;
887 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 888
a7ea63e2
VS
889 SubstrBufFromMB str(ImplStr(sz, nCount));
890 if ( str.len == npos )
891 str.len = wxStringStrlen(str.data);
892
893 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
894}
895
a7ea63e2
VS
896int wxString::compare(size_t nStart, size_t nLen,
897 const wchar_t* sz, size_t nCount) const
8f93a29f 898{
a7ea63e2
VS
899 wxASSERT(nStart <= length());
900 size_type strLen = length() - nStart;
901 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 902
a7ea63e2
VS
903 size_t pos, len;
904 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 905
a7ea63e2
VS
906 SubstrBufFromWC str(ImplStr(sz, nCount));
907 if ( str.len == npos )
908 str.len = wxStringStrlen(str.data);
909
910 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
911}
912
a7ea63e2
VS
913#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
914
915
8f93a29f
VS
916// ---------------------------------------------------------------------------
917// find_{first,last}_[not]_of functions
918// ---------------------------------------------------------------------------
919
920#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 921
8f93a29f
VS
922// NB: All these functions are implemented with the argument being wxChar*,
923// i.e. widechar string in any Unicode build, even though native string
924// representation is char* in the UTF-8 build. This is because we couldn't
925// use memchr() to determine if a character is in a set encoded as UTF-8.
926
927size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 928{
8f93a29f 929 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
930}
931
8f93a29f 932size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 933{
8f93a29f 934 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
935}
936
8f93a29f 937size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 938{
8f93a29f 939 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 940
8f93a29f
VS
941 size_t idx = nStart;
942 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 943 {
8f93a29f
VS
944 if ( wxTmemchr(sz, *i, n) )
945 return idx;
dcb68102 946 }
8f93a29f
VS
947
948 return npos;
949}
950
951size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
952{
953 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
954
955 size_t idx = nStart;
956 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 957 {
8f93a29f
VS
958 if ( !wxTmemchr(sz, *i, n) )
959 return idx;
960 }
961
962 return npos;
963}
964
965
966size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
967{
968 return find_last_of(sz, nStart, wxStrlen(sz));
969}
970
971size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
972{
973 return find_last_not_of(sz, nStart, wxStrlen(sz));
974}
975
976size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
977{
978 size_t len = length();
979
980 if ( nStart == npos )
981 {
982 nStart = len - 1;
dcb68102 983 }
2c09fb3b 984 else
dcb68102 985 {
8f93a29f 986 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 987 }
8f93a29f
VS
988
989 size_t idx = nStart;
990 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
991 i != rend(); --idx, ++i )
992 {
993 if ( wxTmemchr(sz, *i, n) )
994 return idx;
995 }
996
997 return npos;
dcb68102
RN
998}
999
8f93a29f 1000size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 1001{
8f93a29f
VS
1002 size_t len = length();
1003
1004 if ( nStart == npos )
1005 {
1006 nStart = len - 1;
1007 }
1008 else
1009 {
1010 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1011 }
1012
1013 size_t idx = nStart;
1014 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1015 i != rend(); --idx, ++i )
1016 {
1017 if ( !wxTmemchr(sz, *i, n) )
1018 return idx;
1019 }
1020
1021 return npos;
dcb68102
RN
1022}
1023
8f93a29f 1024size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1025{
8f93a29f
VS
1026 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1027
1028 size_t idx = nStart;
1029 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1030 {
1031 if ( *i != ch )
1032 return idx;
1033 }
1034
1035 return npos;
1036}
1037
1038size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1039{
1040 size_t len = length();
1041
1042 if ( nStart == npos )
1043 {
1044 nStart = len - 1;
1045 }
1046 else
1047 {
1048 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1049 }
1050
1051 size_t idx = nStart;
1052 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1053 i != rend(); --idx, ++i )
1054 {
1055 if ( *i != ch )
1056 return idx;
1057 }
1058
1059 return npos;
1060}
1061
1062// the functions above were implemented for wchar_t* arguments in Unicode
1063// build and char* in ANSI build; below are implementations for the other
1064// version:
1065#if wxUSE_UNICODE
1066 #define wxOtherCharType char
1067 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1068#else
1069 #define wxOtherCharType wchar_t
1070 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1071#endif
1072
1073size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1074 { return find_first_of(STRCONV(sz), nStart); }
1075
1076size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1077 size_t n) const
1078 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1079size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1080 { return find_last_of(STRCONV(sz), nStart); }
1081size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1082 size_t n) const
1083 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1084size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1085 { return find_first_not_of(STRCONV(sz), nStart); }
1086size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1087 size_t n) const
1088 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1089size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1090 { return find_last_not_of(STRCONV(sz), nStart); }
1091size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1092 size_t n) const
1093 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1094
1095#undef wxOtherCharType
1096#undef STRCONV
1097
1098#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1099
1100// ===========================================================================
1101// other common string functions
1102// ===========================================================================
1103
1104int wxString::CmpNoCase(const wxString& s) const
1105{
1106 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1107
8f93a29f
VS
1108 const_iterator i1 = begin();
1109 const_iterator end1 = end();
1110 const_iterator i2 = s.begin();
1111 const_iterator end2 = s.end();
1112
0d8b0f94 1113 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1114 {
1115 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1116 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1117 if ( lower1 != lower2 )
1118 return lower1 < lower2 ? -1 : 1;
1119 }
1120
1121 size_t len1 = length();
1122 size_t len2 = s.length();
dcb68102 1123
8f93a29f
VS
1124 if ( len1 < len2 )
1125 return -1;
1126 else if ( len1 > len2 )
1127 return 1;
1128 return 0;
dcb68102
RN
1129}
1130
1131
b1ac3b56 1132#if wxUSE_UNICODE
e015c2a3 1133
cf6bedce
SC
1134#ifdef __MWERKS__
1135#ifndef __SCHAR_MAX__
1136#define __SCHAR_MAX__ 127
1137#endif
1138#endif
1139
e6310bbc 1140wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1141{
e6310bbc 1142 if (!ascii || len == 0)
b1ac3b56 1143 return wxEmptyString;
e015c2a3 1144
b1ac3b56 1145 wxString res;
e015c2a3 1146
e6310bbc 1147 {
6798451b 1148 wxStringInternalBuffer buf(res, len);
602a857b 1149 wxStringCharType *dest = buf;
c1eada83 1150
602a857b
VS
1151 for ( ; len > 0; --len )
1152 {
1153 unsigned char c = (unsigned char)*ascii++;
1154 wxASSERT_MSG( c < 0x80,
1155 _T("Non-ASCII value passed to FromAscii().") );
c1eada83 1156
602a857b
VS
1157 *dest++ = (wchar_t)c;
1158 }
e015c2a3
VZ
1159 }
1160
b1ac3b56
RR
1161 return res;
1162}
1163
e6310bbc
VS
1164wxString wxString::FromAscii(const char *ascii)
1165{
0081dd72 1166 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1167}
1168
c5288c5c 1169wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1170{
1171 // What do we do with '\0' ?
1172
c1eada83 1173 unsigned char c = (unsigned char)ascii;
8760bc65 1174
c1eada83
VS
1175 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1176
1177 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1178 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1179}
1180
b1ac3b56
RR
1181const wxCharBuffer wxString::ToAscii() const
1182{
e015c2a3
VZ
1183 // this will allocate enough space for the terminating NUL too
1184 wxCharBuffer buffer(length());
6e394fc6 1185 char *dest = buffer.data();
e015c2a3 1186
c1eada83 1187 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1188 {
c1eada83
VS
1189 wxUniChar c(*i);
1190 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1191 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1192
1193 // the output string can't have embedded NULs anyhow, so we can safely
1194 // stop at first of them even if we do have any
c1eada83 1195 if ( !c )
e015c2a3 1196 break;
b1ac3b56 1197 }
e015c2a3 1198
b1ac3b56
RR
1199 return buffer;
1200}
e015c2a3 1201
c1eada83 1202#endif // wxUSE_UNICODE
b1ac3b56 1203
c801d85f 1204// extract string of length nCount starting at nFirst
c801d85f
KB
1205wxString wxString::Mid(size_t nFirst, size_t nCount) const
1206{
73f507f5 1207 size_t nLen = length();
30d9011f 1208
73f507f5
WS
1209 // default value of nCount is npos and means "till the end"
1210 if ( nCount == npos )
1211 {
1212 nCount = nLen - nFirst;
1213 }
30d9011f 1214
73f507f5
WS
1215 // out-of-bounds requests return sensible things
1216 if ( nFirst + nCount > nLen )
1217 {
1218 nCount = nLen - nFirst;
1219 }
c801d85f 1220
73f507f5
WS
1221 if ( nFirst > nLen )
1222 {
1223 // AllocCopy() will return empty string
1224 return wxEmptyString;
1225 }
c801d85f 1226
73f507f5
WS
1227 wxString dest(*this, nFirst, nCount);
1228 if ( dest.length() != nCount )
1229 {
1230 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1231 }
30d9011f 1232
73f507f5 1233 return dest;
c801d85f
KB
1234}
1235
e87b7833 1236// check that the string starts with prefix and return the rest of the string
d775fa82 1237// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1238bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1239{
c5e7a7d7
VS
1240 if ( compare(0, prefix.length(), prefix) != 0 )
1241 return false;
f6bcfd97
BP
1242
1243 if ( rest )
1244 {
1245 // put the rest of the string into provided pointer
c5e7a7d7 1246 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1247 }
1248
d775fa82 1249 return true;
f6bcfd97
BP
1250}
1251
3affcd07
VZ
1252
1253// check that the string ends with suffix and return the rest of it in the
1254// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1255bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1256{
c5e7a7d7 1257 int start = length() - suffix.length();
81727065
VS
1258
1259 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1260 return false;
1261
1262 if ( rest )
1263 {
1264 // put the rest of the string into provided pointer
1265 rest->assign(*this, 0, start);
1266 }
1267
1268 return true;
1269}
1270
1271
c801d85f
KB
1272// extract nCount last (rightmost) characters
1273wxString wxString::Right(size_t nCount) const
1274{
e87b7833
MB
1275 if ( nCount > length() )
1276 nCount = length();
c801d85f 1277
e87b7833
MB
1278 wxString dest(*this, length() - nCount, nCount);
1279 if ( dest.length() != nCount ) {
b1801e0e
GD
1280 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1281 }
c801d85f
KB
1282 return dest;
1283}
1284
7929902d 1285// get all characters after the last occurrence of ch
c801d85f 1286// (returns the whole string if ch not found)
c9f78968 1287wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1288{
1289 wxString str;
d775fa82 1290 int iPos = Find(ch, true);
3c67202d 1291 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1292 str = *this;
1293 else
c565abe1 1294 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1295
1296 return str;
1297}
1298
1299// extract nCount first (leftmost) characters
1300wxString wxString::Left(size_t nCount) const
1301{
e87b7833
MB
1302 if ( nCount > length() )
1303 nCount = length();
c801d85f 1304
e87b7833
MB
1305 wxString dest(*this, 0, nCount);
1306 if ( dest.length() != nCount ) {
b1801e0e
GD
1307 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1308 }
c801d85f
KB
1309 return dest;
1310}
1311
7929902d 1312// get all characters before the first occurrence of ch
c801d85f 1313// (returns the whole string if ch not found)
c9f78968 1314wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1315{
e87b7833 1316 int iPos = Find(ch);
c565abe1
VZ
1317 if ( iPos == wxNOT_FOUND )
1318 iPos = length();
e87b7833 1319 return wxString(*this, 0, iPos);
c801d85f
KB
1320}
1321
7929902d 1322/// get all characters before the last occurrence of ch
c801d85f 1323/// (returns empty string if ch not found)
c9f78968 1324wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1325{
1326 wxString str;
d775fa82 1327 int iPos = Find(ch, true);
3c67202d 1328 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1329 str = wxString(c_str(), iPos);
c801d85f
KB
1330
1331 return str;
1332}
1333
7929902d 1334/// get all characters after the first occurrence of ch
c801d85f 1335/// (returns empty string if ch not found)
c9f78968 1336wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1337{
1338 wxString str;
1339 int iPos = Find(ch);
3c67202d 1340 if ( iPos != wxNOT_FOUND )
c565abe1 1341 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1342
1343 return str;
1344}
1345
7929902d 1346// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1347size_t wxString::Replace(const wxString& strOld,
1348 const wxString& strNew, bool bReplaceAll)
c801d85f 1349{
a8f1f1b2 1350 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1351 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1352 _T("wxString::Replace(): invalid parameter") );
1353
68482dc5
VZ
1354 wxSTRING_INVALIDATE_CACHE();
1355
510bb748 1356 size_t uiCount = 0; // count of replacements made
c801d85f 1357
8a627032
VZ
1358 // optimize the special common case: replacement of one character by
1359 // another one (in UTF-8 case we can only do this for ASCII characters)
1360 //
1361 // benchmarks show that this special version is around 3 times faster
1362 // (depending on the proportion of matching characters and UTF-8/wchar_t
1363 // build)
1364 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1365 {
1366 const wxStringCharType chOld = strOld.m_impl[0],
1367 chNew = strNew.m_impl[0];
1368
1369 // this loop is the simplified version of the one below
1370 for ( size_t pos = 0; ; )
1371 {
1372 pos = m_impl.find(chOld, pos);
1373 if ( pos == npos )
1374 break;
c801d85f 1375
8a627032
VZ
1376 m_impl[pos++] = chNew;
1377
1378 uiCount++;
1379
1380 if ( !bReplaceAll )
1381 break;
1382 }
1383 }
1384 else // general case
510bb748 1385 {
8a627032
VZ
1386 const size_t uiOldLen = strOld.m_impl.length();
1387 const size_t uiNewLen = strNew.m_impl.length();
1388
1389 for ( size_t pos = 0; ; )
1390 {
1391 pos = m_impl.find(strOld.m_impl, pos);
1392 if ( pos == npos )
1393 break;
510bb748 1394
8a627032
VZ
1395 // replace this occurrence of the old string with the new one
1396 m_impl.replace(pos, uiOldLen, strNew.m_impl);
510bb748 1397
8a627032
VZ
1398 // move up pos past the string that was replaced
1399 pos += uiNewLen;
ad5bb7d6 1400
8a627032
VZ
1401 // increase replace count
1402 uiCount++;
394b2900 1403
8a627032
VZ
1404 // stop after the first one?
1405 if ( !bReplaceAll )
1406 break;
1407 }
c801d85f 1408 }
c801d85f 1409
510bb748 1410 return uiCount;
c801d85f
KB
1411}
1412
1413bool wxString::IsAscii() const
1414{
a4a44612
VS
1415 for ( const_iterator i = begin(); i != end(); ++i )
1416 {
1417 if ( !(*i).IsAscii() )
1418 return false;
1419 }
1420
1421 return true;
c801d85f 1422}
dd1eaa89 1423
c801d85f
KB
1424bool wxString::IsWord() const
1425{
a4a44612
VS
1426 for ( const_iterator i = begin(); i != end(); ++i )
1427 {
1428 if ( !wxIsalpha(*i) )
1429 return false;
1430 }
1431
1432 return true;
c801d85f 1433}
dd1eaa89 1434
c801d85f
KB
1435bool wxString::IsNumber() const
1436{
a4a44612
VS
1437 if ( empty() )
1438 return true;
1439
1440 const_iterator i = begin();
1441
1442 if ( *i == _T('-') || *i == _T('+') )
1443 ++i;
1444
1445 for ( ; i != end(); ++i )
1446 {
1447 if ( !wxIsdigit(*i) )
1448 return false;
1449 }
1450
1451 return true;
c801d85f
KB
1452}
1453
c801d85f
KB
1454wxString wxString::Strip(stripType w) const
1455{
1456 wxString s = *this;
d775fa82
WS
1457 if ( w & leading ) s.Trim(false);
1458 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1459 return s;
1460}
1461
c801d85f
KB
1462// ---------------------------------------------------------------------------
1463// case conversion
1464// ---------------------------------------------------------------------------
1465
1466wxString& wxString::MakeUpper()
1467{
e87b7833
MB
1468 for ( iterator it = begin(), en = end(); it != en; ++it )
1469 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1470
1471 return *this;
1472}
1473
1474wxString& wxString::MakeLower()
1475{
e87b7833
MB
1476 for ( iterator it = begin(), en = end(); it != en; ++it )
1477 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1478
1479 return *this;
1480}
1481
0c7db140
VZ
1482wxString& wxString::MakeCapitalized()
1483{
1484 const iterator en = end();
1485 iterator it = begin();
1486 if ( it != en )
1487 {
1488 *it = (wxChar)wxToupper(*it);
1489 for ( ++it; it != en; ++it )
1490 *it = (wxChar)wxTolower(*it);
1491 }
1492
1493 return *this;
1494}
1495
c801d85f
KB
1496// ---------------------------------------------------------------------------
1497// trimming and padding
1498// ---------------------------------------------------------------------------
1499
d775fa82 1500// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1501// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1502// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1503// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1504// space-like symbols somewhere except in the first 128 chars), it is arguably
1505// still better than trimming away accented letters
1506inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1507
c801d85f
KB
1508// trims spaces (in the sense of isspace) from left or right side
1509wxString& wxString::Trim(bool bFromRight)
1510{
3458e408
WS
1511 // first check if we're going to modify the string at all
1512 if ( !empty() &&
1513 (
1514 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1515 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1516 )
2c3b684c 1517 )
2c3b684c 1518 {
3458e408
WS
1519 if ( bFromRight )
1520 {
1521 // find last non-space character
d4d02bd5 1522 reverse_iterator psz = rbegin();
32c62191 1523 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1524 ++psz;
92df97b8 1525
3458e408 1526 // truncate at trailing space start
d4d02bd5 1527 erase(psz.base(), end());
3458e408
WS
1528 }
1529 else
1530 {
1531 // find first non-space character
1532 iterator psz = begin();
32c62191 1533 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1534 ++psz;
2c3b684c 1535
3458e408
WS
1536 // fix up data and length
1537 erase(begin(), psz);
1538 }
2c3b684c 1539 }
c801d85f 1540
3458e408 1541 return *this;
c801d85f
KB
1542}
1543
1544// adds nCount characters chPad to the string from either side
c9f78968 1545wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1546{
3458e408 1547 wxString s(chPad, nCount);
c801d85f 1548
3458e408
WS
1549 if ( bFromRight )
1550 *this += s;
1551 else
1552 {
1553 s += *this;
1554 swap(s);
1555 }
c801d85f 1556
3458e408 1557 return *this;
c801d85f
KB
1558}
1559
1560// truncate the string
1561wxString& wxString::Truncate(size_t uiLen)
1562{
3458e408
WS
1563 if ( uiLen < length() )
1564 {
1565 erase(begin() + uiLen, end());
1566 }
1567 //else: nothing to do, string is already short enough
c801d85f 1568
3458e408 1569 return *this;
c801d85f
KB
1570}
1571
1572// ---------------------------------------------------------------------------
3c67202d 1573// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1574// ---------------------------------------------------------------------------
1575
1576// find a character
c9f78968 1577int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1578{
3458e408 1579 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1580
3458e408 1581 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1582}
1583
cd0b1709
VZ
1584// ----------------------------------------------------------------------------
1585// conversion to numbers
1586// ----------------------------------------------------------------------------
1587
52de37c7
VS
1588// The implementation of all the functions below is exactly the same so factor
1589// it out. Note that number extraction works correctly on UTF-8 strings, so
1590// we can use wxStringCharType and wx_str() for maximum efficiency.
122f3c5d 1591
92df97b8 1592#ifndef __WXWINCE__
941a4e62
VS
1593 #define DO_IF_NOT_WINCE(x) x
1594#else
1595 #define DO_IF_NOT_WINCE(x)
92df97b8 1596#endif
4ea4767e 1597
c95e653c
VZ
1598#define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1599 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
941a4e62
VS
1600 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1601 \
1602 DO_IF_NOT_WINCE( errno = 0; ) \
1603 \
1604 const wxStringCharType *start = wx_str(); \
1605 wxStringCharType *end; \
c95e653c 1606 T val = func(start, &end, base); \
941a4e62
VS
1607 \
1608 /* return true only if scan was stopped by the terminating NUL and */ \
1609 /* if the string was not empty to start with and no under/overflow */ \
1610 /* occurred: */ \
c95e653c
VZ
1611 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1612 return false; \
1613 *out = val; \
1614 return true
cd0b1709 1615
c95e653c 1616bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1617{
c95e653c 1618 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
619dcb09 1619}
cd0b1709 1620
c95e653c 1621bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1622{
c95e653c 1623 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
cd0b1709
VZ
1624}
1625
c95e653c 1626bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1627{
c95e653c 1628 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
d6718dd1
VZ
1629}
1630
c95e653c 1631bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1632{
c95e653c 1633 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
d6718dd1
VZ
1634}
1635
c95e653c 1636bool wxString::ToDouble(double *pVal) const
cd0b1709 1637{
c95e653c 1638 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
cd0b1709 1639
c95e653c 1640 DO_IF_NOT_WINCE( errno = 0; )
e71e5b37 1641
cd0b1709
VZ
1642 const wxChar *start = c_str();
1643 wxChar *end;
c95e653c 1644 double val = wxStrtod(start, &end);
cd0b1709 1645
d775fa82 1646 // return true only if scan was stopped by the terminating NUL and if the
bda041e5 1647 // string was not empty to start with and no under/overflow occurred
c95e653c
VZ
1648 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1649 return false;
1650
1651 *pVal = val;
1652
1653 return true;
cd0b1709
VZ
1654}
1655
c801d85f 1656// ---------------------------------------------------------------------------
9efd3367 1657// formatted output
c801d85f 1658// ---------------------------------------------------------------------------
378b05f7 1659
d1f6e2cf 1660#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1661/* static */
c9f78968 1662#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1663wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1664#else
d1f6e2cf 1665wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1666#endif
341e7d28 1667{
77c3e48a 1668 va_list argptr;
c9f78968 1669 va_start(argptr, format);
341e7d28 1670
77c3e48a 1671 wxString s;
c9f78968 1672 s.PrintfV(format, argptr);
341e7d28 1673
77c3e48a 1674 va_end(argptr);
341e7d28 1675
77c3e48a 1676 return s;
341e7d28 1677}
d1f6e2cf
VS
1678#endif // !wxUSE_UTF8_LOCALE_ONLY
1679
#if wxUSE_UNICODE_UTF8
// Returns the string obtained by formatting the variable arguments according
// to the char format string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1695
1696/* static */
c9f78968 1697wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1698{
1699 wxString s;
c9f78968 1700 s.PrintfV(format, argptr);
341e7d28
VZ
1701 return s;
1702}
1703
d1f6e2cf 1704#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1705#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1706int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1707#else
d1f6e2cf 1708int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1709#endif
c801d85f 1710{
ba9bbf13 1711 va_list argptr;
c9f78968 1712 va_start(argptr, format);
c801d85f 1713
c9f78968
VS
1714#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1715 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1716 // because it's the only cast that works safely for downcasting when
1717 // multiple inheritance is used:
1718 wxString *str = static_cast<wxString*>(this);
1719#else
1720 wxString *str = this;
1721#endif
1722
1723 int iLen = str->PrintfV(format, argptr);
c801d85f 1724
ba9bbf13 1725 va_end(argptr);
c801d85f 1726
ba9bbf13 1727 return iLen;
c801d85f 1728}
d1f6e2cf
VS
1729#endif // !wxUSE_UTF8_LOCALE_ONLY
1730
#if wxUSE_UNICODE_UTF8
// Formats the variable arguments according to the char format string into
// this string and returns the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1744
67612ff1
DE
1745/*
1746 Uses wxVsnprintf and places the result into this string.
1747
1748 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1749 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1750 the ISO C99 (and thus SUSv3) standard the return value for the case of
1751 an undersized buffer is inconsistent. For conforming vsnprintf
1752 implementations the function must return the number of characters that
1753 would have been printed had the buffer been large enough. For conforming
1754 vswprintf implementations the function must return a negative number
1755 and set errno.
1756
1757 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1758 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1759 those are defined in the standard and backed up by several conformance
1760 statements. Note that ENOMEM mentioned in the manual page does not
1761 apply to swprintf, only wprintf and fwprintf.
1762
1763 Official manual page:
1764 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1765
1766 Some conformance statements (AIX, Solaris):
1767 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1768 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1769
1770 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1771 EILSEQ and EINVAL are specifically defined to mean the error is other than
1772 an undersized buffer and no other errno are defined we treat those two
1773 as meaning hard errors and everything else gets the old behavior which
1774 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1775
67612ff1
DE
1776 In practice it's impossible to determine before compilation which behavior
1777 may be used. The vswprintf function may have vsnprintf-like behavior or
1778 vice-versa. Behavior detected on one release can theoretically change
1779 with an updated release. Not to mention that configure testing for it
1780 would require the test to be run on the host system, not the build system
1781 which makes cross compilation difficult. Therefore, we make no assumptions
1782 about behavior and try our best to handle every known case, including the
1783 case where wxVsnprintf returns a negative number and fails to set errno.
1784
1785 There is yet one more non-standard implementation and that is our own.
1786 Fortunately, that can be detected at compile-time.
1787
1788 On top of all that, ISO C99 explicitly defines snprintf to write a null
1789 character to the last position of the specified buffer. That would be at
1790 the given buffer size minus 1. It is supposed to do this even if it
1791 turns out that the buffer is sized too small.
1792
1793 Darwin (tested on 10.5) follows the C99 behavior exactly.
1794
1795 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1796 errno even when it fails. However, it only seems to ever fail due
1797 to an undersized buffer.
1798*/
2523e9b7
VS
1799#if wxUSE_UNICODE_UTF8
1800template<typename BufferType>
1801#else
1802// we only need one version in non-UTF8 builds and at least two Windows
1803// compilers have problems with this function template, so use just one
1804// normal function here
1805#endif
1806static int DoStringPrintfV(wxString& str,
1807 const wxString& format, va_list argptr)
c801d85f 1808{
f6f5941b 1809 int size = 1024;
e87b7833 1810
f6f5941b
VZ
1811 for ( ;; )
1812 {
2523e9b7
VS
1813#if wxUSE_UNICODE_UTF8
1814 BufferType tmp(str, size + 1);
1815 typename BufferType::CharType *buf = tmp;
1816#else
1817 wxStringBuffer tmp(str, size + 1);
de2589be 1818 wxChar *buf = tmp;
2523e9b7 1819#endif
2bb67b80 1820
ba9bbf13
WS
1821 if ( !buf )
1822 {
1823 // out of memory
a33c7045
VS
1824
1825 // in UTF-8 build, leaving uninitialized junk in the buffer
1826 // could result in invalid non-empty UTF-8 string, so just
1827 // reset the string to empty on failure:
1828 buf[0] = '\0';
ba9bbf13 1829 return -1;
e87b7833 1830 }
f6f5941b 1831
ba9bbf13
WS
1832 // wxVsnprintf() may modify the original arg pointer, so pass it
1833 // only a copy
1834 va_list argptrcopy;
1835 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1836
1837#ifndef __WXWINCE__
1838 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1839 errno = 0;
1840#endif
2523e9b7 1841 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1842 va_end(argptrcopy);
1843
1844 // some implementations of vsnprintf() don't NUL terminate
1845 // the string if there is not enough space for it so
1846 // always do it manually
67612ff1
DE
1847 // FIXME: This really seems to be the wrong and would be an off-by-one
1848 // bug except the code above allocates an extra character.
ba9bbf13
WS
1849 buf[size] = _T('\0');
1850
caff62f2
VZ
1851 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1852 // total number of characters which would have been written if the
b1727cfe 1853 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1854 if ( len < 0 )
1855 {
52de37c7
VS
1856 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1857 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1858 // is true if *both* of them use our own implementation,
1859 // otherwise we can't be sure
f2bbe5b6
VZ
1860#if wxUSE_WXVSNPRINTF
1861 // we know that our own implementation of wxVsnprintf() returns -1
1862 // only for a format error - thus there's something wrong with
1863 // the user's format string
a33c7045 1864 buf[0] = '\0';
f2bbe5b6 1865 return -1;
52de37c7
VS
1866#else // possibly using system version
1867 // assume it only returns error if there is not enough space, but
1868 // as we don't know how much we need, double the current size of
1869 // the buffer
67612ff1 1870#ifndef __WXWINCE__
a9a854d7
DE
1871 if( (errno == EILSEQ) || (errno == EINVAL) )
1872 // If errno was set to one of the two well-known hard errors
1873 // then fail immediately to avoid an infinite loop.
1874 return -1;
1875 else
1876#endif // __WXWINCE__
67612ff1
DE
1877 // still not enough, as we don't know how much we need, double the
1878 // current size of the buffer
1879 size *= 2;
f2bbe5b6 1880#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 1881 }
64f8f94c 1882 else if ( len >= size )
de2589be 1883 {
f2bbe5b6 1884#if wxUSE_WXVSNPRINTF
c95e653c 1885 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
1886 // size+1 when there's not enough space but that's not the size
1887 // of the required buffer!
1888 size *= 2; // so we just double the current size of the buffer
1889#else
64f8f94c
VZ
1890 // some vsnprintf() implementations NUL-terminate the buffer and
1891 // some don't in len == size case, to be safe always add 1
67612ff1
DE
1892 // FIXME: I don't quite understand this comment. The vsnprintf
1893 // function is specifically defined to return the number of
1894 // characters printed not including the null terminator.
1895 // So OF COURSE you need to add 1 to get the right buffer size.
1896 // The following line is definitely correct, no question.
64f8f94c 1897 size = len + 1;
f2bbe5b6 1898#endif
de2589be
VZ
1899 }
1900 else // ok, there was enough space
f6f5941b 1901 {
f6f5941b
VZ
1902 break;
1903 }
f6f5941b
VZ
1904 }
1905
1906 // we could have overshot
2523e9b7
VS
1907 str.Shrink();
1908
1909 return str.length();
1910}
c801d85f 1911
2523e9b7
VS
1912int wxString::PrintfV(const wxString& format, va_list argptr)
1913{
2523e9b7
VS
1914#if wxUSE_UNICODE_UTF8
1915 #if wxUSE_STL_BASED_WXSTRING
1916 typedef wxStringTypeBuffer<char> Utf8Buffer;
1917 #else
6798451b 1918 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
1919 #endif
1920#endif
1921
1922#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 1923 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1924#else
1925 #if wxUSE_UNICODE_UTF8
1926 if ( wxLocaleIsUtf8 )
c6255a6e 1927 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1928 else
1929 // wxChar* version
c6255a6e 1930 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 1931 #else
c6255a6e 1932 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
1933 #endif // UTF8/WCHAR
1934#endif
c801d85f
KB
1935}
1936
097c080b
VZ
1937// ----------------------------------------------------------------------------
1938// misc other operations
1939// ----------------------------------------------------------------------------
0c5d3e1c 1940
d775fa82 1941// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
1942// '?' metacharacters (as usual, '?' matches any character and '*' any number
1943// of them)
8a540c88 1944bool wxString::Matches(const wxString& mask) const
097c080b 1945{
d6044f58
VZ
1946 // I disable this code as it doesn't seem to be faster (in fact, it seems
1947 // to be much slower) than the old, hand-written code below and using it
1948 // here requires always linking with libregex even if the user code doesn't
1949 // use it
1950#if 0 // wxUSE_REGEX
706c2ac9
VZ
1951 // first translate the shell-like mask into a regex
1952 wxString pattern;
1953 pattern.reserve(wxStrlen(pszMask));
1954
1955 pattern += _T('^');
1956 while ( *pszMask )
1957 {
1958 switch ( *pszMask )
1959 {
1960 case _T('?'):
1961 pattern += _T('.');
1962 break;
1963
1964 case _T('*'):
1965 pattern += _T(".*");
1966 break;
1967
1968 case _T('^'):
1969 case _T('.'):
1970 case _T('$'):
1971 case _T('('):
1972 case _T(')'):
1973 case _T('|'):
1974 case _T('+'):
1975 case _T('\\'):
1976 // these characters are special in a RE, quote them
1977 // (however note that we don't quote '[' and ']' to allow
1978 // using them for Unix shell like matching)
1979 pattern += _T('\\');
1980 // fall through
1981
1982 default:
1983 pattern += *pszMask;
1984 }
1985
1986 pszMask++;
1987 }
1988 pattern += _T('$');
1989
1990 // and now use it
1991 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1992#else // !wxUSE_REGEX
9a4232dc
VZ
1993 // TODO: this is, of course, awfully inefficient...
1994
8a540c88
VS
1995 // FIXME-UTF8: implement using iterators, remove #if
1996#if wxUSE_UNICODE_UTF8
1997 wxWCharBuffer maskBuf = mask.wc_str();
1998 wxWCharBuffer txtBuf = wc_str();
1999 const wxChar *pszMask = maskBuf.data();
2000 const wxChar *pszTxt = txtBuf.data();
2001#else
2002 const wxChar *pszMask = mask.wx_str();
9a4232dc 2003 // the char currently being checked
8a540c88
VS
2004 const wxChar *pszTxt = wx_str();
2005#endif
9a4232dc
VZ
2006
2007 // the last location where '*' matched
2008 const wxChar *pszLastStarInText = NULL;
2009 const wxChar *pszLastStarInMask = NULL;
2010
2011match:
2012 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2013 switch ( *pszMask ) {
223d09f6
KB
2014 case wxT('?'):
2015 if ( *pszTxt == wxT('\0') )
d775fa82 2016 return false;
097c080b 2017
9a4232dc 2018 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2019
097c080b
VZ
2020 break;
2021
223d09f6 2022 case wxT('*'):
097c080b 2023 {
9a4232dc
VZ
2024 // remember where we started to be able to backtrack later
2025 pszLastStarInText = pszTxt;
2026 pszLastStarInMask = pszMask;
2027
097c080b 2028 // ignore special chars immediately following this one
9a4232dc 2029 // (should this be an error?)
223d09f6 2030 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2031 pszMask++;
2032
2033 // if there is nothing more, match
223d09f6 2034 if ( *pszMask == wxT('\0') )
d775fa82 2035 return true;
097c080b
VZ
2036
2037 // are there any other metacharacters in the mask?
c86f1403 2038 size_t uiLenMask;
223d09f6 2039 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2040
2041 if ( pEndMask != NULL ) {
2042 // we have to match the string between two metachars
2043 uiLenMask = pEndMask - pszMask;
2044 }
2045 else {
2046 // we have to match the remainder of the string
2bb67b80 2047 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2048 }
2049
2050 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2051 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2052 if ( pMatch == NULL )
d775fa82 2053 return false;
097c080b
VZ
2054
2055 // -1 to compensate "++" in the loop
2056 pszTxt = pMatch + uiLenMask - 1;
2057 pszMask += uiLenMask - 1;
2058 }
2059 break;
2060
2061 default:
2062 if ( *pszMask != *pszTxt )
d775fa82 2063 return false;
097c080b
VZ
2064 break;
2065 }
2066 }
2067
2068 // match only if nothing left
9a4232dc 2069 if ( *pszTxt == wxT('\0') )
d775fa82 2070 return true;
9a4232dc
VZ
2071
2072 // if we failed to match, backtrack if we can
2073 if ( pszLastStarInText ) {
2074 pszTxt = pszLastStarInText + 1;
2075 pszMask = pszLastStarInMask;
2076
2077 pszLastStarInText = NULL;
2078
2079 // don't bother resetting pszLastStarInMask, it's unnecessary
2080
2081 goto match;
2082 }
2083
d775fa82 2084 return false;
706c2ac9 2085#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2086}
2087
1fc5dd6f 2088// Count the number of chars
c9f78968 2089int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2090{
2091 int count = 0;
8f93a29f 2092 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2093 {
8f93a29f 2094 if ( *i == ch )
1fc5dd6f
JS
2095 count ++;
2096 }
2097 return count;
2098}
2099
628f87da
VS
2100// ----------------------------------------------------------------------------
2101// wxUTF8StringBuffer
2102// ----------------------------------------------------------------------------
2103
7d46f92b 2104#if wxUSE_UNICODE_WCHAR
628f87da
VS
2105wxUTF8StringBuffer::~wxUTF8StringBuffer()
2106{
2107 wxMBConvStrictUTF8 conv;
2108 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2109 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2110
2111 wxStringInternalBuffer wbuf(m_str, wlen);
2112 conv.ToWChar(wbuf, wlen, m_buf);
2113}
2114
2115wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2116{
2117 wxCHECK_RET(m_lenSet, "length not set");
2118
2119 wxMBConvStrictUTF8 conv;
2120 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2121 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2122
2123 wxStringInternalBufferLength wbuf(m_str, wlen);
2124 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2125 wbuf.SetLength(wlen);
2126}
7d46f92b 2127#endif // wxUSE_UNICODE_WCHAR
5c1de526
VS
2128
2129// ----------------------------------------------------------------------------
2130// wxCharTypeBuffer<T>
2131// ----------------------------------------------------------------------------
2132
// Definition of the static NullData member of the char specialization of
// wxCharTypeBuffer: it is an object of the nested Data type constructed from
// a NULL pointer.
//
// The "template<>" prefix is left out when __VMS_BROKEN_TEMPLATES is defined.
c9b6cebd 2133#ifndef __VMS_BROKEN_TEMPLATES
5c1de526 2134template<>
c9b6cebd 2135#endif
5c1de526
VS
2136wxCharTypeBuffer<char>::Data
2137wxCharTypeBuffer<char>::NullData(NULL);
2138
// Definition of the static NullData member of the wchar_t specialization of
// wxCharTypeBuffer: it is an object of the nested Data type constructed from
// a NULL pointer.
//
// The "template<>" prefix is left out when __VMS_BROKEN_TEMPLATES is defined.
c9b6cebd 2139#ifndef __VMS_BROKEN_TEMPLATES
5c1de526 2140template<>
c9b6cebd 2141#endif
5c1de526
VS
2142wxCharTypeBuffer<wchar_t>::Data
2143wxCharTypeBuffer<wchar_t>::NullData(NULL);