]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
Correct conversion from text file to charset (which is read in that text file)
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
6b769f3d 27#endif
c801d85f
KB
28
29#include <ctype.h>
92df97b8
WS
30
31#ifndef __WXWINCE__
32 #include <errno.h>
33#endif
34
c801d85f
KB
35#include <string.h>
36#include <stdlib.h>
9a08c20e 37
8116a0c5 38#include "wx/hashmap.h"
8f93a29f
VS
39
40// string handling functions used by wxString:
// Select the low-level memory/string primitives used by the rest of this
// file: the plain byte-oriented C functions when wxUSE_UNICODE_UTF8 is set,
// and the wxTmem*/wxStrlen variants in all the other builds.
41#if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46#else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51#endif
8f93a29f 52
4e79262f
VZ
53// ----------------------------------------------------------------------------
54// global variables
55// ----------------------------------------------------------------------------
56
// Definitions of the shared "null" untyped buffer data: a single file-local
// UntypedBufferData object constructed from NULL and a constant pointer to it.
57namespace wxPrivate
58{
59
// the object itself is static, it is only reached through the pointer below
60static UntypedBufferData s_untypedNullData(NULL);
61
// constant pointer to s_untypedNullData
62UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
63
64} // namespace wxPrivate
e87b7833 65
a7ea63e2
VS
66// ---------------------------------------------------------------------------
67// static class variables definition
68// ---------------------------------------------------------------------------
e87b7833 69
a7ea63e2
VS
70//According to STL _must_ be a -1 size_t
71const size_t wxString::npos = (size_t) -1;
8f93a29f 72
68482dc5 73#if wxUSE_STRING_POS_CACHE
68482dc5 74
e810df36
VZ
75#ifdef wxHAS_COMPILER_TLS
76
// definition of the static wxString cache member, declared through
// wxTLS_TYPE; only compiled when wxHAS_COMPILER_TLS is defined
77wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
78
79#else // !wxHAS_COMPILER_TLS
80
ad8ae788
VZ
81struct wxStrCacheInitializer
82{
83 wxStrCacheInitializer()
84 {
85 // calling this function triggers s_cache initialization in it, and
86 // from now on it becomes safe to call from multiple threads
87 wxString::GetCache();
88 }
89};
90
e317bd3f
SC
91/*
92wxString::Cache& wxString::GetCache()
93{
94 static wxTLS_TYPE(Cache) s_cache;
95
96 return wxTLS_VALUE(s_cache);
97}
98*/
99
ad8ae788
VZ
100static wxStrCacheInitializer gs_stringCacheInit;
101
e810df36
VZ
102#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
103
68482dc5
VZ
104// gdb seems to be unable to display thread-local variables correctly, at least
105// not my 6.4.98 version under amd64, so provide this debugging helper to do it
106#ifdef __WXDEBUG__
107
108struct wxStrCacheDumper
109{
110 static void ShowAll()
111 {
112 puts("*** wxString cache dump:");
113 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
114 {
115 const wxString::Cache::Element&
8b73c531 116 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
117
118 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
119 n,
8b73c531 120 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
121 c.str,
122 (unsigned long)c.pos,
123 (unsigned long)c.impl,
124 (long)c.len);
125 }
126 }
127};
128
129void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
130
131#endif // __WXDEBUG__
132
133#ifdef wxPROFILE_STRING_CACHE
134
135wxString::CacheStats wxString::ms_cacheStats;
136
8c3b65d9 137struct wxStrCacheStatsDumper
68482dc5 138{
8c3b65d9 139 ~wxStrCacheStatsDumper()
68482dc5
VZ
140 {
141 const wxString::CacheStats& stats = wxString::ms_cacheStats;
142
143 if ( stats.postot )
144 {
145 puts("*** wxString cache statistics:");
146 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
147 stats.postot);
148 printf("\tHits %u (of which %u not used) or %.2f%%\n",
149 stats.poshits,
150 stats.mishits,
151 100.*float(stats.poshits - stats.mishits)/stats.postot);
152 printf("\tAverage position requested: %.2f\n",
153 float(stats.sumpos) / stats.postot);
154 printf("\tAverage offset after cached hint: %.2f\n",
155 float(stats.sumofs) / stats.postot);
156 }
157
158 if ( stats.lentot )
159 {
160 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
161 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
162 }
163 }
8c3b65d9 164};
68482dc5 165
8c3b65d9 166static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
167
168#endif // wxPROFILE_STRING_CACHE
169
170#endif // wxUSE_STRING_POS_CACHE
171
a7ea63e2
VS
172// ----------------------------------------------------------------------------
173// global functions
174// ----------------------------------------------------------------------------
e87b7833 175
a7ea63e2 176#if wxUSE_STD_IOSTREAM
8f93a29f 177
a7ea63e2 178#include <iostream>
8f93a29f 179
a7ea63e2 180wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 181{
7a906e1a 182#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
ddf01bdb
VZ
183 const wxCharBuffer buf(str.AsCharBuf());
184 if ( !buf )
185 os.clear(wxSTD ios_base::failbit);
186 else
187 os << buf.data();
188
189 return os;
a7ea63e2 190#else
7a906e1a 191 return os << str.AsInternal();
a7ea63e2 192#endif
8f93a29f
VS
193}
194
04abe4bc
VS
195wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
196{
197 return os << str.c_str();
198}
199
200wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
201{
202 return os << str.data();
203}
204
205#ifndef __BORLANDC__
206wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
207{
208 return os << str.data();
209}
210#endif
211
6a6ea041 212#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
213
214wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
215{
216 return wos << str.wc_str();
217}
218
219wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
220{
221 return wos << str.AsWChar();
222}
223
224wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
225{
226 return wos << str.data();
227}
228
6a6ea041 229#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 230
a7ea63e2 231#endif // wxUSE_STD_IOSTREAM
e87b7833 232
81727065
VS
233// ===========================================================================
234// wxString class core
235// ===========================================================================
236
237#if wxUSE_UNICODE_UTF8
238
81727065
VS
239void wxString::PosLenToImpl(size_t pos, size_t len,
240 size_t *implPos, size_t *implLen) const
241{
242 if ( pos == npos )
68482dc5 243 {
81727065 244 *implPos = npos;
68482dc5
VZ
245 }
246 else // have valid start position
81727065 247 {
68482dc5
VZ
248 const const_iterator b = GetIterForNthChar(pos);
249 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 250 if ( len == npos )
68482dc5 251 {
81727065 252 *implLen = npos;
68482dc5
VZ
253 }
254 else // have valid length too
81727065 255 {
68482dc5
VZ
256 // we need to handle the case of length specifying a substring
257 // going beyond the end of the string, just as std::string does
258 const const_iterator e(end());
259 const_iterator i(b);
260 while ( len && i <= e )
261 {
262 ++i;
263 --len;
264 }
265
266 *implLen = i.impl() - b.impl();
81727065
VS
267 }
268 }
269}
270
271#endif // wxUSE_UNICODE_UTF8
272
11aac4ba
VS
273// ----------------------------------------------------------------------------
274// wxCStrData converted strings caching
275// ----------------------------------------------------------------------------
276
132276cf
VS
277// FIXME-UTF8: temporarily disabled because it doesn't work with global
278// string objects; re-enable after fixing this bug and benchmarking
279// performance to see if using a hash is a good idea at all
280#if 0
281
11aac4ba
VS
282// For backward compatibility reasons, it must be possible to assign the value
283// returned by wxString::c_str() to a char* or wchar_t* variable and work with
284// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
285// because the memory would be freed immediately, but it has to be valid as long
286// as the string is not modified, so that code like this still works:
287//
288// const wxChar *s = str.c_str();
289// while ( s ) { ... }
290
291// FIXME-UTF8: not thread safe!
292// FIXME-UTF8: we currently clear the cached conversion only when the string is
293// destroyed, but we should do it when the string is modified, to
294// keep memory usage down
295// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
296// invalidated the cache on every change, we could keep the previous
297// conversion
298// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
299// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
300
301template<typename T>
302static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
303{
6c4ebcda 304 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
305 if ( i != hash.end() )
306 {
307 free(i->second);
308 hash.erase(i);
309 }
310}
311
312#if wxUSE_UNICODE
6c4ebcda
VS
313// NB: non-STL implementation doesn't compile with "const wxString*" key type,
314// so we have to use wxString* here and const-cast when used
11aac4ba
VS
315WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
316 wxStringCharConversionCache);
317static wxStringCharConversionCache gs_stringsCharCache;
318
319const char* wxCStrData::AsChar() const
320{
321 // remove previously cached value, if any (see FIXMEs above):
322 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
323
324 // convert the string and keep it:
6c4ebcda
VS
325 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
326 m_str->mb_str().release();
11aac4ba
VS
327
328 return s + m_offset;
329}
330#endif // wxUSE_UNICODE
331
332#if !wxUSE_UNICODE_WCHAR
333WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
334 wxStringWCharConversionCache);
335static wxStringWCharConversionCache gs_stringsWCharCache;
336
337const wchar_t* wxCStrData::AsWChar() const
338{
339 // remove previously cached value, if any (see FIXMEs above):
340 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
341
342 // convert the string and keep it:
6c4ebcda
VS
343 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
344 m_str->wc_str().release();
11aac4ba
VS
345
346 return s + m_offset;
347}
348#endif // !wxUSE_UNICODE_WCHAR
349
11aac4ba
VS
350wxString::~wxString()
351{
352#if wxUSE_UNICODE
353 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
354 DeleteStringFromConversionCache(gs_stringsCharCache, this);
355#endif
356#if !wxUSE_UNICODE_WCHAR
357 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
358#endif
359}
132276cf
VS
360#endif
361
111d9948 362#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
363const char* wxCStrData::AsChar() const
364{
111d9948
VS
365#if wxUSE_UNICODE_UTF8
366 if ( wxLocaleIsUtf8 )
367 return AsInternal();
368#endif
369 // under non-UTF8 locales, we have to convert the internal UTF-8
370 // representation using wxConvLibc and cache the result
371
132276cf 372 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
373
374 // convert the string:
2a7431e1
VZ
375 //
376 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
377 // have it) but it's unfortunately not obvious to implement
378 // because we don't know how big buffer do we need for the
379 // given string length (in case of multibyte encodings, e.g.
380 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
381 //
382 // One idea would be to store more than just m_convertedToChar
383 // in wxString: then we could record the length of the string
384 // which was converted the last time and try to reuse the same
385 // buffer if the current length is not greater than it (this
386 // could still fail because string could have been modified in
387 // place but it would work most of the time, so we'd do it and
388 // only allocate the new buffer if in-place conversion returned
389 // an error). We could also store a bit saying if the string
390 // was modified since the last conversion (and update it in all
391 // operation modifying the string, of course) to avoid unneeded
392 // consequential conversions. But both of these ideas require
393 // adding more fields to wxString and require profiling results
394 // to be sure that we really gain enough from them to justify
395 // doing it.
05f32fc3
VS
396 wxCharBuffer buf(str->mb_str());
397
28be59b4
VZ
398 // if it failed, return empty string and not NULL to avoid crashes in code
399 // written with either wxWidgets 2 wxString or std::string behaviour in
400 // mind: neither of them ever returns NULL and so we shouldn't neither
401 if ( !buf )
402 return "";
403
05f32fc3
VS
404 if ( str->m_convertedToChar &&
405 strlen(buf) == strlen(str->m_convertedToChar) )
406 {
407 // keep the same buffer for as long as possible, so that several calls
408 // to c_str() in a row still work:
409 strcpy(str->m_convertedToChar, buf);
410 }
411 else
412 {
413 str->m_convertedToChar = buf.release();
414 }
415
416 // and keep it:
132276cf
VS
417 return str->m_convertedToChar + m_offset;
418}
111d9948 419#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
420
421#if !wxUSE_UNICODE_WCHAR
422const wchar_t* wxCStrData::AsWChar() const
423{
424 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
425
426 // convert the string:
427 wxWCharBuffer buf(str->wc_str());
428
28be59b4
VZ
429 // notice that here, unlike above in AsChar(), conversion can't fail as our
430 // internal UTF-8 is always well-formed -- or the string was corrupted and
431 // all bets are off anyhow
432
05f32fc3
VS
433 // FIXME-UTF8: do the conversion in-place in the existing buffer
434 if ( str->m_convertedToWChar &&
435 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
436 {
437 // keep the same buffer for as long as possible, so that several calls
438 // to c_str() in a row still work:
439 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
440 }
441 else
442 {
443 str->m_convertedToWChar = buf.release();
444 }
445
446 // and keep it:
132276cf
VS
447 return str->m_convertedToWChar + m_offset;
448}
449#endif // !wxUSE_UNICODE_WCHAR
450
451// ===========================================================================
452// wxString class core
453// ===========================================================================
454
455// ---------------------------------------------------------------------------
456// construction and conversion
457// ---------------------------------------------------------------------------
11aac4ba 458
81727065 459#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
460/* static */
461wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 462 const wxMBConv& conv)
8f93a29f
VS
463{
464 // anything to do?
465 if ( !psz || nLength == 0 )
81727065 466 return SubstrBufFromMB(L"", 0);
8f93a29f
VS
467
468 if ( nLength == npos )
469 nLength = wxNO_LEN;
470
471 size_t wcLen;
472 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
473 if ( !wcLen )
81727065 474 return SubstrBufFromMB(_T(""), 0);
8f93a29f
VS
475 else
476 return SubstrBufFromMB(wcBuf, wcLen);
477}
81727065
VS
478#endif // wxUSE_UNICODE_WCHAR
479
480#if wxUSE_UNICODE_UTF8
481/* static */
482wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
483 const wxMBConv& conv)
484{
81727065
VS
485 // anything to do?
486 if ( !psz || nLength == 0 )
487 return SubstrBufFromMB("", 0);
488
111d9948
VS
489 // if psz is already in UTF-8, we don't have to do the roundtrip to
490 // wchar_t* and back:
491 if ( conv.IsUTF8() )
492 {
493 // we need to validate the input because UTF8 iterators assume valid
494 // UTF-8 sequence and psz may be invalid:
495 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
496 {
9ef1ad0d
VZ
497 // we must pass the real string length to SubstrBufFromMB ctor
498 if ( nLength == npos )
499 nLength = psz ? strlen(psz) : 0;
111d9948
VS
500 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
501 }
502 // else: do the roundtrip through wchar_t*
503 }
504
81727065
VS
505 if ( nLength == npos )
506 nLength = wxNO_LEN;
507
508 // first convert to wide string:
509 size_t wcLen;
510 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
511 if ( !wcLen )
512 return SubstrBufFromMB("", 0);
513
514 // and then to UTF-8:
4fdfe2f3 515 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065
VS
516 // widechar -> UTF-8 conversion isn't supposed to ever fail:
517 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
518
519 return buf;
520}
521#endif // wxUSE_UNICODE_UTF8
522
523#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
524/* static */
525wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 526 const wxMBConv& conv)
8f93a29f
VS
527{
528 // anything to do?
529 if ( !pwz || nLength == 0 )
81727065 530 return SubstrBufFromWC("", 0);
8f93a29f
VS
531
532 if ( nLength == npos )
533 nLength = wxNO_LEN;
534
535 size_t mbLen;
536 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
537 if ( !mbLen )
81727065 538 return SubstrBufFromWC("", 0);
8f93a29f
VS
539 else
540 return SubstrBufFromWC(mbBuf, mbLen);
541}
81727065 542#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
543
544
81727065 545#if wxUSE_UNICODE_WCHAR
e87b7833 546
06386448 547//Convert wxString in Unicode mode to a multi-byte string
830f8f11 548const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 549{
81727065 550 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
e87b7833
MB
551}
552
81727065 553#elif wxUSE_UNICODE_UTF8
e87b7833 554
81727065
VS
555const wxWCharBuffer wxString::wc_str() const
556{
4fdfe2f3
VZ
557 return wxMBConvStrictUTF8().cMB2WC
558 (
559 m_impl.c_str(),
560 m_impl.length() + 1, // size, not length
561 NULL
562 );
81727065
VS
563}
564
565const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
566{
111d9948
VS
567 if ( conv.IsUTF8() )
568 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
569
81727065
VS
570 // FIXME-UTF8: use wc_str() here once we have buffers with length
571
572 size_t wcLen;
4fdfe2f3
VZ
573 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
574 (
575 m_impl.c_str(),
576 m_impl.length() + 1, // size
577 &wcLen
578 ));
81727065
VS
579 if ( !wcLen )
580 return wxCharBuffer("");
581
4f696af8 582 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
81727065
VS
583}
584
585#else // ANSI
eec47cc6 586
7663d0d4 587//Converts this string to a wide character string if unicode
06386448 588//mode is not enabled and wxUSE_WCHAR_T is enabled
830f8f11 589const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 590{
81727065 591 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
265d5cce 592}
7663d0d4 593
e87b7833
MB
594#endif // Unicode/ANSI
595
596// shrink to minimal size (releasing extra memory)
597bool wxString::Shrink()
598{
599 wxString tmp(begin(), end());
600 swap(tmp);
601 return tmp.length() == length();
602}
603
d8a4b666 604// deprecated compatibility code:
a7ea63e2 605#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 606wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
607{
608 return DoGetWriteBuf(nLen);
609}
610
611void wxString::UngetWriteBuf()
612{
613 DoUngetWriteBuf();
614}
615
616void wxString::UngetWriteBuf(size_t nLen)
617{
618 DoUngetWriteBuf(nLen);
619}
a7ea63e2 620#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 621
d8a4b666 622
e87b7833
MB
623// ---------------------------------------------------------------------------
624// data access
625// ---------------------------------------------------------------------------
626
627// all functions are inline in string.h
628
629// ---------------------------------------------------------------------------
e8f59039 630// concatenation operators
e87b7833
MB
631// ---------------------------------------------------------------------------
632
c801d85f 633/*
c801d85f
KB
634 * concatenation functions come in 5 flavours:
635 * string + string
636 * char + string and string + char
637 * C str + string and string + C str
638 */
639
b1801e0e 640wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 641{
992527a5 642#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
643 wxASSERT( str1.IsValid() );
644 wxASSERT( str2.IsValid() );
e87b7833 645#endif
097c080b 646
3458e408
WS
647 wxString s = str1;
648 s += str2;
3168a13f 649
3458e408 650 return s;
c801d85f
KB
651}
652
c9f78968 653wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 654{
992527a5 655#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 656 wxASSERT( str.IsValid() );
e87b7833 657#endif
3168a13f 658
3458e408
WS
659 wxString s = str;
660 s += ch;
097c080b 661
3458e408 662 return s;
c801d85f
KB
663}
664
c9f78968 665wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 666{
992527a5 667#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 668 wxASSERT( str.IsValid() );
e87b7833 669#endif
097c080b 670
3458e408
WS
671 wxString s = ch;
672 s += str;
3168a13f 673
3458e408 674 return s;
c801d85f
KB
675}
676
8f93a29f 677wxString operator+(const wxString& str, const char *psz)
c801d85f 678{
992527a5 679#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 680 wxASSERT( str.IsValid() );
e87b7833 681#endif
097c080b 682
3458e408 683 wxString s;
8f93a29f 684 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
685 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
686 }
687 s += str;
688 s += psz;
3168a13f 689
3458e408 690 return s;
c801d85f
KB
691}
692
8f93a29f 693wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 694{
992527a5 695#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
696 wxASSERT( str.IsValid() );
697#endif
698
699 wxString s;
700 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
701 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
702 }
703 s += str;
704 s += pwz;
705
706 return s;
707}
708
709wxString operator+(const char *psz, const wxString& str)
710{
a7ea63e2
VS
711#if !wxUSE_STL_BASED_WXSTRING
712 wxASSERT( str.IsValid() );
713#endif
714
715 wxString s;
716 if ( !s.Alloc(strlen(psz) + str.length()) ) {
717 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
718 }
719 s = psz;
720 s += str;
721
722 return s;
723}
724
725wxString operator+(const wchar_t *pwz, const wxString& str)
726{
727#if !wxUSE_STL_BASED_WXSTRING
728 wxASSERT( str.IsValid() );
729#endif
730
731 wxString s;
732 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
733 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
734 }
735 s = pwz;
736 s += str;
737
738 return s;
739}
740
741// ---------------------------------------------------------------------------
742// string comparison
743// ---------------------------------------------------------------------------
744
52de37c7
VS
745bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
746{
747 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
748 : wxToupper(GetChar(0u)) == wxToupper(c));
749}
750
a7ea63e2
VS
751#ifdef HAVE_STD_STRING_COMPARE
752
753// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
754// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
755// sort strings in characters code point order by sorting the byte sequence
756// in byte values order (i.e. what strcmp() and memcmp() do).
757
758int wxString::compare(const wxString& str) const
759{
760 return m_impl.compare(str.m_impl);
761}
762
763int wxString::compare(size_t nStart, size_t nLen,
764 const wxString& str) const
765{
766 size_t pos, len;
767 PosLenToImpl(nStart, nLen, &pos, &len);
768 return m_impl.compare(pos, len, str.m_impl);
769}
770
771int wxString::compare(size_t nStart, size_t nLen,
772 const wxString& str,
773 size_t nStart2, size_t nLen2) const
774{
775 size_t pos, len;
776 PosLenToImpl(nStart, nLen, &pos, &len);
777
778 size_t pos2, len2;
779 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
780
781 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
782}
783
784int wxString::compare(const char* sz) const
785{
786 return m_impl.compare(ImplStr(sz));
787}
788
789int wxString::compare(const wchar_t* sz) const
790{
791 return m_impl.compare(ImplStr(sz));
792}
793
794int wxString::compare(size_t nStart, size_t nLen,
795 const char* sz, size_t nCount) const
796{
797 size_t pos, len;
798 PosLenToImpl(nStart, nLen, &pos, &len);
799
800 SubstrBufFromMB str(ImplStr(sz, nCount));
801
802 return m_impl.compare(pos, len, str.data, str.len);
803}
804
805int wxString::compare(size_t nStart, size_t nLen,
806 const wchar_t* sz, size_t nCount) const
807{
808 size_t pos, len;
809 PosLenToImpl(nStart, nLen, &pos, &len);
810
811 SubstrBufFromWC str(ImplStr(sz, nCount));
812
813 return m_impl.compare(pos, len, str.data, str.len);
814}
815
816#else // !HAVE_STD_STRING_COMPARE
817
818static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
819 const wxStringCharType* s2, size_t l2)
820{
821 if( l1 == l2 )
822 return wxStringMemcmp(s1, s2, l1);
823 else if( l1 < l2 )
824 {
825 int ret = wxStringMemcmp(s1, s2, l1);
826 return ret == 0 ? -1 : ret;
827 }
828 else
829 {
830 int ret = wxStringMemcmp(s1, s2, l2);
831 return ret == 0 ? +1 : ret;
832 }
833}
834
835int wxString::compare(const wxString& str) const
836{
837 return ::wxDoCmp(m_impl.data(), m_impl.length(),
838 str.m_impl.data(), str.m_impl.length());
839}
840
841int wxString::compare(size_t nStart, size_t nLen,
842 const wxString& str) const
843{
844 wxASSERT(nStart <= length());
845 size_type strLen = length() - nStart;
846 nLen = strLen < nLen ? strLen : nLen;
847
848 size_t pos, len;
849 PosLenToImpl(nStart, nLen, &pos, &len);
850
851 return ::wxDoCmp(m_impl.data() + pos, len,
852 str.m_impl.data(), str.m_impl.length());
853}
854
855int wxString::compare(size_t nStart, size_t nLen,
856 const wxString& str,
857 size_t nStart2, size_t nLen2) const
858{
859 wxASSERT(nStart <= length());
860 wxASSERT(nStart2 <= str.length());
861 size_type strLen = length() - nStart,
862 strLen2 = str.length() - nStart2;
863 nLen = strLen < nLen ? strLen : nLen;
864 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
865
866 size_t pos, len;
867 PosLenToImpl(nStart, nLen, &pos, &len);
868 size_t pos2, len2;
869 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
870
871 return ::wxDoCmp(m_impl.data() + pos, len,
872 str.m_impl.data() + pos2, len2);
873}
874
875int wxString::compare(const char* sz) const
876{
877 SubstrBufFromMB str(ImplStr(sz, npos));
878 if ( str.len == npos )
879 str.len = wxStringStrlen(str.data);
880 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
881}
882
883int wxString::compare(const wchar_t* sz) const
884{
885 SubstrBufFromWC str(ImplStr(sz, npos));
886 if ( str.len == npos )
887 str.len = wxStringStrlen(str.data);
888 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
889}
890
891int wxString::compare(size_t nStart, size_t nLen,
892 const char* sz, size_t nCount) const
893{
894 wxASSERT(nStart <= length());
895 size_type strLen = length() - nStart;
896 nLen = strLen < nLen ? strLen : nLen;
097c080b 897
a7ea63e2
VS
898 size_t pos, len;
899 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 900
a7ea63e2
VS
901 SubstrBufFromMB str(ImplStr(sz, nCount));
902 if ( str.len == npos )
903 str.len = wxStringStrlen(str.data);
904
905 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
906}
907
a7ea63e2
VS
908int wxString::compare(size_t nStart, size_t nLen,
909 const wchar_t* sz, size_t nCount) const
8f93a29f 910{
a7ea63e2
VS
911 wxASSERT(nStart <= length());
912 size_type strLen = length() - nStart;
913 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 914
a7ea63e2
VS
915 size_t pos, len;
916 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 917
a7ea63e2
VS
918 SubstrBufFromWC str(ImplStr(sz, nCount));
919 if ( str.len == npos )
920 str.len = wxStringStrlen(str.data);
921
922 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
923}
924
a7ea63e2
VS
925#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
926
927
8f93a29f
VS
928// ---------------------------------------------------------------------------
929// find_{first,last}_[not]_of functions
930// ---------------------------------------------------------------------------
931
932#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 933
8f93a29f
VS
934// NB: All these functions are implemented with the argument being wxChar*,
935// i.e. widechar string in any Unicode build, even though native string
936// representation is char* in the UTF-8 build. This is because we couldn't
937// use memchr() to determine if a character is in a set encoded as UTF-8.
938
939size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 940{
8f93a29f 941 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
942}
943
8f93a29f 944size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 945{
8f93a29f 946 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
947}
948
8f93a29f 949size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 950{
8f93a29f 951 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 952
8f93a29f
VS
953 size_t idx = nStart;
954 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 955 {
8f93a29f
VS
956 if ( wxTmemchr(sz, *i, n) )
957 return idx;
dcb68102 958 }
8f93a29f
VS
959
960 return npos;
961}
962
963size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
964{
965 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
966
967 size_t idx = nStart;
968 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 969 {
8f93a29f
VS
970 if ( !wxTmemchr(sz, *i, n) )
971 return idx;
972 }
973
974 return npos;
975}
976
977
978size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
979{
980 return find_last_of(sz, nStart, wxStrlen(sz));
981}
982
983size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
984{
985 return find_last_not_of(sz, nStart, wxStrlen(sz));
986}
987
988size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
989{
990 size_t len = length();
991
992 if ( nStart == npos )
993 {
994 nStart = len - 1;
dcb68102 995 }
2c09fb3b 996 else
dcb68102 997 {
8f93a29f 998 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 999 }
8f93a29f
VS
1000
1001 size_t idx = nStart;
1002 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1003 i != rend(); --idx, ++i )
1004 {
1005 if ( wxTmemchr(sz, *i, n) )
1006 return idx;
1007 }
1008
1009 return npos;
dcb68102
RN
1010}
1011
8f93a29f 1012size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 1013{
8f93a29f
VS
1014 size_t len = length();
1015
1016 if ( nStart == npos )
1017 {
1018 nStart = len - 1;
1019 }
1020 else
1021 {
1022 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1023 }
1024
1025 size_t idx = nStart;
1026 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1027 i != rend(); --idx, ++i )
1028 {
1029 if ( !wxTmemchr(sz, *i, n) )
1030 return idx;
1031 }
1032
1033 return npos;
dcb68102
RN
1034}
1035
8f93a29f 1036size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1037{
8f93a29f
VS
1038 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1039
1040 size_t idx = nStart;
1041 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1042 {
1043 if ( *i != ch )
1044 return idx;
1045 }
1046
1047 return npos;
1048}
1049
1050size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1051{
1052 size_t len = length();
1053
1054 if ( nStart == npos )
1055 {
1056 nStart = len - 1;
1057 }
1058 else
1059 {
1060 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1061 }
1062
1063 size_t idx = nStart;
1064 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1065 i != rend(); --idx, ++i )
1066 {
1067 if ( *i != ch )
1068 return idx;
1069 }
1070
1071 return npos;
1072}
1073
1074// the functions above were implemented for wchar_t* arguments in Unicode
1075// build and char* in ANSI build; below are implementations for the other
1076// version:
1077#if wxUSE_UNICODE
1078 #define wxOtherCharType char
1079 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1080#else
1081 #define wxOtherCharType wchar_t
1082 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1083#endif
1084
1085size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1086 { return find_first_of(STRCONV(sz), nStart); }
1087
1088size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1089 size_t n) const
1090 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1091size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1092 { return find_last_of(STRCONV(sz), nStart); }
1093size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1094 size_t n) const
1095 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1096size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1097 { return find_first_not_of(STRCONV(sz), nStart); }
1098size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1099 size_t n) const
1100 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1101size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1102 { return find_last_not_of(STRCONV(sz), nStart); }
1103size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1104 size_t n) const
1105 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1106
1107#undef wxOtherCharType
1108#undef STRCONV
1109
1110#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1111
1112// ===========================================================================
1113// other common string functions
1114// ===========================================================================
1115
1116int wxString::CmpNoCase(const wxString& s) const
1117{
6689960c 1118#if wxUSE_UNICODE_UTF8
8f93a29f
VS
1119 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1120
8f93a29f
VS
1121 const_iterator i1 = begin();
1122 const_iterator end1 = end();
1123 const_iterator i2 = s.begin();
1124 const_iterator end2 = s.end();
1125
0d8b0f94 1126 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1127 {
1128 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1129 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1130 if ( lower1 != lower2 )
1131 return lower1 < lower2 ? -1 : 1;
1132 }
1133
1134 size_t len1 = length();
1135 size_t len2 = s.length();
dcb68102 1136
8f93a29f
VS
1137 if ( len1 < len2 )
1138 return -1;
1139 else if ( len1 > len2 )
1140 return 1;
1141 return 0;
6689960c
VZ
1142#else // wxUSE_UNICODE_WCHAR or ANSI
1143 return wxStricmp(m_impl.c_str(), s.m_impl.c_str());
1144#endif
dcb68102
RN
1145}
1146
1147
b1ac3b56 1148#if wxUSE_UNICODE
e015c2a3 1149
cf6bedce
SC
1150#ifdef __MWERKS__
1151#ifndef __SCHAR_MAX__
1152#define __SCHAR_MAX__ 127
1153#endif
1154#endif
1155
e6310bbc 1156wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1157{
e6310bbc 1158 if (!ascii || len == 0)
b1ac3b56 1159 return wxEmptyString;
e015c2a3 1160
b1ac3b56 1161 wxString res;
e015c2a3 1162
e6310bbc 1163 {
6798451b 1164 wxStringInternalBuffer buf(res, len);
602a857b 1165 wxStringCharType *dest = buf;
c1eada83 1166
602a857b
VS
1167 for ( ; len > 0; --len )
1168 {
1169 unsigned char c = (unsigned char)*ascii++;
1170 wxASSERT_MSG( c < 0x80,
1171 _T("Non-ASCII value passed to FromAscii().") );
c1eada83 1172
602a857b
VS
1173 *dest++ = (wchar_t)c;
1174 }
e015c2a3
VZ
1175 }
1176
b1ac3b56
RR
1177 return res;
1178}
1179
e6310bbc
VS
1180wxString wxString::FromAscii(const char *ascii)
1181{
0081dd72 1182 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1183}
1184
c5288c5c 1185wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1186{
1187 // What do we do with '\0' ?
1188
c1eada83 1189 unsigned char c = (unsigned char)ascii;
8760bc65 1190
c1eada83
VS
1191 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1192
1193 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1194 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1195}
1196
b1ac3b56
RR
1197const wxCharBuffer wxString::ToAscii() const
1198{
e015c2a3
VZ
1199 // this will allocate enough space for the terminating NUL too
1200 wxCharBuffer buffer(length());
6e394fc6 1201 char *dest = buffer.data();
e015c2a3 1202
c1eada83 1203 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1204 {
c1eada83
VS
1205 wxUniChar c(*i);
1206 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1207 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1208
1209 // the output string can't have embedded NULs anyhow, so we can safely
1210 // stop at first of them even if we do have any
c1eada83 1211 if ( !c )
e015c2a3 1212 break;
b1ac3b56 1213 }
e015c2a3 1214
b1ac3b56
RR
1215 return buffer;
1216}
e015c2a3 1217
c1eada83 1218#endif // wxUSE_UNICODE
b1ac3b56 1219
c801d85f 1220// extract string of length nCount starting at nFirst
c801d85f
KB
1221wxString wxString::Mid(size_t nFirst, size_t nCount) const
1222{
73f507f5 1223 size_t nLen = length();
30d9011f 1224
73f507f5
WS
1225 // default value of nCount is npos and means "till the end"
1226 if ( nCount == npos )
1227 {
1228 nCount = nLen - nFirst;
1229 }
30d9011f 1230
73f507f5
WS
1231 // out-of-bounds requests return sensible things
1232 if ( nFirst + nCount > nLen )
1233 {
1234 nCount = nLen - nFirst;
1235 }
c801d85f 1236
73f507f5
WS
1237 if ( nFirst > nLen )
1238 {
1239 // AllocCopy() will return empty string
1240 return wxEmptyString;
1241 }
c801d85f 1242
73f507f5
WS
1243 wxString dest(*this, nFirst, nCount);
1244 if ( dest.length() != nCount )
1245 {
1246 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1247 }
30d9011f 1248
73f507f5 1249 return dest;
c801d85f
KB
1250}
1251
e87b7833 1252// check that the string starts with prefix and return the rest of the string
d775fa82 1253// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1254bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1255{
c5e7a7d7
VS
1256 if ( compare(0, prefix.length(), prefix) != 0 )
1257 return false;
f6bcfd97
BP
1258
1259 if ( rest )
1260 {
1261 // put the rest of the string into provided pointer
c5e7a7d7 1262 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1263 }
1264
d775fa82 1265 return true;
f6bcfd97
BP
1266}
1267
3affcd07
VZ
1268
1269// check that the string ends with suffix and return the rest of it in the
1270// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1271bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1272{
c5e7a7d7 1273 int start = length() - suffix.length();
81727065
VS
1274
1275 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1276 return false;
1277
1278 if ( rest )
1279 {
1280 // put the rest of the string into provided pointer
1281 rest->assign(*this, 0, start);
1282 }
1283
1284 return true;
1285}
1286
1287
c801d85f
KB
1288// extract nCount last (rightmost) characters
1289wxString wxString::Right(size_t nCount) const
1290{
e87b7833
MB
1291 if ( nCount > length() )
1292 nCount = length();
c801d85f 1293
e87b7833
MB
1294 wxString dest(*this, length() - nCount, nCount);
1295 if ( dest.length() != nCount ) {
b1801e0e
GD
1296 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1297 }
c801d85f
KB
1298 return dest;
1299}
1300
7929902d 1301// get all characters after the last occurrence of ch
c801d85f 1302// (returns the whole string if ch not found)
c9f78968 1303wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1304{
1305 wxString str;
d775fa82 1306 int iPos = Find(ch, true);
3c67202d 1307 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1308 str = *this;
1309 else
c565abe1 1310 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1311
1312 return str;
1313}
1314
1315// extract nCount first (leftmost) characters
1316wxString wxString::Left(size_t nCount) const
1317{
e87b7833
MB
1318 if ( nCount > length() )
1319 nCount = length();
c801d85f 1320
e87b7833
MB
1321 wxString dest(*this, 0, nCount);
1322 if ( dest.length() != nCount ) {
b1801e0e
GD
1323 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1324 }
c801d85f
KB
1325 return dest;
1326}
1327
7929902d 1328// get all characters before the first occurrence of ch
c801d85f 1329// (returns the whole string if ch not found)
c9f78968 1330wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1331{
e87b7833 1332 int iPos = Find(ch);
c565abe1
VZ
1333 if ( iPos == wxNOT_FOUND )
1334 iPos = length();
e87b7833 1335 return wxString(*this, 0, iPos);
c801d85f
KB
1336}
1337
7929902d 1338/// get all characters before the last occurrence of ch
c801d85f 1339/// (returns empty string if ch not found)
c9f78968 1340wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1341{
1342 wxString str;
d775fa82 1343 int iPos = Find(ch, true);
3c67202d 1344 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1345 str = wxString(c_str(), iPos);
c801d85f
KB
1346
1347 return str;
1348}
1349
7929902d 1350/// get all characters after the first occurrence of ch
c801d85f 1351/// (returns empty string if ch not found)
c9f78968 1352wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1353{
1354 wxString str;
1355 int iPos = Find(ch);
3c67202d 1356 if ( iPos != wxNOT_FOUND )
c565abe1 1357 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1358
1359 return str;
1360}
1361
7929902d 1362// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1363size_t wxString::Replace(const wxString& strOld,
1364 const wxString& strNew, bool bReplaceAll)
c801d85f 1365{
a8f1f1b2 1366 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1367 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1368 _T("wxString::Replace(): invalid parameter") );
1369
68482dc5
VZ
1370 wxSTRING_INVALIDATE_CACHE();
1371
510bb748 1372 size_t uiCount = 0; // count of replacements made
c801d85f 1373
8a627032
VZ
1374 // optimize the special common case: replacement of one character by
1375 // another one (in UTF-8 case we can only do this for ASCII characters)
1376 //
1377 // benchmarks show that this special version is around 3 times faster
1378 // (depending on the proportion of matching characters and UTF-8/wchar_t
1379 // build)
1380 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1381 {
1382 const wxStringCharType chOld = strOld.m_impl[0],
1383 chNew = strNew.m_impl[0];
1384
1385 // this loop is the simplified version of the one below
1386 for ( size_t pos = 0; ; )
1387 {
1388 pos = m_impl.find(chOld, pos);
1389 if ( pos == npos )
1390 break;
c801d85f 1391
8a627032
VZ
1392 m_impl[pos++] = chNew;
1393
1394 uiCount++;
1395
1396 if ( !bReplaceAll )
1397 break;
1398 }
1399 }
1400 else // general case
510bb748 1401 {
8a627032
VZ
1402 const size_t uiOldLen = strOld.m_impl.length();
1403 const size_t uiNewLen = strNew.m_impl.length();
1404
1405 for ( size_t pos = 0; ; )
1406 {
1407 pos = m_impl.find(strOld.m_impl, pos);
1408 if ( pos == npos )
1409 break;
510bb748 1410
8a627032
VZ
1411 // replace this occurrence of the old string with the new one
1412 m_impl.replace(pos, uiOldLen, strNew.m_impl);
510bb748 1413
8a627032
VZ
1414 // move up pos past the string that was replaced
1415 pos += uiNewLen;
ad5bb7d6 1416
8a627032
VZ
1417 // increase replace count
1418 uiCount++;
394b2900 1419
8a627032
VZ
1420 // stop after the first one?
1421 if ( !bReplaceAll )
1422 break;
1423 }
c801d85f 1424 }
c801d85f 1425
510bb748 1426 return uiCount;
c801d85f
KB
1427}
1428
1429bool wxString::IsAscii() const
1430{
a4a44612
VS
1431 for ( const_iterator i = begin(); i != end(); ++i )
1432 {
1433 if ( !(*i).IsAscii() )
1434 return false;
1435 }
1436
1437 return true;
c801d85f 1438}
dd1eaa89 1439
c801d85f
KB
1440bool wxString::IsWord() const
1441{
a4a44612
VS
1442 for ( const_iterator i = begin(); i != end(); ++i )
1443 {
1444 if ( !wxIsalpha(*i) )
1445 return false;
1446 }
1447
1448 return true;
c801d85f 1449}
dd1eaa89 1450
c801d85f
KB
1451bool wxString::IsNumber() const
1452{
a4a44612
VS
1453 if ( empty() )
1454 return true;
1455
1456 const_iterator i = begin();
1457
1458 if ( *i == _T('-') || *i == _T('+') )
1459 ++i;
1460
1461 for ( ; i != end(); ++i )
1462 {
1463 if ( !wxIsdigit(*i) )
1464 return false;
1465 }
1466
1467 return true;
c801d85f
KB
1468}
1469
c801d85f
KB
1470wxString wxString::Strip(stripType w) const
1471{
1472 wxString s = *this;
d775fa82
WS
1473 if ( w & leading ) s.Trim(false);
1474 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1475 return s;
1476}
1477
c801d85f
KB
1478// ---------------------------------------------------------------------------
1479// case conversion
1480// ---------------------------------------------------------------------------
1481
1482wxString& wxString::MakeUpper()
1483{
e87b7833
MB
1484 for ( iterator it = begin(), en = end(); it != en; ++it )
1485 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1486
1487 return *this;
1488}
1489
1490wxString& wxString::MakeLower()
1491{
e87b7833
MB
1492 for ( iterator it = begin(), en = end(); it != en; ++it )
1493 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1494
1495 return *this;
1496}
1497
0c7db140
VZ
1498wxString& wxString::MakeCapitalized()
1499{
1500 const iterator en = end();
1501 iterator it = begin();
1502 if ( it != en )
1503 {
1504 *it = (wxChar)wxToupper(*it);
1505 for ( ++it; it != en; ++it )
1506 *it = (wxChar)wxTolower(*it);
1507 }
1508
1509 return *this;
1510}
1511
c801d85f
KB
1512// ---------------------------------------------------------------------------
1513// trimming and padding
1514// ---------------------------------------------------------------------------
1515
d775fa82 1516// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1517// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1518// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1519// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1520// space-like symbols somewhere except in the first 128 chars), it is arguably
1521// still better than trimming away accented letters
1522inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1523
c801d85f
KB
1524// trims spaces (in the sense of isspace) from left or right side
1525wxString& wxString::Trim(bool bFromRight)
1526{
3458e408
WS
1527 // first check if we're going to modify the string at all
1528 if ( !empty() &&
1529 (
1530 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1531 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1532 )
2c3b684c 1533 )
2c3b684c 1534 {
3458e408
WS
1535 if ( bFromRight )
1536 {
1537 // find last non-space character
d4d02bd5 1538 reverse_iterator psz = rbegin();
32c62191 1539 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1540 ++psz;
92df97b8 1541
3458e408 1542 // truncate at trailing space start
d4d02bd5 1543 erase(psz.base(), end());
3458e408
WS
1544 }
1545 else
1546 {
1547 // find first non-space character
1548 iterator psz = begin();
32c62191 1549 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1550 ++psz;
2c3b684c 1551
3458e408
WS
1552 // fix up data and length
1553 erase(begin(), psz);
1554 }
2c3b684c 1555 }
c801d85f 1556
3458e408 1557 return *this;
c801d85f
KB
1558}
1559
1560// adds nCount characters chPad to the string from either side
c9f78968 1561wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1562{
3458e408 1563 wxString s(chPad, nCount);
c801d85f 1564
3458e408
WS
1565 if ( bFromRight )
1566 *this += s;
1567 else
1568 {
1569 s += *this;
1570 swap(s);
1571 }
c801d85f 1572
3458e408 1573 return *this;
c801d85f
KB
1574}
1575
1576// truncate the string
1577wxString& wxString::Truncate(size_t uiLen)
1578{
3458e408
WS
1579 if ( uiLen < length() )
1580 {
1581 erase(begin() + uiLen, end());
1582 }
1583 //else: nothing to do, string is already short enough
c801d85f 1584
3458e408 1585 return *this;
c801d85f
KB
1586}
1587
1588// ---------------------------------------------------------------------------
3c67202d 1589// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1590// ---------------------------------------------------------------------------
1591
1592// find a character
c9f78968 1593int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1594{
3458e408 1595 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1596
3458e408 1597 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1598}
1599
cd0b1709
VZ
1600// ----------------------------------------------------------------------------
1601// conversion to numbers
1602// ----------------------------------------------------------------------------
1603
52de37c7
VS
1604// The implementation of all the functions below is exactly the same so factor
1605// it out. Note that number extraction works correctly on UTF-8 strings, so
1606// we can use wxStringCharType and wx_str() for maximum efficiency.
122f3c5d 1607
92df97b8 1608#ifndef __WXWINCE__
941a4e62
VS
1609 #define DO_IF_NOT_WINCE(x) x
1610#else
1611 #define DO_IF_NOT_WINCE(x)
92df97b8 1612#endif
4ea4767e 1613
c95e653c
VZ
1614#define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1615 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
941a4e62
VS
1616 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1617 \
1618 DO_IF_NOT_WINCE( errno = 0; ) \
1619 \
1620 const wxStringCharType *start = wx_str(); \
1621 wxStringCharType *end; \
c95e653c 1622 T val = func(start, &end, base); \
941a4e62
VS
1623 \
1624 /* return true only if scan was stopped by the terminating NUL and */ \
1625 /* if the string was not empty to start with and no under/overflow */ \
1626 /* occurred: */ \
c95e653c
VZ
1627 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1628 return false; \
1629 *out = val; \
1630 return true
cd0b1709 1631
c95e653c 1632bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1633{
c95e653c 1634 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
619dcb09 1635}
cd0b1709 1636
c95e653c 1637bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1638{
c95e653c 1639 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
cd0b1709
VZ
1640}
1641
c95e653c 1642bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1643{
c95e653c 1644 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
d6718dd1
VZ
1645}
1646
c95e653c 1647bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1648{
c95e653c 1649 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
d6718dd1
VZ
1650}
1651
c95e653c 1652bool wxString::ToDouble(double *pVal) const
cd0b1709 1653{
c95e653c 1654 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
cd0b1709 1655
c95e653c 1656 DO_IF_NOT_WINCE( errno = 0; )
e71e5b37 1657
cd0b1709
VZ
1658 const wxChar *start = c_str();
1659 wxChar *end;
c95e653c 1660 double val = wxStrtod(start, &end);
cd0b1709 1661
d775fa82 1662 // return true only if scan was stopped by the terminating NUL and if the
bda041e5 1663 // string was not empty to start with and no under/overflow occurred
c95e653c
VZ
1664 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1665 return false;
1666
1667 *pVal = val;
1668
1669 return true;
cd0b1709
VZ
1670}
1671
c801d85f 1672// ---------------------------------------------------------------------------
9efd3367 1673// formatted output
c801d85f 1674// ---------------------------------------------------------------------------
378b05f7 1675
d1f6e2cf 1676#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1677/* static */
c9f78968 1678#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1679wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1680#else
d1f6e2cf 1681wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1682#endif
341e7d28 1683{
77c3e48a 1684 va_list argptr;
c9f78968 1685 va_start(argptr, format);
341e7d28 1686
77c3e48a 1687 wxString s;
c9f78968 1688 s.PrintfV(format, argptr);
341e7d28 1689
77c3e48a 1690 va_end(argptr);
341e7d28 1691
77c3e48a 1692 return s;
341e7d28 1693}
d1f6e2cf
VS
1694#endif // !wxUSE_UTF8_LOCALE_ONLY
1695
#if wxUSE_UNICODE_UTF8
// Returns a new string containing the variadic arguments formatted according
// to the given char format string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1711
1712/* static */
c9f78968 1713wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1714{
1715 wxString s;
c9f78968 1716 s.PrintfV(format, argptr);
341e7d28
VZ
1717 return s;
1718}
1719
d1f6e2cf 1720#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1721#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1722int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1723#else
d1f6e2cf 1724int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1725#endif
c801d85f 1726{
ba9bbf13 1727 va_list argptr;
c9f78968 1728 va_start(argptr, format);
c801d85f 1729
c9f78968
VS
1730#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1731 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1732 // because it's the only cast that works safely for downcasting when
1733 // multiple inheritance is used:
1734 wxString *str = static_cast<wxString*>(this);
1735#else
1736 wxString *str = this;
1737#endif
1738
1739 int iLen = str->PrintfV(format, argptr);
c801d85f 1740
ba9bbf13 1741 va_end(argptr);
c801d85f 1742
ba9bbf13 1743 return iLen;
c801d85f 1744}
d1f6e2cf
VS
1745#endif // !wxUSE_UTF8_LOCALE_ONLY
1746
#if wxUSE_UNICODE_UTF8
// Formats the variadic arguments according to the given char format string
// into the string itself and returns the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int iLen = PrintfV(format, args);

    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1760
67612ff1
DE
1761/*
1762 Uses wxVsnprintf and places the result into the this string.
1763
1764 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1765 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1766 the ISO C99 (and thus SUSv3) standard the return value for the case of
1767 an undersized buffer is inconsistent. For conforming vsnprintf
1768 implementations the function must return the number of characters that
1769 would have been printed had the buffer been large enough. For conforming
1770 vswprintf implementations the function must return a negative number
1771 and set errno.
1772
1773 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1774 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1775 those are defined in the standard and backed up by several conformance
1776 statements. Note that ENOMEM mentioned in the manual page does not
1777 apply to swprintf, only wprintf and fwprintf.
1778
1779 Official manual page:
1780 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1781
1782 Some conformance statements (AIX, Solaris):
1783 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1784 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1785
1786 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1787 EILSEQ and EINVAL are specifically defined to mean the error is other than
1788 an undersized buffer and no other errno are defined we treat those two
1789 as meaning hard errors and everything else gets the old behavior which
1790 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1791
67612ff1
DE
1792 In practice it's impossible to determine before compilation which behavior
1793 may be used. The vswprintf function may have vsnprintf-like behavior or
1794 vice-versa. Behavior detected on one release can theoretically change
1795 with an updated release. Not to mention that configure testing for it
1796 would require the test to be run on the host system, not the build system
1797 which makes cross compilation difficult. Therefore, we make no assumptions
1798 about behavior and try our best to handle every known case, including the
1799 case where wxVsnprintf returns a negative number and fails to set errno.
1800
1801 There is yet one more non-standard implementation and that is our own.
1802 Fortunately, that can be detected at compile-time.
1803
1804 On top of all that, ISO C99 explicitly defines snprintf to write a null
1805 character to the last position of the specified buffer. That would be
1806 at the given buffer size minus 1. It is supposed to do this even if it
1807 turns out that the buffer is sized too small.
1808
1809 Darwin (tested on 10.5) follows the C99 behavior exactly.
1810
1811 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1812 errno even when it fails. However, it only seems to ever fail due
1813 to an undersized buffer.
1814*/
2523e9b7
VS
1815#if wxUSE_UNICODE_UTF8
1816template<typename BufferType>
1817#else
1818// we only need one version in non-UTF8 builds and at least two Windows
1819// compilers have problems with this function template, so use just one
1820// normal function here
1821#endif
1822static int DoStringPrintfV(wxString& str,
1823 const wxString& format, va_list argptr)
c801d85f 1824{
f6f5941b 1825 int size = 1024;
e87b7833 1826
f6f5941b
VZ
1827 for ( ;; )
1828 {
2523e9b7
VS
1829#if wxUSE_UNICODE_UTF8
1830 BufferType tmp(str, size + 1);
1831 typename BufferType::CharType *buf = tmp;
1832#else
1833 wxStringBuffer tmp(str, size + 1);
de2589be 1834 wxChar *buf = tmp;
2523e9b7 1835#endif
2bb67b80 1836
ba9bbf13
WS
1837 if ( !buf )
1838 {
1839 // out of memory
a33c7045
VS
1840
1841 // in UTF-8 build, leaving uninitialized junk in the buffer
1842 // could result in invalid non-empty UTF-8 string, so just
1843 // reset the string to empty on failure:
1844 buf[0] = '\0';
ba9bbf13 1845 return -1;
e87b7833 1846 }
f6f5941b 1847
ba9bbf13
WS
1848 // wxVsnprintf() may modify the original arg pointer, so pass it
1849 // only a copy
1850 va_list argptrcopy;
1851 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1852
1853#ifndef __WXWINCE__
1854 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1855 errno = 0;
1856#endif
2523e9b7 1857 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1858 va_end(argptrcopy);
1859
1860 // some implementations of vsnprintf() don't NUL terminate
1861 // the string if there is not enough space for it so
1862 // always do it manually
67612ff1
DE
1863 // FIXME: This really seems to be the wrong and would be an off-by-one
1864 // bug except the code above allocates an extra character.
ba9bbf13
WS
1865 buf[size] = _T('\0');
1866
caff62f2
VZ
1867 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1868 // total number of characters which would have been written if the
b1727cfe 1869 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1870 if ( len < 0 )
1871 {
52de37c7
VS
1872 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1873 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1874 // is true if *both* of them use our own implementation,
1875 // otherwise we can't be sure
f2bbe5b6
VZ
1876#if wxUSE_WXVSNPRINTF
1877 // we know that our own implementation of wxVsnprintf() returns -1
1878 // only for a format error - thus there's something wrong with
1879 // the user's format string
a33c7045 1880 buf[0] = '\0';
f2bbe5b6 1881 return -1;
52de37c7
VS
1882#else // possibly using system version
1883 // assume it only returns error if there is not enough space, but
1884 // as we don't know how much we need, double the current size of
1885 // the buffer
67612ff1 1886#ifndef __WXWINCE__
a9a854d7
DE
1887 if( (errno == EILSEQ) || (errno == EINVAL) )
1888 // If errno was set to one of the two well-known hard errors
1889 // then fail immediately to avoid an infinite loop.
1890 return -1;
1891 else
1892#endif // __WXWINCE__
67612ff1
DE
1893 // still not enough, as we don't know how much we need, double the
1894 // current size of the buffer
1895 size *= 2;
f2bbe5b6 1896#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 1897 }
64f8f94c 1898 else if ( len >= size )
de2589be 1899 {
f2bbe5b6 1900#if wxUSE_WXVSNPRINTF
c95e653c 1901 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
1902 // size+1 when there's not enough space but that's not the size
1903 // of the required buffer!
1904 size *= 2; // so we just double the current size of the buffer
1905#else
64f8f94c
VZ
1906 // some vsnprintf() implementations NUL-terminate the buffer and
1907 // some don't in len == size case, to be safe always add 1
67612ff1
DE
1908 // FIXME: I don't quite understand this comment. The vsnprintf
1909 // function is specifically defined to return the number of
1910 // characters printed not including the null terminator.
1911 // So OF COURSE you need to add 1 to get the right buffer size.
1912 // The following line is definitely correct, no question.
64f8f94c 1913 size = len + 1;
f2bbe5b6 1914#endif
de2589be
VZ
1915 }
1916 else // ok, there was enough space
f6f5941b 1917 {
f6f5941b
VZ
1918 break;
1919 }
f6f5941b
VZ
1920 }
1921
1922 // we could have overshot
2523e9b7
VS
1923 str.Shrink();
1924
1925 return str.length();
1926}
c801d85f 1927
2523e9b7
VS
1928int wxString::PrintfV(const wxString& format, va_list argptr)
1929{
2523e9b7
VS
1930#if wxUSE_UNICODE_UTF8
1931 #if wxUSE_STL_BASED_WXSTRING
1932 typedef wxStringTypeBuffer<char> Utf8Buffer;
1933 #else
6798451b 1934 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
1935 #endif
1936#endif
1937
1938#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 1939 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1940#else
1941 #if wxUSE_UNICODE_UTF8
1942 if ( wxLocaleIsUtf8 )
c6255a6e 1943 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1944 else
1945 // wxChar* version
c6255a6e 1946 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 1947 #else
c6255a6e 1948 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
1949 #endif // UTF8/WCHAR
1950#endif
c801d85f
KB
1951}
1952
097c080b
VZ
1953// ----------------------------------------------------------------------------
1954// misc other operations
1955// ----------------------------------------------------------------------------
0c5d3e1c 1956
d775fa82 1957// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
1958// '?' metacharacters (as usual, '?' matches any character and '*' any number
1959// of them)
8a540c88 1960bool wxString::Matches(const wxString& mask) const
097c080b 1961{
d6044f58
VZ
1962 // I disable this code as it doesn't seem to be faster (in fact, it seems
1963 // to be much slower) than the old, hand-written code below and using it
1964 // here requires always linking with libregex even if the user code doesn't
1965 // use it
1966#if 0 // wxUSE_REGEX
706c2ac9
VZ
1967 // first translate the shell-like mask into a regex
1968 wxString pattern;
1969 pattern.reserve(wxStrlen(pszMask));
1970
1971 pattern += _T('^');
1972 while ( *pszMask )
1973 {
1974 switch ( *pszMask )
1975 {
1976 case _T('?'):
1977 pattern += _T('.');
1978 break;
1979
1980 case _T('*'):
1981 pattern += _T(".*");
1982 break;
1983
1984 case _T('^'):
1985 case _T('.'):
1986 case _T('$'):
1987 case _T('('):
1988 case _T(')'):
1989 case _T('|'):
1990 case _T('+'):
1991 case _T('\\'):
1992 // these characters are special in a RE, quote them
1993 // (however note that we don't quote '[' and ']' to allow
1994 // using them for Unix shell like matching)
1995 pattern += _T('\\');
1996 // fall through
1997
1998 default:
1999 pattern += *pszMask;
2000 }
2001
2002 pszMask++;
2003 }
2004 pattern += _T('$');
2005
2006 // and now use it
2007 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2008#else // !wxUSE_REGEX
9a4232dc
VZ
2009 // TODO: this is, of course, awfully inefficient...
2010
8a540c88
VS
2011 // FIXME-UTF8: implement using iterators, remove #if
2012#if wxUSE_UNICODE_UTF8
2013 wxWCharBuffer maskBuf = mask.wc_str();
2014 wxWCharBuffer txtBuf = wc_str();
2015 const wxChar *pszMask = maskBuf.data();
2016 const wxChar *pszTxt = txtBuf.data();
2017#else
2018 const wxChar *pszMask = mask.wx_str();
9a4232dc 2019 // the char currently being checked
8a540c88
VS
2020 const wxChar *pszTxt = wx_str();
2021#endif
9a4232dc
VZ
2022
2023 // the last location where '*' matched
2024 const wxChar *pszLastStarInText = NULL;
2025 const wxChar *pszLastStarInMask = NULL;
2026
2027match:
2028 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2029 switch ( *pszMask ) {
223d09f6
KB
2030 case wxT('?'):
2031 if ( *pszTxt == wxT('\0') )
d775fa82 2032 return false;
097c080b 2033
9a4232dc 2034 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2035
097c080b
VZ
2036 break;
2037
223d09f6 2038 case wxT('*'):
097c080b 2039 {
9a4232dc
VZ
2040 // remember where we started to be able to backtrack later
2041 pszLastStarInText = pszTxt;
2042 pszLastStarInMask = pszMask;
2043
097c080b 2044 // ignore special chars immediately following this one
9a4232dc 2045 // (should this be an error?)
223d09f6 2046 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2047 pszMask++;
2048
2049 // if there is nothing more, match
223d09f6 2050 if ( *pszMask == wxT('\0') )
d775fa82 2051 return true;
097c080b
VZ
2052
2053 // are there any other metacharacters in the mask?
c86f1403 2054 size_t uiLenMask;
223d09f6 2055 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2056
2057 if ( pEndMask != NULL ) {
2058 // we have to match the string between two metachars
2059 uiLenMask = pEndMask - pszMask;
2060 }
2061 else {
2062 // we have to match the remainder of the string
2bb67b80 2063 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2064 }
2065
2066 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2067 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2068 if ( pMatch == NULL )
d775fa82 2069 return false;
097c080b
VZ
2070
2071 // -1 to compensate "++" in the loop
2072 pszTxt = pMatch + uiLenMask - 1;
2073 pszMask += uiLenMask - 1;
2074 }
2075 break;
2076
2077 default:
2078 if ( *pszMask != *pszTxt )
d775fa82 2079 return false;
097c080b
VZ
2080 break;
2081 }
2082 }
2083
2084 // match only if nothing left
9a4232dc 2085 if ( *pszTxt == wxT('\0') )
d775fa82 2086 return true;
9a4232dc
VZ
2087
2088 // if we failed to match, backtrack if we can
2089 if ( pszLastStarInText ) {
2090 pszTxt = pszLastStarInText + 1;
2091 pszMask = pszLastStarInMask;
2092
2093 pszLastStarInText = NULL;
2094
2095 // don't bother resetting pszLastStarInMask, it's unnecessary
2096
2097 goto match;
2098 }
2099
d775fa82 2100 return false;
706c2ac9 2101#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2102}
2103
1fc5dd6f 2104// Count the number of chars
c9f78968 2105int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2106{
2107 int count = 0;
8f93a29f 2108 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2109 {
8f93a29f 2110 if ( *i == ch )
1fc5dd6f
JS
2111 count ++;
2112 }
2113 return count;
2114}
4e79262f 2115