]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
removed unnceessary gtk_window_set_transient_for() call already done by gtk_message_d...
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
6b769f3d 27#endif
c801d85f
KB
28
29#include <ctype.h>
92df97b8
WS
30
31#ifndef __WXWINCE__
32 #include <errno.h>
33#endif
34
c801d85f
KB
35#include <string.h>
36#include <stdlib.h>
9a08c20e 37
8116a0c5 38#include "wx/hashmap.h"
8f93a29f
VS
39
40// string handling functions used by wxString:
41#if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46#else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51#endif
8f93a29f 52
e87b7833 53
a7ea63e2
VS
54// ---------------------------------------------------------------------------
55// static class variables definition
56// ---------------------------------------------------------------------------
e87b7833 57
a7ea63e2
VS
58//According to STL _must_ be a -1 size_t
59const size_t wxString::npos = (size_t) -1;
8f93a29f 60
68482dc5 61#if wxUSE_STRING_POS_CACHE
68482dc5 62
ad8ae788
VZ
63struct wxStrCacheInitializer
64{
65 wxStrCacheInitializer()
66 {
67 // calling this function triggers s_cache initialization in it, and
68 // from now on it becomes safe to call from multiple threads
69 wxString::GetCache();
70 }
71};
72
73static wxStrCacheInitializer gs_stringCacheInit;
74
68482dc5
VZ
75// gdb seems to be unable to display thread-local variables correctly, at least
76// not my 6.4.98 version under amd64, so provide this debugging helper to do it
77#ifdef __WXDEBUG__
78
79struct wxStrCacheDumper
80{
81 static void ShowAll()
82 {
83 puts("*** wxString cache dump:");
84 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
85 {
86 const wxString::Cache::Element&
8b73c531 87 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
88
89 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
90 n,
8b73c531 91 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
92 c.str,
93 (unsigned long)c.pos,
94 (unsigned long)c.impl,
95 (long)c.len);
96 }
97 }
98};
99
100void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
101
102#endif // __WXDEBUG__
103
104#ifdef wxPROFILE_STRING_CACHE
105
106wxString::CacheStats wxString::ms_cacheStats;
107
8c3b65d9 108struct wxStrCacheStatsDumper
68482dc5 109{
8c3b65d9 110 ~wxStrCacheStatsDumper()
68482dc5
VZ
111 {
112 const wxString::CacheStats& stats = wxString::ms_cacheStats;
113
114 if ( stats.postot )
115 {
116 puts("*** wxString cache statistics:");
117 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
118 stats.postot);
119 printf("\tHits %u (of which %u not used) or %.2f%%\n",
120 stats.poshits,
121 stats.mishits,
122 100.*float(stats.poshits - stats.mishits)/stats.postot);
123 printf("\tAverage position requested: %.2f\n",
124 float(stats.sumpos) / stats.postot);
125 printf("\tAverage offset after cached hint: %.2f\n",
126 float(stats.sumofs) / stats.postot);
127 }
128
129 if ( stats.lentot )
130 {
131 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
132 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
133 }
134 }
8c3b65d9 135};
68482dc5 136
8c3b65d9 137static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
138
139#endif // wxPROFILE_STRING_CACHE
140
141#endif // wxUSE_STRING_POS_CACHE
142
a7ea63e2
VS
143// ----------------------------------------------------------------------------
144// global functions
145// ----------------------------------------------------------------------------
e87b7833 146
a7ea63e2 147#if wxUSE_STD_IOSTREAM
8f93a29f 148
a7ea63e2 149#include <iostream>
8f93a29f 150
a7ea63e2 151wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 152{
7a906e1a
VZ
153#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
154 return os << (const char *)str.AsCharBuf();
a7ea63e2 155#else
7a906e1a 156 return os << str.AsInternal();
a7ea63e2 157#endif
8f93a29f
VS
158}
159
04abe4bc
VS
160wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
161{
162 return os << str.c_str();
163}
164
165wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
166{
167 return os << str.data();
168}
169
170#ifndef __BORLANDC__
171wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
172{
173 return os << str.data();
174}
175#endif
176
6a6ea041 177#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
178
179wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
180{
181 return wos << str.wc_str();
182}
183
184wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
185{
186 return wos << str.AsWChar();
187}
188
189wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
190{
191 return wos << str.data();
192}
193
6a6ea041 194#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 195
a7ea63e2 196#endif // wxUSE_STD_IOSTREAM
e87b7833 197
81727065
VS
198// ===========================================================================
199// wxString class core
200// ===========================================================================
201
202#if wxUSE_UNICODE_UTF8
203
81727065
VS
204void wxString::PosLenToImpl(size_t pos, size_t len,
205 size_t *implPos, size_t *implLen) const
206{
207 if ( pos == npos )
68482dc5 208 {
81727065 209 *implPos = npos;
68482dc5
VZ
210 }
211 else // have valid start position
81727065 212 {
68482dc5
VZ
213 const const_iterator b = GetIterForNthChar(pos);
214 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 215 if ( len == npos )
68482dc5 216 {
81727065 217 *implLen = npos;
68482dc5
VZ
218 }
219 else // have valid length too
81727065 220 {
68482dc5
VZ
221 // we need to handle the case of length specifying a substring
222 // going beyond the end of the string, just as std::string does
223 const const_iterator e(end());
224 const_iterator i(b);
225 while ( len && i <= e )
226 {
227 ++i;
228 --len;
229 }
230
231 *implLen = i.impl() - b.impl();
81727065
VS
232 }
233 }
234}
235
236#endif // wxUSE_UNICODE_UTF8
237
11aac4ba
VS
238// ----------------------------------------------------------------------------
239// wxCStrData converted strings caching
240// ----------------------------------------------------------------------------
241
132276cf
VS
242// FIXME-UTF8: temporarily disabled because it doesn't work with global
243// string objects; re-enable after fixing this bug and benchmarking
244// performance to see if using a hash is a good idea at all
245#if 0
246
11aac4ba
VS
247// For backward compatibility reasons, it must be possible to assign the value
248// returned by wxString::c_str() to a char* or wchar_t* variable and work with
249// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
250// because the memory would be freed immediately, but it has to be valid as long
251// as the string is not modified, so that code like this still works:
252//
253// const wxChar *s = str.c_str();
254// while ( s ) { ... }
255
256// FIXME-UTF8: not thread safe!
257// FIXME-UTF8: we currently clear the cached conversion only when the string is
258// destroyed, but we should do it when the string is modified, to
259// keep memory usage down
260// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
261// invalidated the cache on every change, we could keep the previous
262// conversion
263// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
264// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
265
266template<typename T>
267static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
268{
6c4ebcda 269 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
270 if ( i != hash.end() )
271 {
272 free(i->second);
273 hash.erase(i);
274 }
275}
276
277#if wxUSE_UNICODE
6c4ebcda
VS
278// NB: non-STL implementation doesn't compile with "const wxString*" key type,
279// so we have to use wxString* here and const-cast when used
11aac4ba
VS
280WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
281 wxStringCharConversionCache);
282static wxStringCharConversionCache gs_stringsCharCache;
283
284const char* wxCStrData::AsChar() const
285{
286 // remove previously cache value, if any (see FIXMEs above):
287 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
288
289 // convert the string and keep it:
6c4ebcda
VS
290 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
291 m_str->mb_str().release();
11aac4ba
VS
292
293 return s + m_offset;
294}
295#endif // wxUSE_UNICODE
296
297#if !wxUSE_UNICODE_WCHAR
298WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
299 wxStringWCharConversionCache);
300static wxStringWCharConversionCache gs_stringsWCharCache;
301
302const wchar_t* wxCStrData::AsWChar() const
303{
304 // remove previously cache value, if any (see FIXMEs above):
305 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
306
307 // convert the string and keep it:
6c4ebcda
VS
308 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
309 m_str->wc_str().release();
11aac4ba
VS
310
311 return s + m_offset;
312}
313#endif // !wxUSE_UNICODE_WCHAR
314
11aac4ba
VS
315wxString::~wxString()
316{
317#if wxUSE_UNICODE
318 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
319 DeleteStringFromConversionCache(gs_stringsCharCache, this);
320#endif
321#if !wxUSE_UNICODE_WCHAR
322 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
323#endif
324}
132276cf
VS
325#endif
326
111d9948 327#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
328const char* wxCStrData::AsChar() const
329{
111d9948
VS
330#if wxUSE_UNICODE_UTF8
331 if ( wxLocaleIsUtf8 )
332 return AsInternal();
333#endif
334 // under non-UTF8 locales, we have to convert the internal UTF-8
335 // representation using wxConvLibc and cache the result
336
132276cf 337 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
338
339 // convert the string:
2a7431e1
VZ
340 //
341 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
342 // have it) but it's unfortunately not obvious to implement
343 // because we don't know how big buffer do we need for the
344 // given string length (in case of multibyte encodings, e.g.
345 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
346 //
347 // One idea would be to store more than just m_convertedToChar
348 // in wxString: then we could record the length of the string
349 // which was converted the last time and try to reuse the same
350 // buffer if the current length is not greater than it (this
351 // could still fail because string could have been modified in
352 // place but it would work most of the time, so we'd do it and
353 // only allocate the new buffer if in-place conversion returned
354 // an error). We could also store a bit saying if the string
355 // was modified since the last conversion (and update it in all
356 // operation modifying the string, of course) to avoid unneeded
357 // consequential conversions. But both of these ideas require
358 // adding more fields to wxString and require profiling results
359 // to be sure that we really gain enough from them to justify
360 // doing it.
05f32fc3
VS
361 wxCharBuffer buf(str->mb_str());
362
28be59b4
VZ
363 // if it failed, return empty string and not NULL to avoid crashes in code
364 // written with either wxWidgets 2 wxString or std::string behaviour in
365 // mind: neither of them ever returns NULL and so we shouldn't neither
366 if ( !buf )
367 return "";
368
05f32fc3
VS
369 if ( str->m_convertedToChar &&
370 strlen(buf) == strlen(str->m_convertedToChar) )
371 {
372 // keep the same buffer for as long as possible, so that several calls
373 // to c_str() in a row still work:
374 strcpy(str->m_convertedToChar, buf);
375 }
376 else
377 {
378 str->m_convertedToChar = buf.release();
379 }
380
381 // and keep it:
132276cf
VS
382 return str->m_convertedToChar + m_offset;
383}
111d9948 384#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
385
386#if !wxUSE_UNICODE_WCHAR
387const wchar_t* wxCStrData::AsWChar() const
388{
389 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
390
391 // convert the string:
392 wxWCharBuffer buf(str->wc_str());
393
28be59b4
VZ
394 // notice that here, unlike above in AsChar(), conversion can't fail as our
395 // internal UTF-8 is always well-formed -- or the string was corrupted and
396 // all bets are off anyhow
397
05f32fc3
VS
398 // FIXME-UTF8: do the conversion in-place in the existing buffer
399 if ( str->m_convertedToWChar &&
400 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
401 {
402 // keep the same buffer for as long as possible, so that several calls
403 // to c_str() in a row still work:
404 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
405 }
406 else
407 {
408 str->m_convertedToWChar = buf.release();
409 }
410
411 // and keep it:
132276cf
VS
412 return str->m_convertedToWChar + m_offset;
413}
414#endif // !wxUSE_UNICODE_WCHAR
415
416// ===========================================================================
417// wxString class core
418// ===========================================================================
419
420// ---------------------------------------------------------------------------
421// construction and conversion
422// ---------------------------------------------------------------------------
11aac4ba 423
81727065 424#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
425/* static */
426wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 427 const wxMBConv& conv)
8f93a29f
VS
428{
429 // anything to do?
430 if ( !psz || nLength == 0 )
81727065 431 return SubstrBufFromMB(L"", 0);
8f93a29f
VS
432
433 if ( nLength == npos )
434 nLength = wxNO_LEN;
435
436 size_t wcLen;
437 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
438 if ( !wcLen )
81727065 439 return SubstrBufFromMB(_T(""), 0);
8f93a29f
VS
440 else
441 return SubstrBufFromMB(wcBuf, wcLen);
442}
81727065
VS
443#endif // wxUSE_UNICODE_WCHAR
444
445#if wxUSE_UNICODE_UTF8
446/* static */
447wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
448 const wxMBConv& conv)
449{
81727065
VS
450 // anything to do?
451 if ( !psz || nLength == 0 )
452 return SubstrBufFromMB("", 0);
453
111d9948
VS
454 // if psz is already in UTF-8, we don't have to do the roundtrip to
455 // wchar_t* and back:
456 if ( conv.IsUTF8() )
457 {
458 // we need to validate the input because UTF8 iterators assume valid
459 // UTF-8 sequence and psz may be invalid:
460 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
461 {
9ef1ad0d
VZ
462 // we must pass the real string length to SubstrBufFromMB ctor
463 if ( nLength == npos )
464 nLength = psz ? strlen(psz) : 0;
111d9948
VS
465 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
466 }
467 // else: do the roundtrip through wchar_t*
468 }
469
81727065
VS
470 if ( nLength == npos )
471 nLength = wxNO_LEN;
472
473 // first convert to wide string:
474 size_t wcLen;
475 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
476 if ( !wcLen )
477 return SubstrBufFromMB("", 0);
478
479 // and then to UTF-8:
4fdfe2f3 480 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065
VS
481 // widechar -> UTF-8 conversion isn't supposed to ever fail:
482 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
483
484 return buf;
485}
486#endif // wxUSE_UNICODE_UTF8
487
488#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
489/* static */
490wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 491 const wxMBConv& conv)
8f93a29f
VS
492{
493 // anything to do?
494 if ( !pwz || nLength == 0 )
81727065 495 return SubstrBufFromWC("", 0);
8f93a29f
VS
496
497 if ( nLength == npos )
498 nLength = wxNO_LEN;
499
500 size_t mbLen;
501 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
502 if ( !mbLen )
81727065 503 return SubstrBufFromWC("", 0);
8f93a29f
VS
504 else
505 return SubstrBufFromWC(mbBuf, mbLen);
506}
81727065 507#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
508
509
81727065 510#if wxUSE_UNICODE_WCHAR
e87b7833 511
06386448 512//Convert wxString in Unicode mode to a multi-byte string
830f8f11 513const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 514{
81727065 515 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
e87b7833
MB
516}
517
81727065 518#elif wxUSE_UNICODE_UTF8
e87b7833 519
81727065
VS
520const wxWCharBuffer wxString::wc_str() const
521{
4fdfe2f3
VZ
522 return wxMBConvStrictUTF8().cMB2WC
523 (
524 m_impl.c_str(),
525 m_impl.length() + 1, // size, not length
526 NULL
527 );
81727065
VS
528}
529
530const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
531{
111d9948
VS
532 if ( conv.IsUTF8() )
533 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
534
81727065
VS
535 // FIXME-UTF8: use wc_str() here once we have buffers with length
536
537 size_t wcLen;
4fdfe2f3
VZ
538 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
539 (
540 m_impl.c_str(),
541 m_impl.length() + 1, // size
542 &wcLen
543 ));
81727065
VS
544 if ( !wcLen )
545 return wxCharBuffer("");
546
4f696af8 547 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
81727065
VS
548}
549
550#else // ANSI
eec47cc6 551
7663d0d4 552//Converts this string to a wide character string if unicode
06386448 553//mode is not enabled and wxUSE_WCHAR_T is enabled
830f8f11 554const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 555{
81727065 556 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
265d5cce 557}
7663d0d4 558
e87b7833
MB
559#endif // Unicode/ANSI
560
561// shrink to minimal size (releasing extra memory)
562bool wxString::Shrink()
563{
564 wxString tmp(begin(), end());
565 swap(tmp);
566 return tmp.length() == length();
567}
568
d8a4b666 569// deprecated compatibility code:
a7ea63e2 570#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 571wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
572{
573 return DoGetWriteBuf(nLen);
574}
575
576void wxString::UngetWriteBuf()
577{
578 DoUngetWriteBuf();
579}
580
581void wxString::UngetWriteBuf(size_t nLen)
582{
583 DoUngetWriteBuf(nLen);
584}
a7ea63e2 585#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 586
d8a4b666 587
e87b7833
MB
588// ---------------------------------------------------------------------------
589// data access
590// ---------------------------------------------------------------------------
591
592// all functions are inline in string.h
593
594// ---------------------------------------------------------------------------
e8f59039 595// concatenation operators
e87b7833
MB
596// ---------------------------------------------------------------------------
597
c801d85f 598/*
c801d85f
KB
599 * concatenation functions come in 5 flavours:
600 * string + string
601 * char + string and string + char
602 * C str + string and string + C str
603 */
604
b1801e0e 605wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 606{
992527a5 607#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
608 wxASSERT( str1.IsValid() );
609 wxASSERT( str2.IsValid() );
e87b7833 610#endif
097c080b 611
3458e408
WS
612 wxString s = str1;
613 s += str2;
3168a13f 614
3458e408 615 return s;
c801d85f
KB
616}
617
c9f78968 618wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 619{
992527a5 620#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 621 wxASSERT( str.IsValid() );
e87b7833 622#endif
3168a13f 623
3458e408
WS
624 wxString s = str;
625 s += ch;
097c080b 626
3458e408 627 return s;
c801d85f
KB
628}
629
c9f78968 630wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 631{
992527a5 632#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 633 wxASSERT( str.IsValid() );
e87b7833 634#endif
097c080b 635
3458e408
WS
636 wxString s = ch;
637 s += str;
3168a13f 638
3458e408 639 return s;
c801d85f
KB
640}
641
8f93a29f 642wxString operator+(const wxString& str, const char *psz)
c801d85f 643{
992527a5 644#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 645 wxASSERT( str.IsValid() );
e87b7833 646#endif
097c080b 647
3458e408 648 wxString s;
8f93a29f 649 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
650 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
651 }
652 s += str;
653 s += psz;
3168a13f 654
3458e408 655 return s;
c801d85f
KB
656}
657
8f93a29f 658wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 659{
992527a5 660#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
661 wxASSERT( str.IsValid() );
662#endif
663
664 wxString s;
665 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
666 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
667 }
668 s += str;
669 s += pwz;
670
671 return s;
672}
673
674wxString operator+(const char *psz, const wxString& str)
675{
a7ea63e2
VS
676#if !wxUSE_STL_BASED_WXSTRING
677 wxASSERT( str.IsValid() );
678#endif
679
680 wxString s;
681 if ( !s.Alloc(strlen(psz) + str.length()) ) {
682 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
683 }
684 s = psz;
685 s += str;
686
687 return s;
688}
689
690wxString operator+(const wchar_t *pwz, const wxString& str)
691{
692#if !wxUSE_STL_BASED_WXSTRING
693 wxASSERT( str.IsValid() );
694#endif
695
696 wxString s;
697 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
698 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
699 }
700 s = pwz;
701 s += str;
702
703 return s;
704}
705
706// ---------------------------------------------------------------------------
707// string comparison
708// ---------------------------------------------------------------------------
709
52de37c7
VS
710bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
711{
712 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
713 : wxToupper(GetChar(0u)) == wxToupper(c));
714}
715
a7ea63e2
VS
716#ifdef HAVE_STD_STRING_COMPARE
717
718// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
719// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
720// sort strings in characters code point order by sorting the byte sequence
721// in byte values order (i.e. what strcmp() and memcmp() do).
722
723int wxString::compare(const wxString& str) const
724{
725 return m_impl.compare(str.m_impl);
726}
727
728int wxString::compare(size_t nStart, size_t nLen,
729 const wxString& str) const
730{
731 size_t pos, len;
732 PosLenToImpl(nStart, nLen, &pos, &len);
733 return m_impl.compare(pos, len, str.m_impl);
734}
735
736int wxString::compare(size_t nStart, size_t nLen,
737 const wxString& str,
738 size_t nStart2, size_t nLen2) const
739{
740 size_t pos, len;
741 PosLenToImpl(nStart, nLen, &pos, &len);
742
743 size_t pos2, len2;
744 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
745
746 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
747}
748
749int wxString::compare(const char* sz) const
750{
751 return m_impl.compare(ImplStr(sz));
752}
753
754int wxString::compare(const wchar_t* sz) const
755{
756 return m_impl.compare(ImplStr(sz));
757}
758
759int wxString::compare(size_t nStart, size_t nLen,
760 const char* sz, size_t nCount) const
761{
762 size_t pos, len;
763 PosLenToImpl(nStart, nLen, &pos, &len);
764
765 SubstrBufFromMB str(ImplStr(sz, nCount));
766
767 return m_impl.compare(pos, len, str.data, str.len);
768}
769
770int wxString::compare(size_t nStart, size_t nLen,
771 const wchar_t* sz, size_t nCount) const
772{
773 size_t pos, len;
774 PosLenToImpl(nStart, nLen, &pos, &len);
775
776 SubstrBufFromWC str(ImplStr(sz, nCount));
777
778 return m_impl.compare(pos, len, str.data, str.len);
779}
780
781#else // !HAVE_STD_STRING_COMPARE
782
783static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
784 const wxStringCharType* s2, size_t l2)
785{
786 if( l1 == l2 )
787 return wxStringMemcmp(s1, s2, l1);
788 else if( l1 < l2 )
789 {
790 int ret = wxStringMemcmp(s1, s2, l1);
791 return ret == 0 ? -1 : ret;
792 }
793 else
794 {
795 int ret = wxStringMemcmp(s1, s2, l2);
796 return ret == 0 ? +1 : ret;
797 }
798}
799
800int wxString::compare(const wxString& str) const
801{
802 return ::wxDoCmp(m_impl.data(), m_impl.length(),
803 str.m_impl.data(), str.m_impl.length());
804}
805
806int wxString::compare(size_t nStart, size_t nLen,
807 const wxString& str) const
808{
809 wxASSERT(nStart <= length());
810 size_type strLen = length() - nStart;
811 nLen = strLen < nLen ? strLen : nLen;
812
813 size_t pos, len;
814 PosLenToImpl(nStart, nLen, &pos, &len);
815
816 return ::wxDoCmp(m_impl.data() + pos, len,
817 str.m_impl.data(), str.m_impl.length());
818}
819
820int wxString::compare(size_t nStart, size_t nLen,
821 const wxString& str,
822 size_t nStart2, size_t nLen2) const
823{
824 wxASSERT(nStart <= length());
825 wxASSERT(nStart2 <= str.length());
826 size_type strLen = length() - nStart,
827 strLen2 = str.length() - nStart2;
828 nLen = strLen < nLen ? strLen : nLen;
829 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
830
831 size_t pos, len;
832 PosLenToImpl(nStart, nLen, &pos, &len);
833 size_t pos2, len2;
834 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
835
836 return ::wxDoCmp(m_impl.data() + pos, len,
837 str.m_impl.data() + pos2, len2);
838}
839
840int wxString::compare(const char* sz) const
841{
842 SubstrBufFromMB str(ImplStr(sz, npos));
843 if ( str.len == npos )
844 str.len = wxStringStrlen(str.data);
845 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
846}
847
848int wxString::compare(const wchar_t* sz) const
849{
850 SubstrBufFromWC str(ImplStr(sz, npos));
851 if ( str.len == npos )
852 str.len = wxStringStrlen(str.data);
853 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
854}
855
856int wxString::compare(size_t nStart, size_t nLen,
857 const char* sz, size_t nCount) const
858{
859 wxASSERT(nStart <= length());
860 size_type strLen = length() - nStart;
861 nLen = strLen < nLen ? strLen : nLen;
097c080b 862
a7ea63e2
VS
863 size_t pos, len;
864 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 865
a7ea63e2
VS
866 SubstrBufFromMB str(ImplStr(sz, nCount));
867 if ( str.len == npos )
868 str.len = wxStringStrlen(str.data);
869
870 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
871}
872
a7ea63e2
VS
873int wxString::compare(size_t nStart, size_t nLen,
874 const wchar_t* sz, size_t nCount) const
8f93a29f 875{
a7ea63e2
VS
876 wxASSERT(nStart <= length());
877 size_type strLen = length() - nStart;
878 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 879
a7ea63e2
VS
880 size_t pos, len;
881 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 882
a7ea63e2
VS
883 SubstrBufFromWC str(ImplStr(sz, nCount));
884 if ( str.len == npos )
885 str.len = wxStringStrlen(str.data);
886
887 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
888}
889
a7ea63e2
VS
890#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
891
892
8f93a29f
VS
893// ---------------------------------------------------------------------------
894// find_{first,last}_[not]_of functions
895// ---------------------------------------------------------------------------
896
897#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 898
8f93a29f
VS
899// NB: All these functions are implemented with the argument being wxChar*,
900// i.e. widechar string in any Unicode build, even though native string
901// representation is char* in the UTF-8 build. This is because we couldn't
902// use memchr() to determine if a character is in a set encoded as UTF-8.
903
904size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 905{
8f93a29f 906 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
907}
908
8f93a29f 909size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 910{
8f93a29f 911 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
912}
913
8f93a29f 914size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 915{
8f93a29f 916 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 917
8f93a29f
VS
918 size_t idx = nStart;
919 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 920 {
8f93a29f
VS
921 if ( wxTmemchr(sz, *i, n) )
922 return idx;
dcb68102 923 }
8f93a29f
VS
924
925 return npos;
926}
927
928size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
929{
930 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
931
932 size_t idx = nStart;
933 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 934 {
8f93a29f
VS
935 if ( !wxTmemchr(sz, *i, n) )
936 return idx;
937 }
938
939 return npos;
940}
941
942
943size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
944{
945 return find_last_of(sz, nStart, wxStrlen(sz));
946}
947
948size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
949{
950 return find_last_not_of(sz, nStart, wxStrlen(sz));
951}
952
953size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
954{
955 size_t len = length();
956
957 if ( nStart == npos )
958 {
959 nStart = len - 1;
dcb68102 960 }
2c09fb3b 961 else
dcb68102 962 {
8f93a29f 963 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 964 }
8f93a29f
VS
965
966 size_t idx = nStart;
967 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
968 i != rend(); --idx, ++i )
969 {
970 if ( wxTmemchr(sz, *i, n) )
971 return idx;
972 }
973
974 return npos;
dcb68102
RN
975}
976
8f93a29f 977size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 978{
8f93a29f
VS
979 size_t len = length();
980
981 if ( nStart == npos )
982 {
983 nStart = len - 1;
984 }
985 else
986 {
987 wxASSERT_MSG( nStart <= len, _T("invalid index") );
988 }
989
990 size_t idx = nStart;
991 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
992 i != rend(); --idx, ++i )
993 {
994 if ( !wxTmemchr(sz, *i, n) )
995 return idx;
996 }
997
998 return npos;
dcb68102
RN
999}
1000
8f93a29f 1001size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1002{
8f93a29f
VS
1003 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1004
1005 size_t idx = nStart;
1006 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1007 {
1008 if ( *i != ch )
1009 return idx;
1010 }
1011
1012 return npos;
1013}
1014
1015size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1016{
1017 size_t len = length();
1018
1019 if ( nStart == npos )
1020 {
1021 nStart = len - 1;
1022 }
1023 else
1024 {
1025 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1026 }
1027
1028 size_t idx = nStart;
1029 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1030 i != rend(); --idx, ++i )
1031 {
1032 if ( *i != ch )
1033 return idx;
1034 }
1035
1036 return npos;
1037}
1038
1039// the functions above were implemented for wchar_t* arguments in Unicode
1040// build and char* in ANSI build; below are implementations for the other
1041// version:
1042#if wxUSE_UNICODE
1043 #define wxOtherCharType char
1044 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1045#else
1046 #define wxOtherCharType wchar_t
1047 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1048#endif
1049
1050size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1051 { return find_first_of(STRCONV(sz), nStart); }
1052
1053size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1054 size_t n) const
1055 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1056size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1057 { return find_last_of(STRCONV(sz), nStart); }
1058size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1059 size_t n) const
1060 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1061size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1062 { return find_first_not_of(STRCONV(sz), nStart); }
1063size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1064 size_t n) const
1065 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1066size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1067 { return find_last_not_of(STRCONV(sz), nStart); }
1068size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1069 size_t n) const
1070 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1071
1072#undef wxOtherCharType
1073#undef STRCONV
1074
1075#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1076
1077// ===========================================================================
1078// other common string functions
1079// ===========================================================================
1080
1081int wxString::CmpNoCase(const wxString& s) const
1082{
1083 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1084
8f93a29f
VS
1085 const_iterator i1 = begin();
1086 const_iterator end1 = end();
1087 const_iterator i2 = s.begin();
1088 const_iterator end2 = s.end();
1089
0d8b0f94 1090 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1091 {
1092 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1093 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1094 if ( lower1 != lower2 )
1095 return lower1 < lower2 ? -1 : 1;
1096 }
1097
1098 size_t len1 = length();
1099 size_t len2 = s.length();
dcb68102 1100
8f93a29f
VS
1101 if ( len1 < len2 )
1102 return -1;
1103 else if ( len1 > len2 )
1104 return 1;
1105 return 0;
dcb68102
RN
1106}
1107
1108
b1ac3b56 1109#if wxUSE_UNICODE
e015c2a3 1110
cf6bedce
SC
1111#ifdef __MWERKS__
1112#ifndef __SCHAR_MAX__
1113#define __SCHAR_MAX__ 127
1114#endif
1115#endif
1116
e6310bbc 1117wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1118{
e6310bbc 1119 if (!ascii || len == 0)
b1ac3b56 1120 return wxEmptyString;
e015c2a3 1121
b1ac3b56 1122 wxString res;
e015c2a3 1123
e6310bbc 1124 {
6798451b 1125 wxStringInternalBuffer buf(res, len);
602a857b 1126 wxStringCharType *dest = buf;
c1eada83 1127
602a857b
VS
1128 for ( ; len > 0; --len )
1129 {
1130 unsigned char c = (unsigned char)*ascii++;
1131 wxASSERT_MSG( c < 0x80,
1132 _T("Non-ASCII value passed to FromAscii().") );
c1eada83 1133
602a857b
VS
1134 *dest++ = (wchar_t)c;
1135 }
e015c2a3
VZ
1136 }
1137
b1ac3b56
RR
1138 return res;
1139}
1140
e6310bbc
VS
1141wxString wxString::FromAscii(const char *ascii)
1142{
0081dd72 1143 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1144}
1145
c5288c5c 1146wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1147{
1148 // What do we do with '\0' ?
1149
c1eada83 1150 unsigned char c = (unsigned char)ascii;
8760bc65 1151
c1eada83
VS
1152 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1153
1154 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1155 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1156}
1157
b1ac3b56
RR
1158const wxCharBuffer wxString::ToAscii() const
1159{
e015c2a3
VZ
1160 // this will allocate enough space for the terminating NUL too
1161 wxCharBuffer buffer(length());
6e394fc6 1162 char *dest = buffer.data();
e015c2a3 1163
c1eada83 1164 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1165 {
c1eada83
VS
1166 wxUniChar c(*i);
1167 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1168 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1169
1170 // the output string can't have embedded NULs anyhow, so we can safely
1171 // stop at first of them even if we do have any
c1eada83 1172 if ( !c )
e015c2a3 1173 break;
b1ac3b56 1174 }
e015c2a3 1175
b1ac3b56
RR
1176 return buffer;
1177}
e015c2a3 1178
c1eada83 1179#endif // wxUSE_UNICODE
b1ac3b56 1180
c801d85f 1181// extract string of length nCount starting at nFirst
c801d85f
KB
1182wxString wxString::Mid(size_t nFirst, size_t nCount) const
1183{
73f507f5 1184 size_t nLen = length();
30d9011f 1185
73f507f5
WS
1186 // default value of nCount is npos and means "till the end"
1187 if ( nCount == npos )
1188 {
1189 nCount = nLen - nFirst;
1190 }
30d9011f 1191
73f507f5
WS
1192 // out-of-bounds requests return sensible things
1193 if ( nFirst + nCount > nLen )
1194 {
1195 nCount = nLen - nFirst;
1196 }
c801d85f 1197
73f507f5
WS
1198 if ( nFirst > nLen )
1199 {
1200 // AllocCopy() will return empty string
1201 return wxEmptyString;
1202 }
c801d85f 1203
73f507f5
WS
1204 wxString dest(*this, nFirst, nCount);
1205 if ( dest.length() != nCount )
1206 {
1207 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1208 }
30d9011f 1209
73f507f5 1210 return dest;
c801d85f
KB
1211}
1212
e87b7833 1213// check that the string starts with prefix and return the rest of the string
d775fa82 1214// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1215bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1216{
c5e7a7d7
VS
1217 if ( compare(0, prefix.length(), prefix) != 0 )
1218 return false;
f6bcfd97
BP
1219
1220 if ( rest )
1221 {
1222 // put the rest of the string into provided pointer
c5e7a7d7 1223 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1224 }
1225
d775fa82 1226 return true;
f6bcfd97
BP
1227}
1228
3affcd07
VZ
1229
1230// check that the string ends with suffix and return the rest of it in the
1231// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1232bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1233{
c5e7a7d7 1234 int start = length() - suffix.length();
81727065
VS
1235
1236 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1237 return false;
1238
1239 if ( rest )
1240 {
1241 // put the rest of the string into provided pointer
1242 rest->assign(*this, 0, start);
1243 }
1244
1245 return true;
1246}
1247
1248
c801d85f
KB
1249// extract nCount last (rightmost) characters
1250wxString wxString::Right(size_t nCount) const
1251{
e87b7833
MB
1252 if ( nCount > length() )
1253 nCount = length();
c801d85f 1254
e87b7833
MB
1255 wxString dest(*this, length() - nCount, nCount);
1256 if ( dest.length() != nCount ) {
b1801e0e
GD
1257 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1258 }
c801d85f
KB
1259 return dest;
1260}
1261
1262// get all characters after the last occurence of ch
1263// (returns the whole string if ch not found)
c9f78968 1264wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1265{
1266 wxString str;
d775fa82 1267 int iPos = Find(ch, true);
3c67202d 1268 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1269 str = *this;
1270 else
c9f78968 1271 str = wx_str() + iPos + 1;
c801d85f
KB
1272
1273 return str;
1274}
1275
1276// extract nCount first (leftmost) characters
1277wxString wxString::Left(size_t nCount) const
1278{
e87b7833
MB
1279 if ( nCount > length() )
1280 nCount = length();
c801d85f 1281
e87b7833
MB
1282 wxString dest(*this, 0, nCount);
1283 if ( dest.length() != nCount ) {
b1801e0e
GD
1284 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1285 }
c801d85f
KB
1286 return dest;
1287}
1288
1289// get all characters before the first occurence of ch
1290// (returns the whole string if ch not found)
c9f78968 1291wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1292{
e87b7833
MB
1293 int iPos = Find(ch);
1294 if ( iPos == wxNOT_FOUND ) iPos = length();
1295 return wxString(*this, 0, iPos);
c801d85f
KB
1296}
1297
1298/// get all characters before the last occurence of ch
1299/// (returns empty string if ch not found)
c9f78968 1300wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1301{
1302 wxString str;
d775fa82 1303 int iPos = Find(ch, true);
3c67202d 1304 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1305 str = wxString(c_str(), iPos);
c801d85f
KB
1306
1307 return str;
1308}
1309
1310/// get all characters after the first occurence of ch
1311/// (returns empty string if ch not found)
c9f78968 1312wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1313{
1314 wxString str;
1315 int iPos = Find(ch);
3c67202d 1316 if ( iPos != wxNOT_FOUND )
c9f78968 1317 str = wx_str() + iPos + 1;
c801d85f
KB
1318
1319 return str;
1320}
1321
1322// replace first (or all) occurences of some substring with another one
8a540c88
VS
1323size_t wxString::Replace(const wxString& strOld,
1324 const wxString& strNew, bool bReplaceAll)
c801d85f 1325{
a8f1f1b2 1326 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1327 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1328 _T("wxString::Replace(): invalid parameter") );
1329
68482dc5
VZ
1330 wxSTRING_INVALIDATE_CACHE();
1331
510bb748 1332 size_t uiCount = 0; // count of replacements made
c801d85f 1333
8a627032
VZ
1334 // optimize the special common case: replacement of one character by
1335 // another one (in UTF-8 case we can only do this for ASCII characters)
1336 //
1337 // benchmarks show that this special version is around 3 times faster
1338 // (depending on the proportion of matching characters and UTF-8/wchar_t
1339 // build)
1340 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1341 {
1342 const wxStringCharType chOld = strOld.m_impl[0],
1343 chNew = strNew.m_impl[0];
1344
1345 // this loop is the simplified version of the one below
1346 for ( size_t pos = 0; ; )
1347 {
1348 pos = m_impl.find(chOld, pos);
1349 if ( pos == npos )
1350 break;
c801d85f 1351
8a627032
VZ
1352 m_impl[pos++] = chNew;
1353
1354 uiCount++;
1355
1356 if ( !bReplaceAll )
1357 break;
1358 }
1359 }
1360 else // general case
510bb748 1361 {
8a627032
VZ
1362 const size_t uiOldLen = strOld.m_impl.length();
1363 const size_t uiNewLen = strNew.m_impl.length();
1364
1365 for ( size_t pos = 0; ; )
1366 {
1367 pos = m_impl.find(strOld.m_impl, pos);
1368 if ( pos == npos )
1369 break;
510bb748 1370
8a627032
VZ
1371 // replace this occurrence of the old string with the new one
1372 m_impl.replace(pos, uiOldLen, strNew.m_impl);
510bb748 1373
8a627032
VZ
1374 // move up pos past the string that was replaced
1375 pos += uiNewLen;
ad5bb7d6 1376
8a627032
VZ
1377 // increase replace count
1378 uiCount++;
394b2900 1379
8a627032
VZ
1380 // stop after the first one?
1381 if ( !bReplaceAll )
1382 break;
1383 }
c801d85f 1384 }
c801d85f 1385
510bb748 1386 return uiCount;
c801d85f
KB
1387}
1388
1389bool wxString::IsAscii() const
1390{
a4a44612
VS
1391 for ( const_iterator i = begin(); i != end(); ++i )
1392 {
1393 if ( !(*i).IsAscii() )
1394 return false;
1395 }
1396
1397 return true;
c801d85f 1398}
dd1eaa89 1399
c801d85f
KB
1400bool wxString::IsWord() const
1401{
a4a44612
VS
1402 for ( const_iterator i = begin(); i != end(); ++i )
1403 {
1404 if ( !wxIsalpha(*i) )
1405 return false;
1406 }
1407
1408 return true;
c801d85f 1409}
dd1eaa89 1410
c801d85f
KB
1411bool wxString::IsNumber() const
1412{
a4a44612
VS
1413 if ( empty() )
1414 return true;
1415
1416 const_iterator i = begin();
1417
1418 if ( *i == _T('-') || *i == _T('+') )
1419 ++i;
1420
1421 for ( ; i != end(); ++i )
1422 {
1423 if ( !wxIsdigit(*i) )
1424 return false;
1425 }
1426
1427 return true;
c801d85f
KB
1428}
1429
c801d85f
KB
1430wxString wxString::Strip(stripType w) const
1431{
1432 wxString s = *this;
d775fa82
WS
1433 if ( w & leading ) s.Trim(false);
1434 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1435 return s;
1436}
1437
c801d85f
KB
1438// ---------------------------------------------------------------------------
1439// case conversion
1440// ---------------------------------------------------------------------------
1441
1442wxString& wxString::MakeUpper()
1443{
e87b7833
MB
1444 for ( iterator it = begin(), en = end(); it != en; ++it )
1445 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1446
1447 return *this;
1448}
1449
1450wxString& wxString::MakeLower()
1451{
e87b7833
MB
1452 for ( iterator it = begin(), en = end(); it != en; ++it )
1453 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1454
1455 return *this;
1456}
1457
0c7db140
VZ
1458wxString& wxString::MakeCapitalized()
1459{
1460 const iterator en = end();
1461 iterator it = begin();
1462 if ( it != en )
1463 {
1464 *it = (wxChar)wxToupper(*it);
1465 for ( ++it; it != en; ++it )
1466 *it = (wxChar)wxTolower(*it);
1467 }
1468
1469 return *this;
1470}
1471
c801d85f
KB
1472// ---------------------------------------------------------------------------
1473// trimming and padding
1474// ---------------------------------------------------------------------------
1475
d775fa82 1476// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1477// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1478// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1479// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1480// space-like symbols somewhere except in the first 128 chars), it is arguably
1481// still better than trimming away accented letters
1482inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1483
c801d85f
KB
1484// trims spaces (in the sense of isspace) from left or right side
1485wxString& wxString::Trim(bool bFromRight)
1486{
3458e408
WS
1487 // first check if we're going to modify the string at all
1488 if ( !empty() &&
1489 (
1490 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1491 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1492 )
2c3b684c 1493 )
2c3b684c 1494 {
3458e408
WS
1495 if ( bFromRight )
1496 {
1497 // find last non-space character
d4d02bd5 1498 reverse_iterator psz = rbegin();
32c62191 1499 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1500 ++psz;
92df97b8 1501
3458e408 1502 // truncate at trailing space start
d4d02bd5 1503 erase(psz.base(), end());
3458e408
WS
1504 }
1505 else
1506 {
1507 // find first non-space character
1508 iterator psz = begin();
32c62191 1509 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1510 ++psz;
2c3b684c 1511
3458e408
WS
1512 // fix up data and length
1513 erase(begin(), psz);
1514 }
2c3b684c 1515 }
c801d85f 1516
3458e408 1517 return *this;
c801d85f
KB
1518}
1519
1520// adds nCount characters chPad to the string from either side
c9f78968 1521wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1522{
3458e408 1523 wxString s(chPad, nCount);
c801d85f 1524
3458e408
WS
1525 if ( bFromRight )
1526 *this += s;
1527 else
1528 {
1529 s += *this;
1530 swap(s);
1531 }
c801d85f 1532
3458e408 1533 return *this;
c801d85f
KB
1534}
1535
1536// truncate the string
1537wxString& wxString::Truncate(size_t uiLen)
1538{
3458e408
WS
1539 if ( uiLen < length() )
1540 {
1541 erase(begin() + uiLen, end());
1542 }
1543 //else: nothing to do, string is already short enough
c801d85f 1544
3458e408 1545 return *this;
c801d85f
KB
1546}
1547
1548// ---------------------------------------------------------------------------
3c67202d 1549// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1550// ---------------------------------------------------------------------------
1551
1552// find a character
c9f78968 1553int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1554{
3458e408 1555 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1556
3458e408 1557 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1558}
1559
cd0b1709
VZ
1560// ----------------------------------------------------------------------------
1561// conversion to numbers
1562// ----------------------------------------------------------------------------
1563
52de37c7
VS
1564// The implementation of all the functions below is exactly the same so factor
1565// it out. Note that number extraction works correctly on UTF-8 strings, so
1566// we can use wxStringCharType and wx_str() for maximum efficiency.
122f3c5d 1567
92df97b8 1568#ifndef __WXWINCE__
941a4e62
VS
1569 #define DO_IF_NOT_WINCE(x) x
1570#else
1571 #define DO_IF_NOT_WINCE(x)
92df97b8 1572#endif
4ea4767e 1573
c95e653c
VZ
1574#define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1575 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
941a4e62
VS
1576 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1577 \
1578 DO_IF_NOT_WINCE( errno = 0; ) \
1579 \
1580 const wxStringCharType *start = wx_str(); \
1581 wxStringCharType *end; \
c95e653c 1582 T val = func(start, &end, base); \
941a4e62
VS
1583 \
1584 /* return true only if scan was stopped by the terminating NUL and */ \
1585 /* if the string was not empty to start with and no under/overflow */ \
1586 /* occurred: */ \
c95e653c
VZ
1587 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1588 return false; \
1589 *out = val; \
1590 return true
cd0b1709 1591
c95e653c 1592bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1593{
c95e653c 1594 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
619dcb09 1595}
cd0b1709 1596
c95e653c 1597bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1598{
c95e653c 1599 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
cd0b1709
VZ
1600}
1601
c95e653c 1602bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1603{
c95e653c 1604 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
d6718dd1
VZ
1605}
1606
c95e653c 1607bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1608{
c95e653c 1609 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
d6718dd1
VZ
1610}
1611
c95e653c 1612bool wxString::ToDouble(double *pVal) const
cd0b1709 1613{
c95e653c 1614 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
cd0b1709 1615
c95e653c 1616 DO_IF_NOT_WINCE( errno = 0; )
e71e5b37 1617
cd0b1709
VZ
1618 const wxChar *start = c_str();
1619 wxChar *end;
c95e653c 1620 double val = wxStrtod(start, &end);
cd0b1709 1621
d775fa82 1622 // return true only if scan was stopped by the terminating NUL and if the
bda041e5 1623 // string was not empty to start with and no under/overflow occurred
c95e653c
VZ
1624 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1625 return false;
1626
1627 *pVal = val;
1628
1629 return true;
cd0b1709
VZ
1630}
1631
c801d85f 1632// ---------------------------------------------------------------------------
9efd3367 1633// formatted output
c801d85f 1634// ---------------------------------------------------------------------------
378b05f7 1635
d1f6e2cf 1636#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1637/* static */
c9f78968 1638#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1639wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1640#else
d1f6e2cf 1641wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1642#endif
341e7d28 1643{
77c3e48a 1644 va_list argptr;
c9f78968 1645 va_start(argptr, format);
341e7d28 1646
77c3e48a 1647 wxString s;
c9f78968 1648 s.PrintfV(format, argptr);
341e7d28 1649
77c3e48a 1650 va_end(argptr);
341e7d28 1651
77c3e48a 1652 return s;
341e7d28 1653}
d1f6e2cf
VS
1654#endif // !wxUSE_UTF8_LOCALE_ONLY
1655
#if wxUSE_UNICODE_UTF8
// Return a new string holding the result of printf()-like formatting of the
// variable arguments according to the char format string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1671
1672/* static */
c9f78968 1673wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1674{
1675 wxString s;
c9f78968 1676 s.PrintfV(format, argptr);
341e7d28
VZ
1677 return s;
1678}
1679
d1f6e2cf 1680#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1681#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1682int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1683#else
d1f6e2cf 1684int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1685#endif
c801d85f 1686{
ba9bbf13 1687 va_list argptr;
c9f78968 1688 va_start(argptr, format);
c801d85f 1689
c9f78968
VS
1690#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1691 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1692 // because it's the only cast that works safely for downcasting when
1693 // multiple inheritance is used:
1694 wxString *str = static_cast<wxString*>(this);
1695#else
1696 wxString *str = this;
1697#endif
1698
1699 int iLen = str->PrintfV(format, argptr);
c801d85f 1700
ba9bbf13 1701 va_end(argptr);
c801d85f 1702
ba9bbf13 1703 return iLen;
c801d85f 1704}
d1f6e2cf
VS
1705#endif // !wxUSE_UTF8_LOCALE_ONLY
1706
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of printf()-like
// formatting of the variable arguments according to the char format string
// and return the value of PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1720
67612ff1
DE
1721/*
1722 Uses wxVsnprintf and places the result into this string.
1723
1724 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1725 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1726 the ISO C99 (and thus SUSv3) standard the return value for the case of
1727 an undersized buffer is inconsistent. For conforming vsnprintf
1728 implementations the function must return the number of characters that
1729 would have been printed had the buffer been large enough. For conforming
1730 vswprintf implementations the function must return a negative number
1731 and set errno.
1732
1733 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1734 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1735 those are defined in the standard and backed up by several conformance
1736 statements. Note that ENOMEM mentioned in the manual page does not
1737 apply to swprintf, only wprintf and fwprintf.
1738
1739 Official manual page:
1740 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1741
1742 Some conformance statements (AIX, Solaris):
1743 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1744 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1745
1746 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1747 EILSEQ and EINVAL are specifically defined to mean the error is other than
1748 an undersized buffer and no other errno are defined we treat those two
1749 as meaning hard errors and everything else gets the old behavior which
1750 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1751
67612ff1
DE
1752 In practice it's impossible to determine before compilation which behavior
1753 may be used. The vswprintf function may have vsnprintf-like behavior or
1754 vice-versa. Behavior detected on one release can theoretically change
1755 with an updated release. Not to mention that configure testing for it
1756 would require the test to be run on the host system, not the build system
1757 which makes cross compilation difficult. Therefore, we make no assumptions
1758 about behavior and try our best to handle every known case, including the
1759 case where wxVsnprintf returns a negative number and fails to set errno.
1760
1761 There is yet one more non-standard implementation and that is our own.
1762 Fortunately, that can be detected at compile-time.
1763
1764 On top of all that, ISO C99 explicitly defines snprintf to write a null
1765 character to the last position of the specified buffer. That would be at
1766 the given buffer size minus 1. It is supposed to do this even if it
1767 turns out that the buffer is sized too small.
1768
1769 Darwin (tested on 10.5) follows the C99 behavior exactly.
1770
1771 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1772 errno even when it fails. However, it only seems to ever fail due
1773 to an undersized buffer.
1774*/
2523e9b7
VS
1775#if wxUSE_UNICODE_UTF8
1776template<typename BufferType>
1777#else
1778// we only need one version in non-UTF8 builds and at least two Windows
1779// compilers have problems with this function template, so use just one
1780// normal function here
1781#endif
1782static int DoStringPrintfV(wxString& str,
1783 const wxString& format, va_list argptr)
c801d85f 1784{
f6f5941b 1785 int size = 1024;
e87b7833 1786
f6f5941b
VZ
1787 for ( ;; )
1788 {
2523e9b7
VS
1789#if wxUSE_UNICODE_UTF8
1790 BufferType tmp(str, size + 1);
1791 typename BufferType::CharType *buf = tmp;
1792#else
1793 wxStringBuffer tmp(str, size + 1);
de2589be 1794 wxChar *buf = tmp;
2523e9b7 1795#endif
2bb67b80 1796
ba9bbf13
WS
1797 if ( !buf )
1798 {
1799 // out of memory
a33c7045
VS
1800
1801 // in UTF-8 build, leaving uninitialized junk in the buffer
1802 // could result in invalid non-empty UTF-8 string, so just
1803 // reset the string to empty on failure:
1804 buf[0] = '\0';
ba9bbf13 1805 return -1;
e87b7833 1806 }
f6f5941b 1807
ba9bbf13
WS
1808 // wxVsnprintf() may modify the original arg pointer, so pass it
1809 // only a copy
1810 va_list argptrcopy;
1811 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1812
1813#ifndef __WXWINCE__
1814 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1815 errno = 0;
1816#endif
2523e9b7 1817 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1818 va_end(argptrcopy);
1819
1820 // some implementations of vsnprintf() don't NUL terminate
1821 // the string if there is not enough space for it so
1822 // always do it manually
67612ff1
DE
1823 // FIXME: This really seems to be the wrong and would be an off-by-one
1824 // bug except the code above allocates an extra character.
ba9bbf13
WS
1825 buf[size] = _T('\0');
1826
caff62f2
VZ
1827 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1828 // total number of characters which would have been written if the
b1727cfe 1829 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1830 if ( len < 0 )
1831 {
52de37c7
VS
1832 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1833 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1834 // is true if *both* of them use our own implementation,
1835 // otherwise we can't be sure
f2bbe5b6
VZ
1836#if wxUSE_WXVSNPRINTF
1837 // we know that our own implementation of wxVsnprintf() returns -1
1838 // only for a format error - thus there's something wrong with
1839 // the user's format string
a33c7045 1840 buf[0] = '\0';
f2bbe5b6 1841 return -1;
52de37c7
VS
1842#else // possibly using system version
1843 // assume it only returns error if there is not enough space, but
1844 // as we don't know how much we need, double the current size of
1845 // the buffer
67612ff1 1846#ifndef __WXWINCE__
a9a854d7
DE
1847 if( (errno == EILSEQ) || (errno == EINVAL) )
1848 // If errno was set to one of the two well-known hard errors
1849 // then fail immediately to avoid an infinite loop.
1850 return -1;
1851 else
1852#endif // __WXWINCE__
67612ff1
DE
1853 // still not enough, as we don't know how much we need, double the
1854 // current size of the buffer
1855 size *= 2;
f2bbe5b6 1856#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 1857 }
64f8f94c 1858 else if ( len >= size )
de2589be 1859 {
f2bbe5b6 1860#if wxUSE_WXVSNPRINTF
c95e653c 1861 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
1862 // size+1 when there's not enough space but that's not the size
1863 // of the required buffer!
1864 size *= 2; // so we just double the current size of the buffer
1865#else
64f8f94c
VZ
1866 // some vsnprintf() implementations NUL-terminate the buffer and
1867 // some don't in len == size case, to be safe always add 1
67612ff1
DE
1868 // FIXME: I don't quite understand this comment. The vsnprintf
1869 // function is specifically defined to return the number of
1870 // characters printed not including the null terminator.
1871 // So OF COURSE you need to add 1 to get the right buffer size.
1872 // The following line is definitely correct, no question.
64f8f94c 1873 size = len + 1;
f2bbe5b6 1874#endif
de2589be
VZ
1875 }
1876 else // ok, there was enough space
f6f5941b 1877 {
f6f5941b
VZ
1878 break;
1879 }
f6f5941b
VZ
1880 }
1881
1882 // we could have overshot
2523e9b7
VS
1883 str.Shrink();
1884
1885 return str.length();
1886}
c801d85f 1887
2523e9b7
VS
1888int wxString::PrintfV(const wxString& format, va_list argptr)
1889{
2523e9b7
VS
1890#if wxUSE_UNICODE_UTF8
1891 #if wxUSE_STL_BASED_WXSTRING
1892 typedef wxStringTypeBuffer<char> Utf8Buffer;
1893 #else
6798451b 1894 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
1895 #endif
1896#endif
1897
1898#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 1899 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1900#else
1901 #if wxUSE_UNICODE_UTF8
1902 if ( wxLocaleIsUtf8 )
c6255a6e 1903 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1904 else
1905 // wxChar* version
c6255a6e 1906 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 1907 #else
c6255a6e 1908 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
1909 #endif // UTF8/WCHAR
1910#endif
c801d85f
KB
1911}
1912
097c080b
VZ
1913// ----------------------------------------------------------------------------
1914// misc other operations
1915// ----------------------------------------------------------------------------
0c5d3e1c 1916
d775fa82 1917// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
1918// '?' metacharacters (as usual, '?' matches any character and '*' any number
1919// of them)
8a540c88 1920bool wxString::Matches(const wxString& mask) const
097c080b 1921{
d6044f58
VZ
1922 // I disable this code as it doesn't seem to be faster (in fact, it seems
1923 // to be much slower) than the old, hand-written code below and using it
1924 // here requires always linking with libregex even if the user code doesn't
1925 // use it
1926#if 0 // wxUSE_REGEX
706c2ac9
VZ
1927 // first translate the shell-like mask into a regex
1928 wxString pattern;
1929 pattern.reserve(wxStrlen(pszMask));
1930
1931 pattern += _T('^');
1932 while ( *pszMask )
1933 {
1934 switch ( *pszMask )
1935 {
1936 case _T('?'):
1937 pattern += _T('.');
1938 break;
1939
1940 case _T('*'):
1941 pattern += _T(".*");
1942 break;
1943
1944 case _T('^'):
1945 case _T('.'):
1946 case _T('$'):
1947 case _T('('):
1948 case _T(')'):
1949 case _T('|'):
1950 case _T('+'):
1951 case _T('\\'):
1952 // these characters are special in a RE, quote them
1953 // (however note that we don't quote '[' and ']' to allow
1954 // using them for Unix shell like matching)
1955 pattern += _T('\\');
1956 // fall through
1957
1958 default:
1959 pattern += *pszMask;
1960 }
1961
1962 pszMask++;
1963 }
1964 pattern += _T('$');
1965
1966 // and now use it
1967 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1968#else // !wxUSE_REGEX
9a4232dc
VZ
1969 // TODO: this is, of course, awfully inefficient...
1970
8a540c88
VS
1971 // FIXME-UTF8: implement using iterators, remove #if
1972#if wxUSE_UNICODE_UTF8
1973 wxWCharBuffer maskBuf = mask.wc_str();
1974 wxWCharBuffer txtBuf = wc_str();
1975 const wxChar *pszMask = maskBuf.data();
1976 const wxChar *pszTxt = txtBuf.data();
1977#else
1978 const wxChar *pszMask = mask.wx_str();
9a4232dc 1979 // the char currently being checked
8a540c88
VS
1980 const wxChar *pszTxt = wx_str();
1981#endif
9a4232dc
VZ
1982
1983 // the last location where '*' matched
1984 const wxChar *pszLastStarInText = NULL;
1985 const wxChar *pszLastStarInMask = NULL;
1986
1987match:
1988 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 1989 switch ( *pszMask ) {
223d09f6
KB
1990 case wxT('?'):
1991 if ( *pszTxt == wxT('\0') )
d775fa82 1992 return false;
097c080b 1993
9a4232dc 1994 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 1995
097c080b
VZ
1996 break;
1997
223d09f6 1998 case wxT('*'):
097c080b 1999 {
9a4232dc
VZ
2000 // remember where we started to be able to backtrack later
2001 pszLastStarInText = pszTxt;
2002 pszLastStarInMask = pszMask;
2003
097c080b 2004 // ignore special chars immediately following this one
9a4232dc 2005 // (should this be an error?)
223d09f6 2006 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2007 pszMask++;
2008
2009 // if there is nothing more, match
223d09f6 2010 if ( *pszMask == wxT('\0') )
d775fa82 2011 return true;
097c080b
VZ
2012
2013 // are there any other metacharacters in the mask?
c86f1403 2014 size_t uiLenMask;
223d09f6 2015 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2016
2017 if ( pEndMask != NULL ) {
2018 // we have to match the string between two metachars
2019 uiLenMask = pEndMask - pszMask;
2020 }
2021 else {
2022 // we have to match the remainder of the string
2bb67b80 2023 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2024 }
2025
2026 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2027 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2028 if ( pMatch == NULL )
d775fa82 2029 return false;
097c080b
VZ
2030
2031 // -1 to compensate "++" in the loop
2032 pszTxt = pMatch + uiLenMask - 1;
2033 pszMask += uiLenMask - 1;
2034 }
2035 break;
2036
2037 default:
2038 if ( *pszMask != *pszTxt )
d775fa82 2039 return false;
097c080b
VZ
2040 break;
2041 }
2042 }
2043
2044 // match only if nothing left
9a4232dc 2045 if ( *pszTxt == wxT('\0') )
d775fa82 2046 return true;
9a4232dc
VZ
2047
2048 // if we failed to match, backtrack if we can
2049 if ( pszLastStarInText ) {
2050 pszTxt = pszLastStarInText + 1;
2051 pszMask = pszLastStarInMask;
2052
2053 pszLastStarInText = NULL;
2054
2055 // don't bother resetting pszLastStarInMask, it's unnecessary
2056
2057 goto match;
2058 }
2059
d775fa82 2060 return false;
706c2ac9 2061#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2062}
2063
1fc5dd6f 2064// Count the number of chars
c9f78968 2065int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2066{
2067 int count = 0;
8f93a29f 2068 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2069 {
8f93a29f 2070 if ( *i == ch )
1fc5dd6f
JS
2071 count ++;
2072 }
2073 return count;
2074}
2075
628f87da
VS
2076// ----------------------------------------------------------------------------
2077// wxUTF8StringBuffer
2078// ----------------------------------------------------------------------------
2079
7d46f92b 2080#if wxUSE_UNICODE_WCHAR
628f87da
VS
2081wxUTF8StringBuffer::~wxUTF8StringBuffer()
2082{
2083 wxMBConvStrictUTF8 conv;
2084 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2085 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2086
2087 wxStringInternalBuffer wbuf(m_str, wlen);
2088 conv.ToWChar(wbuf, wlen, m_buf);
2089}
2090
2091wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2092{
2093 wxCHECK_RET(m_lenSet, "length not set");
2094
2095 wxMBConvStrictUTF8 conv;
2096 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2097 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2098
2099 wxStringInternalBufferLength wbuf(m_str, wlen);
2100 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2101 wbuf.SetLength(wlen);
2102}
7d46f92b 2103#endif // wxUSE_UNICODE_WCHAR
5c1de526
VS
2104
2105// ----------------------------------------------------------------------------
2106// wxCharBufferType<T>
2107// ----------------------------------------------------------------------------
2108
2109template<>
2110wxCharTypeBuffer<char>::Data
2111wxCharTypeBuffer<char>::NullData(NULL);
2112
2113template<>
2114wxCharTypeBuffer<wchar_t>::Data
2115wxCharTypeBuffer<wchar_t>::NullData(NULL);