]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
special casing mac code
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
6b769f3d 27#endif
c801d85f
KB
28
29#include <ctype.h>
92df97b8
WS
30
31#ifndef __WXWINCE__
32 #include <errno.h>
33#endif
34
c801d85f
KB
35#include <string.h>
36#include <stdlib.h>
9a08c20e 37
8116a0c5 38#include "wx/hashmap.h"
072682ce 39#include "wx/vector.h"
529e491c 40#include "wx/xlocale.h"
8f93a29f
VS
41
42// string handling functions used by wxString:
43#if wxUSE_UNICODE_UTF8
44 #define wxStringMemcpy memcpy
45 #define wxStringMemcmp memcmp
46 #define wxStringMemchr memchr
47 #define wxStringStrlen strlen
48#else
49 #define wxStringMemcpy wxTmemcpy
50 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
51 #define wxStringMemchr wxTmemchr
52 #define wxStringStrlen wxStrlen
53#endif
8f93a29f 54
4e79262f
VZ
55// ----------------------------------------------------------------------------
56// global variables
57// ----------------------------------------------------------------------------
58
59namespace wxPrivate
60{
61
62static UntypedBufferData s_untypedNullData(NULL);
63
64UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
65
66} // namespace wxPrivate
e87b7833 67
a7ea63e2
VS
68// ---------------------------------------------------------------------------
69// static class variables definition
70// ---------------------------------------------------------------------------
e87b7833 71
a7ea63e2
VS
72//According to STL _must_ be a -1 size_t
73const size_t wxString::npos = (size_t) -1;
8f93a29f 74
68482dc5 75#if wxUSE_STRING_POS_CACHE
68482dc5 76
e810df36
VZ
77#ifdef wxHAS_COMPILER_TLS
78
79wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
80
81#else // !wxHAS_COMPILER_TLS
82
ad8ae788
VZ
83struct wxStrCacheInitializer
84{
85 wxStrCacheInitializer()
86 {
87 // calling this function triggers s_cache initialization in it, and
88 // from now on it becomes safe to call from multiple threads
89 wxString::GetCache();
90 }
91};
92
e317bd3f
SC
93/*
94wxString::Cache& wxString::GetCache()
95{
96 static wxTLS_TYPE(Cache) s_cache;
97
98 return wxTLS_VALUE(s_cache);
99}
100*/
101
ad8ae788
VZ
102static wxStrCacheInitializer gs_stringCacheInit;
103
e810df36
VZ
104#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
105
68482dc5
VZ
106// gdb seems to be unable to display thread-local variables correctly, at least
107// not my 6.4.98 version under amd64, so provide this debugging helper to do it
4b6a582b 108#if wxDEBUG_LEVEL >= 2
68482dc5
VZ
109
110struct wxStrCacheDumper
111{
112 static void ShowAll()
113 {
114 puts("*** wxString cache dump:");
115 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
116 {
117 const wxString::Cache::Element&
8b73c531 118 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
119
120 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
121 n,
8b73c531 122 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
123 c.str,
124 (unsigned long)c.pos,
125 (unsigned long)c.impl,
126 (long)c.len);
127 }
128 }
129};
130
131void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
132
4b6a582b 133#endif // wxDEBUG_LEVEL >= 2
68482dc5
VZ
134
135#ifdef wxPROFILE_STRING_CACHE
136
137wxString::CacheStats wxString::ms_cacheStats;
138
8c3b65d9 139struct wxStrCacheStatsDumper
68482dc5 140{
8c3b65d9 141 ~wxStrCacheStatsDumper()
68482dc5
VZ
142 {
143 const wxString::CacheStats& stats = wxString::ms_cacheStats;
144
145 if ( stats.postot )
146 {
147 puts("*** wxString cache statistics:");
148 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
149 stats.postot);
150 printf("\tHits %u (of which %u not used) or %.2f%%\n",
151 stats.poshits,
152 stats.mishits,
153 100.*float(stats.poshits - stats.mishits)/stats.postot);
154 printf("\tAverage position requested: %.2f\n",
155 float(stats.sumpos) / stats.postot);
156 printf("\tAverage offset after cached hint: %.2f\n",
157 float(stats.sumofs) / stats.postot);
158 }
159
160 if ( stats.lentot )
161 {
162 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
163 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
164 }
165 }
8c3b65d9 166};
68482dc5 167
8c3b65d9 168static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
169
170#endif // wxPROFILE_STRING_CACHE
171
172#endif // wxUSE_STRING_POS_CACHE
173
a7ea63e2
VS
174// ----------------------------------------------------------------------------
175// global functions
176// ----------------------------------------------------------------------------
e87b7833 177
a7ea63e2 178#if wxUSE_STD_IOSTREAM
8f93a29f 179
a7ea63e2 180#include <iostream>
8f93a29f 181
a7ea63e2 182wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 183{
7a906e1a 184#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
de4983f3 185 const wxScopedCharBuffer buf(str.AsCharBuf());
ddf01bdb
VZ
186 if ( !buf )
187 os.clear(wxSTD ios_base::failbit);
188 else
189 os << buf.data();
190
191 return os;
a7ea63e2 192#else
7a906e1a 193 return os << str.AsInternal();
a7ea63e2 194#endif
8f93a29f
VS
195}
196
04abe4bc
VS
197wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
198{
199 return os << str.c_str();
200}
201
de4983f3 202wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
04abe4bc
VS
203{
204 return os << str.data();
205}
206
207#ifndef __BORLANDC__
de4983f3 208wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
04abe4bc
VS
209{
210 return os << str.data();
211}
212#endif
213
6a6ea041 214#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
215
216wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
217{
218 return wos << str.wc_str();
219}
220
221wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
222{
223 return wos << str.AsWChar();
224}
225
de4983f3 226wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
6b61b594
VZ
227{
228 return wos << str.data();
229}
230
6a6ea041 231#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 232
a7ea63e2 233#endif // wxUSE_STD_IOSTREAM
e87b7833 234
81727065
VS
235// ===========================================================================
236// wxString class core
237// ===========================================================================
238
239#if wxUSE_UNICODE_UTF8
240
81727065
VS
241void wxString::PosLenToImpl(size_t pos, size_t len,
242 size_t *implPos, size_t *implLen) const
243{
244 if ( pos == npos )
68482dc5 245 {
81727065 246 *implPos = npos;
68482dc5
VZ
247 }
248 else // have valid start position
81727065 249 {
68482dc5
VZ
250 const const_iterator b = GetIterForNthChar(pos);
251 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 252 if ( len == npos )
68482dc5 253 {
81727065 254 *implLen = npos;
68482dc5
VZ
255 }
256 else // have valid length too
81727065 257 {
68482dc5
VZ
258 // we need to handle the case of length specifying a substring
259 // going beyond the end of the string, just as std::string does
260 const const_iterator e(end());
261 const_iterator i(b);
262 while ( len && i <= e )
263 {
264 ++i;
265 --len;
266 }
267
268 *implLen = i.impl() - b.impl();
81727065
VS
269 }
270 }
271}
272
273#endif // wxUSE_UNICODE_UTF8
274
11aac4ba
VS
275// ----------------------------------------------------------------------------
276// wxCStrData converted strings caching
277// ----------------------------------------------------------------------------
278
132276cf
VS
279// FIXME-UTF8: temporarily disabled because it doesn't work with global
280// string objects; re-enable after fixing this bug and benchmarking
281// performance to see if using a hash is a good idea at all
282#if 0
283
11aac4ba
VS
284// For backward compatibility reasons, it must be possible to assign the value
285// returned by wxString::c_str() to a char* or wchar_t* variable and work with
286// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
287// because the memory would be freed immediately, but it has to be valid as long
288// as the string is not modified, so that code like this still works:
289//
290// const wxChar *s = str.c_str();
291// while ( s ) { ... }
292
293// FIXME-UTF8: not thread safe!
294// FIXME-UTF8: we currently clear the cached conversion only when the string is
295// destroyed, but we should do it when the string is modified, to
296// keep memory usage down
297// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
298// invalidated the cache on every change, we could keep the previous
299// conversion
300// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
301// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
302
303template<typename T>
304static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
305{
6c4ebcda 306 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
307 if ( i != hash.end() )
308 {
309 free(i->second);
310 hash.erase(i);
311 }
312}
313
314#if wxUSE_UNICODE
6c4ebcda
VS
315// NB: non-STL implementation doesn't compile with "const wxString*" key type,
316// so we have to use wxString* here and const-cast when used
11aac4ba
VS
317WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
318 wxStringCharConversionCache);
319static wxStringCharConversionCache gs_stringsCharCache;
320
321const char* wxCStrData::AsChar() const
322{
323 // remove previously cache value, if any (see FIXMEs above):
324 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
325
326 // convert the string and keep it:
6c4ebcda
VS
327 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
328 m_str->mb_str().release();
11aac4ba
VS
329
330 return s + m_offset;
331}
332#endif // wxUSE_UNICODE
333
334#if !wxUSE_UNICODE_WCHAR
335WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
336 wxStringWCharConversionCache);
337static wxStringWCharConversionCache gs_stringsWCharCache;
338
339const wchar_t* wxCStrData::AsWChar() const
340{
341 // remove previously cache value, if any (see FIXMEs above):
342 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
343
344 // convert the string and keep it:
6c4ebcda
VS
345 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
346 m_str->wc_str().release();
11aac4ba
VS
347
348 return s + m_offset;
349}
350#endif // !wxUSE_UNICODE_WCHAR
351
11aac4ba
VS
352wxString::~wxString()
353{
354#if wxUSE_UNICODE
355 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
356 DeleteStringFromConversionCache(gs_stringsCharCache, this);
357#endif
358#if !wxUSE_UNICODE_WCHAR
359 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
360#endif
361}
132276cf
VS
362#endif
363
111d9948 364#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
365const char* wxCStrData::AsChar() const
366{
111d9948
VS
367#if wxUSE_UNICODE_UTF8
368 if ( wxLocaleIsUtf8 )
369 return AsInternal();
370#endif
371 // under non-UTF8 locales, we have to convert the internal UTF-8
372 // representation using wxConvLibc and cache the result
373
132276cf 374 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
375
376 // convert the string:
2a7431e1
VZ
377 //
378 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
379 // have it) but it's unfortunately not obvious to implement
380 // because we don't know how big buffer do we need for the
381 // given string length (in case of multibyte encodings, e.g.
382 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
383 //
384 // One idea would be to store more than just m_convertedToChar
385 // in wxString: then we could record the length of the string
386 // which was converted the last time and try to reuse the same
387 // buffer if the current length is not greater than it (this
388 // could still fail because string could have been modified in
389 // place but it would work most of the time, so we'd do it and
390 // only allocate the new buffer if in-place conversion returned
391 // an error). We could also store a bit saying if the string
392 // was modified since the last conversion (and update it in all
393 // operation modifying the string, of course) to avoid unneeded
394 // consequential conversions. But both of these ideas require
395 // adding more fields to wxString and require profiling results
396 // to be sure that we really gain enough from them to justify
397 // doing it.
de4983f3 398 wxScopedCharBuffer buf(str->mb_str());
05f32fc3 399
28be59b4
VZ
400 // if it failed, return empty string and not NULL to avoid crashes in code
401 // written with either wxWidgets 2 wxString or std::string behaviour in
402 // mind: neither of them ever returns NULL and so we shouldn't neither
403 if ( !buf )
404 return "";
405
05f32fc3
VS
406 if ( str->m_convertedToChar &&
407 strlen(buf) == strlen(str->m_convertedToChar) )
408 {
409 // keep the same buffer for as long as possible, so that several calls
410 // to c_str() in a row still work:
411 strcpy(str->m_convertedToChar, buf);
412 }
413 else
414 {
415 str->m_convertedToChar = buf.release();
416 }
417
418 // and keep it:
132276cf
VS
419 return str->m_convertedToChar + m_offset;
420}
111d9948 421#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
422
423#if !wxUSE_UNICODE_WCHAR
424const wchar_t* wxCStrData::AsWChar() const
425{
426 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
427
428 // convert the string:
de4983f3 429 wxScopedWCharBuffer buf(str->wc_str());
05f32fc3 430
28be59b4
VZ
431 // notice that here, unlike above in AsChar(), conversion can't fail as our
432 // internal UTF-8 is always well-formed -- or the string was corrupted and
433 // all bets are off anyhow
434
05f32fc3
VS
435 // FIXME-UTF8: do the conversion in-place in the existing buffer
436 if ( str->m_convertedToWChar &&
437 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
438 {
439 // keep the same buffer for as long as possible, so that several calls
440 // to c_str() in a row still work:
441 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
442 }
443 else
444 {
445 str->m_convertedToWChar = buf.release();
446 }
447
448 // and keep it:
132276cf
VS
449 return str->m_convertedToWChar + m_offset;
450}
451#endif // !wxUSE_UNICODE_WCHAR
452
453// ===========================================================================
454// wxString class core
455// ===========================================================================
456
457// ---------------------------------------------------------------------------
458// construction and conversion
459// ---------------------------------------------------------------------------
11aac4ba 460
81727065 461#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
462/* static */
463wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 464 const wxMBConv& conv)
8f93a29f
VS
465{
466 // anything to do?
467 if ( !psz || nLength == 0 )
de4983f3 468 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
469
470 if ( nLength == npos )
471 nLength = wxNO_LEN;
472
473 size_t wcLen;
de4983f3 474 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
8f93a29f 475 if ( !wcLen )
de4983f3 476 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
477 else
478 return SubstrBufFromMB(wcBuf, wcLen);
479}
81727065
VS
480#endif // wxUSE_UNICODE_WCHAR
481
482#if wxUSE_UNICODE_UTF8
483/* static */
484wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
485 const wxMBConv& conv)
486{
81727065
VS
487 // anything to do?
488 if ( !psz || nLength == 0 )
de4983f3 489 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065 490
111d9948
VS
491 // if psz is already in UTF-8, we don't have to do the roundtrip to
492 // wchar_t* and back:
493 if ( conv.IsUTF8() )
494 {
495 // we need to validate the input because UTF8 iterators assume valid
496 // UTF-8 sequence and psz may be invalid:
497 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
498 {
9ef1ad0d
VZ
499 // we must pass the real string length to SubstrBufFromMB ctor
500 if ( nLength == npos )
501 nLength = psz ? strlen(psz) : 0;
111d9948
VS
502 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
503 }
504 // else: do the roundtrip through wchar_t*
505 }
506
81727065
VS
507 if ( nLength == npos )
508 nLength = wxNO_LEN;
509
510 // first convert to wide string:
511 size_t wcLen;
de4983f3 512 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
81727065 513 if ( !wcLen )
de4983f3 514 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065
VS
515
516 // and then to UTF-8:
4fdfe2f3 517 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065
VS
518 // widechar -> UTF-8 conversion isn't supposed to ever fail:
519 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
520
521 return buf;
522}
523#endif // wxUSE_UNICODE_UTF8
524
525#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
526/* static */
527wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 528 const wxMBConv& conv)
8f93a29f
VS
529{
530 // anything to do?
531 if ( !pwz || nLength == 0 )
de4983f3 532 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
533
534 if ( nLength == npos )
535 nLength = wxNO_LEN;
536
537 size_t mbLen;
de4983f3 538 wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
8f93a29f 539 if ( !mbLen )
de4983f3 540 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
541 else
542 return SubstrBufFromWC(mbBuf, mbLen);
543}
81727065 544#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
545
546
81727065 547#if wxUSE_UNICODE_WCHAR
e87b7833 548
06386448 549//Convert wxString in Unicode mode to a multi-byte string
de4983f3 550const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 551{
81727065 552 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
e87b7833
MB
553}
554
81727065 555#elif wxUSE_UNICODE_UTF8
e87b7833 556
de4983f3 557const wxScopedWCharBuffer wxString::wc_str() const
81727065 558{
4fdfe2f3
VZ
559 return wxMBConvStrictUTF8().cMB2WC
560 (
561 m_impl.c_str(),
562 m_impl.length() + 1, // size, not length
563 NULL
564 );
81727065
VS
565}
566
de4983f3 567const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
81727065 568{
111d9948 569 if ( conv.IsUTF8() )
de4983f3 570 return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str());
111d9948 571
81727065
VS
572 // FIXME-UTF8: use wc_str() here once we have buffers with length
573
574 size_t wcLen;
de4983f3
VS
575 wxScopedWCharBuffer wcBuf
576 (
577 wxMBConvStrictUTF8().cMB2WC
578 (
579 m_impl.c_str(),
580 m_impl.length() + 1, // size
581 &wcLen
582 )
583 );
81727065
VS
584 if ( !wcLen )
585 return wxCharBuffer("");
586
4f696af8 587 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
81727065
VS
588}
589
590#else // ANSI
eec47cc6 591
7663d0d4 592//Converts this string to a wide character string if unicode
06386448 593//mode is not enabled and wxUSE_WCHAR_T is enabled
de4983f3 594const wxScopedWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 595{
81727065 596 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
265d5cce 597}
7663d0d4 598
e87b7833
MB
599#endif // Unicode/ANSI
600
601// shrink to minimal size (releasing extra memory)
602bool wxString::Shrink()
603{
604 wxString tmp(begin(), end());
605 swap(tmp);
606 return tmp.length() == length();
607}
608
d8a4b666 609// deprecated compatibility code:
a7ea63e2 610#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 611wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
612{
613 return DoGetWriteBuf(nLen);
614}
615
616void wxString::UngetWriteBuf()
617{
618 DoUngetWriteBuf();
619}
620
621void wxString::UngetWriteBuf(size_t nLen)
622{
623 DoUngetWriteBuf(nLen);
624}
a7ea63e2 625#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 626
d8a4b666 627
e87b7833
MB
628// ---------------------------------------------------------------------------
629// data access
630// ---------------------------------------------------------------------------
631
632// all functions are inline in string.h
633
634// ---------------------------------------------------------------------------
e8f59039 635// concatenation operators
e87b7833
MB
636// ---------------------------------------------------------------------------
637
c801d85f 638/*
c801d85f
KB
639 * concatenation functions come in 5 flavours:
640 * string + string
641 * char + string and string + char
642 * C str + string and string + C str
643 */
644
b1801e0e 645wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 646{
992527a5 647#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
648 wxASSERT( str1.IsValid() );
649 wxASSERT( str2.IsValid() );
e87b7833 650#endif
097c080b 651
3458e408
WS
652 wxString s = str1;
653 s += str2;
3168a13f 654
3458e408 655 return s;
c801d85f
KB
656}
657
c9f78968 658wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 659{
992527a5 660#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 661 wxASSERT( str.IsValid() );
e87b7833 662#endif
3168a13f 663
3458e408
WS
664 wxString s = str;
665 s += ch;
097c080b 666
3458e408 667 return s;
c801d85f
KB
668}
669
c9f78968 670wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 671{
992527a5 672#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 673 wxASSERT( str.IsValid() );
e87b7833 674#endif
097c080b 675
3458e408
WS
676 wxString s = ch;
677 s += str;
3168a13f 678
3458e408 679 return s;
c801d85f
KB
680}
681
8f93a29f 682wxString operator+(const wxString& str, const char *psz)
c801d85f 683{
992527a5 684#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 685 wxASSERT( str.IsValid() );
e87b7833 686#endif
097c080b 687
3458e408 688 wxString s;
8f93a29f 689 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
690 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
691 }
692 s += str;
693 s += psz;
3168a13f 694
3458e408 695 return s;
c801d85f
KB
696}
697
8f93a29f 698wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 699{
992527a5 700#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
701 wxASSERT( str.IsValid() );
702#endif
703
704 wxString s;
705 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
706 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
707 }
708 s += str;
709 s += pwz;
710
711 return s;
712}
713
714wxString operator+(const char *psz, const wxString& str)
715{
a7ea63e2
VS
716#if !wxUSE_STL_BASED_WXSTRING
717 wxASSERT( str.IsValid() );
718#endif
719
720 wxString s;
721 if ( !s.Alloc(strlen(psz) + str.length()) ) {
722 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
723 }
724 s = psz;
725 s += str;
726
727 return s;
728}
729
730wxString operator+(const wchar_t *pwz, const wxString& str)
731{
732#if !wxUSE_STL_BASED_WXSTRING
733 wxASSERT( str.IsValid() );
734#endif
735
736 wxString s;
737 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
738 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
739 }
740 s = pwz;
741 s += str;
742
743 return s;
744}
745
746// ---------------------------------------------------------------------------
747// string comparison
748// ---------------------------------------------------------------------------
749
52de37c7
VS
750bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
751{
752 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
753 : wxToupper(GetChar(0u)) == wxToupper(c));
754}
755
a7ea63e2
VS
756#ifdef HAVE_STD_STRING_COMPARE
757
758// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
759// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
760// sort strings in characters code point order by sorting the byte sequence
761// in byte values order (i.e. what strcmp() and memcmp() do).
762
763int wxString::compare(const wxString& str) const
764{
765 return m_impl.compare(str.m_impl);
766}
767
768int wxString::compare(size_t nStart, size_t nLen,
769 const wxString& str) const
770{
771 size_t pos, len;
772 PosLenToImpl(nStart, nLen, &pos, &len);
773 return m_impl.compare(pos, len, str.m_impl);
774}
775
776int wxString::compare(size_t nStart, size_t nLen,
777 const wxString& str,
778 size_t nStart2, size_t nLen2) const
779{
780 size_t pos, len;
781 PosLenToImpl(nStart, nLen, &pos, &len);
782
783 size_t pos2, len2;
784 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
785
786 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
787}
788
789int wxString::compare(const char* sz) const
790{
791 return m_impl.compare(ImplStr(sz));
792}
793
794int wxString::compare(const wchar_t* sz) const
795{
796 return m_impl.compare(ImplStr(sz));
797}
798
799int wxString::compare(size_t nStart, size_t nLen,
800 const char* sz, size_t nCount) const
801{
802 size_t pos, len;
803 PosLenToImpl(nStart, nLen, &pos, &len);
804
805 SubstrBufFromMB str(ImplStr(sz, nCount));
806
807 return m_impl.compare(pos, len, str.data, str.len);
808}
809
810int wxString::compare(size_t nStart, size_t nLen,
811 const wchar_t* sz, size_t nCount) const
812{
813 size_t pos, len;
814 PosLenToImpl(nStart, nLen, &pos, &len);
815
816 SubstrBufFromWC str(ImplStr(sz, nCount));
817
818 return m_impl.compare(pos, len, str.data, str.len);
819}
820
821#else // !HAVE_STD_STRING_COMPARE
822
823static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
824 const wxStringCharType* s2, size_t l2)
825{
826 if( l1 == l2 )
827 return wxStringMemcmp(s1, s2, l1);
828 else if( l1 < l2 )
829 {
830 int ret = wxStringMemcmp(s1, s2, l1);
831 return ret == 0 ? -1 : ret;
832 }
833 else
834 {
835 int ret = wxStringMemcmp(s1, s2, l2);
836 return ret == 0 ? +1 : ret;
837 }
838}
839
840int wxString::compare(const wxString& str) const
841{
842 return ::wxDoCmp(m_impl.data(), m_impl.length(),
843 str.m_impl.data(), str.m_impl.length());
844}
845
846int wxString::compare(size_t nStart, size_t nLen,
847 const wxString& str) const
848{
849 wxASSERT(nStart <= length());
850 size_type strLen = length() - nStart;
851 nLen = strLen < nLen ? strLen : nLen;
852
853 size_t pos, len;
854 PosLenToImpl(nStart, nLen, &pos, &len);
855
856 return ::wxDoCmp(m_impl.data() + pos, len,
857 str.m_impl.data(), str.m_impl.length());
858}
859
860int wxString::compare(size_t nStart, size_t nLen,
861 const wxString& str,
862 size_t nStart2, size_t nLen2) const
863{
864 wxASSERT(nStart <= length());
865 wxASSERT(nStart2 <= str.length());
866 size_type strLen = length() - nStart,
867 strLen2 = str.length() - nStart2;
868 nLen = strLen < nLen ? strLen : nLen;
869 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
870
871 size_t pos, len;
872 PosLenToImpl(nStart, nLen, &pos, &len);
873 size_t pos2, len2;
874 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
875
876 return ::wxDoCmp(m_impl.data() + pos, len,
877 str.m_impl.data() + pos2, len2);
878}
879
880int wxString::compare(const char* sz) const
881{
882 SubstrBufFromMB str(ImplStr(sz, npos));
883 if ( str.len == npos )
884 str.len = wxStringStrlen(str.data);
885 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
886}
887
888int wxString::compare(const wchar_t* sz) const
889{
890 SubstrBufFromWC str(ImplStr(sz, npos));
891 if ( str.len == npos )
892 str.len = wxStringStrlen(str.data);
893 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
894}
895
896int wxString::compare(size_t nStart, size_t nLen,
897 const char* sz, size_t nCount) const
898{
899 wxASSERT(nStart <= length());
900 size_type strLen = length() - nStart;
901 nLen = strLen < nLen ? strLen : nLen;
097c080b 902
a7ea63e2
VS
903 size_t pos, len;
904 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 905
a7ea63e2
VS
906 SubstrBufFromMB str(ImplStr(sz, nCount));
907 if ( str.len == npos )
908 str.len = wxStringStrlen(str.data);
909
910 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
911}
912
a7ea63e2
VS
913int wxString::compare(size_t nStart, size_t nLen,
914 const wchar_t* sz, size_t nCount) const
8f93a29f 915{
a7ea63e2
VS
916 wxASSERT(nStart <= length());
917 size_type strLen = length() - nStart;
918 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 919
a7ea63e2
VS
920 size_t pos, len;
921 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 922
a7ea63e2
VS
923 SubstrBufFromWC str(ImplStr(sz, nCount));
924 if ( str.len == npos )
925 str.len = wxStringStrlen(str.data);
926
927 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
928}
929
a7ea63e2
VS
930#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
931
932
8f93a29f
VS
933// ---------------------------------------------------------------------------
934// find_{first,last}_[not]_of functions
935// ---------------------------------------------------------------------------
936
937#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 938
8f93a29f
VS
939// NB: All these functions are implemented with the argument being wxChar*,
940// i.e. widechar string in any Unicode build, even though native string
941// representation is char* in the UTF-8 build. This is because we couldn't
942// use memchr() to determine if a character is in a set encoded as UTF-8.
943
944size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 945{
8f93a29f 946 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
947}
948
8f93a29f 949size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 950{
8f93a29f 951 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
952}
953
8f93a29f 954size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 955{
8f93a29f 956 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 957
8f93a29f
VS
958 size_t idx = nStart;
959 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 960 {
8f93a29f
VS
961 if ( wxTmemchr(sz, *i, n) )
962 return idx;
dcb68102 963 }
8f93a29f
VS
964
965 return npos;
966}
967
968size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
969{
970 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
971
972 size_t idx = nStart;
973 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 974 {
8f93a29f
VS
975 if ( !wxTmemchr(sz, *i, n) )
976 return idx;
977 }
978
979 return npos;
980}
981
982
983size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
984{
985 return find_last_of(sz, nStart, wxStrlen(sz));
986}
987
988size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
989{
990 return find_last_not_of(sz, nStart, wxStrlen(sz));
991}
992
993size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
994{
995 size_t len = length();
996
997 if ( nStart == npos )
998 {
999 nStart = len - 1;
dcb68102 1000 }
2c09fb3b 1001 else
dcb68102 1002 {
8f93a29f 1003 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 1004 }
8f93a29f
VS
1005
1006 size_t idx = nStart;
1007 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1008 i != rend(); --idx, ++i )
1009 {
1010 if ( wxTmemchr(sz, *i, n) )
1011 return idx;
1012 }
1013
1014 return npos;
dcb68102
RN
1015}
1016
8f93a29f 1017size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 1018{
8f93a29f
VS
1019 size_t len = length();
1020
1021 if ( nStart == npos )
1022 {
1023 nStart = len - 1;
1024 }
1025 else
1026 {
1027 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1028 }
1029
1030 size_t idx = nStart;
1031 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1032 i != rend(); --idx, ++i )
1033 {
1034 if ( !wxTmemchr(sz, *i, n) )
1035 return idx;
1036 }
1037
1038 return npos;
dcb68102
RN
1039}
1040
8f93a29f 1041size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1042{
8f93a29f
VS
1043 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1044
1045 size_t idx = nStart;
1046 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1047 {
1048 if ( *i != ch )
1049 return idx;
1050 }
1051
1052 return npos;
1053}
1054
1055size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1056{
1057 size_t len = length();
1058
1059 if ( nStart == npos )
1060 {
1061 nStart = len - 1;
1062 }
1063 else
1064 {
1065 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1066 }
1067
1068 size_t idx = nStart;
1069 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1070 i != rend(); --idx, ++i )
1071 {
1072 if ( *i != ch )
1073 return idx;
1074 }
1075
1076 return npos;
1077}
1078
1079// the functions above were implemented for wchar_t* arguments in Unicode
1080// build and char* in ANSI build; below are implementations for the other
1081// version:
1082#if wxUSE_UNICODE
1083 #define wxOtherCharType char
1084 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1085#else
1086 #define wxOtherCharType wchar_t
1087 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1088#endif
1089
1090size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1091 { return find_first_of(STRCONV(sz), nStart); }
1092
1093size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1094 size_t n) const
1095 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1096size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1097 { return find_last_of(STRCONV(sz), nStart); }
1098size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1099 size_t n) const
1100 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1101size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1102 { return find_first_not_of(STRCONV(sz), nStart); }
1103size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1104 size_t n) const
1105 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1106size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1107 { return find_last_not_of(STRCONV(sz), nStart); }
1108size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1109 size_t n) const
1110 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1111
1112#undef wxOtherCharType
1113#undef STRCONV
1114
1115#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1116
1117// ===========================================================================
1118// other common string functions
1119// ===========================================================================
1120
1121int wxString::CmpNoCase(const wxString& s) const
1122{
6689960c 1123#if wxUSE_UNICODE_UTF8
8f93a29f
VS
1124 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1125
8f93a29f
VS
1126 const_iterator i1 = begin();
1127 const_iterator end1 = end();
1128 const_iterator i2 = s.begin();
1129 const_iterator end2 = s.end();
1130
0d8b0f94 1131 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1132 {
1133 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1134 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1135 if ( lower1 != lower2 )
1136 return lower1 < lower2 ? -1 : 1;
1137 }
1138
1139 size_t len1 = length();
1140 size_t len2 = s.length();
dcb68102 1141
8f93a29f
VS
1142 if ( len1 < len2 )
1143 return -1;
1144 else if ( len1 > len2 )
1145 return 1;
1146 return 0;
6689960c
VZ
1147#else // wxUSE_UNICODE_WCHAR or ANSI
1148 return wxStricmp(m_impl.c_str(), s.m_impl.c_str());
1149#endif
dcb68102
RN
1150}
1151
1152
b1ac3b56 1153#if wxUSE_UNICODE
e015c2a3 1154
cf6bedce
SC
1155#ifdef __MWERKS__
1156#ifndef __SCHAR_MAX__
1157#define __SCHAR_MAX__ 127
1158#endif
1159#endif
1160
e6310bbc 1161wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1162{
e6310bbc 1163 if (!ascii || len == 0)
b1ac3b56 1164 return wxEmptyString;
e015c2a3 1165
b1ac3b56 1166 wxString res;
e015c2a3 1167
e6310bbc 1168 {
6798451b 1169 wxStringInternalBuffer buf(res, len);
602a857b 1170 wxStringCharType *dest = buf;
c1eada83 1171
602a857b
VS
1172 for ( ; len > 0; --len )
1173 {
1174 unsigned char c = (unsigned char)*ascii++;
1175 wxASSERT_MSG( c < 0x80,
1176 _T("Non-ASCII value passed to FromAscii().") );
c1eada83 1177
602a857b
VS
1178 *dest++ = (wchar_t)c;
1179 }
e015c2a3
VZ
1180 }
1181
b1ac3b56
RR
1182 return res;
1183}
1184
e6310bbc
VS
1185wxString wxString::FromAscii(const char *ascii)
1186{
0081dd72 1187 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1188}
1189
c5288c5c 1190wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1191{
1192 // What do we do with '\0' ?
1193
c1eada83 1194 unsigned char c = (unsigned char)ascii;
8760bc65 1195
c1eada83
VS
1196 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1197
1198 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1199 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1200}
1201
de4983f3 1202const wxScopedCharBuffer wxString::ToAscii() const
b1ac3b56 1203{
e015c2a3
VZ
1204 // this will allocate enough space for the terminating NUL too
1205 wxCharBuffer buffer(length());
6e394fc6 1206 char *dest = buffer.data();
e015c2a3 1207
c1eada83 1208 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1209 {
c1eada83
VS
1210 wxUniChar c(*i);
1211 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1212 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1213
1214 // the output string can't have embedded NULs anyhow, so we can safely
1215 // stop at first of them even if we do have any
c1eada83 1216 if ( !c )
e015c2a3 1217 break;
b1ac3b56 1218 }
e015c2a3 1219
b1ac3b56
RR
1220 return buffer;
1221}
e015c2a3 1222
c1eada83 1223#endif // wxUSE_UNICODE
b1ac3b56 1224
c801d85f 1225// extract string of length nCount starting at nFirst
c801d85f
KB
1226wxString wxString::Mid(size_t nFirst, size_t nCount) const
1227{
73f507f5 1228 size_t nLen = length();
30d9011f 1229
73f507f5
WS
1230 // default value of nCount is npos and means "till the end"
1231 if ( nCount == npos )
1232 {
1233 nCount = nLen - nFirst;
1234 }
30d9011f 1235
73f507f5
WS
1236 // out-of-bounds requests return sensible things
1237 if ( nFirst + nCount > nLen )
1238 {
1239 nCount = nLen - nFirst;
1240 }
c801d85f 1241
73f507f5
WS
1242 if ( nFirst > nLen )
1243 {
1244 // AllocCopy() will return empty string
1245 return wxEmptyString;
1246 }
c801d85f 1247
73f507f5
WS
1248 wxString dest(*this, nFirst, nCount);
1249 if ( dest.length() != nCount )
1250 {
1251 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1252 }
30d9011f 1253
73f507f5 1254 return dest;
c801d85f
KB
1255}
1256
e87b7833 1257// check that the string starts with prefix and return the rest of the string
d775fa82 1258// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1259bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1260{
c5e7a7d7
VS
1261 if ( compare(0, prefix.length(), prefix) != 0 )
1262 return false;
f6bcfd97
BP
1263
1264 if ( rest )
1265 {
1266 // put the rest of the string into provided pointer
c5e7a7d7 1267 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1268 }
1269
d775fa82 1270 return true;
f6bcfd97
BP
1271}
1272
3affcd07
VZ
1273
1274// check that the string ends with suffix and return the rest of it in the
1275// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1276bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1277{
c5e7a7d7 1278 int start = length() - suffix.length();
81727065
VS
1279
1280 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1281 return false;
1282
1283 if ( rest )
1284 {
1285 // put the rest of the string into provided pointer
1286 rest->assign(*this, 0, start);
1287 }
1288
1289 return true;
1290}
1291
1292
c801d85f
KB
1293// extract nCount last (rightmost) characters
1294wxString wxString::Right(size_t nCount) const
1295{
e87b7833
MB
1296 if ( nCount > length() )
1297 nCount = length();
c801d85f 1298
e87b7833
MB
1299 wxString dest(*this, length() - nCount, nCount);
1300 if ( dest.length() != nCount ) {
b1801e0e
GD
1301 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1302 }
c801d85f
KB
1303 return dest;
1304}
1305
7929902d 1306// get all characters after the last occurrence of ch
c801d85f 1307// (returns the whole string if ch not found)
c9f78968 1308wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1309{
1310 wxString str;
d775fa82 1311 int iPos = Find(ch, true);
3c67202d 1312 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1313 str = *this;
1314 else
c565abe1 1315 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1316
1317 return str;
1318}
1319
1320// extract nCount first (leftmost) characters
1321wxString wxString::Left(size_t nCount) const
1322{
e87b7833
MB
1323 if ( nCount > length() )
1324 nCount = length();
c801d85f 1325
e87b7833
MB
1326 wxString dest(*this, 0, nCount);
1327 if ( dest.length() != nCount ) {
b1801e0e
GD
1328 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1329 }
c801d85f
KB
1330 return dest;
1331}
1332
7929902d 1333// get all characters before the first occurrence of ch
c801d85f 1334// (returns the whole string if ch not found)
c9f78968 1335wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1336{
e87b7833 1337 int iPos = Find(ch);
c565abe1
VZ
1338 if ( iPos == wxNOT_FOUND )
1339 iPos = length();
e87b7833 1340 return wxString(*this, 0, iPos);
c801d85f
KB
1341}
1342
7929902d 1343/// get all characters before the last occurrence of ch
c801d85f 1344/// (returns empty string if ch not found)
c9f78968 1345wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1346{
1347 wxString str;
d775fa82 1348 int iPos = Find(ch, true);
3c67202d 1349 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1350 str = wxString(c_str(), iPos);
c801d85f
KB
1351
1352 return str;
1353}
1354
7929902d 1355/// get all characters after the first occurrence of ch
c801d85f 1356/// (returns empty string if ch not found)
c9f78968 1357wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1358{
1359 wxString str;
1360 int iPos = Find(ch);
3c67202d 1361 if ( iPos != wxNOT_FOUND )
c565abe1 1362 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1363
1364 return str;
1365}
1366
7929902d 1367// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1368size_t wxString::Replace(const wxString& strOld,
1369 const wxString& strNew, bool bReplaceAll)
c801d85f 1370{
a8f1f1b2 1371 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1372 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1373 _T("wxString::Replace(): invalid parameter") );
1374
68482dc5
VZ
1375 wxSTRING_INVALIDATE_CACHE();
1376
510bb748 1377 size_t uiCount = 0; // count of replacements made
c801d85f 1378
8a627032
VZ
1379 // optimize the special common case: replacement of one character by
1380 // another one (in UTF-8 case we can only do this for ASCII characters)
1381 //
1382 // benchmarks show that this special version is around 3 times faster
1383 // (depending on the proportion of matching characters and UTF-8/wchar_t
1384 // build)
1385 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1386 {
1387 const wxStringCharType chOld = strOld.m_impl[0],
1388 chNew = strNew.m_impl[0];
1389
1390 // this loop is the simplified version of the one below
1391 for ( size_t pos = 0; ; )
1392 {
1393 pos = m_impl.find(chOld, pos);
1394 if ( pos == npos )
1395 break;
c801d85f 1396
8a627032
VZ
1397 m_impl[pos++] = chNew;
1398
1399 uiCount++;
1400
1401 if ( !bReplaceAll )
1402 break;
1403 }
1404 }
072682ce
VZ
1405 else if ( !bReplaceAll)
1406 {
1407 size_t pos = m_impl.find(strOld, 0);
1408 if ( pos != npos )
1409 {
1410 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1411 uiCount = 1;
1412 }
1413 }
1414 else // replace all occurrences
510bb748 1415 {
8a627032
VZ
1416 const size_t uiOldLen = strOld.m_impl.length();
1417 const size_t uiNewLen = strNew.m_impl.length();
1418
072682ce
VZ
1419 // first scan the string to find all positions at which the replacement
1420 // should be made
1421 wxVector<size_t> replacePositions;
1422
1423 size_t pos;
1424 for ( pos = m_impl.find(strOld.m_impl, 0);
1425 pos != npos;
1426 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
8a627032 1427 {
072682ce
VZ
1428 replacePositions.push_back(pos);
1429 ++uiCount;
1430 }
510bb748 1431
072682ce
VZ
1432 if ( !uiCount )
1433 return 0;
510bb748 1434
072682ce
VZ
1435 // allocate enough memory for the whole new string
1436 wxString tmp;
1437 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
ad5bb7d6 1438
072682ce
VZ
1439 // copy this string to tmp doing replacements on the fly
1440 size_t replNum = 0;
1441 for ( pos = 0; replNum < uiCount; replNum++ )
1442 {
1443 const size_t nextReplPos = replacePositions[replNum];
394b2900 1444
072682ce
VZ
1445 if ( pos != nextReplPos )
1446 {
1447 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1448 }
1449
1450 tmp.m_impl.append(strNew.m_impl);
1451 pos = nextReplPos + uiOldLen;
8a627032 1452 }
072682ce
VZ
1453
1454 if ( pos != m_impl.length() )
1455 {
1456 // append the rest of the string unchanged
1457 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1458 }
1459
1460 swap(tmp);
c801d85f 1461 }
c801d85f 1462
510bb748 1463 return uiCount;
c801d85f
KB
1464}
1465
1466bool wxString::IsAscii() const
1467{
a4a44612
VS
1468 for ( const_iterator i = begin(); i != end(); ++i )
1469 {
1470 if ( !(*i).IsAscii() )
1471 return false;
1472 }
1473
1474 return true;
c801d85f 1475}
dd1eaa89 1476
c801d85f
KB
1477bool wxString::IsWord() const
1478{
a4a44612
VS
1479 for ( const_iterator i = begin(); i != end(); ++i )
1480 {
1481 if ( !wxIsalpha(*i) )
1482 return false;
1483 }
1484
1485 return true;
c801d85f 1486}
dd1eaa89 1487
c801d85f
KB
1488bool wxString::IsNumber() const
1489{
a4a44612
VS
1490 if ( empty() )
1491 return true;
1492
1493 const_iterator i = begin();
1494
1495 if ( *i == _T('-') || *i == _T('+') )
1496 ++i;
1497
1498 for ( ; i != end(); ++i )
1499 {
1500 if ( !wxIsdigit(*i) )
1501 return false;
1502 }
1503
1504 return true;
c801d85f
KB
1505}
1506
c801d85f
KB
1507wxString wxString::Strip(stripType w) const
1508{
1509 wxString s = *this;
d775fa82
WS
1510 if ( w & leading ) s.Trim(false);
1511 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1512 return s;
1513}
1514
c801d85f
KB
1515// ---------------------------------------------------------------------------
1516// case conversion
1517// ---------------------------------------------------------------------------
1518
1519wxString& wxString::MakeUpper()
1520{
e87b7833
MB
1521 for ( iterator it = begin(), en = end(); it != en; ++it )
1522 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1523
1524 return *this;
1525}
1526
1527wxString& wxString::MakeLower()
1528{
e87b7833
MB
1529 for ( iterator it = begin(), en = end(); it != en; ++it )
1530 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1531
1532 return *this;
1533}
1534
0c7db140
VZ
1535wxString& wxString::MakeCapitalized()
1536{
1537 const iterator en = end();
1538 iterator it = begin();
1539 if ( it != en )
1540 {
1541 *it = (wxChar)wxToupper(*it);
1542 for ( ++it; it != en; ++it )
1543 *it = (wxChar)wxTolower(*it);
1544 }
1545
1546 return *this;
1547}
1548
c801d85f
KB
1549// ---------------------------------------------------------------------------
1550// trimming and padding
1551// ---------------------------------------------------------------------------
1552
d775fa82 1553// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1554// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1555// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1556// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1557// space-like symbols somewhere except in the first 128 chars), it is arguably
1558// still better than trimming away accented letters
1559inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1560
c801d85f
KB
1561// trims spaces (in the sense of isspace) from left or right side
1562wxString& wxString::Trim(bool bFromRight)
1563{
3458e408
WS
1564 // first check if we're going to modify the string at all
1565 if ( !empty() &&
1566 (
1567 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1568 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1569 )
2c3b684c 1570 )
2c3b684c 1571 {
3458e408
WS
1572 if ( bFromRight )
1573 {
1574 // find last non-space character
d4d02bd5 1575 reverse_iterator psz = rbegin();
32c62191 1576 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1577 ++psz;
92df97b8 1578
3458e408 1579 // truncate at trailing space start
d4d02bd5 1580 erase(psz.base(), end());
3458e408
WS
1581 }
1582 else
1583 {
1584 // find first non-space character
1585 iterator psz = begin();
32c62191 1586 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1587 ++psz;
2c3b684c 1588
3458e408
WS
1589 // fix up data and length
1590 erase(begin(), psz);
1591 }
2c3b684c 1592 }
c801d85f 1593
3458e408 1594 return *this;
c801d85f
KB
1595}
1596
1597// adds nCount characters chPad to the string from either side
c9f78968 1598wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1599{
3458e408 1600 wxString s(chPad, nCount);
c801d85f 1601
3458e408
WS
1602 if ( bFromRight )
1603 *this += s;
1604 else
1605 {
1606 s += *this;
1607 swap(s);
1608 }
c801d85f 1609
3458e408 1610 return *this;
c801d85f
KB
1611}
1612
1613// truncate the string
1614wxString& wxString::Truncate(size_t uiLen)
1615{
3458e408
WS
1616 if ( uiLen < length() )
1617 {
1618 erase(begin() + uiLen, end());
1619 }
1620 //else: nothing to do, string is already short enough
c801d85f 1621
3458e408 1622 return *this;
c801d85f
KB
1623}
1624
1625// ---------------------------------------------------------------------------
3c67202d 1626// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1627// ---------------------------------------------------------------------------
1628
1629// find a character
c9f78968 1630int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1631{
3458e408 1632 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1633
3458e408 1634 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1635}
1636
cd0b1709
VZ
1637// ----------------------------------------------------------------------------
1638// conversion to numbers
1639// ----------------------------------------------------------------------------
1640
52de37c7
VS
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// <errno.h> is not included under Windows CE (see the top of this file), so
// all code using errno is wrapped in this macro which expands to nothing
// there
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common prologue of the conversion functions: checks the output pointer
// (which must be called pVal), resets errno and declares the "start" and
// "end" pointers used by the conversion call and by the epilogue below.
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, _T("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// Common epilogue of the conversion functions: expects the converted value
// in a variable called "val" and stores it in *pVal only on success.
#define WX_STRING_TO_X_TYPE_END                                             \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *pVal = val;                                                            \
    return true;
cd0b1709 1665
// Convert the whole string to a long using wxStrtol() with the given base,
// which must be 0 or between 2 and 36 (asserted).
//
// Returns true and stores the result in *pVal only if the entire string was
// consumed by the conversion, see WX_STRING_TO_X_TYPE_START/END for the
// exact failure conditions.
bool wxString::ToLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    long val = wxStrtol(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
cd0b1709 1674
// Convert the whole string to an unsigned long using wxStrtoul() with the
// given base, which must be 0 or between 2 and 36 (asserted).
//
// Returns true and stores the result in *pVal only if the entire string was
// consumed by the conversion, see WX_STRING_TO_X_TYPE_START/END for the
// exact failure conditions.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    unsigned long val = wxStrtoul(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1683
// Convert the whole string to a wxLongLong_t using wxStrtoll() with the
// given base, which must be 0 or between 2 and 36 (asserted).
//
// Returns true and stores the result in *pVal only if the entire string was
// consumed by the conversion, see WX_STRING_TO_X_TYPE_START/END for the
// exact failure conditions.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    wxLongLong_t val = wxStrtoll(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1692
// Convert the whole string to a wxULongLong_t using wxStrtoull() with the
// given base, which must be 0 or between 2 and 36 (asserted).
//
// Returns true and stores the result in *pVal only if the entire string was
// consumed by the conversion, see WX_STRING_TO_X_TYPE_START/END for the
// exact failure conditions.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    wxULongLong_t val = wxStrtoull(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1701
// Convert the whole string to a double using wxStrtod().
//
// Returns true and stores the result in *pVal only if the entire string was
// consumed by the conversion, see WX_STRING_TO_X_TYPE_START/END for the
// exact failure conditions.
bool wxString::ToDouble(double *pVal) const
{
    WX_STRING_TO_X_TYPE_START
    double val = wxStrtod(start, &end);
    WX_STRING_TO_X_TYPE_END
}
cd0b1709 1708
529e491c 1709#if wxUSE_XLOCALE
e71e5b37 1710
529e491c
FM
// Same as ToLong() but the conversion is done by the wxStrtol_l[A]()
// functions which are passed wxCLocale.
bool wxString::ToCLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    // the "A" variant is used in the builds selected by the condition below,
    // the other one in all the other builds
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
    long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
    long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
c95e653c 1723
529e491c
FM
// Same as ToULong() but the conversion is done by the wxStrtoul_l[A]()
// functions which are passed wxCLocale.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    // the "A" variant is used in the builds selected by the condition below,
    // the other one in all the other builds
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
    unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
    unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1736
529e491c
FM
// Same as ToDouble() but the conversion is done by the wxStrtod_l[A]()
// functions which are passed wxCLocale.
bool wxString::ToCDouble(double *pVal) const
{
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is used in the builds selected by the condition below,
    // the other one in all the other builds
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
    double val = wxStrtod_lA(start, &end, wxCLocale);
#else
    double val = wxStrtod_l(start, &end, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1747
1748#endif // wxUSE_XLOCALE
1749
c801d85f 1750// ---------------------------------------------------------------------------
9efd3367 1751// formatted output
c801d85f 1752// ---------------------------------------------------------------------------
378b05f7 1753
d1f6e2cf 1754#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1755/* static */
c9f78968 1756#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1757wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1758#else
d1f6e2cf 1759wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1760#endif
341e7d28 1761{
77c3e48a 1762 va_list argptr;
c9f78968 1763 va_start(argptr, format);
341e7d28 1764
77c3e48a 1765 wxString s;
c9f78968 1766 s.PrintfV(format, argptr);
341e7d28 1767
77c3e48a 1768 va_end(argptr);
341e7d28 1769
77c3e48a 1770 return s;
341e7d28 1771}
d1f6e2cf
VS
1772#endif // !wxUSE_UTF8_LOCALE_ONLY
1773
#if wxUSE_UNICODE_UTF8
// Variadic entry point for char format strings: packs the arguments into a
// va_list and returns the string formatted by FormatV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    wxString formatted = FormatV(format, args);

    va_end(args);

    return formatted;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1789
1790/* static */
c9f78968 1791wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1792{
1793 wxString s;
c9f78968 1794 s.PrintfV(format, argptr);
341e7d28
VZ
1795 return s;
1796}
1797
#if !wxUSE_UTF8_LOCALE_ONLY
// Variadic entry point for wxChar format strings: packs the arguments into
// a va_list and replaces the contents of the string with the result of
// PrintfV(), whose return value is returned.
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
#else
int wxString::DoPrintfWchar(const wxChar *format, ...)
#endif
{
    va_list argptr;
    va_start(argptr, format);

#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
    // get a pointer to the wxString instance by downcasting from the mixin
    // base class
    //
    // NOTE(review): the original comment here claimed that dynamic_cast<>
    // had to be used as the only cast working safely with multiple
    // inheritance, but the code uses static_cast<> -- confirm which one is
    // intended.
    wxString *str = static_cast<wxString*>(this);
#else
    wxString *str = this;
#endif

    int iLen = str->PrintfV(format, argptr);

    va_end(argptr);

    return iLen;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
1824
#if wxUSE_UNICODE_UTF8
// Variadic entry point for char format strings: packs the arguments into a
// va_list and replaces the contents of the string with the result of
// PrintfV(), whose return value is returned.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int result = PrintfV(format, args);

    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1838
67612ff1
DE
1839/*
    Uses wxVsnprintf and places the result into this string.
1841
1842 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1843 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1844 the ISO C99 (and thus SUSv3) standard the return value for the case of
1845 an undersized buffer is inconsistent. For conforming vsnprintf
1846 implementations the function must return the number of characters that
1847 would have been printed had the buffer been large enough. For conforming
1848 vswprintf implementations the function must return a negative number
1849 and set errno.
1850
1851 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1852 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1853 those are defined in the standard and backed up by several conformance
1854 statements. Note that ENOMEM mentioned in the manual page does not
1855 apply to swprintf, only wprintf and fwprintf.
1856
1857 Official manual page:
1858 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1859
1860 Some conformance statements (AIX, Solaris):
1861 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1862 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1863
1864 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1865 EILSEQ and EINVAL are specifically defined to mean the error is other than
1866 an undersized buffer and no other errno are defined we treat those two
1867 as meaning hard errors and everything else gets the old behavior which
1868 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1869
67612ff1
DE
1870 In practice it's impossible to determine before compilation which behavior
1871 may be used. The vswprintf function may have vsnprintf-like behavior or
1872 vice-versa. Behavior detected on one release can theoretically change
1873 with an updated release. Not to mention that configure testing for it
1874 would require the test to be run on the host system, not the build system
1875 which makes cross compilation difficult. Therefore, we make no assumptions
1876 about behavior and try our best to handle every known case, including the
1877 case where wxVsnprintf returns a negative number and fails to set errno.
1878
1879 There is yet one more non-standard implementation and that is our own.
1880 Fortunately, that can be detected at compile-time.
1881
1882 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
1885 turns out that the buffer is sized too small.
1886
1887 Darwin (tested on 10.5) follows the C99 behavior exactly.
1888
1889 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1890 errno even when it fails. However, it only seems to ever fail due
1891 to an undersized buffer.
1892*/
2523e9b7
VS
1893#if wxUSE_UNICODE_UTF8
1894template<typename BufferType>
1895#else
1896// we only need one version in non-UTF8 builds and at least two Windows
1897// compilers have problems with this function template, so use just one
1898// normal function here
1899#endif
1900static int DoStringPrintfV(wxString& str,
1901 const wxString& format, va_list argptr)
c801d85f 1902{
f6f5941b 1903 int size = 1024;
e87b7833 1904
f6f5941b
VZ
1905 for ( ;; )
1906 {
2523e9b7
VS
1907#if wxUSE_UNICODE_UTF8
1908 BufferType tmp(str, size + 1);
1909 typename BufferType::CharType *buf = tmp;
1910#else
1911 wxStringBuffer tmp(str, size + 1);
de2589be 1912 wxChar *buf = tmp;
2523e9b7 1913#endif
2bb67b80 1914
ba9bbf13
WS
1915 if ( !buf )
1916 {
1917 // out of memory
a33c7045
VS
1918
1919 // in UTF-8 build, leaving uninitialized junk in the buffer
1920 // could result in invalid non-empty UTF-8 string, so just
1921 // reset the string to empty on failure:
1922 buf[0] = '\0';
ba9bbf13 1923 return -1;
e87b7833 1924 }
f6f5941b 1925
ba9bbf13
WS
1926 // wxVsnprintf() may modify the original arg pointer, so pass it
1927 // only a copy
1928 va_list argptrcopy;
1929 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1930
1931#ifndef __WXWINCE__
1932 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1933 errno = 0;
1934#endif
2523e9b7 1935 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1936 va_end(argptrcopy);
1937
1938 // some implementations of vsnprintf() don't NUL terminate
1939 // the string if there is not enough space for it so
1940 // always do it manually
67612ff1
DE
1941 // FIXME: This really seems to be the wrong and would be an off-by-one
1942 // bug except the code above allocates an extra character.
ba9bbf13
WS
1943 buf[size] = _T('\0');
1944
caff62f2
VZ
1945 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1946 // total number of characters which would have been written if the
b1727cfe 1947 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1948 if ( len < 0 )
1949 {
52de37c7
VS
1950 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1951 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1952 // is true if *both* of them use our own implementation,
1953 // otherwise we can't be sure
f2bbe5b6
VZ
1954#if wxUSE_WXVSNPRINTF
1955 // we know that our own implementation of wxVsnprintf() returns -1
1956 // only for a format error - thus there's something wrong with
1957 // the user's format string
a33c7045 1958 buf[0] = '\0';
f2bbe5b6 1959 return -1;
52de37c7
VS
1960#else // possibly using system version
1961 // assume it only returns error if there is not enough space, but
1962 // as we don't know how much we need, double the current size of
1963 // the buffer
67612ff1 1964#ifndef __WXWINCE__
a9a854d7
DE
1965 if( (errno == EILSEQ) || (errno == EINVAL) )
1966 // If errno was set to one of the two well-known hard errors
1967 // then fail immediately to avoid an infinite loop.
1968 return -1;
1969 else
1970#endif // __WXWINCE__
67612ff1
DE
1971 // still not enough, as we don't know how much we need, double the
1972 // current size of the buffer
1973 size *= 2;
f2bbe5b6 1974#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 1975 }
64f8f94c 1976 else if ( len >= size )
de2589be 1977 {
f2bbe5b6 1978#if wxUSE_WXVSNPRINTF
c95e653c 1979 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
1980 // size+1 when there's not enough space but that's not the size
1981 // of the required buffer!
1982 size *= 2; // so we just double the current size of the buffer
1983#else
64f8f94c
VZ
1984 // some vsnprintf() implementations NUL-terminate the buffer and
1985 // some don't in len == size case, to be safe always add 1
67612ff1
DE
1986 // FIXME: I don't quite understand this comment. The vsnprintf
1987 // function is specifically defined to return the number of
1988 // characters printed not including the null terminator.
1989 // So OF COURSE you need to add 1 to get the right buffer size.
1990 // The following line is definitely correct, no question.
64f8f94c 1991 size = len + 1;
f2bbe5b6 1992#endif
de2589be
VZ
1993 }
1994 else // ok, there was enough space
f6f5941b 1995 {
f6f5941b
VZ
1996 break;
1997 }
f6f5941b
VZ
1998 }
1999
2000 // we could have overshot
2523e9b7
VS
2001 str.Shrink();
2002
2003 return str.length();
2004}
c801d85f 2005
2523e9b7
VS
2006int wxString::PrintfV(const wxString& format, va_list argptr)
2007{
2523e9b7
VS
2008#if wxUSE_UNICODE_UTF8
2009 #if wxUSE_STL_BASED_WXSTRING
2010 typedef wxStringTypeBuffer<char> Utf8Buffer;
2011 #else
6798451b 2012 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
2013 #endif
2014#endif
2015
2016#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 2017 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2018#else
2019 #if wxUSE_UNICODE_UTF8
2020 if ( wxLocaleIsUtf8 )
c6255a6e 2021 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2022 else
2023 // wxChar* version
c6255a6e 2024 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 2025 #else
c6255a6e 2026 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
2027 #endif // UTF8/WCHAR
2028#endif
c801d85f
KB
2029}
2030
097c080b
VZ
2031// ----------------------------------------------------------------------------
2032// misc other operations
2033// ----------------------------------------------------------------------------
0c5d3e1c 2034
d775fa82 2035// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
2036// '?' metacharacters (as usual, '?' matches any character and '*' any number
2037// of them)
8a540c88 2038bool wxString::Matches(const wxString& mask) const
097c080b 2039{
d6044f58
VZ
2040 // I disable this code as it doesn't seem to be faster (in fact, it seems
2041 // to be much slower) than the old, hand-written code below and using it
2042 // here requires always linking with libregex even if the user code doesn't
2043 // use it
2044#if 0 // wxUSE_REGEX
706c2ac9
VZ
2045 // first translate the shell-like mask into a regex
2046 wxString pattern;
2047 pattern.reserve(wxStrlen(pszMask));
2048
2049 pattern += _T('^');
2050 while ( *pszMask )
2051 {
2052 switch ( *pszMask )
2053 {
2054 case _T('?'):
2055 pattern += _T('.');
2056 break;
2057
2058 case _T('*'):
2059 pattern += _T(".*");
2060 break;
2061
2062 case _T('^'):
2063 case _T('.'):
2064 case _T('$'):
2065 case _T('('):
2066 case _T(')'):
2067 case _T('|'):
2068 case _T('+'):
2069 case _T('\\'):
2070 // these characters are special in a RE, quote them
2071 // (however note that we don't quote '[' and ']' to allow
2072 // using them for Unix shell like matching)
2073 pattern += _T('\\');
2074 // fall through
2075
2076 default:
2077 pattern += *pszMask;
2078 }
2079
2080 pszMask++;
2081 }
2082 pattern += _T('$');
2083
2084 // and now use it
2085 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2086#else // !wxUSE_REGEX
9a4232dc
VZ
2087 // TODO: this is, of course, awfully inefficient...
2088
8a540c88
VS
2089 // FIXME-UTF8: implement using iterators, remove #if
2090#if wxUSE_UNICODE_UTF8
de4983f3
VS
2091 const wxScopedWCharBuffer maskBuf = mask.wc_str();
2092 const wxScopedWCharBuffer txtBuf = wc_str();
8a540c88
VS
2093 const wxChar *pszMask = maskBuf.data();
2094 const wxChar *pszTxt = txtBuf.data();
2095#else
2096 const wxChar *pszMask = mask.wx_str();
9a4232dc 2097 // the char currently being checked
8a540c88
VS
2098 const wxChar *pszTxt = wx_str();
2099#endif
9a4232dc
VZ
2100
2101 // the last location where '*' matched
2102 const wxChar *pszLastStarInText = NULL;
2103 const wxChar *pszLastStarInMask = NULL;
2104
2105match:
2106 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2107 switch ( *pszMask ) {
223d09f6
KB
2108 case wxT('?'):
2109 if ( *pszTxt == wxT('\0') )
d775fa82 2110 return false;
097c080b 2111
9a4232dc 2112 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2113
097c080b
VZ
2114 break;
2115
223d09f6 2116 case wxT('*'):
097c080b 2117 {
9a4232dc
VZ
2118 // remember where we started to be able to backtrack later
2119 pszLastStarInText = pszTxt;
2120 pszLastStarInMask = pszMask;
2121
097c080b 2122 // ignore special chars immediately following this one
9a4232dc 2123 // (should this be an error?)
223d09f6 2124 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2125 pszMask++;
2126
2127 // if there is nothing more, match
223d09f6 2128 if ( *pszMask == wxT('\0') )
d775fa82 2129 return true;
097c080b
VZ
2130
2131 // are there any other metacharacters in the mask?
c86f1403 2132 size_t uiLenMask;
223d09f6 2133 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2134
2135 if ( pEndMask != NULL ) {
2136 // we have to match the string between two metachars
2137 uiLenMask = pEndMask - pszMask;
2138 }
2139 else {
2140 // we have to match the remainder of the string
2bb67b80 2141 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2142 }
2143
2144 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2145 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2146 if ( pMatch == NULL )
d775fa82 2147 return false;
097c080b
VZ
2148
2149 // -1 to compensate "++" in the loop
2150 pszTxt = pMatch + uiLenMask - 1;
2151 pszMask += uiLenMask - 1;
2152 }
2153 break;
2154
2155 default:
2156 if ( *pszMask != *pszTxt )
d775fa82 2157 return false;
097c080b
VZ
2158 break;
2159 }
2160 }
2161
2162 // match only if nothing left
9a4232dc 2163 if ( *pszTxt == wxT('\0') )
d775fa82 2164 return true;
9a4232dc
VZ
2165
2166 // if we failed to match, backtrack if we can
2167 if ( pszLastStarInText ) {
2168 pszTxt = pszLastStarInText + 1;
2169 pszMask = pszLastStarInMask;
2170
2171 pszLastStarInText = NULL;
2172
2173 // don't bother resetting pszLastStarInMask, it's unnecessary
2174
2175 goto match;
2176 }
2177
d775fa82 2178 return false;
706c2ac9 2179#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2180}
2181
1fc5dd6f 2182// Count the number of chars
c9f78968 2183int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2184{
2185 int count = 0;
8f93a29f 2186 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2187 {
8f93a29f 2188 if ( *i == ch )
1fc5dd6f
JS
2189 count ++;
2190 }
2191 return count;
2192}
4e79262f 2193