]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
blind PCH-less build fixes: add missing headers
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
ba7e7253 27 #include "wx/log.h"
6b769f3d 28#endif
c801d85f
KB
29
30#include <ctype.h>
92df97b8
WS
31
32#ifndef __WXWINCE__
33 #include <errno.h>
34#endif
35
c801d85f
KB
36#include <string.h>
37#include <stdlib.h>
9a08c20e 38
8116a0c5 39#include "wx/hashmap.h"
072682ce 40#include "wx/vector.h"
529e491c 41#include "wx/xlocale.h"
8f93a29f 42
825d69c1
VZ
43#ifdef __WXMSW__
44 #include "wx/msw/wrapwin.h"
45#endif // __WXMSW__
46
8f93a29f
VS
47// string handling functions used by wxString:
// Select the low-level memory/string primitives used by the wxString
// implementation depending on the build's internal string representation.
#if !wxUSE_UNICODE_UTF8
    // non-UTF-8 builds: use the wx character-type aware helpers
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#else // wxUSE_UNICODE_UTF8
    // UTF-8 build: plain byte-oriented C functions are used
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
8f93a29f 59
4e79262f
VZ
60// ----------------------------------------------------------------------------
61// global variables
62// ----------------------------------------------------------------------------
63
64namespace wxPrivate
65{
66
6df09f32 67static UntypedBufferData s_untypedNullData(NULL, 0);
4e79262f
VZ
68
69UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
70
71} // namespace wxPrivate
e87b7833 72
a7ea63e2
VS
73// ---------------------------------------------------------------------------
74// static class variables definition
75// ---------------------------------------------------------------------------
e87b7833 76
a7ea63e2
VS
77//According to STL _must_ be a -1 size_t
78const size_t wxString::npos = (size_t) -1;
8f93a29f 79
68482dc5 80#if wxUSE_STRING_POS_CACHE
68482dc5 81
e810df36
VZ
82#ifdef wxHAS_COMPILER_TLS
83
84wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
85
86#else // !wxHAS_COMPILER_TLS
87
ad8ae788
VZ
88struct wxStrCacheInitializer
89{
90 wxStrCacheInitializer()
91 {
92 // calling this function triggers s_cache initialization in it, and
93 // from now on it becomes safe to call from multiple threads
94 wxString::GetCache();
95 }
96};
97
e317bd3f
SC
98/*
99wxString::Cache& wxString::GetCache()
100{
101 static wxTLS_TYPE(Cache) s_cache;
102
103 return wxTLS_VALUE(s_cache);
104}
105*/
106
ad8ae788
VZ
107static wxStrCacheInitializer gs_stringCacheInit;
108
e810df36
VZ
109#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
110
68482dc5
VZ
111// gdb seems to be unable to display thread-local variables correctly, at least
112// not my 6.4.98 version under amd64, so provide this debugging helper to do it
4b6a582b 113#if wxDEBUG_LEVEL >= 2
68482dc5
VZ
114
115struct wxStrCacheDumper
116{
117 static void ShowAll()
118 {
119 puts("*** wxString cache dump:");
120 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
121 {
122 const wxString::Cache::Element&
8b73c531 123 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
124
125 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
126 n,
8b73c531 127 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
128 c.str,
129 (unsigned long)c.pos,
130 (unsigned long)c.impl,
131 (long)c.len);
132 }
133 }
134};
135
136void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
137
4b6a582b 138#endif // wxDEBUG_LEVEL >= 2
68482dc5
VZ
139
140#ifdef wxPROFILE_STRING_CACHE
141
142wxString::CacheStats wxString::ms_cacheStats;
143
8c3b65d9 144struct wxStrCacheStatsDumper
68482dc5 145{
8c3b65d9 146 ~wxStrCacheStatsDumper()
68482dc5
VZ
147 {
148 const wxString::CacheStats& stats = wxString::ms_cacheStats;
149
150 if ( stats.postot )
151 {
152 puts("*** wxString cache statistics:");
153 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
154 stats.postot);
155 printf("\tHits %u (of which %u not used) or %.2f%%\n",
156 stats.poshits,
157 stats.mishits,
158 100.*float(stats.poshits - stats.mishits)/stats.postot);
159 printf("\tAverage position requested: %.2f\n",
160 float(stats.sumpos) / stats.postot);
161 printf("\tAverage offset after cached hint: %.2f\n",
162 float(stats.sumofs) / stats.postot);
163 }
164
165 if ( stats.lentot )
166 {
167 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
168 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
169 }
170 }
8c3b65d9 171};
68482dc5 172
8c3b65d9 173static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
174
175#endif // wxPROFILE_STRING_CACHE
176
177#endif // wxUSE_STRING_POS_CACHE
178
a7ea63e2
VS
179// ----------------------------------------------------------------------------
180// global functions
181// ----------------------------------------------------------------------------
e87b7833 182
a7ea63e2 183#if wxUSE_STD_IOSTREAM
8f93a29f 184
a7ea63e2 185#include <iostream>
8f93a29f 186
a7ea63e2 187wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 188{
7a906e1a 189#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
de4983f3 190 const wxScopedCharBuffer buf(str.AsCharBuf());
ddf01bdb
VZ
191 if ( !buf )
192 os.clear(wxSTD ios_base::failbit);
193 else
194 os << buf.data();
195
196 return os;
a7ea63e2 197#else
7a906e1a 198 return os << str.AsInternal();
a7ea63e2 199#endif
8f93a29f
VS
200}
201
04abe4bc
VS
202wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
203{
204 return os << str.c_str();
205}
206
de4983f3 207wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
04abe4bc
VS
208{
209 return os << str.data();
210}
211
212#ifndef __BORLANDC__
de4983f3 213wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
04abe4bc
VS
214{
215 return os << str.data();
216}
217#endif
218
6a6ea041 219#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
220
221wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
222{
223 return wos << str.wc_str();
224}
225
226wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
227{
228 return wos << str.AsWChar();
229}
230
de4983f3 231wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
6b61b594
VZ
232{
233 return wos << str.data();
234}
235
6a6ea041 236#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 237
a7ea63e2 238#endif // wxUSE_STD_IOSTREAM
e87b7833 239
81727065
VS
240// ===========================================================================
241// wxString class core
242// ===========================================================================
243
244#if wxUSE_UNICODE_UTF8
245
81727065
VS
246void wxString::PosLenToImpl(size_t pos, size_t len,
247 size_t *implPos, size_t *implLen) const
248{
249 if ( pos == npos )
68482dc5 250 {
81727065 251 *implPos = npos;
68482dc5
VZ
252 }
253 else // have valid start position
81727065 254 {
68482dc5
VZ
255 const const_iterator b = GetIterForNthChar(pos);
256 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 257 if ( len == npos )
68482dc5 258 {
81727065 259 *implLen = npos;
68482dc5
VZ
260 }
261 else // have valid length too
81727065 262 {
68482dc5
VZ
263 // we need to handle the case of length specifying a substring
264 // going beyond the end of the string, just as std::string does
265 const const_iterator e(end());
266 const_iterator i(b);
267 while ( len && i <= e )
268 {
269 ++i;
270 --len;
271 }
272
273 *implLen = i.impl() - b.impl();
81727065
VS
274 }
275 }
276}
277
278#endif // wxUSE_UNICODE_UTF8
279
11aac4ba
VS
280// ----------------------------------------------------------------------------
281// wxCStrData converted strings caching
282// ----------------------------------------------------------------------------
283
132276cf
VS
284// FIXME-UTF8: temporarily disabled because it doesn't work with global
285// string objects; re-enable after fixing this bug and benchmarking
286// performance to see if using a hash is a good idea at all
287#if 0
288
11aac4ba
VS
289// For backward compatibility reasons, it must be possible to assign the value
290// returned by wxString::c_str() to a char* or wchar_t* variable and work with
291// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
292// because the memory would be freed immediately, but it has to be valid as long
293// as the string is not modified, so that code like this still works:
294//
295// const wxChar *s = str.c_str();
296// while ( s ) { ... }
297
298// FIXME-UTF8: not thread safe!
299// FIXME-UTF8: we currently clear the cached conversion only when the string is
300// destroyed, but we should do it when the string is modified, to
301// keep memory usage down
302// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
303// invalidated the cache on every change, we could keep the previous
304// conversion
305// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
306// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
307
308template<typename T>
309static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
310{
6c4ebcda 311 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
312 if ( i != hash.end() )
313 {
314 free(i->second);
315 hash.erase(i);
316 }
317}
318
319#if wxUSE_UNICODE
6c4ebcda
VS
320// NB: non-STL implementation doesn't compile with "const wxString*" key type,
321// so we have to use wxString* here and const-cast when used
11aac4ba
VS
322WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
323 wxStringCharConversionCache);
324static wxStringCharConversionCache gs_stringsCharCache;
325
326const char* wxCStrData::AsChar() const
327{
328 // remove previously cache value, if any (see FIXMEs above):
329 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
330
331 // convert the string and keep it:
6c4ebcda
VS
332 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
333 m_str->mb_str().release();
11aac4ba
VS
334
335 return s + m_offset;
336}
337#endif // wxUSE_UNICODE
338
339#if !wxUSE_UNICODE_WCHAR
340WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
341 wxStringWCharConversionCache);
342static wxStringWCharConversionCache gs_stringsWCharCache;
343
344const wchar_t* wxCStrData::AsWChar() const
345{
346 // remove previously cache value, if any (see FIXMEs above):
347 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
348
349 // convert the string and keep it:
6c4ebcda
VS
350 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
351 m_str->wc_str().release();
11aac4ba
VS
352
353 return s + m_offset;
354}
355#endif // !wxUSE_UNICODE_WCHAR
356
11aac4ba
VS
357wxString::~wxString()
358{
359#if wxUSE_UNICODE
360 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
361 DeleteStringFromConversionCache(gs_stringsCharCache, this);
362#endif
363#if !wxUSE_UNICODE_WCHAR
364 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
365#endif
366}
132276cf
VS
367#endif
368
111d9948 369#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
370const char* wxCStrData::AsChar() const
371{
111d9948
VS
372#if wxUSE_UNICODE_UTF8
373 if ( wxLocaleIsUtf8 )
374 return AsInternal();
375#endif
376 // under non-UTF8 locales, we have to convert the internal UTF-8
377 // representation using wxConvLibc and cache the result
378
132276cf 379 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
380
381 // convert the string:
2a7431e1
VZ
382 //
383 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
384 // have it) but it's unfortunately not obvious to implement
385 // because we don't know how big buffer do we need for the
386 // given string length (in case of multibyte encodings, e.g.
387 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
388 //
389 // One idea would be to store more than just m_convertedToChar
390 // in wxString: then we could record the length of the string
391 // which was converted the last time and try to reuse the same
392 // buffer if the current length is not greater than it (this
393 // could still fail because string could have been modified in
394 // place but it would work most of the time, so we'd do it and
395 // only allocate the new buffer if in-place conversion returned
396 // an error). We could also store a bit saying if the string
397 // was modified since the last conversion (and update it in all
398 // operation modifying the string, of course) to avoid unneeded
399 // consequential conversions. But both of these ideas require
400 // adding more fields to wxString and require profiling results
401 // to be sure that we really gain enough from them to justify
402 // doing it.
de4983f3 403 wxScopedCharBuffer buf(str->mb_str());
05f32fc3 404
28be59b4
VZ
405 // if it failed, return empty string and not NULL to avoid crashes in code
406 // written with either wxWidgets 2 wxString or std::string behaviour in
407 // mind: neither of them ever returns NULL and so we shouldn't neither
408 if ( !buf )
409 return "";
410
05f32fc3
VS
411 if ( str->m_convertedToChar &&
412 strlen(buf) == strlen(str->m_convertedToChar) )
413 {
414 // keep the same buffer for as long as possible, so that several calls
415 // to c_str() in a row still work:
416 strcpy(str->m_convertedToChar, buf);
417 }
418 else
419 {
420 str->m_convertedToChar = buf.release();
421 }
422
423 // and keep it:
132276cf
VS
424 return str->m_convertedToChar + m_offset;
425}
111d9948 426#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
427
428#if !wxUSE_UNICODE_WCHAR
429const wchar_t* wxCStrData::AsWChar() const
430{
431 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
432
433 // convert the string:
de4983f3 434 wxScopedWCharBuffer buf(str->wc_str());
05f32fc3 435
28be59b4
VZ
436 // notice that here, unlike above in AsChar(), conversion can't fail as our
437 // internal UTF-8 is always well-formed -- or the string was corrupted and
438 // all bets are off anyhow
439
05f32fc3
VS
440 // FIXME-UTF8: do the conversion in-place in the existing buffer
441 if ( str->m_convertedToWChar &&
442 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
443 {
444 // keep the same buffer for as long as possible, so that several calls
445 // to c_str() in a row still work:
446 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
447 }
448 else
449 {
450 str->m_convertedToWChar = buf.release();
451 }
452
453 // and keep it:
132276cf
VS
454 return str->m_convertedToWChar + m_offset;
455}
456#endif // !wxUSE_UNICODE_WCHAR
457
458// ===========================================================================
459// wxString class core
460// ===========================================================================
461
462// ---------------------------------------------------------------------------
463// construction and conversion
464// ---------------------------------------------------------------------------
11aac4ba 465
81727065 466#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
467/* static */
468wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 469 const wxMBConv& conv)
8f93a29f
VS
470{
471 // anything to do?
472 if ( !psz || nLength == 0 )
de4983f3 473 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
474
475 if ( nLength == npos )
476 nLength = wxNO_LEN;
477
478 size_t wcLen;
de4983f3 479 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
8f93a29f 480 if ( !wcLen )
de4983f3 481 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
482 else
483 return SubstrBufFromMB(wcBuf, wcLen);
484}
81727065
VS
485#endif // wxUSE_UNICODE_WCHAR
486
487#if wxUSE_UNICODE_UTF8
488/* static */
489wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
490 const wxMBConv& conv)
491{
81727065
VS
492 // anything to do?
493 if ( !psz || nLength == 0 )
de4983f3 494 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065 495
111d9948
VS
496 // if psz is already in UTF-8, we don't have to do the roundtrip to
497 // wchar_t* and back:
498 if ( conv.IsUTF8() )
499 {
500 // we need to validate the input because UTF8 iterators assume valid
501 // UTF-8 sequence and psz may be invalid:
502 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
503 {
9ef1ad0d
VZ
504 // we must pass the real string length to SubstrBufFromMB ctor
505 if ( nLength == npos )
506 nLength = psz ? strlen(psz) : 0;
38d26d60 507 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
6df09f32 508 nLength);
111d9948
VS
509 }
510 // else: do the roundtrip through wchar_t*
511 }
512
81727065
VS
513 if ( nLength == npos )
514 nLength = wxNO_LEN;
515
516 // first convert to wide string:
517 size_t wcLen;
de4983f3 518 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
81727065 519 if ( !wcLen )
de4983f3 520 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065
VS
521
522 // and then to UTF-8:
4fdfe2f3 523 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065
VS
524 // widechar -> UTF-8 conversion isn't supposed to ever fail:
525 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
526
527 return buf;
528}
529#endif // wxUSE_UNICODE_UTF8
530
531#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
532/* static */
533wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 534 const wxMBConv& conv)
8f93a29f
VS
535{
536 // anything to do?
537 if ( !pwz || nLength == 0 )
de4983f3 538 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
539
540 if ( nLength == npos )
541 nLength = wxNO_LEN;
542
543 size_t mbLen;
de4983f3 544 wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
8f93a29f 545 if ( !mbLen )
de4983f3 546 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
547 else
548 return SubstrBufFromWC(mbBuf, mbLen);
549}
81727065 550#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
551
552
81727065 553#if wxUSE_UNICODE_WCHAR
e87b7833 554
06386448 555//Convert wxString in Unicode mode to a multi-byte string
de4983f3 556const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 557{
38d26d60
VS
558 // NB: Length passed to cWC2MB() doesn't include terminating NUL, it's
559 // added by it automatically. If we passed length()+1 here, it would
560 // create a buffer with 2 trailing NULs of length one greater than
561 // expected.
562 return conv.cWC2MB(wx_str(), length(), NULL);
e87b7833
MB
563}
564
81727065 565#elif wxUSE_UNICODE_UTF8
e87b7833 566
de4983f3 567const wxScopedWCharBuffer wxString::wc_str() const
81727065 568{
38d26d60
VS
569 // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
570 // added by it automatically. If we passed length()+1 here, it would
571 // create a buffer with 2 trailing NULs of length one greater than
572 // expected.
4fdfe2f3
VZ
573 return wxMBConvStrictUTF8().cMB2WC
574 (
575 m_impl.c_str(),
38d26d60 576 m_impl.length(),
4fdfe2f3
VZ
577 NULL
578 );
81727065
VS
579}
580
de4983f3 581const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
81727065 582{
111d9948 583 if ( conv.IsUTF8() )
6df09f32 584 return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length());
111d9948 585
38d26d60
VS
586 wxScopedWCharBuffer wcBuf(wc_str());
587 if ( !wcBuf.length() )
81727065
VS
588 return wxCharBuffer("");
589
38d26d60 590 return conv.cWC2MB(wcBuf.data(), wcBuf.length(), NULL);
81727065
VS
591}
592
593#else // ANSI
eec47cc6 594
7663d0d4 595//Converts this string to a wide character string if unicode
06386448 596//mode is not enabled and wxUSE_WCHAR_T is enabled
de4983f3 597const wxScopedWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 598{
38d26d60
VS
599 // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
600 // added by it automatically. If we passed length()+1 here, it would
601 // create a buffer with 2 trailing NULs of length one greater than
602 // expected.
603 return conv.cMB2WC(wx_str(), length(), NULL);
265d5cce 604}
7663d0d4 605
e87b7833
MB
606#endif // Unicode/ANSI
607
608// shrink to minimal size (releasing extra memory)
609bool wxString::Shrink()
610{
611 wxString tmp(begin(), end());
612 swap(tmp);
613 return tmp.length() == length();
614}
615
d8a4b666 616// deprecated compatibility code:
a7ea63e2 617#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 618wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
619{
620 return DoGetWriteBuf(nLen);
621}
622
623void wxString::UngetWriteBuf()
624{
625 DoUngetWriteBuf();
626}
627
628void wxString::UngetWriteBuf(size_t nLen)
629{
630 DoUngetWriteBuf(nLen);
631}
a7ea63e2 632#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 633
d8a4b666 634
e87b7833
MB
635// ---------------------------------------------------------------------------
636// data access
637// ---------------------------------------------------------------------------
638
639// all functions are inline in string.h
640
641// ---------------------------------------------------------------------------
e8f59039 642// concatenation operators
e87b7833
MB
643// ---------------------------------------------------------------------------
644
c801d85f 645/*
c801d85f
KB
646 * concatenation functions come in 5 flavours:
647 * string + string
648 * char + string and string + char
649 * C str + string and string + C str
650 */
651
b1801e0e 652wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 653{
992527a5 654#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
655 wxASSERT( str1.IsValid() );
656 wxASSERT( str2.IsValid() );
e87b7833 657#endif
097c080b 658
3458e408
WS
659 wxString s = str1;
660 s += str2;
3168a13f 661
3458e408 662 return s;
c801d85f
KB
663}
664
c9f78968 665wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 666{
992527a5 667#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 668 wxASSERT( str.IsValid() );
e87b7833 669#endif
3168a13f 670
3458e408
WS
671 wxString s = str;
672 s += ch;
097c080b 673
3458e408 674 return s;
c801d85f
KB
675}
676
c9f78968 677wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 678{
992527a5 679#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 680 wxASSERT( str.IsValid() );
e87b7833 681#endif
097c080b 682
3458e408
WS
683 wxString s = ch;
684 s += str;
3168a13f 685
3458e408 686 return s;
c801d85f
KB
687}
688
8f93a29f 689wxString operator+(const wxString& str, const char *psz)
c801d85f 690{
992527a5 691#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 692 wxASSERT( str.IsValid() );
e87b7833 693#endif
097c080b 694
3458e408 695 wxString s;
8f93a29f 696 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
697 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
698 }
699 s += str;
700 s += psz;
3168a13f 701
3458e408 702 return s;
c801d85f
KB
703}
704
8f93a29f 705wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 706{
992527a5 707#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
708 wxASSERT( str.IsValid() );
709#endif
710
711 wxString s;
712 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
713 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
714 }
715 s += str;
716 s += pwz;
717
718 return s;
719}
720
721wxString operator+(const char *psz, const wxString& str)
722{
a7ea63e2
VS
723#if !wxUSE_STL_BASED_WXSTRING
724 wxASSERT( str.IsValid() );
725#endif
726
727 wxString s;
728 if ( !s.Alloc(strlen(psz) + str.length()) ) {
729 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
730 }
731 s = psz;
732 s += str;
733
734 return s;
735}
736
737wxString operator+(const wchar_t *pwz, const wxString& str)
738{
739#if !wxUSE_STL_BASED_WXSTRING
740 wxASSERT( str.IsValid() );
741#endif
742
743 wxString s;
744 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
745 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
746 }
747 s = pwz;
748 s += str;
749
750 return s;
751}
752
753// ---------------------------------------------------------------------------
754// string comparison
755// ---------------------------------------------------------------------------
756
52de37c7
VS
757bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
758{
759 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
760 : wxToupper(GetChar(0u)) == wxToupper(c));
761}
762
a7ea63e2
VS
763#ifdef HAVE_STD_STRING_COMPARE
764
765// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
766// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
767// sort strings in characters code point order by sorting the byte sequence
768// in byte values order (i.e. what strcmp() and memcmp() do).
769
770int wxString::compare(const wxString& str) const
771{
772 return m_impl.compare(str.m_impl);
773}
774
775int wxString::compare(size_t nStart, size_t nLen,
776 const wxString& str) const
777{
778 size_t pos, len;
779 PosLenToImpl(nStart, nLen, &pos, &len);
780 return m_impl.compare(pos, len, str.m_impl);
781}
782
783int wxString::compare(size_t nStart, size_t nLen,
784 const wxString& str,
785 size_t nStart2, size_t nLen2) const
786{
787 size_t pos, len;
788 PosLenToImpl(nStart, nLen, &pos, &len);
789
790 size_t pos2, len2;
791 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
792
793 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
794}
795
796int wxString::compare(const char* sz) const
797{
798 return m_impl.compare(ImplStr(sz));
799}
800
801int wxString::compare(const wchar_t* sz) const
802{
803 return m_impl.compare(ImplStr(sz));
804}
805
806int wxString::compare(size_t nStart, size_t nLen,
807 const char* sz, size_t nCount) const
808{
809 size_t pos, len;
810 PosLenToImpl(nStart, nLen, &pos, &len);
811
812 SubstrBufFromMB str(ImplStr(sz, nCount));
813
814 return m_impl.compare(pos, len, str.data, str.len);
815}
816
817int wxString::compare(size_t nStart, size_t nLen,
818 const wchar_t* sz, size_t nCount) const
819{
820 size_t pos, len;
821 PosLenToImpl(nStart, nLen, &pos, &len);
822
823 SubstrBufFromWC str(ImplStr(sz, nCount));
824
825 return m_impl.compare(pos, len, str.data, str.len);
826}
827
828#else // !HAVE_STD_STRING_COMPARE
829
830static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
831 const wxStringCharType* s2, size_t l2)
832{
833 if( l1 == l2 )
834 return wxStringMemcmp(s1, s2, l1);
835 else if( l1 < l2 )
836 {
837 int ret = wxStringMemcmp(s1, s2, l1);
838 return ret == 0 ? -1 : ret;
839 }
840 else
841 {
842 int ret = wxStringMemcmp(s1, s2, l2);
843 return ret == 0 ? +1 : ret;
844 }
845}
846
847int wxString::compare(const wxString& str) const
848{
849 return ::wxDoCmp(m_impl.data(), m_impl.length(),
850 str.m_impl.data(), str.m_impl.length());
851}
852
853int wxString::compare(size_t nStart, size_t nLen,
854 const wxString& str) const
855{
856 wxASSERT(nStart <= length());
857 size_type strLen = length() - nStart;
858 nLen = strLen < nLen ? strLen : nLen;
859
860 size_t pos, len;
861 PosLenToImpl(nStart, nLen, &pos, &len);
862
863 return ::wxDoCmp(m_impl.data() + pos, len,
864 str.m_impl.data(), str.m_impl.length());
865}
866
867int wxString::compare(size_t nStart, size_t nLen,
868 const wxString& str,
869 size_t nStart2, size_t nLen2) const
870{
871 wxASSERT(nStart <= length());
872 wxASSERT(nStart2 <= str.length());
873 size_type strLen = length() - nStart,
874 strLen2 = str.length() - nStart2;
875 nLen = strLen < nLen ? strLen : nLen;
876 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
877
878 size_t pos, len;
879 PosLenToImpl(nStart, nLen, &pos, &len);
880 size_t pos2, len2;
881 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
882
883 return ::wxDoCmp(m_impl.data() + pos, len,
884 str.m_impl.data() + pos2, len2);
885}
886
887int wxString::compare(const char* sz) const
888{
889 SubstrBufFromMB str(ImplStr(sz, npos));
890 if ( str.len == npos )
891 str.len = wxStringStrlen(str.data);
892 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
893}
894
895int wxString::compare(const wchar_t* sz) const
896{
897 SubstrBufFromWC str(ImplStr(sz, npos));
898 if ( str.len == npos )
899 str.len = wxStringStrlen(str.data);
900 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
901}
902
903int wxString::compare(size_t nStart, size_t nLen,
904 const char* sz, size_t nCount) const
905{
906 wxASSERT(nStart <= length());
907 size_type strLen = length() - nStart;
908 nLen = strLen < nLen ? strLen : nLen;
097c080b 909
a7ea63e2
VS
910 size_t pos, len;
911 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 912
a7ea63e2
VS
913 SubstrBufFromMB str(ImplStr(sz, nCount));
914 if ( str.len == npos )
915 str.len = wxStringStrlen(str.data);
916
917 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
918}
919
a7ea63e2
VS
920int wxString::compare(size_t nStart, size_t nLen,
921 const wchar_t* sz, size_t nCount) const
8f93a29f 922{
a7ea63e2
VS
923 wxASSERT(nStart <= length());
924 size_type strLen = length() - nStart;
925 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 926
a7ea63e2
VS
927 size_t pos, len;
928 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 929
a7ea63e2
VS
930 SubstrBufFromWC str(ImplStr(sz, nCount));
931 if ( str.len == npos )
932 str.len = wxStringStrlen(str.data);
933
934 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
935}
936
a7ea63e2
VS
937#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
938
939
8f93a29f
VS
940// ---------------------------------------------------------------------------
941// find_{first,last}_[not]_of functions
942// ---------------------------------------------------------------------------
943
944#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 945
8f93a29f
VS
946// NB: All these functions are implemented with the argument being wxChar*,
947// i.e. widechar string in any Unicode build, even though native string
948// representation is char* in the UTF-8 build. This is because we couldn't
949// use memchr() to determine if a character is in a set encoded as UTF-8.
950
951size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 952{
8f93a29f 953 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
954}
955
8f93a29f 956size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 957{
8f93a29f 958 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
959}
960
8f93a29f 961size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 962{
8f93a29f 963 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 964
8f93a29f
VS
965 size_t idx = nStart;
966 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 967 {
8f93a29f
VS
968 if ( wxTmemchr(sz, *i, n) )
969 return idx;
dcb68102 970 }
8f93a29f
VS
971
972 return npos;
973}
974
975size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
976{
977 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
978
979 size_t idx = nStart;
980 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 981 {
8f93a29f
VS
982 if ( !wxTmemchr(sz, *i, n) )
983 return idx;
984 }
985
986 return npos;
987}
988
989
990size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
991{
992 return find_last_of(sz, nStart, wxStrlen(sz));
993}
994
995size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
996{
997 return find_last_not_of(sz, nStart, wxStrlen(sz));
998}
999
1000size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
1001{
1002 size_t len = length();
1003
1004 if ( nStart == npos )
1005 {
1006 nStart = len - 1;
dcb68102 1007 }
2c09fb3b 1008 else
dcb68102 1009 {
8f93a29f 1010 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 1011 }
8f93a29f
VS
1012
1013 size_t idx = nStart;
1014 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1015 i != rend(); --idx, ++i )
1016 {
1017 if ( wxTmemchr(sz, *i, n) )
1018 return idx;
1019 }
1020
1021 return npos;
dcb68102
RN
1022}
1023
8f93a29f 1024size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 1025{
8f93a29f
VS
1026 size_t len = length();
1027
1028 if ( nStart == npos )
1029 {
1030 nStart = len - 1;
1031 }
1032 else
1033 {
1034 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1035 }
1036
1037 size_t idx = nStart;
1038 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1039 i != rend(); --idx, ++i )
1040 {
1041 if ( !wxTmemchr(sz, *i, n) )
1042 return idx;
1043 }
1044
1045 return npos;
dcb68102
RN
1046}
1047
8f93a29f 1048size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1049{
8f93a29f
VS
1050 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1051
1052 size_t idx = nStart;
1053 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1054 {
1055 if ( *i != ch )
1056 return idx;
1057 }
1058
1059 return npos;
1060}
1061
1062size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1063{
1064 size_t len = length();
1065
1066 if ( nStart == npos )
1067 {
1068 nStart = len - 1;
1069 }
1070 else
1071 {
1072 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1073 }
1074
1075 size_t idx = nStart;
1076 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1077 i != rend(); --idx, ++i )
1078 {
1079 if ( *i != ch )
1080 return idx;
1081 }
1082
1083 return npos;
1084}
1085
1086// the functions above were implemented for wchar_t* arguments in Unicode
1087// build and char* in ANSI build; below are implementations for the other
1088// version:
1089#if wxUSE_UNICODE
1090 #define wxOtherCharType char
1091 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1092#else
1093 #define wxOtherCharType wchar_t
1094 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1095#endif
1096
1097size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1098 { return find_first_of(STRCONV(sz), nStart); }
1099
1100size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1101 size_t n) const
1102 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1103size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1104 { return find_last_of(STRCONV(sz), nStart); }
1105size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1106 size_t n) const
1107 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1108size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1109 { return find_first_not_of(STRCONV(sz), nStart); }
1110size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1111 size_t n) const
1112 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1113size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1114 { return find_last_not_of(STRCONV(sz), nStart); }
1115size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1116 size_t n) const
1117 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1118
1119#undef wxOtherCharType
1120#undef STRCONV
1121
1122#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1123
1124// ===========================================================================
1125// other common string functions
1126// ===========================================================================
1127
1128int wxString::CmpNoCase(const wxString& s) const
1129{
825d69c1
VZ
1130#if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1131 // prefer to use CompareString() if available as it's more efficient than
1132 // doing it manual or even using wxStricmp() (see #10375)
1133 switch ( ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE,
1134 m_impl.c_str(), m_impl.length(),
1135 s.m_impl.c_str(), s.m_impl.length()) )
1136 {
1137 case CSTR_LESS_THAN:
1138 return -1;
1139
1140 case CSTR_EQUAL:
1141 return 0;
1142
1143 case CSTR_GREATER_THAN:
1144 return 1;
8f93a29f 1145
825d69c1
VZ
1146 default:
1147 wxFAIL_MSG( "unexpected CompareString() return value" );
1148 // fall through
1149
1150 case 0:
1151 wxLogLastError("CompareString");
1152 // use generic code below
1153 }
1154#endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1155
1156 // do the comparison manually: notice that we can't use wxStricmp() as it
1157 // doesn't handle embedded NULs
1158
1159 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
8f93a29f
VS
1160 const_iterator i1 = begin();
1161 const_iterator end1 = end();
1162 const_iterator i2 = s.begin();
1163 const_iterator end2 = s.end();
1164
0d8b0f94 1165 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1166 {
1167 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1168 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1169 if ( lower1 != lower2 )
1170 return lower1 < lower2 ? -1 : 1;
1171 }
1172
1173 size_t len1 = length();
1174 size_t len2 = s.length();
dcb68102 1175
8f93a29f
VS
1176 if ( len1 < len2 )
1177 return -1;
1178 else if ( len1 > len2 )
1179 return 1;
1180 return 0;
dcb68102
RN
1181}
1182
1183
b1ac3b56 1184#if wxUSE_UNICODE
e015c2a3 1185
cf6bedce
SC
1186#ifdef __MWERKS__
1187#ifndef __SCHAR_MAX__
1188#define __SCHAR_MAX__ 127
1189#endif
1190#endif
1191
e6310bbc 1192wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1193{
e6310bbc 1194 if (!ascii || len == 0)
b1ac3b56 1195 return wxEmptyString;
e015c2a3 1196
b1ac3b56 1197 wxString res;
e015c2a3 1198
e6310bbc 1199 {
6798451b 1200 wxStringInternalBuffer buf(res, len);
602a857b 1201 wxStringCharType *dest = buf;
c1eada83 1202
602a857b
VS
1203 for ( ; len > 0; --len )
1204 {
1205 unsigned char c = (unsigned char)*ascii++;
1206 wxASSERT_MSG( c < 0x80,
1207 _T("Non-ASCII value passed to FromAscii().") );
c1eada83 1208
602a857b
VS
1209 *dest++ = (wchar_t)c;
1210 }
e015c2a3
VZ
1211 }
1212
b1ac3b56
RR
1213 return res;
1214}
1215
e6310bbc
VS
1216wxString wxString::FromAscii(const char *ascii)
1217{
0081dd72 1218 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1219}
1220
c5288c5c 1221wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1222{
1223 // What do we do with '\0' ?
1224
c1eada83 1225 unsigned char c = (unsigned char)ascii;
8760bc65 1226
c1eada83
VS
1227 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1228
1229 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1230 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1231}
1232
de4983f3 1233const wxScopedCharBuffer wxString::ToAscii() const
b1ac3b56 1234{
e015c2a3
VZ
1235 // this will allocate enough space for the terminating NUL too
1236 wxCharBuffer buffer(length());
6e394fc6 1237 char *dest = buffer.data();
e015c2a3 1238
c1eada83 1239 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1240 {
c1eada83
VS
1241 wxUniChar c(*i);
1242 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1243 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1244
1245 // the output string can't have embedded NULs anyhow, so we can safely
1246 // stop at first of them even if we do have any
c1eada83 1247 if ( !c )
e015c2a3 1248 break;
b1ac3b56 1249 }
e015c2a3 1250
b1ac3b56
RR
1251 return buffer;
1252}
e015c2a3 1253
c1eada83 1254#endif // wxUSE_UNICODE
b1ac3b56 1255
c801d85f 1256// extract string of length nCount starting at nFirst
c801d85f
KB
1257wxString wxString::Mid(size_t nFirst, size_t nCount) const
1258{
73f507f5 1259 size_t nLen = length();
30d9011f 1260
73f507f5
WS
1261 // default value of nCount is npos and means "till the end"
1262 if ( nCount == npos )
1263 {
1264 nCount = nLen - nFirst;
1265 }
30d9011f 1266
73f507f5
WS
1267 // out-of-bounds requests return sensible things
1268 if ( nFirst + nCount > nLen )
1269 {
1270 nCount = nLen - nFirst;
1271 }
c801d85f 1272
73f507f5
WS
1273 if ( nFirst > nLen )
1274 {
1275 // AllocCopy() will return empty string
1276 return wxEmptyString;
1277 }
c801d85f 1278
73f507f5
WS
1279 wxString dest(*this, nFirst, nCount);
1280 if ( dest.length() != nCount )
1281 {
1282 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1283 }
30d9011f 1284
73f507f5 1285 return dest;
c801d85f
KB
1286}
1287
e87b7833 1288// check that the string starts with prefix and return the rest of the string
d775fa82 1289// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1290bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1291{
c5e7a7d7
VS
1292 if ( compare(0, prefix.length(), prefix) != 0 )
1293 return false;
f6bcfd97
BP
1294
1295 if ( rest )
1296 {
1297 // put the rest of the string into provided pointer
c5e7a7d7 1298 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1299 }
1300
d775fa82 1301 return true;
f6bcfd97
BP
1302}
1303
3affcd07
VZ
1304
1305// check that the string ends with suffix and return the rest of it in the
1306// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1307bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1308{
c5e7a7d7 1309 int start = length() - suffix.length();
81727065
VS
1310
1311 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1312 return false;
1313
1314 if ( rest )
1315 {
1316 // put the rest of the string into provided pointer
1317 rest->assign(*this, 0, start);
1318 }
1319
1320 return true;
1321}
1322
1323
c801d85f
KB
1324// extract nCount last (rightmost) characters
1325wxString wxString::Right(size_t nCount) const
1326{
e87b7833
MB
1327 if ( nCount > length() )
1328 nCount = length();
c801d85f 1329
e87b7833
MB
1330 wxString dest(*this, length() - nCount, nCount);
1331 if ( dest.length() != nCount ) {
b1801e0e
GD
1332 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1333 }
c801d85f
KB
1334 return dest;
1335}
1336
7929902d 1337// get all characters after the last occurrence of ch
c801d85f 1338// (returns the whole string if ch not found)
c9f78968 1339wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1340{
1341 wxString str;
d775fa82 1342 int iPos = Find(ch, true);
3c67202d 1343 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1344 str = *this;
1345 else
c565abe1 1346 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1347
1348 return str;
1349}
1350
1351// extract nCount first (leftmost) characters
1352wxString wxString::Left(size_t nCount) const
1353{
e87b7833
MB
1354 if ( nCount > length() )
1355 nCount = length();
c801d85f 1356
e87b7833
MB
1357 wxString dest(*this, 0, nCount);
1358 if ( dest.length() != nCount ) {
b1801e0e
GD
1359 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1360 }
c801d85f
KB
1361 return dest;
1362}
1363
7929902d 1364// get all characters before the first occurrence of ch
c801d85f 1365// (returns the whole string if ch not found)
c9f78968 1366wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1367{
e87b7833 1368 int iPos = Find(ch);
c565abe1
VZ
1369 if ( iPos == wxNOT_FOUND )
1370 iPos = length();
e87b7833 1371 return wxString(*this, 0, iPos);
c801d85f
KB
1372}
1373
7929902d 1374/// get all characters before the last occurrence of ch
c801d85f 1375/// (returns empty string if ch not found)
c9f78968 1376wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1377{
1378 wxString str;
d775fa82 1379 int iPos = Find(ch, true);
3c67202d 1380 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1381 str = wxString(c_str(), iPos);
c801d85f
KB
1382
1383 return str;
1384}
1385
7929902d 1386/// get all characters after the first occurrence of ch
c801d85f 1387/// (returns empty string if ch not found)
c9f78968 1388wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1389{
1390 wxString str;
1391 int iPos = Find(ch);
3c67202d 1392 if ( iPos != wxNOT_FOUND )
c565abe1 1393 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1394
1395 return str;
1396}
1397
7929902d 1398// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1399size_t wxString::Replace(const wxString& strOld,
1400 const wxString& strNew, bool bReplaceAll)
c801d85f 1401{
a8f1f1b2 1402 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1403 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1404 _T("wxString::Replace(): invalid parameter") );
1405
68482dc5
VZ
1406 wxSTRING_INVALIDATE_CACHE();
1407
510bb748 1408 size_t uiCount = 0; // count of replacements made
c801d85f 1409
8a627032
VZ
1410 // optimize the special common case: replacement of one character by
1411 // another one (in UTF-8 case we can only do this for ASCII characters)
1412 //
1413 // benchmarks show that this special version is around 3 times faster
1414 // (depending on the proportion of matching characters and UTF-8/wchar_t
1415 // build)
1416 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1417 {
1418 const wxStringCharType chOld = strOld.m_impl[0],
1419 chNew = strNew.m_impl[0];
1420
1421 // this loop is the simplified version of the one below
1422 for ( size_t pos = 0; ; )
1423 {
1424 pos = m_impl.find(chOld, pos);
1425 if ( pos == npos )
1426 break;
c801d85f 1427
8a627032
VZ
1428 m_impl[pos++] = chNew;
1429
1430 uiCount++;
1431
1432 if ( !bReplaceAll )
1433 break;
1434 }
1435 }
072682ce
VZ
1436 else if ( !bReplaceAll)
1437 {
1438 size_t pos = m_impl.find(strOld, 0);
1439 if ( pos != npos )
1440 {
1441 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1442 uiCount = 1;
1443 }
1444 }
1445 else // replace all occurrences
510bb748 1446 {
8a627032
VZ
1447 const size_t uiOldLen = strOld.m_impl.length();
1448 const size_t uiNewLen = strNew.m_impl.length();
1449
072682ce
VZ
1450 // first scan the string to find all positions at which the replacement
1451 // should be made
1452 wxVector<size_t> replacePositions;
1453
1454 size_t pos;
1455 for ( pos = m_impl.find(strOld.m_impl, 0);
1456 pos != npos;
1457 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
8a627032 1458 {
072682ce
VZ
1459 replacePositions.push_back(pos);
1460 ++uiCount;
1461 }
510bb748 1462
072682ce
VZ
1463 if ( !uiCount )
1464 return 0;
510bb748 1465
072682ce
VZ
1466 // allocate enough memory for the whole new string
1467 wxString tmp;
1468 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
ad5bb7d6 1469
072682ce
VZ
1470 // copy this string to tmp doing replacements on the fly
1471 size_t replNum = 0;
1472 for ( pos = 0; replNum < uiCount; replNum++ )
1473 {
1474 const size_t nextReplPos = replacePositions[replNum];
394b2900 1475
072682ce
VZ
1476 if ( pos != nextReplPos )
1477 {
1478 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1479 }
1480
1481 tmp.m_impl.append(strNew.m_impl);
1482 pos = nextReplPos + uiOldLen;
8a627032 1483 }
072682ce
VZ
1484
1485 if ( pos != m_impl.length() )
1486 {
1487 // append the rest of the string unchanged
1488 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1489 }
1490
1491 swap(tmp);
c801d85f 1492 }
c801d85f 1493
510bb748 1494 return uiCount;
c801d85f
KB
1495}
1496
1497bool wxString::IsAscii() const
1498{
a4a44612
VS
1499 for ( const_iterator i = begin(); i != end(); ++i )
1500 {
1501 if ( !(*i).IsAscii() )
1502 return false;
1503 }
1504
1505 return true;
c801d85f 1506}
dd1eaa89 1507
c801d85f
KB
1508bool wxString::IsWord() const
1509{
a4a44612
VS
1510 for ( const_iterator i = begin(); i != end(); ++i )
1511 {
1512 if ( !wxIsalpha(*i) )
1513 return false;
1514 }
1515
1516 return true;
c801d85f 1517}
dd1eaa89 1518
c801d85f
KB
1519bool wxString::IsNumber() const
1520{
a4a44612
VS
1521 if ( empty() )
1522 return true;
1523
1524 const_iterator i = begin();
1525
1526 if ( *i == _T('-') || *i == _T('+') )
1527 ++i;
1528
1529 for ( ; i != end(); ++i )
1530 {
1531 if ( !wxIsdigit(*i) )
1532 return false;
1533 }
1534
1535 return true;
c801d85f
KB
1536}
1537
c801d85f
KB
1538wxString wxString::Strip(stripType w) const
1539{
1540 wxString s = *this;
d775fa82
WS
1541 if ( w & leading ) s.Trim(false);
1542 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1543 return s;
1544}
1545
c801d85f
KB
1546// ---------------------------------------------------------------------------
1547// case conversion
1548// ---------------------------------------------------------------------------
1549
1550wxString& wxString::MakeUpper()
1551{
e87b7833
MB
1552 for ( iterator it = begin(), en = end(); it != en; ++it )
1553 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1554
1555 return *this;
1556}
1557
1558wxString& wxString::MakeLower()
1559{
e87b7833
MB
1560 for ( iterator it = begin(), en = end(); it != en; ++it )
1561 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1562
1563 return *this;
1564}
1565
0c7db140
VZ
1566wxString& wxString::MakeCapitalized()
1567{
1568 const iterator en = end();
1569 iterator it = begin();
1570 if ( it != en )
1571 {
1572 *it = (wxChar)wxToupper(*it);
1573 for ( ++it; it != en; ++it )
1574 *it = (wxChar)wxTolower(*it);
1575 }
1576
1577 return *this;
1578}
1579
c801d85f
KB
1580// ---------------------------------------------------------------------------
1581// trimming and padding
1582// ---------------------------------------------------------------------------
1583
d775fa82 1584// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1585// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1586// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1587// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1588// space-like symbols somewhere except in the first 128 chars), it is arguably
1589// still better than trimming away accented letters
1590inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1591
c801d85f
KB
1592// trims spaces (in the sense of isspace) from left or right side
1593wxString& wxString::Trim(bool bFromRight)
1594{
3458e408
WS
1595 // first check if we're going to modify the string at all
1596 if ( !empty() &&
1597 (
1598 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1599 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1600 )
2c3b684c 1601 )
2c3b684c 1602 {
3458e408
WS
1603 if ( bFromRight )
1604 {
1605 // find last non-space character
d4d02bd5 1606 reverse_iterator psz = rbegin();
32c62191 1607 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1608 ++psz;
92df97b8 1609
3458e408 1610 // truncate at trailing space start
d4d02bd5 1611 erase(psz.base(), end());
3458e408
WS
1612 }
1613 else
1614 {
1615 // find first non-space character
1616 iterator psz = begin();
32c62191 1617 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1618 ++psz;
2c3b684c 1619
3458e408
WS
1620 // fix up data and length
1621 erase(begin(), psz);
1622 }
2c3b684c 1623 }
c801d85f 1624
3458e408 1625 return *this;
c801d85f
KB
1626}
1627
1628// adds nCount characters chPad to the string from either side
c9f78968 1629wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1630{
3458e408 1631 wxString s(chPad, nCount);
c801d85f 1632
3458e408
WS
1633 if ( bFromRight )
1634 *this += s;
1635 else
1636 {
1637 s += *this;
1638 swap(s);
1639 }
c801d85f 1640
3458e408 1641 return *this;
c801d85f
KB
1642}
1643
1644// truncate the string
1645wxString& wxString::Truncate(size_t uiLen)
1646{
3458e408
WS
1647 if ( uiLen < length() )
1648 {
1649 erase(begin() + uiLen, end());
1650 }
1651 //else: nothing to do, string is already short enough
c801d85f 1652
3458e408 1653 return *this;
c801d85f
KB
1654}
1655
1656// ---------------------------------------------------------------------------
3c67202d 1657// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1658// ---------------------------------------------------------------------------
1659
1660// find a character
c9f78968 1661int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1662{
3458e408 1663 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1664
3458e408 1665 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1666}
1667
cd0b1709
VZ
1668// ----------------------------------------------------------------------------
1669// conversion to numbers
1670// ----------------------------------------------------------------------------
1671
52de37c7
VS
1672// The implementation of all the functions below is exactly the same so factor
1673// it out. Note that number extraction works correctly on UTF-8 strings, so
1674// we can use wxStringCharType and wx_str() for maximum efficiency.
122f3c5d 1675
92df97b8 1676#ifndef __WXWINCE__
941a4e62
VS
1677 #define DO_IF_NOT_WINCE(x) x
1678#else
1679 #define DO_IF_NOT_WINCE(x)
92df97b8 1680#endif
4ea4767e 1681
529e491c
FM
1682#define WX_STRING_TO_X_TYPE_START \
1683 wxCHECK_MSG( pVal, false, _T("NULL output pointer") ); \
941a4e62 1684 DO_IF_NOT_WINCE( errno = 0; ) \
941a4e62 1685 const wxStringCharType *start = wx_str(); \
529e491c
FM
1686 wxStringCharType *end;
1687
1688#define WX_STRING_TO_X_TYPE_END \
941a4e62
VS
1689 /* return true only if scan was stopped by the terminating NUL and */ \
1690 /* if the string was not empty to start with and no under/overflow */ \
1691 /* occurred: */ \
c95e653c
VZ
1692 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1693 return false; \
529e491c
FM
1694 *pVal = val; \
1695 return true;
cd0b1709 1696
c95e653c 1697bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1698{
529e491c
FM
1699 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1700
1701 WX_STRING_TO_X_TYPE_START
1702 long val = wxStrtol(start, &end, base);
1703 WX_STRING_TO_X_TYPE_END
619dcb09 1704}
cd0b1709 1705
c95e653c 1706bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1707{
529e491c
FM
1708 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1709
1710 WX_STRING_TO_X_TYPE_START
1711 unsigned long val = wxStrtoul(start, &end, base);
1712 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1713}
1714
c95e653c 1715bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1716{
529e491c
FM
1717 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1718
1719 WX_STRING_TO_X_TYPE_START
1720 wxLongLong_t val = wxStrtoll(start, &end, base);
1721 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1722}
1723
c95e653c 1724bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1725{
529e491c
FM
1726 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1727
1728 WX_STRING_TO_X_TYPE_START
1729 wxULongLong_t val = wxStrtoull(start, &end, base);
1730 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1731}
1732
c95e653c 1733bool wxString::ToDouble(double *pVal) const
cd0b1709 1734{
529e491c
FM
1735 WX_STRING_TO_X_TYPE_START
1736 double val = wxStrtod(start, &end);
1737 WX_STRING_TO_X_TYPE_END
1738}
cd0b1709 1739
529e491c 1740#if wxUSE_XLOCALE
e71e5b37 1741
529e491c
FM
1742bool wxString::ToCLong(long *pVal, int base) const
1743{
1744 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
cd0b1709 1745
529e491c
FM
1746 WX_STRING_TO_X_TYPE_START
1747#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1748 long val = wxStrtol_lA(start, &end, base, wxCLocale);
1749#else
1750 long val = wxStrtol_l(start, &end, base, wxCLocale);
1751#endif
1752 WX_STRING_TO_X_TYPE_END
1753}
c95e653c 1754
529e491c
FM
1755bool wxString::ToCULong(unsigned long *pVal, int base) const
1756{
1757 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
c95e653c 1758
529e491c
FM
1759 WX_STRING_TO_X_TYPE_START
1760#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1761 unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
1762#else
1763 unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
1764#endif
1765 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1766}
1767
529e491c
FM
1768bool wxString::ToCDouble(double *pVal) const
1769{
1770 WX_STRING_TO_X_TYPE_START
1771#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1772 double val = wxStrtod_lA(start, &end, wxCLocale);
1773#else
1774 double val = wxStrtod_l(start, &end, wxCLocale);
1775#endif
1776 WX_STRING_TO_X_TYPE_END
1777}
1778
1779#endif // wxUSE_XLOCALE
1780
c801d85f 1781// ---------------------------------------------------------------------------
9efd3367 1782// formatted output
c801d85f 1783// ---------------------------------------------------------------------------
378b05f7 1784
d1f6e2cf 1785#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1786/* static */
c9f78968 1787#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1788wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1789#else
d1f6e2cf 1790wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1791#endif
341e7d28 1792{
77c3e48a 1793 va_list argptr;
c9f78968 1794 va_start(argptr, format);
341e7d28 1795
77c3e48a 1796 wxString s;
c9f78968 1797 s.PrintfV(format, argptr);
341e7d28 1798
77c3e48a 1799 va_end(argptr);
341e7d28 1800
77c3e48a 1801 return s;
341e7d28 1802}
d1f6e2cf
VS
1803#endif // !wxUSE_UTF8_LOCALE_ONLY
1804
1805#if wxUSE_UNICODE_UTF8
1806/* static */
1807wxString wxString::DoFormatUtf8(const char *format, ...)
1808{
1809 va_list argptr;
1810 va_start(argptr, format);
1811
1812 wxString s;
1813 s.PrintfV(format, argptr);
1814
1815 va_end(argptr);
1816
1817 return s;
1818}
1819#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1820
1821/* static */
c9f78968 1822wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1823{
1824 wxString s;
c9f78968 1825 s.PrintfV(format, argptr);
341e7d28
VZ
1826 return s;
1827}
1828
d1f6e2cf 1829#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1830#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1831int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1832#else
d1f6e2cf 1833int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1834#endif
c801d85f 1835{
ba9bbf13 1836 va_list argptr;
c9f78968 1837 va_start(argptr, format);
c801d85f 1838
c9f78968
VS
1839#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1840 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1841 // because it's the only cast that works safely for downcasting when
1842 // multiple inheritance is used:
1843 wxString *str = static_cast<wxString*>(this);
1844#else
1845 wxString *str = this;
1846#endif
1847
1848 int iLen = str->PrintfV(format, argptr);
c801d85f 1849
ba9bbf13 1850 va_end(argptr);
c801d85f 1851
ba9bbf13 1852 return iLen;
c801d85f 1853}
d1f6e2cf
VS
1854#endif // !wxUSE_UTF8_LOCALE_ONLY
1855
1856#if wxUSE_UNICODE_UTF8
1857int wxString::DoPrintfUtf8(const char *format, ...)
1858{
1859 va_list argptr;
1860 va_start(argptr, format);
1861
1862 int iLen = PrintfV(format, argptr);
1863
1864 va_end(argptr);
1865
1866 return iLen;
1867}
1868#endif // wxUSE_UNICODE_UTF8
c801d85f 1869
67612ff1
DE
1870/*
1871 Uses wxVsnprintf and places the result into the this string.
1872
1873 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1874 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1875 the ISO C99 (and thus SUSv3) standard the return value for the case of
1876 an undersized buffer is inconsistent. For conforming vsnprintf
1877 implementations the function must return the number of characters that
1878 would have been printed had the buffer been large enough. For conforming
1879 vswprintf implementations the function must return a negative number
1880 and set errno.
1881
1882 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1883 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1884 those are defined in the standard and backed up by several conformance
1885 statements. Note that ENOMEM mentioned in the manual page does not
1886 apply to swprintf, only wprintf and fwprintf.
1887
1888 Official manual page:
1889 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1890
1891 Some conformance statements (AIX, Solaris):
1892 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1893 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1894
1895 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1896 EILSEQ and EINVAL are specifically defined to mean the error is other than
1897 an undersized buffer and no other errno are defined we treat those two
1898 as meaning hard errors and everything else gets the old behavior which
1899 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1900
67612ff1
DE
1901 In practice it's impossible to determine before compilation which behavior
1902 may be used. The vswprintf function may have vsnprintf-like behavior or
1903 vice-versa. Behavior detected on one release can theoretically change
1904 with an updated release. Not to mention that configure testing for it
1905 would require the test to be run on the host system, not the build system
1906 which makes cross compilation difficult. Therefore, we make no assumptions
1907 about behavior and try our best to handle every known case, including the
1908 case where wxVsnprintf returns a negative number and fails to set errno.
1909
1910 There is yet one more non-standard implementation and that is our own.
1911 Fortunately, that can be detected at compile-time.
1912
1913 On top of all that, ISO C99 explicitly defines snprintf to write a null
1914 character to the last position of the specified buffer. That would be at
1915 at the given buffer size minus 1. It is supposed to do this even if it
1916 turns out that the buffer is sized too small.
1917
1918 Darwin (tested on 10.5) follows the C99 behavior exactly.
1919
1920 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1921 errno even when it fails. However, it only seems to ever fail due
1922 to an undersized buffer.
1923*/
2523e9b7
VS
1924#if wxUSE_UNICODE_UTF8
1925template<typename BufferType>
1926#else
1927// we only need one version in non-UTF8 builds and at least two Windows
1928// compilers have problems with this function template, so use just one
1929// normal function here
1930#endif
1931static int DoStringPrintfV(wxString& str,
1932 const wxString& format, va_list argptr)
c801d85f 1933{
f6f5941b 1934 int size = 1024;
e87b7833 1935
f6f5941b
VZ
1936 for ( ;; )
1937 {
2523e9b7
VS
1938#if wxUSE_UNICODE_UTF8
1939 BufferType tmp(str, size + 1);
1940 typename BufferType::CharType *buf = tmp;
1941#else
1942 wxStringBuffer tmp(str, size + 1);
de2589be 1943 wxChar *buf = tmp;
2523e9b7 1944#endif
2bb67b80 1945
ba9bbf13
WS
1946 if ( !buf )
1947 {
1948 // out of memory
a33c7045
VS
1949
1950 // in UTF-8 build, leaving uninitialized junk in the buffer
1951 // could result in invalid non-empty UTF-8 string, so just
1952 // reset the string to empty on failure:
1953 buf[0] = '\0';
ba9bbf13 1954 return -1;
e87b7833 1955 }
f6f5941b 1956
ba9bbf13
WS
1957 // wxVsnprintf() may modify the original arg pointer, so pass it
1958 // only a copy
1959 va_list argptrcopy;
1960 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1961
1962#ifndef __WXWINCE__
1963 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1964 errno = 0;
1965#endif
2523e9b7 1966 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1967 va_end(argptrcopy);
1968
1969 // some implementations of vsnprintf() don't NUL terminate
1970 // the string if there is not enough space for it so
1971 // always do it manually
67612ff1
DE
1972 // FIXME: This really seems to be the wrong and would be an off-by-one
1973 // bug except the code above allocates an extra character.
ba9bbf13
WS
1974 buf[size] = _T('\0');
1975
caff62f2
VZ
1976 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1977 // total number of characters which would have been written if the
b1727cfe 1978 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1979 if ( len < 0 )
1980 {
52de37c7
VS
1981 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1982 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1983 // is true if *both* of them use our own implementation,
1984 // otherwise we can't be sure
f2bbe5b6
VZ
1985#if wxUSE_WXVSNPRINTF
1986 // we know that our own implementation of wxVsnprintf() returns -1
1987 // only for a format error - thus there's something wrong with
1988 // the user's format string
a33c7045 1989 buf[0] = '\0';
f2bbe5b6 1990 return -1;
52de37c7
VS
1991#else // possibly using system version
1992 // assume it only returns error if there is not enough space, but
1993 // as we don't know how much we need, double the current size of
1994 // the buffer
67612ff1 1995#ifndef __WXWINCE__
a9a854d7
DE
1996 if( (errno == EILSEQ) || (errno == EINVAL) )
1997 // If errno was set to one of the two well-known hard errors
1998 // then fail immediately to avoid an infinite loop.
1999 return -1;
2000 else
2001#endif // __WXWINCE__
67612ff1
DE
2002 // still not enough, as we don't know how much we need, double the
2003 // current size of the buffer
2004 size *= 2;
f2bbe5b6 2005#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 2006 }
64f8f94c 2007 else if ( len >= size )
de2589be 2008 {
f2bbe5b6 2009#if wxUSE_WXVSNPRINTF
c95e653c 2010 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
2011 // size+1 when there's not enough space but that's not the size
2012 // of the required buffer!
2013 size *= 2; // so we just double the current size of the buffer
2014#else
64f8f94c
VZ
2015 // some vsnprintf() implementations NUL-terminate the buffer and
2016 // some don't in len == size case, to be safe always add 1
67612ff1
DE
2017 // FIXME: I don't quite understand this comment. The vsnprintf
2018 // function is specifically defined to return the number of
2019 // characters printed not including the null terminator.
2020 // So OF COURSE you need to add 1 to get the right buffer size.
2021 // The following line is definitely correct, no question.
64f8f94c 2022 size = len + 1;
f2bbe5b6 2023#endif
de2589be
VZ
2024 }
2025 else // ok, there was enough space
f6f5941b 2026 {
f6f5941b
VZ
2027 break;
2028 }
f6f5941b
VZ
2029 }
2030
2031 // we could have overshot
2523e9b7
VS
2032 str.Shrink();
2033
2034 return str.length();
2035}
c801d85f 2036
2523e9b7
VS
2037int wxString::PrintfV(const wxString& format, va_list argptr)
2038{
2523e9b7
VS
2039#if wxUSE_UNICODE_UTF8
2040 #if wxUSE_STL_BASED_WXSTRING
2041 typedef wxStringTypeBuffer<char> Utf8Buffer;
2042 #else
6798451b 2043 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
2044 #endif
2045#endif
2046
2047#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 2048 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2049#else
2050 #if wxUSE_UNICODE_UTF8
2051 if ( wxLocaleIsUtf8 )
c6255a6e 2052 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2053 else
2054 // wxChar* version
c6255a6e 2055 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 2056 #else
c6255a6e 2057 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
2058 #endif // UTF8/WCHAR
2059#endif
c801d85f
KB
2060}
2061
097c080b
VZ
2062// ----------------------------------------------------------------------------
2063// misc other operations
2064// ----------------------------------------------------------------------------
0c5d3e1c 2065
d775fa82 2066// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
2067// '?' metacharacters (as usual, '?' matches any character and '*' any number
2068// of them)
8a540c88 2069bool wxString::Matches(const wxString& mask) const
097c080b 2070{
d6044f58
VZ
2071 // I disable this code as it doesn't seem to be faster (in fact, it seems
2072 // to be much slower) than the old, hand-written code below and using it
2073 // here requires always linking with libregex even if the user code doesn't
2074 // use it
2075#if 0 // wxUSE_REGEX
706c2ac9
VZ
2076 // first translate the shell-like mask into a regex
2077 wxString pattern;
2078 pattern.reserve(wxStrlen(pszMask));
2079
2080 pattern += _T('^');
2081 while ( *pszMask )
2082 {
2083 switch ( *pszMask )
2084 {
2085 case _T('?'):
2086 pattern += _T('.');
2087 break;
2088
2089 case _T('*'):
2090 pattern += _T(".*");
2091 break;
2092
2093 case _T('^'):
2094 case _T('.'):
2095 case _T('$'):
2096 case _T('('):
2097 case _T(')'):
2098 case _T('|'):
2099 case _T('+'):
2100 case _T('\\'):
2101 // these characters are special in a RE, quote them
2102 // (however note that we don't quote '[' and ']' to allow
2103 // using them for Unix shell like matching)
2104 pattern += _T('\\');
2105 // fall through
2106
2107 default:
2108 pattern += *pszMask;
2109 }
2110
2111 pszMask++;
2112 }
2113 pattern += _T('$');
2114
2115 // and now use it
2116 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2117#else // !wxUSE_REGEX
9a4232dc
VZ
2118 // TODO: this is, of course, awfully inefficient...
2119
8a540c88
VS
2120 // FIXME-UTF8: implement using iterators, remove #if
2121#if wxUSE_UNICODE_UTF8
de4983f3
VS
2122 const wxScopedWCharBuffer maskBuf = mask.wc_str();
2123 const wxScopedWCharBuffer txtBuf = wc_str();
8a540c88
VS
2124 const wxChar *pszMask = maskBuf.data();
2125 const wxChar *pszTxt = txtBuf.data();
2126#else
2127 const wxChar *pszMask = mask.wx_str();
9a4232dc 2128 // the char currently being checked
8a540c88
VS
2129 const wxChar *pszTxt = wx_str();
2130#endif
9a4232dc
VZ
2131
2132 // the last location where '*' matched
2133 const wxChar *pszLastStarInText = NULL;
2134 const wxChar *pszLastStarInMask = NULL;
2135
2136match:
2137 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2138 switch ( *pszMask ) {
223d09f6
KB
2139 case wxT('?'):
2140 if ( *pszTxt == wxT('\0') )
d775fa82 2141 return false;
097c080b 2142
9a4232dc 2143 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2144
097c080b
VZ
2145 break;
2146
223d09f6 2147 case wxT('*'):
097c080b 2148 {
9a4232dc
VZ
2149 // remember where we started to be able to backtrack later
2150 pszLastStarInText = pszTxt;
2151 pszLastStarInMask = pszMask;
2152
097c080b 2153 // ignore special chars immediately following this one
9a4232dc 2154 // (should this be an error?)
223d09f6 2155 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2156 pszMask++;
2157
2158 // if there is nothing more, match
223d09f6 2159 if ( *pszMask == wxT('\0') )
d775fa82 2160 return true;
097c080b
VZ
2161
2162 // are there any other metacharacters in the mask?
c86f1403 2163 size_t uiLenMask;
223d09f6 2164 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2165
2166 if ( pEndMask != NULL ) {
2167 // we have to match the string between two metachars
2168 uiLenMask = pEndMask - pszMask;
2169 }
2170 else {
2171 // we have to match the remainder of the string
2bb67b80 2172 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2173 }
2174
2175 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2176 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2177 if ( pMatch == NULL )
d775fa82 2178 return false;
097c080b
VZ
2179
2180 // -1 to compensate "++" in the loop
2181 pszTxt = pMatch + uiLenMask - 1;
2182 pszMask += uiLenMask - 1;
2183 }
2184 break;
2185
2186 default:
2187 if ( *pszMask != *pszTxt )
d775fa82 2188 return false;
097c080b
VZ
2189 break;
2190 }
2191 }
2192
2193 // match only if nothing left
9a4232dc 2194 if ( *pszTxt == wxT('\0') )
d775fa82 2195 return true;
9a4232dc
VZ
2196
2197 // if we failed to match, backtrack if we can
2198 if ( pszLastStarInText ) {
2199 pszTxt = pszLastStarInText + 1;
2200 pszMask = pszLastStarInMask;
2201
2202 pszLastStarInText = NULL;
2203
2204 // don't bother resetting pszLastStarInMask, it's unnecessary
2205
2206 goto match;
2207 }
2208
d775fa82 2209 return false;
706c2ac9 2210#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2211}
2212
1fc5dd6f 2213// Count the number of chars
c9f78968 2214int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2215{
2216 int count = 0;
8f93a29f 2217 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2218 {
8f93a29f 2219 if ( *i == ch )
1fc5dd6f
JS
2220 count ++;
2221 }
2222 return count;
2223}
4e79262f 2224