]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
require libsm-dev, it's needed for KDE/GNOME detection
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
6b769f3d 27#endif
c801d85f
KB
28
29#include <ctype.h>
92df97b8
WS
30
31#ifndef __WXWINCE__
32 #include <errno.h>
33#endif
34
c801d85f
KB
35#include <string.h>
36#include <stdlib.h>
9a08c20e 37
ce3ed50d 38#ifdef __SALFORDC__
8898456d 39 #include <clib.h>
ce3ed50d
JS
40#endif
41
8116a0c5 42#include "wx/hashmap.h"
8f93a29f
VS
43
44// string handling functions used by wxString:
45#if wxUSE_UNICODE_UTF8
46 #define wxStringMemcpy memcpy
47 #define wxStringMemcmp memcmp
48 #define wxStringMemchr memchr
49 #define wxStringStrlen strlen
50#else
51 #define wxStringMemcpy wxTmemcpy
52 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
53 #define wxStringMemchr wxTmemchr
54 #define wxStringStrlen wxStrlen
55#endif
8f93a29f 56
e87b7833 57
a7ea63e2
VS
58// ---------------------------------------------------------------------------
59// static class variables definition
60// ---------------------------------------------------------------------------
e87b7833 61
a7ea63e2
VS
62//According to STL _must_ be a -1 size_t
63const size_t wxString::npos = (size_t) -1;
8f93a29f 64
a7ea63e2
VS
65// ----------------------------------------------------------------------------
66// global functions
67// ----------------------------------------------------------------------------
e87b7833 68
a7ea63e2 69#if wxUSE_STD_IOSTREAM
8f93a29f 70
a7ea63e2 71#include <iostream>
8f93a29f 72
a7ea63e2 73wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 74{
04abe4bc 75// FIXME-UTF8: always, not only if wxUSE_UNICODE
a7ea63e2 76#if wxUSE_UNICODE && !defined(__BORLANDC__)
681e4412 77 return os << (const wchar_t*)str.AsWCharBuf();
a7ea63e2 78#else
681e4412 79 return os << (const char*)str.AsCharBuf();
a7ea63e2 80#endif
8f93a29f
VS
81}
82
04abe4bc
VS
83wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
84{
85 return os << str.c_str();
86}
87
88wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
89{
90 return os << str.data();
91}
92
93#ifndef __BORLANDC__
94wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
95{
96 return os << str.data();
97}
98#endif
99
a7ea63e2 100#endif // wxUSE_STD_IOSTREAM
e87b7833 101
81727065
VS
102// ===========================================================================
103// wxString class core
104// ===========================================================================
105
106#if wxUSE_UNICODE_UTF8
107
81727065
VS
108void wxString::PosLenToImpl(size_t pos, size_t len,
109 size_t *implPos, size_t *implLen) const
110{
111 if ( pos == npos )
112 *implPos = npos;
113 else
114 {
115 const_iterator i = begin() + pos;
cf9a878b 116 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
81727065
VS
117 if ( len == npos )
118 *implLen = npos;
119 else
120 {
121 // too large length is interpreted as "to the end of the string"
122 // FIXME-UTF8: verify this is the case in std::string, assert
123 // otherwise
124 if ( pos + len > length() )
125 len = length() - pos;
126
cf9a878b 127 *implLen = (i + len).impl() - i.impl();
81727065
VS
128 }
129 }
130}
131
132#endif // wxUSE_UNICODE_UTF8
133
11aac4ba
VS
134// ----------------------------------------------------------------------------
135// wxCStrData converted strings caching
136// ----------------------------------------------------------------------------
137
132276cf
VS
138// FIXME-UTF8: temporarily disabled because it doesn't work with global
139// string objects; re-enable after fixing this bug and benchmarking
140// performance to see if using a hash is a good idea at all
141#if 0
142
11aac4ba
VS
143// For backward compatibility reasons, it must be possible to assign the value
144// returned by wxString::c_str() to a char* or wchar_t* variable and work with
145// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
146// because the memory would be freed immediately, but it has to be valid as long
147// as the string is not modified, so that code like this still works:
148//
149// const wxChar *s = str.c_str();
150// while ( s ) { ... }
151
152// FIXME-UTF8: not thread safe!
153// FIXME-UTF8: we currently clear the cached conversion only when the string is
154// destroyed, but we should do it when the string is modified, to
155// keep memory usage down
156// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
157// invalidated the cache on every change, we could keep the previous
158// conversion
159// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
160// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
161
162template<typename T>
163static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
164{
6c4ebcda 165 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
166 if ( i != hash.end() )
167 {
168 free(i->second);
169 hash.erase(i);
170 }
171}
172
173#if wxUSE_UNICODE
6c4ebcda
VS
174// NB: non-STL implementation doesn't compile with "const wxString*" key type,
175// so we have to use wxString* here and const-cast when used
11aac4ba
VS
176WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
177 wxStringCharConversionCache);
178static wxStringCharConversionCache gs_stringsCharCache;
179
180const char* wxCStrData::AsChar() const
181{
182 // remove previously cached value, if any (see FIXMEs above):
183 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
184
185 // convert the string and keep it:
6c4ebcda
VS
186 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
187 m_str->mb_str().release();
11aac4ba
VS
188
189 return s + m_offset;
190}
191#endif // wxUSE_UNICODE
192
193#if !wxUSE_UNICODE_WCHAR
194WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
195 wxStringWCharConversionCache);
196static wxStringWCharConversionCache gs_stringsWCharCache;
197
198const wchar_t* wxCStrData::AsWChar() const
199{
200 // remove previously cached value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
202
203 // convert the string and keep it:
6c4ebcda
VS
204 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
205 m_str->wc_str().release();
11aac4ba
VS
206
207 return s + m_offset;
208}
209#endif // !wxUSE_UNICODE_WCHAR
210
11aac4ba
VS
211wxString::~wxString()
212{
213#if wxUSE_UNICODE
214 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
215 DeleteStringFromConversionCache(gs_stringsCharCache, this);
216#endif
217#if !wxUSE_UNICODE_WCHAR
218 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
219#endif
220}
132276cf
VS
221#endif
222
111d9948 223#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
224const char* wxCStrData::AsChar() const
225{
111d9948
VS
226#if wxUSE_UNICODE_UTF8
227 if ( wxLocaleIsUtf8 )
228 return AsInternal();
229#endif
230 // under non-UTF8 locales, we have to convert the internal UTF-8
231 // representation using wxConvLibc and cache the result
232
132276cf 233 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
234
235 // convert the string:
236 wxCharBuffer buf(str->mb_str());
237
238 // FIXME-UTF8: do the conversion in-place in the existing buffer
239 if ( str->m_convertedToChar &&
240 strlen(buf) == strlen(str->m_convertedToChar) )
241 {
242 // keep the same buffer for as long as possible, so that several calls
243 // to c_str() in a row still work:
244 strcpy(str->m_convertedToChar, buf);
245 }
246 else
247 {
248 str->m_convertedToChar = buf.release();
249 }
250
251 // and keep it:
132276cf
VS
252 return str->m_convertedToChar + m_offset;
253}
111d9948 254#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
255
256#if !wxUSE_UNICODE_WCHAR
257const wchar_t* wxCStrData::AsWChar() const
258{
259 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
260
261 // convert the string:
262 wxWCharBuffer buf(str->wc_str());
263
264 // FIXME-UTF8: do the conversion in-place in the existing buffer
265 if ( str->m_convertedToWChar &&
266 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
267 {
268 // keep the same buffer for as long as possible, so that several calls
269 // to c_str() in a row still work:
270 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
271 }
272 else
273 {
274 str->m_convertedToWChar = buf.release();
275 }
276
277 // and keep it:
132276cf
VS
278 return str->m_convertedToWChar + m_offset;
279}
280#endif // !wxUSE_UNICODE_WCHAR
281
282// ===========================================================================
283// wxString class core
284// ===========================================================================
285
286// ---------------------------------------------------------------------------
287// construction and conversion
288// ---------------------------------------------------------------------------
11aac4ba 289
81727065 290#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
291/* static */
292wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 293 const wxMBConv& conv)
8f93a29f
VS
294{
295 // anything to do?
296 if ( !psz || nLength == 0 )
81727065 297 return SubstrBufFromMB(L"", 0);
8f93a29f
VS
298
299 if ( nLength == npos )
300 nLength = wxNO_LEN;
301
302 size_t wcLen;
303 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
304 if ( !wcLen )
81727065 305 return SubstrBufFromMB(_T(""), 0);
8f93a29f
VS
306 else
307 return SubstrBufFromMB(wcBuf, wcLen);
308}
81727065
VS
309#endif // wxUSE_UNICODE_WCHAR
310
311#if wxUSE_UNICODE_UTF8
312/* static */
313wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
314 const wxMBConv& conv)
315{
81727065
VS
316 // anything to do?
317 if ( !psz || nLength == 0 )
318 return SubstrBufFromMB("", 0);
319
111d9948
VS
320 // if psz is already in UTF-8, we don't have to do the roundtrip to
321 // wchar_t* and back:
322 if ( conv.IsUTF8() )
323 {
324 // we need to validate the input because UTF8 iterators assume valid
325 // UTF-8 sequence and psz may be invalid:
326 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
327 {
328 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
329 }
330 // else: do the roundtrip through wchar_t*
331 }
332
81727065
VS
333 if ( nLength == npos )
334 nLength = wxNO_LEN;
335
336 // first convert to wide string:
337 size_t wcLen;
338 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
339 if ( !wcLen )
340 return SubstrBufFromMB("", 0);
341
342 // and then to UTF-8:
343 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8));
344 // widechar -> UTF-8 conversion isn't supposed to ever fail:
345 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
346
347 return buf;
348}
349#endif // wxUSE_UNICODE_UTF8
350
351#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
352/* static */
353wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 354 const wxMBConv& conv)
8f93a29f
VS
355{
356 // anything to do?
357 if ( !pwz || nLength == 0 )
81727065 358 return SubstrBufFromWC("", 0);
8f93a29f
VS
359
360 if ( nLength == npos )
361 nLength = wxNO_LEN;
362
363 size_t mbLen;
364 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
365 if ( !mbLen )
81727065 366 return SubstrBufFromWC("", 0);
8f93a29f
VS
367 else
368 return SubstrBufFromWC(mbBuf, mbLen);
369}
81727065 370#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
371
372
81727065 373#if wxUSE_UNICODE_WCHAR
e87b7833 374
06386448 375//Convert wxString in Unicode mode to a multi-byte string
830f8f11 376const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 377{
81727065 378 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
e87b7833
MB
379}
380
81727065 381#elif wxUSE_UNICODE_UTF8
e87b7833 382
81727065
VS
383const wxWCharBuffer wxString::wc_str() const
384{
385 return wxConvUTF8.cMB2WC(m_impl.c_str(),
386 m_impl.length() + 1 /* size, not length */,
387 NULL);
388}
389
390const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
391{
111d9948
VS
392 if ( conv.IsUTF8() )
393 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
394
81727065
VS
395 // FIXME-UTF8: use wc_str() here once we have buffers with length
396
397 size_t wcLen;
398 wxWCharBuffer wcBuf(
399 wxConvUTF8.cMB2WC(m_impl.c_str(),
400 m_impl.length() + 1 /* size, not length */,
401 &wcLen));
402 if ( !wcLen )
403 return wxCharBuffer("");
404
405 return conv.cWC2MB(wcBuf, wcLen, NULL);
406}
407
408#else // ANSI
eec47cc6 409
7663d0d4 410//Converts this string to a wide character string if unicode
06386448 411//mode is not enabled and wxUSE_WCHAR_T is enabled
830f8f11 412const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 413{
81727065 414 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
265d5cce 415}
7663d0d4 416
e87b7833
MB
417#endif // Unicode/ANSI
418
419// shrink to minimal size (releasing extra memory)
420bool wxString::Shrink()
421{
422 wxString tmp(begin(), end());
423 swap(tmp);
424 return tmp.length() == length();
425}
426
d8a4b666 427// deprecated compatibility code:
a7ea63e2 428#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 429wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
430{
431 return DoGetWriteBuf(nLen);
432}
433
434void wxString::UngetWriteBuf()
435{
436 DoUngetWriteBuf();
437}
438
439void wxString::UngetWriteBuf(size_t nLen)
440{
441 DoUngetWriteBuf(nLen);
442}
a7ea63e2 443#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 444
d8a4b666 445
e87b7833
MB
446// ---------------------------------------------------------------------------
447// data access
448// ---------------------------------------------------------------------------
449
450// all functions are inline in string.h
451
452// ---------------------------------------------------------------------------
e8f59039 453// concatenation operators
e87b7833
MB
454// ---------------------------------------------------------------------------
455
c801d85f 456/*
c801d85f
KB
457 * concatenation functions come in 5 flavours:
458 * string + string
459 * char + string and string + char
460 * C str + string and string + C str
461 */
462
b1801e0e 463wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 464{
992527a5 465#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
466 wxASSERT( str1.IsValid() );
467 wxASSERT( str2.IsValid() );
e87b7833 468#endif
097c080b 469
3458e408
WS
470 wxString s = str1;
471 s += str2;
3168a13f 472
3458e408 473 return s;
c801d85f
KB
474}
475
c9f78968 476wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 477{
992527a5 478#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 479 wxASSERT( str.IsValid() );
e87b7833 480#endif
3168a13f 481
3458e408
WS
482 wxString s = str;
483 s += ch;
097c080b 484
3458e408 485 return s;
c801d85f
KB
486}
487
c9f78968 488wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 489{
992527a5 490#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 491 wxASSERT( str.IsValid() );
e87b7833 492#endif
097c080b 493
3458e408
WS
494 wxString s = ch;
495 s += str;
3168a13f 496
3458e408 497 return s;
c801d85f
KB
498}
499
8f93a29f 500wxString operator+(const wxString& str, const char *psz)
c801d85f 501{
992527a5 502#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 503 wxASSERT( str.IsValid() );
e87b7833 504#endif
097c080b 505
3458e408 506 wxString s;
8f93a29f 507 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
508 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
509 }
510 s += str;
511 s += psz;
3168a13f 512
3458e408 513 return s;
c801d85f
KB
514}
515
8f93a29f 516wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 517{
992527a5 518#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
519 wxASSERT( str.IsValid() );
520#endif
521
522 wxString s;
523 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
524 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
525 }
526 s += str;
527 s += pwz;
528
529 return s;
530}
531
532wxString operator+(const char *psz, const wxString& str)
533{
a7ea63e2
VS
534#if !wxUSE_STL_BASED_WXSTRING
535 wxASSERT( str.IsValid() );
536#endif
537
538 wxString s;
539 if ( !s.Alloc(strlen(psz) + str.length()) ) {
540 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
541 }
542 s = psz;
543 s += str;
544
545 return s;
546}
547
548wxString operator+(const wchar_t *pwz, const wxString& str)
549{
550#if !wxUSE_STL_BASED_WXSTRING
551 wxASSERT( str.IsValid() );
552#endif
553
554 wxString s;
555 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
556 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
557 }
558 s = pwz;
559 s += str;
560
561 return s;
562}
563
564// ---------------------------------------------------------------------------
565// string comparison
566// ---------------------------------------------------------------------------
567
568#ifdef HAVE_STD_STRING_COMPARE
569
570// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
571// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
572// sort strings in characters code point order by sorting the byte sequence
573// in byte values order (i.e. what strcmp() and memcmp() do).
574
575int wxString::compare(const wxString& str) const
576{
577 return m_impl.compare(str.m_impl);
578}
579
580int wxString::compare(size_t nStart, size_t nLen,
581 const wxString& str) const
582{
583 size_t pos, len;
584 PosLenToImpl(nStart, nLen, &pos, &len);
585 return m_impl.compare(pos, len, str.m_impl);
586}
587
588int wxString::compare(size_t nStart, size_t nLen,
589 const wxString& str,
590 size_t nStart2, size_t nLen2) const
591{
592 size_t pos, len;
593 PosLenToImpl(nStart, nLen, &pos, &len);
594
595 size_t pos2, len2;
596 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
597
598 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
599}
600
601int wxString::compare(const char* sz) const
602{
603 return m_impl.compare(ImplStr(sz));
604}
605
606int wxString::compare(const wchar_t* sz) const
607{
608 return m_impl.compare(ImplStr(sz));
609}
610
611int wxString::compare(size_t nStart, size_t nLen,
612 const char* sz, size_t nCount) const
613{
614 size_t pos, len;
615 PosLenToImpl(nStart, nLen, &pos, &len);
616
617 SubstrBufFromMB str(ImplStr(sz, nCount));
618
619 return m_impl.compare(pos, len, str.data, str.len);
620}
621
622int wxString::compare(size_t nStart, size_t nLen,
623 const wchar_t* sz, size_t nCount) const
624{
625 size_t pos, len;
626 PosLenToImpl(nStart, nLen, &pos, &len);
627
628 SubstrBufFromWC str(ImplStr(sz, nCount));
629
630 return m_impl.compare(pos, len, str.data, str.len);
631}
632
633#else // !HAVE_STD_STRING_COMPARE
634
635static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
636 const wxStringCharType* s2, size_t l2)
637{
638 if( l1 == l2 )
639 return wxStringMemcmp(s1, s2, l1);
640 else if( l1 < l2 )
641 {
642 int ret = wxStringMemcmp(s1, s2, l1);
643 return ret == 0 ? -1 : ret;
644 }
645 else
646 {
647 int ret = wxStringMemcmp(s1, s2, l2);
648 return ret == 0 ? +1 : ret;
649 }
650}
651
652int wxString::compare(const wxString& str) const
653{
654 return ::wxDoCmp(m_impl.data(), m_impl.length(),
655 str.m_impl.data(), str.m_impl.length());
656}
657
658int wxString::compare(size_t nStart, size_t nLen,
659 const wxString& str) const
660{
661 wxASSERT(nStart <= length());
662 size_type strLen = length() - nStart;
663 nLen = strLen < nLen ? strLen : nLen;
664
665 size_t pos, len;
666 PosLenToImpl(nStart, nLen, &pos, &len);
667
668 return ::wxDoCmp(m_impl.data() + pos, len,
669 str.m_impl.data(), str.m_impl.length());
670}
671
672int wxString::compare(size_t nStart, size_t nLen,
673 const wxString& str,
674 size_t nStart2, size_t nLen2) const
675{
676 wxASSERT(nStart <= length());
677 wxASSERT(nStart2 <= str.length());
678 size_type strLen = length() - nStart,
679 strLen2 = str.length() - nStart2;
680 nLen = strLen < nLen ? strLen : nLen;
681 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
682
683 size_t pos, len;
684 PosLenToImpl(nStart, nLen, &pos, &len);
685 size_t pos2, len2;
686 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
687
688 return ::wxDoCmp(m_impl.data() + pos, len,
689 str.m_impl.data() + pos2, len2);
690}
691
692int wxString::compare(const char* sz) const
693{
694 SubstrBufFromMB str(ImplStr(sz, npos));
695 if ( str.len == npos )
696 str.len = wxStringStrlen(str.data);
697 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
698}
699
700int wxString::compare(const wchar_t* sz) const
701{
702 SubstrBufFromWC str(ImplStr(sz, npos));
703 if ( str.len == npos )
704 str.len = wxStringStrlen(str.data);
705 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
706}
707
708int wxString::compare(size_t nStart, size_t nLen,
709 const char* sz, size_t nCount) const
710{
711 wxASSERT(nStart <= length());
712 size_type strLen = length() - nStart;
713 nLen = strLen < nLen ? strLen : nLen;
097c080b 714
a7ea63e2
VS
715 size_t pos, len;
716 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 717
a7ea63e2
VS
718 SubstrBufFromMB str(ImplStr(sz, nCount));
719 if ( str.len == npos )
720 str.len = wxStringStrlen(str.data);
721
722 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
723}
724
a7ea63e2
VS
725int wxString::compare(size_t nStart, size_t nLen,
726 const wchar_t* sz, size_t nCount) const
8f93a29f 727{
a7ea63e2
VS
728 wxASSERT(nStart <= length());
729 size_type strLen = length() - nStart;
730 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 731
a7ea63e2
VS
732 size_t pos, len;
733 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 734
a7ea63e2
VS
735 SubstrBufFromWC str(ImplStr(sz, nCount));
736 if ( str.len == npos )
737 str.len = wxStringStrlen(str.data);
738
739 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
740}
741
a7ea63e2
VS
742#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
743
744
8f93a29f
VS
745// ---------------------------------------------------------------------------
746// find_{first,last}_[not]_of functions
747// ---------------------------------------------------------------------------
748
749#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 750
8f93a29f
VS
751// NB: All these functions are implemented with the argument being wxChar*,
752// i.e. widechar string in any Unicode build, even though native string
753// representation is char* in the UTF-8 build. This is because we couldn't
754// use memchr() to determine if a character is in a set encoded as UTF-8.
755
756size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 757{
8f93a29f 758 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
759}
760
8f93a29f 761size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 762{
8f93a29f 763 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
764}
765
8f93a29f 766size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 767{
8f93a29f 768 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 769
8f93a29f
VS
770 size_t idx = nStart;
771 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 772 {
8f93a29f
VS
773 if ( wxTmemchr(sz, *i, n) )
774 return idx;
dcb68102 775 }
8f93a29f
VS
776
777 return npos;
778}
779
780size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
781{
782 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
783
784 size_t idx = nStart;
785 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 786 {
8f93a29f
VS
787 if ( !wxTmemchr(sz, *i, n) )
788 return idx;
789 }
790
791 return npos;
792}
793
794
795size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
796{
797 return find_last_of(sz, nStart, wxStrlen(sz));
798}
799
800size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
801{
802 return find_last_not_of(sz, nStart, wxStrlen(sz));
803}
804
805size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
806{
807 size_t len = length();
808
809 if ( nStart == npos )
810 {
811 nStart = len - 1;
dcb68102 812 }
2c09fb3b 813 else
dcb68102 814 {
8f93a29f 815 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 816 }
8f93a29f
VS
817
818 size_t idx = nStart;
819 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
820 i != rend(); --idx, ++i )
821 {
822 if ( wxTmemchr(sz, *i, n) )
823 return idx;
824 }
825
826 return npos;
dcb68102
RN
827}
828
8f93a29f 829size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 830{
8f93a29f
VS
831 size_t len = length();
832
833 if ( nStart == npos )
834 {
835 nStart = len - 1;
836 }
837 else
838 {
839 wxASSERT_MSG( nStart <= len, _T("invalid index") );
840 }
841
842 size_t idx = nStart;
843 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
844 i != rend(); --idx, ++i )
845 {
846 if ( !wxTmemchr(sz, *i, n) )
847 return idx;
848 }
849
850 return npos;
dcb68102
RN
851}
852
8f93a29f 853size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 854{
8f93a29f
VS
855 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
856
857 size_t idx = nStart;
858 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
859 {
860 if ( *i != ch )
861 return idx;
862 }
863
864 return npos;
865}
866
867size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
868{
869 size_t len = length();
870
871 if ( nStart == npos )
872 {
873 nStart = len - 1;
874 }
875 else
876 {
877 wxASSERT_MSG( nStart <= len, _T("invalid index") );
878 }
879
880 size_t idx = nStart;
881 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
882 i != rend(); --idx, ++i )
883 {
884 if ( *i != ch )
885 return idx;
886 }
887
888 return npos;
889}
890
891// the functions above were implemented for wchar_t* arguments in Unicode
892// build and char* in ANSI build; below are implementations for the other
893// version:
894#if wxUSE_UNICODE
895 #define wxOtherCharType char
896 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
897#else
898 #define wxOtherCharType wchar_t
899 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
900#endif
901
902size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
903 { return find_first_of(STRCONV(sz), nStart); }
904
905size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
906 size_t n) const
907 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
908size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
909 { return find_last_of(STRCONV(sz), nStart); }
910size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
911 size_t n) const
912 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
913size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
914 { return find_first_not_of(STRCONV(sz), nStart); }
915size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
916 size_t n) const
917 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
918size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
919 { return find_last_not_of(STRCONV(sz), nStart); }
920size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
921 size_t n) const
922 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
923
924#undef wxOtherCharType
925#undef STRCONV
926
927#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
928
929// ===========================================================================
930// other common string functions
931// ===========================================================================
932
933int wxString::CmpNoCase(const wxString& s) const
934{
935 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
936
937 size_t idx = 0;
938 const_iterator i1 = begin();
939 const_iterator end1 = end();
940 const_iterator i2 = s.begin();
941 const_iterator end2 = s.end();
942
943 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
944 {
945 wxUniChar lower1 = (wxChar)wxTolower(*i1);
946 wxUniChar lower2 = (wxChar)wxTolower(*i2);
947 if ( lower1 != lower2 )
948 return lower1 < lower2 ? -1 : 1;
949 }
950
951 size_t len1 = length();
952 size_t len2 = s.length();
dcb68102 953
8f93a29f
VS
954 if ( len1 < len2 )
955 return -1;
956 else if ( len1 > len2 )
957 return 1;
958 return 0;
dcb68102
RN
959}
960
961
b1ac3b56 962#if wxUSE_UNICODE
e015c2a3 963
cf6bedce
SC
964#ifdef __MWERKS__
965#ifndef __SCHAR_MAX__
966#define __SCHAR_MAX__ 127
967#endif
968#endif
969
e015c2a3 970wxString wxString::FromAscii(const char *ascii)
b1ac3b56
RR
971{
972 if (!ascii)
973 return wxEmptyString;
e015c2a3 974
c1eada83 975 size_t len = strlen(ascii);
b1ac3b56 976 wxString res;
e015c2a3
VZ
977
978 if ( len )
979 {
c1eada83
VS
980 wxImplStringBuffer buf(res, len);
981 wxStringCharType *dest = buf;
e015c2a3
VZ
982
983 for ( ;; )
984 {
c1eada83
VS
985 unsigned char c = (unsigned char)*ascii++;
986 wxASSERT_MSG( c < 0x80,
987 _T("Non-ASCII value passed to FromAscii().") );
988
989 *dest++ = (wchar_t)c;
990
991 if ( c == '\0' )
992 break;
e015c2a3
VZ
993 }
994 }
995
b1ac3b56
RR
996 return res;
997}
998
2b5f62a0
VZ
999wxString wxString::FromAscii(const char ascii)
1000{
1001 // What do we do with '\0' ?
1002
c1eada83 1003 unsigned char c = (unsigned char)ascii;
8760bc65 1004
c1eada83
VS
1005 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1006
1007 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1008 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1009}
1010
b1ac3b56
RR
1011const wxCharBuffer wxString::ToAscii() const
1012{
e015c2a3
VZ
1013 // this will allocate enough space for the terminating NUL too
1014 wxCharBuffer buffer(length());
6e394fc6 1015 char *dest = buffer.data();
e015c2a3 1016
c1eada83 1017 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1018 {
c1eada83
VS
1019 wxUniChar c(*i);
1020 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1021 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1022
1023 // the output string can't have embedded NULs anyhow, so we can safely
1024 // stop at first of them even if we do have any
c1eada83 1025 if ( !c )
e015c2a3 1026 break;
b1ac3b56 1027 }
e015c2a3 1028
b1ac3b56
RR
1029 return buffer;
1030}
e015c2a3 1031
c1eada83 1032#endif // wxUSE_UNICODE
b1ac3b56 1033
c801d85f 1034// extract string of length nCount starting at nFirst
c801d85f
KB
1035wxString wxString::Mid(size_t nFirst, size_t nCount) const
1036{
73f507f5 1037 size_t nLen = length();
30d9011f 1038
73f507f5
WS
1039 // default value of nCount is npos and means "till the end"
1040 if ( nCount == npos )
1041 {
1042 nCount = nLen - nFirst;
1043 }
30d9011f 1044
73f507f5
WS
1045 // out-of-bounds requests return sensible things
1046 if ( nFirst + nCount > nLen )
1047 {
1048 nCount = nLen - nFirst;
1049 }
c801d85f 1050
73f507f5
WS
1051 if ( nFirst > nLen )
1052 {
1053 // AllocCopy() will return empty string
1054 return wxEmptyString;
1055 }
c801d85f 1056
73f507f5
WS
1057 wxString dest(*this, nFirst, nCount);
1058 if ( dest.length() != nCount )
1059 {
1060 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1061 }
30d9011f 1062
73f507f5 1063 return dest;
c801d85f
KB
1064}
1065
e87b7833 1066// check that the string starts with prefix and return the rest of the string
d775fa82 1067// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1068bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1069{
c5e7a7d7
VS
1070 if ( compare(0, prefix.length(), prefix) != 0 )
1071 return false;
f6bcfd97
BP
1072
1073 if ( rest )
1074 {
1075 // put the rest of the string into provided pointer
c5e7a7d7 1076 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1077 }
1078
d775fa82 1079 return true;
f6bcfd97
BP
1080}
1081
3affcd07
VZ
1082
1083// check that the string ends with suffix and return the rest of it in the
1084// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1085bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1086{
c5e7a7d7 1087 int start = length() - suffix.length();
81727065
VS
1088
1089 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1090 return false;
1091
1092 if ( rest )
1093 {
1094 // put the rest of the string into provided pointer
1095 rest->assign(*this, 0, start);
1096 }
1097
1098 return true;
1099}
1100
1101
c801d85f
KB
1102// extract nCount last (rightmost) characters
1103wxString wxString::Right(size_t nCount) const
1104{
e87b7833
MB
1105 if ( nCount > length() )
1106 nCount = length();
c801d85f 1107
e87b7833
MB
1108 wxString dest(*this, length() - nCount, nCount);
1109 if ( dest.length() != nCount ) {
b1801e0e
GD
1110 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1111 }
c801d85f
KB
1112 return dest;
1113}
1114
1115// get all characters after the last occurence of ch
1116// (returns the whole string if ch not found)
c9f78968 1117wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1118{
1119 wxString str;
d775fa82 1120 int iPos = Find(ch, true);
3c67202d 1121 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1122 str = *this;
1123 else
c9f78968 1124 str = wx_str() + iPos + 1;
c801d85f
KB
1125
1126 return str;
1127}
1128
1129// extract nCount first (leftmost) characters
1130wxString wxString::Left(size_t nCount) const
1131{
e87b7833
MB
1132 if ( nCount > length() )
1133 nCount = length();
c801d85f 1134
e87b7833
MB
1135 wxString dest(*this, 0, nCount);
1136 if ( dest.length() != nCount ) {
b1801e0e
GD
1137 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1138 }
c801d85f
KB
1139 return dest;
1140}
1141
1142// get all characters before the first occurence of ch
1143// (returns the whole string if ch not found)
c9f78968 1144wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1145{
e87b7833
MB
1146 int iPos = Find(ch);
1147 if ( iPos == wxNOT_FOUND ) iPos = length();
1148 return wxString(*this, 0, iPos);
c801d85f
KB
1149}
1150
1151/// get all characters before the last occurence of ch
1152/// (returns empty string if ch not found)
c9f78968 1153wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1154{
1155 wxString str;
d775fa82 1156 int iPos = Find(ch, true);
3c67202d 1157 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1158 str = wxString(c_str(), iPos);
c801d85f
KB
1159
1160 return str;
1161}
1162
1163/// get all characters after the first occurence of ch
1164/// (returns empty string if ch not found)
c9f78968 1165wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1166{
1167 wxString str;
1168 int iPos = Find(ch);
3c67202d 1169 if ( iPos != wxNOT_FOUND )
c9f78968 1170 str = wx_str() + iPos + 1;
c801d85f
KB
1171
1172 return str;
1173}
1174
1175// replace first (or all) occurences of some substring with another one
8a540c88
VS
1176size_t wxString::Replace(const wxString& strOld,
1177 const wxString& strNew, bool bReplaceAll)
c801d85f 1178{
a8f1f1b2 1179 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1180 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1181 _T("wxString::Replace(): invalid parameter") );
1182
510bb748 1183 size_t uiCount = 0; // count of replacements made
c801d85f 1184
8a540c88
VS
1185 size_t uiOldLen = strOld.length();
1186 size_t uiNewLen = strNew.length();
c801d85f 1187
510bb748 1188 size_t dwPos = 0;
c801d85f 1189
8a540c88 1190 while ( (*this)[dwPos] != wxT('\0') )
510bb748
RN
1191 {
1192 //DO NOT USE STRSTR HERE
1193 //this string can contain embedded null characters,
1194 //so strstr will function incorrectly
8a540c88 1195 dwPos = find(strOld, dwPos);
ad5bb7d6 1196 if ( dwPos == npos )
510bb748 1197 break; // exit the loop
ad5bb7d6 1198 else
510bb748
RN
1199 {
1200 //replace this occurance of the old string with the new one
8a540c88 1201 replace(dwPos, uiOldLen, strNew, uiNewLen);
510bb748 1202
2df0258e
RN
1203 //move up pos past the string that was replaced
1204 dwPos += uiNewLen;
510bb748
RN
1205
1206 //increase replace count
1207 ++uiCount;
ad5bb7d6 1208
510bb748 1209 // stop now?
ad5bb7d6 1210 if ( !bReplaceAll )
510bb748
RN
1211 break; // exit the loop
1212 }
c801d85f 1213 }
c801d85f 1214
510bb748 1215 return uiCount;
c801d85f
KB
1216}
1217
1218bool wxString::IsAscii() const
1219{
a4a44612
VS
1220 for ( const_iterator i = begin(); i != end(); ++i )
1221 {
1222 if ( !(*i).IsAscii() )
1223 return false;
1224 }
1225
1226 return true;
c801d85f 1227}
dd1eaa89 1228
c801d85f
KB
1229bool wxString::IsWord() const
1230{
a4a44612
VS
1231 for ( const_iterator i = begin(); i != end(); ++i )
1232 {
1233 if ( !wxIsalpha(*i) )
1234 return false;
1235 }
1236
1237 return true;
c801d85f 1238}
dd1eaa89 1239
c801d85f
KB
1240bool wxString::IsNumber() const
1241{
a4a44612
VS
1242 if ( empty() )
1243 return true;
1244
1245 const_iterator i = begin();
1246
1247 if ( *i == _T('-') || *i == _T('+') )
1248 ++i;
1249
1250 for ( ; i != end(); ++i )
1251 {
1252 if ( !wxIsdigit(*i) )
1253 return false;
1254 }
1255
1256 return true;
c801d85f
KB
1257}
1258
c801d85f
KB
1259wxString wxString::Strip(stripType w) const
1260{
1261 wxString s = *this;
d775fa82
WS
1262 if ( w & leading ) s.Trim(false);
1263 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1264 return s;
1265}
1266
c801d85f
KB
1267// ---------------------------------------------------------------------------
1268// case conversion
1269// ---------------------------------------------------------------------------
1270
1271wxString& wxString::MakeUpper()
1272{
e87b7833
MB
1273 for ( iterator it = begin(), en = end(); it != en; ++it )
1274 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1275
1276 return *this;
1277}
1278
1279wxString& wxString::MakeLower()
1280{
e87b7833
MB
1281 for ( iterator it = begin(), en = end(); it != en; ++it )
1282 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1283
1284 return *this;
1285}
1286
1287// ---------------------------------------------------------------------------
1288// trimming and padding
1289// ---------------------------------------------------------------------------
1290
d775fa82 1291// some compilers (VC++ 6.0 not to name them) return true for a call to
576c608d
VZ
1292