]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
don't declare inline function with dllexport declaration, this provokes mingw32 warni...
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
6b769f3d 26#endif
c801d85f
KB
27
28#include <ctype.h>
92df97b8
WS
29
30#ifndef __WXWINCE__
31 #include <errno.h>
32#endif
33
c801d85f
KB
34#include <string.h>
35#include <stdlib.h>
9a08c20e 36
ce3ed50d 37#ifdef __SALFORDC__
8898456d 38 #include <clib.h>
ce3ed50d
JS
39#endif
40
8116a0c5 41#include "wx/hashmap.h"
8f93a29f
VS
42
43// string handling functions used by wxString:
44#if wxUSE_UNICODE_UTF8
45 #define wxStringMemcpy memcpy
46 #define wxStringMemcmp memcmp
47 #define wxStringMemchr memchr
48 #define wxStringStrlen strlen
49#else
50 #define wxStringMemcpy wxTmemcpy
51 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
52 #define wxStringMemchr wxTmemchr
53 #define wxStringStrlen wxStrlen
54#endif
8f93a29f 55
e87b7833 56
a7ea63e2
VS
57// ---------------------------------------------------------------------------
58// static class variables definition
59// ---------------------------------------------------------------------------
e87b7833 60
a7ea63e2
VS
// According to the STL, npos _must_ be (size_t)-1
62const size_t wxString::npos = (size_t) -1;
8f93a29f 63
a7ea63e2
VS
64// ----------------------------------------------------------------------------
65// global functions
66// ----------------------------------------------------------------------------
e87b7833 67
a7ea63e2 68#if wxUSE_STD_IOSTREAM
8f93a29f 69
a7ea63e2 70#include <iostream>
8f93a29f 71
a7ea63e2 72wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 73{
04abe4bc 74// FIXME-UTF8: always, not only if wxUSE_UNICODE
a7ea63e2 75#if wxUSE_UNICODE && !defined(__BORLANDC__)
681e4412 76 return os << (const wchar_t*)str.AsWCharBuf();
a7ea63e2 77#else
681e4412 78 return os << (const char*)str.AsCharBuf();
a7ea63e2 79#endif
8f93a29f
VS
80}
81
04abe4bc
VS
82wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
83{
84 return os << str.c_str();
85}
86
87wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
88{
89 return os << str.data();
90}
91
92#ifndef __BORLANDC__
93wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
94{
95 return os << str.data();
96}
97#endif
98
a7ea63e2 99#endif // wxUSE_STD_IOSTREAM
e87b7833 100
81727065
VS
101// ===========================================================================
102// wxString class core
103// ===========================================================================
104
105#if wxUSE_UNICODE_UTF8
106
81727065
VS
107void wxString::PosLenToImpl(size_t pos, size_t len,
108 size_t *implPos, size_t *implLen) const
109{
110 if ( pos == npos )
111 *implPos = npos;
112 else
113 {
114 const_iterator i = begin() + pos;
cf9a878b 115 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
81727065
VS
116 if ( len == npos )
117 *implLen = npos;
118 else
119 {
120 // too large length is interpreted as "to the end of the string"
121 // FIXME-UTF8: verify this is the case in std::string, assert
122 // otherwise
123 if ( pos + len > length() )
124 len = length() - pos;
125
cf9a878b 126 *implLen = (i + len).impl() - i.impl();
81727065
VS
127 }
128 }
129}
130
131#endif // wxUSE_UNICODE_UTF8
132
11aac4ba
VS
133// ----------------------------------------------------------------------------
134// wxCStrData converted strings caching
135// ----------------------------------------------------------------------------
136
132276cf
VS
137// FIXME-UTF8: temporarily disabled because it doesn't work with global
138// string objects; re-enable after fixing this bug and benchmarking
139// performance to see if using a hash is a good idea at all
140#if 0
141
11aac4ba
VS
142// For backward compatibility reasons, it must be possible to assign the value
143// returned by wxString::c_str() to a char* or wchar_t* variable and work with
144// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
145// because the memory would be freed immediately, but it has to be valid as long
146// as the string is not modified, so that code like this still works:
147//
148// const wxChar *s = str.c_str();
149// while ( s ) { ... }
150
151// FIXME-UTF8: not thread safe!
152// FIXME-UTF8: we currently clear the cached conversion only when the string is
153// destroyed, but we should do it when the string is modified, to
154// keep memory usage down
155// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
156// invalidated the cache on every change, we could keep the previous
157// conversion
158// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
159// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
160
161template<typename T>
162static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
163{
6c4ebcda 164 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
165 if ( i != hash.end() )
166 {
167 free(i->second);
168 hash.erase(i);
169 }
170}
171
172#if wxUSE_UNICODE
6c4ebcda
VS
173// NB: non-STL implementation doesn't compile with "const wxString*" key type,
174// so we have to use wxString* here and const-cast when used
11aac4ba
VS
175WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
176 wxStringCharConversionCache);
177static wxStringCharConversionCache gs_stringsCharCache;
178
179const char* wxCStrData::AsChar() const
180{
    // remove previously cached value, if any (see FIXMEs above):
182 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
183
184 // convert the string and keep it:
6c4ebcda
VS
185 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
186 m_str->mb_str().release();
11aac4ba
VS
187
188 return s + m_offset;
189}
190#endif // wxUSE_UNICODE
191
192#if !wxUSE_UNICODE_WCHAR
193WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
194 wxStringWCharConversionCache);
195static wxStringWCharConversionCache gs_stringsWCharCache;
196
197const wchar_t* wxCStrData::AsWChar() const
198{
    // remove previously cached value, if any (see FIXMEs above):
200 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
201
202 // convert the string and keep it:
6c4ebcda
VS
203 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
204 m_str->wc_str().release();
11aac4ba
VS
205
206 return s + m_offset;
207}
208#endif // !wxUSE_UNICODE_WCHAR
209
11aac4ba
VS
210wxString::~wxString()
211{
212#if wxUSE_UNICODE
213 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
214 DeleteStringFromConversionCache(gs_stringsCharCache, this);
215#endif
216#if !wxUSE_UNICODE_WCHAR
217 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
218#endif
219}
132276cf
VS
220#endif
221
222#if wxUSE_UNICODE
223const char* wxCStrData::AsChar() const
224{
225 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
226
227 // convert the string:
228 wxCharBuffer buf(str->mb_str());
229
230 // FIXME-UTF8: do the conversion in-place in the existing buffer
231 if ( str->m_convertedToChar &&
232 strlen(buf) == strlen(str->m_convertedToChar) )
233 {
234 // keep the same buffer for as long as possible, so that several calls
235 // to c_str() in a row still work:
236 strcpy(str->m_convertedToChar, buf);
237 }
238 else
239 {
240 str->m_convertedToChar = buf.release();
241 }
242
243 // and keep it:
132276cf
VS
244 return str->m_convertedToChar + m_offset;
245}
246#endif // wxUSE_UNICODE
247
248#if !wxUSE_UNICODE_WCHAR
249const wchar_t* wxCStrData::AsWChar() const
250{
251 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
252
253 // convert the string:
254 wxWCharBuffer buf(str->wc_str());
255
256 // FIXME-UTF8: do the conversion in-place in the existing buffer
257 if ( str->m_convertedToWChar &&
258 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
259 {
260 // keep the same buffer for as long as possible, so that several calls
261 // to c_str() in a row still work:
262 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
263 }
264 else
265 {
266 str->m_convertedToWChar = buf.release();
267 }
268
269 // and keep it:
132276cf
VS
270 return str->m_convertedToWChar + m_offset;
271}
272#endif // !wxUSE_UNICODE_WCHAR
273
274// ===========================================================================
275// wxString class core
276// ===========================================================================
277
278// ---------------------------------------------------------------------------
279// construction and conversion
280// ---------------------------------------------------------------------------
11aac4ba 281
81727065 282#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
283/* static */
284wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 285 const wxMBConv& conv)
8f93a29f
VS
286{
287 // anything to do?
288 if ( !psz || nLength == 0 )
81727065 289 return SubstrBufFromMB(L"", 0);
8f93a29f
VS
290
291 if ( nLength == npos )
292 nLength = wxNO_LEN;
293
294 size_t wcLen;
295 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
296 if ( !wcLen )
81727065 297 return SubstrBufFromMB(_T(""), 0);
8f93a29f
VS
298 else
299 return SubstrBufFromMB(wcBuf, wcLen);
300}
81727065
VS
301#endif // wxUSE_UNICODE_WCHAR
302
303#if wxUSE_UNICODE_UTF8
304/* static */
305wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
306 const wxMBConv& conv)
307{
308 // FIXME-UTF8: return as-is without copying under UTF8 locale, return
309 // converted string under other locales - needs wxCharBuffer
310 // changes
311
312 // anything to do?
313 if ( !psz || nLength == 0 )
314 return SubstrBufFromMB("", 0);
315
316 if ( nLength == npos )
317 nLength = wxNO_LEN;
318
319 // first convert to wide string:
320 size_t wcLen;
321 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
322 if ( !wcLen )
323 return SubstrBufFromMB("", 0);
324
325 // and then to UTF-8:
326 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8));
327 // widechar -> UTF-8 conversion isn't supposed to ever fail:
328 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
329
330 return buf;
331}
332#endif // wxUSE_UNICODE_UTF8
333
334#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
335/* static */
336wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 337 const wxMBConv& conv)
8f93a29f
VS
338{
339 // anything to do?
340 if ( !pwz || nLength == 0 )
81727065 341 return SubstrBufFromWC("", 0);
8f93a29f
VS
342
343 if ( nLength == npos )
344 nLength = wxNO_LEN;
345
346 size_t mbLen;
347 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
348 if ( !mbLen )
81727065 349 return SubstrBufFromWC("", 0);
8f93a29f
VS
350 else
351 return SubstrBufFromWC(mbBuf, mbLen);
352}
81727065 353#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
354
355
81727065 356#if wxUSE_UNICODE_WCHAR
e87b7833 357
06386448 358//Convert wxString in Unicode mode to a multi-byte string
830f8f11 359const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 360{
81727065 361 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
e87b7833
MB
362}
363
81727065 364#elif wxUSE_UNICODE_UTF8
e87b7833 365
81727065
VS
366const wxWCharBuffer wxString::wc_str() const
367{
368 return wxConvUTF8.cMB2WC(m_impl.c_str(),
369 m_impl.length() + 1 /* size, not length */,
370 NULL);
371}
372
373const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
374{
375 // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
376 // under UTF8 locale
377 // FIXME-UTF8: use wc_str() here once we have buffers with length
378
379 size_t wcLen;
380 wxWCharBuffer wcBuf(
381 wxConvUTF8.cMB2WC(m_impl.c_str(),
382 m_impl.length() + 1 /* size, not length */,
383 &wcLen));
384 if ( !wcLen )
385 return wxCharBuffer("");
386
387 return conv.cWC2MB(wcBuf, wcLen, NULL);
388}
389
390#else // ANSI
eec47cc6 391
7663d0d4 392//Converts this string to a wide character string if unicode
06386448 393//mode is not enabled and wxUSE_WCHAR_T is enabled
830f8f11 394const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 395{
81727065 396 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
265d5cce 397}
7663d0d4 398
e87b7833
MB
399#endif // Unicode/ANSI
400
401// shrink to minimal size (releasing extra memory)
402bool wxString::Shrink()
403{
404 wxString tmp(begin(), end());
405 swap(tmp);
406 return tmp.length() == length();
407}
408
d8a4b666 409// deprecated compatibility code:
a7ea63e2 410#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 411wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
412{
413 return DoGetWriteBuf(nLen);
414}
415
416void wxString::UngetWriteBuf()
417{
418 DoUngetWriteBuf();
419}
420
421void wxString::UngetWriteBuf(size_t nLen)
422{
423 DoUngetWriteBuf(nLen);
424}
a7ea63e2 425#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 426
d8a4b666 427
e87b7833
MB
428// ---------------------------------------------------------------------------
429// data access
430// ---------------------------------------------------------------------------
431
432// all functions are inline in string.h
433
434// ---------------------------------------------------------------------------
e8f59039 435// concatenation operators
e87b7833
MB
436// ---------------------------------------------------------------------------
437
c801d85f 438/*
c801d85f
KB
439 * concatenation functions come in 5 flavours:
440 * string + string
441 * char + string and string + char
442 * C str + string and string + C str
443 */
444
b1801e0e 445wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 446{
992527a5 447#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
448 wxASSERT( str1.IsValid() );
449 wxASSERT( str2.IsValid() );
e87b7833 450#endif
097c080b 451
3458e408
WS
452 wxString s = str1;
453 s += str2;
3168a13f 454
3458e408 455 return s;
c801d85f
KB
456}
457
c9f78968 458wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 459{
992527a5 460#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 461 wxASSERT( str.IsValid() );
e87b7833 462#endif
3168a13f 463
3458e408
WS
464 wxString s = str;
465 s += ch;
097c080b 466
3458e408 467 return s;
c801d85f
KB
468}
469
c9f78968 470wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 471{
992527a5 472#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 473 wxASSERT( str.IsValid() );
e87b7833 474#endif
097c080b 475
3458e408
WS
476 wxString s = ch;
477 s += str;
3168a13f 478
3458e408 479 return s;
c801d85f
KB
480}
481
8f93a29f 482wxString operator+(const wxString& str, const char *psz)
c801d85f 483{
992527a5 484#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 485 wxASSERT( str.IsValid() );
e87b7833 486#endif
097c080b 487
3458e408 488 wxString s;
8f93a29f 489 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
490 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
491 }
492 s += str;
493 s += psz;
3168a13f 494
3458e408 495 return s;
c801d85f
KB
496}
497
8f93a29f 498wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 499{
992527a5 500#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
501 wxASSERT( str.IsValid() );
502#endif
503
504 wxString s;
505 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
506 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
507 }
508 s += str;
509 s += pwz;
510
511 return s;
512}
513
514wxString operator+(const char *psz, const wxString& str)
515{
a7ea63e2
VS
516#if !wxUSE_STL_BASED_WXSTRING
517 wxASSERT( str.IsValid() );
518#endif
519
520 wxString s;
521 if ( !s.Alloc(strlen(psz) + str.length()) ) {
522 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
523 }
524 s = psz;
525 s += str;
526
527 return s;
528}
529
530wxString operator+(const wchar_t *pwz, const wxString& str)
531{
532#if !wxUSE_STL_BASED_WXSTRING
533 wxASSERT( str.IsValid() );
534#endif
535
536 wxString s;
537 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
538 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
539 }
540 s = pwz;
541 s += str;
542
543 return s;
544}
545
546// ---------------------------------------------------------------------------
547// string comparison
548// ---------------------------------------------------------------------------
549
550#ifdef HAVE_STD_STRING_COMPARE
551
552// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
553// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
554// sort strings in characters code point order by sorting the byte sequence
555// in byte values order (i.e. what strcmp() and memcmp() do).
556
557int wxString::compare(const wxString& str) const
558{
559 return m_impl.compare(str.m_impl);
560}
561
562int wxString::compare(size_t nStart, size_t nLen,
563 const wxString& str) const
564{
565 size_t pos, len;
566 PosLenToImpl(nStart, nLen, &pos, &len);
567 return m_impl.compare(pos, len, str.m_impl);
568}
569
570int wxString::compare(size_t nStart, size_t nLen,
571 const wxString& str,
572 size_t nStart2, size_t nLen2) const
573{
574 size_t pos, len;
575 PosLenToImpl(nStart, nLen, &pos, &len);
576
577 size_t pos2, len2;
578 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
579
580 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
581}
582
583int wxString::compare(const char* sz) const
584{
585 return m_impl.compare(ImplStr(sz));
586}
587
588int wxString::compare(const wchar_t* sz) const
589{
590 return m_impl.compare(ImplStr(sz));
591}
592
593int wxString::compare(size_t nStart, size_t nLen,
594 const char* sz, size_t nCount) const
595{
596 size_t pos, len;
597 PosLenToImpl(nStart, nLen, &pos, &len);
598
599 SubstrBufFromMB str(ImplStr(sz, nCount));
600
601 return m_impl.compare(pos, len, str.data, str.len);
602}
603
604int wxString::compare(size_t nStart, size_t nLen,
605 const wchar_t* sz, size_t nCount) const
606{
607 size_t pos, len;
608 PosLenToImpl(nStart, nLen, &pos, &len);
609
610 SubstrBufFromWC str(ImplStr(sz, nCount));
611
612 return m_impl.compare(pos, len, str.data, str.len);
613}
614
615#else // !HAVE_STD_STRING_COMPARE
616
617static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
618 const wxStringCharType* s2, size_t l2)
619{
620 if( l1 == l2 )
621 return wxStringMemcmp(s1, s2, l1);
622 else if( l1 < l2 )
623 {
624 int ret = wxStringMemcmp(s1, s2, l1);
625 return ret == 0 ? -1 : ret;
626 }
627 else
628 {
629 int ret = wxStringMemcmp(s1, s2, l2);
630 return ret == 0 ? +1 : ret;
631 }
632}
633
634int wxString::compare(const wxString& str) const
635{
636 return ::wxDoCmp(m_impl.data(), m_impl.length(),
637 str.m_impl.data(), str.m_impl.length());
638}
639
640int wxString::compare(size_t nStart, size_t nLen,
641 const wxString& str) const
642{
643 wxASSERT(nStart <= length());
644 size_type strLen = length() - nStart;
645 nLen = strLen < nLen ? strLen : nLen;
646
647 size_t pos, len;
648 PosLenToImpl(nStart, nLen, &pos, &len);
649
650 return ::wxDoCmp(m_impl.data() + pos, len,
651 str.m_impl.data(), str.m_impl.length());
652}
653
654int wxString::compare(size_t nStart, size_t nLen,
655 const wxString& str,
656 size_t nStart2, size_t nLen2) const
657{
658 wxASSERT(nStart <= length());
659 wxASSERT(nStart2 <= str.length());
660 size_type strLen = length() - nStart,
661 strLen2 = str.length() - nStart2;
662 nLen = strLen < nLen ? strLen : nLen;
663 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
664
665 size_t pos, len;
666 PosLenToImpl(nStart, nLen, &pos, &len);
667 size_t pos2, len2;
668 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
669
670 return ::wxDoCmp(m_impl.data() + pos, len,
671 str.m_impl.data() + pos2, len2);
672}
673
674int wxString::compare(const char* sz) const
675{
676 SubstrBufFromMB str(ImplStr(sz, npos));
677 if ( str.len == npos )
678 str.len = wxStringStrlen(str.data);
679 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
680}
681
682int wxString::compare(const wchar_t* sz) const
683{
684 SubstrBufFromWC str(ImplStr(sz, npos));
685 if ( str.len == npos )
686 str.len = wxStringStrlen(str.data);
687 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
688}
689
690int wxString::compare(size_t nStart, size_t nLen,
691 const char* sz, size_t nCount) const
692{
693 wxASSERT(nStart <= length());
694 size_type strLen = length() - nStart;
695 nLen = strLen < nLen ? strLen : nLen;
097c080b 696
a7ea63e2
VS
697 size_t pos, len;
698 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 699
a7ea63e2
VS
700 SubstrBufFromMB str(ImplStr(sz, nCount));
701 if ( str.len == npos )
702 str.len = wxStringStrlen(str.data);
703
704 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
705}
706
a7ea63e2
VS
707int wxString::compare(size_t nStart, size_t nLen,
708 const wchar_t* sz, size_t nCount) const
8f93a29f 709{
a7ea63e2
VS
710 wxASSERT(nStart <= length());
711 size_type strLen = length() - nStart;
712 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 713
a7ea63e2
VS
714 size_t pos, len;
715 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 716
a7ea63e2
VS
717 SubstrBufFromWC str(ImplStr(sz, nCount));
718 if ( str.len == npos )
719 str.len = wxStringStrlen(str.data);
720
721 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
722}
723
a7ea63e2
VS
724#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
725
726
8f93a29f
VS
727// ---------------------------------------------------------------------------
728// find_{first,last}_[not]_of functions
729// ---------------------------------------------------------------------------
730
731#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 732
8f93a29f
VS
733// NB: All these functions are implemented with the argument being wxChar*,
734// i.e. widechar string in any Unicode build, even though native string
735// representation is char* in the UTF-8 build. This is because we couldn't
736// use memchr() to determine if a character is in a set encoded as UTF-8.
737
738size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 739{
8f93a29f 740 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
741}
742
8f93a29f 743size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 744{
8f93a29f 745 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
746}
747
8f93a29f 748size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 749{
8f93a29f 750 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 751
8f93a29f
VS
752 size_t idx = nStart;
753 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 754 {
8f93a29f
VS
755 if ( wxTmemchr(sz, *i, n) )
756 return idx;
dcb68102 757 }
8f93a29f
VS
758
759 return npos;
760}
761
762size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
763{
764 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
765
766 size_t idx = nStart;
767 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 768 {
8f93a29f
VS
769 if ( !wxTmemchr(sz, *i, n) )
770 return idx;
771 }
772
773 return npos;
774}
775
776
777size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
778{
779 return find_last_of(sz, nStart, wxStrlen(sz));
780}
781
782size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
783{
784 return find_last_not_of(sz, nStart, wxStrlen(sz));
785}
786
787size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
788{
789 size_t len = length();
790
791 if ( nStart == npos )
792 {
793 nStart = len - 1;
dcb68102 794 }
2c09fb3b 795 else
dcb68102 796 {
8f93a29f 797 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 798 }
8f93a29f
VS
799
800 size_t idx = nStart;
801 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
802 i != rend(); --idx, ++i )
803 {
804 if ( wxTmemchr(sz, *i, n) )
805 return idx;
806 }
807
808 return npos;
dcb68102
RN
809}
810
8f93a29f 811size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 812{
8f93a29f
VS
813 size_t len = length();
814
815 if ( nStart == npos )
816 {
817 nStart = len - 1;
818 }
819 else
820 {
821 wxASSERT_MSG( nStart <= len, _T("invalid index") );
822 }
823
824 size_t idx = nStart;
825 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
826 i != rend(); --idx, ++i )
827 {
828 if ( !wxTmemchr(sz, *i, n) )
829 return idx;
830 }
831
832 return npos;
dcb68102
RN
833}
834
8f93a29f 835size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 836{
8f93a29f
VS
837 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
838
839 size_t idx = nStart;
840 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
841 {
842 if ( *i != ch )
843 return idx;
844 }
845
846 return npos;
847}
848
849size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
850{
851 size_t len = length();
852
853 if ( nStart == npos )
854 {
855 nStart = len - 1;
856 }
857 else
858 {
859 wxASSERT_MSG( nStart <= len, _T("invalid index") );
860 }
861
862 size_t idx = nStart;
863 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
864 i != rend(); --idx, ++i )
865 {
866 if ( *i != ch )
867 return idx;
868 }
869
870 return npos;
871}
872
873// the functions above were implemented for wchar_t* arguments in Unicode
874// build and char* in ANSI build; below are implementations for the other
875// version:
876#if wxUSE_UNICODE
877 #define wxOtherCharType char
878 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
879#else
880 #define wxOtherCharType wchar_t
881 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
882#endif
883
884size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
885 { return find_first_of(STRCONV(sz), nStart); }
886
887size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
888 size_t n) const
889 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
890size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
891 { return find_last_of(STRCONV(sz), nStart); }
892size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
893 size_t n) const
894 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
895size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
896 { return find_first_not_of(STRCONV(sz), nStart); }
897size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
898 size_t n) const
899 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
900size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
901 { return find_last_not_of(STRCONV(sz), nStart); }
902size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
903 size_t n) const
904 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
905
906#undef wxOtherCharType
907#undef STRCONV
908
909#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
910
911// ===========================================================================
912// other common string functions
913// ===========================================================================
914
915int wxString::CmpNoCase(const wxString& s) const
916{
917 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
918
919 size_t idx = 0;
920 const_iterator i1 = begin();
921 const_iterator end1 = end();
922 const_iterator i2 = s.begin();
923 const_iterator end2 = s.end();
924
925 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
926 {
927 wxUniChar lower1 = (wxChar)wxTolower(*i1);
928 wxUniChar lower2 = (wxChar)wxTolower(*i2);
929 if ( lower1 != lower2 )
930 return lower1 < lower2 ? -1 : 1;
931 }
932
933 size_t len1 = length();
934 size_t len2 = s.length();
dcb68102 935
8f93a29f
VS
936 if ( len1 < len2 )
937 return -1;
938 else if ( len1 > len2 )
939 return 1;
940 return 0;
dcb68102
RN
941}
942
943
b1ac3b56 944#if wxUSE_UNICODE
e015c2a3 945
cf6bedce
SC
946#ifdef __MWERKS__
947#ifndef __SCHAR_MAX__
948#define __SCHAR_MAX__ 127
949#endif
950#endif
951
e015c2a3 952wxString wxString::FromAscii(const char *ascii)
b1ac3b56
RR
953{
954 if (!ascii)
955 return wxEmptyString;
e015c2a3 956
b1ac3b56
RR
957 size_t len = strlen( ascii );
958 wxString res;
e015c2a3
VZ
959
960 if ( len )
961 {
962 wxStringBuffer buf(res, len);
963
964 wchar_t *dest = buf;
965
966 for ( ;; )
967 {
968 if ( (*dest++ = (wchar_t)(unsigned char)*ascii++) == L'\0' )
969 break;
970 }
971 }
972
b1ac3b56
RR
973 return res;
974}
975
2b5f62a0
VZ
976wxString wxString::FromAscii(const char ascii)
977{
978 // What do we do with '\0' ?
979
980 wxString res;
981 res += (wchar_t)(unsigned char) ascii;
8760bc65 982
2b5f62a0
VZ
983 return res;
984}
985
b1ac3b56
RR
986const wxCharBuffer wxString::ToAscii() const
987{
e015c2a3
VZ
988 // this will allocate enough space for the terminating NUL too
989 wxCharBuffer buffer(length());
b1ac3b56 990
be7eecf8 991
6e394fc6 992 char *dest = buffer.data();
e015c2a3
VZ
993
994 const wchar_t *pwc = c_str();
995 for ( ;; )
b1ac3b56 996 {
6e394fc6 997 *dest++ = (char)(*pwc > SCHAR_MAX ? wxT('_') : *pwc);
e015c2a3
VZ
998
999 // the output string can't have embedded NULs anyhow, so we can safely
1000 // stop at first of them even if we do have any
1001 if ( !*pwc++ )
1002 break;
b1ac3b56 1003 }
e015c2a3 1004
b1ac3b56
RR
1005 return buffer;
1006}
e015c2a3
VZ
1007
1008#endif // Unicode
b1ac3b56 1009
c801d85f 1010// extract string of length nCount starting at nFirst
c801d85f
KB
1011wxString wxString::Mid(size_t nFirst, size_t nCount) const
1012{
73f507f5 1013 size_t nLen = length();
30d9011f 1014
73f507f5
WS
1015 // default value of nCount is npos and means "till the end"
1016 if ( nCount == npos )
1017 {
1018 nCount = nLen - nFirst;
1019 }
30d9011f 1020
73f507f5
WS
1021 // out-of-bounds requests return sensible things
1022 if ( nFirst + nCount > nLen )
1023 {
1024 nCount = nLen - nFirst;
1025 }
c801d85f 1026
73f507f5
WS
1027 if ( nFirst > nLen )
1028 {
1029 // AllocCopy() will return empty string
1030 return wxEmptyString;
1031 }
c801d85f 1032
73f507f5
WS
1033 wxString dest(*this, nFirst, nCount);
1034 if ( dest.length() != nCount )
1035 {
1036 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1037 }
30d9011f 1038
73f507f5 1039 return dest;
c801d85f
KB
1040}
1041
e87b7833 1042// check that the string starts with prefix and return the rest of the string
d775fa82 1043// in the provided pointer if it is not NULL, otherwise return false
f6bcfd97
BP
1044bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
1045{
1046 wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
1047
1048 // first check if the beginning of the string matches the prefix: note
1049 // that we don't have to check that we don't run out of this string as
1050 // when we reach the terminating NUL, either prefix string ends too (and
1051 // then it's ok) or we break out of the loop because there is no match
1052 const wxChar *p = c_str();
1053 while ( *prefix )
1054 {
1055 if ( *prefix++ != *p++ )
1056 {
1057 // no match
d775fa82 1058 return false;
f6bcfd97
BP
1059 }
1060 }
1061
1062 if ( rest )
1063 {
1064 // put the rest of the string into provided pointer
1065 *rest = p;
1066 }
1067
d775fa82 1068 return true;
f6bcfd97
BP
1069}
1070
3affcd07
VZ
1071
1072// check that the string ends with suffix and return the rest of it in the
1073// provided pointer if it is not NULL, otherwise return false
1074bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
1075{
1076 wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
1077
1078 int start = length() - wxStrlen(suffix);
81727065
VS
1079
1080 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1081 return false;
1082
1083 if ( rest )
1084 {
1085 // put the rest of the string into provided pointer
1086 rest->assign(*this, 0, start);
1087 }
1088
1089 return true;
1090}
1091
1092
c801d85f
KB
1093// extract nCount last (rightmost) characters
1094wxString wxString::Right(size_t nCount) const
1095{
e87b7833
MB
1096 if ( nCount > length() )
1097 nCount = length();
c801d85f 1098
e87b7833
MB
1099 wxString dest(*this, length() - nCount, nCount);
1100 if ( dest.length() != nCount ) {
b1801e0e
GD
1101 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1102 }
c801d85f
KB
1103 return dest;
1104}
1105
1106// get all characters after the last occurence of ch
1107// (returns the whole string if ch not found)
c9f78968 1108wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1109{
1110 wxString str;
d775fa82 1111 int iPos = Find(ch, true);
3c67202d 1112 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1113 str = *this;
1114 else
c9f78968 1115 str = wx_str() + iPos + 1;
c801d85f
KB
1116
1117 return str;
1118}
1119
1120// extract nCount first (leftmost) characters
1121wxString wxString::Left(size_t nCount) const
1122{
e87b7833
MB
1123 if ( nCount > length() )
1124 nCount = length();
c801d85f 1125
e87b7833
MB
1126 wxString dest(*this, 0, nCount);
1127 if ( dest.length() != nCount ) {
b1801e0e
GD
1128 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1129 }
c801d85f
KB
1130 return dest;
1131}
1132
1133// get all characters before the first occurence of ch
1134// (returns the whole string if ch not found)
c9f78968 1135wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1136{
e87b7833
MB
1137 int iPos = Find(ch);
1138 if ( iPos == wxNOT_FOUND ) iPos = length();
1139 return wxString(*this, 0, iPos);
c801d85f
KB
1140}
1141
1142/// get all characters before the last occurence of ch
1143/// (returns empty string if ch not found)
c9f78968 1144wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1145{
1146 wxString str;
d775fa82 1147 int iPos = Find(ch, true);
3c67202d 1148 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1149 str = wxString(c_str(), iPos);
c801d85f
KB
1150
1151 return str;
1152}
1153
1154/// get all characters after the first occurence of ch
1155/// (returns empty string if ch not found)
c9f78968 1156wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1157{
1158 wxString str;
1159 int iPos = Find(ch);
3c67202d 1160 if ( iPos != wxNOT_FOUND )
c9f78968 1161 str = wx_str() + iPos + 1;
c801d85f
KB
1162
1163 return str;
1164}
1165
1166// replace first (or all) occurences of some substring with another one
8a540c88
VS
1167size_t wxString::Replace(const wxString& strOld,
1168 const wxString& strNew, bool bReplaceAll)
c801d85f 1169{
a8f1f1b2 1170 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1171 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1172 _T("wxString::Replace(): invalid parameter") );
1173
510bb748 1174 size_t uiCount = 0; // count of replacements made
c801d85f 1175
8a540c88
VS
1176 size_t uiOldLen = strOld.length();
1177 size_t uiNewLen = strNew.length();
c801d85f 1178
510bb748 1179 size_t dwPos = 0;
c801d85f 1180
8a540c88 1181 while ( (*this)[dwPos] != wxT('\0') )
510bb748
RN
1182 {
1183 //DO NOT USE STRSTR HERE
1184 //this string can contain embedded null characters,
1185 //so strstr will function incorrectly
8a540c88 1186 dwPos = find(strOld, dwPos);
ad5bb7d6 1187 if ( dwPos == npos )
510bb748 1188 break; // exit the loop
ad5bb7d6 1189 else
510bb748
RN
1190 {
1191 //replace this occurance of the old string with the new one
8a540c88 1192 replace(dwPos, uiOldLen, strNew, uiNewLen);
510bb748 1193
2df0258e
RN
1194 //move up pos past the string that was replaced
1195 dwPos += uiNewLen;
510bb748
RN
1196
1197 //increase replace count
1198 ++uiCount;
ad5bb7d6 1199
510bb748 1200 // stop now?
ad5bb7d6 1201 if ( !bReplaceAll )
510bb748
RN
1202 break; // exit the loop
1203 }
c801d85f 1204 }
c801d85f 1205
510bb748 1206 return uiCount;
c801d85f
KB
1207}
1208
1209bool wxString::IsAscii() const
1210{
a4a44612
VS
1211 for ( const_iterator i = begin(); i != end(); ++i )
1212 {
1213 if ( !(*i).IsAscii() )
1214 return false;
1215 }
1216
1217 return true;
c801d85f 1218}
dd1eaa89 1219
c801d85f
KB
1220bool wxString::IsWord() const
1221{
a4a44612
VS
1222 for ( const_iterator i = begin(); i != end(); ++i )
1223 {
1224 if ( !wxIsalpha(*i) )
1225 return false;
1226 }
1227
1228 return true;
c801d85f 1229}
dd1eaa89 1230
c801d85f
KB
1231bool wxString::IsNumber() const
1232{
a4a44612
VS
1233 if ( empty() )
1234 return true;
1235
1236 const_iterator i = begin();
1237
1238 if ( *i == _T('-') || *i == _T('+') )
1239 ++i;
1240
1241 for ( ; i != end(); ++i )
1242 {
1243 if ( !wxIsdigit(*i) )
1244 return false;
1245 }
1246
1247 return true;
c801d85f
KB
1248}
1249
c801d85f
KB
1250wxString wxString::Strip(stripType w) const
1251{
1252 wxString s = *this;
d775fa82
WS
1253 if ( w & leading ) s.Trim(false);
1254 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1255 return s;
1256}
1257
c801d85f
KB
1258// ---------------------------------------------------------------------------
1259// case conversion
1260// ---------------------------------------------------------------------------
1261
1262wxString& wxString::MakeUpper()
1263{
e87b7833
MB
1264 for ( iterator it = begin(), en = end(); it != en; ++it )
1265 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1266
1267 return *this;
1268}
1269
1270wxString& wxString::MakeLower()
1271{
e87b7833
MB
1272 for ( iterator it = begin(), en = end(); it != en; ++it )
1273 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1274
1275 return *this;
1276}
1277
1278// ---------------------------------------------------------------------------
1279// trimming and padding
1280// ---------------------------------------------------------------------------
1281
d775fa82 1282// some compilers (VC++ 6.0 not to name them) return true for a call to
576c608d
VZ
1283