]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
a398b215e5fcd07846fa6f6e1c9c813be1603ffd
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 /*
14 * About ref counting:
15 * 1) all empty strings use g_strEmpty, nRefs = -1 (set in Init())
16 * 2) AllocBuffer() sets nRefs to 1, Lock() increments it by one
17 * 3) Unlock() decrements nRefs and frees memory if it goes to 0
18 */
19
20 // ===========================================================================
21 // headers, declarations, constants
22 // ===========================================================================
23
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
26
27 #ifdef __BORLANDC__
28 #pragma hdrstop
29 #endif
30
31 #ifndef WX_PRECOMP
32 #include "wx/string.h"
33 #endif
34
35 #include <ctype.h>
36
37 #ifndef __WXWINCE__
38 #include <errno.h>
39 #endif
40
41 #include <string.h>
42 #include <stdlib.h>
43
44 #ifdef __SALFORDC__
45 #include <clib.h>
46 #endif
47
48 #include "wx/hashmap.h"
49
50 // string handling functions used by wxString:
51 #if wxUSE_UNICODE_UTF8
52 #define wxStringMemcpy memcpy
53 #define wxStringMemcmp memcmp
54 #define wxStringMemchr memchr
55 #define wxStringStrlen strlen
56 #else
57 #define wxStringMemcpy wxTmemcpy
58 #define wxStringMemcmp wxTmemcmp
59 #define wxStringMemchr wxTmemchr
60 #define wxStringStrlen wxStrlen
61 #endif
62
63
64 // ---------------------------------------------------------------------------
65 // static class variables definition
66 // ---------------------------------------------------------------------------
67
// For compatibility with std::string, npos _must_ be (size_t)-1
69 const size_t wxString::npos = (size_t) -1;
70
71 // ----------------------------------------------------------------------------
72 // global functions
73 // ----------------------------------------------------------------------------
74
75 #if wxUSE_STD_IOSTREAM
76
77 #include <iostream>
78
79 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
80 {
81 // FIXME-UTF8: always, not only if wxUSE_UNICODE
82 #if wxUSE_UNICODE && !defined(__BORLANDC__)
83 return os << (const wchar_t*)str.AsWCharBuf();
84 #else
85 return os << (const char*)str.AsCharBuf();
86 #endif
87 }
88
89 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
90 {
91 return os << str.c_str();
92 }
93
94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
95 {
96 return os << str.data();
97 }
98
99 #ifndef __BORLANDC__
100 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
101 {
102 return os << str.data();
103 }
104 #endif
105
106 #endif // wxUSE_STD_IOSTREAM
107
108 // ===========================================================================
109 // wxString class core
110 // ===========================================================================
111
112 #if wxUSE_UNICODE_UTF8
113
114 // ---------------------------------------------------------------------------
115 // UTF-8 operations
116 // ---------------------------------------------------------------------------
117
118 //
119 // Table 3.1B from Unicode spec: Legal UTF-8 Byte Sequences
120 //
121 // Code Points | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
122 // -------------------+----------+----------+----------+----------+
123 // U+0000..U+007F | 00..7F | | | |
124 // U+0080..U+07FF | C2..DF | 80..BF | | |
125 // U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
126 // U+1000..U+FFFF | E1..EF | 80..BF | 80..BF | |
127 // U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
128 // U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
129 // U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
130 // -------------------+----------+----------+----------+----------+
131
132 bool wxString::IsValidUtf8String(const char *str)
133 {
134 if ( !str )
135 return true; // empty string is UTF8 string
136
137 const unsigned char *c = (const unsigned char*)str;
138
139 for ( ; *c; ++c )
140 {
141 unsigned char b = *c;
142
143 if ( b <= 0x7F ) // 00..7F
144 continue;
145
146 else if ( b < 0xC2 ) // invalid lead bytes: 80..C1
147 return false;
148
149 // two-byte sequences:
150 else if ( b <= 0xDF ) // C2..DF
151 {
152 b = *(++c);
153 if ( !(b >= 0x80 && b <= 0xBF ) )
154 return false;
155 }
156
157 // three-byte sequences:
158 else if ( b == 0xE0 )
159 {
160 b = *(++c);
161 if ( !(b >= 0xA0 && b <= 0xBF ) )
162 return false;
163 b = *(++c);
164 if ( !(b >= 0x80 && b <= 0xBF ) )
165 return false;
166 }
167 else if ( b <= 0xEF ) // E1..EF
168 {
169 for ( int i = 0; i < 2; ++i )
170 {
171 b = *(++c);
172 if ( !(b >= 0x80 && b <= 0xBF ) )
173 return false;
174 }
175 }
176
177 // four-byte sequences:
178 else if ( b == 0xF0 )
179 {
180 b = *(++c);
181 if ( !(b >= 0x90 && b <= 0xBF ) )
182 return false;
183 for ( int i = 0; i < 2; ++i )
184 {
185 b = *(++c);
186 if ( !(b >= 0x80 && b <= 0xBF ) )
187 return false;
188 }
189 }
190 else if ( b <= 0xF3 ) // F1..F3
191 {
192 for ( int i = 0; i < 3; ++i )
193 {
194 b = *(++c);
195 if ( !(b >= 0x80 && b <= 0xBF ) )
196 return false;
197 }
198 }
199 else if ( b == 0xF4 )
200 {
201 b = *(++c);
202 if ( !(b >= 0x80 && b <= 0x8F ) )
203 return false;
204 for ( int i = 0; i < 2; ++i )
205 {
206 b = *(++c);
207 if ( !(b >= 0x80 && b <= 0xBF ) )
208 return false;
209 }
210 }
211 else // otherwise, it's invalid lead byte
212 return false;
213 }
214
215 return true;
216 }
217
218 #ifdef __WXDEBUG__
219 /* static */
220 bool wxString::IsValidUtf8LeadByte(unsigned char c)
221 {
222 return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
223 }
224 #endif
225
226 unsigned char wxString::ms_utf8IterTable[256] = {
227 // single-byte sequences (ASCII):
228 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
229 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
230 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F
231 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F
232 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F
233 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F
234 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F
235 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F
236
237 // these are invalid, we use step 1 to skip
238 // over them (should never happen):
239 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80..8F
240 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90..9F
241 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0..AF
242 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0..BF
243 1, 1, // C0,C1
244
245 // two-byte sequences:
246 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C2..CF
247 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
248
249 // three-byte sequences:
250 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF
251
252 // four-byte sequences:
253 4, 4, 4, 4, 4, // F0..F4
254
255 // these are invalid again (5- or 6-byte
256 // sequences and sequences for code points
257 // above U+10FFFF, as restricted by RFC 3629):
258 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F5..FF
259 };
260
261 /* static */
262 void wxString::DecIter(wxStringImpl::const_iterator& i)
263 {
264 wxASSERT( IsValidUtf8LeadByte(*i) );
265
266 // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
267 // binary), so we just have to go back until we hit a byte that is either
268 // < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in binary; this
269 // includes some invalid values, but we can ignore it here, because we
270 // assume valid UTF-8 input for the purpose of efficient implementation).
271 --i;
272 while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
273 --i;
274 }
275
276 /* static */
277 void wxString::DecIter(wxStringImpl::iterator& i)
278 {
279 // FIXME-UTF8: use template instead
280 wxASSERT( IsValidUtf8LeadByte(*i) );
281 --i;
282 while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
283 --i;
284 }
285
286 /* static */
287 wxStringImpl::const_iterator
288 wxString::AddToIter(wxStringImpl::const_iterator i, int n)
289 {
290 wxStringImpl::const_iterator out(i);
291
292 if ( n > 0 )
293 {
294 for ( int j = 0; j < n; ++j )
295 IncIter(out);
296 }
297 else if ( n < 0 )
298 {
299 for ( int j = 0; j > n; --j )
300 DecIter(out);
301 }
302
303 return out;
304 }
305
306 wxStringImpl::iterator
307 wxString::AddToIter(wxStringImpl::iterator i, int n)
308 {
309 // FIXME-UTF8: use template instead
310 wxStringImpl::iterator out(i);
311
312 if ( n > 0 )
313 {
314 for ( int j = 0; j < n; ++j )
315 IncIter(out);
316 }
317 else if ( n < 0 )
318 {
319 for ( int j = 0; j > n; --j )
320 DecIter(out);
321 }
322
323 return out;
324 }
325
326
327 /* static */
328 int wxString::DiffIters(wxStringImpl::const_iterator i1,
329 wxStringImpl::const_iterator i2)
330 {
331 int dist = 0;
332
333 if ( i1 < i2 )
334 {
335 while ( i1 != i2 )
336 {
337 IncIter(i1);
338 dist--;
339 }
340 }
341 else if ( i2 < i1 )
342 {
343 while ( i2 != i1 )
344 {
345 IncIter(i2);
346 dist++;
347 }
348 }
349
350 return dist;
351 }
352
353 int wxString::DiffIters(wxStringImpl::iterator i1, wxStringImpl::iterator i2)
354 {
355 // FIXME-UTF8: use template instead
356 int dist = 0;
357
358 if ( i1 < i2 )
359 {
360 while ( i1 != i2 )
361 {
362 IncIter(i1);
363 dist--;
364 }
365 }
366 else if ( i2 < i1 )
367 {
368 while ( i2 != i1 )
369 {
370 IncIter(i2);
371 dist++;
372 }
373 }
374
375 return dist;
376 }
377
378 /* static */
379 wxString::Utf8CharBuffer wxString::EncodeChar(wxUniChar ch)
380 {
381 Utf8CharBuffer buf;
382 char *out = buf.data;
383
384 wxUniChar::value_type code = ch.GetValue();
385
386 // Char. number range | UTF-8 octet sequence
387 // (hexadecimal) | (binary)
388 // ----------------------+---------------------------------------------
389 // 0000 0000 - 0000 007F | 0xxxxxxx
390 // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
391 // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
392 // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
393 //
394 // Code point value is stored in bits marked with 'x', lowest-order bit
395 // of the value on the right side in the diagram above.
396 // (from RFC 3629)
397
398 if ( code <= 0x7F )
399 {
400 out[1] = 0;
401 out[0] = (char)code;
402 }
403 else if ( code <= 0x07FF )
404 {
405 out[2] = 0;
406 // NB: this line takes 6 least significant bits, encodes them as
407 // 10xxxxxx and discards them so that the next byte can be encoded:
408 out[1] = 0x80 | (code & 0x3F); code >>= 6;
409 out[0] = 0xC0 | code;
410 }
411 else if ( code < 0xFFFF )
412 {
413 out[3] = 0;
414 out[2] = 0x80 | (code & 0x3F); code >>= 6;
415 out[1] = 0x80 | (code & 0x3F); code >>= 6;
416 out[0] = 0xE0 | code;
417 }
418 else if ( code <= 0x10FFFF )
419 {
420 out[4] = 0;
421 out[3] = 0x80 | (code & 0x3F); code >>= 6;
422 out[2] = 0x80 | (code & 0x3F); code >>= 6;
423 out[1] = 0x80 | (code & 0x3F); code >>= 6;
424 out[0] = 0xF0 | code;
425 }
426 else
427 {
428 wxFAIL_MSG( _T("trying to encode undefined Unicode character") );
429 out[0] = 0;
430 }
431
432 return buf;
433 }
434
435 /* static */
436 wxUniChar wxUniCharRef::DecodeChar(wxStringImpl::const_iterator i)
437 {
438 wxASSERT( wxString::IsValidUtf8LeadByte(*i) ); // FIXME-UTF8: no "wxString::"
439
440 wxUniChar::value_type code = 0;
441 size_t len = wxString::GetUtf8CharLength(*i);
442 wxASSERT_MSG( len <= 4, _T("invalid UTF-8 sequence length") );
443
444 // Char. number range | UTF-8 octet sequence
445 // (hexadecimal) | (binary)
446 // ----------------------+---------------------------------------------
447 // 0000 0000 - 0000 007F | 0xxxxxxx
448 // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
449 // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
450 // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
451 //
452 // Code point value is stored in bits marked with 'x', lowest-order bit
453 // of the value on the right side in the diagram above.
454 // (from RFC 3629)
455
456 // mask to extract lead byte's value ('x' bits above), by sequence's length:
457 static const unsigned char s_leadValueMask[4] = { 0x7F, 0x1F, 0x0F, 0x07 };
458 #ifdef __WXDEBUG__
459 // mask and value of lead byte's most significant bits, by length:
460 static const unsigned char s_leadMarkerMask[4] = { 0x80, 0xE0, 0xF0, 0xF8 };
461 static const unsigned char s_leadMarkerVal[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
462 #endif
463
464 // extract the lead byte's value bits:
465 wxASSERT_MSG( ((unsigned char)*i & s_leadMarkerMask[len-1]) ==
466 s_leadMarkerVal[len-1],
467 _T("invalid UTF-8 lead byte") );
468 code = (unsigned char)*i & s_leadValueMask[len-1];
469
470 // all remaining bytes, if any, are handled in the same way regardless of
471 // sequence's length:
472 for ( ++i ; len > 1; --len, ++i )
473 {
474 wxASSERT_MSG( ((unsigned char)*i & 0xC0) == 0x80,
475 _T("invalid UTF-8 byte") );
476
477 code <<= 6;
478 code |= (unsigned char)*i & 0x3F;
479 }
480
481 return wxUniChar(code);
482 }
483
484 /* static */
485 wxCharBuffer wxString::EncodeNChars(size_t n, wxUniChar ch)
486 {
487 Utf8CharBuffer once(EncodeChar(ch));
488 // the IncIter() table can be used to determine the length of ch's encoding:
489 size_t len = ms_utf8IterTable[(unsigned char)once.data[0]];
490
491 wxCharBuffer buf(n * len);
492 char *ptr = buf.data();
493 for ( size_t i = 0; i < n; i++, ptr += len )
494 {
495 memcpy(ptr, once.data, len);
496 }
497
498 return buf;
499 }
500
501
502 void wxString::PosLenToImpl(size_t pos, size_t len,
503 size_t *implPos, size_t *implLen) const
504 {
505 if ( pos == npos )
506 *implPos = npos;
507 else
508 {
509 const_iterator i = begin() + pos;
510 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
511 if ( len == npos )
512 *implLen = npos;
513 else
514 {
515 // too large length is interpreted as "to the end of the string"
516 // FIXME-UTF8: verify this is the case in std::string, assert
517 // otherwise
518 if ( pos + len > length() )
519 len = length() - pos;
520
521 *implLen = (i + len).impl() - i.impl();
522 }
523 }
524 }
525
526 #endif // wxUSE_UNICODE_UTF8
527
528 // ----------------------------------------------------------------------------
529 // wxCStrData converted strings caching
530 // ----------------------------------------------------------------------------
531
532 // FIXME-UTF8: temporarily disabled because it doesn't work with global
533 // string objects; re-enable after fixing this bug and benchmarking
534 // performance to see if using a hash is a good idea at all
535 #if 0
536
537 // For backward compatibility reasons, it must be possible to assign the value
538 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
539 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
540 // because the memory would be freed immediately, but it has to be valid as long
541 // as the string is not modified, so that code like this still works:
542 //
543 // const wxChar *s = str.c_str();
544 // while ( s ) { ... }
545
546 // FIXME-UTF8: not thread safe!
547 // FIXME-UTF8: we currently clear the cached conversion only when the string is
548 // destroyed, but we should do it when the string is modified, to
549 // keep memory usage down
550 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
551 // invalidated the cache on every change, we could keep the previous
552 // conversion
553 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
554 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
555
556 template<typename T>
557 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
558 {
559 typename T::iterator i = hash.find(wxConstCast(s, wxString));
560 if ( i != hash.end() )
561 {
562 free(i->second);
563 hash.erase(i);
564 }
565 }
566
567 #if wxUSE_UNICODE
568 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
569 // so we have to use wxString* here and const-cast when used
570 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
571 wxStringCharConversionCache);
572 static wxStringCharConversionCache gs_stringsCharCache;
573
574 const char* wxCStrData::AsChar() const
575 {
576 // remove previously cache value, if any (see FIXMEs above):
577 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
578
579 // convert the string and keep it:
580 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
581 m_str->mb_str().release();
582
583 return s + m_offset;
584 }
585 #endif // wxUSE_UNICODE
586
587 #if !wxUSE_UNICODE_WCHAR
588 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
589 wxStringWCharConversionCache);
590 static wxStringWCharConversionCache gs_stringsWCharCache;
591
592 const wchar_t* wxCStrData::AsWChar() const
593 {
594 // remove previously cache value, if any (see FIXMEs above):
595 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
596
597 // convert the string and keep it:
598 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
599 m_str->wc_str().release();
600
601 return s + m_offset;
602 }
603 #endif // !wxUSE_UNICODE_WCHAR
604
605 wxString::~wxString()
606 {
607 #if wxUSE_UNICODE
608 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
609 DeleteStringFromConversionCache(gs_stringsCharCache, this);
610 #endif
611 #if !wxUSE_UNICODE_WCHAR
612 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
613 #endif
614 }
615 #endif
616
617 #if wxUSE_UNICODE
618 const char* wxCStrData::AsChar() const
619 {
620 wxString *str = wxConstCast(m_str, wxString);
621
622 // convert the string:
623 wxCharBuffer buf(str->mb_str());
624
625 // FIXME-UTF8: do the conversion in-place in the existing buffer
626 if ( str->m_convertedToChar &&
627 strlen(buf) == strlen(str->m_convertedToChar) )
628 {
629 // keep the same buffer for as long as possible, so that several calls
630 // to c_str() in a row still work:
631 strcpy(str->m_convertedToChar, buf);
632 }
633 else
634 {
635 str->m_convertedToChar = buf.release();
636 }
637
638 // and keep it:
639 return str->m_convertedToChar + m_offset;
640 }
641 #endif // wxUSE_UNICODE
642
643 #if !wxUSE_UNICODE_WCHAR
644 const wchar_t* wxCStrData::AsWChar() const
645 {
646 wxString *str = wxConstCast(m_str, wxString);
647
648 // convert the string:
649 wxWCharBuffer buf(str->wc_str());
650
651 // FIXME-UTF8: do the conversion in-place in the existing buffer
652 if ( str->m_convertedToWChar &&
653 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
654 {
655 // keep the same buffer for as long as possible, so that several calls
656 // to c_str() in a row still work:
657 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
658 }
659 else
660 {
661 str->m_convertedToWChar = buf.release();
662 }
663
664 // and keep it:
665 return str->m_convertedToWChar + m_offset;
666 }
667 #endif // !wxUSE_UNICODE_WCHAR
668
669 // ===========================================================================
670 // wxString class core
671 // ===========================================================================
672
673 // ---------------------------------------------------------------------------
674 // construction and conversion
675 // ---------------------------------------------------------------------------
676
677 #if wxUSE_UNICODE_WCHAR
678 /* static */
679 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
680 const wxMBConv& conv)
681 {
682 // anything to do?
683 if ( !psz || nLength == 0 )
684 return SubstrBufFromMB(L"", 0);
685
686 if ( nLength == npos )
687 nLength = wxNO_LEN;
688
689 size_t wcLen;
690 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
691 if ( !wcLen )
692 return SubstrBufFromMB(_T(""), 0);
693 else
694 return SubstrBufFromMB(wcBuf, wcLen);
695 }
696 #endif // wxUSE_UNICODE_WCHAR
697
698 #if wxUSE_UNICODE_UTF8
699 /* static */
700 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
701 const wxMBConv& conv)
702 {
703 // FIXME-UTF8: return as-is without copying under UTF8 locale, return
704 // converted string under other locales - needs wxCharBuffer
705 // changes
706
707 // anything to do?
708 if ( !psz || nLength == 0 )
709 return SubstrBufFromMB("", 0);
710
711 if ( nLength == npos )
712 nLength = wxNO_LEN;
713
714 // first convert to wide string:
715 size_t wcLen;
716 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
717 if ( !wcLen )
718 return SubstrBufFromMB("", 0);
719
720 // and then to UTF-8:
721 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8));
722 // widechar -> UTF-8 conversion isn't supposed to ever fail:
723 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
724
725 return buf;
726 }
727 #endif // wxUSE_UNICODE_UTF8
728
729 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
730 /* static */
731 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
732 const wxMBConv& conv)
733 {
734 // anything to do?
735 if ( !pwz || nLength == 0 )
736 return SubstrBufFromWC("", 0);
737
738 if ( nLength == npos )
739 nLength = wxNO_LEN;
740
741 size_t mbLen;
742 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
743 if ( !mbLen )
744 return SubstrBufFromWC("", 0);
745 else
746 return SubstrBufFromWC(mbBuf, mbLen);
747 }
748 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
749
750
751 #if wxUSE_UNICODE_WCHAR
752
753 //Convert wxString in Unicode mode to a multi-byte string
754 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
755 {
756 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
757 }
758
759 #elif wxUSE_UNICODE_UTF8
760
761 const wxWCharBuffer wxString::wc_str() const
762 {
763 return wxConvUTF8.cMB2WC(m_impl.c_str(),
764 m_impl.length() + 1 /* size, not length */,
765 NULL);
766 }
767
768 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
769 {
770 // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
771 // under UTF8 locale
772 // FIXME-UTF8: use wc_str() here once we have buffers with length
773
774 size_t wcLen;
775 wxWCharBuffer wcBuf(
776 wxConvUTF8.cMB2WC(m_impl.c_str(),
777 m_impl.length() + 1 /* size, not length */,
778 &wcLen));
779 if ( !wcLen )
780 return wxCharBuffer("");
781
782 return conv.cWC2MB(wcBuf, wcLen, NULL);
783 }
784
785 #else // ANSI
786
787 //Converts this string to a wide character string if unicode
788 //mode is not enabled and wxUSE_WCHAR_T is enabled
789 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
790 {
791 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
792 }
793
794 #endif // Unicode/ANSI
795
796 // shrink to minimal size (releasing extra memory)
797 bool wxString::Shrink()
798 {
799 wxString tmp(begin(), end());
800 swap(tmp);
801 return tmp.length() == length();
802 }
803
804 // deprecated compatibility code:
805 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
// Deprecated public entry point kept for 2.8 compatibility: simply forwards
// to DoGetWriteBuf() with the same nLen and returns its result.
wxChar *wxString::GetWriteBuf(size_t nLen)
{
    return DoGetWriteBuf(nLen);
}

// Deprecated counterpart of GetWriteBuf(): forwards to DoUngetWriteBuf().
void wxString::UngetWriteBuf()
{
    DoUngetWriteBuf();
}

// Same as above but forwards the explicitly given nLen to
// DoUngetWriteBuf(nLen).
void wxString::UngetWriteBuf(size_t nLen)
{
    DoUngetWriteBuf(nLen);
}
820 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
821
822
823 // ---------------------------------------------------------------------------
824 // data access
825 // ---------------------------------------------------------------------------
826
827 // all functions are inline in string.h
828
829 // ---------------------------------------------------------------------------
830 // concatenation operators
831 // ---------------------------------------------------------------------------
832
833 /*
834 * concatenation functions come in 5 flavours:
835 * string + string
836 * char + string and string + char
837 * C str + string and string + C str
838 */
839
840 wxString operator+(const wxString& str1, const wxString& str2)
841 {
842 #if !wxUSE_STL_BASED_WXSTRING
843 wxASSERT( str1.IsValid() );
844 wxASSERT( str2.IsValid() );
845 #endif
846
847 wxString s = str1;
848 s += str2;
849
850 return s;
851 }
852
853 wxString operator+(const wxString& str, wxUniChar ch)
854 {
855 #if !wxUSE_STL_BASED_WXSTRING
856 wxASSERT( str.IsValid() );
857 #endif
858
859 wxString s = str;
860 s += ch;
861
862 return s;
863 }
864
865 wxString operator+(wxUniChar ch, const wxString& str)
866 {
867 #if !wxUSE_STL_BASED_WXSTRING
868 wxASSERT( str.IsValid() );
869 #endif
870
871 wxString s = ch;
872 s += str;
873
874 return s;
875 }
876
877 wxString operator+(const wxString& str, const char *psz)
878 {
879 #if !wxUSE_STL_BASED_WXSTRING
880 wxASSERT( str.IsValid() );
881 #endif
882
883 wxString s;
884 if ( !s.Alloc(strlen(psz) + str.length()) ) {
885 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
886 }
887 s += str;
888 s += psz;
889
890 return s;
891 }
892
893 wxString operator+(const wxString& str, const wchar_t *pwz)
894 {
895 #if !wxUSE_STL_BASED_WXSTRING
896 wxASSERT( str.IsValid() );
897 #endif
898
899 wxString s;
900 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
901 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
902 }
903 s += str;
904 s += pwz;
905
906 return s;
907 }
908
909 wxString operator+(const char *psz, const wxString& str)
910 {
911 #if !wxUSE_STL_BASED_WXSTRING
912 wxASSERT( str.IsValid() );
913 #endif
914
915 wxString s;
916 if ( !s.Alloc(strlen(psz) + str.length()) ) {
917 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
918 }
919 s = psz;
920 s += str;
921
922 return s;
923 }
924
925 wxString operator+(const wchar_t *pwz, const wxString& str)
926 {
927 #if !wxUSE_STL_BASED_WXSTRING
928 wxASSERT( str.IsValid() );
929 #endif
930
931 wxString s;
932 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
933 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
934 }
935 s = pwz;
936 s += str;
937
938 return s;
939 }
940
941 // ---------------------------------------------------------------------------
942 // string comparison
943 // ---------------------------------------------------------------------------
944
945 #ifdef HAVE_STD_STRING_COMPARE
946
947 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
948 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
949 // sort strings in characters code point order by sorting the byte sequence
950 // in byte values order (i.e. what strcmp() and memcmp() do).
951
952 int wxString::compare(const wxString& str) const
953 {
954 return m_impl.compare(str.m_impl);
955 }
956
957 int wxString::compare(size_t nStart, size_t nLen,
958 const wxString& str) const
959 {
960 size_t pos, len;
961 PosLenToImpl(nStart, nLen, &pos, &len);
962 return m_impl.compare(pos, len, str.m_impl);
963 }
964
965 int wxString::compare(size_t nStart, size_t nLen,
966 const wxString& str,
967 size_t nStart2, size_t nLen2) const
968 {
969 size_t pos, len;
970 PosLenToImpl(nStart, nLen, &pos, &len);
971
972 size_t pos2, len2;
973 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
974
975 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
976 }
977
978 int wxString::compare(const char* sz) const
979 {
980 return m_impl.compare(ImplStr(sz));
981 }
982
983 int wxString::compare(const wchar_t* sz) const
984 {
985 return m_impl.compare(ImplStr(sz));
986 }
987
988 int wxString::compare(size_t nStart, size_t nLen,
989 const char* sz, size_t nCount) const
990 {
991 size_t pos, len;
992 PosLenToImpl(nStart, nLen, &pos, &len);
993
994 SubstrBufFromMB str(ImplStr(sz, nCount));
995
996 return m_impl.compare(pos, len, str.data, str.len);
997 }
998
999 int wxString::compare(size_t nStart, size_t nLen,
1000 const wchar_t* sz, size_t nCount) const
1001 {
1002 size_t pos, len;
1003 PosLenToImpl(nStart, nLen, &pos, &len);
1004
1005 SubstrBufFromWC str(ImplStr(sz, nCount));
1006
1007 return m_impl.compare(pos, len, str.data, str.len);
1008 }
1009
1010 #else // !HAVE_STD_STRING_COMPARE
1011
1012 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
1013 const wxStringCharType* s2, size_t l2)
1014 {
1015 if( l1 == l2 )
1016 return wxStringMemcmp(s1, s2, l1);
1017 else if( l1 < l2 )
1018 {
1019 int ret = wxStringMemcmp(s1, s2, l1);
1020 return ret == 0 ? -1 : ret;
1021 }
1022 else
1023 {
1024 int ret = wxStringMemcmp(s1, s2, l2);
1025 return ret == 0 ? +1 : ret;
1026 }
1027 }
1028
1029 int wxString::compare(const wxString& str) const
1030 {
1031 return ::wxDoCmp(m_impl.data(), m_impl.length(),
1032 str.m_impl.data(), str.m_impl.length());
1033 }
1034
1035 int wxString::compare(size_t nStart, size_t nLen,
1036 const wxString& str) const
1037 {
1038 wxASSERT(nStart <= length());
1039 size_type strLen = length() - nStart;
1040 nLen = strLen < nLen ? strLen : nLen;
1041
1042 size_t pos, len;
1043 PosLenToImpl(nStart, nLen, &pos, &len);
1044
1045 return ::wxDoCmp(m_impl.data() + pos, len,
1046 str.m_impl.data(), str.m_impl.length());
1047 }
1048
1049 int wxString::compare(size_t nStart, size_t nLen,
1050 const wxString& str,
1051 size_t nStart2, size_t nLen2) const
1052 {
1053 wxASSERT(nStart <= length());
1054 wxASSERT(nStart2 <= str.length());
1055 size_type strLen = length() - nStart,
1056 strLen2 = str.length() - nStart2;
1057 nLen = strLen < nLen ? strLen : nLen;
1058 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
1059
1060 size_t pos, len;
1061 PosLenToImpl(nStart, nLen, &pos, &len);
1062 size_t pos2, len2;
1063 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
1064
1065 return ::wxDoCmp(m_impl.data() + pos, len,
1066 str.m_impl.data() + pos2, len2);
1067 }
1068
1069 int wxString::compare(const char* sz) const
1070 {
1071 SubstrBufFromMB str(ImplStr(sz, npos));
1072 if ( str.len == npos )
1073 str.len = wxStringStrlen(str.data);
1074 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
1075 }
1076
1077 int wxString::compare(const wchar_t* sz) const
1078 {
1079 SubstrBufFromWC str(ImplStr(sz, npos));
1080 if ( str.len == npos )
1081 str.len = wxStringStrlen(str.data);
1082 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
1083 }
1084
1085 int wxString::compare(size_t nStart, size_t nLen,
1086 const char* sz, size_t nCount) const
1087 {
1088 wxASSERT(nStart <= length());
1089 size_type strLen = length() - nStart;
1090 nLen = strLen < nLen ? strLen : nLen;
1091
1092 size_t pos, len;
1093 PosLenToImpl(nStart, nLen, &pos, &len);
1094
1095 SubstrBufFromMB str(ImplStr(sz, nCount));
1096 if ( str.len == npos )
1097 str.len = wxStringStrlen(str.data);
1098
1099 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
1100 }
1101
1102 int wxString::compare(size_t nStart, size_t nLen,
1103 const wchar_t* sz, size_t nCount) const
1104 {
1105 wxASSERT(nStart <= length());
1106 size_type strLen = length() - nStart;
1107 nLen = strLen < nLen ? strLen : nLen;
1108
1109 size_t pos, len;
1110 PosLenToImpl(nStart, nLen, &pos, &len);
1111
1112 SubstrBufFromWC str(ImplStr(sz, nCount));
1113 if ( str.len == npos )
1114 str.len = wxStringStrlen(str.data);
1115
1116 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
1117 }
1118
1119 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
1120
1121
1122 // ---------------------------------------------------------------------------
1123 // find_{first,last}_[not]_of functions
1124 // ---------------------------------------------------------------------------
1125
1126 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1127
1128 // NB: All these functions are implemented with the argument being wxChar*,
1129 // i.e. widechar string in any Unicode build, even though native string
1130 // representation is char* in the UTF-8 build. This is because we couldn't
1131 // use memchr() to determine if a character is in a set encoded as UTF-8.
1132
1133 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
1134 {
1135 return find_first_of(sz, nStart, wxStrlen(sz));
1136 }
1137
1138 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
1139 {
1140 return find_first_not_of(sz, nStart, wxStrlen(sz));
1141 }
1142
1143 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
1144 {
1145 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1146
1147 size_t idx = nStart;
1148 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1149 {
1150 if ( wxTmemchr(sz, *i, n) )
1151 return idx;
1152 }
1153
1154 return npos;
1155 }
1156
1157 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
1158 {
1159 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1160
1161 size_t idx = nStart;
1162 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1163 {
1164 if ( !wxTmemchr(sz, *i, n) )
1165 return idx;
1166 }
1167
1168 return npos;
1169 }
1170
1171
1172 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
1173 {
1174 return find_last_of(sz, nStart, wxStrlen(sz));
1175 }
1176
1177 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
1178 {
1179 return find_last_not_of(sz, nStart, wxStrlen(sz));
1180 }
1181
1182 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
1183 {
1184 size_t len = length();
1185
1186 if ( nStart == npos )
1187 {
1188 nStart = len - 1;
1189 }
1190 else
1191 {
1192 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1193 }
1194
1195 size_t idx = nStart;
1196 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1197 i != rend(); --idx, ++i )
1198 {
1199 if ( wxTmemchr(sz, *i, n) )
1200 return idx;
1201 }
1202
1203 return npos;
1204 }
1205
1206 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
1207 {
1208 size_t len = length();
1209
1210 if ( nStart == npos )
1211 {
1212 nStart = len - 1;
1213 }
1214 else
1215 {
1216 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1217 }
1218
1219 size_t idx = nStart;
1220 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1221 i != rend(); --idx, ++i )
1222 {
1223 if ( !wxTmemchr(sz, *i, n) )
1224 return idx;
1225 }
1226
1227 return npos;
1228 }
1229
1230 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1231 {
1232 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1233
1234 size_t idx = nStart;
1235 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1236 {
1237 if ( *i != ch )
1238 return idx;
1239 }
1240
1241 return npos;
1242 }
1243
1244 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1245 {
1246 size_t len = length();
1247
1248 if ( nStart == npos )
1249 {
1250 nStart = len - 1;
1251 }
1252 else
1253 {
1254 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1255 }
1256
1257 size_t idx = nStart;
1258 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1259 i != rend(); --idx, ++i )
1260 {
1261 if ( *i != ch )
1262 return idx;
1263 }
1264
1265 return npos;
1266 }
1267
1268 // the functions above were implemented for wchar_t* arguments in Unicode
1269 // build and char* in ANSI build; below are implementations for the other
1270 // version:
1271 #if wxUSE_UNICODE
1272 #define wxOtherCharType char
1273 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1274 #else
1275 #define wxOtherCharType wchar_t
1276 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1277 #endif
1278
1279 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1280 { return find_first_of(STRCONV(sz), nStart); }
1281
1282 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1283 size_t n) const
1284 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1285 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1286 { return find_last_of(STRCONV(sz), nStart); }
1287 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1288 size_t n) const
1289 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1290 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1291 { return find_first_not_of(STRCONV(sz), nStart); }
1292 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1293 size_t n) const
1294 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1295 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1296 { return find_last_not_of(STRCONV(sz), nStart); }
1297 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1298 size_t n) const
1299 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1300
1301 #undef wxOtherCharType
1302 #undef STRCONV
1303
1304 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1305
1306 // ===========================================================================
1307 // other common string functions
1308 // ===========================================================================
1309
1310 int wxString::CmpNoCase(const wxString& s) const
1311 {
1312 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1313
1314 size_t idx = 0;
1315 const_iterator i1 = begin();
1316 const_iterator end1 = end();
1317 const_iterator i2 = s.begin();
1318 const_iterator end2 = s.end();
1319
1320 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
1321 {
1322 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1323 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1324 if ( lower1 != lower2 )
1325 return lower1 < lower2 ? -1 : 1;
1326 }
1327
1328 size_t len1 = length();
1329 size_t len2 = s.length();
1330
1331 if ( len1 < len2 )
1332 return -1;
1333 else if ( len1 > len2 )
1334 return 1;
1335 return 0;
1336 }
1337
1338
1339 #if wxUSE_UNICODE
1340
1341 #ifdef __MWERKS__
1342 #ifndef __SCHAR_MAX__
1343 #define __SCHAR_MAX__ 127
1344 #endif
1345 #endif
1346
1347 wxString wxString::FromAscii(const char *ascii)
1348 {
1349 if (!ascii)
1350 return wxEmptyString;
1351
1352 size_t len = strlen( ascii );
1353 wxString res;
1354
1355 if ( len )
1356 {
1357 wxStringBuffer buf(res, len);
1358
1359 wchar_t *dest = buf;
1360
1361 for ( ;; )
1362 {
1363 if ( (*dest++ = (wchar_t)(unsigned char)*ascii++) == L'\0' )
1364 break;
1365 }
1366 }
1367
1368 return res;
1369 }
1370
1371 wxString wxString::FromAscii(const char ascii)
1372 {
1373 // What do we do with '\0' ?
1374
1375 wxString res;
1376 res += (wchar_t)(unsigned char) ascii;
1377
1378 return res;
1379 }
1380
1381 const wxCharBuffer wxString::ToAscii() const
1382 {
1383 // this will allocate enough space for the terminating NUL too
1384 wxCharBuffer buffer(length());
1385
1386
1387 char *dest = buffer.data();
1388
1389 const wchar_t *pwc = c_str();
1390 for ( ;; )
1391 {
1392 *dest++ = (char)(*pwc > SCHAR_MAX ? wxT('_') : *pwc);
1393
1394 // the output string can't have embedded NULs anyhow, so we can safely
1395 // stop at first of them even if we do have any
1396 if ( !*pwc++ )
1397 break;
1398 }
1399
1400 return buffer;
1401 }
1402
1403 #endif // Unicode
1404
1405 // extract string of length nCount starting at nFirst
1406 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1407 {
1408 size_t nLen = length();
1409
1410 // default value of nCount is npos and means "till the end"
1411 if ( nCount == npos )
1412 {
1413 nCount = nLen - nFirst;
1414 }
1415
1416 // out-of-bounds requests return sensible things
1417 if ( nFirst + nCount > nLen )
1418 {
1419 nCount = nLen - nFirst;
1420 }
1421
1422 if ( nFirst > nLen )
1423 {
1424 // AllocCopy() will return empty string
1425 return wxEmptyString;
1426 }
1427
1428 wxString dest(*this, nFirst, nCount);
1429 if ( dest.length() != nCount )
1430 {
1431 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1432 }
1433
1434 return dest;
1435 }
1436
1437 // check that the string starts with prefix and return the rest of the string
1438 // in the provided pointer if it is not NULL, otherwise return false
1439 bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
1440 {
1441 wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
1442
1443 // first check if the beginning of the string matches the prefix: note
1444 // that we don't have to check that we don't run out of this string as
1445 // when we reach the terminating NUL, either prefix string ends too (and
1446 // then it's ok) or we break out of the loop because there is no match
1447 const wxChar *p = c_str();
1448 while ( *prefix )
1449 {
1450 if ( *prefix++ != *p++ )
1451 {
1452 // no match
1453 return false;
1454 }
1455 }
1456
1457 if ( rest )
1458 {
1459 // put the rest of the string into provided pointer
1460 *rest = p;
1461 }
1462
1463 return true;
1464 }
1465
1466
1467 // check that the string ends with suffix and return the rest of it in the
1468 // provided pointer if it is not NULL, otherwise return false
1469 bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
1470 {
1471 wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
1472
1473 int start = length() - wxStrlen(suffix);
1474
1475 if ( start < 0 || compare(start, npos, suffix) != 0 )
1476 return false;
1477
1478 if ( rest )
1479 {
1480 // put the rest of the string into provided pointer
1481 rest->assign(*this, 0, start);
1482 }
1483
1484 return true;
1485 }
1486
1487
1488 // extract nCount last (rightmost) characters
1489 wxString wxString::Right(size_t nCount) const
1490 {
1491 if ( nCount > length() )
1492 nCount = length();
1493
1494 wxString dest(*this, length() - nCount, nCount);
1495 if ( dest.length() != nCount ) {
1496 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1497 }
1498 return dest;
1499 }
1500
1501 // get all characters after the last occurence of ch
1502 // (returns the whole string if ch not found)
1503 wxString wxString::AfterLast(wxUniChar ch) const
1504 {
1505 wxString str;
1506 int iPos = Find(ch, true);
1507 if ( iPos == wxNOT_FOUND )
1508 str = *this;
1509 else
1510 str = wx_str() + iPos + 1;
1511
1512 return str;
1513 }
1514
1515 // extract nCount first (leftmost) characters
1516 wxString wxString::Left(size_t nCount) const
1517 {
1518 if ( nCount > length() )
1519 nCount = length();
1520
1521 wxString dest(*this, 0, nCount);
1522 if ( dest.length() != nCount ) {
1523 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1524 }
1525 return dest;
1526 }
1527
1528 // get all characters before the first occurence of ch
1529 // (returns the whole string if ch not found)
1530 wxString wxString::BeforeFirst(wxUniChar ch) const
1531 {
1532 int iPos = Find(ch);
1533 if ( iPos == wxNOT_FOUND ) iPos = length();
1534 return wxString(*this, 0, iPos);
1535 }
1536
1537 /// get all characters before the last occurence of ch
1538 /// (returns empty string if ch not found)
1539 wxString wxString::BeforeLast(wxUniChar ch) const
1540 {
1541 wxString str;
1542 int iPos = Find(ch, true);
1543 if ( iPos != wxNOT_FOUND && iPos != 0 )
1544 str = wxString(c_str(), iPos);
1545
1546 return str;
1547 }
1548
1549 /// get all characters after the first occurence of ch
1550 /// (returns empty string if ch not found)
1551 wxString wxString::AfterFirst(wxUniChar ch) const
1552 {
1553 wxString str;
1554 int iPos = Find(ch);
1555 if ( iPos != wxNOT_FOUND )
1556 str = wx_str() + iPos + 1;
1557
1558 return str;
1559 }
1560
1561 // replace first (or all) occurences of some substring with another one
1562 size_t wxString::Replace(const wxString& strOld,
1563 const wxString& strNew, bool bReplaceAll)
1564 {
1565 // if we tried to replace an empty string we'd enter an infinite loop below
1566 wxCHECK_MSG( !strOld.empty(), 0,
1567 _T("wxString::Replace(): invalid parameter") );
1568
1569 size_t uiCount = 0; // count of replacements made
1570
1571 size_t uiOldLen = strOld.length();
1572 size_t uiNewLen = strNew.length();
1573
1574 size_t dwPos = 0;
1575
1576 while ( (*this)[dwPos] != wxT('\0') )
1577 {
1578 //DO NOT USE STRSTR HERE
1579 //this string can contain embedded null characters,
1580 //so strstr will function incorrectly
1581 dwPos = find(strOld, dwPos);
1582 if ( dwPos == npos )
1583 break; // exit the loop
1584 else
1585 {
1586 //replace this occurance of the old string with the new one
1587 replace(dwPos, uiOldLen, strNew, uiNewLen);
1588
1589 //move up pos past the string that was replaced
1590 dwPos += uiNewLen;
1591
1592 //increase replace count
1593 ++uiCount;
1594
1595 // stop now?
1596 if ( !bReplaceAll )
1597 break; // exit the loop
1598 }
1599 }
1600
1601 return uiCount;
1602 }
1603
1604 bool wxString::IsAscii() const
1605 {
1606 for ( const_iterator i = begin(); i != end(); ++i )
1607 {
1608 if ( !(*i).IsAscii() )
1609 return false;
1610 }
1611
1612 return true;
1613 }
1614
1615 bool wxString::IsWord() const
1616 {
1617 for ( const_iterator i = begin(); i != end(); ++i )
1618 {
1619 if ( !wxIsalpha(*i) )
1620 return false;
1621 }
1622
1623 return true;
1624 }
1625
1626 bool wxString::IsNumber() const
1627 {
1628 if ( empty() )
1629 return true;
1630
1631 const_iterator i = begin();
1632
1633 if ( *i == _T('-') || *i == _T('+') )
1634 ++i;
1635
1636 for ( ; i != end(); ++i )
1637 {
1638 if ( !wxIsdigit(*i) )
1639 return false;
1640 }
1641
1642 return true;
1643 }
1644
1645 wxString wxString::Strip(stripType w) const
1646 {
1647 wxString s = *this;
1648 if ( w & leading ) s.Trim(false);
1649 if ( w & trailing ) s.Trim(true);
1650 return s;
1651 }
1652
1653 // ---------------------------------------------------------------------------
1654 // case conversion
1655 // ---------------------------------------------------------------------------
1656
1657 wxString& wxString::MakeUpper()
1658 {
1659 for ( iterator it = begin(), en = end(); it != en; ++it )
1660 *it = (wxChar)wxToupper(*it);
1661
1662 return *this;
1663 }
1664
1665 wxString& wxString::MakeLower()
1666 {
1667 for ( iterator it = begin(), en = end(); it != en; ++it )
1668 *it = (wxChar)wxTolower(*it);
1669
1670 return *this;
1671 }
1672
1673 // ---------------------------------------------------------------------------
1674 // trimming and padding
1675 // ---------------------------------------------------------------------------
1676
1677 // some compilers (VC++ 6.0 not to name them) return true for a call to
1678 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1679 // live with this by checking that the character is a 7 bit one - even if this
1680 // may fail to detect some spaces (I don't know if Unicode doesn't have
1681 // space-like symbols somewhere except in the first 128 chars), it is arguably
1682 // still better than trimming away accented letters
1683 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1684
1685 // trims spaces (in the sense of isspace) from left or right side
1686 wxString& wxString::Trim(bool bFromRight)
1687 {
1688 // first check if we're going to modify the string at all
1689 if ( !empty() &&
1690 (
1691 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1692 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1693 )
1694 )
1695 {
1696 if ( bFromRight )
1697 {
1698 // find last non-space character
1699 reverse_iterator psz = rbegin();
1700 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1701 psz++;
1702
1703 // truncate at trailing space start
1704 erase(psz.base(), end());
1705 }
1706 else
1707 {
1708 // find first non-space character
1709 iterator psz = begin();
1710 while ( (psz != end()) && wxSafeIsspace(*psz) )
1711 psz++;
1712
1713 // fix up data and length
1714 erase(begin(), psz);
1715 }
1716 }
1717
1718 return *this;
1719 }
1720
1721 // adds nCount characters chPad to the string from either side
1722 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1723 {
1724 wxString s(chPad, nCount);
1725
1726 if ( bFromRight )
1727 *this += s;
1728 else
1729 {
1730 s += *this;
1731 swap(s);
1732 }
1733
1734 return *this;
1735 }
1736
1737 // truncate the string
1738 wxString& wxString::Truncate(size_t uiLen)
1739 {
1740 if ( uiLen < length() )
1741 {
1742 erase(begin() + uiLen, end());
1743 }
1744 //else: nothing to do, string is already short enough
1745
1746 return *this;
1747 }
1748
1749 // ---------------------------------------------------------------------------
1750 // finding (return wxNOT_FOUND if not found and index otherwise)
1751 // ---------------------------------------------------------------------------
1752
1753 // find a character
1754 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1755 {
1756 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1757
1758 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1759 }
1760
1761 // ----------------------------------------------------------------------------
1762 // conversion to numbers
1763 // ----------------------------------------------------------------------------
1764
1765 // the implementation of all the functions below is exactly the same so factor
1766 // it out
1767
1768 template <typename T, typename F>
1769 bool wxStringToIntType(const wxChar *start,
1770 T *val,
1771 int base,
1772 F func)
1773 {
1774 wxCHECK_MSG( val, false, _T("NULL output pointer") );
1775 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1776
1777 #ifndef __WXWINCE__
1778 errno = 0;
1779 #endif
1780
1781 wxChar *end;
1782 *val = (*func)(start, &end, base);
1783
1784 // return true only if scan was stopped by the terminating NUL and if the
1785 // string was not empty to start with and no under/overflow occurred
1786 return !*end && (end != start)
1787 #ifndef __WXWINCE__
1788 && (errno != ERANGE)
1789 #endif
1790 ;
1791 }
1792
1793 bool wxString::ToLong(long *val, int base) const
1794 {
1795 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol);
1796 }
1797
1798 bool wxString::ToULong(unsigned long *val, int base) const
1799 {
1800 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul);
1801 }
1802
1803 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1804 {
1805 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll);
1806 }
1807
1808 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1809 {
1810 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull);
1811 }
1812
1813 bool wxString::ToDouble(double *val) const
1814 {
1815 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1816
1817 #ifndef __WXWINCE__
1818 errno = 0;
1819 #endif
1820
1821 const wxChar *start = c_str();
1822 wxChar *end;
1823 *val = wxStrtod(start, &end);
1824
1825 // return true only if scan was stopped by the terminating NUL and if the
1826 // string was not empty to start with and no under/overflow occurred
1827 return !*end && (end != start)
1828 #ifndef __WXWINCE__
1829 && (errno != ERANGE)
1830 #endif
1831 ;
1832 }
1833
1834 // ---------------------------------------------------------------------------
1835 // formatted output
1836 // ---------------------------------------------------------------------------
1837
1838 /* static */
1839 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1840 wxString wxStringPrintfMixinBase::DoFormat(const wxChar *format, ...)
1841 #else
1842 wxString wxString::DoFormat(const wxChar *format, ...)
1843 #endif
1844 {
1845 va_list argptr;
1846 va_start(argptr, format);
1847
1848 wxString s;
1849 s.PrintfV(format, argptr);
1850
1851 va_end(argptr);
1852
1853 return s;
1854 }
1855
1856 /* static */
1857 wxString wxString::FormatV(const wxString& format, va_list argptr)
1858 {
1859 wxString s;
1860 s.PrintfV(format, argptr);
1861 return s;
1862 }
1863
1864 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1865 int wxStringPrintfMixinBase::DoPrintf(const wxChar *format, ...)
1866 #else
1867 int wxString::DoPrintf(const wxChar *format, ...)
1868 #endif
1869 {
1870 va_list argptr;
1871 va_start(argptr, format);
1872
1873 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1874 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1875 // because it's the only cast that works safely for downcasting when
1876 // multiple inheritance is used:
1877 wxString *str = static_cast<wxString*>(this);
1878 #else
1879 wxString *str = this;
1880 #endif
1881
1882 int iLen = str->PrintfV(format, argptr);
1883
1884 va_end(argptr);
1885
1886 return iLen;
1887 }
1888
1889 int wxString::PrintfV(const wxString& format, va_list argptr)
1890 {
1891 int size = 1024;
1892
1893 for ( ;; )
1894 {
1895 wxStringBuffer tmp(*this, size + 1);
1896 wxChar *buf = tmp;
1897
1898 if ( !buf )
1899 {
1900 // out of memory
1901 return -1;
1902 }
1903
1904 // wxVsnprintf() may modify the original arg pointer, so pass it
1905 // only a copy
1906 va_list argptrcopy;
1907 wxVaCopy(argptrcopy, argptr);
1908 int len = wxVsnprintf(buf, size, (const wxChar*)/*FIXME-UTF8*/format, argptrcopy);
1909 va_end(argptrcopy);
1910
1911 // some implementations of vsnprintf() don't NUL terminate
1912 // the string if there is not enough space for it so
1913 // always do it manually
1914 buf[size] = _T('\0');
1915
1916 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1917 // total number of characters which would have been written if the
1918 // buffer were large enough (newer standards such as Unix98)
1919 if ( len < 0 )
1920 {
1921 #if wxUSE_WXVSNPRINTF
1922 // we know that our own implementation of wxVsnprintf() returns -1
1923 // only for a format error - thus there's something wrong with
1924 // the user's format string
1925 return -1;
1926 #else // assume that system version only returns error if not enough space
1927 // still not enough, as we don't know how much we need, double the
1928 // current size of the buffer
1929 size *= 2;
1930 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1931 }
1932 else if ( len >= size )
1933 {
1934 #if wxUSE_WXVSNPRINTF
1935 // we know that our own implementation of wxVsnprintf() returns
1936 // size+1 when there's not enough space but that's not the size
1937 // of the required buffer!
1938 size *= 2; // so we just double the current size of the buffer
1939 #else
1940 // some vsnprintf() implementations NUL-terminate the buffer and
1941 // some don't in len == size case, to be safe always add 1
1942 size = len + 1;
1943 #endif
1944 }
1945 else // ok, there was enough space
1946 {
1947 break;
1948 }
1949 }
1950
1951 // we could have overshot
1952 Shrink();
1953
1954 return length();
1955 }
1956
1957 // ----------------------------------------------------------------------------
1958 // misc other operations
1959 // ----------------------------------------------------------------------------
1960
1961 // returns true if the string matches the pattern which may contain '*' and
1962 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1963 // of them)
//
// NB: in the active (hand-written) branch any '*' or '?' directly following
//     a '*' are skipped, i.e. "*?" behaves in the same way as "*" there, and
//     the literal text following a '*' is located with wxStrstr()
1964 bool wxString::Matches(const wxString& mask) const
1965 {
1966 // I disable this code as it doesn't seem to be faster (in fact, it seems
1967 // to be much slower) than the old, hand-written code below and using it
1968 // here requires always linking with libregex even if the user code doesn't
1969 // use it
// NOTE(review): the disabled branch below refers to pszMask which is only
// declared in the active branch, so it would need updating before it could
// be enabled again
1970 #if 0 // wxUSE_REGEX
1971 // first translate the shell-like mask into a regex
1972 wxString pattern;
1973 pattern.reserve(wxStrlen(pszMask));
1974
1975 pattern += _T('^');
1976 while ( *pszMask )
1977 {
1978 switch ( *pszMask )
1979 {
1980 case _T('?'):
1981 pattern += _T('.');
1982 break;
1983
1984 case _T('*'):
1985 pattern += _T(".*");
1986 break;
1987
1988 case _T('^'):
1989 case _T('.'):
1990 case _T('$'):
1991 case _T('('):
1992 case _T(')'):
1993 case _T('|'):
1994 case _T('+'):
1995 case _T('\\'):
1996 // these characters are special in a RE, quote them
1997 // (however note that we don't quote '[' and ']' to allow
1998 // using them for Unix shell like matching)
1999 pattern += _T('\\');
2000 // fall through
2001
2002 default:
2003 pattern += *pszMask;
2004 }
2005
2006 pszMask++;
2007 }
2008 pattern += _T('$');
2009
2010 // and now use it
2011 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2012 #else // !wxUSE_REGEX
2013 // TODO: this is, of course, awfully inefficient...
2014
2015 // FIXME-UTF8: implement using iterators, remove #if
2016 #if wxUSE_UNICODE_UTF8
// work on wide character copies of both strings in this build; the buffers
// must stay alive for as long as the pointers into them are used
2017 wxWCharBuffer maskBuf = mask.wc_str();
2018 wxWCharBuffer txtBuf = wc_str();
2019 const wxChar *pszMask = maskBuf.data();
2020 const wxChar *pszTxt = txtBuf.data();
2021 #else
2022 const wxChar *pszMask = mask.wx_str();
2023 // the char currently being checked
2024 const wxChar *pszTxt = wx_str();
2025 #endif
2026
2027 // the last location where '*' matched
2028 const wxChar *pszLastStarInText = NULL;
2029 const wxChar *pszLastStarInMask = NULL;
2030
// the matching loop is (re)entered here: initially and after each backtrack
2031 match:
2032 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2033 switch ( *pszMask ) {
2034 case wxT('?'):
// '?' needs a character to consume, so fail at the end of the text
2035 if ( *pszTxt == wxT('\0') )
2036 return false;
2037
2038 // pszTxt and pszMask will be incremented in the loop statement
2039
2040 break;
2041
2042 case wxT('*'):
2043 {
2044 // remember where we started to be able to backtrack later
2045 pszLastStarInText = pszTxt;
2046 pszLastStarInMask = pszMask;
2047
2048 // ignore special chars immediately following this one
2049 // (should this be an error?)
2050 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2051 pszMask++;
2052
2053 // if there is nothing more, match
2054 if ( *pszMask == wxT('\0') )
2055 return true;
2056
2057 // are there any other metacharacters in the mask?
2058 size_t uiLenMask;
2059 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2060
2061 if ( pEndMask != NULL ) {
2062 // we have to match the string between two metachars
2063 uiLenMask = pEndMask - pszMask;
2064 }
2065 else {
2066 // we have to match the remainder of the string
2067 uiLenMask = wxStrlen(pszMask);
2068 }
2069
// look for the literal part of the mask following the '*' in the text: its
// first occurrence found by wxStrstr() is used and the function fails
// immediately if there is none
2070 wxString strToMatch(pszMask, uiLenMask);
2071 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2072 if ( pMatch == NULL )
2073 return false;
2074
2075 // -1 to compensate "++" in the loop
2076 pszTxt = pMatch + uiLenMask - 1;
2077 pszMask += uiLenMask - 1;
2078 }
2079 break;
2080
2081 default:
// any other character must be matched literally
2082 if ( *pszMask != *pszTxt )
2083 return false;
2084 break;
2085 }
2086 }
2087
2088 // match only if nothing left
2089 if ( *pszTxt == wxT('\0') )
2090 return true;
2091
2092 // if we failed to match, backtrack if we can
2093 if ( pszLastStarInText ) {
// retry with the last '*' starting one character further in the text:
// pszMask points at that '*' again, so its case above runs first and
// records the new starting position
2094 pszTxt = pszLastStarInText + 1;
2095 pszMask = pszLastStarInMask;
2096
2097 pszLastStarInText = NULL;
2098
2099 // don't bother resetting pszLastStarInMask, it's unnecessary
2100
2101 goto match;
2102 }
2103
2104 return false;
2105 #endif // wxUSE_REGEX/!wxUSE_REGEX
2106 }
2107
2108 // Count the number of chars
2109 int wxString::Freq(wxUniChar ch) const
2110 {
2111 int count = 0;
2112 for ( const_iterator i = begin(); i != end(); ++i )
2113 {
2114 if ( *i == ch )
2115 count ++;
2116 }
2117 return count;
2118 }
2119
2120 // convert to upper case, return the copy of the string
2121 wxString wxString::Upper() const
2122 { wxString s(*this); return s.MakeUpper(); }
2123
2124 // convert to lower case, return the copy of the string
2125 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }