1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/stringimpl.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
15 * 1) all empty strings use g_strEmpty, nRefs = -1 (set in Init())
16 * 2) AllocBuffer() sets nRefs to 1, Lock() increments it by one
17 * 3) Unlock() decrements nRefs and frees memory if it goes to 0
20 // ===========================================================================
21 // headers, declarations, constants
22 // ===========================================================================
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
32 #include "wx/stringimpl.h"
45 // allocating extra space for each string consumes more memory but speeds up
46 // the concatenation operations (nLen is the current string's length)
47 // NB: EXTRA_ALLOC must be >= 0!
48 #define EXTRA_ALLOC (19 - nLen % 16)
51 // string handling functions used by wxString:
52 #if wxUSE_UNICODE_UTF8
53 #define wxStringMemcpy memcpy
54 #define wxStringMemcmp memcmp
55 #define wxStringMemchr memchr
57 #define wxStringMemcpy wxTmemcpy
58 #define wxStringMemcmp wxTmemcmp
59 #define wxStringMemchr wxTmemchr
63 // ---------------------------------------------------------------------------
64 // static class variables definition
65 // ---------------------------------------------------------------------------
67 #if !wxUSE_STL_BASED_WXSTRING
68 //According to STL _must_ be a -1 size_t
69 const size_t wxStringImpl::npos
= (size_t) -1;
72 // ----------------------------------------------------------------------------
74 // ----------------------------------------------------------------------------
76 #if wxUSE_STL_BASED_WXSTRING
78 // FIXME-UTF8: get rid of this, have only one wxEmptyString
79 #if wxUSE_UNICODE_UTF8
80 extern const wxStringCharType WXDLLIMPEXP_BASE
*wxEmptyStringImpl
= "";
82 extern const wxChar WXDLLIMPEXP_BASE
*wxEmptyString
= _T("");
86 // for an empty string, GetStringData() will return this address: this
87 // structure has the same layout as wxStringData and it's data() method will
88 // return the empty string (dummy pointer)
92 wxStringCharType dummy
;
93 } g_strEmpty
= { {-1, 0, 0}, wxT('\0') };
95 // empty C style string: points to 'string data' byte of g_strEmpty
96 #if wxUSE_UNICODE_UTF8
97 // FIXME-UTF8: get rid of this, have only one wxEmptyString
98 extern const wxStringCharType WXDLLIMPEXP_BASE
*wxEmptyStringImpl
= &g_strEmpty
.dummy
;
99 extern const wxChar WXDLLIMPEXP_BASE
*wxEmptyString
= _T("");
101 extern const wxStringCharType WXDLLIMPEXP_BASE
*wxEmptyString
= &g_strEmpty
.dummy
;
107 #if !wxUSE_STL_BASED_WXSTRING
109 // ----------------------------------------------------------------------------
111 // ----------------------------------------------------------------------------
113 // this small class is used to gather statistics for performance tuning
114 //#define WXSTRING_STATISTICS
115 #ifdef WXSTRING_STATISTICS
119 Averager(const wxStringCharType
*sz
) { m_sz
= sz
; m_nTotal
= m_nCount
= 0; }
121 { wxPrintf("wxString: average %s = %f\n", m_sz
, ((float)m_nTotal
)/m_nCount
); }
123 void Add(size_t n
) { m_nTotal
+= n
; m_nCount
++; }
126 size_t m_nCount
, m_nTotal
;
127 const wxStringCharType
*m_sz
;
128 } g_averageLength("allocation size"),
129 g_averageSummandLength("summand length"),
130 g_averageConcatHit("hit probability in concat"),
131 g_averageInitialLength("initial string length");
133 #define STATISTICS_ADD(av, val) g_average##av.Add(val)
135 #define STATISTICS_ADD(av, val)
136 #endif // WXSTRING_STATISTICS
138 // ===========================================================================
139 // wxStringData class deallocation
140 // ===========================================================================
142 #if defined(__VISUALC__) && defined(_MT) && !defined(_DLL)
143 # pragma message (__FILE__ ": building with Multithreaded non DLL runtime has a performance impact on wxString!")
144 void wxStringData::Free()
150 // ===========================================================================
152 // ===========================================================================
154 // takes nLength elements of psz starting at nPos
155 void wxStringImpl::InitWith(const wxStringCharType
*psz
,
156 size_t nPos
, size_t nLength
)
160 // if the length is not given, assume the string to be NUL terminated
161 if ( nLength
== npos
) {
162 wxASSERT_MSG( nPos
<= wxStrlen(psz
), _T("index out of bounds") );
164 nLength
= wxStrlen(psz
+ nPos
);
167 STATISTICS_ADD(InitialLength
, nLength
);
170 // trailing '\0' is written in AllocBuffer()
171 if ( !AllocBuffer(nLength
) ) {
172 wxFAIL_MSG( _T("out of memory in wxStringImpl::InitWith") );
175 wxStringMemcpy(m_pchData
, psz
+ nPos
, nLength
);
179 wxStringImpl::wxStringImpl(const_iterator first
, const_iterator last
)
183 InitWith(first
.GetPtr(), 0, last
- first
);
187 wxFAIL_MSG( _T("first must be before last") );
192 wxStringImpl::wxStringImpl(size_type n
, wxStringCharType ch
)
198 // ---------------------------------------------------------------------------
200 // ---------------------------------------------------------------------------
202 // allocates memory needed to store a C string of length nLen
203 bool wxStringImpl::AllocBuffer(size_t nLen
)
205 // allocating 0 sized buffer doesn't make sense, all empty strings should
207 wxASSERT( nLen
> 0 );
209 // make sure that we don't overflow
210 wxASSERT( nLen
< (INT_MAX
/ sizeof(wxStringCharType
)) -
211 (sizeof(wxStringData
) + EXTRA_ALLOC
+ 1) );
213 STATISTICS_ADD(Length
, nLen
);
216 // 1) one extra character for '\0' termination
217 // 2) sizeof(wxStringData) for housekeeping info
218 wxStringData
* pData
= (wxStringData
*)
219 malloc(sizeof(wxStringData
) + (nLen
+ EXTRA_ALLOC
+ 1)*sizeof(wxStringCharType
));
221 if ( pData
== NULL
) {
222 // allocation failures are handled by the caller
227 pData
->nDataLength
= nLen
;
228 pData
->nAllocLength
= nLen
+ EXTRA_ALLOC
;
229 m_pchData
= pData
->data(); // data starts after wxStringData
230 m_pchData
[nLen
] = wxT('\0');
234 // must be called before changing this string
235 bool wxStringImpl::CopyBeforeWrite()
237 wxStringData
* pData
= GetStringData();
239 if ( pData
->IsShared() ) {
240 pData
->Unlock(); // memory not freed because shared
241 size_t nLen
= pData
->nDataLength
;
242 if ( !AllocBuffer(nLen
) ) {
243 // allocation failures are handled by the caller
246 wxStringMemcpy(m_pchData
, pData
->data(), nLen
);
249 wxASSERT( !GetStringData()->IsShared() ); // we must be the only owner
254 // must be called before replacing contents of this string
255 bool wxStringImpl::AllocBeforeWrite(size_t nLen
)
257 wxASSERT( nLen
!= 0 ); // doesn't make any sense
259 // must not share string and must have enough space
260 wxStringData
* pData
= GetStringData();
261 if ( pData
->IsShared() || pData
->IsEmpty() ) {
262 // can't work with old buffer, get new one
264 if ( !AllocBuffer(nLen
) ) {
265 // allocation failures are handled by the caller
270 if ( nLen
> pData
->nAllocLength
) {
271 // realloc the buffer instead of calling malloc() again, this is more
273 STATISTICS_ADD(Length
, nLen
);
277 pData
= (wxStringData
*)
279 sizeof(wxStringData
) + (nLen
+ 1)*sizeof(wxStringCharType
));
281 if ( pData
== NULL
) {
282 // allocation failures are handled by the caller
283 // keep previous data since reallocation failed
287 pData
->nAllocLength
= nLen
;
288 m_pchData
= pData
->data();
292 wxASSERT( !GetStringData()->IsShared() ); // we must be the only owner
294 // it doesn't really matter what the string length is as it's going to be
295 // overwritten later but, for extra safety, set it to 0 for now as we may
296 // have some junk in m_pchData
297 GetStringData()->nDataLength
= 0;
302 wxStringImpl
& wxStringImpl::append(size_t n
, wxStringCharType ch
)
304 size_type len
= length();
306 if ( !Alloc(len
+ n
) || !CopyBeforeWrite() ) {
307 wxFAIL_MSG( _T("out of memory in wxStringImpl::append") );
310 GetStringData()->nDataLength
= len
+ n
;
311 m_pchData
[len
+ n
] = '\0';
312 for ( size_t i
= 0; i
< n
; ++i
)
313 m_pchData
[len
+ i
] = ch
;
317 void wxStringImpl::resize(size_t nSize
, wxStringCharType ch
)
319 size_t len
= length();
323 erase(begin() + nSize
, end());
325 else if ( nSize
> len
)
327 append(nSize
- len
, ch
);
329 //else: we have exactly the specified length, nothing to do
332 // allocate enough memory for nLen characters
333 bool wxStringImpl::Alloc(size_t nLen
)
335 wxStringData
*pData
= GetStringData();
336 if ( pData
->nAllocLength
<= nLen
) {
337 if ( pData
->IsEmpty() ) {
340 pData
= (wxStringData
*)
341 malloc(sizeof(wxStringData
) + (nLen
+ 1)*sizeof(wxStringCharType
));
343 if ( pData
== NULL
) {
344 // allocation failure handled by caller
349 pData
->nDataLength
= 0;
350 pData
->nAllocLength
= nLen
;
351 m_pchData
= pData
->data(); // data starts after wxStringData
352 m_pchData
[0u] = wxT('\0');
354 else if ( pData
->IsShared() ) {
355 pData
->Unlock(); // memory not freed because shared
356 size_t nOldLen
= pData
->nDataLength
;
357 if ( !AllocBuffer(nLen
) ) {
358 // allocation failure handled by caller
361 // +1 to copy the terminator, too
362 memcpy(m_pchData
, pData
->data(), (nOldLen
+1)*sizeof(wxStringCharType
));
363 GetStringData()->nDataLength
= nOldLen
;
368 pData
= (wxStringData
*)
369 realloc(pData
, sizeof(wxStringData
) + (nLen
+ 1)*sizeof(wxStringCharType
));
371 if ( pData
== NULL
) {
372 // allocation failure handled by caller
373 // keep previous data since reallocation failed
377 // it's not important if the pointer changed or not (the check for this
378 // is not faster than assigning to m_pchData in all cases)
379 pData
->nAllocLength
= nLen
;
380 m_pchData
= pData
->data();
383 //else: we've already got enough
387 wxStringImpl::iterator
wxStringImpl::begin()
394 wxStringImpl::iterator
wxStringImpl::end()
398 return m_pchData
+ length();
401 wxStringImpl::iterator
wxStringImpl::erase(iterator it
)
403 size_type idx
= it
- begin();
405 return begin() + idx
;
408 wxStringImpl
& wxStringImpl::erase(size_t nStart
, size_t nLen
)
410 wxASSERT(nStart
<= length());
411 size_t strLen
= length() - nStart
;
412 // delete nLen or up to the end of the string characters
413 nLen
= strLen
< nLen
? strLen
: nLen
;
414 wxStringImpl
strTmp(c_str(), nStart
);
415 strTmp
.append(c_str() + nStart
+ nLen
, length() - nStart
- nLen
);
421 wxStringImpl
& wxStringImpl::insert(size_t nPos
,
422 const wxStringCharType
*sz
, size_t n
)
424 wxASSERT( nPos
<= length() );
426 if ( n
== npos
) n
= wxStrlen(sz
);
427 if ( n
== 0 ) return *this;
429 if ( !Alloc(length() + n
) || !CopyBeforeWrite() ) {
430 wxFAIL_MSG( _T("out of memory in wxStringImpl::insert") );
434 memmove(m_pchData
+ nPos
+ n
, m_pchData
+ nPos
,
435 (length() - nPos
) * sizeof(wxStringCharType
));
436 memcpy(m_pchData
+ nPos
, sz
, n
* sizeof(wxStringCharType
));
437 GetStringData()->nDataLength
= length() + n
;
438 m_pchData
[length()] = '\0';
443 void wxStringImpl::swap(wxStringImpl
& str
)
445 wxStringCharType
* tmp
= str
.m_pchData
;
446 str
.m_pchData
= m_pchData
;
450 size_t wxStringImpl::find(const wxStringImpl
& str
, size_t nStart
) const
452 // deal with the special case of empty string first
453 const size_t nLen
= length();
454 const size_t nLenOther
= str
.length();
458 // empty string is a substring of anything
464 // the other string is non empty so can't be our substring
468 wxASSERT( str
.GetStringData()->IsValid() );
469 wxASSERT( nStart
<= nLen
);
471 const wxStringCharType
* const other
= str
.c_str();
474 const wxStringCharType
* p
=
475 (const wxStringCharType
*)wxStringMemchr(c_str() + nStart
,
482 while ( p
- c_str() + nLenOther
<= nLen
&&
483 wxStringMemcmp(p
, other
, nLenOther
) )
488 p
= (const wxStringCharType
*)
489 wxStringMemchr(p
, *other
, nLen
- (p
- c_str()));
495 return p
- c_str() + nLenOther
<= nLen
? p
- c_str() : npos
;
498 size_t wxStringImpl::find(const wxStringCharType
* sz
,
499 size_t nStart
, size_t n
) const
501 return find(wxStringImpl(sz
, n
), nStart
);
504 size_t wxStringImpl::find(wxStringCharType ch
, size_t nStart
) const
506 wxASSERT( nStart
<= length() );
508 const wxStringCharType
*p
= (const wxStringCharType
*)
509 wxStringMemchr(c_str() + nStart
, ch
, length() - nStart
);
511 return p
== NULL
? npos
: p
- c_str();
514 size_t wxStringImpl::rfind(const wxStringImpl
& str
, size_t nStart
) const
516 wxASSERT( str
.GetStringData()->IsValid() );
517 wxASSERT( nStart
== npos
|| nStart
<= length() );
519 if ( length() >= str
.length() )
521 // avoids a corner case later
522 if ( length() == 0 && str
.length() == 0 )
525 // "top" is the point where search starts from
526 size_t top
= length() - str
.length();
528 if ( nStart
== npos
)
529 nStart
= length() - 1;
533 const wxStringCharType
*cursor
= c_str() + top
;
536 if ( wxStringMemcmp(cursor
, str
.c_str(), str
.length()) == 0 )
538 return cursor
- c_str();
540 } while ( cursor
-- > c_str() );
546 size_t wxStringImpl::rfind(const wxStringCharType
* sz
,
547 size_t nStart
, size_t n
) const
549 return rfind(wxStringImpl(sz
, n
), nStart
);
552 size_t wxStringImpl::rfind(wxStringCharType ch
, size_t nStart
) const
554 if ( nStart
== npos
)
560 wxASSERT( nStart
<= length() );
563 const wxStringCharType
*actual
;
564 for ( actual
= c_str() + ( nStart
== npos
? length() : nStart
+ 1 );
565 actual
> c_str(); --actual
)
567 if ( *(actual
- 1) == ch
)
568 return (actual
- 1) - c_str();
574 wxStringImpl
& wxStringImpl::replace(size_t nStart
, size_t nLen
,
575 const wxStringCharType
*sz
, size_t nCount
)
577 // check and adjust parameters
578 const size_t lenOld
= length();
580 wxASSERT_MSG( nStart
<= lenOld
,
581 _T("index out of bounds in wxStringImpl::replace") );
582 size_t nEnd
= nStart
+ nLen
;
583 if ( nLen
> lenOld
- nStart
)
585 // nLen may be out of range, as it can be npos, just clump it down
586 nLen
= lenOld
- nStart
;
590 if ( nCount
== npos
)
591 nCount
= wxStrlen(sz
);
593 // build the new string from 3 pieces: part of this string before nStart,
594 // the new substring and the part of this string after nStart+nLen
596 const size_t lenNew
= lenOld
+ nCount
- nLen
;
599 tmp
.AllocBuffer(lenOld
+ nCount
- nLen
);
601 wxStringCharType
*dst
= tmp
.m_pchData
;
602 memcpy(dst
, m_pchData
, nStart
*sizeof(wxStringCharType
));
605 memcpy(dst
, sz
, nCount
*sizeof(wxStringCharType
));
608 memcpy(dst
, m_pchData
+ nEnd
, (lenOld
- nEnd
)*sizeof(wxStringCharType
));
611 // and replace this string contents with the new one
616 wxStringImpl
wxStringImpl::substr(size_t nStart
, size_t nLen
) const
619 nLen
= length() - nStart
;
620 return wxStringImpl(*this, nStart
, nLen
);
623 // assigns one string to another
624 wxStringImpl
& wxStringImpl::operator=(const wxStringImpl
& stringSrc
)
626 wxASSERT( stringSrc
.GetStringData()->IsValid() );
628 // don't copy string over itself
629 if ( m_pchData
!= stringSrc
.m_pchData
) {
630 if ( stringSrc
.GetStringData()->IsEmpty() ) {
635 GetStringData()->Unlock();
636 m_pchData
= stringSrc
.m_pchData
;
637 GetStringData()->Lock();
644 // assigns a single character
645 wxStringImpl
& wxStringImpl::operator=(wxStringCharType ch
)
647 wxStringCharType
c(ch
);
648 if ( !AssignCopy(1, &c
) ) {
649 wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(wxStringCharType)") );
655 wxStringImpl
& wxStringImpl::operator=(const wxStringCharType
*psz
)
657 if ( !AssignCopy(wxStrlen(psz
), psz
) ) {
658 wxFAIL_MSG( _T("out of memory in wxStringImpl::operator=(const wxStringCharType *)") );
663 // helper function: does real copy
664 bool wxStringImpl::AssignCopy(size_t nSrcLen
,
665 const wxStringCharType
*pszSrcData
)
667 if ( nSrcLen
== 0 ) {
671 if ( !AllocBeforeWrite(nSrcLen
) ) {
672 // allocation failure handled by caller
675 memcpy(m_pchData
, pszSrcData
, nSrcLen
*sizeof(wxStringCharType
));
676 GetStringData()->nDataLength
= nSrcLen
;
677 m_pchData
[nSrcLen
] = wxT('\0');
682 // ---------------------------------------------------------------------------
683 // string concatenation
684 // ---------------------------------------------------------------------------
686 // add something to this string
687 bool wxStringImpl::ConcatSelf(size_t nSrcLen
,
688 const wxStringCharType
*pszSrcData
,
691 STATISTICS_ADD(SummandLength
, nSrcLen
);
693 nSrcLen
= nSrcLen
< nMaxLen
? nSrcLen
: nMaxLen
;
695 // concatenating an empty string is a NOP
697 wxStringData
*pData
= GetStringData();
698 size_t nLen
= pData
->nDataLength
;
700 // take special care when appending part of this string to itself: the code
701 // below reallocates our buffer and this invalidates pszSrcData pointer so
702 // we have to copy it in another temporary string in this case (but avoid
703 // doing this unnecessarily)
704 if ( pszSrcData
>= m_pchData
&& pszSrcData
< m_pchData
+ nLen
)
706 wxStringImpl
tmp(pszSrcData
, nSrcLen
);
707 return ConcatSelf(nSrcLen
, tmp
.m_pchData
, nSrcLen
);
710 size_t nNewLen
= nLen
+ nSrcLen
;
712 // alloc new buffer if current is too small
713 if ( pData
->IsShared() ) {
714 STATISTICS_ADD(ConcatHit
, 0);
716 // we have to allocate another buffer
717 wxStringData
* pOldData
= GetStringData();
718 if ( !AllocBuffer(nNewLen
) ) {
719 // allocation failure handled by caller
722 memcpy(m_pchData
, pOldData
->data(), nLen
*sizeof(wxStringCharType
));
725 else if ( nNewLen
> pData
->nAllocLength
) {
726 STATISTICS_ADD(ConcatHit
, 0);
729 // we have to grow the buffer
730 if ( capacity() < nNewLen
) {
731 // allocation failure handled by caller
736 STATISTICS_ADD(ConcatHit
, 1);
738 // the buffer is already big enough
741 // should be enough space
742 wxASSERT( nNewLen
<= GetStringData()->nAllocLength
);
744 // fast concatenation - all is done in our buffer
745 memcpy(m_pchData
+ nLen
, pszSrcData
, nSrcLen
*sizeof(wxStringCharType
));
747 m_pchData
[nNewLen
] = wxT('\0'); // put terminating '\0'
748 GetStringData()->nDataLength
= nNewLen
; // and fix the length
750 //else: the string to append was empty
754 // get the pointer to writable buffer of (at least) nLen bytes
755 wxStringCharType
*wxStringImpl::DoGetWriteBuf(size_t nLen
)
757 if ( !AllocBeforeWrite(nLen
) ) {
758 // allocation failure handled by caller
762 wxASSERT( GetStringData()->nRefs
== 1 );
763 GetStringData()->Validate(false);
768 // put string back in a reasonable state after GetWriteBuf
769 void wxStringImpl::DoUngetWriteBuf()
771 DoUngetWriteBuf(wxStrlen(m_pchData
));
774 void wxStringImpl::DoUngetWriteBuf(size_t nLen
)
776 wxStringData
* const pData
= GetStringData();
778 wxASSERT_MSG( nLen
< pData
->nAllocLength
, _T("buffer overrun") );
780 // the strings we store are always NUL-terminated
781 pData
->data()[nLen
] = _T('\0');
782 pData
->nDataLength
= nLen
;
783 pData
->Validate(true);
786 #endif // !wxUSE_STL_BASED_WXSTRING