X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/b5343e065a0a9bb86aeac8a366ca7311c15e9d29..6a3ea4ead89a97b4dcc6a1fdc8c48119bdaabc23:/include/wx/string.h diff --git a/include/wx/string.h b/include/wx/string.h index 6ab3eda765..0a72bfe320 100644 --- a/include/wx/string.h +++ b/include/wx/string.h @@ -24,6 +24,7 @@ #include "wx/defs.h" // everybody should include this +#ifndef __WXPALMOS5__ #if defined(__WXMAC__) || defined(__VISAGECPP__) #include #endif @@ -46,10 +47,7 @@ #ifdef HAVE_STRCASECMP_IN_STRINGS_H #include // for strcasecmp() #endif // HAVE_STRCASECMP_IN_STRINGS_H - -#ifdef __WXPALMOS__ - #include -#endif +#endif // ! __WXPALMOS5__ #include "wx/wxcrtbase.h" // for wxChar, wxStrlen() etc. #include "wx/strvararg.h" @@ -67,6 +65,11 @@ class WXDLLIMPEXP_FWD_BASE wxString; #define WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER 1 #endif +namespace wxPrivate +{ + template struct wxStringAsBufHelper; +} + // --------------------------------------------------------------------------- // macros // --------------------------------------------------------------------------- @@ -433,7 +436,10 @@ private: size_t len; SubstrBufFromType(const T& data_, size_t len_) - : data(data_), len(len_) {} + : data(data_), len(len_) + { + wxASSERT_MSG( len != npos, "must have real length" ); + } }; #if wxUSE_UNICODE_UTF8 @@ -657,7 +663,14 @@ public: iterator(const iterator& i) : m_cur(i.m_cur), m_node(i.str(), &m_cur) {} iterator& operator=(const iterator& i) - { m_cur = i.m_cur; m_node.set(i.str(), &m_cur); return *this; } + { + if (&i != this) + { + m_cur = i.m_cur; + m_node.set(i.str(), &m_cur); + } + return *this; + } reference operator*() { return wxUniCharRef::CreateForString(m_node, m_cur); } @@ -692,7 +705,14 @@ public: : m_cur(i.m_cur), m_node(i.str(), &m_cur) {} const_iterator& operator=(const const_iterator& i) - { m_cur = i.m_cur; m_node.set(i.str(), &m_cur); return *this; } + { + if (&i != this) + { + m_cur = i.m_cur; + m_node.set(i.str(), &m_cur); + } + return *this; + } const_iterator& operator=(const iterator& i) { m_cur = i.m_cur; m_node.set(i.str(), &m_cur); return *this; } @@ -938,8 +958,11 @@ public: wxString(const wxWCharBuffer& buf) { assign(buf.data()); } // FIXME-UTF8: fix for embedded NUL and buffer length + // NB: this version uses m_impl.c_str() to force making a copy of the + // string, so that "wxString(str.c_str())" idiom for passing strings + // between threads works wxString(const wxCStrData& cstr) - : m_impl(cstr.AsString().m_impl) { } + : m_impl(cstr.AsString().m_impl.c_str()) { } // as we provide both ctors with this signature for both char and unsigned // char string, we need to provide one for wxCStrData to resolve ambiguity @@ -999,6 +1022,13 @@ public: #endif #endif // wxUSE_STL + wxString Clone() const + { + // make a deep copy of the string, i.e. the returned string will have + // ref count = 1 with refcounted implementation + return wxString::FromImpl(wxStringImpl(m_impl.c_str(), m_impl.length())); + } + // first valid index position const_iterator begin() const { return const_iterator(this, m_impl.begin()); } iterator begin() { return iterator(this, m_impl.begin()); } @@ -1186,6 +1216,30 @@ public: { return mb_str(conv); } wxWritableWCharBuffer wchar_str() const { return wc_str(); } + // conversion to the buffer of the given type T (= char or wchar_t) and + // also optionally return the buffer length + // + // this is mostly/only useful for the template functions + // + // FIXME-VC6: the second argument only exists for VC6 which doesn't support + // explicit template function selection, do not use it unless + // you must support VC6! + template + wxCharTypeBuffer tchar_str(size_t *len = NULL, + T * WXUNUSED(dummy) = NULL) const + { +#if wxUSE_UNICODE + // we need a helper dispatcher depending on type + return wxPrivate::wxStringAsBufHelper::Get(*this, len); +#else // ANSI + // T can only be char in ANSI build + if ( len ) + *len = length(); + + return wxCharTypeBuffer::CreateNonOwned(wx_str()); +#endif // Unicode build kind + } + // conversion to/from plain (i.e. 7 bit) ASCII: this is useful for // converting numbers or strings which are certain not to contain special // chars (typically system functions, X atoms, environment variables etc.) @@ -1214,7 +1268,7 @@ public: // conversion to/from UTF-8: #if wxUSE_UNICODE_UTF8 - static wxString FromUTF8(const char *utf8) + static wxString FromUTF8Unchecked(const char *utf8) { if ( !utf8 ) return wxEmptyString; @@ -1222,23 +1276,51 @@ public: wxASSERT( wxStringOperations::IsValidUtf8String(utf8) ); return FromImpl(wxStringImpl(utf8)); } - static wxString FromUTF8(const char *utf8, size_t len) + static wxString FromUTF8Unchecked(const char *utf8, size_t len) { if ( !utf8 ) return wxEmptyString; if ( len == npos ) - return FromUTF8(utf8); + return FromUTF8Unchecked(utf8); wxASSERT( wxStringOperations::IsValidUtf8String(utf8, len) ); return FromImpl(wxStringImpl(utf8, len)); } + + static wxString FromUTF8(const char *utf8) + { + if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8) ) + return ""; + + return FromImpl(wxStringImpl(utf8)); + } + static wxString FromUTF8(const char *utf8, size_t len) + { + if ( len == npos ) + return FromUTF8(utf8); + + if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8, len) ) + return ""; + + return FromImpl(wxStringImpl(utf8, len)); + } + const char* utf8_str() const { return wx_str(); } const char* ToUTF8() const { return wx_str(); } + + // this function exists in UTF-8 build only and returns the length of the + // internal UTF-8 representation + size_t utf8_length() const { return m_impl.length(); } #elif wxUSE_UNICODE_WCHAR - static wxString FromUTF8(const char *utf8) - { return wxString(utf8, wxMBConvUTF8()); } - static wxString FromUTF8(const char *utf8, size_t len) + static wxString FromUTF8(const char *utf8, size_t len = npos) { return wxString(utf8, wxMBConvUTF8(), len); } + static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos) + { + const wxString s(utf8, wxMBConvUTF8(), len); + wxASSERT_MSG( !utf8 || !*utf8 || !s.empty(), + "string must be valid UTF-8" ); + return s; + } const wxCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); } const wxCharBuffer ToUTF8() const { return utf8_str(); } #else // ANSI @@ -1246,9 +1328,20 @@ public: { return wxString(wxMBConvUTF8().cMB2WC(utf8)); } static wxString FromUTF8(const char *utf8, size_t len) { - size_t wlen; - wxWCharBuffer buf(wxMBConvUTF8().cMB2WC(utf8, len == npos ? wxNO_LEN : len, &wlen)); - return wxString(buf.data(), wlen); + size_t wlen; + wxWCharBuffer buf(wxMBConvUTF8().cMB2WC(utf8, len == npos ? wxNO_LEN : len, &wlen)); + return wxString(buf.data(), wlen); + } + static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos) + { + size_t wlen; + wxWCharBuffer buf(wxMBConvUTF8().cMB2WC(utf8, + len == npos ? wxNO_LEN : len, + &wlen)); + wxASSERT_MSG( !utf8 || !*utf8 || wlen, + "string must be valid UTF-8" ); + + return wxString(buf.data(), wlen); } const wxCharBuffer utf8_str() const { return wxMBConvUTF8().cWC2MB(wc_str()); } @@ -1293,7 +1386,7 @@ public: const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); } #if wxUSE_UNICODE_WCHAR - const wxChar* wc_str() const { return wx_str(); } + const wchar_t* wc_str() const { return wx_str(); } #elif wxUSE_UNICODE_UTF8 const wxWCharBuffer wc_str() const; #endif @@ -1311,7 +1404,7 @@ public: const wxChar* mb_str() const { return wx_str(); } // for compatibility with wxUSE_UNICODE version - const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return wx_str(); } + const char* mb_str(const wxMBConv& WXUNUSED(conv)) const { return wx_str(); } const wxWX2MBbuf mbc_str() const { return mb_str(); } @@ -1321,15 +1414,32 @@ public: const wxCharBuffer fn_str() const { return wxConvFile.cWC2WX( wc_str( wxConvLibc ) ); } #endif // Unicode/ANSI +#if wxUSE_UNICODE_UTF8 + const wxWCharBuffer t_str() const { return wc_str(); } +#elif wxUSE_UNICODE_WCHAR + const wchar_t* t_str() const { return wx_str(); } +#else + const char* t_str() const { return wx_str(); } +#endif + + // overloaded assignment // from another wxString wxString& operator=(const wxString& stringSrc) - { m_impl = stringSrc.m_impl; return *this; } + { if (&stringSrc != this) m_impl = stringSrc.m_impl; return *this; } wxString& operator=(const wxCStrData& cstr) { return *this = cstr.AsString(); } // from a character wxString& operator=(wxUniChar ch) - { m_impl = wxStringOperations::EncodeChar(ch); return *this; } + { +#if wxUSE_UNICODE_UTF8 + if ( !ch.IsAscii() ) + m_impl = wxStringOperations::EncodeChar(ch); + else +#endif + m_impl = (wxStringCharType)ch; + return *this; + } wxString& operator=(wxUniCharRef ch) { return operator=((wxUniChar)ch); } wxString& operator=(char ch) @@ -1419,6 +1529,16 @@ public: { append(psz); return *this; } wxString& Append(const wxWCharBuffer& psz) { append(psz); return *this; } + wxString& Append(const char* psz, size_t nLen) + { append(psz, nLen); return *this; } + wxString& Append(const wchar_t* pwz, size_t nLen) + { append(pwz, nLen); return *this; } + wxString& Append(const wxCStrData& psz, size_t nLen) + { append(psz, nLen); return *this; } + wxString& Append(const wxCharBuffer& psz, size_t nLen) + { append(psz, nLen); return *this; } + wxString& Append(const wxWCharBuffer& psz, size_t nLen) + { append(psz, nLen); return *this; } // append count copies of given character wxString& Append(wxUniChar ch, size_t count = 1u) { append(count, ch); return *this; } @@ -1430,10 +1550,6 @@ public: { append(count, ch); return *this; } wxString& Append(wchar_t ch, size_t count = 1u) { append(count, ch); return *this; } - wxString& Append(const char* psz, size_t nLen) - { append(psz, nLen); return *this; } - wxString& Append(const wchar_t* pwz, size_t nLen) - { append(pwz, nLen); return *this; } // prepend a string, return the string itself wxString& Prepend(const wxString& str) @@ -1508,14 +1624,23 @@ public: { return compare(s); } // same as Cmp() but not case-sensitive int CmpNoCase(const wxString& s) const; + // test for the string equality, either considering case or not // (if compareWithCase then the case matters) bool IsSameAs(const wxString& str, bool compareWithCase = true) const - { return (compareWithCase ? Cmp(str) : CmpNoCase(str)) == 0; } + { +#if !wxUSE_UNICODE_UTF8 + // in UTF-8 build, length() is O(n) and doing this would be _slower_ + if ( length() != str.length() ) + return false; +#endif + return (compareWithCase ? Cmp(str) : CmpNoCase(str)) == 0; + } bool IsSameAs(const char *str, bool compareWithCase = true) const { return (compareWithCase ? Cmp(str) : CmpNoCase(str)) == 0; } bool IsSameAs(const wchar_t *str, bool compareWithCase = true) const { return (compareWithCase ? Cmp(str) : CmpNoCase(str)) == 0; } + bool IsSameAs(const wxCStrData& str, bool compareWithCase = true) const { return IsSameAs(str.AsString(), compareWithCase); } bool IsSameAs(const wxCharBuffer& str, bool compareWithCase = true) const @@ -1700,7 +1825,7 @@ public: // raw access to string memory // ensure that string has space for at least nLen characters // only works if the data of this string is not shared - bool Alloc(size_t nLen) { reserve(nLen); /*return capacity() >= nLen;*/ return true; } + bool Alloc(size_t nLen) { reserve(nLen); return capacity() >= nLen; } // minimize the string's memory // only works if the data of this string is not shared bool Shrink(); @@ -1849,6 +1974,12 @@ public: { return append(str.data()); } wxString& append(const wxWCharBuffer& str) { return append(str.data()); } + wxString& append(const wxCStrData& str, size_t n) + { return append(str.AsString(), 0, n); } + wxString& append(const wxCharBuffer& str, size_t n) + { return append(str.data(), n); } + wxString& append(const wxWCharBuffer& str, size_t n) + { return append(str.data(), n); } // append n copies of ch wxString& append(size_t n, wxUniChar ch) @@ -1861,6 +1992,15 @@ public: m_impl.append(n, (wxStringCharType)ch); return *this; } + wxString& append(size_t n, wxUniCharRef ch) + { return append(n, wxUniChar(ch)); } + wxString& append(size_t n, char ch) + { return append(n, wxUniChar(ch)); } + wxString& append(size_t n, unsigned char ch) + { return append(n, wxUniChar(ch)); } + wxString& append(size_t n, wchar_t ch) + { return append(n, wxUniChar(ch)); } + // append from first to last wxString& append(const_iterator first, const_iterator last) { m_impl.append(first.impl(), last.impl()); return *this; } @@ -2219,8 +2359,15 @@ public: // find the first occurence of character ch after nStart size_t find(wxUniChar ch, size_t nStart = 0) const { - return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch), - PosToImpl(nStart))); +#if wxUSE_UNICODE_UTF8 + if ( !ch.IsAscii() ) + return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch), + PosToImpl(nStart))); + else +#endif + return PosFromImpl(m_impl.find((wxStringCharType)ch, + PosToImpl(nStart))); + } size_t find(wxUniCharRef ch, size_t nStart = 0) const { return find(wxUniChar(ch), nStart); } @@ -2257,8 +2404,14 @@ public: // as find, but from the end size_t rfind(wxUniChar ch, size_t nStart = npos) const { - return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch), - PosToImpl(nStart))); +#if wxUSE_UNICODE_UTF8 + if ( !ch.IsAscii() ) + return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch), + PosToImpl(nStart))); + else +#endif + return PosFromImpl(m_impl.rfind((wxStringCharType)ch, + PosToImpl(nStart))); } size_t rfind(wxUniCharRef ch, size_t nStart = npos) const { return rfind(wxUniChar(ch), nStart); } @@ -2496,7 +2649,15 @@ public: { return operator+=(s.data()); } // string += char wxString& operator+=(wxUniChar ch) - { m_impl += wxStringOperations::EncodeChar(ch); return *this; } + { +#if wxUSE_UNICODE_UTF8 + if ( !ch.IsAscii() ) + m_impl += wxStringOperations::EncodeChar(ch); + else +#endif + m_impl += (wxStringCharType)ch; + return *this; + } wxString& operator+=(wxUniCharRef ch) { return *this += wxUniChar(ch); } wxString& operator+=(int ch) { return *this += wxUniChar(ch); } wxString& operator+=(char ch) { return *this += wxUniChar(ch); } @@ -2631,6 +2792,67 @@ inline wxString operator+(wchar_t ch, const wxString& string) #define wxGetEmptyString() wxString() +// ---------------------------------------------------------------------------- +// helper functions which couldn't be defined inline +// ---------------------------------------------------------------------------- + +namespace wxPrivate +{ + +#if wxUSE_UNICODE_WCHAR + +template <> +struct wxStringAsBufHelper +{ + static wxCharBuffer Get(const wxString& s, size_t *len) + { + wxCharBuffer buf(s.mb_str()); + if ( len ) + *len = buf ? strlen(buf) : 0; + return buf; + } +}; + +template <> +struct wxStringAsBufHelper +{ + static wxWCharBuffer Get(const wxString& s, size_t *len) + { + if ( len ) + *len = s.length(); + return wxWCharBuffer::CreateNonOwned(s.wx_str()); + } +}; + +#elif wxUSE_UNICODE_UTF8 + +template <> +struct wxStringAsBufHelper +{ + static wxCharBuffer Get(const wxString& s, size_t *len) + { + if ( len ) + *len = s.utf8_length(); + return wxCharBuffer::CreateNonOwned(s.wx_str()); + } +}; + +template <> +struct wxStringAsBufHelper +{ + static wxWCharBuffer Get(const wxString& s, size_t *len) + { + wxWCharBuffer wbuf(s.wc_str()); + if ( len ) + *len = wxWcslen(wbuf); + return wbuf; + } +}; + +#endif // Unicode build kind + +} // namespace wxPrivate + // ---------------------------------------------------------------------------- // wxStringBuffer: a tiny class allowing to get a writable pointer into string // ---------------------------------------------------------------------------- @@ -2698,8 +2920,30 @@ public: wxStringTypeBufferBase(wxString& str, size_t lenWanted = 1024) : m_str(str), m_buf(lenWanted) - { } - + { + // for compatibility with old wxStringBuffer which provided direct + // access to wxString internal buffer, initialize ourselves with the + // string initial contents + + // FIXME-VC6: remove the ugly (CharType *)NULL and use normal + // tchar_str + size_t len; + const wxCharTypeBuffer buf(str.tchar_str(&len, (CharType *)NULL)); + if ( buf ) + { + if ( len > lenWanted ) + { + // in this case there is not enough space for terminating NUL, + // ensure that we still put it there + m_buf.data()[lenWanted] = 0; + len = lenWanted - 1; + } + + memcpy(m_buf.data(), buf, (len + 1)*sizeof(CharType)); + } + //else: conversion failed, this can happen when trying to get Unicode + // string contents into a char string + } operator CharType*() { return m_buf.data(); } @@ -2710,22 +2954,25 @@ protected: template class WXDLLIMPEXP_BASE wxStringTypeBufferLengthBase + : public wxStringTypeBufferBase { public: - typedef T CharType; - wxStringTypeBufferLengthBase(wxString& str, size_t lenWanted = 1024) - : m_str(str), m_buf(lenWanted), m_len(0), m_lenSet(false) + : wxStringTypeBufferBase(str, lenWanted), + m_len(0), + m_lenSet(false) { } - operator CharType*() { return m_buf.data(); } + ~wxStringTypeBufferLengthBase() + { + wxASSERT_MSG( this->m_lenSet, "forgot to call SetLength()" ); + } + void SetLength(size_t length) { m_len = length; m_lenSet = true; } protected: - wxString& m_str; - wxCharTypeBuffer m_buf; - size_t m_len; - bool m_lenSet; + size_t m_len; + bool m_lenSet; }; template @@ -2733,7 +2980,9 @@ class wxStringTypeBuffer : public wxStringTypeBufferBase { public: wxStringTypeBuffer(wxString& str, size_t lenWanted = 1024) - : wxStringTypeBufferBase(str, lenWanted) {} + : wxStringTypeBufferBase(str, lenWanted) + { } + ~wxStringTypeBuffer() { this->m_str.assign(this->m_buf.data()); @@ -2747,11 +2996,11 @@ class wxStringTypeBufferLength : public wxStringTypeBufferLengthBase { public: wxStringTypeBufferLength(wxString& str, size_t lenWanted = 1024) - : wxStringTypeBufferLengthBase(str, lenWanted) {} + : wxStringTypeBufferLengthBase(str, lenWanted) + { } ~wxStringTypeBufferLength() { - wxASSERT(this->m_lenSet); this->m_str.assign(this->m_buf.data(), this->m_len); } @@ -2785,12 +3034,12 @@ public: ~wxStringInternalBufferLength() { - wxASSERT(m_lenSet); m_str.m_impl.assign(m_buf.data(), m_len); } DECLARE_NO_COPY_CLASS(wxStringInternalBufferLength) }; + #endif // wxUSE_STL_BASED_WXSTRING @@ -2844,12 +3093,10 @@ wxDEFINE_ALL_COMPARISONS(const wxChar *, const wxString&, wxCMP_WXCHAR_STRING) #undef wxCMP_WXCHAR_STRING -// note that there is an optimization in operator==() and !=(): we (quickly) -// checks the strings length first, before comparing their data inline bool operator==(const wxString& s1, const wxString& s2) - { return (s1.Len() == s2.Len()) && (s1.Cmp(s2) == 0); } + { return s1.IsSameAs(s2); } inline bool operator!=(const wxString& s1, const wxString& s2) - { return (s1.Len() != s2.Len()) || (s1.Cmp(s2) != 0); } + { return !s1.IsSameAs(s2); } inline bool operator< (const wxString& s1, const wxString& s2) { return s1.Cmp(s2) < 0; } inline bool operator> (const wxString& s1, const wxString& s2)