X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/b053bf9ef4fa93ede7869438fa61e664b453ef18..c4021a7920ab9f41b1553e28f1177b6e39a6d901:/include/wx/string.h diff --git a/include/wx/string.h b/include/wx/string.h index 845316e883..b6cb7836b3 100644 --- a/include/wx/string.h +++ b/include/wx/string.h @@ -69,18 +69,14 @@ // use in DLL build under pre-Vista Windows so we disable this code for now, if // anybody really needs to use UTF-8 build under Windows with this optimization // it would have to be re-tested and probably corrected -#if wxUSE_UNICODE_UTF8 && !defined(__WXMSW__) +// CS: under OSX release builds the string destructor/cache cleanup sometimes +// crashes, disable until we find the true reason or a better workaround +#if wxUSE_UNICODE_UTF8 && !defined(__WXMSW__) && !defined(__WXOSX__) #define wxUSE_STRING_POS_CACHE 1 #else #define wxUSE_STRING_POS_CACHE 0 #endif -#ifndef wxHAS_COMPILER_TLS - // FIXME: currently the code only works with compiler TLS support - #undef wxUSE_STRING_POS_CACHE - #define wxUSE_STRING_POS_CACHE 0 -#endif - #if wxUSE_STRING_POS_CACHE #include "wx/tls.h" @@ -430,7 +426,7 @@ private: // the node belongs to a particular iterator instance, it's not copied // when a copy of the iterator is made - DECLARE_NO_COPY_CLASS(wxStringIteratorNode) + wxDECLARE_NO_COPY_CLASS(wxStringIteratorNode); }; #endif // wxUSE_UNICODE_UTF8 @@ -600,8 +596,41 @@ private: unsigned lastUsed; }; +#ifndef wxHAS_COMPILER_TLS + // we must use an accessor function and not a static variable when the TLS + // variables support is implemented in the library (and not by the compiler) + // because the global s_cache variable could be not yet initialized when a + // ctor of another global object is executed and if that ctor uses any + // wxString methods, bad things happen + // + // however notice that this approach does not work when compiler TLS is used, + // at least not with g++ 4.1.2 under amd64 as it apparently compiles code + // using this accessor incorrectly when optimizations are enabled (-O2 is + // enough) -- luckily we don't need it then neither as static __thread + // variables are initialized by 0 anyhow then and so we can use the variable + // directly + WXEXPORT static Cache& GetCache() + { + static wxTLS_TYPE(Cache) s_cache; + + return wxTLS_VALUE(s_cache); + } + + // this helper struct is used to ensure that GetCache() is called during + // static initialization time, i.e. before any threads creation, as otherwise + // the static s_cache construction inside GetCache() wouldn't be MT-safe + friend struct wxStrCacheInitializer; +#else // wxHAS_COMPILER_TLS static wxTLS_TYPE(Cache) ms_cache; + static Cache& GetCache() { return wxTLS_VALUE(ms_cache); } +#endif // !wxHAS_COMPILER_TLS/wxHAS_COMPILER_TLS + static Cache::Element *GetCacheBegin() { return GetCache().cached; } + static Cache::Element *GetCacheEnd() { return GetCacheBegin() + Cache::SIZE; } + static unsigned& LastUsedCacheElement() { return GetCache().lastUsed; } + + // this is used in debug builds only to provide a convenient function, + // callable from a debugger, to show the cache contents friend struct wxStrCacheDumper; // uncomment this to have access to some profiling statistics on program @@ -622,7 +651,7 @@ private: lenhits; // number of cache hits in length() } ms_cacheStats; - friend struct ShowCacheStats; + friend struct wxStrCacheStatsDumper; #define wxCACHE_PROFILE_FIELD_INC(field) ms_cacheStats.field++ #define wxCACHE_PROFILE_FIELD_ADD(field, val) ms_cacheStats.field += (val) @@ -644,9 +673,15 @@ private: // profiling seems to show a small but consistent gain if we use this // simple loop instead of starting from the last used element (there are // a lot of misses in this function...) - for ( Cache::Element *c = ms_cache.cached; - c != ms_cache.cached + Cache::SIZE; - c++ ) + Cache::Element * const cacheBegin = GetCacheBegin(); +#ifndef wxHAS_COMPILER_TLS + // during destruction tls calls may return NULL, in this case return NULL + // immediately without accessing anything else + if ( cacheBegin == NULL ) + return NULL; +#endif + Cache::Element * const cacheEnd = GetCacheEnd(); + for ( Cache::Element *c = cacheBegin; c != cacheEnd; c++ ) { if ( c->str == this ) return c; @@ -660,9 +695,9 @@ private: // its corresponding index in the byte string or not Cache::Element *GetCacheElement() const { - Cache::Element * const cacheBegin = ms_cache.cached; - Cache::Element * const cacheEnd = ms_cache.cached + Cache::SIZE; - Cache::Element * const cacheStart = cacheBegin + ms_cache.lastUsed; + Cache::Element * const cacheBegin = GetCacheBegin(); + Cache::Element * const cacheEnd = GetCacheEnd(); + Cache::Element * const cacheStart = cacheBegin + LastUsedCacheElement(); // check the last used first, this does no (measurable) harm for a miss // but does help for simple loops addressing the same string all the time @@ -685,7 +720,7 @@ private: c->Reset(); // and remember the last used element - ms_cache.lastUsed = c - cacheBegin; + LastUsedCacheElement() = c - cacheBegin; } return c; @@ -708,7 +743,9 @@ private: // used for length caching only so far, i.e. it doesn't count as a hit // from our point of view if ( cache->pos ) + { wxCACHE_PROFILE_FIELD_INC(poshits); + } if ( pos == cache->pos ) return cache->impl; @@ -823,22 +860,26 @@ public: typedef size_t size_type; typedef wxUniChar const_reference; -#if wxUSE_STL +#if wxUSE_STD_STRING #if wxUSE_UNICODE_UTF8 // random access is not O(1), as required by Random Access Iterator #define WX_STR_ITERATOR_TAG std::bidirectional_iterator_tag #else #define WX_STR_ITERATOR_TAG std::random_access_iterator_tag #endif + #define WX_DEFINE_ITERATOR_CATEGORY(cat) typedef cat iterator_category; #else - #define WX_STR_ITERATOR_TAG void /* dummy type */ + // not defining iterator_category at all in this case is better than defining + // it as some dummy type -- at least it results in more intelligible error + // messages + #define WX_DEFINE_ITERATOR_CATEGORY(cat) #endif #define WX_STR_ITERATOR_IMPL(iterator_name, pointer_type, reference_type) \ private: \ typedef wxStringImpl::iterator_name underlying_iterator; \ public: \ - typedef WX_STR_ITERATOR_TAG iterator_category; \ + WX_DEFINE_ITERATOR_CATEGORY(WX_STR_ITERATOR_TAG) \ typedef wxUniChar value_type; \ typedef int difference_type; \ typedef reference_type reference; \ @@ -948,7 +989,7 @@ public: iterator(wxString *str, underlying_iterator ptr) : m_cur(ptr), m_node(str, &m_cur) {} - wxString* str() const { return wx_const_cast(wxString*, m_node.m_str); } + wxString* str() const { return const_cast(m_node.m_str); } wxStringIteratorNode m_node; @@ -1073,7 +1114,7 @@ public: public: typedef T iterator_type; - typedef typename T::iterator_category iterator_category; + WX_DEFINE_ITERATOR_CATEGORY(typename T::iterator_category) typedef typename T::value_type value_type; typedef typename T::difference_type difference_type; typedef typename T::reference reference; @@ -1419,12 +1460,7 @@ public: // truncate the string to given length wxString& Truncate(size_t uiLen); // empty string contents - void Empty() - { - Truncate(0); - - wxASSERT_MSG( empty(), _T("string not empty after call to Empty()?") ); - } + void Empty() { clear(); } // empty the string and free memory void Clear() { clear(); } @@ -1492,11 +1528,53 @@ public: { return at(n); } #endif // size_t != unsigned int - // explicit conversion to C string (use this with printf()!) + + /* + Overview of wxString conversions, implicit and explicit: + + - wxString has a std::[w]string-like c_str() method, however it does + not return a C-style string directly but instead returns wxCStrData + helper object which is convertible to either "char *" narrow string + or "wchar_t *" wide string. Usually the correct conversion will be + applied by the compiler automatically but if this doesn't happen you + need to explicitly choose one using wxCStrData::AsChar() or AsWChar() + methods or another wxString conversion function. + + - One of the places where the conversion does *NOT* happen correctly is + when c_str() is passed to a vararg function such as printf() so you + must *NOT* use c_str() with them. Either use wxPrintf() (all wx + functions do handle c_str() correctly, even if they appear to be + vararg (but they're not, really)) or add an explicit AsChar() or, if + compatibility with previous wxWidgets versions is important, add a + cast to "const char *". + + - In non-STL mode only, wxString is also implicitly convertible to + wxCStrData. The same warning as above applies. + + - c_str() is polymorphic as it can be converted to either narrow or + wide string. If you explicitly need one or the other, choose to use + mb_str() (for narrow) or wc_str() (for wide) instead. Notice that + these functions can return either the pointer to string directly (if + this is what the string uses internally) or a temporary buffer + containing the string and convertible to it. Again, conversion will + usually be done automatically by the compiler but beware of the + vararg functions: you need an explicit cast when using them. + + - There are also non-const versions of mb_str() and wc_str() called + char_str() and wchar_str(). They are only meant to be used with + non-const-correct functions and they always return buffers. + + - Finally wx_str() returns whatever string representation is used by + wxString internally. It may be either a narrow or wide string + depending on wxWidgets build mode but it will always be a raw pointer + (and not a buffer). + */ + + // explicit conversion to wxCStrData wxCStrData c_str() const { return wxCStrData(this); } wxCStrData data() const { return c_str(); } - // implicit conversion to C string + // implicit conversion to wxCStrData operator wxCStrData() const { return c_str(); } // the first two operators conflict with operators for conversion to @@ -1732,7 +1810,7 @@ public: const wchar_t* t_str() const { return wx_str(); } #else const char* t_str() const { return wx_str(); } -#endif +#endif // overloaded assignment @@ -2041,16 +2119,16 @@ public: wxString Left(size_t nCount) const; // get last nCount characters wxString Right(size_t nCount) const; - // get all characters before the first occurance of ch + // get all characters before the first occurrence of ch // (returns the whole string if ch not found) wxString BeforeFirst(wxUniChar ch) const; - // get all characters before the last occurence of ch + // get all characters before the last occurrence of ch // (returns empty string if ch not found) wxString BeforeLast(wxUniChar ch) const; - // get all characters after the first occurence of ch + // get all characters after the first occurrence of ch // (returns empty string if ch not found) wxString AfterFirst(wxUniChar ch) const; - // get all characters after the last occurence of ch + // get all characters after the last occurrence of ch // (returns the whole string if ch not found) wxString AfterLast(wxUniChar ch) const; @@ -2115,7 +2193,7 @@ public: int Find(const wxWCharBuffer& sub) const { return Find(sub.data()); } - // replace first (or all of bReplaceAll) occurences of substring with + // replace first (or all of bReplaceAll) occurrences of substring with // another string, returns the number of replacements made size_t Replace(const wxString& strOld, const wxString& strNew, @@ -2938,7 +3016,7 @@ public: size_t find(const wxCStrData& s, size_t nStart = 0, size_t n = npos) const { return find(s.AsWChar(), nStart, n); } - // find the first occurence of character ch after nStart + // find the first occurrence of character ch after nStart size_t find(wxUniChar ch, size_t nStart = 0) const { #if wxUSE_UNICODE_UTF8 @@ -3004,7 +3082,7 @@ public: size_t rfind(wchar_t ch, size_t nStart = npos) const { return rfind(wxUniChar(ch), nStart); } - // find first/last occurence of any character (not) in the set: + // find first/last occurrence of any character (not) in the set: #if wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8 // FIXME-UTF8: this is not entirely correct, because it doesn't work if // sizeof(wchar_t)==2 and surrogates are present in the string; @@ -3095,7 +3173,7 @@ public: size_t find_last_of(wxUniChar c, size_t nStart = npos) const { return rfind(c, nStart); } - // find first/last occurence of any character not in the set + // find first/last occurrence of any character not in the set // as strspn() (starting from nStart), returns npos on failure size_t find_first_not_of(const wxString& str, size_t nStart = 0) const @@ -3350,7 +3428,7 @@ private: // copying is disallowed as it would result in more than one pointer into // the same linked list - DECLARE_NO_COPY_CLASS(wxStringIteratorNodeHead) + wxDECLARE_NO_COPY_CLASS(wxStringIteratorNodeHead); }; wxStringIteratorNodeHead m_iterators; @@ -3493,7 +3571,7 @@ private: wxString& m_str; wxStringCharType *m_buf; - DECLARE_NO_COPY_CLASS(wxStringInternalBuffer) + wxDECLARE_NO_COPY_CLASS(wxStringInternalBuffer); }; class wxStringInternalBufferLength @@ -3523,13 +3601,13 @@ private: size_t m_len; bool m_lenSet; - DECLARE_NO_COPY_CLASS(wxStringInternalBufferLength) + wxDECLARE_NO_COPY_CLASS(wxStringInternalBufferLength); }; #endif // !wxUSE_STL_BASED_WXSTRING template -class WXDLLIMPEXP_BASE wxStringTypeBufferBase +class wxStringTypeBufferBase { public: typedef T CharType; @@ -3569,8 +3647,7 @@ protected: }; template -class WXDLLIMPEXP_BASE wxStringTypeBufferLengthBase - : public wxStringTypeBufferBase +class wxStringTypeBufferLengthBase : public wxStringTypeBufferBase { public: wxStringTypeBufferLengthBase(wxString& str, size_t lenWanted = 1024) @@ -3604,7 +3681,7 @@ public: this->m_str.assign(this->m_buf.data()); } - DECLARE_NO_COPY_CLASS(wxStringTypeBuffer) + wxDECLARE_NO_COPY_CLASS(wxStringTypeBuffer); }; template @@ -3620,7 +3697,7 @@ public: this->m_str.assign(this->m_buf.data(), this->m_len); } - DECLARE_NO_COPY_CLASS(wxStringTypeBufferLength) + wxDECLARE_NO_COPY_CLASS(wxStringTypeBufferLength); }; #if wxUSE_STL_BASED_WXSTRING @@ -3635,7 +3712,7 @@ public: ~wxStringInternalBuffer() { m_str.m_impl.assign(m_buf.data()); } - DECLARE_NO_COPY_CLASS(wxStringInternalBuffer) + wxDECLARE_NO_COPY_CLASS(wxStringInternalBuffer); }; WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( @@ -3653,7 +3730,7 @@ public: m_str.m_impl.assign(m_buf.data(), m_len); } - DECLARE_NO_COPY_CLASS(wxStringInternalBufferLength) + wxDECLARE_NO_COPY_CLASS(wxStringInternalBufferLength); }; #endif // wxUSE_STL_BASED_WXSTRING @@ -3674,27 +3751,59 @@ typedef wxStringInternalBufferLength wxUTF8StringBufferLength; WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxStringTypeBufferBase ) -class WXDLLIMPEXP_BASE wxUTF8StringBuffer : public wxStringTypeBufferBase +// Note about inlined dtors in the classes below: this is done not for +// performance reasons but just to avoid linking errors in the MSVC DLL build +// under Windows: if a class has non-inline methods it must be declared as +// being DLL-exported but, due to an extremely interesting feature of MSVC 7 +// and later, any template class which is used as a base of a DLL-exported +// class is implicitly made DLL-exported too, as explained at the bottom of +// http://msdn.microsoft.com/en-us/library/twa2aw10.aspx (just to confirm: yes, +// _inheriting_ from a class can change whether it is being exported from DLL) +// +// But this results in link errors because the base template class is not DLL- +// exported, whether it is declared with WXDLLIMPEXP_BASE or not, because it +// does have only inline functions. So the simplest fix is to just make all the +// functions of these classes inline too. + +class wxUTF8StringBuffer : public wxStringTypeBufferBase { public: wxUTF8StringBuffer(wxString& str, size_t lenWanted = 1024) : wxStringTypeBufferBase(str, lenWanted) {} - ~wxUTF8StringBuffer(); + ~wxUTF8StringBuffer() + { + wxMBConvStrictUTF8 conv; + size_t wlen = conv.ToWChar(NULL, 0, m_buf); + wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" ); + + wxStringInternalBuffer wbuf(m_str, wlen); + conv.ToWChar(wbuf, wlen, m_buf); + } - DECLARE_NO_COPY_CLASS(wxUTF8StringBuffer) + wxDECLARE_NO_COPY_CLASS(wxUTF8StringBuffer); }; WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxStringTypeBufferLengthBase ) -class WXDLLIMPEXP_BASE wxUTF8StringBufferLength - : public wxStringTypeBufferLengthBase +class wxUTF8StringBufferLength : public wxStringTypeBufferLengthBase { public: wxUTF8StringBufferLength(wxString& str, size_t lenWanted = 1024) : wxStringTypeBufferLengthBase(str, lenWanted) {} - ~wxUTF8StringBufferLength(); + ~wxUTF8StringBufferLength() + { + wxCHECK_RET(m_lenSet, "length not set"); + + wxMBConvStrictUTF8 conv; + size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len); + wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" ); - DECLARE_NO_COPY_CLASS(wxUTF8StringBufferLength) + wxStringInternalBufferLength wbuf(m_str, wlen); + conv.ToWChar(wbuf, wlen, m_buf, m_len); + wbuf.SetLength(wlen); + } + + wxDECLARE_NO_COPY_CLASS(wxUTF8StringBufferLength); }; #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR @@ -3848,7 +3957,7 @@ inline wxCStrData::wxCStrData(const wxCStrData& data) inline wxCStrData::~wxCStrData() { if ( m_owned ) - delete wx_const_cast(wxString*, m_str); // cast to silence warnings + delete const_cast(m_str); // cast to silence warnings } // simple cases for AsChar() and AsWChar(), the complicated ones are @@ -3971,7 +4080,7 @@ void wxStringIteratorNode::DoSet(const wxString *str, if ( str ) { m_next = str->m_iterators.ptr; - wx_const_cast(wxString*, m_str)->m_iterators.ptr = this; + const_cast(m_str)->m_iterators.ptr = this; if ( m_next ) m_next->m_prev = this; } @@ -3988,7 +4097,7 @@ void wxStringIteratorNode::clear() if ( m_prev ) m_prev->m_next = m_next; else if ( m_str ) // first in the list - wx_const_cast(wxString*, m_str)->m_iterators.ptr = m_next; + const_cast(m_str)->m_iterators.ptr = m_next; m_next = m_prev = NULL; m_citer = NULL; @@ -4005,4 +4114,19 @@ void wxStringIteratorNode::clear() #include "wx/crt.h" #endif +// ---------------------------------------------------------------------------- +// Checks on wxString characters +// ---------------------------------------------------------------------------- + +template + inline bool wxStringCheck(const wxString& val) + { + for ( wxString::const_iterator i = val.begin(); + i != val.end(); + ++i ) + if (T(*i) == 0) + return false; + return true; + } + #endif // _WX_WXSTRING_H_