X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/4626c57c58bfb151d6e1d5177c180c18e91c6613..2865d42dd600e0260cb8b9c4310b587c3558798c:/src/common/tokenzr.cpp diff --git a/src/common/tokenzr.cpp b/src/common/tokenzr.cpp index f69e8e6a73..2822025bb7 100644 --- a/src/common/tokenzr.cpp +++ b/src/common/tokenzr.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Name: tokenzr.cpp +// Name: src/common/tokenzr.cpp // Purpose: String tokenizer // Author: Guilhem Lavaux // Modified by: Vadim Zeitlin (almost full rewrite) @@ -25,7 +25,11 @@ #endif #include "wx/tokenzr.h" -#include "wx/arrstr.h" + +#ifndef WX_PRECOMP + #include "wx/arrstr.h" + #include "wx/crt.h" +#endif // Required for wxIs... functions #include <ctype.h> @@ -34,6 +38,42 @@ // implementation // ============================================================================ +// ---------------------------------------------------------------------------- +// helpers +// ---------------------------------------------------------------------------- + +static wxString::const_iterator +find_first_of(const wxChar *delims, size_t len, + const wxString::const_iterator& from, + const wxString::const_iterator& end) +{ + wxASSERT_MSG( from <= end, wxT("invalid index") ); + + for ( wxString::const_iterator i = from; i != end; ++i ) + { + if ( wxTmemchr(delims, *i, len) ) + return i; + } + + return end; +} + +static wxString::const_iterator +find_first_not_of(const wxChar *delims, size_t len, + const wxString::const_iterator& from, + const wxString::const_iterator& end) +{ + wxASSERT_MSG( from <= end, wxT("invalid index") ); + + for ( wxString::const_iterator i = from; i != end; ++i ) + { + if ( !wxTmemchr(delims, *i, len) ) + return i; + } + + return end; +} + // ---------------------------------------------------------------------------- // wxStringTokenizer construction // ---------------------------------------------------------------------------- @@ -55,14 +95,14 @@ 
void wxStringTokenizer::SetString(const wxString& str, // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for // whitespace delimiters, strtok() behaviour is better because we want // to count consecutive spaces as one delimiter) - const wxChar *p; - for ( p = delims.c_str(); *p; p++ ) + wxString::const_iterator p; + for ( p = delims.begin(); p != delims.end(); ++p ) { if ( !wxIsspace(*p) ) break; } - if ( *p ) + if ( p != delims.end() ) { // not whitespace char in delims mode = wxTOKEN_RET_EMPTY; @@ -74,7 +114,13 @@ void wxStringTokenizer::SetString(const wxString& str, } } - m_delims = delims; +#if wxUSE_UNICODE // FIXME-UTF8: only wc_str() + m_delims = delims.wc_str(); +#else + m_delims = delims.mb_str(); +#endif + m_delimsLen = delims.length(); + m_mode = mode; Reinit(str); @@ -82,11 +128,13 @@ void wxStringTokenizer::SetString(const wxString& str, void wxStringTokenizer::Reinit(const wxString& str) { - wxASSERT_MSG( IsOk(), _T("you should call SetString() first") ); + wxASSERT_MSG( IsOk(), wxT("you should call SetString() first") ); m_string = str; - m_pos = 0; - m_lastDelim = _T('\0'); + m_stringEnd = m_string.end(); + m_pos = m_string.begin(); + m_lastDelim = wxT('\0'); + m_hasMoreTokens = MoreTokens_Unknown; } // ---------------------------------------------------------------------------- @@ -96,9 +144,28 @@ void wxStringTokenizer::Reinit(const wxString& str) // do we have more of them? bool wxStringTokenizer::HasMoreTokens() const { - wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") ); + // GetNextToken() calls HasMoreTokens() and so HasMoreTokens() is called + // twice in every iteration in the following common usage pattern: + // while ( HasMoreTokens() ) + // GetNextToken(); + // We optimize this case by caching HasMoreTokens() return value here: + if ( m_hasMoreTokens == MoreTokens_Unknown ) + { + bool r = DoHasMoreTokens(); + wxConstCast(this, wxStringTokenizer)->m_hasMoreTokens = + r ? 
MoreTokens_Yes : MoreTokens_No; + return r; + } + else + return m_hasMoreTokens == MoreTokens_Yes; +} - if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos ) +bool wxStringTokenizer::DoHasMoreTokens() const +{ + wxCHECK_MSG( IsOk(), false, wxT("you should call SetString() first") ); + + if ( find_first_not_of(m_delims, m_delimsLen, m_pos, m_stringEnd) + != m_stringEnd ) { // there are non delimiter characters left, so we do have more tokens return true; @@ -110,7 +177,7 @@ bool wxStringTokenizer::HasMoreTokens() const case wxTOKEN_RET_DELIMS: // special hack for wxTOKEN_RET_EMPTY: we should return the initial // empty token even if there are only delimiters after it - return m_pos == 0 && !m_string.empty(); + return !m_string.empty() && m_pos == m_string.begin(); case wxTOKEN_RET_EMPTY_ALL: // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had @@ -119,11 +186,11 @@ bool wxStringTokenizer::HasMoreTokens() const // up to the end of the string in GetNextToken(), but if it is not // NUL yet we still have this last token to return even if m_pos is // already at m_string.length() - return m_pos < m_string.length() || m_lastDelim != _T('\0'); + return m_pos < m_stringEnd || m_lastDelim != wxT('\0'); case wxTOKEN_INVALID: case wxTOKEN_DEFAULT: - wxFAIL_MSG( _T("unexpected tokenizer mode") ); + wxFAIL_MSG( wxT("unexpected tokenizer mode") ); // fall through case wxTOKEN_STRTOK: @@ -137,13 +204,13 @@ bool wxStringTokenizer::HasMoreTokens() const // count the number of (remaining) tokens in the string size_t wxStringTokenizer::CountTokens() const { - wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") ); + wxCHECK_MSG( IsOk(), 0, wxT("you should call SetString() first") ); // VZ: this function is IMHO not very useful, so it's probably not very // important if its implementation here is not as efficient as it // could be -- but OTOH like this we're sure to get the correct answer // in all modes - wxStringTokenizer tkz(m_string.c_str() + 
m_pos, m_delims, m_mode); + wxStringTokenizer tkz(wxString(m_pos, m_stringEnd), m_delims, m_mode); size_t count = 0; while ( tkz.HasMoreTokens() ) @@ -170,36 +237,39 @@ wxString wxStringTokenizer::GetNextToken() break; } + m_hasMoreTokens = MoreTokens_Unknown; + // find the end of this token - size_t pos = m_string.find_first_of(m_delims, m_pos); + wxString::const_iterator pos = + find_first_of(m_delims, m_delimsLen, m_pos, m_stringEnd); // and the start of the next one - if ( pos == wxString::npos ) + if ( pos == m_stringEnd ) { // no more delimiters, the token is everything till the end of // string - token.assign(m_string, m_pos, wxString::npos); + token.assign(m_pos, m_stringEnd); // skip the token - m_pos = m_string.length(); + m_pos = m_stringEnd; // it wasn't terminated - m_lastDelim = _T('\0'); + m_lastDelim = wxT('\0'); } else // we found a delimiter at pos { // in wxTOKEN_RET_DELIMS mode we return the delimiter character // with token, otherwise leave it out - size_t len = pos - m_pos; + wxString::const_iterator tokenEnd(pos); if ( m_mode == wxTOKEN_RET_DELIMS ) - len++; + ++tokenEnd; - token.assign(m_string, m_pos, len); + token.assign(m_pos, tokenEnd); // skip the token and the trailing delimiter m_pos = pos + 1; - m_lastDelim = m_string[pos]; + m_lastDelim = (pos == m_stringEnd) ? wxT('\0') : (wxChar)*pos; } } while ( !AllowEmpty() && token.empty() );