X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/0bf751e7e901fffb9ba9d850948915e33456524a..0bbe61b8c18a1795189f0cf73cc61c14a0fb846d:/src/common/tokenzr.cpp diff --git a/src/common/tokenzr.cpp b/src/common/tokenzr.cpp index 34ba2b0523..8c29cd7cfa 100644 --- a/src/common/tokenzr.cpp +++ b/src/common/tokenzr.cpp @@ -38,6 +38,42 @@ // implementation // ============================================================================ +// ---------------------------------------------------------------------------- +// helpers +// ---------------------------------------------------------------------------- + +static wxString::const_iterator +find_first_of(const wxChar *delims, size_t len, + const wxString::const_iterator& from, + const wxString::const_iterator& end) +{ + wxASSERT_MSG( from <= end, _T("invalid index") ); + + for ( wxString::const_iterator i = from; i != end; ++i ) + { + if ( wxTmemchr(delims, *i, len) ) + return i; + } + + return end; +} + +static wxString::const_iterator +find_first_not_of(const wxChar *delims, size_t len, + const wxString::const_iterator& from, + const wxString::const_iterator& end) +{ + wxASSERT_MSG( from <= end, _T("invalid index") ); + + for ( wxString::const_iterator i = from; i != end; ++i ) + { + if ( !wxTmemchr(delims, *i, len) ) + return i; + } + + return end; +} + // ---------------------------------------------------------------------------- // wxStringTokenizer construction // ---------------------------------------------------------------------------- @@ -59,14 +95,14 @@ void wxStringTokenizer::SetString(const wxString& str, // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for // whitespace delimiters, strtok() behaviour is better because we want // to count consecutive spaces as one delimiter) - const wxChar *p; - for ( p = delims.c_str(); *p; p++ ) + wxString::const_iterator p; + for ( p = delims.begin(); p != delims.end(); ++p ) { if ( !wxIsspace(*p) ) break; } - if ( *p ) + if ( p != delims.end() ) { // not whitespace char in delims mode = wxTOKEN_RET_EMPTY; @@ -78,7 +114,13 @@ void wxStringTokenizer::SetString(const wxString& str, } } - m_delims = delims; +#if wxUSE_UNICODE // FIXME-UTF8: only wc_str() + m_delims = delims.wc_str(); +#else + m_delims = delims.mb_str(); +#endif + m_delimsLen = delims.length(); + m_mode = mode; Reinit(str); @@ -89,8 +131,10 @@ void wxStringTokenizer::Reinit(const wxString& str) wxASSERT_MSG( IsOk(), _T("you should call SetString() first") ); m_string = str; - m_pos = 0; + m_stringEnd = m_string.end(); + m_pos = m_string.begin(); m_lastDelim = _T('\0'); + m_hasMoreTokens = MoreTokens_Unknown; } // ---------------------------------------------------------------------------- @@ -99,10 +143,29 @@ void wxStringTokenizer::Reinit(const wxString& str) // do we have more of them? bool wxStringTokenizer::HasMoreTokens() const +{ + // GetNextToken() calls HasMoreTokens() and so HasMoreTokens() is called + // twice in every interation in the following common usage patten: + // while ( HasMoreTokens() ) + // GetNextToken(); + // We optimize this case by caching HasMoreTokens() return value here: + if ( m_hasMoreTokens == MoreTokens_Unknown ) + { + bool r = DoHasMoreTokens(); + wxConstCast(this, wxStringTokenizer)->m_hasMoreTokens = + r ? MoreTokens_Yes : MoreTokens_No; + return r; + } + else + return m_hasMoreTokens == MoreTokens_Yes; +} + +bool wxStringTokenizer::DoHasMoreTokens() const { wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") ); - if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos ) + if ( find_first_not_of(m_delims, m_delimsLen, m_pos, m_stringEnd) + != m_stringEnd ) { // there are non delimiter characters left, so we do have more tokens return true; @@ -114,7 +177,7 @@ bool wxStringTokenizer::HasMoreTokens() const case wxTOKEN_RET_DELIMS: // special hack for wxTOKEN_RET_EMPTY: we should return the initial // empty token even if there are only delimiters after it - return m_pos == 0 && !m_string.empty(); + return !m_string.empty() && m_pos == m_string.begin(); case wxTOKEN_RET_EMPTY_ALL: // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had @@ -123,7 +186,7 @@ bool wxStringTokenizer::HasMoreTokens() const // up to the end of the string in GetNextToken(), but if it is not // NUL yet we still have this last token to return even if m_pos is // already at m_string.length() - return m_pos < m_string.length() || m_lastDelim != _T('\0'); + return m_pos < m_stringEnd || m_lastDelim != _T('\0'); case wxTOKEN_INVALID: case wxTOKEN_DEFAULT: @@ -147,7 +210,7 @@ size_t wxStringTokenizer::CountTokens() const // important if its implementation here is not as efficient as it // could be -- but OTOH like this we're sure to get the correct answer // in all modes - wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode); + wxStringTokenizer tkz(wxString(m_pos, m_stringEnd), m_delims, m_mode); size_t count = 0; while ( tkz.HasMoreTokens() ) @@ -174,18 +237,21 @@ wxString wxStringTokenizer::GetNextToken() break; } + m_hasMoreTokens = MoreTokens_Unknown; + // find the end of this token - size_t pos = m_string.find_first_of(m_delims, m_pos); + wxString::const_iterator pos = + find_first_of(m_delims, m_delimsLen, m_pos, m_stringEnd); // and the start of the next one - if ( pos == wxString::npos ) + if ( pos == m_stringEnd ) { // no more delimiters, the token is everything till the end of // string - token.assign(m_string, m_pos, wxString::npos); + token.assign(m_pos, m_stringEnd); // skip the token - m_pos = m_string.length(); + m_pos = m_stringEnd; // it wasn't terminated m_lastDelim = _T('\0'); @@ -194,16 +260,16 @@ wxString wxStringTokenizer::GetNextToken() { // in wxTOKEN_RET_DELIMS mode we return the delimiter character // with token, otherwise leave it out - size_t len = pos - m_pos; + wxString::const_iterator tokenEnd(pos); if ( m_mode == wxTOKEN_RET_DELIMS ) - len++; + ++tokenEnd; - token.assign(m_string, m_pos, len); + token.assign(m_pos, tokenEnd); // skip the token and the trailing delimiter m_pos = pos + 1; - m_lastDelim = m_string[pos]; + m_lastDelim = (pos == m_stringEnd) ? _T('\0') : (wxChar)*pos; } } while ( !AllowEmpty() && token.empty() );