From: Vadim Zeitlin Date: Mon, 31 Jan 2000 20:46:49 +0000 (+0000) Subject: 1. bug in wxString::find_first_of() fixed X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/bbf8fc5391b2458d630d29a794df99c8e681e6eb 1. bug in wxString::find_first_of() fixed 2. new wxStringTokenizer class and the docs for it git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@5766 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- diff --git a/docs/latex/wx/tokenizr.tex b/docs/latex/wx/tokenizr.tex index ffc8d5dfe7..b066262a52 100644 --- a/docs/latex/wx/tokenizr.tex +++ b/docs/latex/wx/tokenizr.tex @@ -2,6 +2,36 @@ wxStringTokenizer helps you to break a string up into a number of tokens. +To use this class, you should create a wxStringTokenizer object, give it the +string to tokenize and also the delimiters which separate tokens in the string +(by default, white space characters will be used). + +Then \helpref{GetNextToken}{wxstringtokenizergetnexttoken} may be called +repeatedly until \helpref{HasMoreTokens}{wxstringtokenizerhasmoretokens} +returns FALSE. + +For example: + +\begin{verbatim} + +wxStringTokenizer tkz("first:second:third::fifth", ":"); +while ( tkz.HasMoreTokens() ) +{ + wxString token = tkz.GetNextToken(); + + // process token here +} +\end{verbatim} + +Another feature of this class is that it may return each token together with +the delimiter which was found after it. In a simple case like the one above, you are not +interested in this because the delimiter is always {\tt ':'}, but if the +delimiters string has several characters, you might need to know which of them +follows the current token. In this case, pass {\tt TRUE} to the wxStringTokenizer +constructor or the \helpref{SetString}{wxstringtokenizersetstring} method and +the delimiter will be appended to each returned token (except for the last +one). + \wxheading{Derived from} \helpref{wxObject}{wxobject} @@ -21,7 +51,7 @@ Default constructor. 
\func{}{wxStringTokenizer}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{bool }{ret\_delim = FALSE}} Constructor. Pass the string to tokenize, a string containing delimiters, -a flag specifying whether delimiters are retained. +and a flag specifying whether to return delimiters with tokens. \membersection{wxStringTokenizer::\destruct{wxStringTokenizer}}\label{wxstringtokenizerdtor} @@ -45,13 +75,21 @@ Returns TRUE if the tokenizer has further tokens. \constfunc{wxString}{GetNextToken}{\void} -Returns the next token. +Returns the next token, or an empty string if the end of the string was reached. + +\membersection{wxStringTokenizer::GetPosition}\label{wxstringtokenizergetposition} + +\constfunc{size\_t}{GetPosition}{\void} + +Returns the current position (i.e. one index after the last returned +token or 0 if GetNextToken() has never been called) in the original +string. \membersection{wxStringTokenizer::GetString}\label{wxstringtokenizergetstring} \constfunc{wxString}{GetString}{\void} -Returns the input string. +Returns the part of the original string that remains after the already extracted tokens have been removed. \membersection{wxStringTokenizer::SetString}\label{wxstringtokenizersetstring} @@ -60,5 +98,5 @@ Returns the input string. Initializes the tokenizer. Pass the string to tokenize, a string containing delimiters, -a flag specifying whether delimiters are retained. +and a flag specifying whether to return delimiters with tokens. 
diff --git a/include/wx/tokenzr.h b/include/wx/tokenzr.h index b1d8ed65cc..9ff109d46d 100644 --- a/include/wx/tokenzr.h +++ b/include/wx/tokenzr.h @@ -2,7 +2,7 @@ // Name: tokenzr.h // Purpose: String tokenizer // Author: Guilhem Lavaux -// Modified by: Gregory Pietsch +// Modified by: Vadim Zeitlin // Created: 04/22/98 // RCS-ID: $Id$ // Copyright: (c) Guilhem Lavaux @@ -18,47 +18,49 @@ #include "wx/object.h" #include "wx/string.h" -#include "wx/filefn.h" + +// default: delimiters are usual white space characters +#define wxDEFAULT_DELIMITERS (_T(" \t\r\n")) class WXDLLEXPORT wxStringTokenizer : public wxObject { public: + // ctors and such + wxStringTokenizer() { m_retdelims = FALSE; m_pos = 0; } wxStringTokenizer(const wxString& to_tokenize, - const wxString& delims = " \t\r\n", + const wxString& delims = wxDEFAULT_DELIMITERS, bool ret_delim = FALSE); - wxStringTokenizer() { m_retdelims = FALSE; m_pos = 0; } + void SetString(const wxString& to_tokenize, + const wxString& delims = wxDEFAULT_DELIMITERS, + bool ret_delim = FALSE); virtual ~wxStringTokenizer(); - int CountTokens() const; - bool HasMoreTokens(); - - wxString NextToken(); - wxString GetNextToken() { return NextToken(); }; + // count tokens/get next token + size_t CountTokens() const; + bool HasMoreTokens() { return m_hasMore; } + wxString GetNextToken(); - wxString GetString() const { return m_string; } // One note about GetString -- it returns the string // remaining after the previous tokens have been removed, // not the original string + wxString GetString() const { return m_string; } - void SetString(const wxString& to_tokenize, - const wxString& delims = " \t\r\n", - bool ret_delim = FALSE) - { - m_string = to_tokenize; - m_delims = delims; - m_retdelims = ret_delim; - m_pos = 0; - } + // returns the current position (i.e. 
one index after the last returned + // token or 0 if GetNextToken() has never been called) in the original + // string + size_t GetPosition() const { return m_pos; } - // Here's the desired function. It returns the position - // of the next token in the original string by keeping track - // of everything that's been deleted by GetNextToken. - wxUint32 GetPosition() { return m_pos; } + // for compatibility only, use GetNextToken() instead + wxString NextToken() { return GetNextToken(); } protected: - wxString m_string, m_delims; - bool m_retdelims; - wxUint32 m_pos; // the position + wxString m_string, // the (rest of) string to tokenize + m_delims; // all delimiters + + size_t m_pos; // the position in the original string + + bool m_retdelims; // if TRUE, return delims with tokens + bool m_hasMore; // do we have more tokens? }; #endif // _WX_TOKENZRH diff --git a/samples/console/console.cpp b/samples/console/console.cpp index 3f924c1ddd..bd8b550139 100644 --- a/samples/console/console.cpp +++ b/samples/console/console.cpp @@ -29,16 +29,16 @@ // what to test? 
-#define TEST_ARRAYS -#define TEST_CMDLINE -#define TEST_DIR -#define TEST_EXECUTE -#define TEST_LOG -#define TEST_LONGLONG -#define TEST_MIME +//#define TEST_ARRAYS +//#define TEST_CMDLINE +//#define TEST_DIR +//#define TEST_EXECUTE +//#define TEST_LOG +//#define TEST_LONGLONG +//#define TEST_MIME #define TEST_STRINGS -#define TEST_THREADS -#define TEST_TIME +//#define TEST_THREADS +//#define TEST_TIME // ============================================================================ // implementation @@ -1699,6 +1699,7 @@ void PrintArray(const char* name, const wxArrayString& array) #ifdef TEST_STRINGS #include "wx/timer.h" +#include "wx/tokenzr.h" static void TestString() { @@ -1826,6 +1827,77 @@ static void TestStringFind() puts(""); } +// replace TABs with \t and CRs with \n +static wxString MakePrintable(const wxChar *s) +{ + wxString str(s); + (void)str.Replace(_T("\t"), _T("\\t")); + (void)str.Replace(_T("\n"), _T("\\n")); + (void)str.Replace(_T("\r"), _T("\\r")); + + return str; +} + +static void TestStringTokenizer() +{ + puts("*** Testing wxStringTokenizer ***"); + + static const struct StringTokenizerTest + { + const wxChar *str; // string to tokenize + const wxChar *delims; // delimiters to use + size_t count; // count of token + bool with; // return tokens with delimiters? 
+ } tokenizerTestData[] = + { + { _T(""), _T(" "), 0, FALSE }, + { _T("Hello, world"), _T(" "), 2, FALSE }, + { _T("Hello, world"), _T(","), 2, FALSE }, + { _T("Hello, world!"), _T(",!"), 3, TRUE }, + { _T("username:password:uid:gid:gecos:home:shell"), _T(":"), 7, FALSE }, + { _T("1 \t3\t4 6 "), wxDEFAULT_DELIMITERS, 9, TRUE }, + { _T("01/02/99"), _T("/-"), 3, FALSE }, + }; + + for ( size_t n = 0; n < WXSIZEOF(tokenizerTestData); n++ ) + { + const StringTokenizerTest& tt = tokenizerTestData[n]; + wxStringTokenizer tkz(tt.str, tt.delims, tt.with); + + size_t count = tkz.CountTokens(); + printf(_T("String '%s' has %u tokens delimited by '%s' "), + tt.str, + count, + MakePrintable(tt.delims).c_str()); + if ( count == tt.count ) + { + puts(_T("(ok)")); + } + else + { + printf(_T("(ERROR: should be %u)\n"), tt.count); + + continue; + } + + // now show the tokens themselves + size_t count2 = 0; + while ( tkz.HasMoreTokens() ) + { + printf(_T("\ttoken %u: '%s'\n"), + ++count2, + MakePrintable(tkz.GetNextToken()).c_str()); + } + + if ( count2 != count ) + { + puts(_T("ERROR: token count mismatch")); + } + } + + puts(""); +} + #endif // TEST_STRINGS // ---------------------------------------------------------------------------- @@ -1889,8 +1961,9 @@ int main(int argc, char **argv) { TestStringSub(); TestStringFormat(); + TestStringFind(); } - TestStringFind(); + TestStringTokenizer(); #endif // TEST_STRINGS #ifdef TEST_ARRAYS diff --git a/src/common/string.cpp b/src/common/string.cpp index b0273c9dda..fd6ee0a780 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -1596,7 +1596,7 @@ size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const const wxChar *start = c_str() + nStart; const wxChar *firstOf = wxStrpbrk(start, sz); if ( firstOf ) - return firstOf - start; + return firstOf - c_str(); else return npos; } diff --git a/src/common/tokenzr.cpp b/src/common/tokenzr.cpp index 95eb0d5860..0601d5d0e5 100644 --- a/src/common/tokenzr.cpp +++ 
b/src/common/tokenzr.cpp @@ -2,13 +2,21 @@ // Name: tokenzr.cpp // Purpose: String tokenizer // Author: Guilhem Lavaux -// Modified by: Gregory Pietsch +// Modified by: Vadim Zeitlin // Created: 04/22/98 // RCS-ID: $Id$ // Copyright: (c) Guilhem Lavaux // Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// +// ============================================================================ +// declarations +// ============================================================================ + +// ---------------------------------------------------------------------------- +// headers +// ---------------------------------------------------------------------------- + #ifdef __GNUG__ #pragma implementation "tokenzr.h" #endif @@ -22,86 +30,101 @@ #include "wx/tokenzr.h" +// ============================================================================ +// implementation +// ============================================================================ + +// ---------------------------------------------------------------------------- +// wxStringTokenizer construction +// ---------------------------------------------------------------------------- + wxStringTokenizer::wxStringTokenizer(const wxString& to_tokenize, const wxString& delims, bool ret_delims) +{ + SetString(to_tokenize, delims, ret_delims); +} + +void wxStringTokenizer::SetString(const wxString& to_tokenize, + const wxString& delims, + bool ret_delim) { m_string = to_tokenize; m_delims = delims; - m_retdelims = ret_delims; + m_retdelims = ret_delim; m_pos = 0; + + // empty string doesn't have any tokens + m_hasMore = !m_string.empty(); } wxStringTokenizer::~wxStringTokenizer() { } -int wxStringTokenizer::CountTokens() const +// ---------------------------------------------------------------------------- +// count the number of tokens in the string +// ---------------------------------------------------------------------------- + +size_t wxStringTokenizer::CountTokens() 
const { size_t pos = 0; - int count = 0; - bool at_delim; - - while (pos < m_string.length()) { - // while we're still counting ... - at_delim = (m_delims.find(m_string.at(pos)) < m_delims.length()); - // are we at a delimiter? if so, move to the next nondelimiter; - // if not, move to the next delimiter. If the find_first_of - // and find_first_not_of methods fail, pos will be assigned - // npos (0xFFFFFFFF) which will terminate the loop on the next - // go-round unless we have a really long string, which is unlikely - pos = at_delim ? m_string.find_first_not_of(m_delims, pos) - : m_string.find_first_of(m_delims, pos); - if (m_retdelims) + size_t count = 0; + for ( ;; ) + { + pos = m_string.find_first_of(m_delims, pos); + if ( pos == wxString::npos ) + break; + + count++; // one more token found + + pos++; // skip delimiter + } + + // normally, we didn't count the last token in the loop above - so add it + // unless the string was empty from the very beginning, in which case it + // still has 0 (and not 1) tokens + if ( !m_string.empty() ) + { + count++; + } + + return count; +} + +// ---------------------------------------------------------------------------- +// token extraction +// ---------------------------------------------------------------------------- + +wxString wxStringTokenizer::GetNextToken() +{ + wxString token; + if ( HasMoreTokens() ) + { + size_t pos = m_string.find_first_of(m_delims); // end of token + size_t pos2; // start of the next one + if ( pos != wxString::npos ) { - // if we're retaining delimiters, increment count - count++; + // return the delimiter too + pos2 = pos + 1; } else { - // if we're not retaining delimiters and at a token, inc count - count += (!at_delim); + pos2 = m_string.length(); + + // no more tokens in this string + m_hasMore = FALSE; } - } - return count; -} -bool wxStringTokenizer::HasMoreTokens() -{ - return (m_retdelims - ? 
!m_string.IsEmpty() - : m_string.find_first_not_of(m_delims) < m_string.length()); -} + token = wxString(m_string, m_retdelims ? pos2 : pos); -wxString wxStringTokenizer::NextToken() -{ - size_t pos; - wxString r_string; - - if ( m_string.IsEmpty() ) - return m_string; - pos = m_string.find_first_not_of(m_delims); - if ( m_retdelims ) { - // we're retaining delimiters (unusual behavior, IMHO) - if (pos == 0) - // first char is a non-delimiter - pos = m_string.find_first_of(m_delims); - } else { - // we're not retaining delimiters - m_string.erase(0, pos); - m_pos += pos; - if (m_string.IsEmpty()) - return m_string; - pos = m_string.find_first_of(m_delims); - } - if (pos <= m_string.length()) { - r_string = m_string.substr(0, pos); - m_string.erase(0, pos); - m_pos += pos; - } else { - r_string = m_string; - m_pos += m_string.length(); - m_string.Empty(); + // remove token with the following it delimiter from string + m_string.erase(0, pos2); + + // keep track of the position in the original string too + m_pos += pos2; } - return r_string; + //else: no more tokens, return empty token + + return token; }