another new version of wxStringTokenizer (with tests and docs)

author Vadim Zeitlin <vadim@wxwidgets.org>

Fri, 4 Feb 2000 18:31:26 +0000 (18:31 +0000)

committer Vadim Zeitlin <vadim@wxwidgets.org>

Fri, 4 Feb 2000 18:31:26 +0000 (18:31 +0000)
author Vadim Zeitlin <vadim@wxwidgets.org>
Fri, 4 Feb 2000 18:31:26 +0000 (18:31 +0000)
committer Vadim Zeitlin <vadim@wxwidgets.org>
Fri, 4 Feb 2000 18:31:26 +0000 (18:31 +0000)
diff --git a/docs/latex/wx/tokenizr.tex b/docs/latex/wx/tokenizr.tex

index b066262a5214afee98c71f9cb22b2b605022bc6b..0b8490ce1debe606822fe1c5f4e00bbe96875eb8 100644 (file)
--- a/docs/latex/wx/tokenizr.tex
+++ b/docs/latex/wx/tokenizr.tex
@@ -1,6 +1,8 @@
  \section{\class{wxStringTokenizer}}\label{wxstringtokenizer}
  
-wxStringTokenizer helps you to break a string up into a number of tokens.
+wxStringTokenizer helps you to break a string up into a number of tokens. It
+replaces the standard C function {\tt strtok()} and also extends it in a
+number of ways.
  
  To use this class, you should create a wxStringTokenizer object, give it the
  string to tokenize and also the delimiters which separate tokens in the string
@@ -14,7 +16,7 @@ For example:
  
  \begin{verbatim}
  
-wxStringTokenizer tkz("first:second:third::fivth", ":");
+wxStringTokenizer tkz("first:second:third:fourth", ":");
  while ( tkz.HasMoreTokens() )
  {
      wxString token = tkz.GetNextToken();
@@ -23,14 +25,36 @@ while ( tkz.HasMoreTokens() )
  }
  \end{verbatim}
  
-Another feature of this class is that it may return the delimiter which
-was found after the token with it. In a simple case like above, you are not
-interested in this because the delimiter is always {\tt ':'}, but if the
-delimiters string has several characters, you might need to know which of them
-follows the current token. In this case, pass {\tt TRUE} to wxStringTokenizer
-constructor or \helpref{SetString}{wxstringtokenizersetstring} method and
-the delimiter will be appended to each returned token (except for the last
-one).
+By default, wxStringTokenizer will behave in the same way as {\tt strtok()} if
+the delimiters string only contains white space characters but, unlike the
+standard function, it will return empty tokens if this is not the case. This
+is helpful for parsing strictly formatted data where the number of fields is
+fixed but some of them may be empty (e.g. {\tt TAB} or comma delimited text
+files).
+
+The behaviour is governed by the last 
+\helpref{constructor}{wxstringtokenizerwxstringtokenizer}/\helpref{SetString}{wxstringtokenizersetstring} 
+parameter {\tt mode} which may be one of the following:
+
+\twocolwidtha{5cm}%
+\begin{twocollist}\itemsep=0pt
+\twocolitem{{\tt wxTOKEN\_DEFAULT}}{Default behaviour (as described above):
+same as {\tt wxTOKEN\_STRTOK} if the delimiter string contains only
+whitespaces, same as {\tt wxTOKEN\_RET\_EMPTY} otherwise}
+\twocolitem{{\tt wxTOKEN\_RET\_EMPTY}}{In this mode, the empty tokens in the
+middle of the string will be returned, i.e. {\tt "a::b:"} will be tokenized into
+three tokens `a', `' and `b'.}
+\twocolitem{{\tt wxTOKEN\_RET\_EMPTY\_ALL}}{In this mode, the empty trailing
+token (after the last delimiter character) will be returned as well. The
+string above will be split into four tokens: the already mentioned ones and
+another empty one as the last one.}
+\twocolitem{{\tt wxTOKEN\_RET\_DELIMS}}{In this mode, the delimiter character
+after the end of the current token (there may be none if this is the last
+token) is returned appended to the token. Otherwise, it is the same mode as 
+{\tt wxTOKEN\_RET\_EMPTY}.}
+\twocolitem{{\tt wxTOKEN\_STRTOK}}{In this mode the class behaves exactly like
+the standard {\tt strtok()} function. The empty tokens are never returned.}
+\end{twocollist}
  
  \wxheading{Derived from}
  
@@ -46,18 +70,14 @@ one).
  
  \func{}{wxStringTokenizer}{\void}
  
-Default constructor.
+Default constructor. You must call 
+\helpref{SetString}{wxstringtokenizersetstring} before calling any other
+methods.
  
-\func{}{wxStringTokenizer}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{bool }{ret\_delim = FALSE}}
+\func{}{wxStringTokenizer}{\param{const wxString\& }{str}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{wxStringTokenizerMode }{mode = wxTOKEN\_DEFAULT}}
  
-Constructor. Pass the string to tokenize, a string containing delimiters,
-a flag specifying whether to return delimiters with tokens.
-
-\membersection{wxStringTokenizer::\destruct{wxStringTokenizer}}\label{wxstringtokenizerdtor}
-
-\func{}{\destruct{wxStringTokenizer}}{\void}
-
-Destructor.
+Constructor. Pass the string to tokenize, a string containing delimiters
+and the mode specifying how the string should be tokenized.
  
  \membersection{wxStringTokenizer::CountTokens}\label{wxstringtokenizercounttokens}
  
@@ -69,11 +89,11 @@ Returns the number of tokens in the input string.
  
  \constfunc{bool}{HasMoreTokens}{\void}
  
-Returns TRUE if the tokenizer has further tokens.
+Returns TRUE if the tokenizer has further tokens, FALSE if none are left.
  
  \membersection{wxStringTokenizer::GetNextToken}\label{wxstringtokenizergetnexttoken}
  
-\constfunc{wxString}{GetNextToken}{\void}
+\func{wxString}{GetNextToken}{\void}
  
  Returns the next token or empty string if the end of string was reached.
  
@@ -93,10 +113,10 @@ Returns the part of the starting string without all token already extracted.
  
  \membersection{wxStringTokenizer::SetString}\label{wxstringtokenizersetstring}
  
-\func{void}{SetString}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{bool }{ret\_delim = FALSE}}
+\func{void}{SetString}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{wxStringTokenizerMode }{mode = wxTOKEN\_DEFAULT}}
  
  Initializes the tokenizer.
  
  Pass the string to tokenize, a string containing delimiters,
-a flag specifying whether to return delimiters with tokens.
+and the mode specifying how the string should be tokenized.
  
diff --git a/include/wx/tokenzr.h b/include/wx/tokenzr.h

index 9ff109d46d1e05fd6d2d426b16e8b5d7faf6b989..9a08ddb460bb7f147d006dc55269fd0a1579092e 100644 (file)
--- a/include/wx/tokenzr.h
+++ b/include/wx/tokenzr.h
@@ -1,6 +1,6 @@
  /////////////////////////////////////////////////////////////////////////////
-// Name:        tokenzr.h
-// Purpose:     String tokenizer
+// Name:        wx/tokenzr.h
+// Purpose:     String tokenizer - a C++ replacement for strtok(3)
  // Author:      Guilhem Lavaux
  // Modified by: Vadim Zeitlin
  // Created:     04/22/98
@@ -19,48 +19,102 @@
  #include "wx/object.h"
  #include "wx/string.h"
  
+// ----------------------------------------------------------------------------
+// constants
+// ----------------------------------------------------------------------------
+
  // default: delimiters are usual white space characters
  #define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
  
+// wxStringTokenizer mode flags which determine its behaviour
+enum wxStringTokenizerMode
+{
+    wxTOKEN_INVALID = -1,   // set by def ctor until SetString() is called
+    wxTOKEN_DEFAULT,        // strtok() for whitespace delims, RET_EMPTY else
+    wxTOKEN_RET_EMPTY,      // return empty token in the middle of the string
+    wxTOKEN_RET_EMPTY_ALL,  // return trailing empty tokens too
+    wxTOKEN_RET_DELIMS,     // return the delim with token (implies RET_EMPTY)
+    wxTOKEN_STRTOK          // behave exactly like strtok(3)
+};
+
+// ----------------------------------------------------------------------------
+// wxStringTokenizer: replaces infamous strtok() and has some other features
+// ----------------------------------------------------------------------------
+
  class WXDLLEXPORT wxStringTokenizer : public wxObject
  {
  public:
-    // ctors and such
-    wxStringTokenizer() { m_retdelims = FALSE; m_pos = 0; }
-    wxStringTokenizer(const wxString& to_tokenize,
+    // ctors and initializers
+        // default ctor, call SetString() later
+    wxStringTokenizer() { m_mode = wxTOKEN_INVALID; }
+        // ctor which gives us the string
+    wxStringTokenizer(const wxString& str,
                        const wxString& delims = wxDEFAULT_DELIMITERS,
-                      bool ret_delim = FALSE);
-    void SetString(const wxString& to_tokenize,
+                      wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
+
+        // args are same as for the non default ctor above
+    void SetString(const wxString& str,
                     const wxString& delims = wxDEFAULT_DELIMITERS,
-                   bool ret_delim = FALSE);
-    virtual ~wxStringTokenizer();
+                   wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
+
+        // reinitialize the tokenizer with the same delimiters/mode
+    void Reinit(const wxString& str);
  
-    // count tokens/get next token
+    // tokens access
+        // count them
      size_t CountTokens() const;
-    bool HasMoreTokens() { return m_hasMore; }
+        // did we reach the end of the string?
+    bool HasMoreTokens() const;
+        // get the next token, will return empty string if !HasMoreTokens()
      wxString GetNextToken();
  
-    // One note about GetString -- it returns the string
-    // remaining after the previous tokens have been removed,
-    // not the original string
+    // get current tokenizer state
+        // returns the part of the string which remains to tokenize (*not* the
+        // initial string)
      wxString GetString() const { return m_string; }
  
-    // returns the current position (i.e. one index after the last returned
-    // token or 0 if GetNextToken() has never been called) in the original
-    // string
+        // returns the current position (i.e. one index after the last
+        // returned token or 0 if GetNextToken() has never been called) in the
+        // original string
      size_t GetPosition() const { return m_pos; }
  
+    // misc
+        // get the current mode - can be different from the one passed to the
+        // ctor if it was wxTOKEN_DEFAULT
+    wxStringTokenizerMode GetMode() const { return m_mode; }
+
+    // backwards compatibility section from now on
+    // -------------------------------------------
+
      // for compatibility only, use GetNextToken() instead
      wxString NextToken() { return GetNextToken(); }
  
+    // compatibility only, don't use
+    void SetString(const wxString& to_tokenize,
+                   const wxString& delims,
+                   bool ret_delim)
+    {
+        SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS);
+    }
+
+    wxStringTokenizer(const wxString& to_tokenize,
+                      const wxString& delims,
+                      bool ret_delim)
+    {
+        SetString(to_tokenize, delims, ret_delim);
+    }
+
  protected:
+    bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
+
      wxString m_string,              // the (rest of) string to tokenize
               m_delims;              // all delimiters
  
      size_t   m_pos;                 // the position in the original string
  
-    bool     m_retdelims;           // if TRUE, return delims with tokens
-    bool     m_hasMore;             // do we have more tokens?
+    wxStringTokenizerMode m_mode;   // see wxTOKEN_XXX values
+
+    bool     m_hasMore;             // do we have more (possibly empty) tokens?
  };
  
  #endif // _WX_TOKENZRH
diff --git a/samples/console/console.cpp b/samples/console/console.cpp

index bd8b550139e42a25fc2ed945e9bb6c5e43d525a8..d073cb55119683dd1fc6e31b65f72d84b3edc1f1 100644 (file)
--- a/samples/console/console.cpp
+++ b/samples/console/console.cpp
@@ -425,7 +425,7 @@ static void TestAddition()
  #if wxUSE_LONGLONG_NATIVE
          wxASSERT_MSG( c == wxLongLongNative(a.GetHi(), a.GetLo()) +
                             wxLongLongNative(b.GetHi(), b.GetLo()),
-                      "addition failure" ); 
+                      "addition failure" );
  #else // !wxUSE_LONGLONG_NATIVE
          wxASSERT_MSG( c - b == a, "addition failure" );
  #endif // wxUSE_LONGLONG_NATIVE
@@ -921,7 +921,7 @@ def GetMonthWeek(dt):
      if weekNumMonth < 0:
          weekNumMonth = weekNumMonth + 53
      return weekNumMonth
-    
+
  def GetLastSundayBefore(dt):
      if dt.iso_week[2] == 7:
          return dt
@@ -1322,7 +1322,7 @@ static void TestTimeArithmetics()
      {
          wxDateSpan span;
          const char *name;
-    } testArithmData[] = 
+    } testArithmData[] =
      {
          { wxDateSpan::Day(),           "day"                                },
          { wxDateSpan::Week(),          "week"                               },
@@ -1330,7 +1330,7 @@ static void TestTimeArithmetics()
          { wxDateSpan::Year(),          "year"                               },
          { wxDateSpan(1, 2, 3, 4),      "year, 2 months, 3 weeks, 4 days"    },
      };
-    
+
      wxDateTime dt(29, wxDateTime::Dec, 1999), dt1, dt2;
  
      for ( size_t n = 0; n < WXSIZEOF(testArithmData); n++ )
@@ -1701,6 +1701,37 @@ void PrintArray(const char* name, const wxArrayString& array)
  #include "wx/timer.h"
  #include "wx/tokenzr.h"
  
+static void TestStringConstruction()
+{
+    puts("*** Testing wxString constructores ***");
+
+    #define TEST_CTOR(args, res)                                               \
+        {                                                                      \
+            wxString s args ;                                                  \
+            printf("wxString%s = %s ", #args, s.c_str());                      \
+            if ( s == res )                                                    \
+            {                                                                  \
+                puts("(ok)");                                                  \
+            }                                                                  \
+            else                                                               \
+            {                                                                  \
+                printf("(ERROR: should be %s)\n", res);                        \
+            }                                                                  \
+        }
+
+    TEST_CTOR((_T('Z'), 4), _T("ZZZZ"));
+    TEST_CTOR((_T("Hello"), 4), _T("Hell"));
+    TEST_CTOR((_T("Hello"), 5), _T("Hello"));
+    // TEST_CTOR((_T("Hello"), 6), _T("Hello")); -- should give assert failure
+
+    static const wxChar *s = _T("?really!");
+    const wxChar *start = wxStrchr(s, _T('r'));
+    const wxChar *end = wxStrchr(s, _T('!'));
+    TEST_CTOR((start, end), _T("really"));
+
+    puts("");
+}
+
  static void TestString()
  {
      wxStopWatch sw;
@@ -1842,33 +1873,49 @@ static void TestStringTokenizer()
  {
      puts("*** Testing wxStringTokenizer ***");
  
+    static const wxChar *modeNames[] =
+    {
+        _T("default"),
+        _T("return empty"),
+        _T("return all empty"),
+        _T("with delims"),
+        _T("like strtok"),
+    };
+
      static const struct StringTokenizerTest
      {
-        const wxChar *str;      // string to tokenize
-        const wxChar *delims;   // delimiters to use
-        size_t        count;    // count of token
-        bool          with;     // return tokens with delimiters?
-    } tokenizerTestData[] = 
-    {
-        { _T(""), _T(" "), 0, FALSE },
-        { _T("Hello, world"), _T(" "), 2, FALSE },
-        { _T("Hello, world"), _T(","), 2, FALSE },
-        { _T("Hello, world!"), _T(",!"), 3, TRUE },
-        { _T("username:password:uid:gid:gecos:home:shell"), _T(":"), 7, FALSE },
-        { _T("1 \t3\t4  6   "), wxDEFAULT_DELIMITERS, 9, TRUE },
-        { _T("01/02/99"), _T("/-"), 3, FALSE },
+        const wxChar *str;              // string to tokenize
+        const wxChar *delims;           // delimiters to use
+        size_t        count;            // count of token
+        wxStringTokenizerMode mode;     // how should we tokenize it
+    } tokenizerTestData[] =
+    {
+        { _T(""), _T(" "), 0 },
+        { _T("Hello, world"), _T(" "), 2 },
+        { _T("Hello,   world  "), _T(" "), 2 },
+        { _T("Hello, world"), _T(","), 2 },
+        { _T("Hello, world!"), _T(",!"), 2 },
+        { _T("Hello,, world!"), _T(",!"), 3 },
+        { _T("Hello, world!"), _T(",!"), 3, wxTOKEN_RET_EMPTY_ALL },
+        { _T("username:password:uid:gid:gecos:home:shell"), _T(":"), 7 },
+        { _T("1 \t3\t4  6   "), wxDEFAULT_DELIMITERS, 4 },
+        { _T("1 \t3\t4  6   "), wxDEFAULT_DELIMITERS, 6, wxTOKEN_RET_EMPTY },
+        { _T("1 \t3\t4  6   "), wxDEFAULT_DELIMITERS, 9, wxTOKEN_RET_EMPTY_ALL },
+        { _T("01/02/99"), _T("/-"), 3 },
+        { _T("01-02/99"), _T("/-"), 3, wxTOKEN_RET_DELIMS },
      };
  
      for ( size_t n = 0; n < WXSIZEOF(tokenizerTestData); n++ )
      {
          const StringTokenizerTest& tt = tokenizerTestData[n];
-        wxStringTokenizer tkz(tt.str, tt.delims, tt.with);
+        wxStringTokenizer tkz(tt.str, tt.delims, tt.mode);
  
          size_t count = tkz.CountTokens();
-        printf(_T("String '%s' has %u tokens delimited by '%s' "),
-               tt.str,
+        printf(_T("String '%s' has %u tokens delimited by '%s' (mode = %s) "),
+               MakePrintable(tt.str).c_str(),
                 count,
-               MakePrintable(tt.delims).c_str());
+               MakePrintable(tt.delims).c_str(),
+               modeNames[tkz.GetMode()]);
          if ( count == tt.count )
          {
              puts(_T("(ok)"));
@@ -1880,19 +1927,57 @@ static void TestStringTokenizer()
              continue;
          }
  
+        // if we emulate strtok(), check that we do it correctly
+        wxChar *buf, *s, *last;
+
+        if ( tkz.GetMode() == wxTOKEN_STRTOK )
+        {
+            buf = new wxChar[wxStrlen(tt.str) + 1];
+            wxStrcpy(buf, tt.str);
+
+            s = wxStrtok(buf, tt.delims, &last);
+        }
+        else
+        {
+            buf = NULL;
+        }
+
          // now show the tokens themselves
          size_t count2 = 0;
          while ( tkz.HasMoreTokens() )
          {
-            printf(_T("\ttoken %u: '%s'\n"),
+            wxString token = tkz.GetNextToken();
+
+            printf(_T("\ttoken %u: '%s'"),
                     ++count2,
-                   MakePrintable(tkz.GetNextToken()).c_str());
+                   MakePrintable(token).c_str());
+
+            if ( buf )
+            {
+                if ( token == s )
+                {
+                    puts(" (ok)");
+                }
+                else
+                {
+                    printf(" (ERROR: should be %s)\n", s);
+                }
+
+                s = wxStrtok(NULL, tt.delims, &last);
+            }
+            else
+            {
+                // nothing to compare with
+                puts("");
+            }
          }
  
          if ( count2 != count )
          {
-            puts(_T("ERROR: token count mismatch"));
+            puts(_T("\tERROR: token count mismatch"));
          }
+
+        delete [] buf;
      }
  
      puts("");
@@ -1959,11 +2044,12 @@ int main(int argc, char **argv)
      }
      if ( 0 )
      {
+        TestStringConstruction();
          TestStringSub();
          TestStringFormat();
          TestStringFind();
      }
-    TestStringTokenizer();
+        TestStringTokenizer();
  #endif // TEST_STRINGS
  
  #ifdef TEST_ARRAYS
diff --git a/src/common/tokenzr.cpp b/src/common/tokenzr.cpp

index 0601d5d0e5f762c9d84624ab38d2713923f1a0d3..64f49e5822f465ff504d1a628ce48faf1fbc25ac 100644 (file)
--- a/src/common/tokenzr.cpp
+++ b/src/common/tokenzr.cpp
@@ -38,57 +38,105 @@
  // wxStringTokenizer construction
  // ----------------------------------------------------------------------------
  
-wxStringTokenizer::wxStringTokenizer(const wxString& to_tokenize,
+wxStringTokenizer::wxStringTokenizer(const wxString& str,
                                       const wxString& delims,
-                                     bool ret_delims)
+                                     wxStringTokenizerMode mode)
  {
-    SetString(to_tokenize, delims, ret_delims);
+    SetString(str, delims, mode);
  }
  
-void wxStringTokenizer::SetString(const wxString& to_tokenize,
+void wxStringTokenizer::SetString(const wxString& str,
                                    const wxString& delims,
-                                  bool ret_delim)
+                                  wxStringTokenizerMode mode)
  {
-    m_string = to_tokenize;
+    if ( mode == wxTOKEN_DEFAULT )
+    {
+        // by default, we behave like strtok() if the delimiters are only
+        // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
+        // whitespace delimiters, strtok() behaviour is better because we want
+        // to count consecutive spaces as one delimiter)
+        const wxChar *p;
+        for ( p = delims.c_str(); *p; p++ )
+        {
+            if ( !wxIsspace(*p) )
+                break;
+        }
+
+        if ( *p )
+        {
+            // not whitespace char in delims
+            mode = wxTOKEN_RET_EMPTY;
+        }
+        else
+        {
+            // only whitespaces
+            mode = wxTOKEN_STRTOK;
+        }
+    }
+
      m_delims = delims;
-    m_retdelims = ret_delim;
-    m_pos = 0;
+    m_mode = mode;
  
-    // empty string doesn't have any tokens
-    m_hasMore = !m_string.empty();
+    Reinit(str);
  }
  
-wxStringTokenizer::~wxStringTokenizer()
+void wxStringTokenizer::Reinit(const wxString& str)
  {
+    wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
+
+    m_string = str;
+    m_pos = 0;
+
+    // empty string doesn't have any tokens
+    m_hasMore = !m_string.empty();
  }
  
  // ----------------------------------------------------------------------------
-// count the number of tokens in the string
+// access to the tokens
  // ----------------------------------------------------------------------------
  
-size_t wxStringTokenizer::CountTokens() const
+// do we have more of them?
+bool wxStringTokenizer::HasMoreTokens() const
  {
-    size_t pos = 0;
-    size_t count = 0;
-    for ( ;; )
+    wxCHECK_MSG( IsOk(), FALSE, _T("you should call SetString() first") );
+
+    if ( m_string.find_first_not_of(m_delims) == wxString::npos )
      {
-        pos = m_string.find_first_of(m_delims, pos);
-        if ( pos == wxString::npos )
-            break;
+        // no non empty tokens left, but in wxTOKEN_RET_EMPTY_ALL mode we
+        // still may return TRUE if GetNextToken() wasn't called yet for the
+        // last trailing empty token
+        return m_mode == wxTOKEN_RET_EMPTY_ALL ? m_hasMore : FALSE;
+    }
+    else
+    {
+        // there are non delimiter characters left, hence we do have more
+        // tokens
+        return TRUE;
+    }
+}
  
-        count++;    // one more token found
+// count the number of tokens in the string
+size_t wxStringTokenizer::CountTokens() const
+{
+    wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
  
-        pos++;      // skip delimiter
-    }
+    // VZ: this function is IMHO not very useful, so it's probably not very
+    //     important if its implementation here is not as efficient as it
+    //     could be - but OTOH like this we're sure to get the correct answer
+    //     in all modes
+    wxStringTokenizer *self = (wxStringTokenizer *)this;    // const_cast
+    wxString stringInitial = m_string;
  
-    // normally, we didn't count the last token in the loop above - so add it
-    // unless the string was empty from the very beginning, in which case it
-    // still has 0 (and not 1) tokens
-    if ( !m_string.empty() )
+    size_t count = 0;
+    while ( self->HasMoreTokens() )
      {
          count++;
+
+        (void)self->GetNextToken();
      }
  
+    self->Reinit(stringInitial);
+
      return count;
  }
  
@@ -98,33 +146,50 @@ size_t wxStringTokenizer::CountTokens() const
  
  wxString wxStringTokenizer::GetNextToken()
  {
+    // strtok() doesn't return empty tokens, all other modes do
+    bool allowEmpty = m_mode != wxTOKEN_STRTOK;
+
      wxString token;
-    if ( HasMoreTokens() )
+    do
      {
-        size_t pos = m_string.find_first_of(m_delims); // end of token
-        size_t pos2;                                   // start of the next one
-        if ( pos != wxString::npos )
+        if ( !HasMoreTokens() )
          {
-            // return the delimiter too
-            pos2 = pos + 1;
+            break;
          }
-        else
+        // find the end of this token
+        size_t pos = m_string.find_first_of(m_delims);
+
+        // and the start of the next one
+        if ( pos == wxString::npos )
          {
-            pos2 = m_string.length();
+            // no more delimiters, the token is everything till the end of
+            // string
+            token = m_string;
+
+            m_pos += m_string.length();
+            m_string.clear();
  
-            // no more tokens in this string
+            // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
+            // mode (we will return the trailing one right now in this case)
              m_hasMore = FALSE;
          }
+        else
+        {
+            size_t pos2 = pos + 1;
  
-        token = wxString(m_string, m_retdelims ? pos2 : pos);
+            // in wxTOKEN_RET_DELIMS mode we return the delimiter character
+            // with token
+            token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
+                                                                    : pos);
  
-        // remove token with the following it delimiter from string
-        m_string.erase(0, pos2);
+            // remove the token and the delimiter following it from the string
+            m_string.erase(0, pos2);
  
-        // keep track of the position in the original string too
-        m_pos += pos2;
+            // keep track of the position in the original string too
+            m_pos += pos2;
+        }
      }
-    //else: no more tokens, return empty token
+    while ( !allowEmpty && token.empty() );
  
      return token;
  }
author	Vadim Zeitlin <vadim@wxwidgets.org>
	Fri, 4 Feb 2000 18:31:26 +0000 (18:31 +0000)
committer	Vadim Zeitlin <vadim@wxwidgets.org>
	Fri, 4 Feb 2000 18:31:26 +0000 (18:31 +0000)
docs/latex/wx/tokenizr.tex		patch \| blob \| blame \| history
include/wx/tokenzr.h		patch \| blob \| blame \| history
samples/console/console.cpp		patch \| blob \| blame \| history
src/common/tokenzr.cpp		patch \| blob \| blame \| history