From 7c968cee840eda3b7340cdb4ba822065721cbdf6 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Fri, 4 Feb 2000 18:31:26 +0000
Subject: [PATCH] another new version of wxStringTokenizer (with tests and
 docs)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@5839 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
---
 docs/latex/wx/tokenizr.tex  |  68 +++++++++++------
 include/wx/tokenzr.h        |  92 +++++++++++++++++-----
 samples/console/console.cpp | 136 +++++++++++++++++++++++++++------
 src/common/tokenzr.cpp      | 147 ++++++++++++++++++++++++++----------
 4 files changed, 334 insertions(+), 109 deletions(-)

diff --git a/docs/latex/wx/tokenizr.tex b/docs/latex/wx/tokenizr.tex
index b066262a52..0b8490ce1d 100644
--- a/docs/latex/wx/tokenizr.tex
+++ b/docs/latex/wx/tokenizr.tex
@@ -1,6 +1,8 @@
 \section{\class{wxStringTokenizer}}\label{wxstringtokenizer}
 
-wxStringTokenizer helps you to break a string up into a number of tokens.
+wxStringTokenizer helps you to break a string up into a number of tokens. It
+replaces the standard C function {\tt strtok()} and also extends it in a
+number of ways.
 
 To use this class, you should create a wxStringTokenizer object, give it the
 string to tokenize and also the delimiters which separate tokens in the string
@@ -14,7 +16,7 @@ For example:
 
 \begin{verbatim}
 
-wxStringTokenizer tkz("first:second:third::fivth", ":");
+wxStringTokenizer tkz("first:second:third:fourth", ":");
 while ( tkz.HasMoreTokens() )
 {
     wxString token = tkz.GetNextToken();
@@ -23,14 +25,36 @@ while ( tkz.HasMoreTokens() )
 }
 \end{verbatim}
 
-Another feature of this class is that it may return the delimiter which
-was found after the token with it. In a simple case like above, you are not
-interested in this because the delimiter is always {\tt ':'}, but if the
-delimiters string has several characters, you might need to know which of them
-follows the current token. In this case, pass {\tt TRUE} to wxStringTokenizer
-constructor or \helpref{SetString}{wxstringtokenizersetstring} method and
-the delimiter will be appended to each returned token (except for the last
-one).
+By default, wxStringTokenizer will behave in the same way as {\tt strtok()} if
+the delimiter string only contains white space characters but, unlike the
+standard function, it will return empty tokens if this is not the case. This
+is helpful for parsing strictly formatted data where the number of fields is
+fixed but some of them may be empty (e.g. {\tt TAB} or comma delimited text
+files).
+
+The behaviour is governed by the last 
+\helpref{constructor}{wxstringtokenizerwxstringtokenizer}/\helpref{SetString}{wxstringtokenizersetstring} 
+parameter {\tt mode} which may be one of the following:
+
+\twocolwidtha{5cm}%
+\begin{twocollist}\itemsep=0pt
+\twocolitem{{\tt wxTOKEN\_DEFAULT}}{Default behaviour (as described above):
+same as {\tt wxTOKEN\_STRTOK} if the delimiter string contains only
+whitespaces, same as {\tt wxTOKEN\_RET\_EMPTY} otherwise}
+\twocolitem{{\tt wxTOKEN\_RET\_EMPTY}}{In this mode, the empty tokens in the
+middle of the string will be returned, e.g. {\tt "a::b:"} will be tokenized
+into three tokens `a', `' and `b'.}
+\twocolitem{{\tt wxTOKEN\_RET\_EMPTY\_ALL}}{In this mode, the empty trailing
+token (after the last delimiter character) will be returned as well. The
+string {\tt "a::b:"} will contain four tokens: `a', `' and `b' as in the
+{\tt wxTOKEN\_RET\_EMPTY} mode and another empty one as the last one.}
+\twocolitem{{\tt wxTOKEN\_RET\_DELIMS}}{In this mode, the delimiter character
+after the end of the current token (there may be none if this is the last
+token) is returned appended to the token. Otherwise, it is the same mode as 
+{\tt wxTOKEN\_RET\_EMPTY}.}
+\twocolitem{{\tt wxTOKEN\_STRTOK}}{In this mode the class behaves exactly like
+the standard {\tt strtok()} function. The empty tokens are never returned.}
+\end{twocollist}
 
 \wxheading{Derived from}
 
@@ -46,18 +70,14 @@ one).
 
 \func{}{wxStringTokenizer}{\void}
 
-Default constructor.
+Default constructor. You must call 
+\helpref{SetString}{wxstringtokenizersetstring} before calling any other
+methods.
 
-\func{}{wxStringTokenizer}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{bool }{ret\_delim = FALSE}}
+\func{}{wxStringTokenizer}{\param{const wxString\& }{str}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{wxStringTokenizerMode }{mode = wxTOKEN\_DEFAULT}}
 
-Constructor. Pass the string to tokenize, a string containing delimiters,
-a flag specifying whether to return delimiters with tokens.
-
-\membersection{wxStringTokenizer::\destruct{wxStringTokenizer}}\label{wxstringtokenizerdtor}
-
-\func{}{\destruct{wxStringTokenizer}}{\void}
-
-Destructor.
+Constructor. Pass the string to tokenize, a string containing delimiters
+and the mode specifying how the string should be tokenized.
 
 \membersection{wxStringTokenizer::CountTokens}\label{wxstringtokenizercounttokens}
 
@@ -69,11 +89,11 @@ Returns the number of tokens in the input string.
 
 \constfunc{bool}{HasMoreTokens}{\void}
 
-Returns TRUE if the tokenizer has further tokens.
+Returns TRUE if the tokenizer has further tokens, FALSE if none are left.
 
 \membersection{wxStringTokenizer::GetNextToken}\label{wxstringtokenizergetnexttoken}
 
-\constfunc{wxString}{GetNextToken}{\void}
+\func{wxString}{GetNextToken}{\void}
 
 Returns the next token or empty string if the end of string was reached.
 
@@ -93,10 +113,10 @@ Returns the part of the starting string without all token already extracted.
 
 \membersection{wxStringTokenizer::SetString}\label{wxstringtokenizersetstring}
 
-\func{void}{SetString}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{bool }{ret\_delim = FALSE}}
+\func{void}{SetString}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{wxStringTokenizerMode }{mode = wxTOKEN\_DEFAULT}}
 
 Initializes the tokenizer.
 
 Pass the string to tokenize, a string containing delimiters,
-a flag specifying whether to return delimiters with tokens.
+and the mode specifying how the string should be tokenized.
 
diff --git a/include/wx/tokenzr.h b/include/wx/tokenzr.h
index 9ff109d46d..9a08ddb460 100644
--- a/include/wx/tokenzr.h
+++ b/include/wx/tokenzr.h
@@ -1,6 +1,6 @@
 /////////////////////////////////////////////////////////////////////////////
-// Name:        tokenzr.h
-// Purpose:     String tokenizer
+// Name:        wx/tokenzr.h
+// Purpose:     String tokenizer - a C++ replacement for strtok(3)
 // Author:      Guilhem Lavaux
 // Modified by: Vadim Zeitlin
 // Created:     04/22/98
@@ -19,48 +19,102 @@
 #include "wx/object.h"
 #include "wx/string.h"
 
+// ----------------------------------------------------------------------------
+// constants
+// ----------------------------------------------------------------------------
+
 // default: delimiters are usual white space characters
 #define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
 
+// wxStringTokenizer modes: mutually exclusive values (these are not bit flags)
+enum wxStringTokenizerMode
+{
+    wxTOKEN_INVALID = -1,   // set by default ctor until SetString() is called
+    wxTOKEN_DEFAULT,        // strtok() for whitespace delims, RET_EMPTY else
+    wxTOKEN_RET_EMPTY,      // return empty tokens in the middle of the string
+    wxTOKEN_RET_EMPTY_ALL,  // return the trailing empty token too
+    wxTOKEN_RET_DELIMS,     // return the delim with token (implies RET_EMPTY)
+    wxTOKEN_STRTOK          // behave exactly like strtok(3)
+};
+
+// ----------------------------------------------------------------------------
+// wxStringTokenizer: replaces infamous strtok() and has some other features
+// ----------------------------------------------------------------------------
+
 class WXDLLEXPORT wxStringTokenizer : public wxObject
 {
 public:
-    // ctors and such
-    wxStringTokenizer() { m_retdelims = FALSE; m_pos = 0; }
-    wxStringTokenizer(const wxString& to_tokenize,
+    // ctors and initializers
+        // default ctor, call SetString() later (GetPosition() is 0 until then)
+    wxStringTokenizer() { m_mode = wxTOKEN_INVALID; m_pos = 0; }
+        // ctor which gives us the string
+    wxStringTokenizer(const wxString& str,
                       const wxString& delims = wxDEFAULT_DELIMITERS,
-                      bool ret_delim = FALSE);
-    void SetString(const wxString& to_tokenize,
+                      wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
+
+        // args are same as for the non default ctor above
+    void SetString(const wxString& str,
                    const wxString& delims = wxDEFAULT_DELIMITERS,
-                   bool ret_delim = FALSE);
-    virtual ~wxStringTokenizer();
+                   wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
+
+        // reinitialize the tokenizer with the same delimiters/mode
+    void Reinit(const wxString& str);
 
-    // count tokens/get next token
+    // tokens access
+        // count them
     size_t CountTokens() const;
-    bool HasMoreTokens() { return m_hasMore; }
+        // did we reach the end of the string?
+    bool HasMoreTokens() const;
+        // get the next token, will return empty string if !HasMoreTokens()
     wxString GetNextToken();
 
-    // One note about GetString -- it returns the string
-    // remaining after the previous tokens have been removed,
-    // not the original string
+    // get current tokenizer state
+        // returns the part of the string which remains to tokenize (*not* the
+        // initial string)
     wxString GetString() const { return m_string; }
 
-    // returns the current position (i.e. one index after the last returned
-    // token or 0 if GetNextToken() has never been called) in the original
-    // string
+        // returns the current position (i.e. one index after the last
+        // returned token or 0 if GetNextToken() has never been called) in the
+        // original string
     size_t GetPosition() const { return m_pos; }
 
+    // misc
+        // get the current mode - can be different from the one passed to the
+        // ctor if it was wxTOKEN_DEFAULT
+    wxStringTokenizerMode GetMode() const { return m_mode; }
+
+    // backwards compatibility section from now on
+    // -------------------------------------------
+
     // for compatibility only, use GetNextToken() instead
     wxString NextToken() { return GetNextToken(); }
 
+    // compatibility only, don't use: FALSE behaves as if the flag was omitted
+    void SetString(const wxString& to_tokenize,
+                   const wxString& delims, bool ret_delim)
+    {
+        SetString(to_tokenize, delims,
+                  ret_delim ? wxTOKEN_RET_DELIMS : wxTOKEN_DEFAULT);
+    }
+
+    wxStringTokenizer(const wxString& to_tokenize,
+                      const wxString& delims,
+                      bool ret_delim)       // compatibility only, don't use
+    {
+        SetString(to_tokenize, delims, ret_delim);  // bool (compat) overload
+    }
+
 protected:
+    bool IsOk() const { return m_mode != wxTOKEN_INVALID; } // valid mode set?
+
     wxString m_string,              // the (rest of) string to tokenize
              m_delims;              // all delimiters
 
     size_t   m_pos;                 // the position in the original string
 
-    bool     m_retdelims;           // if TRUE, return delims with tokens
-    bool     m_hasMore;             // do we have more tokens?
+    wxStringTokenizerMode m_mode;   // see wxTOKEN_XXX values
+
+    bool     m_hasMore;             // do we have more (possibly empty) tokens?
 };
 
 #endif // _WX_TOKENZRH
diff --git a/samples/console/console.cpp b/samples/console/console.cpp
index bd8b550139..d073cb5511 100644
--- a/samples/console/console.cpp
+++ b/samples/console/console.cpp
@@ -425,7 +425,7 @@ static void TestAddition()
 #if wxUSE_LONGLONG_NATIVE
         wxASSERT_MSG( c == wxLongLongNative(a.GetHi(), a.GetLo()) +
                            wxLongLongNative(b.GetHi(), b.GetLo()),
-                      "addition failure" ); 
+                      "addition failure" );
 #else // !wxUSE_LONGLONG_NATIVE
         wxASSERT_MSG( c - b == a, "addition failure" );
 #endif // wxUSE_LONGLONG_NATIVE
@@ -921,7 +921,7 @@ def GetMonthWeek(dt):
     if weekNumMonth < 0:
         weekNumMonth = weekNumMonth + 53
     return weekNumMonth
-    
+
 def GetLastSundayBefore(dt):
     if dt.iso_week[2] == 7:
         return dt
@@ -1322,7 +1322,7 @@ static void TestTimeArithmetics()
     {
         wxDateSpan span;
         const char *name;
-    } testArithmData[] = 
+    } testArithmData[] =
     {
         { wxDateSpan::Day(),           "day"                                },
         { wxDateSpan::Week(),          "week"                               },
@@ -1330,7 +1330,7 @@ static void TestTimeArithmetics()
         { wxDateSpan::Year(),          "year"                               },
         { wxDateSpan(1, 2, 3, 4),      "year, 2 months, 3 weeks, 4 days"    },
     };
-    
+
     wxDateTime dt(29, wxDateTime::Dec, 1999), dt1, dt2;
 
     for ( size_t n = 0; n < WXSIZEOF(testArithmData); n++ )
@@ -1701,6 +1701,37 @@ void PrintArray(const char* name, const wxArrayString& array)
 #include "wx/timer.h"
 #include "wx/tokenzr.h"
 
+static void TestStringConstruction()
+{
+    puts("*** Testing wxString constructores ***");
+    // TEST_CTOR: construct wxString with args, print it, compare it with res
+    #define TEST_CTOR(args, res)                                               \
+        {                                                                      \
+            wxString s args ;                                                  \
+            printf("wxString%s = %s ", #args, s.c_str());                      \
+            if ( s == res )                                                    \
+            {                                                                  \
+                puts("(ok)");                                                  \
+            }                                                                  \
+            else                                                               \
+            {                                                                  \
+                printf("(ERROR: should be %s)\n", res);                        \
+            }                                                                  \
+        }
+
+    TEST_CTOR((_T('Z'), 4), _T("ZZZZ"));
+    TEST_CTOR((_T("Hello"), 4), _T("Hell"));
+    TEST_CTOR((_T("Hello"), 5), _T("Hello"));
+    // TEST_CTOR((_T("Hello"), 6), _T("Hello")); -- should give assert failure
+
+    static const wxChar *s = _T("?really!");
+    const wxChar *start = wxStrchr(s, _T('r'));
+    const wxChar *end = wxStrchr(s, _T('!'));
+    TEST_CTOR((start, end), _T("really"));  // i.e. [start, end) without '!'
+
+    puts("");
+}
+
 static void TestString()
 {
     wxStopWatch sw;
@@ -1842,33 +1873,49 @@ static void TestStringTokenizer()
 {
     puts("*** Testing wxStringTokenizer ***");
 
+    static const wxChar *modeNames[] =
+    {
+        _T("default"),
+        _T("return empty"),
+        _T("return all empty"),
+        _T("with delims"),
+        _T("like strtok"),
+    };
+
     static const struct StringTokenizerTest
     {
-        const wxChar *str;      // string to tokenize
-        const wxChar *delims;   // delimiters to use
-        size_t        count;    // count of token
-        bool          with;     // return tokens with delimiters?
-    } tokenizerTestData[] = 
-    {
-        { _T(""), _T(" "), 0, FALSE },
-        { _T("Hello, world"), _T(" "), 2, FALSE },
-        { _T("Hello, world"), _T(","), 2, FALSE },
-        { _T("Hello, world!"), _T(",!"), 3, TRUE },
-        { _T("username:password:uid:gid:gecos:home:shell"), _T(":"), 7, FALSE },
-        { _T("1 \t3\t4  6   "), wxDEFAULT_DELIMITERS, 9, TRUE },
-        { _T("01/02/99"), _T("/-"), 3, FALSE },
+        const wxChar *str;              // string to tokenize
+        const wxChar *delims;           // delimiters to use
+        size_t        count;            // count of token
+        wxStringTokenizerMode mode;     // how should we tokenize it
+    } tokenizerTestData[] =
+    {
+        { _T(""), _T(" "), 0 },
+        { _T("Hello, world"), _T(" "), 2 },
+        { _T("Hello,   world  "), _T(" "), 2 },
+        { _T("Hello, world"), _T(","), 2 },
+        { _T("Hello, world!"), _T(",!"), 2 },
+        { _T("Hello,, world!"), _T(",!"), 3 },
+        { _T("Hello, world!"), _T(",!"), 3, wxTOKEN_RET_EMPTY_ALL },
+        { _T("username:password:uid:gid:gecos:home:shell"), _T(":"), 7 },
+        { _T("1 \t3\t4  6   "), wxDEFAULT_DELIMITERS, 4 },
+        { _T("1 \t3\t4  6   "), wxDEFAULT_DELIMITERS, 6, wxTOKEN_RET_EMPTY },
+        { _T("1 \t3\t4  6   "), wxDEFAULT_DELIMITERS, 9, wxTOKEN_RET_EMPTY_ALL },
+        { _T("01/02/99"), _T("/-"), 3 },
+        { _T("01-02/99"), _T("/-"), 3, wxTOKEN_RET_DELIMS },
     };
 
     for ( size_t n = 0; n < WXSIZEOF(tokenizerTestData); n++ )
     {
         const StringTokenizerTest& tt = tokenizerTestData[n];
-        wxStringTokenizer tkz(tt.str, tt.delims, tt.with);
+        wxStringTokenizer tkz(tt.str, tt.delims, tt.mode);
 
         size_t count = tkz.CountTokens();
-        printf(_T("String '%s' has %u tokens delimited by '%s' "),
-               tt.str,
+        printf(_T("String '%s' has %u tokens delimited by '%s' (mode = %s) "),
+               MakePrintable(tt.str).c_str(),
                count,
-               MakePrintable(tt.delims).c_str());
+               MakePrintable(tt.delims).c_str(),
+               modeNames[tkz.GetMode()]);
         if ( count == tt.count )
         {
             puts(_T("(ok)"));
@@ -1880,19 +1927,57 @@ static void TestStringTokenizer()
             continue;
         }
 
+        // if we emulate strtok(), check that we do it correctly
+        wxChar *buf, *s, *last;
+
+        if ( tkz.GetMode() == wxTOKEN_STRTOK )
+        {
+            buf = new wxChar[wxStrlen(tt.str) + 1];
+            wxStrcpy(buf, tt.str);
+
+            s = wxStrtok(buf, tt.delims, &last);
+        }
+        else
+        {
+            buf = NULL;
+        }
+
         // now show the tokens themselves
         size_t count2 = 0;
         while ( tkz.HasMoreTokens() )
         {
-            printf(_T("\ttoken %u: '%s'\n"),
+            wxString token = tkz.GetNextToken();
+
+            printf(_T("\ttoken %u: '%s'"),
                    ++count2,
-                   MakePrintable(tkz.GetNextToken()).c_str());
+                   MakePrintable(token).c_str());
+
+            if ( buf )
+            {
+                if ( token == s )
+                {
+                    puts(" (ok)");
+                }
+                else
+                {
+                    printf(" (ERROR: should be %s)\n", s);
+                }
+
+                s = wxStrtok(NULL, tt.delims, &last);
+            }
+            else
+            {
+                // nothing to compare with
+                puts("");
+            }
         }
 
         if ( count2 != count )
         {
-            puts(_T("ERROR: token count mismatch"));
+            puts(_T("\tERROR: token count mismatch"));
         }
+
+        delete [] buf;
     }
 
     puts("");
@@ -1959,11 +2044,12 @@ int main(int argc, char **argv)
     }
     if ( 0 )
     {
+        TestStringConstruction();
         TestStringSub();
         TestStringFormat();
         TestStringFind();
     }
-    TestStringTokenizer();
+        TestStringTokenizer();
 #endif // TEST_STRINGS
 
 #ifdef TEST_ARRAYS
diff --git a/src/common/tokenzr.cpp b/src/common/tokenzr.cpp
index 0601d5d0e5..64f49e5822 100644
--- a/src/common/tokenzr.cpp
+++ b/src/common/tokenzr.cpp
@@ -38,57 +38,105 @@
 // wxStringTokenizer construction
 // ----------------------------------------------------------------------------
 
-wxStringTokenizer::wxStringTokenizer(const wxString& to_tokenize,
+wxStringTokenizer::wxStringTokenizer(const wxString& str,
                                      const wxString& delims,
-                                     bool ret_delims)
+                                     wxStringTokenizerMode mode)
 {
-    SetString(to_tokenize, delims, ret_delims);
+    SetString(str, delims, mode);
 }
 
-void wxStringTokenizer::SetString(const wxString& to_tokenize,
+void wxStringTokenizer::SetString(const wxString& str,
                                   const wxString& delims,
-                                  bool ret_delim)
+                                  wxStringTokenizerMode mode)
 {
-    m_string = to_tokenize;
+    if ( mode == wxTOKEN_DEFAULT )
+    {
+        // by default, we behave like strtok() if the delimiters are only
+        // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
+        // whitespace delimiters, strtok() behaviour is better because we want
+        // to count consecutive spaces as one delimiter)
+        const wxChar *p;
+        for ( p = delims.c_str(); *p; p++ )
+        {
+            if ( !wxIsspace(*p) )
+                break;
+        }
+
+        if ( *p )
+        {
+            // stopped at a non whitespace character in delims
+            mode = wxTOKEN_RET_EMPTY;
+        }
+        else
+        {
+            // reached the end: only whitespace in delims (or delims is empty)
+            mode = wxTOKEN_STRTOK;
+        }
+    }
+
     m_delims = delims;
-    m_retdelims = ret_delim;
-    m_pos = 0;
+    m_mode = mode;      // wxTOKEN_DEFAULT was replaced by a real mode above
 
-    // empty string doesn't have any tokens
-    m_hasMore = !m_string.empty();
+    Reinit(str);
 }
 
-wxStringTokenizer::~wxStringTokenizer()
+void wxStringTokenizer::Reinit(const wxString& str)
 {
+    wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
+    // start over with the new string, m_delims and m_mode are left unchanged
+    m_string = str;
+    m_pos = 0;
+
+    // empty string doesn't have any tokens
+    m_hasMore = !m_string.empty();
 }
 
 // ----------------------------------------------------------------------------
-// count the number of tokens in the string
+// access to the tokens
 // ----------------------------------------------------------------------------
 
-size_t wxStringTokenizer::CountTokens() const
+// are there any tokens left? depends on the mode if only delimiters remain
+bool wxStringTokenizer::HasMoreTokens() const
 {
-    size_t pos = 0;
-    size_t count = 0;
-    for ( ;; )
+    wxCHECK_MSG( IsOk(), FALSE, _T("you should call SetString() first") );
+
+    if ( m_string.find_first_not_of(m_delims) == wxString::npos )
     {
-        pos = m_string.find_first_of(m_delims, pos);
-        if ( pos == wxString::npos )
-            break;
+        // no non empty tokens left, but in wxTOKEN_RET_EMPTY_ALL mode we
+        // may still return TRUE if GetNextToken() wasn't called yet for the
+        // last trailing empty token
+        return m_mode == wxTOKEN_RET_EMPTY_ALL ? m_hasMore : FALSE;
+    }
+    else
+    {
+        // there are non delimiter characters left, hence we do have more
+        // tokens
+        return TRUE;
+    }
+}
 
-        count++;    // one more token found
+// count the tokens left in the string without disturbing the tokenizer state
+size_t wxStringTokenizer::CountTokens() const
+{
+    wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
 
-        pos++;      // skip delimiter
-    }
+    // count by really tokenizing (not fast but correct in all modes) and then
+    // restore all the state GetNextToken() modifies: we are a const method
+    wxStringTokenizer *self = (wxStringTokenizer *)this;    // const_cast
+    const wxString stringInitial = m_string;
+    const size_t posInitial = m_pos;
+    const bool hasMoreInitial = m_hasMore;
 
-    // normally, we didn't count the last token in the loop above - so add it
-    // unless the string was empty from the very beginning, in which case it
-    // still has 0 (and not 1) tokens
-    if ( !m_string.empty() )
+    size_t count = 0;
+    while ( self->HasMoreTokens() )
     {
         count++;
+        (void)self->GetNextToken();
     }
 
+    self->m_string = stringInitial;
+    self->m_pos = posInitial;
+    self->m_hasMore = hasMoreInitial;
     return count;
 }
 
@@ -98,33 +146,50 @@ size_t wxStringTokenizer::CountTokens() const
 
 wxString wxStringTokenizer::GetNextToken()
 {
+    // strtok() doesn't return empty tokens, all other modes do
+    bool allowEmpty = m_mode != wxTOKEN_STRTOK;
+
     wxString token;
-    if ( HasMoreTokens() )
+    do
     {
-        size_t pos = m_string.find_first_of(m_delims); // end of token
-        size_t pos2;                                   // start of the next one
-        if ( pos != wxString::npos )
+        if ( !HasMoreTokens() )
         {
-            // return the delimiter too
-            pos2 = pos + 1;
+            break;
         }
-        else
+        // find the end of this token
+        size_t pos = m_string.find_first_of(m_delims);
+
+        // and the start of the next one
+        if ( pos == wxString::npos )
         {
-            pos2 = m_string.length();
+            // no more delimiters, the token is everything till the end of
+            // the string
+            token = m_string;
+
+            m_pos += m_string.length();
+            m_string.clear();
 
-            // no more tokens in this string
+            // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
+            // mode (we will return the trailing one right now in this case)
             m_hasMore = FALSE;
         }
+        else
+        {
+            size_t pos2 = pos + 1;
 
-        token = wxString(m_string, m_retdelims ? pos2 : pos);
+            // in wxTOKEN_RET_DELIMS mode we return the delimiter character
+            // with the token
+            token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
+                                                                    : pos);
 
-        // remove token with the following it delimiter from string
-        m_string.erase(0, pos2);
+            // remove the token and the delimiter following it from the string
+            m_string.erase(0, pos2);
 
-        // keep track of the position in the original string too
-        m_pos += pos2;
+            // keep track of the position in the original string too
+            m_pos += pos2;
+        }
     }
-    //else: no more tokens, return empty token
+    while ( !allowEmpty && token.empty() );  // strtok mode: skip empty tokens
 
     return token;
 }
-- 
2.50.0