1. changed wxStringTokenizer to not modify the string we're iterating over

author Vadim Zeitlin <vadim@wxwidgets.org>

Sat, 24 Dec 2005 00:12:54 +0000 (00:12 +0000)

committer Vadim Zeitlin <vadim@wxwidgets.org>

Sat, 24 Dec 2005 00:12:54 +0000 (00:12 +0000)
author Vadim Zeitlin <vadim@wxwidgets.org>
Sat, 24 Dec 2005 00:12:54 +0000 (00:12 +0000)
committer Vadim Zeitlin <vadim@wxwidgets.org>
Sat, 24 Dec 2005 00:12:54 +0000 (00:12 +0000)
diff --git a/docs/changes.txt b/docs/changes.txt

index 12e343047cba1724b8a9031a5bc104d3a695fc30..50873d7eaf6fe89b74bca9f45928432eefada9f1 100644 (file)
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -14,6 +14,7 @@ INCOMPATIBLE CHANGES SINCE 2.6.x
  All:
  
  - wxLaunchDefaultBrowser() now supports wxBROWSER_NEW_WINDOW flag
+- Added wxStringTokenizer::GetLastDelimiter(); improved documentation
  - wxGetWorkingDirectory() deprecated. Use wxGetCwd() instead.
  
  All (GUI):
diff --git a/docs/latex/wx/tokenizr.tex b/docs/latex/wx/tokenizr.tex

index 19ca6196425d96cee475ddcd4d7e7c0526282642..3caf7bf3d2b5bf9155ae5590c35a71d2a40140c2 100644 (file)
--- a/docs/latex/wx/tokenizr.tex
+++ b/docs/latex/wx/tokenizr.tex
@@ -43,17 +43,23 @@ same as {\tt wxTOKEN\_STRTOK} if the delimiter string contains only
  whitespaces, same as {\tt wxTOKEN\_RET\_EMPTY} otherwise}
  \twocolitem{{\tt wxTOKEN\_RET\_EMPTY}}{In this mode, the empty tokens in the
  middle of the string will be returned, i.e. {\tt "a::b:"} will be tokenized in
-three tokens `a', `' and `b'.}
-\twocolitem{{\tt wxTOKEN\_RET\_EMPTY\_ALL}}{In this mode, empty trailing token
-(after the last delimiter character) will be returned as well. The string as
-above will contain four tokens: the already mentioned ones and another empty
-one as the last one.}
+three tokens `a', `' and `b'. Notice that all trailing delimiters are ignored
+in this mode, not just the last one, i.e. a string \texttt{"a::b::"} would
+still result in the same set of tokens.}
+\twocolitem{{\tt wxTOKEN\_RET\_EMPTY\_ALL}}{In this mode, empty trailing tokens
+(including the one after the last delimiter character) will be returned as
+well. The string \texttt{"a::b:"} will be tokenized into four tokens: the already
+mentioned ones and another empty one as the last one, while the string
+\texttt{"a::b::"} will have five tokens.}
  \twocolitem{{\tt wxTOKEN\_RET\_DELIMS}}{In this mode, the delimiter character
  after the end of the current token (there may be none if this is the last
  token) is returned appended to the token. Otherwise, it is the same mode as 
-{\tt wxTOKEN\_RET\_EMPTY}.}
+\texttt{wxTOKEN\_RET\_EMPTY}. Notice that there is no mode like this one but
+behaving like \texttt{wxTOKEN\_RET\_EMPTY\_ALL} instead of 
+\texttt{wxTOKEN\_RET\_EMPTY}; use \texttt{wxTOKEN\_RET\_EMPTY\_ALL} and 
+\helpref{GetLastDelimiter()}{wxstringtokenizergetlastdelimiter} to emulate it.}
  \twocolitem{{\tt wxTOKEN\_STRTOK}}{In this mode the class behaves exactly like
-the standard {\tt strtok()} function. The empty tokens are never returned.}
+the standard {\tt strtok()} function: the empty tokens are never returned.}
  \end{twocollist}
  
  \wxheading{Derived from}
@@ -103,9 +109,19 @@ reaches $0$ \helpref{HasMoreTokens}{wxstringtokenizerhasmoretokens} returns
  Returns \true if the tokenizer has further tokens, \false if none are left.
  
  
+\membersection{wxStringTokenizer::GetLastDelimiter}\label{wxstringtokenizergetlastdelimiter}
+
+\func{wxChar}{GetLastDelimiter}{\void}
+
+Returns the delimiter which ended the scan for the last token returned by 
+\helpref{GetNextToken()}{wxstringtokenizergetnexttoken} or \texttt{NUL} if
+\texttt{GetNextToken()} has not been called yet or if the last token ran to the end of
+the string (as the trailing empty token in \texttt{wxTOKEN\_RET\_EMPTY\_ALL} mode does).
+
+
  \membersection{wxStringTokenizer::GetNextToken}\label{wxstringtokenizergetnexttoken}
  
-\func{wxString}{GetNextToken}{\void}
+\constfunc{wxString}{GetNextToken}{\void}
  
  Returns the next token or empty string if the end of string was reached.
  
diff --git a/include/wx/tokenzr.h b/include/wx/tokenzr.h

index 4890d6260f6d8781418748d17752ec2ab6ac1c22..a26d62b186637ef9610223c81263002a966ecdb5 100644 (file)
--- a/include/wx/tokenzr.h
+++ b/include/wx/tokenzr.h
@@ -58,12 +58,16 @@ public:
      void Reinit(const wxString& str);
  
      // tokens access
-        // count them
+        // return the number of remaining tokens
      size_t CountTokens() const;
          // did we reach the end of the string?
      bool HasMoreTokens() const;
          // get the next token, will return empty string if !HasMoreTokens()
      wxString GetNextToken();
+        // get the delimiter which terminated the token last retrieved by
+        // GetNextToken() or NUL if there had been no tokens yet or the last
+        // one wasn't terminated (but ran to the end of the string)
+    wxChar GetLastDelimiter() const { return m_lastDelim; }
  
      // get current tokenizer state
          // returns the part of the string which remains to tokenize (*not* the
@@ -79,6 +83,9 @@ public:
          // get the current mode - can be different from the one passed to the
          // ctor if it was wxTOKEN_DEFAULT
      wxStringTokenizerMode GetMode() const { return m_mode; }
+        // do we return empty tokens?
+    bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }
+
  
      // backwards compatibility section from now on
      // -------------------------------------------
@@ -104,14 +111,14 @@ public:
  protected:
      bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
  
-    wxString m_string,              // the (rest of) string to tokenize
-             m_delims;              // all delimiters
+    wxString m_string,              // the string we tokenize
+             m_delims;              // all possible delimiters
  
-    size_t   m_pos;                 // the position in the original string
+    size_t   m_pos;                 // the current position in m_string
  
      wxStringTokenizerMode m_mode;   // see wxTOKEN_XXX values
  
-    bool     m_hasMore;             // do we have more (possible empty) tokens?
+    wxChar   m_lastDelim;           // delimiter after last token or '\0'
  };
  
  // ----------------------------------------------------------------------------
diff --git a/src/common/tokenzr.cpp b/src/common/tokenzr.cpp

index 26612661eede23d491798300caf365e9aadb4068..f69e8e6a73f8a7a6d86e0db770db71a7023fbe12 100644 (file)
--- a/src/common/tokenzr.cpp
+++ b/src/common/tokenzr.cpp
@@ -86,9 +86,7 @@ void wxStringTokenizer::Reinit(const wxString& str)
  
      m_string = str;
      m_pos = 0;
-
-    // empty string doesn't have any tokens
-    m_hasMore = !m_string.empty();
+    m_lastDelim = _T('\0');
  }
  
  // ----------------------------------------------------------------------------
@@ -100,49 +98,61 @@ bool wxStringTokenizer::HasMoreTokens() const
  {
      wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
  
-    if ( m_string.find_first_not_of(m_delims) == wxString::npos )
+    if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos )
      {
-        // no non empty tokens left, but in 2 cases we still may return true if
-        // GetNextToken() wasn't called yet for this empty token:
-        //
-        //   a) in wxTOKEN_RET_EMPTY_ALL mode we always do it
-        //   b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a
-        //      string containing only the delimiter: then there is an empty
-        //      token just before it
-        return (m_mode == wxTOKEN_RET_EMPTY_ALL) ||
-               (m_mode == wxTOKEN_RET_EMPTY && m_pos == 0)
-                    ? m_hasMore : false;
+        // there are non delimiter characters left, so we do have more tokens
+        return true;
      }
-    else
+
+    switch ( m_mode )
      {
-        // there are non delimiter characters left, hence we do have more
-        // tokens
-        return true;
+        case wxTOKEN_RET_EMPTY:
+        case wxTOKEN_RET_DELIMS:
+            // special hack for wxTOKEN_RET_EMPTY: we should return the initial
+            // empty token even if there are only delimiters after it
+            return m_pos == 0 && !m_string.empty();
+
+        case wxTOKEN_RET_EMPTY_ALL:
+            // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had
+            // already returned the trailing empty token after the last
+            // delimiter by examining m_lastDelim: it is set to NUL if we run
+            // up to the end of the string in GetNextToken(), but if it is not
+            // NUL yet we still have this last token to return even if m_pos is
+            // already at m_string.length()
+            return m_pos < m_string.length() || m_lastDelim != _T('\0');
+
+        case wxTOKEN_INVALID:
+        case wxTOKEN_DEFAULT:
+            wxFAIL_MSG( _T("unexpected tokenizer mode") );
+            // fall through
+
+        case wxTOKEN_STRTOK:
+            // never return empty tokens
+            break;
      }
+
+    return false;
  }
  
-// count the number of tokens in the string
+// count the number of (remaining) tokens in the string
  size_t wxStringTokenizer::CountTokens() const
  {
      wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
  
      // VZ: this function is IMHO not very useful, so it's probably not very
-    //     important if it's implementation here is not as efficient as it
-    //     could be - but OTOH like this we're sure to get the correct answer
+    //     important if its implementation here is not as efficient as it
+    //     could be -- but OTOH like this we're sure to get the correct answer
      //     in all modes
-    wxStringTokenizer *self = (wxStringTokenizer *)this;    // const_cast
-    wxString stringInitial = m_string;
+    wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode);
  
      size_t count = 0;
-    while ( self->HasMoreTokens() )
+    while ( tkz.HasMoreTokens() )
      {
          count++;
  
-        (void)self->GetNextToken();
+        (void)tkz.GetNextToken();
      }
  
-    self->Reinit(stringInitial);
-
      return count;
  }
  
@@ -152,9 +162,6 @@ size_t wxStringTokenizer::CountTokens() const
  
  wxString wxStringTokenizer::GetNextToken()
  {
-    // strtok() doesn't return empty tokens, all other modes do
-    bool allowEmpty = m_mode != wxTOKEN_STRTOK;
-
      wxString token;
      do
      {
@@ -162,40 +169,40 @@ wxString wxStringTokenizer::GetNextToken()
          {
              break;
          }
+
          // find the end of this token
-        size_t pos = m_string.find_first_of(m_delims);
+        size_t pos = m_string.find_first_of(m_delims, m_pos);
  
          // and the start of the next one
          if ( pos == wxString::npos )
          {
              // no more delimiters, the token is everything till the end of
              // string
-            token = m_string;
+            token.assign(m_string, m_pos, wxString::npos);
  
-            m_pos += m_string.length();
-            m_string.clear();
+            // skip the token
+            m_pos = m_string.length();
  
-            // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
-            // mode (we will return the trailing one right now in this case)
-            m_hasMore = false;
+            // it wasn't terminated
+            m_lastDelim = _T('\0');
          }
-        else
+        else // we found a delimiter at pos
          {
-            size_t pos2 = pos + 1;
-
              // in wxTOKEN_RET_DELIMS mode we return the delimiter character
-            // with token
-            token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
-                                                                    : pos);
+            // with token, otherwise leave it out
+            size_t len = pos - m_pos;
+            if ( m_mode == wxTOKEN_RET_DELIMS )
+                len++;
+
+            token.assign(m_string, m_pos, len);
  
-            // remove token with the following it delimiter from string
-            m_string.erase(0, pos2);
+            // skip the token and the trailing delimiter
+            m_pos = pos + 1;
  
-            // keep track of the position in the original string too
-            m_pos += pos2;
+            m_lastDelim = m_string[pos];
          }
      }
-    while ( !allowEmpty && token.empty() );
+    while ( !AllowEmpty() && token.empty() );
  
      return token;
  }
diff --git a/tests/strings/tokenizer.cpp b/tests/strings/tokenizer.cpp

index 17e9a911ea7872891b4a74f161dd9dcfeb60b054..788177b9eb4bd3db0b633fea4e528105c367a4de 100644 (file)
--- a/tests/strings/tokenizer.cpp
+++ b/tests/strings/tokenizer.cpp
@@ -36,11 +36,13 @@ private:
      CPPUNIT_TEST_SUITE( TokenizerTestCase );
          CPPUNIT_TEST( GetCount );
          CPPUNIT_TEST( GetPosition );
+        CPPUNIT_TEST( LastDelimiter );
          CPPUNIT_TEST( StrtokCompat );
      CPPUNIT_TEST_SUITE_END();
  
      void GetCount();
      void GetPosition();
+    void LastDelimiter();
      void StrtokCompat();
  
      DECLARE_NO_COPY_CLASS(TokenizerTestCase)
@@ -184,6 +186,23 @@ void TokenizerTestCase::GetPosition()
      DoTestGetPosition(_T("foo_bar_"), _T("_"), 4, 8, 0);
  }
  
+void TokenizerTestCase::LastDelimiter()
+{
+    wxStringTokenizer tkz(_T("a+-b=c"), _T("+-="));
+
+    tkz.GetNextToken();
+    CPPUNIT_ASSERT_EQUAL( _T('+'), tkz.GetLastDelimiter() );
+
+    tkz.GetNextToken();
+    CPPUNIT_ASSERT_EQUAL( _T('-'), tkz.GetLastDelimiter() );
+
+    tkz.GetNextToken();
+    CPPUNIT_ASSERT_EQUAL( _T('='), tkz.GetLastDelimiter() );
+
+    tkz.GetNextToken();
+    CPPUNIT_ASSERT_EQUAL( _T('\0'), tkz.GetLastDelimiter() );
+}
+
  void TokenizerTestCase::StrtokCompat()
  {
      for ( size_t n = 0; n < WXSIZEOF(gs_testData); n++ )
author	Vadim Zeitlin <vadim@wxwidgets.org>
	Sat, 24 Dec 2005 00:12:54 +0000 (00:12 +0000)
committer	Vadim Zeitlin <vadim@wxwidgets.org>
	Sat, 24 Dec 2005 00:12:54 +0000 (00:12 +0000)
docs/changes.txt		patch \| blob \| blame \| history
docs/latex/wx/tokenizr.tex		patch \| blob \| blame \| history
include/wx/tokenzr.h		patch \| blob \| blame \| history
src/common/tokenzr.cpp		patch \| blob \| blame \| history
tests/strings/tokenizer.cpp		patch \| blob \| blame \| history