X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/bbf8fc5391b2458d630d29a794df99c8e681e6eb..d642db66a5efc82d374b813022c72ba88bc50839:/include/wx/tokenzr.h

diff --git a/include/wx/tokenzr.h b/include/wx/tokenzr.h
index 9ff109d46d..0708ebff13 100644
--- a/include/wx/tokenzr.h
+++ b/include/wx/tokenzr.h
@@ -1,8 +1,8 @@
 /////////////////////////////////////////////////////////////////////////////
-// Name:        tokenzr.h
-// Purpose:     String tokenizer
+// Name:        wx/tokenzr.h
+// Purpose:     String tokenizer - a C++ replacement for strtok(3)
 // Author:      Guilhem Lavaux
-// Modified by: Vadim Zeitlin
+// Modified by: (or rather rewritten by) Vadim Zeitlin
 // Created:     04/22/98
 // RCS-ID:      $Id$
 // Copyright:   (c) Guilhem Lavaux
@@ -12,55 +12,138 @@
 #ifndef _WX_TOKENZRH
 #define _WX_TOKENZRH
 
-#ifdef __GNUG__
-    #pragma interface "tokenzr.h"
-#endif
-
 #include "wx/object.h"
 #include "wx/string.h"
+#include "wx/arrstr.h"
+
+// ----------------------------------------------------------------------------
+// constants
+// ----------------------------------------------------------------------------
 
 // default: delimiters are usual white space characters
-#define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
+#define wxDEFAULT_DELIMITERS (wxT(" \t\r\n"))
+
+// modes selecting how wxStringTokenizer treats empty tokens and delimiters
+enum wxStringTokenizerMode
+{
+    wxTOKEN_INVALID = -1,   // set by default ctor until SetString() is called
+    wxTOKEN_DEFAULT,        // STRTOK for whitespace delimiters, else RET_EMPTY
+    wxTOKEN_RET_EMPTY,      // return empty tokens in the middle of the string
+    wxTOKEN_RET_EMPTY_ALL,  // as RET_EMPTY, but return trailing empty tokens too
+    wxTOKEN_RET_DELIMS,     // return the delimiter with token (implies RET_EMPTY)
+    wxTOKEN_STRTOK          // behave exactly like strtok(3)
+};
+
+// ----------------------------------------------------------------------------
+// wxStringTokenizer: replaces infamous strtok() and has some other features
+// ----------------------------------------------------------------------------
 
-class WXDLLEXPORT wxStringTokenizer : public wxObject
+class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject
 {
 public:
-    // ctors and such
-    wxStringTokenizer() { m_retdelims = FALSE; m_pos = 0; }
-    wxStringTokenizer(const wxString& to_tokenize,
+    // ctors and initializers
+        // default ctor, call SetString() later (GetLastDelimiter() is NUL until then)
+    wxStringTokenizer() { m_mode = wxTOKEN_INVALID; m_lastDelim = wxT('\0'); }
+        // ctor which gives us the string
+    wxStringTokenizer(const wxString& str,
                       const wxString& delims = wxDEFAULT_DELIMITERS,
-                      bool ret_delim = FALSE);
-    void SetString(const wxString& to_tokenize,
+                      wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
+
+        // args are same as for the non default ctor above
+    void SetString(const wxString& str,
                    const wxString& delims = wxDEFAULT_DELIMITERS,
-                   bool ret_delim = FALSE);
-    virtual ~wxStringTokenizer();
+                   wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
 
-    // count tokens/get next token
+        // reinitialize the tokenizer with the same delimiters/mode
+    void Reinit(const wxString& str);
+
+    // tokens access
+        // return the number of remaining tokens
     size_t CountTokens() const;
-    bool HasMoreTokens() { return m_hasMore; }
+        // are there any more tokens left to retrieve?
+    bool HasMoreTokens() const;
+        // get the next token, will return empty string if !HasMoreTokens()
     wxString GetNextToken();
+        // get the delimiter which terminated the token last retrieved by
+        // GetNextToken() or NUL if there had been no tokens yet or the last
+        // one wasn't terminated (but ran to the end of the string)
+    wxChar GetLastDelimiter() const { return m_lastDelim; }
+
+    // get current tokenizer state
+        // returns the part of the string which remains to tokenize (*not* the
+        // initial string)
+    wxString GetString() const { return wxString(m_pos, m_string.end()); }
+
+        // returns the current position (i.e. one index after the last
+        // returned token or 0 if GetNextToken() has never been called) in the
+        // original string
+    size_t GetPosition() const { return m_pos - m_string.begin(); }
 
-    // One note about GetString -- it returns the string
-    // remaining after the previous tokens have been removed,
-    // not the original string
-    wxString GetString() const { return m_string; }
+    // misc
+        // get the current mode - can be different from the one passed to the
+        // ctor if it was wxTOKEN_DEFAULT
+    wxStringTokenizerMode GetMode() const { return m_mode; }
+        // do we return empty tokens?
+    bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }
 
-    // returns the current position (i.e. one index after the last returned
-    // token or 0 if GetNextToken() has never been called) in the original
-    // string
-    size_t GetPosition() const { return m_pos; }
+
+    // backwards compatibility section from now on
+    // -------------------------------------------
 
     // for compatibility only, use GetNextToken() instead
     wxString NextToken() { return GetNextToken(); }
 
+    // compatibility only, don't use: ret_delim == false selects wxTOKEN_DEFAULT
+    void SetString(const wxString& to_tokenize,
+                   const wxString& delims, bool ret_delim)
+    {
+        SetString(to_tokenize, delims,
+                  ret_delim ? wxTOKEN_RET_DELIMS : wxTOKEN_DEFAULT);
+    }
+
+    // compatibility only, don't use: same mapping as the SetString() above
+    wxStringTokenizer(const wxString& to_tokenize,
+                      const wxString& delims, bool ret_delim)
+    {
+        SetString(to_tokenize, delims, ret_delim);
+    }
+
 protected:
-    wxString m_string,              // the (rest of) string to tokenize
-             m_delims;              // all delimiters
+    bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
+
+    bool DoHasMoreTokens() const;
 
-    size_t   m_pos;                 // the position in the original string
+    enum MoreTokensState
+    {
+        MoreTokens_Unknown,
+        MoreTokens_Yes,
+        MoreTokens_No
+    };
 
-    bool     m_retdelims;           // if TRUE, return delims with tokens
-    bool     m_hasMore;             // do we have more tokens?
+    MoreTokensState m_hasMoreTokens; // NOTE(review): presumably caches the HasMoreTokens() answer -- confirm in tokenzr.cpp
+
+    wxString m_string;              // the string we tokenize
+    wxString::const_iterator m_stringEnd; // NOTE(review): presumably m_string.end() -- confirm
+    // FIXME-UTF8: use wxWcharBuffer
+    wxWxCharBuffer m_delims;        // all possible delimiters
+    size_t m_delimsLen;             // NOTE(review): presumably the length of m_delims -- confirm
+
+    wxString::const_iterator m_pos; // the current position in m_string
+
+    wxStringTokenizerMode m_mode;   // see wxTOKEN_XXX values
+
+    wxChar   m_lastDelim;           // delimiter after last token or '\0'
 };
 
+// ----------------------------------------------------------------------------
+// convenience function which returns all tokens at once
+// ----------------------------------------------------------------------------
+
+// the function takes the same parameters as wxStringTokenizer ctor and returns
+// the array containing all tokens
+wxArrayString WXDLLIMPEXP_BASE
+wxStringTokenize(const wxString& str,
+                 const wxString& delims = wxDEFAULT_DELIMITERS,
+                 wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
+
 #endif // _WX_TOKENZRH