include/wx/tokenzr.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        wx/tokenzr.h
   3 // Purpose:     String tokenizer - a C++ replacement for strtok(3)
   4 // Author:      Guilhem Lavaux
   5 // Modified by: (or rather rewritten by) Vadim Zeitlin
   6 // Created:     04/22/98
   7 // Copyright:   (c) Guilhem Lavaux
   8 // Licence:     wxWindows licence
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 #ifndef _WX_TOKENZRH
  12 #define _WX_TOKENZRH
  13
  14 #include "wx/object.h"
  15 #include "wx/string.h"
  16 #include "wx/arrstr.h"
  17
  18 // ----------------------------------------------------------------------------
  19 // constants
  20 // ----------------------------------------------------------------------------
  21
  22 // default: delimiters are usual white space characters
  23 #define wxDEFAULT_DELIMITERS (wxT(" \t\r\n"))
  24
  25 // wxStringTokenizer mode flags which determine its behaviour
  26 enum wxStringTokenizerMode
  27 {
  28     wxTOKEN_INVALID = -1,   // set by def ctor until SetString() is called
  29     wxTOKEN_DEFAULT,        // strtok() for whitespace delims, RET_EMPTY else
  30     wxTOKEN_RET_EMPTY,      // return empty token in the middle of the string
  31     wxTOKEN_RET_EMPTY_ALL,  // return trailing empty tokens too
  32     wxTOKEN_RET_DELIMS,     // return the delim with token (implies RET_EMPTY)
  33     wxTOKEN_STRTOK          // behave exactly like strtok(3)
  34 };
  35
  36 // ----------------------------------------------------------------------------
  37 // wxStringTokenizer: replaces infamous strtok() and has some other features
  38 // ----------------------------------------------------------------------------
  39
  40 class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject
  41 {
  42 public:
  43     // ctors and initializers
  44         // default ctor, call SetString() later
  45     wxStringTokenizer() { m_mode = wxTOKEN_INVALID; }
  46         // ctor which gives us the string
  47     wxStringTokenizer(const wxString& str,
  48                       const wxString& delims = wxDEFAULT_DELIMITERS,
  49                       wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
  50
  51         // args are same as for the non default ctor above
  52     void SetString(const wxString& str,
  53                    const wxString& delims = wxDEFAULT_DELIMITERS,
  54                    wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
  55
  56         // reinitialize the tokenizer with the same delimiters/mode
  57     void Reinit(const wxString& str);
  58
  59     // tokens access
  60         // return the number of remaining tokens
  61     size_t CountTokens() const;
  62         // did we reach the end of the string?
  63     bool HasMoreTokens() const;
  64         // get the next token, will return empty string if !HasMoreTokens()
  65     wxString GetNextToken();
  66         // get the delimiter which terminated the token last retrieved by
  67         // GetNextToken() or NUL if there had been no tokens yet or the last
  68         // one wasn't terminated (but ran to the end of the string)
  69     wxChar GetLastDelimiter() const { return m_lastDelim; }
  70
  71     // get current tokenizer state
  72         // returns the part of the string which remains to tokenize (*not* the
  73         // initial string)
  74     wxString GetString() const { return wxString(m_pos, m_string.end()); }
  75
  76         // returns the current position (i.e. one index after the last
  77         // returned token or 0 if GetNextToken() has never been called) in the
  78         // original string
  79     size_t GetPosition() const { return m_pos - m_string.begin(); }
  80
  81     // misc
  82         // get the current mode - can be different from the one passed to the
  83         // ctor if it was wxTOKEN_DEFAULT
  84     wxStringTokenizerMode GetMode() const { return m_mode; }
  85         // do we return empty tokens?
  86     bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }
  87
  88
  89     // backwards compatibility section from now on
  90     // -------------------------------------------
  91
  92     // for compatibility only, use GetNextToken() instead
  93     wxString NextToken() { return GetNextToken(); }
  94
  95     // compatibility only, don't use
  96     void SetString(const wxString& to_tokenize,
  97                    const wxString& delims,
  98                    bool WXUNUSED(ret_delim))
  99     {
 100         SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS);
 101     }
 102
 103     wxStringTokenizer(const wxString& to_tokenize,
 104                       const wxString& delims,
 105                       bool ret_delim)
 106     {
 107         SetString(to_tokenize, delims, ret_delim);
 108     }
 109
 110 protected:
 111     bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
 112
 113     bool DoHasMoreTokens() const;
 114
 115     enum MoreTokensState
 116     {
 117         MoreTokens_Unknown,
 118         MoreTokens_Yes,
 119         MoreTokens_No
 120     };
 121
 122     MoreTokensState m_hasMoreTokens;
 123
 124     wxString m_string;              // the string we tokenize
 125     wxString::const_iterator m_stringEnd;
 126     // FIXME-UTF8: use wxWcharBuffer
 127     wxWxCharBuffer m_delims;        // all possible delimiters
 128     size_t m_delimsLen;
 129
 130     wxString::const_iterator m_pos; // the current position in m_string
 131
 132     wxStringTokenizerMode m_mode;   // see wxTOKEN_XXX values
 133
 134     wxChar   m_lastDelim;           // delimiter after last token or '\0'
 135 };
 136
 137 // ----------------------------------------------------------------------------
 138 // convenience function which returns all tokens at once
 139 // ----------------------------------------------------------------------------
 140
 141 // the function takes the same parameters as wxStringTokenizer ctor and returns
 142 // the array containing all tokens
 143 wxArrayString WXDLLIMPEXP_BASE
 144 wxStringTokenize(const wxString& str,
 145                  const wxString& delims = wxDEFAULT_DELIMITERS,
 146                  wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
 147
 148 #endif // _WX_TOKENZRH