include/wx/tokenzr.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        wx/tokenzr.h
   3 // Purpose:     String tokenizer - a C++ replacement for strtok(3)
   4 // Author:      Guilhem Lavaux
   5 // Modified by: (or rather rewritten by) Vadim Zeitlin
   6 // Created:     04/22/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) Guilhem Lavaux
   9 // Licence:     wxWindows licence
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 #ifndef _WX_TOKENZRH
  13 #define _WX_TOKENZRH
  14
  15 #include "wx/object.h"
  16 #include "wx/string.h"
  17 #include "wx/arrstr.h"
  18
  19 // ----------------------------------------------------------------------------
  20 // constants
  21 // ----------------------------------------------------------------------------
  22
  23 // default: delimiters are usual white space characters
  24 #define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
  25
  26 // wxStringTokenizer mode flags which determine its behaviour
  27 enum wxStringTokenizerMode
  28 {
  29     wxTOKEN_INVALID = -1,   // set by def ctor until SetString() is called
  30     wxTOKEN_DEFAULT,        // strtok() for whitespace delims, RET_EMPTY else
  31     wxTOKEN_RET_EMPTY,      // return empty token in the middle of the string
  32     wxTOKEN_RET_EMPTY_ALL,  // return trailing empty tokens too
  33     wxTOKEN_RET_DELIMS,     // return the delim with token (implies RET_EMPTY)
  34     wxTOKEN_STRTOK          // behave exactly like strtok(3)
  35 };
  36
  37 // ----------------------------------------------------------------------------
  38 // wxStringTokenizer: replaces infamous strtok() and has some other features
  39 // ----------------------------------------------------------------------------
  40
  41 class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject
  42 {
  43 public:
  44     // ctors and initializers
  45         // default ctor, call SetString() later
  46     wxStringTokenizer() { m_mode = wxTOKEN_INVALID; }
  47         // ctor which gives us the string
  48     wxStringTokenizer(const wxString& str,
  49                       const wxString& delims = wxDEFAULT_DELIMITERS,
  50                       wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
  51
  52         // args are same as for the non default ctor above
  53     void SetString(const wxString& str,
  54                    const wxString& delims = wxDEFAULT_DELIMITERS,
  55                    wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
  56
  57         // reinitialize the tokenizer with the same delimiters/mode
  58     void Reinit(const wxString& str);
  59
  60     // tokens access
  61         // return the number of remaining tokens
  62     size_t CountTokens() const;
  63         // did we reach the end of the string?
  64     bool HasMoreTokens() const;
  65         // get the next token, will return empty string if !HasMoreTokens()
  66     wxString GetNextToken();
  67         // get the delimiter which terminated the token last retrieved by
  68         // GetNextToken() or NUL if there had been no tokens yet or the last
  69         // one wasn't terminated (but ran to the end of the string)
  70     wxChar GetLastDelimiter() const { return m_lastDelim; }
  71
  72     // get current tokenizer state
  73         // returns the part of the string which remains to tokenize (*not* the
  74         // initial string)
  75     wxString GetString() const { return wxString(m_pos, m_string.end()); }
  76
  77         // returns the current position (i.e. one index after the last
  78         // returned token or 0 if GetNextToken() has never been called) in the
  79         // original string
  80     size_t GetPosition() const { return m_pos - m_string.begin(); }
  81
  82     // misc
  83         // get the current mode - can be different from the one passed to the
  84         // ctor if it was wxTOKEN_DEFAULT
  85     wxStringTokenizerMode GetMode() const { return m_mode; }
  86         // do we return empty tokens?
  87     bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }
  88
  89
  90     // backwards compatibility section from now on
  91     // -------------------------------------------
  92
  93     // for compatibility only, use GetNextToken() instead
  94     wxString NextToken() { return GetNextToken(); }
  95
  96     // compatibility only, don't use
  97     void SetString(const wxString& to_tokenize,
  98                    const wxString& delims,
  99                    bool WXUNUSED(ret_delim))
 100     {
 101         SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS);
 102     }
 103
 104     wxStringTokenizer(const wxString& to_tokenize,
 105                       const wxString& delims,
 106                       bool ret_delim)
 107     {
 108         SetString(to_tokenize, delims, ret_delim);
 109     }
 110
 111 protected:
 112     bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
 113
 114     bool DoHasMoreTokens() const;
 115
 116     enum MoreTokensState
 117     {
 118         MoreTokens_Unknown,
 119         MoreTokens_Yes,
 120         MoreTokens_No
 121     };
 122
 123     MoreTokensState m_hasMoreTokens;
 124
 125     wxString m_string;              // the string we tokenize
 126     wxString::const_iterator m_stringEnd;
 127     // FIXME-UTF8: use wxWcharBuffer
 128     wxWxCharBuffer m_delims;        // all possible delimiters
 129     size_t m_delimsLen;
 130
 131     wxString::const_iterator m_pos; // the current position in m_string
 132
 133     wxStringTokenizerMode m_mode;   // see wxTOKEN_XXX values
 134
 135     wxChar   m_lastDelim;           // delimiter after last token or '\0'
 136 };
 137
 138 // ----------------------------------------------------------------------------
 139 // convenience function which returns all tokens at once
 140 // ----------------------------------------------------------------------------
 141
 142 // the function takes the same parameters as wxStringTokenizer ctor and returns
 143 // the array containing all tokens
 144 wxArrayString WXDLLIMPEXP_BASE
 145 wxStringTokenize(const wxString& str,
 146                  const wxString& delims = wxDEFAULT_DELIMITERS,
 147                  wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
 148
 149 #endif // _WX_TOKENZRH