interface/tokenzr.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        tokenzr.h
   3 // Purpose:     interface of wxStringTokenizer
   4 // Author:      wxWidgets team
   5 // RCS-ID:      $Id$
   6 // Licence:     wxWindows license
   7 /////////////////////////////////////////////////////////////////////////////
   8
   9 /**
  10     @class wxStringTokenizer
  11     @wxheader{tokenzr.h}
  12
  13     wxStringTokenizer helps you to break a string up into a number of tokens. It
  14     replaces the standard C function @c strtok() and also extends it in a
  15     number of ways.
  16
  17     To use this class, you should create a wxStringTokenizer object, give it the
  18     string to tokenize and also the delimiters which separate tokens in the string
  19     (by default, white space characters will be used).
  20
  21     Then wxStringTokenizer::GetNextToken may be called
  22     repeatedly until wxStringTokenizer::HasMoreTokens
  23     returns @false.
  24
  25     For example:
  26
  27     @code
  28     wxStringTokenizer tkz(wxT("first:second:third:fourth"), wxT(":"));
  29     while ( tkz.HasMoreTokens() )
  30     {
  31         wxString token = tkz.GetNextToken();
  32
  33         // process token here
  34     }
  35     @endcode
  36
  37     By default, wxStringTokenizer will behave in the same way as @c strtok() if
  38     the delimiters string only contains white space characters but, unlike the
  39     standard function, it will return empty tokens if this is not the case. This
  40     is helpful for parsing strictly formatted data where the number of fields is
  41     fixed but some of them may be empty (e.g. @c TAB or comma delimited text
  42     files).
  43
  44     The behaviour is governed by the last parameter, @c mode, of the
  45     wxStringTokenizer constructor or of
  46     wxStringTokenizer::SetString,
  47     which may be one of the following:
  48
  49
  50
  51     @c wxTOKEN_DEFAULT
  52
  53
  54     Default behaviour (as described above):
  55     same as @c wxTOKEN_STRTOK if the delimiter string contains only
  56     whitespaces, same as @c wxTOKEN_RET_EMPTY otherwise
  57
  58
  59     @c wxTOKEN_RET_EMPTY
  60
  61
  62     In this mode, the empty tokens in the
  63     middle of the string will be returned, i.e. @c "a::b:" will be tokenized into
  64     three tokens 'a', '' and 'b'. Notice that all trailing delimiters are ignored
  65     in this mode, not just the last one, i.e. a string @c "a::b::" would
  66     still result in the same set of tokens.
  67
  68
  69     @c wxTOKEN_RET_EMPTY_ALL
  70
  71
  72     In this mode, empty trailing tokens
  73     (including the one after the last delimiter character) will be returned as
  74     well. The string @c "a::b:" will be tokenized into four tokens: the already
  75     mentioned ones and another empty one as the last one, while the string
  76     @c "a::b::" will have five tokens.
  77
  78
  79     @c wxTOKEN_RET_DELIMS
  80
  81
  82     In this mode, the delimiter character
  83     after the end of the current token (there may be none if this is the last
  84     token) is returned appended to the token. Otherwise, it is the same mode as
  85     @c wxTOKEN_RET_EMPTY. Notice that there is no mode like this one but
  86     behaving like @c wxTOKEN_RET_EMPTY_ALL instead of
  87     @c wxTOKEN_RET_EMPTY; use @c wxTOKEN_RET_EMPTY_ALL and
  88     wxStringTokenizer::GetLastDelimiter to emulate it.
  89
  90
  91     @c wxTOKEN_STRTOK
  92
  93
  94     In this mode the class behaves exactly like
  95     the standard @c strtok() function: the empty tokens are never returned.
  96
  97
  98
  99     @library{wxbase}
 100     @category{data}
 101
 102     @see wxStringTokenize()
 103 */
 104 class wxStringTokenizer : public wxObject
 105 {
 106 public:
 107     //@{
 108     /**
 109         Constructors. The second form takes the string to tokenize, a string
 110         containing the delimiters and the tokenization mode; see also SetString().
 111     */
 112     wxStringTokenizer();
 113     wxStringTokenizer(const wxString& str,
 114                       const wxString& delims = " \t\r\n",
 115                       wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
 116     //@}
 117
 118     /**
 119         Returns the number of tokens remaining in the input string.
 120
 121         The value returned by this function is decremented each time
 122         GetNextToken() is called and, once it reaches 0, HasMoreTokens()
 123         returns @false.
 124     */
 125     int CountTokens() const;
 126
 127     /**
 128         Returns the delimiter which ended the scan for the last token returned
 129         by GetNextToken(), or @c NUL if GetNextToken() has not been called yet
 130         or if it returned the trailing empty token in
 131         @c wxTOKEN_RET_EMPTY_ALL mode. This accessor does not modify the tokenizer.
 132
 133         @wxsince{2.7.0}
 134     */
 135     wxChar GetLastDelimiter() const;
 136
 137     /**
 138         Extracts and returns the next token, or an empty string at the end of the input.
 139     */
 140     wxString GetNextToken();
 141
 142     /**
 143         Returns the current position in the original string, i.e. one index
 144         after the last returned token, or 0 if GetNextToken() has never been
 145         called.
 146     */
 147     size_t GetPosition() const;
 148
 149     /**
 150         Returns the part of the starting string without the tokens already extracted.
 151     */
 152     wxString GetString() const;
 153
 154     /**
 155         Returns @true if there are more tokens to extract, @false if none are left.
 156     */
 157     bool HasMoreTokens() const;
 158
 159     /**
 160         Initializes the tokenizer.
 161         Pass the string to tokenize, a string containing the delimiters
 162         and the mode specifying how the string should be tokenized.
 163     */
 164     void SetString(const wxString& to_tokenize,
 165                    const wxString& delims = " \t\r\n",
 166                    wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
 167 };
 168