interface/tokenzr.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        tokenzr.h
   3 // Purpose:     interface of wxStringTokenizer
   4 // Author:      wxWidgets team
   5 // RCS-ID:      $Id$
   6 // Licence:     wxWindows license
   7 /////////////////////////////////////////////////////////////////////////////
   8
   9 /**
  10     The behaviour of wxStringTokenizer is governed by the
  11     wxStringTokenizer::wxStringTokenizer() or wxStringTokenizer::SetString()
  12     with the parameter @e mode, which may be one of the following:
  13 */
  14 enum wxStringTokenizerMode
  15 {
  16     wxTOKEN_INVALID = -1,   ///< Invalid tokenizer mode.
  17
  18     /**
  19         Default behaviour: wxStringTokenizer will behave in the same way as
  20         @c strtok() (::wxTOKEN_STRTOK) if the delimiters string only contains
  21         white space characters but, unlike the standard function, it will
  22         behave like ::wxTOKEN_RET_EMPTY, returning empty tokens if this is not
  23         the case. This is helpful for parsing strictly formatted data where
  24         the number of fields is fixed but some of them may be empty (i.e.
  25         @c TAB or comma delimited text files).
  26     */
  27     wxTOKEN_DEFAULT,
  28
  29     /**
  30         In this mode, the empty tokens in the middle of the string will be returned,
  31         i.e. @c "a::b:" will be tokenized in three tokens @c 'a', " and @c 'b'. Notice
  32         that all trailing delimiters are ignored in this mode, not just the last one,
  33         i.e. a string @c "a::b::" would still result in the same set of tokens.
  34     */
  35     wxTOKEN_RET_EMPTY,
  36
  37     /**
  38         In this mode, empty trailing tokens (including the one after the last delimiter
  39         character) will be returned as well. The string @c "a::b:" will be tokenized in
  40         four tokens: the already mentioned ones and another empty one as the last one
  41         and a string @c "a::b::" will have five tokens.
  42     */
  43     wxTOKEN_RET_EMPTY_ALL,
  44
  45     /**
  46         In this mode, the delimiter character after the end of the current token (there
  47         may be none if this is the last token) is returned appended to the token.
  48         Otherwise, it is the same mode as ::wxTOKEN_RET_EMPTY. Notice that there is no
  49         mode like this one but behaving like ::wxTOKEN_RET_EMPTY_ALL instead of
  50         ::wxTOKEN_RET_EMPTY, use ::wxTOKEN_RET_EMPTY_ALL and
  51         wxStringTokenizer::GetLastDelimiter() to emulate it.
  52     */
  53     wxTOKEN_RET_DELIMS,
  54
  55     /**
  56         In this mode the class behaves exactly like the standard @c strtok() function:
  57         the empty tokens are never returned.
  58     */
  59     wxTOKEN_STRTOK
  60 };
  61
  62 /**
  63     @class wxStringTokenizer
  64     @wxheader{tokenzr.h}
  65
  66     wxStringTokenizer helps you to break a string up into a number of tokens.
  67     It replaces the standard C function @c strtok() and also extends it in a
  68     number of ways.
  69
  70     To use this class, you should create a wxStringTokenizer object, give it the
  71     string to tokenize and also the delimiters which separate tokens in the string
  72     (by default, white space characters will be used).
  73
  74     Then wxStringTokenizer::GetNextToken() may be called repeatedly until
  75     wxStringTokenizer::HasMoreTokens() returns @false.
  76
  77     For example:
  78
  79     @code
  80     wxStringTokenizer tokenizer("first:second:third:fourth", ":");
  81     while ( tokenizer.HasMoreTokens() )
  82     {
  83         wxString token = tokenizer.GetNextToken();
  84
  85         // process token here
  86     }
  87     @endcode
  88
  89     @library{wxbase}
  90     @category{data}
  91
  92     @see wxStringTokenize()
  93 */
  94 class wxStringTokenizer : public wxObject
  95 {
  96 public:
  97     /**
  98         Default constructor. You must call SetString() before calling any other
  99         methods.
 100     */
 101     wxStringTokenizer();
 102     /**
 103         Constructor. Pass the string to tokenize, a string containing
 104         delimiters, and the @a mode specifying how the string should be
 105         tokenized.
 106
 107         @see SetString()
 108    */
 109     wxStringTokenizer(const wxString& str,
 110                       const wxString& delims = " \t\r\n",
 111                       wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
 112
 113     /**
 114         Returns the number of tokens remaining in the input string. The number
 115         of tokens returned by this function is decremented each time
 116         GetNextToken() is called and when it reaches 0, HasMoreTokens()
 117         returns @false.
 118     */
 119     int CountTokens() const;
 120
 121     /**
 122         Returns the delimiter which ended scan for the last token returned by
 123         GetNextToken() or @c NUL if there had been no calls to this function
 124         yet or if it returned the trailing empty token in
 125         ::wxTOKEN_RET_EMPTY_ALL mode.
 126
 127         @since 2.7.0
 128     */
 129     wxChar GetLastDelimiter();
 130
 131     /**
 132         Returns the next token or empty string if the end of string was reached.
 133     */
 134     wxString GetNextToken() const;
 135
 136     /**
 137         Returns the current position (i.e. one index after the last returned
 138         token or 0 if GetNextToken() has never been called) in the original
 139         string.
 140     */
 141     size_t GetPosition() const;
 142
 143     /**
 144         Returns the part of the starting string without all token already extracted.
 145     */
 146     wxString GetString() const;
 147
 148     /**
 149         Returns @true if the tokenizer has further tokens, @false if none are left.
 150     */
 151     bool HasMoreTokens() const;
 152
 153     /**
 154         Initializes the tokenizer. Pass the string to tokenize, a string
 155         containing delimiters, and the @a mode specifying how the string
 156         should be tokenized.
 157     */
 158     void SetString(const wxString& to_tokenize,
 159                    const wxString& delims = " \t\r\n",
 160                    wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
 161 };