]>
Commit | Line | Data |
---|---|---|
23324ae1 FM |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: tokenzr.h | |
e54c96f1 | 3 | // Purpose: interface of wxStringTokenizer |
23324ae1 FM |
4 | // Author: wxWidgets team |
5 | // RCS-ID: $Id$ | |
6 | // Licence: wxWindows license | |
7 | ///////////////////////////////////////////////////////////////////////////// | |
8 | ||
9 | /** | |
10 | @class wxStringTokenizer | |
11 | @wxheader{tokenzr.h} | |
7c913512 | 12 | |
23324ae1 FM |
13 | wxStringTokenizer helps you to break a string up into a number of tokens. It |
14 | replaces the standard C function @c strtok() and also extends it in a | |
15 | number of ways. | |
7c913512 | 16 | |
23324ae1 FM |
17 | To use this class, you should create a wxStringTokenizer object, give it the |
18 | string to tokenize and also the delimiters which separate tokens in the string | |
19 | (by default, white space characters will be used). | |
7c913512 | 20 | |
23324ae1 | 21 | Then wxStringTokenizer::GetNextToken may be called |
7c913512 | 22 | repeatedly until wxStringTokenizer::HasMoreTokens
23324ae1 | 23 | returns @false. |
7c913512 | 24 | |
23324ae1 | 25 | For example: |
7c913512 | 26 | |
23324ae1 FM |
27 | @code |
28 | wxStringTokenizer tkz(wxT("first:second:third:fourth"), wxT(":")); | |
29 | while ( tkz.HasMoreTokens() ) | |
30 | { | |
31 | wxString token = tkz.GetNextToken(); | |
7c913512 | 32 | |
23324ae1 FM |
33 | // process token here |
34 | } | |
35 | @endcode | |
7c913512 | 36 | |
23324ae1 FM |
37 | By default, wxStringTokenizer will behave in the same way as @c strtok() if |
38 | the delimiters string only contains white space characters but, unlike the | |
39 | standard function, it will return empty tokens if this is not the case. This | |
40 | is helpful for parsing strictly formatted data where the number of fields is | |
41 | fixed but some of them may be empty (e.g. @c TAB or comma delimited text | |
42 | files). | |
7c913512 FM |
43 | |
44 | The behaviour is governed by the @c mode parameter, the last argument of the | |
23324ae1 | 45 | wxStringTokenizer constructor and of
7c913512 | 46 | wxStringTokenizer::SetString(),
23324ae1 | 47 | which may be one of the following:
7c913512 FM |
48 | |
49 | ||
50 | ||
23324ae1 | 51 | @c wxTOKEN_DEFAULT |
7c913512 FM |
52 | |
53 | ||
23324ae1 FM |
54 | Default behaviour (as described above): |
55 | same as @c wxTOKEN_STRTOK if the delimiter string contains only | |
56 | whitespaces, same as @c wxTOKEN_RET_EMPTY otherwise | |
7c913512 FM |
57 | |
58 | ||
23324ae1 | 59 | @c wxTOKEN_RET_EMPTY |
7c913512 FM |
60 | |
61 | ||
23324ae1 FM |
62 | In this mode, the empty tokens in the |
63 | middle of the string will be returned, e.g. @c "a::b:" will be tokenized into | |
64 | three tokens 'a', '' and 'b'. Notice that all trailing delimiters are ignored | |
65 | in this mode, not just the last one, i.e. a string @c "a::b::" would | |
66 | still result in the same set of tokens. | |
7c913512 FM |
67 | |
68 | ||
23324ae1 | 69 | @c wxTOKEN_RET_EMPTY_ALL |
7c913512 FM |
70 | |
71 | ||
23324ae1 FM |
72 | In this mode, empty trailing tokens |
73 | (including the one after the last delimiter character) will be returned as | |
74 | well. The string @c "a::b:" will be tokenized into four tokens: the already | |
7c913512 | 75 | mentioned ones and another empty one as the last one and a string |
23324ae1 | 76 | @c "a::b::" will have five tokens. |
7c913512 FM |
77 | |
78 | ||
23324ae1 | 79 | @c wxTOKEN_RET_DELIMS |
7c913512 FM |
80 | |
81 | ||
23324ae1 FM |
82 | In this mode, the delimiter character |
83 | after the end of the current token (there may be none if this is the last | |
7c913512 | 84 | token) is returned appended to the token. Otherwise, it is the same mode as |
23324ae1 | 85 | @c wxTOKEN_RET_EMPTY. Notice that there is no mode like this one but |
7c913512 FM |
86 | behaving like @c wxTOKEN_RET_EMPTY_ALL instead of |
87 | @c wxTOKEN_RET_EMPTY; use @c wxTOKEN_RET_EMPTY_ALL and | |
23324ae1 | 88 | wxStringTokenizer::GetLastDelimiter to emulate it. |
7c913512 FM |
89 | |
90 | ||
23324ae1 | 91 | @c wxTOKEN_STRTOK |
7c913512 FM |
92 | |
93 | ||
23324ae1 FM |
94 | In this mode the class behaves exactly like |
95 | the standard @c strtok() function: the empty tokens are never returned. | |
7c913512 FM |
96 | |
97 | ||
98 | ||
23324ae1 FM |
99 | @library{wxbase} |
100 | @category{data} | |
7c913512 | 101 | |
e54c96f1 | 102 | @see wxStringTokenize() |
23324ae1 FM |
103 | */ |
104 | class wxStringTokenizer : public wxObject | |
105 | { | |
106 | public: | |
107 | //@{ | |
108 | /** | |
109 | Constructor. Pass the string to tokenize, a string containing delimiters | |
110 | and the mode specifying how the string should be tokenized. | |
111 | */ | |
112 | wxStringTokenizer(); | |
7c913512 FM |
113 | wxStringTokenizer(const wxString& str, |
114 | const wxString& delims = " \t\r\n", | |
115 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); | |
23324ae1 FM |
116 | //@} |
117 | ||
118 | /** | |
119 | Returns the number of tokens remaining in the input string. The number of | |
7c913512 | 120 | tokens returned by this function is decremented each time |
23324ae1 FM |
121 | GetNextToken() is called and when it |
122 | reaches 0 HasMoreTokens() returns | |
123 | @false. | |
124 | */ | |
328f5751 | 125 | int CountTokens() const; |
23324ae1 FM |
126 | |
127 | /** | |
7c913512 | 128 | Returns the delimiter which ended the scan for the last token returned by
23324ae1 FM |
129 | GetNextToken() or @c NUL if |
130 | there have been no calls to GetNextToken() yet or if it returned the trailing | |
131 | empty token in @c wxTOKEN_RET_EMPTY_ALL mode. | |
3c4f71cc | 132 | |
e54c96f1 | 133 | @wxsince{2.7.0} |
23324ae1 FM |
134 | */ |
135 | wxChar GetLastDelimiter(); | |
136 | ||
137 | /** | |
138 | Returns the next token or empty string if the end of string was reached. | |
139 | */ | |
328f5751 | 140 | wxString GetNextToken() const; |
23324ae1 FM |
141 | |
142 | /** | |
143 | Returns the current position (i.e. one index after the last returned | |
144 | token or 0 if GetNextToken() has never been called) in the original | |
145 | string. | |
146 | */ | |
328f5751 | 147 | size_t GetPosition() const; |
23324ae1 FM |
148 | |
149 | /** | |
150 | Returns the part of the starting string without the tokens already extracted. | |
151 | */ | |
328f5751 | 152 | wxString GetString() const; |
23324ae1 FM |
153 | |
154 | /** | |
155 | Returns @true if the tokenizer has further tokens, @false if none are left. | |
156 | */ | |
328f5751 | 157 | bool HasMoreTokens() const; |
23324ae1 FM |
158 | |
159 | /** | |
160 | Initializes the tokenizer. | |
23324ae1 FM |
161 | Pass the string to tokenize, a string containing delimiters, |
162 | and the mode specifying how the string should be tokenized. | |
163 | */ | |
164 | void SetString(const wxString& to_tokenize, | |
165 | const wxString& delims = " \t\r\n", | |
166 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); | |
167 | }; | |
e54c96f1 | 168 |