]>
Commit | Line | Data |
---|---|---|
f4ada568 | 1 | ///////////////////////////////////////////////////////////////////////////// |
aaa6d89a | 2 | // Name: src/common/tokenzr.cpp |
f4ada568 GL |
3 | // Purpose: String tokenizer |
4 | // Author: Guilhem Lavaux | |
1e6feb95 | 5 | // Modified by: Vadim Zeitlin (almost full rewrite) |
f4ada568 | 6 | // Created: 04/22/98 |
f4ada568 | 7 | // Copyright: (c) Guilhem Lavaux |
65571936 | 8 | // Licence: wxWindows licence |
f4ada568 GL |
9 | ///////////////////////////////////////////////////////////////////////////// |
10 | ||
bbf8fc53 VZ |
11 | // ============================================================================ |
12 | // declarations | |
13 | // ============================================================================ | |
14 | ||
15 | // ---------------------------------------------------------------------------- | |
16 | // headers | |
17 | // ---------------------------------------------------------------------------- | |
18 | ||
fcc6dddd JS |
19 | // For compilers that support precompilation, includes "wx.h". |
20 | #include "wx/wxprec.h" | |
21 | ||
22 | #ifdef __BORLANDC__ | |
85833f5c | 23 | #pragma hdrstop |
fcc6dddd JS |
24 | #endif |
25 | ||
f4ada568 | 26 | #include "wx/tokenzr.h" |
aaa6d89a WS |
27 | |
28 | #ifndef WX_PRECOMP | |
29 | #include "wx/arrstr.h" | |
0bf751e7 | 30 | #include "wx/crt.h" |
aaa6d89a | 31 | #endif |
f4ada568 | 32 | |
3f8e5072 JS |
33 | // Required for wxIs... functions |
34 | #include <ctype.h> | |
35 | ||
bbf8fc53 VZ |
36 | // ============================================================================ |
37 | // implementation | |
38 | // ============================================================================ | |
39 | ||
f0dfc29c VS |
40 | // ---------------------------------------------------------------------------- |
41 | // helpers | |
42 | // ---------------------------------------------------------------------------- | |
43 | ||
44 | static wxString::const_iterator | |
45 | find_first_of(const wxChar *delims, size_t len, | |
46 | const wxString::const_iterator& from, | |
47 | const wxString::const_iterator& end) | |
48 | { | |
9a83f860 | 49 | wxASSERT_MSG( from <= end, wxT("invalid index") ); |
f0dfc29c VS |
50 | |
51 | for ( wxString::const_iterator i = from; i != end; ++i ) | |
52 | { | |
53 | if ( wxTmemchr(delims, *i, len) ) | |
54 | return i; | |
55 | } | |
56 | ||
57 | return end; | |
58 | } | |
59 | ||
60 | static wxString::const_iterator | |
61 | find_first_not_of(const wxChar *delims, size_t len, | |
62 | const wxString::const_iterator& from, | |
63 | const wxString::const_iterator& end) | |
64 | { | |
9a83f860 | 65 | wxASSERT_MSG( from <= end, wxT("invalid index") ); |
f0dfc29c VS |
66 | |
67 | for ( wxString::const_iterator i = from; i != end; ++i ) | |
68 | { | |
69 | if ( !wxTmemchr(delims, *i, len) ) | |
70 | return i; | |
71 | } | |
72 | ||
73 | return end; | |
74 | } | |
75 | ||
bbf8fc53 VZ |
76 | // ---------------------------------------------------------------------------- |
77 | // wxStringTokenizer construction | |
78 | // ---------------------------------------------------------------------------- | |
79 | ||
7c968cee | 80 | wxStringTokenizer::wxStringTokenizer(const wxString& str, |
f4ada568 | 81 | const wxString& delims, |
7c968cee | 82 | wxStringTokenizerMode mode) |
bbf8fc53 | 83 | { |
7c968cee | 84 | SetString(str, delims, mode); |
bbf8fc53 VZ |
85 | } |
86 | ||
7c968cee | 87 | void wxStringTokenizer::SetString(const wxString& str, |
bbf8fc53 | 88 | const wxString& delims, |
7c968cee | 89 | wxStringTokenizerMode mode) |
f4ada568 | 90 | { |
7c968cee VZ |
91 | if ( mode == wxTOKEN_DEFAULT ) |
92 | { | |
93 | // by default, we behave like strtok() if the delimiters are only | |
94 | // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for | |
95 | // whitespace delimiters, strtok() behaviour is better because we want | |
96 | // to count consecutive spaces as one delimiter) | |
66c71d8a VS |
97 | wxString::const_iterator p; |
98 | for ( p = delims.begin(); p != delims.end(); ++p ) | |
7c968cee VZ |
99 | { |
100 | if ( !wxIsspace(*p) ) | |
101 | break; | |
102 | } | |
103 | ||
66c71d8a | 104 | if ( p != delims.end() ) |
7c968cee VZ |
105 | { |
106 | // not whitespace char in delims | |
107 | mode = wxTOKEN_RET_EMPTY; | |
108 | } | |
109 | else | |
110 | { | |
111 | // only whitespaces | |
112 | mode = wxTOKEN_STRTOK; | |
113 | } | |
114 | } | |
115 | ||
f0dfc29c VS |
116 | #if wxUSE_UNICODE // FIXME-UTF8: only wc_str() |
117 | m_delims = delims.wc_str(); | |
118 | #else | |
119 | m_delims = delims.mb_str(); | |
120 | #endif | |
121 | m_delimsLen = delims.length(); | |
122 | ||
7c968cee | 123 | m_mode = mode; |
bbf8fc53 | 124 | |
7c968cee | 125 | Reinit(str); |
f4ada568 GL |
126 | } |
127 | ||
7c968cee | 128 | void wxStringTokenizer::Reinit(const wxString& str) |
f4ada568 | 129 | { |
9a83f860 | 130 | wxASSERT_MSG( IsOk(), wxT("you should call SetString() first") ); |
7c968cee VZ |
131 | |
132 | m_string = str; | |
f0dfc29c VS |
133 | m_stringEnd = m_string.end(); |
134 | m_pos = m_string.begin(); | |
9a83f860 | 135 | m_lastDelim = wxT('\0'); |
f0dfc29c | 136 | m_hasMoreTokens = MoreTokens_Unknown; |
f4ada568 GL |
137 | } |
138 | ||
bbf8fc53 | 139 | // ---------------------------------------------------------------------------- |
7c968cee | 140 | // access to the tokens |
bbf8fc53 VZ |
141 | // ---------------------------------------------------------------------------- |
142 | ||
7c968cee VZ |
143 | // do we have more of them? |
144 | bool wxStringTokenizer::HasMoreTokens() const | |
f0dfc29c VS |
145 | { |
146 | // GetNextToken() calls HasMoreTokens() and so HasMoreTokens() is called | |
147 | // twice in every interation in the following common usage patten: | |
148 | // while ( HasMoreTokens() ) | |
149 | // GetNextToken(); | |
150 | // We optimize this case by caching HasMoreTokens() return value here: | |
151 | if ( m_hasMoreTokens == MoreTokens_Unknown ) | |
152 | { | |
153 | bool r = DoHasMoreTokens(); | |
154 | wxConstCast(this, wxStringTokenizer)->m_hasMoreTokens = | |
155 | r ? MoreTokens_Yes : MoreTokens_No; | |
156 | return r; | |
157 | } | |
158 | else | |
159 | return m_hasMoreTokens == MoreTokens_Yes; | |
160 | } | |
161 | ||
162 | bool wxStringTokenizer::DoHasMoreTokens() const | |
f4ada568 | 163 | { |
9a83f860 | 164 | wxCHECK_MSG( IsOk(), false, wxT("you should call SetString() first") ); |
7c968cee | 165 | |
f0dfc29c VS |
166 | if ( find_first_not_of(m_delims, m_delimsLen, m_pos, m_stringEnd) |
167 | != m_stringEnd ) | |
bbf8fc53 | 168 | { |
4626c57c VZ |
169 | // there are non delimiter characters left, so we do have more tokens |
170 | return true; | |
7c968cee | 171 | } |
4626c57c VZ |
172 | |
173 | switch ( m_mode ) | |
7c968cee | 174 | { |
4626c57c VZ |
175 | case wxTOKEN_RET_EMPTY: |
176 | case wxTOKEN_RET_DELIMS: | |
177 | // special hack for wxTOKEN_RET_EMPTY: we should return the initial | |
178 | // empty token even if there are only delimiters after it | |
f0dfc29c | 179 | return !m_string.empty() && m_pos == m_string.begin(); |
4626c57c VZ |
180 | |
181 | case wxTOKEN_RET_EMPTY_ALL: | |
182 | // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had | |
183 | // already returned the trailing empty token after the last | |
184 | // delimiter by examining m_lastDelim: it is set to NUL if we run | |
185 | // up to the end of the string in GetNextToken(), but if it is not | |
186 | // NUL yet we still have this last token to return even if m_pos is | |
187 | // already at m_string.length() | |
9a83f860 | 188 | return m_pos < m_stringEnd || m_lastDelim != wxT('\0'); |
4626c57c VZ |
189 | |
190 | case wxTOKEN_INVALID: | |
191 | case wxTOKEN_DEFAULT: | |
9a83f860 | 192 | wxFAIL_MSG( wxT("unexpected tokenizer mode") ); |
4626c57c VZ |
193 | // fall through |
194 | ||
195 | case wxTOKEN_STRTOK: | |
196 | // never return empty delimiters | |
197 | break; | |
7c968cee | 198 | } |
4626c57c VZ |
199 | |
200 | return false; | |
7c968cee | 201 | } |
bbf8fc53 | 202 | |
4626c57c | 203 | // count the number of (remaining) tokens in the string |
7c968cee VZ |
204 | size_t wxStringTokenizer::CountTokens() const |
205 | { | |
9a83f860 | 206 | wxCHECK_MSG( IsOk(), 0, wxT("you should call SetString() first") ); |
bbf8fc53 | 207 | |
7c968cee | 208 | // VZ: this function is IMHO not very useful, so it's probably not very |
4626c57c VZ |
209 | // important if its implementation here is not as efficient as it |
210 | // could be -- but OTOH like this we're sure to get the correct answer | |
7c968cee | 211 | // in all modes |
f0dfc29c | 212 | wxStringTokenizer tkz(wxString(m_pos, m_stringEnd), m_delims, m_mode); |
bbf8fc53 | 213 | |
7c968cee | 214 | size_t count = 0; |
4626c57c | 215 | while ( tkz.HasMoreTokens() ) |
bbf8fc53 VZ |
216 | { |
217 | count++; | |
7c968cee | 218 | |
4626c57c | 219 | (void)tkz.GetNextToken(); |
bbf8fc53 VZ |
220 | } |
221 | ||
222 | return count; | |
223 | } | |
224 | ||
225 | // ---------------------------------------------------------------------------- | |
226 | // token extraction | |
227 | // ---------------------------------------------------------------------------- | |
228 | ||
229 | wxString wxStringTokenizer::GetNextToken() | |
230 | { | |
231 | wxString token; | |
7c968cee | 232 | do |
bbf8fc53 | 233 | { |
7c968cee | 234 | if ( !HasMoreTokens() ) |
85833f5c | 235 | { |
7c968cee | 236 | break; |
85833f5c | 237 | } |
4626c57c | 238 | |
f0dfc29c VS |
239 | m_hasMoreTokens = MoreTokens_Unknown; |
240 | ||
7c968cee | 241 | // find the end of this token |
f0dfc29c VS |
242 | wxString::const_iterator pos = |
243 | find_first_of(m_delims, m_delimsLen, m_pos, m_stringEnd); | |
7c968cee VZ |
244 | |
245 | // and the start of the next one | |
f0dfc29c | 246 | if ( pos == m_stringEnd ) |
85833f5c | 247 | { |
7c968cee VZ |
248 | // no more delimiters, the token is everything till the end of |
249 | // string | |
f0dfc29c | 250 | token.assign(m_pos, m_stringEnd); |
7c968cee | 251 | |
4626c57c | 252 | // skip the token |
f0dfc29c | 253 | m_pos = m_stringEnd; |
bbf8fc53 | 254 | |
4626c57c | 255 | // it wasn't terminated |
9a83f860 | 256 | m_lastDelim = wxT('\0'); |
85833f5c | 257 | } |
4626c57c | 258 | else // we found a delimiter at pos |
7c968cee | 259 | { |
7c968cee | 260 | // in wxTOKEN_RET_DELIMS mode we return the delimiter character |
4626c57c | 261 | // with token, otherwise leave it out |
f06a1f33 | 262 | wxString::const_iterator tokenEnd(pos); |
4626c57c | 263 | if ( m_mode == wxTOKEN_RET_DELIMS ) |
f06a1f33 | 264 | ++tokenEnd; |
4626c57c | 265 | |
f06a1f33 | 266 | token.assign(m_pos, tokenEnd); |
dab58492 | 267 | |
4626c57c VZ |
268 | // skip the token and the trailing delimiter |
269 | m_pos = pos + 1; | |
bbf8fc53 | 270 | |
9a83f860 | 271 | m_lastDelim = (pos == m_stringEnd) ? wxT('\0') : (wxChar)*pos; |
7c968cee | 272 | } |
85833f5c | 273 | } |
4626c57c | 274 | while ( !AllowEmpty() && token.empty() ); |
bbf8fc53 VZ |
275 | |
276 | return token; | |
f4ada568 | 277 | } |
1e6feb95 VZ |
278 | |
279 | // ---------------------------------------------------------------------------- | |
280 | // public functions | |
281 | // ---------------------------------------------------------------------------- | |
282 | ||
283 | wxArrayString wxStringTokenize(const wxString& str, | |
284 | const wxString& delims, | |
285 | wxStringTokenizerMode mode) | |
286 | { | |
287 | wxArrayString tokens; | |
288 | wxStringTokenizer tk(str, delims, mode); | |
289 | while ( tk.HasMoreTokens() ) | |
290 | { | |
291 | tokens.Add(tk.GetNextToken()); | |
292 | } | |
293 | ||
294 | return tokens; | |
295 | } |