]>
Commit | Line | Data |
---|---|---|
1 | ///////////////////////////////////////////////////////////////////////////// | |
2 | // Name: src/common/tokenzr.cpp | |
3 | // Purpose: String tokenizer | |
4 | // Author: Guilhem Lavaux | |
5 | // Modified by: Vadim Zeitlin (almost full rewrite) | |
6 | // Created: 04/22/98 | |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) Guilhem Lavaux | |
9 | // Licence: wxWindows licence | |
10 | ///////////////////////////////////////////////////////////////////////////// | |
11 | ||
12 | // ============================================================================ | |
13 | // declarations | |
14 | // ============================================================================ | |
15 | ||
16 | // ---------------------------------------------------------------------------- | |
17 | // headers | |
18 | // ---------------------------------------------------------------------------- | |
19 | ||
20 | // For compilers that support precompilation, includes "wx.h". | |
21 | #include "wx/wxprec.h" | |
22 | ||
23 | #ifdef __BORLANDC__ | |
24 | #pragma hdrstop | |
25 | #endif | |
26 | ||
27 | #include "wx/tokenzr.h" | |
28 | ||
29 | #ifndef WX_PRECOMP | |
30 | #include "wx/arrstr.h" | |
31 | #include "wx/crt.h" | |
32 | #endif | |
33 | ||
34 | // Required for wxIs... functions | |
35 | #include <ctype.h> | |
36 | ||
37 | // ============================================================================ | |
38 | // implementation | |
39 | // ============================================================================ | |
40 | ||
41 | // ---------------------------------------------------------------------------- | |
42 | // helpers | |
43 | // ---------------------------------------------------------------------------- | |
44 | ||
45 | static wxString::const_iterator | |
46 | find_first_of(const wxChar *delims, size_t len, | |
47 | const wxString::const_iterator& from, | |
48 | const wxString::const_iterator& end) | |
49 | { | |
50 | wxASSERT_MSG( from <= end, wxT("invalid index") ); | |
51 | ||
52 | for ( wxString::const_iterator i = from; i != end; ++i ) | |
53 | { | |
54 | if ( wxTmemchr(delims, *i, len) ) | |
55 | return i; | |
56 | } | |
57 | ||
58 | return end; | |
59 | } | |
60 | ||
61 | static wxString::const_iterator | |
62 | find_first_not_of(const wxChar *delims, size_t len, | |
63 | const wxString::const_iterator& from, | |
64 | const wxString::const_iterator& end) | |
65 | { | |
66 | wxASSERT_MSG( from <= end, wxT("invalid index") ); | |
67 | ||
68 | for ( wxString::const_iterator i = from; i != end; ++i ) | |
69 | { | |
70 | if ( !wxTmemchr(delims, *i, len) ) | |
71 | return i; | |
72 | } | |
73 | ||
74 | return end; | |
75 | } | |
76 | ||
77 | // ---------------------------------------------------------------------------- | |
78 | // wxStringTokenizer construction | |
79 | // ---------------------------------------------------------------------------- | |
80 | ||
81 | wxStringTokenizer::wxStringTokenizer(const wxString& str, | |
82 | const wxString& delims, | |
83 | wxStringTokenizerMode mode) | |
84 | { | |
85 | SetString(str, delims, mode); | |
86 | } | |
87 | ||
88 | void wxStringTokenizer::SetString(const wxString& str, | |
89 | const wxString& delims, | |
90 | wxStringTokenizerMode mode) | |
91 | { | |
92 | if ( mode == wxTOKEN_DEFAULT ) | |
93 | { | |
94 | // by default, we behave like strtok() if the delimiters are only | |
95 | // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for | |
96 | // whitespace delimiters, strtok() behaviour is better because we want | |
97 | // to count consecutive spaces as one delimiter) | |
98 | wxString::const_iterator p; | |
99 | for ( p = delims.begin(); p != delims.end(); ++p ) | |
100 | { | |
101 | if ( !wxIsspace(*p) ) | |
102 | break; | |
103 | } | |
104 | ||
105 | if ( p != delims.end() ) | |
106 | { | |
107 | // not whitespace char in delims | |
108 | mode = wxTOKEN_RET_EMPTY; | |
109 | } | |
110 | else | |
111 | { | |
112 | // only whitespaces | |
113 | mode = wxTOKEN_STRTOK; | |
114 | } | |
115 | } | |
116 | ||
117 | #if wxUSE_UNICODE // FIXME-UTF8: only wc_str() | |
118 | m_delims = delims.wc_str(); | |
119 | #else | |
120 | m_delims = delims.mb_str(); | |
121 | #endif | |
122 | m_delimsLen = delims.length(); | |
123 | ||
124 | m_mode = mode; | |
125 | ||
126 | Reinit(str); | |
127 | } | |
128 | ||
129 | void wxStringTokenizer::Reinit(const wxString& str) | |
130 | { | |
131 | wxASSERT_MSG( IsOk(), wxT("you should call SetString() first") ); | |
132 | ||
133 | m_string = str; | |
134 | m_stringEnd = m_string.end(); | |
135 | m_pos = m_string.begin(); | |
136 | m_lastDelim = wxT('\0'); | |
137 | m_hasMoreTokens = MoreTokens_Unknown; | |
138 | } | |
139 | ||
140 | // ---------------------------------------------------------------------------- | |
141 | // access to the tokens | |
142 | // ---------------------------------------------------------------------------- | |
143 | ||
144 | // do we have more of them? | |
145 | bool wxStringTokenizer::HasMoreTokens() const | |
146 | { | |
147 | // GetNextToken() calls HasMoreTokens() and so HasMoreTokens() is called | |
148 | // twice in every interation in the following common usage patten: | |
149 | // while ( HasMoreTokens() ) | |
150 | // GetNextToken(); | |
151 | // We optimize this case by caching HasMoreTokens() return value here: | |
152 | if ( m_hasMoreTokens == MoreTokens_Unknown ) | |
153 | { | |
154 | bool r = DoHasMoreTokens(); | |
155 | wxConstCast(this, wxStringTokenizer)->m_hasMoreTokens = | |
156 | r ? MoreTokens_Yes : MoreTokens_No; | |
157 | return r; | |
158 | } | |
159 | else | |
160 | return m_hasMoreTokens == MoreTokens_Yes; | |
161 | } | |
162 | ||
163 | bool wxStringTokenizer::DoHasMoreTokens() const | |
164 | { | |
165 | wxCHECK_MSG( IsOk(), false, wxT("you should call SetString() first") ); | |
166 | ||
167 | if ( find_first_not_of(m_delims, m_delimsLen, m_pos, m_stringEnd) | |
168 | != m_stringEnd ) | |
169 | { | |
170 | // there are non delimiter characters left, so we do have more tokens | |
171 | return true; | |
172 | } | |
173 | ||
174 | switch ( m_mode ) | |
175 | { | |
176 | case wxTOKEN_RET_EMPTY: | |
177 | case wxTOKEN_RET_DELIMS: | |
178 | // special hack for wxTOKEN_RET_EMPTY: we should return the initial | |
179 | // empty token even if there are only delimiters after it | |
180 | return !m_string.empty() && m_pos == m_string.begin(); | |
181 | ||
182 | case wxTOKEN_RET_EMPTY_ALL: | |
183 | // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had | |
184 | // already returned the trailing empty token after the last | |
185 | // delimiter by examining m_lastDelim: it is set to NUL if we run | |
186 | // up to the end of the string in GetNextToken(), but if it is not | |
187 | // NUL yet we still have this last token to return even if m_pos is | |
188 | // already at m_string.length() | |
189 | return m_pos < m_stringEnd || m_lastDelim != wxT('\0'); | |
190 | ||
191 | case wxTOKEN_INVALID: | |
192 | case wxTOKEN_DEFAULT: | |
193 | wxFAIL_MSG( wxT("unexpected tokenizer mode") ); | |
194 | // fall through | |
195 | ||
196 | case wxTOKEN_STRTOK: | |
197 | // never return empty delimiters | |
198 | break; | |
199 | } | |
200 | ||
201 | return false; | |
202 | } | |
203 | ||
204 | // count the number of (remaining) tokens in the string | |
205 | size_t wxStringTokenizer::CountTokens() const | |
206 | { | |
207 | wxCHECK_MSG( IsOk(), 0, wxT("you should call SetString() first") ); | |
208 | ||
209 | // VZ: this function is IMHO not very useful, so it's probably not very | |
210 | // important if its implementation here is not as efficient as it | |
211 | // could be -- but OTOH like this we're sure to get the correct answer | |
212 | // in all modes | |
213 | wxStringTokenizer tkz(wxString(m_pos, m_stringEnd), m_delims, m_mode); | |
214 | ||
215 | size_t count = 0; | |
216 | while ( tkz.HasMoreTokens() ) | |
217 | { | |
218 | count++; | |
219 | ||
220 | (void)tkz.GetNextToken(); | |
221 | } | |
222 | ||
223 | return count; | |
224 | } | |
225 | ||
226 | // ---------------------------------------------------------------------------- | |
227 | // token extraction | |
228 | // ---------------------------------------------------------------------------- | |
229 | ||
230 | wxString wxStringTokenizer::GetNextToken() | |
231 | { | |
232 | wxString token; | |
233 | do | |
234 | { | |
235 | if ( !HasMoreTokens() ) | |
236 | { | |
237 | break; | |
238 | } | |
239 | ||
240 | m_hasMoreTokens = MoreTokens_Unknown; | |
241 | ||
242 | // find the end of this token | |
243 | wxString::const_iterator pos = | |
244 | find_first_of(m_delims, m_delimsLen, m_pos, m_stringEnd); | |
245 | ||
246 | // and the start of the next one | |
247 | if ( pos == m_stringEnd ) | |
248 | { | |
249 | // no more delimiters, the token is everything till the end of | |
250 | // string | |
251 | token.assign(m_pos, m_stringEnd); | |
252 | ||
253 | // skip the token | |
254 | m_pos = m_stringEnd; | |
255 | ||
256 | // it wasn't terminated | |
257 | m_lastDelim = wxT('\0'); | |
258 | } | |
259 | else // we found a delimiter at pos | |
260 | { | |
261 | // in wxTOKEN_RET_DELIMS mode we return the delimiter character | |
262 | // with token, otherwise leave it out | |
263 | wxString::const_iterator tokenEnd(pos); | |
264 | if ( m_mode == wxTOKEN_RET_DELIMS ) | |
265 | ++tokenEnd; | |
266 | ||
267 | token.assign(m_pos, tokenEnd); | |
268 | ||
269 | // skip the token and the trailing delimiter | |
270 | m_pos = pos + 1; | |
271 | ||
272 | m_lastDelim = (pos == m_stringEnd) ? wxT('\0') : (wxChar)*pos; | |
273 | } | |
274 | } | |
275 | while ( !AllowEmpty() && token.empty() ); | |
276 | ||
277 | return token; | |
278 | } | |
279 | ||
280 | // ---------------------------------------------------------------------------- | |
281 | // public functions | |
282 | // ---------------------------------------------------------------------------- | |
283 | ||
284 | wxArrayString wxStringTokenize(const wxString& str, | |
285 | const wxString& delims, | |
286 | wxStringTokenizerMode mode) | |
287 | { | |
288 | wxArrayString tokens; | |
289 | wxStringTokenizer tk(str, delims, mode); | |
290 | while ( tk.HasMoreTokens() ) | |
291 | { | |
292 | tokens.Add(tk.GetNextToken()); | |
293 | } | |
294 | ||
295 | return tokens; | |
296 | } |