]>
Commit | Line | Data |
---|---|---|
1 | ///////////////////////////////////////////////////////////////////////////// | |
2 | // Name: src/common/tokenzr.cpp | |
3 | // Purpose: String tokenizer | |
4 | // Author: Guilhem Lavaux | |
5 | // Modified by: Vadim Zeitlin (almost full rewrite) | |
6 | // Created: 04/22/98 | |
7 | // Copyright: (c) Guilhem Lavaux | |
8 | // Licence: wxWindows licence | |
9 | ///////////////////////////////////////////////////////////////////////////// | |
10 | ||
11 | // ============================================================================ | |
12 | // declarations | |
13 | // ============================================================================ | |
14 | ||
15 | // ---------------------------------------------------------------------------- | |
16 | // headers | |
17 | // ---------------------------------------------------------------------------- | |
18 | ||
19 | // For compilers that support precompilation, includes "wx.h". | |
20 | #include "wx/wxprec.h" | |
21 | ||
22 | #ifdef __BORLANDC__ | |
23 | #pragma hdrstop | |
24 | #endif | |
25 | ||
26 | #include "wx/tokenzr.h" | |
27 | ||
28 | #ifndef WX_PRECOMP | |
29 | #include "wx/arrstr.h" | |
30 | #include "wx/crt.h" | |
31 | #endif | |
32 | ||
33 | // Required for wxIs... functions | |
34 | #include <ctype.h> | |
35 | ||
36 | // ============================================================================ | |
37 | // implementation | |
38 | // ============================================================================ | |
39 | ||
40 | // ---------------------------------------------------------------------------- | |
41 | // helpers | |
42 | // ---------------------------------------------------------------------------- | |
43 | ||
44 | static wxString::const_iterator | |
45 | find_first_of(const wxChar *delims, size_t len, | |
46 | const wxString::const_iterator& from, | |
47 | const wxString::const_iterator& end) | |
48 | { | |
49 | wxASSERT_MSG( from <= end, wxT("invalid index") ); | |
50 | ||
51 | for ( wxString::const_iterator i = from; i != end; ++i ) | |
52 | { | |
53 | if ( wxTmemchr(delims, *i, len) ) | |
54 | return i; | |
55 | } | |
56 | ||
57 | return end; | |
58 | } | |
59 | ||
60 | static wxString::const_iterator | |
61 | find_first_not_of(const wxChar *delims, size_t len, | |
62 | const wxString::const_iterator& from, | |
63 | const wxString::const_iterator& end) | |
64 | { | |
65 | wxASSERT_MSG( from <= end, wxT("invalid index") ); | |
66 | ||
67 | for ( wxString::const_iterator i = from; i != end; ++i ) | |
68 | { | |
69 | if ( !wxTmemchr(delims, *i, len) ) | |
70 | return i; | |
71 | } | |
72 | ||
73 | return end; | |
74 | } | |
75 | ||
76 | // ---------------------------------------------------------------------------- | |
77 | // wxStringTokenizer construction | |
78 | // ---------------------------------------------------------------------------- | |
79 | ||
80 | wxStringTokenizer::wxStringTokenizer(const wxString& str, | |
81 | const wxString& delims, | |
82 | wxStringTokenizerMode mode) | |
83 | { | |
84 | SetString(str, delims, mode); | |
85 | } | |
86 | ||
87 | void wxStringTokenizer::SetString(const wxString& str, | |
88 | const wxString& delims, | |
89 | wxStringTokenizerMode mode) | |
90 | { | |
91 | if ( mode == wxTOKEN_DEFAULT ) | |
92 | { | |
93 | // by default, we behave like strtok() if the delimiters are only | |
94 | // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for | |
95 | // whitespace delimiters, strtok() behaviour is better because we want | |
96 | // to count consecutive spaces as one delimiter) | |
97 | wxString::const_iterator p; | |
98 | for ( p = delims.begin(); p != delims.end(); ++p ) | |
99 | { | |
100 | if ( !wxIsspace(*p) ) | |
101 | break; | |
102 | } | |
103 | ||
104 | if ( p != delims.end() ) | |
105 | { | |
106 | // not whitespace char in delims | |
107 | mode = wxTOKEN_RET_EMPTY; | |
108 | } | |
109 | else | |
110 | { | |
111 | // only whitespaces | |
112 | mode = wxTOKEN_STRTOK; | |
113 | } | |
114 | } | |
115 | ||
116 | #if wxUSE_UNICODE // FIXME-UTF8: only wc_str() | |
117 | m_delims = delims.wc_str(); | |
118 | #else | |
119 | m_delims = delims.mb_str(); | |
120 | #endif | |
121 | m_delimsLen = delims.length(); | |
122 | ||
123 | m_mode = mode; | |
124 | ||
125 | Reinit(str); | |
126 | } | |
127 | ||
128 | void wxStringTokenizer::Reinit(const wxString& str) | |
129 | { | |
130 | wxASSERT_MSG( IsOk(), wxT("you should call SetString() first") ); | |
131 | ||
132 | m_string = str; | |
133 | m_stringEnd = m_string.end(); | |
134 | m_pos = m_string.begin(); | |
135 | m_lastDelim = wxT('\0'); | |
136 | m_hasMoreTokens = MoreTokens_Unknown; | |
137 | } | |
138 | ||
139 | // ---------------------------------------------------------------------------- | |
140 | // access to the tokens | |
141 | // ---------------------------------------------------------------------------- | |
142 | ||
143 | // do we have more of them? | |
144 | bool wxStringTokenizer::HasMoreTokens() const | |
145 | { | |
146 | // GetNextToken() calls HasMoreTokens() and so HasMoreTokens() is called | |
147 | // twice in every interation in the following common usage patten: | |
148 | // while ( HasMoreTokens() ) | |
149 | // GetNextToken(); | |
150 | // We optimize this case by caching HasMoreTokens() return value here: | |
151 | if ( m_hasMoreTokens == MoreTokens_Unknown ) | |
152 | { | |
153 | bool r = DoHasMoreTokens(); | |
154 | wxConstCast(this, wxStringTokenizer)->m_hasMoreTokens = | |
155 | r ? MoreTokens_Yes : MoreTokens_No; | |
156 | return r; | |
157 | } | |
158 | else | |
159 | return m_hasMoreTokens == MoreTokens_Yes; | |
160 | } | |
161 | ||
162 | bool wxStringTokenizer::DoHasMoreTokens() const | |
163 | { | |
164 | wxCHECK_MSG( IsOk(), false, wxT("you should call SetString() first") ); | |
165 | ||
166 | if ( find_first_not_of(m_delims, m_delimsLen, m_pos, m_stringEnd) | |
167 | != m_stringEnd ) | |
168 | { | |
169 | // there are non delimiter characters left, so we do have more tokens | |
170 | return true; | |
171 | } | |
172 | ||
173 | switch ( m_mode ) | |
174 | { | |
175 | case wxTOKEN_RET_EMPTY: | |
176 | case wxTOKEN_RET_DELIMS: | |
177 | // special hack for wxTOKEN_RET_EMPTY: we should return the initial | |
178 | // empty token even if there are only delimiters after it | |
179 | return !m_string.empty() && m_pos == m_string.begin(); | |
180 | ||
181 | case wxTOKEN_RET_EMPTY_ALL: | |
182 | // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had | |
183 | // already returned the trailing empty token after the last | |
184 | // delimiter by examining m_lastDelim: it is set to NUL if we run | |
185 | // up to the end of the string in GetNextToken(), but if it is not | |
186 | // NUL yet we still have this last token to return even if m_pos is | |
187 | // already at m_string.length() | |
188 | return m_pos < m_stringEnd || m_lastDelim != wxT('\0'); | |
189 | ||
190 | case wxTOKEN_INVALID: | |
191 | case wxTOKEN_DEFAULT: | |
192 | wxFAIL_MSG( wxT("unexpected tokenizer mode") ); | |
193 | // fall through | |
194 | ||
195 | case wxTOKEN_STRTOK: | |
196 | // never return empty delimiters | |
197 | break; | |
198 | } | |
199 | ||
200 | return false; | |
201 | } | |
202 | ||
203 | // count the number of (remaining) tokens in the string | |
204 | size_t wxStringTokenizer::CountTokens() const | |
205 | { | |
206 | wxCHECK_MSG( IsOk(), 0, wxT("you should call SetString() first") ); | |
207 | ||
208 | // VZ: this function is IMHO not very useful, so it's probably not very | |
209 | // important if its implementation here is not as efficient as it | |
210 | // could be -- but OTOH like this we're sure to get the correct answer | |
211 | // in all modes | |
212 | wxStringTokenizer tkz(wxString(m_pos, m_stringEnd), m_delims, m_mode); | |
213 | ||
214 | size_t count = 0; | |
215 | while ( tkz.HasMoreTokens() ) | |
216 | { | |
217 | count++; | |
218 | ||
219 | (void)tkz.GetNextToken(); | |
220 | } | |
221 | ||
222 | return count; | |
223 | } | |
224 | ||
225 | // ---------------------------------------------------------------------------- | |
226 | // token extraction | |
227 | // ---------------------------------------------------------------------------- | |
228 | ||
229 | wxString wxStringTokenizer::GetNextToken() | |
230 | { | |
231 | wxString token; | |
232 | do | |
233 | { | |
234 | if ( !HasMoreTokens() ) | |
235 | { | |
236 | break; | |
237 | } | |
238 | ||
239 | m_hasMoreTokens = MoreTokens_Unknown; | |
240 | ||
241 | // find the end of this token | |
242 | wxString::const_iterator pos = | |
243 | find_first_of(m_delims, m_delimsLen, m_pos, m_stringEnd); | |
244 | ||
245 | // and the start of the next one | |
246 | if ( pos == m_stringEnd ) | |
247 | { | |
248 | // no more delimiters, the token is everything till the end of | |
249 | // string | |
250 | token.assign(m_pos, m_stringEnd); | |
251 | ||
252 | // skip the token | |
253 | m_pos = m_stringEnd; | |
254 | ||
255 | // it wasn't terminated | |
256 | m_lastDelim = wxT('\0'); | |
257 | } | |
258 | else // we found a delimiter at pos | |
259 | { | |
260 | // in wxTOKEN_RET_DELIMS mode we return the delimiter character | |
261 | // with token, otherwise leave it out | |
262 | wxString::const_iterator tokenEnd(pos); | |
263 | if ( m_mode == wxTOKEN_RET_DELIMS ) | |
264 | ++tokenEnd; | |
265 | ||
266 | token.assign(m_pos, tokenEnd); | |
267 | ||
268 | // skip the token and the trailing delimiter | |
269 | m_pos = pos + 1; | |
270 | ||
271 | m_lastDelim = (pos == m_stringEnd) ? wxT('\0') : (wxChar)*pos; | |
272 | } | |
273 | } | |
274 | while ( !AllowEmpty() && token.empty() ); | |
275 | ||
276 | return token; | |
277 | } | |
278 | ||
279 | // ---------------------------------------------------------------------------- | |
280 | // public functions | |
281 | // ---------------------------------------------------------------------------- | |
282 | ||
283 | wxArrayString wxStringTokenize(const wxString& str, | |
284 | const wxString& delims, | |
285 | wxStringTokenizerMode mode) | |
286 | { | |
287 | wxArrayString tokens; | |
288 | wxStringTokenizer tk(str, delims, mode); | |
289 | while ( tk.HasMoreTokens() ) | |
290 | { | |
291 | tokens.Add(tk.GetNextToken()); | |
292 | } | |
293 | ||
294 | return tokens; | |
295 | } |