]>
Commit | Line | Data |
---|---|---|
1 | ///////////////////////////////////////////////////////////////////////////// | |
2 | // Name: src/common/tokenzr.cpp | |
3 | // Purpose: String tokenizer | |
4 | // Author: Guilhem Lavaux | |
5 | // Modified by: Vadim Zeitlin (almost full rewrite) | |
6 | // Created: 04/22/98 | |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) Guilhem Lavaux | |
9 | // Licence: wxWindows licence | |
10 | ///////////////////////////////////////////////////////////////////////////// | |
11 | ||
12 | // ============================================================================ | |
13 | // declarations | |
14 | // ============================================================================ | |
15 | ||
16 | // ---------------------------------------------------------------------------- | |
17 | // headers | |
18 | // ---------------------------------------------------------------------------- | |
19 | ||
20 | // For compilers that support precompilation, includes "wx.h". | |
21 | #include "wx/wxprec.h" | |
22 | ||
23 | #ifdef __BORLANDC__ | |
24 | #pragma hdrstop | |
25 | #endif | |
26 | ||
27 | #include "wx/tokenzr.h" | |
28 | ||
29 | #ifndef WX_PRECOMP | |
30 | #include "wx/arrstr.h" | |
31 | #endif | |
32 | ||
33 | // Required for wxIs... functions | |
34 | #include <ctype.h> | |
35 | ||
36 | // ============================================================================ | |
37 | // implementation | |
38 | // ============================================================================ | |
39 | ||
40 | // ---------------------------------------------------------------------------- | |
41 | // wxStringTokenizer construction | |
42 | // ---------------------------------------------------------------------------- | |
43 | ||
44 | wxStringTokenizer::wxStringTokenizer(const wxString& str, | |
45 | const wxString& delims, | |
46 | wxStringTokenizerMode mode) | |
47 | { | |
48 | SetString(str, delims, mode); | |
49 | } | |
50 | ||
51 | void wxStringTokenizer::SetString(const wxString& str, | |
52 | const wxString& delims, | |
53 | wxStringTokenizerMode mode) | |
54 | { | |
55 | if ( mode == wxTOKEN_DEFAULT ) | |
56 | { | |
57 | // by default, we behave like strtok() if the delimiters are only | |
58 | // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for | |
59 | // whitespace delimiters, strtok() behaviour is better because we want | |
60 | // to count consecutive spaces as one delimiter) | |
61 | const wxChar *p; | |
62 | for ( p = delims.c_str(); *p; p++ ) | |
63 | { | |
64 | if ( !wxIsspace(*p) ) | |
65 | break; | |
66 | } | |
67 | ||
68 | if ( *p ) | |
69 | { | |
70 | // not whitespace char in delims | |
71 | mode = wxTOKEN_RET_EMPTY; | |
72 | } | |
73 | else | |
74 | { | |
75 | // only whitespaces | |
76 | mode = wxTOKEN_STRTOK; | |
77 | } | |
78 | } | |
79 | ||
80 | m_delims = delims; | |
81 | m_mode = mode; | |
82 | ||
83 | Reinit(str); | |
84 | } | |
85 | ||
86 | void wxStringTokenizer::Reinit(const wxString& str) | |
87 | { | |
88 | wxASSERT_MSG( IsOk(), _T("you should call SetString() first") ); | |
89 | ||
90 | m_string = str; | |
91 | m_pos = 0; | |
92 | m_lastDelim = _T('\0'); | |
93 | } | |
94 | ||
95 | // ---------------------------------------------------------------------------- | |
96 | // access to the tokens | |
97 | // ---------------------------------------------------------------------------- | |
98 | ||
99 | // do we have more of them? | |
100 | bool wxStringTokenizer::HasMoreTokens() const | |
101 | { | |
102 | wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") ); | |
103 | ||
104 | if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos ) | |
105 | { | |
106 | // there are non delimiter characters left, so we do have more tokens | |
107 | return true; | |
108 | } | |
109 | ||
110 | switch ( m_mode ) | |
111 | { | |
112 | case wxTOKEN_RET_EMPTY: | |
113 | case wxTOKEN_RET_DELIMS: | |
114 | // special hack for wxTOKEN_RET_EMPTY: we should return the initial | |
115 | // empty token even if there are only delimiters after it | |
116 | return m_pos == 0 && !m_string.empty(); | |
117 | ||
118 | case wxTOKEN_RET_EMPTY_ALL: | |
119 | // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had | |
120 | // already returned the trailing empty token after the last | |
121 | // delimiter by examining m_lastDelim: it is set to NUL if we run | |
122 | // up to the end of the string in GetNextToken(), but if it is not | |
123 | // NUL yet we still have this last token to return even if m_pos is | |
124 | // already at m_string.length() | |
125 | return m_pos < m_string.length() || m_lastDelim != _T('\0'); | |
126 | ||
127 | case wxTOKEN_INVALID: | |
128 | case wxTOKEN_DEFAULT: | |
129 | wxFAIL_MSG( _T("unexpected tokenizer mode") ); | |
130 | // fall through | |
131 | ||
132 | case wxTOKEN_STRTOK: | |
133 | // never return empty delimiters | |
134 | break; | |
135 | } | |
136 | ||
137 | return false; | |
138 | } | |
139 | ||
140 | // count the number of (remaining) tokens in the string | |
141 | size_t wxStringTokenizer::CountTokens() const | |
142 | { | |
143 | wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") ); | |
144 | ||
145 | // VZ: this function is IMHO not very useful, so it's probably not very | |
146 | // important if its implementation here is not as efficient as it | |
147 | // could be -- but OTOH like this we're sure to get the correct answer | |
148 | // in all modes | |
149 | wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode); | |
150 | ||
151 | size_t count = 0; | |
152 | while ( tkz.HasMoreTokens() ) | |
153 | { | |
154 | count++; | |
155 | ||
156 | (void)tkz.GetNextToken(); | |
157 | } | |
158 | ||
159 | return count; | |
160 | } | |
161 | ||
162 | // ---------------------------------------------------------------------------- | |
163 | // token extraction | |
164 | // ---------------------------------------------------------------------------- | |
165 | ||
166 | wxString wxStringTokenizer::GetNextToken() | |
167 | { | |
168 | wxString token; | |
169 | do | |
170 | { | |
171 | if ( !HasMoreTokens() ) | |
172 | { | |
173 | break; | |
174 | } | |
175 | ||
176 | // find the end of this token | |
177 | size_t pos = m_string.find_first_of(m_delims, m_pos); | |
178 | ||
179 | // and the start of the next one | |
180 | if ( pos == wxString::npos ) | |
181 | { | |
182 | // no more delimiters, the token is everything till the end of | |
183 | // string | |
184 | token.assign(m_string, m_pos, wxString::npos); | |
185 | ||
186 | // skip the token | |
187 | m_pos = m_string.length(); | |
188 | ||
189 | // it wasn't terminated | |
190 | m_lastDelim = _T('\0'); | |
191 | } | |
192 | else // we found a delimiter at pos | |
193 | { | |
194 | // in wxTOKEN_RET_DELIMS mode we return the delimiter character | |
195 | // with token, otherwise leave it out | |
196 | size_t len = pos - m_pos; | |
197 | if ( m_mode == wxTOKEN_RET_DELIMS ) | |
198 | len++; | |
199 | ||
200 | token.assign(m_string, m_pos, len); | |
201 | ||
202 | // skip the token and the trailing delimiter | |
203 | m_pos = pos + 1; | |
204 | ||
205 | m_lastDelim = m_string[pos]; | |
206 | } | |
207 | } | |
208 | while ( !AllowEmpty() && token.empty() ); | |
209 | ||
210 | return token; | |
211 | } | |
212 | ||
213 | // ---------------------------------------------------------------------------- | |
214 | // public functions | |
215 | // ---------------------------------------------------------------------------- | |
216 | ||
217 | wxArrayString wxStringTokenize(const wxString& str, | |
218 | const wxString& delims, | |
219 | wxStringTokenizerMode mode) | |
220 | { | |
221 | wxArrayString tokens; | |
222 | wxStringTokenizer tk(str, delims, mode); | |
223 | while ( tk.HasMoreTokens() ) | |
224 | { | |
225 | tokens.Add(tk.GetNextToken()); | |
226 | } | |
227 | ||
228 | return tokens; | |
229 | } |