]>
Commit | Line | Data |
---|---|---|
f4ada568 | 1 | ///////////////////////////////////////////////////////////////////////////// |
aaa6d89a | 2 | // Name: src/common/tokenzr.cpp |
f4ada568 GL |
3 | // Purpose: String tokenizer |
4 | // Author: Guilhem Lavaux | |
1e6feb95 | 5 | // Modified by: Vadim Zeitlin (almost full rewrite) |
f4ada568 GL |
6 | // Created: 04/22/98 |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) Guilhem Lavaux | |
65571936 | 9 | // Licence: wxWindows licence |
f4ada568 GL |
10 | ///////////////////////////////////////////////////////////////////////////// |
11 | ||
bbf8fc53 VZ |
12 | // ============================================================================ |
13 | // declarations | |
14 | // ============================================================================ | |
15 | ||
16 | // ---------------------------------------------------------------------------- | |
17 | // headers | |
18 | // ---------------------------------------------------------------------------- | |
19 | ||
fcc6dddd JS |
20 | // For compilers that support precompilation, includes "wx.h". |
21 | #include "wx/wxprec.h" | |
22 | ||
23 | #ifdef __BORLANDC__ | |
85833f5c | 24 | #pragma hdrstop |
fcc6dddd JS |
25 | #endif |
26 | ||
f4ada568 | 27 | #include "wx/tokenzr.h" |
aaa6d89a WS |
28 | |
29 | #ifndef WX_PRECOMP | |
30 | #include "wx/arrstr.h" | |
0bf751e7 | 31 | #include "wx/crt.h" |
aaa6d89a | 32 | #endif |
f4ada568 | 33 | |
3f8e5072 JS |
34 | // Required for wxIs... functions |
35 | #include <ctype.h> | |
36 | ||
bbf8fc53 VZ |
37 | // ============================================================================ |
38 | // implementation | |
39 | // ============================================================================ | |
40 | ||
41 | // ---------------------------------------------------------------------------- | |
42 | // wxStringTokenizer construction | |
43 | // ---------------------------------------------------------------------------- | |
44 | ||
7c968cee | 45 | wxStringTokenizer::wxStringTokenizer(const wxString& str, |
f4ada568 | 46 | const wxString& delims, |
7c968cee | 47 | wxStringTokenizerMode mode) |
bbf8fc53 | 48 | { |
7c968cee | 49 | SetString(str, delims, mode); |
bbf8fc53 VZ |
50 | } |
51 | ||
7c968cee | 52 | void wxStringTokenizer::SetString(const wxString& str, |
bbf8fc53 | 53 | const wxString& delims, |
7c968cee | 54 | wxStringTokenizerMode mode) |
f4ada568 | 55 | { |
7c968cee VZ |
56 | if ( mode == wxTOKEN_DEFAULT ) |
57 | { | |
58 | // by default, we behave like strtok() if the delimiters are only | |
59 | // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for | |
60 | // whitespace delimiters, strtok() behaviour is better because we want | |
61 | // to count consecutive spaces as one delimiter) | |
66c71d8a VS |
62 | wxString::const_iterator p; |
63 | for ( p = delims.begin(); p != delims.end(); ++p ) | |
7c968cee VZ |
64 | { |
65 | if ( !wxIsspace(*p) ) | |
66 | break; | |
67 | } | |
68 | ||
66c71d8a | 69 | if ( p != delims.end() ) |
7c968cee VZ |
70 | { |
71 | // not whitespace char in delims | |
72 | mode = wxTOKEN_RET_EMPTY; | |
73 | } | |
74 | else | |
75 | { | |
76 | // only whitespaces | |
77 | mode = wxTOKEN_STRTOK; | |
78 | } | |
79 | } | |
80 | ||
85833f5c | 81 | m_delims = delims; |
7c968cee | 82 | m_mode = mode; |
bbf8fc53 | 83 | |
7c968cee | 84 | Reinit(str); |
f4ada568 GL |
85 | } |
86 | ||
7c968cee | 87 | void wxStringTokenizer::Reinit(const wxString& str) |
f4ada568 | 88 | { |
7c968cee VZ |
89 | wxASSERT_MSG( IsOk(), _T("you should call SetString() first") ); |
90 | ||
91 | m_string = str; | |
92 | m_pos = 0; | |
4626c57c | 93 | m_lastDelim = _T('\0'); |
f4ada568 GL |
94 | } |
95 | ||
bbf8fc53 | 96 | // ---------------------------------------------------------------------------- |
7c968cee | 97 | // access to the tokens |
bbf8fc53 VZ |
98 | // ---------------------------------------------------------------------------- |
99 | ||
7c968cee VZ |
100 | // do we have more of them? |
101 | bool wxStringTokenizer::HasMoreTokens() const | |
f4ada568 | 102 | { |
cb719f2e | 103 | wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") ); |
7c968cee | 104 | |
4626c57c | 105 | if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos ) |
bbf8fc53 | 106 | { |
4626c57c VZ |
107 | // there are non delimiter characters left, so we do have more tokens |
108 | return true; | |
7c968cee | 109 | } |
4626c57c VZ |
110 | |
111 | switch ( m_mode ) | |
7c968cee | 112 | { |
4626c57c VZ |
113 | case wxTOKEN_RET_EMPTY: |
114 | case wxTOKEN_RET_DELIMS: | |
115 | // special hack for wxTOKEN_RET_EMPTY: we should return the initial | |
116 | // empty token even if there are only delimiters after it | |
117 | return m_pos == 0 && !m_string.empty(); | |
118 | ||
119 | case wxTOKEN_RET_EMPTY_ALL: | |
120 | // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had | |
121 | // already returned the trailing empty token after the last | |
122 | // delimiter by examining m_lastDelim: it is set to NUL if we run | |
123 | // up to the end of the string in GetNextToken(), but if it is not | |
124 | // NUL yet we still have this last token to return even if m_pos is | |
125 | // already at m_string.length() | |
126 | return m_pos < m_string.length() || m_lastDelim != _T('\0'); | |
127 | ||
128 | case wxTOKEN_INVALID: | |
129 | case wxTOKEN_DEFAULT: | |
130 | wxFAIL_MSG( _T("unexpected tokenizer mode") ); | |
131 | // fall through | |
132 | ||
133 | case wxTOKEN_STRTOK: | |
134 | // never return empty delimiters | |
135 | break; | |
7c968cee | 136 | } |
4626c57c VZ |
137 | |
138 | return false; | |
7c968cee | 139 | } |
bbf8fc53 | 140 | |
4626c57c | 141 | // count the number of (remaining) tokens in the string |
7c968cee VZ |
142 | size_t wxStringTokenizer::CountTokens() const |
143 | { | |
144 | wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") ); | |
bbf8fc53 | 145 | |
7c968cee | 146 | // VZ: this function is IMHO not very useful, so it's probably not very |
4626c57c VZ |
147 | // important if its implementation here is not as efficient as it |
148 | // could be -- but OTOH like this we're sure to get the correct answer | |
7c968cee | 149 | // in all modes |
4626c57c | 150 | wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode); |
bbf8fc53 | 151 | |
7c968cee | 152 | size_t count = 0; |
4626c57c | 153 | while ( tkz.HasMoreTokens() ) |
bbf8fc53 VZ |
154 | { |
155 | count++; | |
7c968cee | 156 | |
4626c57c | 157 | (void)tkz.GetNextToken(); |
bbf8fc53 VZ |
158 | } |
159 | ||
160 | return count; | |
161 | } | |
162 | ||
163 | // ---------------------------------------------------------------------------- | |
164 | // token extraction | |
165 | // ---------------------------------------------------------------------------- | |
166 | ||
167 | wxString wxStringTokenizer::GetNextToken() | |
168 | { | |
169 | wxString token; | |
7c968cee | 170 | do |
bbf8fc53 | 171 | { |
7c968cee | 172 | if ( !HasMoreTokens() ) |
85833f5c | 173 | { |
7c968cee | 174 | break; |
85833f5c | 175 | } |
4626c57c | 176 | |
7c968cee | 177 | // find the end of this token |
4626c57c | 178 | size_t pos = m_string.find_first_of(m_delims, m_pos); |
7c968cee VZ |
179 | |
180 | // and the start of the next one | |
181 | if ( pos == wxString::npos ) | |
85833f5c | 182 | { |
7c968cee VZ |
183 | // no more delimiters, the token is everything till the end of |
184 | // string | |
4626c57c | 185 | token.assign(m_string, m_pos, wxString::npos); |
7c968cee | 186 | |
4626c57c VZ |
187 | // skip the token |
188 | m_pos = m_string.length(); | |
bbf8fc53 | 189 | |
4626c57c VZ |
190 | // it wasn't terminated |
191 | m_lastDelim = _T('\0'); | |
85833f5c | 192 | } |
4626c57c | 193 | else // we found a delimiter at pos |
7c968cee | 194 | { |
7c968cee | 195 | // in wxTOKEN_RET_DELIMS mode we return the delimiter character |
4626c57c VZ |
196 | // with token, otherwise leave it out |
197 | size_t len = pos - m_pos; | |
198 | if ( m_mode == wxTOKEN_RET_DELIMS ) | |
199 | len++; | |
200 | ||
201 | token.assign(m_string, m_pos, len); | |
dab58492 | 202 | |
4626c57c VZ |
203 | // skip the token and the trailing delimiter |
204 | m_pos = pos + 1; | |
bbf8fc53 | 205 | |
4626c57c | 206 | m_lastDelim = m_string[pos]; |
7c968cee | 207 | } |
85833f5c | 208 | } |
4626c57c | 209 | while ( !AllowEmpty() && token.empty() ); |
bbf8fc53 VZ |
210 | |
211 | return token; | |
f4ada568 | 212 | } |
1e6feb95 VZ |
213 | |
214 | // ---------------------------------------------------------------------------- | |
215 | // public functions | |
216 | // ---------------------------------------------------------------------------- | |
217 | ||
218 | wxArrayString wxStringTokenize(const wxString& str, | |
219 | const wxString& delims, | |
220 | wxStringTokenizerMode mode) | |
221 | { | |
222 | wxArrayString tokens; | |
223 | wxStringTokenizer tk(str, delims, mode); | |
224 | while ( tk.HasMoreTokens() ) | |
225 | { | |
226 | tokens.Add(tk.GetNextToken()); | |
227 | } | |
228 | ||
229 | return tokens; | |
230 | } |