]>
Commit | Line | Data |
---|---|---|
1 | ///////////////////////////////////////////////////////////////////////////// | |
2 | // Name: tokenzr.cpp | |
3 | // Purpose: String tokenizer | |
4 | // Author: Guilhem Lavaux | |
5 | // Modified by: Vadim Zeitlin (almost full rewrite) | |
6 | // Created: 04/22/98 | |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) Guilhem Lavaux | |
9 | // Licence: wxWindows licence | |
10 | ///////////////////////////////////////////////////////////////////////////// | |
11 | ||
12 | // ============================================================================ | |
13 | // declarations | |
14 | // ============================================================================ | |
15 | ||
16 | // ---------------------------------------------------------------------------- | |
17 | // headers | |
18 | // ---------------------------------------------------------------------------- | |
19 | ||
20 | #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) | |
21 | #pragma implementation "tokenzr.h" | |
22 | #endif | |
23 | ||
24 | // For compilers that support precompilation, includes "wx.h". | |
25 | #include "wx/wxprec.h" | |
26 | ||
27 | #ifdef __BORLANDC__ | |
28 | #pragma hdrstop | |
29 | #endif | |
30 | ||
31 | #include "wx/tokenzr.h" | |
32 | #include "wx/arrstr.h" | |
33 | ||
34 | // Required for wxIs... functions | |
35 | #include <ctype.h> | |
36 | ||
37 | // ============================================================================ | |
38 | // implementation | |
39 | // ============================================================================ | |
40 | ||
41 | // ---------------------------------------------------------------------------- | |
42 | // wxStringTokenizer construction | |
43 | // ---------------------------------------------------------------------------- | |
44 | ||
45 | wxStringTokenizer::wxStringTokenizer(const wxString& str, | |
46 | const wxString& delims, | |
47 | wxStringTokenizerMode mode) | |
48 | { | |
49 | SetString(str, delims, mode); | |
50 | } | |
51 | ||
52 | void wxStringTokenizer::SetString(const wxString& str, | |
53 | const wxString& delims, | |
54 | wxStringTokenizerMode mode) | |
55 | { | |
56 | if ( mode == wxTOKEN_DEFAULT ) | |
57 | { | |
58 | // by default, we behave like strtok() if the delimiters are only | |
59 | // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for | |
60 | // whitespace delimiters, strtok() behaviour is better because we want | |
61 | // to count consecutive spaces as one delimiter) | |
62 | const wxChar *p; | |
63 | for ( p = delims.c_str(); *p; p++ ) | |
64 | { | |
65 | if ( !wxIsspace(*p) ) | |
66 | break; | |
67 | } | |
68 | ||
69 | if ( *p ) | |
70 | { | |
71 | // not whitespace char in delims | |
72 | mode = wxTOKEN_RET_EMPTY; | |
73 | } | |
74 | else | |
75 | { | |
76 | // only whitespaces | |
77 | mode = wxTOKEN_STRTOK; | |
78 | } | |
79 | } | |
80 | ||
81 | m_delims = delims; | |
82 | m_mode = mode; | |
83 | ||
84 | Reinit(str); | |
85 | } | |
86 | ||
87 | void wxStringTokenizer::Reinit(const wxString& str) | |
88 | { | |
89 | wxASSERT_MSG( IsOk(), _T("you should call SetString() first") ); | |
90 | ||
91 | m_string = str; | |
92 | m_pos = 0; | |
93 | ||
94 | // empty string doesn't have any tokens | |
95 | m_hasMore = !m_string.empty(); | |
96 | } | |
97 | ||
98 | // ---------------------------------------------------------------------------- | |
99 | // access to the tokens | |
100 | // ---------------------------------------------------------------------------- | |
101 | ||
102 | // do we have more of them? | |
103 | bool wxStringTokenizer::HasMoreTokens() const | |
104 | { | |
105 | wxCHECK_MSG( IsOk(), FALSE, _T("you should call SetString() first") ); | |
106 | ||
107 | if ( m_string.find_first_not_of(m_delims) == wxString::npos ) | |
108 | { | |
109 | // no non empty tokens left, but in 2 cases we still may return TRUE if | |
110 | // GetNextToken() wasn't called yet for this empty token: | |
111 | // | |
112 | // a) in wxTOKEN_RET_EMPTY_ALL mode we always do it | |
113 | // b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a | |
114 | // string containing only the delimiter: then there is an empty | |
115 | // token just before it | |
116 | return (m_mode == wxTOKEN_RET_EMPTY_ALL) || | |
117 | (m_mode == wxTOKEN_RET_EMPTY && m_pos == 0) | |
118 | ? m_hasMore : FALSE; | |
119 | } | |
120 | else | |
121 | { | |
122 | // there are non delimiter characters left, hence we do have more | |
123 | // tokens | |
124 | return TRUE; | |
125 | } | |
126 | } | |
127 | ||
128 | // count the number of tokens in the string | |
129 | size_t wxStringTokenizer::CountTokens() const | |
130 | { | |
131 | wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") ); | |
132 | ||
133 | // VZ: this function is IMHO not very useful, so it's probably not very | |
134 | // important if it's implementation here is not as efficient as it | |
135 | // could be - but OTOH like this we're sure to get the correct answer | |
136 | // in all modes | |
137 | wxStringTokenizer *self = (wxStringTokenizer *)this; // const_cast | |
138 | wxString stringInitial = m_string; | |
139 | ||
140 | size_t count = 0; | |
141 | while ( self->HasMoreTokens() ) | |
142 | { | |
143 | count++; | |
144 | ||
145 | (void)self->GetNextToken(); | |
146 | } | |
147 | ||
148 | self->Reinit(stringInitial); | |
149 | ||
150 | return count; | |
151 | } | |
152 | ||
153 | // ---------------------------------------------------------------------------- | |
154 | // token extraction | |
155 | // ---------------------------------------------------------------------------- | |
156 | ||
157 | wxString wxStringTokenizer::GetNextToken() | |
158 | { | |
159 | // strtok() doesn't return empty tokens, all other modes do | |
160 | bool allowEmpty = m_mode != wxTOKEN_STRTOK; | |
161 | ||
162 | wxString token; | |
163 | do | |
164 | { | |
165 | if ( !HasMoreTokens() ) | |
166 | { | |
167 | break; | |
168 | } | |
169 | // find the end of this token | |
170 | size_t pos = m_string.find_first_of(m_delims); | |
171 | ||
172 | // and the start of the next one | |
173 | if ( pos == wxString::npos ) | |
174 | { | |
175 | // no more delimiters, the token is everything till the end of | |
176 | // string | |
177 | token = m_string; | |
178 | ||
179 | m_pos += m_string.length(); | |
180 | m_string.clear(); | |
181 | ||
182 | // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL | |
183 | // mode (we will return the trailing one right now in this case) | |
184 | m_hasMore = FALSE; | |
185 | } | |
186 | else | |
187 | { | |
188 | size_t pos2 = pos + 1; | |
189 | ||
190 | // in wxTOKEN_RET_DELIMS mode we return the delimiter character | |
191 | // with token | |
192 | token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2 | |
193 | : pos); | |
194 | ||
195 | // remove token with the following it delimiter from string | |
196 | m_string.erase(0, pos2); | |
197 | ||
198 | // keep track of the position in the original string too | |
199 | m_pos += pos2; | |
200 | } | |
201 | } | |
202 | while ( !allowEmpty && token.empty() ); | |
203 | ||
204 | return token; | |
205 | } | |
206 | ||
207 | // ---------------------------------------------------------------------------- | |
208 | // public functions | |
209 | // ---------------------------------------------------------------------------- | |
210 | ||
211 | wxArrayString wxStringTokenize(const wxString& str, | |
212 | const wxString& delims, | |
213 | wxStringTokenizerMode mode) | |
214 | { | |
215 | wxArrayString tokens; | |
216 | wxStringTokenizer tk(str, delims, mode); | |
217 | while ( tk.HasMoreTokens() ) | |
218 | { | |
219 | tokens.Add(tk.GetNextToken()); | |
220 | } | |
221 | ||
222 | return tokens; | |
223 | } |