]>
Commit | Line | Data |
---|---|---|
f4ada568 GL |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: tokenzr.cpp | |
3 | // Purpose: String tokenizer | |
4 | // Author: Guilhem Lavaux | |
1e6feb95 | 5 | // Modified by: Vadim Zeitlin (almost full rewrite) |
f4ada568 GL |
6 | // Created: 04/22/98 |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) Guilhem Lavaux | |
65571936 | 9 | // Licence: wxWindows licence |
f4ada568 GL |
10 | ///////////////////////////////////////////////////////////////////////////// |
11 | ||
bbf8fc53 VZ |
12 | // ============================================================================ |
13 | // declarations | |
14 | // ============================================================================ | |
15 | ||
16 | // ---------------------------------------------------------------------------- | |
17 | // headers | |
18 | // ---------------------------------------------------------------------------- | |
19 | ||
fcc6dddd JS |
20 | // For compilers that support precompilation, includes "wx.h". |
21 | #include "wx/wxprec.h" | |
22 | ||
23 | #ifdef __BORLANDC__ | |
85833f5c | 24 | #pragma hdrstop |
fcc6dddd JS |
25 | #endif |
26 | ||
f4ada568 | 27 | #include "wx/tokenzr.h" |
df5168c4 | 28 | #include "wx/arrstr.h" |
f4ada568 | 29 | |
3f8e5072 JS |
30 | // Required for wxIs... functions |
31 | #include <ctype.h> | |
32 | ||
bbf8fc53 VZ |
33 | // ============================================================================ |
34 | // implementation | |
35 | // ============================================================================ | |
36 | ||
37 | // ---------------------------------------------------------------------------- | |
38 | // wxStringTokenizer construction | |
39 | // ---------------------------------------------------------------------------- | |
40 | ||
7c968cee | 41 | wxStringTokenizer::wxStringTokenizer(const wxString& str, |
f4ada568 | 42 | const wxString& delims, |
7c968cee | 43 | wxStringTokenizerMode mode) |
bbf8fc53 | 44 | { |
7c968cee | 45 | SetString(str, delims, mode); |
bbf8fc53 VZ |
46 | } |
47 | ||
7c968cee | 48 | void wxStringTokenizer::SetString(const wxString& str, |
bbf8fc53 | 49 | const wxString& delims, |
7c968cee | 50 | wxStringTokenizerMode mode) |
f4ada568 | 51 | { |
7c968cee VZ |
52 | if ( mode == wxTOKEN_DEFAULT ) |
53 | { | |
54 | // by default, we behave like strtok() if the delimiters are only | |
55 | // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for | |
56 | // whitespace delimiters, strtok() behaviour is better because we want | |
57 | // to count consecutive spaces as one delimiter) | |
58 | const wxChar *p; | |
59 | for ( p = delims.c_str(); *p; p++ ) | |
60 | { | |
61 | if ( !wxIsspace(*p) ) | |
62 | break; | |
63 | } | |
64 | ||
65 | if ( *p ) | |
66 | { | |
67 | // not whitespace char in delims | |
68 | mode = wxTOKEN_RET_EMPTY; | |
69 | } | |
70 | else | |
71 | { | |
72 | // only whitespaces | |
73 | mode = wxTOKEN_STRTOK; | |
74 | } | |
75 | } | |
76 | ||
85833f5c | 77 | m_delims = delims; |
7c968cee | 78 | m_mode = mode; |
bbf8fc53 | 79 | |
7c968cee | 80 | Reinit(str); |
f4ada568 GL |
81 | } |
82 | ||
7c968cee | 83 | void wxStringTokenizer::Reinit(const wxString& str) |
f4ada568 | 84 | { |
7c968cee VZ |
85 | wxASSERT_MSG( IsOk(), _T("you should call SetString() first") ); |
86 | ||
87 | m_string = str; | |
88 | m_pos = 0; | |
89 | ||
90 | // empty string doesn't have any tokens | |
91 | m_hasMore = !m_string.empty(); | |
f4ada568 GL |
92 | } |
93 | ||
bbf8fc53 | 94 | // ---------------------------------------------------------------------------- |
7c968cee | 95 | // access to the tokens |
bbf8fc53 VZ |
96 | // ---------------------------------------------------------------------------- |
97 | ||
7c968cee VZ |
98 | // do we have more of them? |
99 | bool wxStringTokenizer::HasMoreTokens() const | |
f4ada568 | 100 | { |
cb719f2e | 101 | wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") ); |
7c968cee VZ |
102 | |
103 | if ( m_string.find_first_not_of(m_delims) == wxString::npos ) | |
bbf8fc53 | 104 | { |
cb719f2e | 105 | // no non empty tokens left, but in 2 cases we still may return true if |
1e6feb95 VZ |
106 | // GetNextToken() wasn't called yet for this empty token: |
107 | // | |
108 | // a) in wxTOKEN_RET_EMPTY_ALL mode we always do it | |
109 | // b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a | |
110 | // string containing only the delimiter: then there is an empty | |
111 | // token just before it | |
112 | return (m_mode == wxTOKEN_RET_EMPTY_ALL) || | |
113 | (m_mode == wxTOKEN_RET_EMPTY && m_pos == 0) | |
cb719f2e | 114 | ? m_hasMore : false; |
7c968cee VZ |
115 | } |
116 | else | |
117 | { | |
118 | // there are non delimiter characters left, hence we do have more | |
119 | // tokens | |
cb719f2e | 120 | return true; |
7c968cee VZ |
121 | } |
122 | } | |
bbf8fc53 | 123 | |
7c968cee VZ |
124 | // count the number of tokens in the string |
125 | size_t wxStringTokenizer::CountTokens() const | |
126 | { | |
127 | wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") ); | |
bbf8fc53 | 128 | |
7c968cee VZ |
129 | // VZ: this function is IMHO not very useful, so it's probably not very |
130 | // important if it's implementation here is not as efficient as it | |
131 | // could be - but OTOH like this we're sure to get the correct answer | |
132 | // in all modes | |
133 | wxStringTokenizer *self = (wxStringTokenizer *)this; // const_cast | |
134 | wxString stringInitial = m_string; | |
bbf8fc53 | 135 | |
7c968cee VZ |
136 | size_t count = 0; |
137 | while ( self->HasMoreTokens() ) | |
bbf8fc53 VZ |
138 | { |
139 | count++; | |
7c968cee VZ |
140 | |
141 | (void)self->GetNextToken(); | |
bbf8fc53 VZ |
142 | } |
143 | ||
7c968cee VZ |
144 | self->Reinit(stringInitial); |
145 | ||
bbf8fc53 VZ |
146 | return count; |
147 | } | |
148 | ||
149 | // ---------------------------------------------------------------------------- | |
150 | // token extraction | |
151 | // ---------------------------------------------------------------------------- | |
152 | ||
153 | wxString wxStringTokenizer::GetNextToken() | |
154 | { | |
7c968cee VZ |
155 | // strtok() doesn't return empty tokens, all other modes do |
156 | bool allowEmpty = m_mode != wxTOKEN_STRTOK; | |
157 | ||
bbf8fc53 | 158 | wxString token; |
7c968cee | 159 | do |
bbf8fc53 | 160 | { |
7c968cee | 161 | if ( !HasMoreTokens() ) |
85833f5c | 162 | { |
7c968cee | 163 | break; |
85833f5c | 164 | } |
7c968cee VZ |
165 | // find the end of this token |
166 | size_t pos = m_string.find_first_of(m_delims); | |
167 | ||
168 | // and the start of the next one | |
169 | if ( pos == wxString::npos ) | |
85833f5c | 170 | { |
7c968cee VZ |
171 | // no more delimiters, the token is everything till the end of |
172 | // string | |
173 | token = m_string; | |
174 | ||
175 | m_pos += m_string.length(); | |
176 | m_string.clear(); | |
bbf8fc53 | 177 | |
7c968cee VZ |
178 | // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL |
179 | // mode (we will return the trailing one right now in this case) | |
cb719f2e | 180 | m_hasMore = false; |
85833f5c | 181 | } |
7c968cee VZ |
182 | else |
183 | { | |
184 | size_t pos2 = pos + 1; | |
f4ada568 | 185 | |
7c968cee VZ |
186 | // in wxTOKEN_RET_DELIMS mode we return the delimiter character |
187 | // with token | |
188 | token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2 | |
189 | : pos); | |
dab58492 | 190 | |
7c968cee VZ |
191 | // remove token with the following it delimiter from string |
192 | m_string.erase(0, pos2); | |
bbf8fc53 | 193 | |
7c968cee VZ |
194 | // keep track of the position in the original string too |
195 | m_pos += pos2; | |
196 | } | |
85833f5c | 197 | } |
7c968cee | 198 | while ( !allowEmpty && token.empty() ); |
bbf8fc53 VZ |
199 | |
200 | return token; | |
f4ada568 | 201 | } |
1e6feb95 VZ |
202 | |
203 | // ---------------------------------------------------------------------------- | |
204 | // public functions | |
205 | // ---------------------------------------------------------------------------- | |
206 | ||
207 | wxArrayString wxStringTokenize(const wxString& str, | |
208 | const wxString& delims, | |
209 | wxStringTokenizerMode mode) | |
210 | { | |
211 | wxArrayString tokens; | |
212 | wxStringTokenizer tk(str, delims, mode); | |
213 | while ( tk.HasMoreTokens() ) | |
214 | { | |
215 | tokens.Add(tk.GetNextToken()); | |
216 | } | |
217 | ||
218 | return tokens; | |
219 | } |