]> git.saurik.com Git - wxWidgets.git/blob - src/common/tokenzr.cpp
Use re_search directly instead of regexec when using the GNU regex lib
[wxWidgets.git] / src / common / tokenzr.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: tokenzr.cpp
3 // Purpose: String tokenizer
4 // Author: Guilhem Lavaux
5 // Modified by: Vadim Zeitlin (almost full rewrite)
6 // Created: 04/22/98
7 // RCS-ID: $Id$
8 // Copyright: (c) Guilhem Lavaux
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
11
12 // ============================================================================
13 // declarations
14 // ============================================================================
15
16 // ----------------------------------------------------------------------------
17 // headers
18 // ----------------------------------------------------------------------------
19
20 // For compilers that support precompilation, includes "wx.h".
21 #include "wx/wxprec.h"
22
23 #ifdef __BORLANDC__
24 #pragma hdrstop
25 #endif
26
27 #include "wx/tokenzr.h"
28 #include "wx/arrstr.h"
29
30 // Required for wxIs... functions
31 #include <ctype.h>
32
33 // ============================================================================
34 // implementation
35 // ============================================================================
36
37 // ----------------------------------------------------------------------------
38 // wxStringTokenizer construction
39 // ----------------------------------------------------------------------------
40
41 wxStringTokenizer::wxStringTokenizer(const wxString& str,
42 const wxString& delims,
43 wxStringTokenizerMode mode)
44 {
45 SetString(str, delims, mode);
46 }
47
48 void wxStringTokenizer::SetString(const wxString& str,
49 const wxString& delims,
50 wxStringTokenizerMode mode)
51 {
52 if ( mode == wxTOKEN_DEFAULT )
53 {
54 // by default, we behave like strtok() if the delimiters are only
55 // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
56 // whitespace delimiters, strtok() behaviour is better because we want
57 // to count consecutive spaces as one delimiter)
58 const wxChar *p;
59 for ( p = delims.c_str(); *p; p++ )
60 {
61 if ( !wxIsspace(*p) )
62 break;
63 }
64
65 if ( *p )
66 {
67 // not whitespace char in delims
68 mode = wxTOKEN_RET_EMPTY;
69 }
70 else
71 {
72 // only whitespaces
73 mode = wxTOKEN_STRTOK;
74 }
75 }
76
77 m_delims = delims;
78 m_mode = mode;
79
80 Reinit(str);
81 }
82
83 void wxStringTokenizer::Reinit(const wxString& str)
84 {
85 wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
86
87 m_string = str;
88 m_pos = 0;
89
90 // empty string doesn't have any tokens
91 m_hasMore = !m_string.empty();
92 }
93
94 // ----------------------------------------------------------------------------
95 // access to the tokens
96 // ----------------------------------------------------------------------------
97
98 // do we have more of them?
99 bool wxStringTokenizer::HasMoreTokens() const
100 {
101 wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
102
103 if ( m_string.find_first_not_of(m_delims) == wxString::npos )
104 {
105 // no non empty tokens left, but in 2 cases we still may return true if
106 // GetNextToken() wasn't called yet for this empty token:
107 //
108 // a) in wxTOKEN_RET_EMPTY_ALL mode we always do it
109 // b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a
110 // string containing only the delimiter: then there is an empty
111 // token just before it
112 return (m_mode == wxTOKEN_RET_EMPTY_ALL) ||
113 (m_mode == wxTOKEN_RET_EMPTY && m_pos == 0)
114 ? m_hasMore : false;
115 }
116 else
117 {
118 // there are non delimiter characters left, hence we do have more
119 // tokens
120 return true;
121 }
122 }
123
124 // count the number of tokens in the string
125 size_t wxStringTokenizer::CountTokens() const
126 {
127 wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
128
129 // VZ: this function is IMHO not very useful, so it's probably not very
130 // important if it's implementation here is not as efficient as it
131 // could be - but OTOH like this we're sure to get the correct answer
132 // in all modes
133 wxStringTokenizer *self = (wxStringTokenizer *)this; // const_cast
134 wxString stringInitial = m_string;
135
136 size_t count = 0;
137 while ( self->HasMoreTokens() )
138 {
139 count++;
140
141 (void)self->GetNextToken();
142 }
143
144 self->Reinit(stringInitial);
145
146 return count;
147 }
148
149 // ----------------------------------------------------------------------------
150 // token extraction
151 // ----------------------------------------------------------------------------
152
153 wxString wxStringTokenizer::GetNextToken()
154 {
155 // strtok() doesn't return empty tokens, all other modes do
156 bool allowEmpty = m_mode != wxTOKEN_STRTOK;
157
158 wxString token;
159 do
160 {
161 if ( !HasMoreTokens() )
162 {
163 break;
164 }
165 // find the end of this token
166 size_t pos = m_string.find_first_of(m_delims);
167
168 // and the start of the next one
169 if ( pos == wxString::npos )
170 {
171 // no more delimiters, the token is everything till the end of
172 // string
173 token = m_string;
174
175 m_pos += m_string.length();
176 m_string.clear();
177
178 // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
179 // mode (we will return the trailing one right now in this case)
180 m_hasMore = false;
181 }
182 else
183 {
184 size_t pos2 = pos + 1;
185
186 // in wxTOKEN_RET_DELIMS mode we return the delimiter character
187 // with token
188 token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
189 : pos);
190
191 // remove token with the following it delimiter from string
192 m_string.erase(0, pos2);
193
194 // keep track of the position in the original string too
195 m_pos += pos2;
196 }
197 }
198 while ( !allowEmpty && token.empty() );
199
200 return token;
201 }
202
203 // ----------------------------------------------------------------------------
204 // public functions
205 // ----------------------------------------------------------------------------
206
207 wxArrayString wxStringTokenize(const wxString& str,
208 const wxString& delims,
209 wxStringTokenizerMode mode)
210 {
211 wxArrayString tokens;
212 wxStringTokenizer tk(str, delims, mode);
213 while ( tk.HasMoreTokens() )
214 {
215 tokens.Add(tk.GetNextToken());
216 }
217
218 return tokens;
219 }