]> git.saurik.com Git - wxWidgets.git/blame - src/common/tokenzr.cpp
up to 2.8.0
[wxWidgets.git] / src / common / tokenzr.cpp
CommitLineData
/////////////////////////////////////////////////////////////////////////////
// Name:        src/common/tokenzr.cpp
// Purpose:     String tokenizer
// Author:      Guilhem Lavaux
// Modified by: Vadim Zeitlin (almost full rewrite)
// Created:     04/22/98
// RCS-ID:      $Id$
// Copyright:   (c) Guilhem Lavaux
// Licence:     wxWindows licence
/////////////////////////////////////////////////////////////////////////////
11
// ============================================================================
// declarations
// ============================================================================

// ----------------------------------------------------------------------------
// headers
// ----------------------------------------------------------------------------
19
fcc6dddd
JS
20// For compilers that support precompilation, includes "wx.h".
21#include "wx/wxprec.h"
22
23#ifdef __BORLANDC__
85833f5c 24 #pragma hdrstop
fcc6dddd
JS
25#endif
26
f4ada568 27#include "wx/tokenzr.h"
aaa6d89a
WS
28
29#ifndef WX_PRECOMP
30 #include "wx/arrstr.h"
31#endif
f4ada568 32
3f8e5072
JS
33// Required for wxIs... functions
34#include <ctype.h>
35
// ============================================================================
// implementation
// ============================================================================

// ----------------------------------------------------------------------------
// wxStringTokenizer construction
// ----------------------------------------------------------------------------
43
7c968cee 44wxStringTokenizer::wxStringTokenizer(const wxString& str,
f4ada568 45 const wxString& delims,
7c968cee 46 wxStringTokenizerMode mode)
bbf8fc53 47{
7c968cee 48 SetString(str, delims, mode);
bbf8fc53
VZ
49}
50
7c968cee 51void wxStringTokenizer::SetString(const wxString& str,
bbf8fc53 52 const wxString& delims,
7c968cee 53 wxStringTokenizerMode mode)
f4ada568 54{
7c968cee
VZ
55 if ( mode == wxTOKEN_DEFAULT )
56 {
57 // by default, we behave like strtok() if the delimiters are only
58 // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
59 // whitespace delimiters, strtok() behaviour is better because we want
60 // to count consecutive spaces as one delimiter)
61 const wxChar *p;
62 for ( p = delims.c_str(); *p; p++ )
63 {
64 if ( !wxIsspace(*p) )
65 break;
66 }
67
68 if ( *p )
69 {
70 // not whitespace char in delims
71 mode = wxTOKEN_RET_EMPTY;
72 }
73 else
74 {
75 // only whitespaces
76 mode = wxTOKEN_STRTOK;
77 }
78 }
79
85833f5c 80 m_delims = delims;
7c968cee 81 m_mode = mode;
bbf8fc53 82
7c968cee 83 Reinit(str);
f4ada568
GL
84}
85
7c968cee 86void wxStringTokenizer::Reinit(const wxString& str)
f4ada568 87{
7c968cee
VZ
88 wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
89
90 m_string = str;
91 m_pos = 0;
4626c57c 92 m_lastDelim = _T('\0');
f4ada568
GL
93}
94
// ----------------------------------------------------------------------------
// access to the tokens
// ----------------------------------------------------------------------------
98
7c968cee
VZ
99// do we have more of them?
100bool wxStringTokenizer::HasMoreTokens() const
f4ada568 101{
cb719f2e 102 wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
7c968cee 103
4626c57c 104 if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos )
bbf8fc53 105 {
4626c57c
VZ
106 // there are non delimiter characters left, so we do have more tokens
107 return true;
7c968cee 108 }
4626c57c
VZ
109
110 switch ( m_mode )
7c968cee 111 {
4626c57c
VZ
112 case wxTOKEN_RET_EMPTY:
113 case wxTOKEN_RET_DELIMS:
114 // special hack for wxTOKEN_RET_EMPTY: we should return the initial
115 // empty token even if there are only delimiters after it
116 return m_pos == 0 && !m_string.empty();
117
118 case wxTOKEN_RET_EMPTY_ALL:
119 // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had
120 // already returned the trailing empty token after the last
121 // delimiter by examining m_lastDelim: it is set to NUL if we run
122 // up to the end of the string in GetNextToken(), but if it is not
123 // NUL yet we still have this last token to return even if m_pos is
124 // already at m_string.length()
125 return m_pos < m_string.length() || m_lastDelim != _T('\0');
126
127 case wxTOKEN_INVALID:
128 case wxTOKEN_DEFAULT:
129 wxFAIL_MSG( _T("unexpected tokenizer mode") );
130 // fall through
131
132 case wxTOKEN_STRTOK:
133 // never return empty delimiters
134 break;
7c968cee 135 }
4626c57c
VZ
136
137 return false;
7c968cee 138}
bbf8fc53 139
4626c57c 140// count the number of (remaining) tokens in the string
7c968cee
VZ
141size_t wxStringTokenizer::CountTokens() const
142{
143 wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
bbf8fc53 144
7c968cee 145 // VZ: this function is IMHO not very useful, so it's probably not very
4626c57c
VZ
146 // important if its implementation here is not as efficient as it
147 // could be -- but OTOH like this we're sure to get the correct answer
7c968cee 148 // in all modes
4626c57c 149 wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode);
bbf8fc53 150
7c968cee 151 size_t count = 0;
4626c57c 152 while ( tkz.HasMoreTokens() )
bbf8fc53
VZ
153 {
154 count++;
7c968cee 155
4626c57c 156 (void)tkz.GetNextToken();
bbf8fc53
VZ
157 }
158
159 return count;
160}
161
// ----------------------------------------------------------------------------
// token extraction
// ----------------------------------------------------------------------------
165
166wxString wxStringTokenizer::GetNextToken()
167{
168 wxString token;
7c968cee 169 do
bbf8fc53 170 {
7c968cee 171 if ( !HasMoreTokens() )
85833f5c 172 {
7c968cee 173 break;
85833f5c 174 }
4626c57c 175
7c968cee 176 // find the end of this token
4626c57c 177 size_t pos = m_string.find_first_of(m_delims, m_pos);
7c968cee
VZ
178
179 // and the start of the next one
180 if ( pos == wxString::npos )
85833f5c 181 {
7c968cee
VZ
182 // no more delimiters, the token is everything till the end of
183 // string
4626c57c 184 token.assign(m_string, m_pos, wxString::npos);
7c968cee 185
4626c57c
VZ
186 // skip the token
187 m_pos = m_string.length();
bbf8fc53 188
4626c57c
VZ
189 // it wasn't terminated
190 m_lastDelim = _T('\0');
85833f5c 191 }
4626c57c 192 else // we found a delimiter at pos
7c968cee 193 {
7c968cee 194 // in wxTOKEN_RET_DELIMS mode we return the delimiter character
4626c57c
VZ
195 // with token, otherwise leave it out
196 size_t len = pos - m_pos;
197 if ( m_mode == wxTOKEN_RET_DELIMS )
198 len++;
199
200 token.assign(m_string, m_pos, len);
dab58492 201
4626c57c
VZ
202 // skip the token and the trailing delimiter
203 m_pos = pos + 1;
bbf8fc53 204
4626c57c 205 m_lastDelim = m_string[pos];
7c968cee 206 }
85833f5c 207 }
4626c57c 208 while ( !AllowEmpty() && token.empty() );
bbf8fc53
VZ
209
210 return token;
f4ada568 211}
1e6feb95
VZ
212
// ----------------------------------------------------------------------------
// public functions
// ----------------------------------------------------------------------------
216
217wxArrayString wxStringTokenize(const wxString& str,
218 const wxString& delims,
219 wxStringTokenizerMode mode)
220{
221 wxArrayString tokens;
222 wxStringTokenizer tk(str, delims, mode);
223 while ( tk.HasMoreTokens() )
224 {
225 tokens.Add(tk.GetNextToken());
226 }
227
228 return tokens;
229}