1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/tokenzr.cpp
3 // Purpose: String tokenizer
4 // Author: Guilhem Lavaux
5 // Modified by: Vadim Zeitlin (almost full rewrite)
7 // Copyright: (c) Guilhem Lavaux
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
11 // ============================================================================
13 // ============================================================================
15 // ----------------------------------------------------------------------------
17 // ----------------------------------------------------------------------------
19 // For compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
26 #include "wx/tokenzr.h"
29 #include "wx/arrstr.h"
33 // Required for wxIs... functions
36 // ============================================================================
38 // ============================================================================
40 // ----------------------------------------------------------------------------
42 // ----------------------------------------------------------------------------
44 static wxString::const_iterator
45 find_first_of(const wxChar
*delims
, size_t len
,
46 const wxString::const_iterator
& from
,
47 const wxString::const_iterator
& end
)
49 wxASSERT_MSG( from
<= end
, wxT("invalid index") );
51 for ( wxString::const_iterator i
= from
; i
!= end
; ++i
)
53 if ( wxTmemchr(delims
, *i
, len
) )
60 static wxString::const_iterator
61 find_first_not_of(const wxChar
*delims
, size_t len
,
62 const wxString::const_iterator
& from
,
63 const wxString::const_iterator
& end
)
65 wxASSERT_MSG( from
<= end
, wxT("invalid index") );
67 for ( wxString::const_iterator i
= from
; i
!= end
; ++i
)
69 if ( !wxTmemchr(delims
, *i
, len
) )
76 // ----------------------------------------------------------------------------
77 // wxStringTokenizer construction
78 // ----------------------------------------------------------------------------
80 wxStringTokenizer::wxStringTokenizer(const wxString
& str
,
81 const wxString
& delims
,
82 wxStringTokenizerMode mode
)
84 SetString(str
, delims
, mode
);
87 void wxStringTokenizer::SetString(const wxString
& str
,
88 const wxString
& delims
,
89 wxStringTokenizerMode mode
)
91 if ( mode
== wxTOKEN_DEFAULT
)
93 // by default, we behave like strtok() if the delimiters are only
94 // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
95 // whitespace delimiters, strtok() behaviour is better because we want
96 // to count consecutive spaces as one delimiter)
97 wxString::const_iterator p
;
98 for ( p
= delims
.begin(); p
!= delims
.end(); ++p
)
100 if ( !wxIsspace(*p
) )
104 if ( p
!= delims
.end() )
106 // not whitespace char in delims
107 mode
= wxTOKEN_RET_EMPTY
;
112 mode
= wxTOKEN_STRTOK
;
116 #if wxUSE_UNICODE // FIXME-UTF8: only wc_str()
117 m_delims
= delims
.wc_str();
119 m_delims
= delims
.mb_str();
121 m_delimsLen
= delims
.length();
128 void wxStringTokenizer::Reinit(const wxString
& str
)
130 wxASSERT_MSG( IsOk(), wxT("you should call SetString() first") );
133 m_stringEnd
= m_string
.end();
134 m_pos
= m_string
.begin();
135 m_lastDelim
= wxT('\0');
136 m_hasMoreTokens
= MoreTokens_Unknown
;
139 // ----------------------------------------------------------------------------
140 // access to the tokens
141 // ----------------------------------------------------------------------------
143 // do we have more of them?
144 bool wxStringTokenizer::HasMoreTokens() const
146 // GetNextToken() calls HasMoreTokens() and so HasMoreTokens() is called
147 // twice in every interation in the following common usage patten:
148 // while ( HasMoreTokens() )
150 // We optimize this case by caching HasMoreTokens() return value here:
151 if ( m_hasMoreTokens
== MoreTokens_Unknown
)
153 bool r
= DoHasMoreTokens();
154 wxConstCast(this, wxStringTokenizer
)->m_hasMoreTokens
=
155 r
? MoreTokens_Yes
: MoreTokens_No
;
159 return m_hasMoreTokens
== MoreTokens_Yes
;
162 bool wxStringTokenizer::DoHasMoreTokens() const
164 wxCHECK_MSG( IsOk(), false, wxT("you should call SetString() first") );
166 if ( find_first_not_of(m_delims
, m_delimsLen
, m_pos
, m_stringEnd
)
169 // there are non delimiter characters left, so we do have more tokens
175 case wxTOKEN_RET_EMPTY
:
176 case wxTOKEN_RET_DELIMS
:
177 // special hack for wxTOKEN_RET_EMPTY: we should return the initial
178 // empty token even if there are only delimiters after it
179 return !m_string
.empty() && m_pos
== m_string
.begin();
181 case wxTOKEN_RET_EMPTY_ALL
:
182 // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had
183 // already returned the trailing empty token after the last
184 // delimiter by examining m_lastDelim: it is set to NUL if we run
185 // up to the end of the string in GetNextToken(), but if it is not
186 // NUL yet we still have this last token to return even if m_pos is
187 // already at m_string.length()
188 return m_pos
< m_stringEnd
|| m_lastDelim
!= wxT('\0');
190 case wxTOKEN_INVALID
:
191 case wxTOKEN_DEFAULT
:
192 wxFAIL_MSG( wxT("unexpected tokenizer mode") );
196 // never return empty delimiters
203 // count the number of (remaining) tokens in the string
204 size_t wxStringTokenizer::CountTokens() const
206 wxCHECK_MSG( IsOk(), 0, wxT("you should call SetString() first") );
208 // VZ: this function is IMHO not very useful, so it's probably not very
209 // important if its implementation here is not as efficient as it
210 // could be -- but OTOH like this we're sure to get the correct answer
212 wxStringTokenizer
tkz(wxString(m_pos
, m_stringEnd
), m_delims
, m_mode
);
215 while ( tkz
.HasMoreTokens() )
219 (void)tkz
.GetNextToken();
225 // ----------------------------------------------------------------------------
227 // ----------------------------------------------------------------------------
229 wxString
wxStringTokenizer::GetNextToken()
234 if ( !HasMoreTokens() )
239 m_hasMoreTokens
= MoreTokens_Unknown
;
241 // find the end of this token
242 wxString::const_iterator pos
=
243 find_first_of(m_delims
, m_delimsLen
, m_pos
, m_stringEnd
);
245 // and the start of the next one
246 if ( pos
== m_stringEnd
)
248 // no more delimiters, the token is everything till the end of
250 token
.assign(m_pos
, m_stringEnd
);
255 // it wasn't terminated
256 m_lastDelim
= wxT('\0');
258 else // we found a delimiter at pos
260 // in wxTOKEN_RET_DELIMS mode we return the delimiter character
261 // with token, otherwise leave it out
262 wxString::const_iterator
tokenEnd(pos
);
263 if ( m_mode
== wxTOKEN_RET_DELIMS
)
266 token
.assign(m_pos
, tokenEnd
);
268 // skip the token and the trailing delimiter
271 m_lastDelim
= (pos
== m_stringEnd
) ? wxT('\0') : (wxChar
)*pos
;
274 while ( !AllowEmpty() && token
.empty() );
279 // ----------------------------------------------------------------------------
281 // ----------------------------------------------------------------------------
283 wxArrayString
wxStringTokenize(const wxString
& str
,
284 const wxString
& delims
,
285 wxStringTokenizerMode mode
)
287 wxArrayString tokens
;
288 wxStringTokenizer
tk(str
, delims
, mode
);
289 while ( tk
.HasMoreTokens() )
291 tokens
.Add(tk
.GetNextToken());