1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/tokenzr.cpp
3 // Purpose: String tokenizer
4 // Author: Guilhem Lavaux
5 // Modified by: Vadim Zeitlin (almost full rewrite)
8 // Copyright: (c) Guilhem Lavaux
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
12 // ============================================================================
14 // ============================================================================
16 // ----------------------------------------------------------------------------
18 // ----------------------------------------------------------------------------
20 // For compilers that support precompilation, includes "wx.h".
21 #include "wx/wxprec.h"
27 #include "wx/tokenzr.h"
30 #include "wx/arrstr.h"
34 // Required for wxIs... functions
37 // ============================================================================
39 // ============================================================================
41 // ----------------------------------------------------------------------------
43 // ----------------------------------------------------------------------------
45 static wxString::const_iterator
46 find_first_of(const wxChar
*delims
, size_t len
,
47 const wxString::const_iterator
& from
,
48 const wxString::const_iterator
& end
)
50 wxASSERT_MSG( from
<= end
, wxT("invalid index") );
52 for ( wxString::const_iterator i
= from
; i
!= end
; ++i
)
54 if ( wxTmemchr(delims
, *i
, len
) )
61 static wxString::const_iterator
62 find_first_not_of(const wxChar
*delims
, size_t len
,
63 const wxString::const_iterator
& from
,
64 const wxString::const_iterator
& end
)
66 wxASSERT_MSG( from
<= end
, wxT("invalid index") );
68 for ( wxString::const_iterator i
= from
; i
!= end
; ++i
)
70 if ( !wxTmemchr(delims
, *i
, len
) )
77 // ----------------------------------------------------------------------------
78 // wxStringTokenizer construction
79 // ----------------------------------------------------------------------------
81 wxStringTokenizer::wxStringTokenizer(const wxString
& str
,
82 const wxString
& delims
,
83 wxStringTokenizerMode mode
)
85 SetString(str
, delims
, mode
);
88 void wxStringTokenizer::SetString(const wxString
& str
,
89 const wxString
& delims
,
90 wxStringTokenizerMode mode
)
92 if ( mode
== wxTOKEN_DEFAULT
)
94 // by default, we behave like strtok() if the delimiters are only
95 // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
96 // whitespace delimiters, strtok() behaviour is better because we want
97 // to count consecutive spaces as one delimiter)
98 wxString::const_iterator p
;
99 for ( p
= delims
.begin(); p
!= delims
.end(); ++p
)
101 if ( !wxIsspace(*p
) )
105 if ( p
!= delims
.end() )
107 // not whitespace char in delims
108 mode
= wxTOKEN_RET_EMPTY
;
113 mode
= wxTOKEN_STRTOK
;
117 #if wxUSE_UNICODE // FIXME-UTF8: only wc_str()
118 m_delims
= delims
.wc_str();
120 m_delims
= delims
.mb_str();
122 m_delimsLen
= delims
.length();
129 void wxStringTokenizer::Reinit(const wxString
& str
)
131 wxASSERT_MSG( IsOk(), wxT("you should call SetString() first") );
134 m_stringEnd
= m_string
.end();
135 m_pos
= m_string
.begin();
136 m_lastDelim
= wxT('\0');
137 m_hasMoreTokens
= MoreTokens_Unknown
;
140 // ----------------------------------------------------------------------------
141 // access to the tokens
142 // ----------------------------------------------------------------------------
144 // do we have more of them?
145 bool wxStringTokenizer::HasMoreTokens() const
147 // GetNextToken() calls HasMoreTokens() and so HasMoreTokens() is called
148 // twice in every interation in the following common usage patten:
149 // while ( HasMoreTokens() )
151 // We optimize this case by caching HasMoreTokens() return value here:
152 if ( m_hasMoreTokens
== MoreTokens_Unknown
)
154 bool r
= DoHasMoreTokens();
155 wxConstCast(this, wxStringTokenizer
)->m_hasMoreTokens
=
156 r
? MoreTokens_Yes
: MoreTokens_No
;
160 return m_hasMoreTokens
== MoreTokens_Yes
;
163 bool wxStringTokenizer::DoHasMoreTokens() const
165 wxCHECK_MSG( IsOk(), false, wxT("you should call SetString() first") );
167 if ( find_first_not_of(m_delims
, m_delimsLen
, m_pos
, m_stringEnd
)
170 // there are non delimiter characters left, so we do have more tokens
176 case wxTOKEN_RET_EMPTY
:
177 case wxTOKEN_RET_DELIMS
:
178 // special hack for wxTOKEN_RET_EMPTY: we should return the initial
179 // empty token even if there are only delimiters after it
180 return !m_string
.empty() && m_pos
== m_string
.begin();
182 case wxTOKEN_RET_EMPTY_ALL
:
183 // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had
184 // already returned the trailing empty token after the last
185 // delimiter by examining m_lastDelim: it is set to NUL if we run
186 // up to the end of the string in GetNextToken(), but if it is not
187 // NUL yet we still have this last token to return even if m_pos is
188 // already at m_string.length()
189 return m_pos
< m_stringEnd
|| m_lastDelim
!= wxT('\0');
191 case wxTOKEN_INVALID
:
192 case wxTOKEN_DEFAULT
:
193 wxFAIL_MSG( wxT("unexpected tokenizer mode") );
197 // never return empty delimiters
204 // count the number of (remaining) tokens in the string
205 size_t wxStringTokenizer::CountTokens() const
207 wxCHECK_MSG( IsOk(), 0, wxT("you should call SetString() first") );
209 // VZ: this function is IMHO not very useful, so it's probably not very
210 // important if its implementation here is not as efficient as it
211 // could be -- but OTOH like this we're sure to get the correct answer
213 wxStringTokenizer
tkz(wxString(m_pos
, m_stringEnd
), m_delims
, m_mode
);
216 while ( tkz
.HasMoreTokens() )
220 (void)tkz
.GetNextToken();
226 // ----------------------------------------------------------------------------
228 // ----------------------------------------------------------------------------
230 wxString
wxStringTokenizer::GetNextToken()
235 if ( !HasMoreTokens() )
240 m_hasMoreTokens
= MoreTokens_Unknown
;
242 // find the end of this token
243 wxString::const_iterator pos
=
244 find_first_of(m_delims
, m_delimsLen
, m_pos
, m_stringEnd
);
246 // and the start of the next one
247 if ( pos
== m_stringEnd
)
249 // no more delimiters, the token is everything till the end of
251 token
.assign(m_pos
, m_stringEnd
);
256 // it wasn't terminated
257 m_lastDelim
= wxT('\0');
259 else // we found a delimiter at pos
261 // in wxTOKEN_RET_DELIMS mode we return the delimiter character
262 // with token, otherwise leave it out
263 wxString::const_iterator
tokenEnd(pos
);
264 if ( m_mode
== wxTOKEN_RET_DELIMS
)
267 token
.assign(m_pos
, tokenEnd
);
269 // skip the token and the trailing delimiter
272 m_lastDelim
= (pos
== m_stringEnd
) ? wxT('\0') : (wxChar
)*pos
;
275 while ( !AllowEmpty() && token
.empty() );
280 // ----------------------------------------------------------------------------
282 // ----------------------------------------------------------------------------
284 wxArrayString
wxStringTokenize(const wxString
& str
,
285 const wxString
& delims
,
286 wxStringTokenizerMode mode
)
288 wxArrayString tokens
;
289 wxStringTokenizer
tk(str
, delims
, mode
);
290 while ( tk
.HasMoreTokens() )
292 tokens
.Add(tk
.GetNextToken());