]> git.saurik.com Git - wxWidgets.git/blame_incremental - src/common/tokenzr.cpp
Set HTML data even in Unicode mode
[wxWidgets.git] / src / common / tokenzr.cpp
... / ...
CommitLineData
1/////////////////////////////////////////////////////////////////////////////
2// Name: tokenzr.cpp
3// Purpose: String tokenizer
4// Author: Guilhem Lavaux
5// Modified by: Vadim Zeitlin (almost full rewrite)
6// Created: 04/22/98
7// RCS-ID: $Id$
8// Copyright: (c) Guilhem Lavaux
9// Licence: wxWindows licence
10/////////////////////////////////////////////////////////////////////////////
11
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
20// For compilers that support precompilation, includes "wx.h".
21#include "wx/wxprec.h"
22
23#ifdef __BORLANDC__
24 #pragma hdrstop
25#endif
26
27#include "wx/tokenzr.h"
28#include "wx/arrstr.h"
29
30// Required for wxIs... functions
31#include <ctype.h>
32
33// ============================================================================
34// implementation
35// ============================================================================
36
37// ----------------------------------------------------------------------------
38// wxStringTokenizer construction
39// ----------------------------------------------------------------------------
40
41wxStringTokenizer::wxStringTokenizer(const wxString& str,
42 const wxString& delims,
43 wxStringTokenizerMode mode)
44{
45 SetString(str, delims, mode);
46}
47
48void wxStringTokenizer::SetString(const wxString& str,
49 const wxString& delims,
50 wxStringTokenizerMode mode)
51{
52 if ( mode == wxTOKEN_DEFAULT )
53 {
54 // by default, we behave like strtok() if the delimiters are only
55 // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
56 // whitespace delimiters, strtok() behaviour is better because we want
57 // to count consecutive spaces as one delimiter)
58 const wxChar *p;
59 for ( p = delims.c_str(); *p; p++ )
60 {
61 if ( !wxIsspace(*p) )
62 break;
63 }
64
65 if ( *p )
66 {
67 // not whitespace char in delims
68 mode = wxTOKEN_RET_EMPTY;
69 }
70 else
71 {
72 // only whitespaces
73 mode = wxTOKEN_STRTOK;
74 }
75 }
76
77 m_delims = delims;
78 m_mode = mode;
79
80 Reinit(str);
81}
82
83void wxStringTokenizer::Reinit(const wxString& str)
84{
85 wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
86
87 m_string = str;
88 m_pos = 0;
89
90 // empty string doesn't have any tokens
91 m_hasMore = !m_string.empty();
92}
93
94// ----------------------------------------------------------------------------
95// access to the tokens
96// ----------------------------------------------------------------------------
97
98// do we have more of them?
99bool wxStringTokenizer::HasMoreTokens() const
100{
101 wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
102
103 if ( m_string.find_first_not_of(m_delims) == wxString::npos )
104 {
105 // no non empty tokens left, but in 2 cases we still may return true if
106 // GetNextToken() wasn't called yet for this empty token:
107 //
108 // a) in wxTOKEN_RET_EMPTY_ALL mode we always do it
109 // b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a
110 // string containing only the delimiter: then there is an empty
111 // token just before it
112 return (m_mode == wxTOKEN_RET_EMPTY_ALL) ||
113 (m_mode == wxTOKEN_RET_EMPTY && m_pos == 0)
114 ? m_hasMore : false;
115 }
116 else
117 {
118 // there are non delimiter characters left, hence we do have more
119 // tokens
120 return true;
121 }
122}
123
124// count the number of tokens in the string
125size_t wxStringTokenizer::CountTokens() const
126{
127 wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
128
129 // VZ: this function is IMHO not very useful, so it's probably not very
130 // important if it's implementation here is not as efficient as it
131 // could be - but OTOH like this we're sure to get the correct answer
132 // in all modes
133 wxStringTokenizer *self = (wxStringTokenizer *)this; // const_cast
134 wxString stringInitial = m_string;
135
136 size_t count = 0;
137 while ( self->HasMoreTokens() )
138 {
139 count++;
140
141 (void)self->GetNextToken();
142 }
143
144 self->Reinit(stringInitial);
145
146 return count;
147}
148
149// ----------------------------------------------------------------------------
150// token extraction
151// ----------------------------------------------------------------------------
152
153wxString wxStringTokenizer::GetNextToken()
154{
155 // strtok() doesn't return empty tokens, all other modes do
156 bool allowEmpty = m_mode != wxTOKEN_STRTOK;
157
158 wxString token;
159 do
160 {
161 if ( !HasMoreTokens() )
162 {
163 break;
164 }
165 // find the end of this token
166 size_t pos = m_string.find_first_of(m_delims);
167
168 // and the start of the next one
169 if ( pos == wxString::npos )
170 {
171 // no more delimiters, the token is everything till the end of
172 // string
173 token = m_string;
174
175 m_pos += m_string.length();
176 m_string.clear();
177
178 // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
179 // mode (we will return the trailing one right now in this case)
180 m_hasMore = false;
181 }
182 else
183 {
184 size_t pos2 = pos + 1;
185
186 // in wxTOKEN_RET_DELIMS mode we return the delimiter character
187 // with token
188 token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
189 : pos);
190
191 // remove token with the following it delimiter from string
192 m_string.erase(0, pos2);
193
194 // keep track of the position in the original string too
195 m_pos += pos2;
196 }
197 }
198 while ( !allowEmpty && token.empty() );
199
200 return token;
201}
202
203// ----------------------------------------------------------------------------
204// public functions
205// ----------------------------------------------------------------------------
206
207wxArrayString wxStringTokenize(const wxString& str,
208 const wxString& delims,
209 wxStringTokenizerMode mode)
210{
211 wxArrayString tokens;
212 wxStringTokenizer tk(str, delims, mode);
213 while ( tk.HasMoreTokens() )
214 {
215 tokens.Add(tk.GetNextToken());
216 }
217
218 return tokens;
219}