]> git.saurik.com Git - wxWidgets.git/blame - src/common/tokenzr.cpp
better attempts to try .utf-8 suffix
[wxWidgets.git] / src / common / tokenzr.cpp
CommitLineData
f4ada568
GL
1/////////////////////////////////////////////////////////////////////////////
2// Name: tokenzr.cpp
3// Purpose: String tokenizer
4// Author: Guilhem Lavaux
1e6feb95 5// Modified by: Vadim Zeitlin (almost full rewrite)
f4ada568
GL
6// Created: 04/22/98
7// RCS-ID: $Id$
8// Copyright: (c) Guilhem Lavaux
9// Licence: wxWindows licence
10/////////////////////////////////////////////////////////////////////////////
11
bbf8fc53
VZ
12// ============================================================================
13// declarations
14// ============================================================================
15
16// ----------------------------------------------------------------------------
17// headers
18// ----------------------------------------------------------------------------
19
f4ada568 20#ifdef __GNUG__
85833f5c 21 #pragma implementation "tokenzr.h"
f4ada568
GL
22#endif
23
fcc6dddd
JS
24// For compilers that support precompilation, includes "wx.h".
25#include "wx/wxprec.h"
26
27#ifdef __BORLANDC__
85833f5c 28 #pragma hdrstop
fcc6dddd
JS
29#endif
30
f4ada568 31#include "wx/tokenzr.h"
df5168c4 32#include "wx/arrstr.h"
f4ada568 33
3f8e5072
JS
34// Required for wxIs... functions
35#include <ctype.h>
36
bbf8fc53
VZ
37// ============================================================================
38// implementation
39// ============================================================================
40
41// ----------------------------------------------------------------------------
42// wxStringTokenizer construction
43// ----------------------------------------------------------------------------
44
7c968cee 45wxStringTokenizer::wxStringTokenizer(const wxString& str,
f4ada568 46 const wxString& delims,
7c968cee 47 wxStringTokenizerMode mode)
bbf8fc53 48{
7c968cee 49 SetString(str, delims, mode);
bbf8fc53
VZ
50}
51
7c968cee 52void wxStringTokenizer::SetString(const wxString& str,
bbf8fc53 53 const wxString& delims,
7c968cee 54 wxStringTokenizerMode mode)
f4ada568 55{
7c968cee
VZ
56 if ( mode == wxTOKEN_DEFAULT )
57 {
58 // by default, we behave like strtok() if the delimiters are only
59 // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
60 // whitespace delimiters, strtok() behaviour is better because we want
61 // to count consecutive spaces as one delimiter)
62 const wxChar *p;
63 for ( p = delims.c_str(); *p; p++ )
64 {
65 if ( !wxIsspace(*p) )
66 break;
67 }
68
69 if ( *p )
70 {
71 // not whitespace char in delims
72 mode = wxTOKEN_RET_EMPTY;
73 }
74 else
75 {
76 // only whitespaces
77 mode = wxTOKEN_STRTOK;
78 }
79 }
80
85833f5c 81 m_delims = delims;
7c968cee 82 m_mode = mode;
bbf8fc53 83
7c968cee 84 Reinit(str);
f4ada568
GL
85}
86
7c968cee 87void wxStringTokenizer::Reinit(const wxString& str)
f4ada568 88{
7c968cee
VZ
89 wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
90
91 m_string = str;
92 m_pos = 0;
93
94 // empty string doesn't have any tokens
95 m_hasMore = !m_string.empty();
f4ada568
GL
96}
97
bbf8fc53 98// ----------------------------------------------------------------------------
7c968cee 99// access to the tokens
bbf8fc53
VZ
100// ----------------------------------------------------------------------------
101
7c968cee
VZ
102// do we have more of them?
103bool wxStringTokenizer::HasMoreTokens() const
f4ada568 104{
7c968cee
VZ
105 wxCHECK_MSG( IsOk(), FALSE, _T("you should call SetString() first") );
106
107 if ( m_string.find_first_not_of(m_delims) == wxString::npos )
bbf8fc53 108 {
1e6feb95
VZ
109 // no non empty tokens left, but in 2 cases we still may return TRUE if
110 // GetNextToken() wasn't called yet for this empty token:
111 //
112 // a) in wxTOKEN_RET_EMPTY_ALL mode we always do it
113 // b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a
114 // string containing only the delimiter: then there is an empty
115 // token just before it
116 return (m_mode == wxTOKEN_RET_EMPTY_ALL) ||
117 (m_mode == wxTOKEN_RET_EMPTY && m_pos == 0)
118 ? m_hasMore : FALSE;
7c968cee
VZ
119 }
120 else
121 {
122 // there are non delimiter characters left, hence we do have more
123 // tokens
124 return TRUE;
125 }
126}
bbf8fc53 127
7c968cee
VZ
128// count the number of tokens in the string
129size_t wxStringTokenizer::CountTokens() const
130{
131 wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
bbf8fc53 132
7c968cee
VZ
133 // VZ: this function is IMHO not very useful, so it's probably not very
134 // important if it's implementation here is not as efficient as it
135 // could be - but OTOH like this we're sure to get the correct answer
136 // in all modes
137 wxStringTokenizer *self = (wxStringTokenizer *)this; // const_cast
138 wxString stringInitial = m_string;
bbf8fc53 139
7c968cee
VZ
140 size_t count = 0;
141 while ( self->HasMoreTokens() )
bbf8fc53
VZ
142 {
143 count++;
7c968cee
VZ
144
145 (void)self->GetNextToken();
bbf8fc53
VZ
146 }
147
7c968cee
VZ
148 self->Reinit(stringInitial);
149
bbf8fc53
VZ
150 return count;
151}
152
153// ----------------------------------------------------------------------------
154// token extraction
155// ----------------------------------------------------------------------------
156
157wxString wxStringTokenizer::GetNextToken()
158{
7c968cee
VZ
159 // strtok() doesn't return empty tokens, all other modes do
160 bool allowEmpty = m_mode != wxTOKEN_STRTOK;
161
bbf8fc53 162 wxString token;
7c968cee 163 do
bbf8fc53 164 {
7c968cee 165 if ( !HasMoreTokens() )
85833f5c 166 {
7c968cee 167 break;
85833f5c 168 }
7c968cee
VZ
169 // find the end of this token
170 size_t pos = m_string.find_first_of(m_delims);
171
172 // and the start of the next one
173 if ( pos == wxString::npos )
85833f5c 174 {
7c968cee
VZ
175 // no more delimiters, the token is everything till the end of
176 // string
177 token = m_string;
178
179 m_pos += m_string.length();
180 m_string.clear();
bbf8fc53 181
7c968cee
VZ
182 // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
183 // mode (we will return the trailing one right now in this case)
bbf8fc53 184 m_hasMore = FALSE;
85833f5c 185 }
7c968cee
VZ
186 else
187 {
188 size_t pos2 = pos + 1;
f4ada568 189
7c968cee
VZ
190 // in wxTOKEN_RET_DELIMS mode we return the delimiter character
191 // with token
192 token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
193 : pos);
dab58492 194
7c968cee
VZ
195 // remove token with the following it delimiter from string
196 m_string.erase(0, pos2);
bbf8fc53 197
7c968cee
VZ
198 // keep track of the position in the original string too
199 m_pos += pos2;
200 }
85833f5c 201 }
7c968cee 202 while ( !allowEmpty && token.empty() );
bbf8fc53
VZ
203
204 return token;
f4ada568 205}
1e6feb95
VZ
206
207// ----------------------------------------------------------------------------
208// public functions
209// ----------------------------------------------------------------------------
210
211wxArrayString wxStringTokenize(const wxString& str,
212 const wxString& delims,
213 wxStringTokenizerMode mode)
214{
215 wxArrayString tokens;
216 wxStringTokenizer tk(str, delims, mode);
217 while ( tk.HasMoreTokens() )
218 {
219 tokens.Add(tk.GetNextToken());
220 }
221
222 return tokens;
223}