/////////////////////////////////////////////////////////////////////////////
// Purpose:     String tokenizer - a C++ replacement for strtok(3)
// Author:      Guilhem Lavaux
// Modified by: (or rather rewritten by) Vadim Zeitlin
// Copyright:   (c) Guilhem Lavaux
// Licence:     wxWindows licence
/////////////////////////////////////////////////////////////////////////////
15 #include "wx/object.h"
16 #include "wx/string.h"
17 #include "wx/arrstr.h"
// ----------------------------------------------------------------------------
// constants
// ----------------------------------------------------------------------------

// default: delimiters are usual white space characters (space, tab, carriage
// return and line feed); used as the default "delims" argument by the
// tokenizer declarations in this header
#define wxDEFAULT_DELIMITERS (wxT(" \t\r\n"))
// wxStringTokenizer mode flags which determine its behaviour
//
// NB: this must remain a plain (unscoped) enum: the enumerators are used
//     unqualified as default argument values in this header
enum wxStringTokenizerMode
{
    wxTOKEN_INVALID = -1,   // set by def ctor until SetString() is called
    wxTOKEN_DEFAULT,        // strtok() for whitespace delims, RET_EMPTY else
    wxTOKEN_RET_EMPTY,      // return empty token in the middle of the string
    wxTOKEN_RET_EMPTY_ALL,  // return trailing empty tokens too
    wxTOKEN_RET_DELIMS,     // return the delim with token (implies RET_EMPTY)
    wxTOKEN_STRTOK          // behave exactly like strtok(3)
};
37 // ----------------------------------------------------------------------------
38 // wxStringTokenizer: replaces infamous strtok() and has some other features
39 // ----------------------------------------------------------------------------
41 class WXDLLIMPEXP_BASE wxStringTokenizer
: public wxObject
44 // ctors and initializers
45 // default ctor, call SetString() later
46 wxStringTokenizer() { m_mode
= wxTOKEN_INVALID
; }
47 // ctor which gives us the string
48 wxStringTokenizer(const wxString
& str
,
49 const wxString
& delims
= wxDEFAULT_DELIMITERS
,
50 wxStringTokenizerMode mode
= wxTOKEN_DEFAULT
);
52 // args are same as for the non default ctor above
53 void SetString(const wxString
& str
,
54 const wxString
& delims
= wxDEFAULT_DELIMITERS
,
55 wxStringTokenizerMode mode
= wxTOKEN_DEFAULT
);
57 // reinitialize the tokenizer with the same delimiters/mode
58 void Reinit(const wxString
& str
);
61 // return the number of remaining tokens
62 size_t CountTokens() const;
63 // did we reach the end of the string?
64 bool HasMoreTokens() const;
65 // get the next token, will return empty string if !HasMoreTokens()
66 wxString
GetNextToken();
67 // get the delimiter which terminated the token last retrieved by
68 // GetNextToken() or NUL if there had been no tokens yet or the last
69 // one wasn't terminated (but ran to the end of the string)
70 wxChar
GetLastDelimiter() const { return m_lastDelim
; }
72 // get current tokenizer state
73 // returns the part of the string which remains to tokenize (*not* the
75 wxString
GetString() const { return wxString(m_pos
, m_string
.end()); }
77 // returns the current position (i.e. one index after the last
78 // returned token or 0 if GetNextToken() has never been called) in the
80 size_t GetPosition() const { return m_pos
- m_string
.begin(); }
83 // get the current mode - can be different from the one passed to the
84 // ctor if it was wxTOKEN_DEFAULT
85 wxStringTokenizerMode
GetMode() const { return m_mode
; }
86 // do we return empty tokens?
87 bool AllowEmpty() const { return m_mode
!= wxTOKEN_STRTOK
; }
90 // backwards compatibility section from now on
91 // -------------------------------------------
93 // for compatibility only, use GetNextToken() instead
94 wxString
NextToken() { return GetNextToken(); }
96 // compatibility only, don't use
97 void SetString(const wxString
& to_tokenize
,
98 const wxString
& delims
,
99 bool WXUNUSED(ret_delim
))
101 SetString(to_tokenize
, delims
, wxTOKEN_RET_DELIMS
);
104 wxStringTokenizer(const wxString
& to_tokenize
,
105 const wxString
& delims
,
108 SetString(to_tokenize
, delims
, ret_delim
);
112 bool IsOk() const { return m_mode
!= wxTOKEN_INVALID
; }
114 bool DoHasMoreTokens() const;
123 MoreTokensState m_hasMoreTokens
;
125 wxString m_string
; // the string we tokenize
126 wxString::const_iterator m_stringEnd
;
127 // FIXME-UTF8: use wxWcharBuffer
128 wxWxCharBuffer m_delims
; // all possible delimiters
131 wxString::const_iterator m_pos
; // the current position in m_string
133 wxStringTokenizerMode m_mode
; // see wxTOKEN_XXX values
135 wxChar m_lastDelim
; // delimiter after last token or '\0'
138 // ----------------------------------------------------------------------------
139 // convenience function which returns all tokens at once
140 // ----------------------------------------------------------------------------
142 // the function takes the same parameters as wxStringTokenizer ctor and returns
143 // the array containing all tokens
144 wxArrayString WXDLLIMPEXP_BASE
145 wxStringTokenize(const wxString
& str
,
146 const wxString
& delims
= wxDEFAULT_DELIMITERS
,
147 wxStringTokenizerMode mode
= wxTOKEN_DEFAULT
);
149 #endif // _WX_TOKENZRH