1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: String tokenizer - a C++ replacement for strtok(3)
4 // Author: Guilhem Lavaux
5 // Modified by: (or rather rewritten by) Vadim Zeitlin
7 // Copyright: (c) Guilhem Lavaux
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
14 #include "wx/object.h"
15 #include "wx/string.h"
16 #include "wx/arrstr.h"
18 // ----------------------------------------------------------------------------
20 // ----------------------------------------------------------------------------
// default: delimiters are usual white space characters
// (space, horizontal tab, carriage return and line feed)
#define wxDEFAULT_DELIMITERS (wxT(" \t\r\n"))
// wxStringTokenizer mode flags which determine its behaviour
//
// wxTOKEN_INVALID is explicitly -1; the remaining modes are numbered
// consecutively starting from 0 in declaration order.
enum wxStringTokenizerMode
{
    wxTOKEN_INVALID = -1,   // set by def ctor until SetString() is called
    wxTOKEN_DEFAULT,        // strtok() for whitespace delims, RET_EMPTY else
    wxTOKEN_RET_EMPTY,      // return empty token in the middle of the string
    wxTOKEN_RET_EMPTY_ALL,  // return trailing empty tokens too
    wxTOKEN_RET_DELIMS,     // return the delim with token (implies RET_EMPTY)
    wxTOKEN_STRTOK          // behave exactly like strtok(3)
};
36 // ----------------------------------------------------------------------------
37 // wxStringTokenizer: replaces infamous strtok() and has some other features
38 // ----------------------------------------------------------------------------
// Tokenizer over a wxString: the string is split at any of the characters
// given as delimiters, and a wxStringTokenizerMode value selects how empty
// tokens and delimiters are treated.
//
// NOTE(review): this listing appears to be an incomplete extraction of the
// class - the opening/closing braces, the access specifiers and the
// declaration of MoreTokensState are not visible here. Confirm against the
// real header before relying on member visibility.
40 class WXDLLIMPEXP_BASE wxStringTokenizer
: public wxObject
43 // ctors and initializers
44 // default ctor, call SetString() later
// Only m_mode is assigned here (to wxTOKEN_INVALID), so IsOk() reports false
// for a default-constructed object.
45 wxStringTokenizer() { m_mode
= wxTOKEN_INVALID
; }
46 // ctor which gives us the string
// delims defaults to wxDEFAULT_DELIMITERS and mode to wxTOKEN_DEFAULT.
47 wxStringTokenizer(const wxString
& str
,
48 const wxString
& delims
= wxDEFAULT_DELIMITERS
,
49 wxStringTokenizerMode mode
= wxTOKEN_DEFAULT
);
51 // args are same as for the non default ctor above
52 void SetString(const wxString
& str
,
53 const wxString
& delims
= wxDEFAULT_DELIMITERS
,
54 wxStringTokenizerMode mode
= wxTOKEN_DEFAULT
);
56 // reinitialize the tokenizer with the same delimiters/mode
57 void Reinit(const wxString
& str
);
60 // return the number of remaining tokens
61 size_t CountTokens() const;
62 // did we reach the end of the string?
63 bool HasMoreTokens() const;
64 // get the next token, will return empty string if !HasMoreTokens()
65 wxString
GetNextToken();
66 // get the delimiter which terminated the token last retrieved by
67 // GetNextToken() or NUL if there had been no tokens yet or the last
68 // one wasn't terminated (but ran to the end of the string)
69 wxChar
GetLastDelimiter() const { return m_lastDelim
; }
71 // get current tokenizer state
72 // returns the part of the string which remains to tokenize (*not* the
// NOTE(review): the end of the comment above is missing from this listing
// (presumably "initial string)"). The result is a new wxString built from
// the range [m_pos, m_string.end()).
74 wxString
GetString() const { return wxString(m_pos
, m_string
.end()); }
76 // returns the current position (i.e. one index after the last
77 // returned token or 0 if GetNextToken() has never been called) in the
// NOTE(review): the end of the comment above is missing from this listing
// (presumably "original string"). The value is the distance of m_pos from
// m_string.begin().
79 size_t GetPosition() const { return m_pos
- m_string
.begin(); }
82 // get the current mode - can be different from the one passed to the
83 // ctor if it was wxTOKEN_DEFAULT
84 wxStringTokenizerMode
GetMode() const { return m_mode
; }
85 // do we return empty tokens?
// True for every mode other than wxTOKEN_STRTOK.
86 bool AllowEmpty() const { return m_mode
!= wxTOKEN_STRTOK
; }
89 // backwards compatibility section from now on
90 // -------------------------------------------
92 // for compatibility only, use GetNextToken() instead
93 wxString
NextToken() { return GetNextToken(); }
95 // compatibility only, don't use
// The boolean argument is wrapped in WXUNUSED and is not referenced in the
// body: this overload always forwards wxTOKEN_RET_DELIMS as the mode.
96 void SetString(const wxString
& to_tokenize
,
97 const wxString
& delims
,
98 bool WXUNUSED(ret_delim
))
100 SetString(to_tokenize
, delims
, wxTOKEN_RET_DELIMS
);
// Compatibility ctor forwarding to SetString().
// NOTE(review): the body passes ret_delim, but the declaration of that third
// parameter (and the braces around the body) is not visible in this listing.
103 wxStringTokenizer(const wxString
& to_tokenize
,
104 const wxString
& delims
,
107 SetString(to_tokenize
, delims
, ret_delim
);
// True once the mode is anything other than wxTOKEN_INVALID, i.e. the value
// assigned by the default ctor.
111 bool IsOk() const { return m_mode
!= wxTOKEN_INVALID
; }
113 bool DoHasMoreTokens() const;
// NOTE(review): MoreTokensState is not declared anywhere in the visible
// listing; presumably a nested enum caching the HasMoreTokens() result -
// confirm against the real header.
122 MoreTokensState m_hasMoreTokens
;
124 wxString m_string
; // the string we tokenize
// NOTE(review): no use of m_stringEnd is visible here; presumably the end
// iterator of m_string - confirm against the implementation.
125 wxString::const_iterator m_stringEnd
;
126 // FIXME-UTF8: use wxWcharBuffer
127 wxWxCharBuffer m_delims
; // all possible delimiters
130 wxString::const_iterator m_pos
; // the current position in m_string
132 wxStringTokenizerMode m_mode
; // see wxTOKEN_XXX values
134 wxChar m_lastDelim
; // delimiter after last token or '\0'
137 // ----------------------------------------------------------------------------
138 // convenience function which returns all tokens at once
139 // ----------------------------------------------------------------------------
141 // the function takes the same parameters as wxStringTokenizer ctor and returns
142 // the array containing all tokens
143 wxArrayString WXDLLIMPEXP_BASE
144 wxStringTokenize(const wxString
& str
,
145 const wxString
& delims
= wxDEFAULT_DELIMITERS
,
146 wxStringTokenizerMode mode
= wxTOKEN_DEFAULT
);
148 #endif // _WX_TOKENZRH