]> git.saurik.com Git - wxWidgets.git/blob - include/wx/tokenzr.h
radically simplified declarations of wxUString-specific char buffers: there are no...
[wxWidgets.git] / include / wx / tokenzr.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: wx/tokenzr.h
3 // Purpose: String tokenizer - a C++ replacement for strtok(3)
4 // Author: Guilhem Lavaux
5 // Modified by: (or rather rewritten by) Vadim Zeitlin
6 // Created: 04/22/98
7 // RCS-ID: $Id$
8 // Copyright: (c) Guilhem Lavaux
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
11
12 #ifndef _WX_TOKENZRH
13 #define _WX_TOKENZRH
14
15 #include "wx/object.h"
16 #include "wx/string.h"
17 #include "wx/arrstr.h"
18
19 // ----------------------------------------------------------------------------
20 // constants
21 // ----------------------------------------------------------------------------
22
// Default delimiter set, used as the default value of the "delims" argument
// throughout this header: the usual white space characters, i.e. space, tab,
// carriage return and line feed.
#define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
25
// Behaviour selectors for wxStringTokenizer: exactly one of these values is
// passed as the "mode" argument of its ctor or of SetString().
enum wxStringTokenizerMode
{
    // placeholder used by the default ctor until SetString() is called
    wxTOKEN_INVALID = -1,

    // acts like strtok() if the delimiters are whitespace and like
    // wxTOKEN_RET_EMPTY otherwise
    wxTOKEN_DEFAULT = 0,

    // empty tokens in the middle of the string are returned
    wxTOKEN_RET_EMPTY = 1,

    // as above, but trailing empty tokens are returned as well
    wxTOKEN_RET_EMPTY_ALL = 2,

    // each token is returned together with its delimiter (this implies
    // wxTOKEN_RET_EMPTY)
    wxTOKEN_RET_DELIMS = 3,

    // behave exactly like strtok(3)
    wxTOKEN_STRTOK = 4
};
36
37 // ----------------------------------------------------------------------------
38 // wxStringTokenizer: replaces infamous strtok() and has some other features
39 // ----------------------------------------------------------------------------
40
41 class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject
42 {
43 public:
44 // ctors and initializers
45 // default ctor, call SetString() later
46 wxStringTokenizer() { m_mode = wxTOKEN_INVALID; }
47 // ctor which gives us the string
48 wxStringTokenizer(const wxString& str,
49 const wxString& delims = wxDEFAULT_DELIMITERS,
50 wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
51
52 // args are same as for the non default ctor above
53 void SetString(const wxString& str,
54 const wxString& delims = wxDEFAULT_DELIMITERS,
55 wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
56
57 // reinitialize the tokenizer with the same delimiters/mode
58 void Reinit(const wxString& str);
59
60 // tokens access
61 // return the number of remaining tokens
62 size_t CountTokens() const;
63 // did we reach the end of the string?
64 bool HasMoreTokens() const;
65 // get the next token, will return empty string if !HasMoreTokens()
66 wxString GetNextToken();
67 // get the delimiter which terminated the token last retrieved by
68 // GetNextToken() or NUL if there had been no tokens yet or the last
69 // one wasn't terminated (but ran to the end of the string)
70 wxChar GetLastDelimiter() const { return m_lastDelim; }
71
72 // get current tokenizer state
73 // returns the part of the string which remains to tokenize (*not* the
74 // initial string)
75 wxString GetString() const { return wxString(m_pos, m_string.end()); }
76
77 // returns the current position (i.e. one index after the last
78 // returned token or 0 if GetNextToken() has never been called) in the
79 // original string
80 size_t GetPosition() const { return m_pos - m_string.begin(); }
81
82 // misc
83 // get the current mode - can be different from the one passed to the
84 // ctor if it was wxTOKEN_DEFAULT
85 wxStringTokenizerMode GetMode() const { return m_mode; }
86 // do we return empty tokens?
87 bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }
88
89
90 // backwards compatibility section from now on
91 // -------------------------------------------
92
93 // for compatibility only, use GetNextToken() instead
94 wxString NextToken() { return GetNextToken(); }
95
96 // compatibility only, don't use
97 void SetString(const wxString& to_tokenize,
98 const wxString& delims,
99 bool WXUNUSED(ret_delim))
100 {
101 SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS);
102 }
103
104 wxStringTokenizer(const wxString& to_tokenize,
105 const wxString& delims,
106 bool ret_delim)
107 {
108 SetString(to_tokenize, delims, ret_delim);
109 }
110
111 protected:
112 bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
113
114 bool DoHasMoreTokens() const;
115
116 enum MoreTokensState
117 {
118 MoreTokens_Unknown,
119 MoreTokens_Yes,
120 MoreTokens_No
121 };
122
123 MoreTokensState m_hasMoreTokens;
124
125 wxString m_string; // the string we tokenize
126 wxString::const_iterator m_stringEnd;
127 // FIXME-UTF8: use wxWcharBuffer
128 wxWxCharBuffer m_delims; // all possible delimiters
129 size_t m_delimsLen;
130
131 wxString::const_iterator m_pos; // the current position in m_string
132
133 wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values
134
135 wxChar m_lastDelim; // delimiter after last token or '\0'
136 };
137
138 // ----------------------------------------------------------------------------
139 // convenience function which returns all tokens at once
140 // ----------------------------------------------------------------------------
141
// Tokenize the whole string in one call.
//
// The function takes the same parameters as the wxStringTokenizer ctor (the
// string to tokenize, the set of delimiter characters which defaults to
// white space and the tokenizer mode) and returns the array containing all
// tokens.
wxArrayString WXDLLIMPEXP_BASE
wxStringTokenize(const wxString& str,
                 const wxString& delims = wxDEFAULT_DELIMITERS,
                 wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
148
149 #endif // _WX_TOKENZRH