]>
Commit | Line | Data |
---|---|---|
f4ada568 | 1 | ///////////////////////////////////////////////////////////////////////////// |
7c968cee VZ |
2 | // Name: wx/tokenzr.h |
3 | // Purpose: String tokenizer - a C++ replacement for strtok(3) | |
f4ada568 | 4 | // Author: Guilhem Lavaux |
1e6feb95 | 5 | // Modified by: (or rather rewritten by) Vadim Zeitlin |
f4ada568 | 6 | // Created: 04/22/98 |
f4ada568 | 7 | // Copyright: (c) Guilhem Lavaux |
65571936 | 8 | // Licence: wxWindows licence |
f4ada568 GL |
9 | ///////////////////////////////////////////////////////////////////////////// |
10 | ||
11 | #ifndef _WX_TOKENZRH | |
12 | #define _WX_TOKENZRH | |
13 | ||
f4ada568 GL |
14 | #include "wx/object.h" |
15 | #include "wx/string.h" | |
df5168c4 | 16 | #include "wx/arrstr.h" |
bbf8fc53 | 17 | |
7c968cee VZ |
18 | // ---------------------------------------------------------------------------- |
19 | // constants | |
20 | // ---------------------------------------------------------------------------- | |
21 | ||
bbf8fc53 | 22 | // default: delimiters are usual white space characters |
9a83f860 | 23 | #define wxDEFAULT_DELIMITERS (wxT(" \t\r\n")) |
f4ada568 | 24 | |
7c968cee VZ |
25 | // wxStringTokenizer mode flags which determine its behaviour |
enum wxStringTokenizerMode
{
    // placeholder used by the default ctor until SetString() is called
    wxTOKEN_INVALID       = -1,

    // acts like strtok() when the delimiters are whitespace, otherwise
    // like wxTOKEN_RET_EMPTY
    wxTOKEN_DEFAULT       = 0,

    // empty tokens in the middle of the string are returned
    wxTOKEN_RET_EMPTY     = 1,

    // trailing empty tokens are returned as well
    wxTOKEN_RET_EMPTY_ALL = 2,

    // each token is returned together with its delimiter (implies RET_EMPTY)
    wxTOKEN_RET_DELIMS    = 3,

    // behaves exactly like strtok(3)
    wxTOKEN_STRTOK        = 4
};
35 | ||
36 | // ---------------------------------------------------------------------------- | |
37 | // wxStringTokenizer: replaces infamous strtok() and has some other features | |
38 | // ---------------------------------------------------------------------------- | |
39 | ||
bddd7a8d | 40 | class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject |
85833f5c | 41 | { |
f4ada568 | 42 | public: |
7c968cee VZ |
43 | // ctors and initializers |
44 | // default ctor, call SetString() later | |
45 | wxStringTokenizer() { m_mode = wxTOKEN_INVALID; } | |
46 | // ctor which gives us the string | |
47 | wxStringTokenizer(const wxString& str, | |
bbf8fc53 | 48 | const wxString& delims = wxDEFAULT_DELIMITERS, |
7c968cee VZ |
49 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); |
50 | ||
51 | // args are same as for the non default ctor above | |
52 | void SetString(const wxString& str, | |
bbf8fc53 | 53 | const wxString& delims = wxDEFAULT_DELIMITERS, |
7c968cee VZ |
54 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); |
55 | ||
56 | // reinitialize the tokenizer with the same delimiters/mode | |
57 | void Reinit(const wxString& str); | |
85833f5c | 58 | |
7c968cee | 59 | // tokens access |
4626c57c | 60 | // return the number of remaining tokens |
bbf8fc53 | 61 | size_t CountTokens() const; |
7c968cee VZ |
62 | // did we reach the end of the string? |
63 | bool HasMoreTokens() const; | |
64 | // get the next token, will return empty string if !HasMoreTokens() | |
bbf8fc53 | 65 | wxString GetNextToken(); |
4626c57c VZ |
66 | // get the delimiter which terminated the token last retrieved by |
67 | // GetNextToken() or NUL if there had been no tokens yet or the last | |
68 | // one wasn't terminated (but ran to the end of the string) | |
69 | wxChar GetLastDelimiter() const { return m_lastDelim; } | |
85833f5c | 70 | |
7c968cee VZ |
71 | // get current tokenizer state |
72 | // returns the part of the string which remains to tokenize (*not* the | |
73 | // initial string) | |
f0dfc29c | 74 | wxString GetString() const { return wxString(m_pos, m_string.end()); } |
85833f5c | 75 | |
7c968cee VZ |
76 | // returns the current position (i.e. one index after the last |
77 | // returned token or 0 if GetNextToken() has never been called) in the | |
78 | // original string | |
f0dfc29c | 79 | size_t GetPosition() const { return m_pos - m_string.begin(); } |
dbdb39b2 | 80 | |
7c968cee VZ |
81 | // misc |
82 | // get the current mode - can be different from the one passed to the | |
83 | // ctor if it was wxTOKEN_DEFAULT | |
84 | wxStringTokenizerMode GetMode() const { return m_mode; } | |
4626c57c VZ |
85 | // do we return empty tokens? |
86 | bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; } | |
87 | ||
7c968cee VZ |
88 | |
89 | // backwards compatibility section from now on | |
90 | // ------------------------------------------- | |
91 | ||
bbf8fc53 VZ |
92 | // for compatibility only, use GetNextToken() instead |
93 | wxString NextToken() { return GetNextToken(); } | |
85833f5c | 94 | |
7c968cee VZ |
95 | // compatibility only, don't use |
96 | void SetString(const wxString& to_tokenize, | |
97 | const wxString& delims, | |
06b466c7 | 98 | bool WXUNUSED(ret_delim)) |
7c968cee VZ |
99 | { |
100 | SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS); | |
101 | } | |
102 | ||
103 | wxStringTokenizer(const wxString& to_tokenize, | |
104 | const wxString& delims, | |
105 | bool ret_delim) | |
106 | { | |
107 | SetString(to_tokenize, delims, ret_delim); | |
108 | } | |
109 | ||
2224580a | 110 | protected: |
7c968cee VZ |
111 | bool IsOk() const { return m_mode != wxTOKEN_INVALID; } |
112 | ||
f0dfc29c | 113 | bool DoHasMoreTokens() const; |
bbf8fc53 | 114 | |
f0dfc29c VS |
115 | enum MoreTokensState |
116 | { | |
117 | MoreTokens_Unknown, | |
118 | MoreTokens_Yes, | |
119 | MoreTokens_No | |
120 | }; | |
121 | ||
122 | MoreTokensState m_hasMoreTokens; | |
123 | ||
124 | wxString m_string; // the string we tokenize | |
125 | wxString::const_iterator m_stringEnd; | |
126 | // FIXME-UTF8: use wxWcharBuffer | |
127 | wxWxCharBuffer m_delims; // all possible delimiters | |
128 | size_t m_delimsLen; | |
129 | ||
130 | wxString::const_iterator m_pos; // the current position in m_string | |
bbf8fc53 | 131 | |
7c968cee VZ |
132 | wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values |
133 | ||
4626c57c | 134 | wxChar m_lastDelim; // delimiter after last token or '\0' |
f4ada568 GL |
135 | }; |
136 | ||
1e6feb95 VZ |
137 | // ---------------------------------------------------------------------------- |
138 | // convenience function which returns all tokens at once | |
139 | // ---------------------------------------------------------------------------- | |
140 | ||
141 | // the function takes the same parameters as wxStringTokenizer ctor and returns | |
142 | // the array containing all tokens | |
bddd7a8d | 143 | wxArrayString WXDLLIMPEXP_BASE |
1e6feb95 VZ |
144 | wxStringTokenize(const wxString& str, |
145 | const wxString& delims = wxDEFAULT_DELIMITERS, | |
146 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); | |
147 | ||
85833f5c | 148 | #endif // _WX_TOKENZRH |