]>
Commit | Line | Data |
---|---|---|
f4ada568 | 1 | ///////////////////////////////////////////////////////////////////////////// |
7c968cee VZ |
2 | // Name: wx/tokenzr.h |
3 | // Purpose: String tokenizer - a C++ replacement for strtok(3) | |
f4ada568 | 4 | // Author: Guilhem Lavaux |
1e6feb95 | 5 | // Modified by: (or rather rewritten by) Vadim Zeitlin |
f4ada568 GL |
6 | // Created: 04/22/98 |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) Guilhem Lavaux | |
65571936 | 9 | // Licence: wxWindows licence |
f4ada568 GL |
10 | ///////////////////////////////////////////////////////////////////////////// |
11 | ||
12 | #ifndef _WX_TOKENZRH | |
13 | #define _WX_TOKENZRH | |
14 | ||
f4ada568 GL |
15 | #include "wx/object.h" |
16 | #include "wx/string.h" | |
df5168c4 | 17 | #include "wx/arrstr.h" |
bbf8fc53 | 18 | |
7c968cee VZ |
19 | // ---------------------------------------------------------------------------- |
20 | // constants | |
21 | // ---------------------------------------------------------------------------- | |
22 | ||
bbf8fc53 VZ |
// Delimiter set used when the caller doesn't specify one: the usual white
// space characters (space, TAB, CR and LF).
#define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
f4ada568 | 25 | |
7c968cee VZ |
// Flags selecting how wxStringTokenizer splits its input and, in particular,
// what it does about empty tokens.
enum wxStringTokenizerMode
{
    // set by the default ctor until SetString() is called
    wxTOKEN_INVALID = -1,

    // strtok() for whitespace delimiters, RET_EMPTY otherwise
    wxTOKEN_DEFAULT = 0,

    // return empty token in the middle of the string
    wxTOKEN_RET_EMPTY = 1,

    // return trailing empty tokens too
    wxTOKEN_RET_EMPTY_ALL = 2,

    // return the delimiter with the token (implies RET_EMPTY)
    wxTOKEN_RET_DELIMS = 3,

    // behave exactly like strtok(3)
    wxTOKEN_STRTOK = 4
};
36 | ||
37 | // ---------------------------------------------------------------------------- | |
38 | // wxStringTokenizer: replaces infamous strtok() and has some other features | |
39 | // ---------------------------------------------------------------------------- | |
40 | ||
bddd7a8d | 41 | class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject |
85833f5c | 42 | { |
f4ada568 | 43 | public: |
7c968cee VZ |
44 | // ctors and initializers |
45 | // default ctor, call SetString() later | |
46 | wxStringTokenizer() { m_mode = wxTOKEN_INVALID; } | |
47 | // ctor which gives us the string | |
48 | wxStringTokenizer(const wxString& str, | |
bbf8fc53 | 49 | const wxString& delims = wxDEFAULT_DELIMITERS, |
7c968cee VZ |
50 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); |
51 | ||
52 | // args are same as for the non default ctor above | |
53 | void SetString(const wxString& str, | |
bbf8fc53 | 54 | const wxString& delims = wxDEFAULT_DELIMITERS, |
7c968cee VZ |
55 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); |
56 | ||
57 | // reinitialize the tokenizer with the same delimiters/mode | |
58 | void Reinit(const wxString& str); | |
85833f5c | 59 | |
7c968cee | 60 | // tokens access |
4626c57c | 61 | // return the number of remaining tokens |
bbf8fc53 | 62 | size_t CountTokens() const; |
7c968cee VZ |
63 | // did we reach the end of the string? |
64 | bool HasMoreTokens() const; | |
65 | // get the next token, will return empty string if !HasMoreTokens() | |
bbf8fc53 | 66 | wxString GetNextToken(); |
4626c57c VZ |
67 | // get the delimiter which terminated the token last retrieved by |
68 | // GetNextToken() or NUL if there had been no tokens yet or the last | |
69 | // one wasn't terminated (but ran to the end of the string) | |
70 | wxChar GetLastDelimiter() const { return m_lastDelim; } | |
85833f5c | 71 | |
7c968cee VZ |
72 | // get current tokenizer state |
73 | // returns the part of the string which remains to tokenize (*not* the | |
74 | // initial string) | |
f0dfc29c | 75 | wxString GetString() const { return wxString(m_pos, m_string.end()); } |
85833f5c | 76 | |
7c968cee VZ |
77 | // returns the current position (i.e. one index after the last |
78 | // returned token or 0 if GetNextToken() has never been called) in the | |
79 | // original string | |
f0dfc29c | 80 | size_t GetPosition() const { return m_pos - m_string.begin(); } |
dbdb39b2 | 81 | |
7c968cee VZ |
82 | // misc |
83 | // get the current mode - can be different from the one passed to the | |
84 | // ctor if it was wxTOKEN_DEFAULT | |
85 | wxStringTokenizerMode GetMode() const { return m_mode; } | |
4626c57c VZ |
86 | // do we return empty tokens? |
87 | bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; } | |
88 | ||
7c968cee VZ |
89 | |
90 | // backwards compatibility section from now on | |
91 | // ------------------------------------------- | |
92 | ||
bbf8fc53 VZ |
93 | // for compatibility only, use GetNextToken() instead |
94 | wxString NextToken() { return GetNextToken(); } | |
85833f5c | 95 | |
7c968cee VZ |
96 | // compatibility only, don't use |
97 | void SetString(const wxString& to_tokenize, | |
98 | const wxString& delims, | |
06b466c7 | 99 | bool WXUNUSED(ret_delim)) |
7c968cee VZ |
100 | { |
101 | SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS); | |
102 | } | |
103 | ||
104 | wxStringTokenizer(const wxString& to_tokenize, | |
105 | const wxString& delims, | |
106 | bool ret_delim) | |
107 | { | |
108 | SetString(to_tokenize, delims, ret_delim); | |
109 | } | |
110 | ||
2224580a | 111 | protected: |
7c968cee VZ |
112 | bool IsOk() const { return m_mode != wxTOKEN_INVALID; } |
113 | ||
f0dfc29c | 114 | bool DoHasMoreTokens() const; |
bbf8fc53 | 115 | |
f0dfc29c VS |
116 | enum MoreTokensState |
117 | { | |
118 | MoreTokens_Unknown, | |
119 | MoreTokens_Yes, | |
120 | MoreTokens_No | |
121 | }; | |
122 | ||
123 | MoreTokensState m_hasMoreTokens; | |
124 | ||
125 | wxString m_string; // the string we tokenize | |
126 | wxString::const_iterator m_stringEnd; | |
127 | // FIXME-UTF8: use wxWcharBuffer | |
128 | wxWxCharBuffer m_delims; // all possible delimiters | |
129 | size_t m_delimsLen; | |
130 | ||
131 | wxString::const_iterator m_pos; // the current position in m_string | |
bbf8fc53 | 132 | |
7c968cee VZ |
133 | wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values |
134 | ||
4626c57c | 135 | wxChar m_lastDelim; // delimiter after last token or '\0' |
f4ada568 GL |
136 | }; |
137 | ||
1e6feb95 VZ |
138 | // ---------------------------------------------------------------------------- |
139 | // convenience function which returns all tokens at once | |
140 | // ---------------------------------------------------------------------------- | |
141 | ||
142 | // the function takes the same parameters as wxStringTokenizer ctor and returns | |
143 | // the array containing all tokens | |
bddd7a8d | 144 | wxArrayString WXDLLIMPEXP_BASE |
1e6feb95 VZ |
145 | wxStringTokenize(const wxString& str, |
146 | const wxString& delims = wxDEFAULT_DELIMITERS, | |
147 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); | |
148 | ||
85833f5c | 149 | #endif // _WX_TOKENZRH |