| 1 | ///////////////////////////////////////////////////////////////////////////// |
| 2 | // Name: wx/tokenzr.h |
| 3 | // Purpose: String tokenizer - a C++ replacement for strtok(3) |
| 4 | // Author: Guilhem Lavaux |
| 5 | // Modified by: (or rather rewritten by) Vadim Zeitlin |
| 6 | // Created: 04/22/98 |
| 7 | // RCS-ID: $Id$ |
| 8 | // Copyright: (c) Guilhem Lavaux |
| 9 | // Licence: wxWindows licence |
| 10 | ///////////////////////////////////////////////////////////////////////////// |
| 11 | |
| 12 | #ifndef _WX_TOKENZRH |
| 13 | #define _WX_TOKENZRH |
| 14 | |
| 15 | #include "wx/object.h" |
| 16 | #include "wx/string.h" |
| 17 | #include "wx/arrstr.h" |
| 18 | |
| 19 | // ---------------------------------------------------------------------------- |
| 20 | // constants |
| 21 | // ---------------------------------------------------------------------------- |
| 22 | |
// Default delimiter set: the usual white space characters (space, tab,
// carriage return and line feed).
//
// wxT() is used here rather than _T(): _T() is only a compatibility alias
// for wxT() and may be unavailable (e.g. when wxNO__T is defined), which
// would make this public header unusable.
#define wxDEFAULT_DELIMITERS (wxT(" \t\r\n"))
| 25 | |
// Modes selecting which tokens (and delimiters) wxStringTokenizer reports.
enum wxStringTokenizerMode
{
    // placeholder set by the default ctor until SetString() is called
    wxTOKEN_INVALID       = -1,

    // strtok() behaviour for whitespace delimiters, RET_EMPTY otherwise
    wxTOKEN_DEFAULT       = 0,

    // return empty tokens found in the middle of the string
    wxTOKEN_RET_EMPTY     = 1,

    // additionally return the trailing empty tokens
    wxTOKEN_RET_EMPTY_ALL = 2,

    // return the delimiter together with the token (implies RET_EMPTY)
    wxTOKEN_RET_DELIMS    = 3,

    // behave exactly like strtok(3)
    wxTOKEN_STRTOK        = 4
};
| 36 | |
| 37 | // ---------------------------------------------------------------------------- |
| 38 | // wxStringTokenizer: replaces infamous strtok() and has some other features |
| 39 | // ---------------------------------------------------------------------------- |
| 40 | |
| 41 | class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject |
| 42 | { |
| 43 | public: |
| 44 | // ctors and initializers |
| 45 | // default ctor, call SetString() later |
| 46 | wxStringTokenizer() { m_mode = wxTOKEN_INVALID; } |
| 47 | // ctor which gives us the string |
| 48 | wxStringTokenizer(const wxString& str, |
| 49 | const wxString& delims = wxDEFAULT_DELIMITERS, |
| 50 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); |
| 51 | |
| 52 | // args are same as for the non default ctor above |
| 53 | void SetString(const wxString& str, |
| 54 | const wxString& delims = wxDEFAULT_DELIMITERS, |
| 55 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); |
| 56 | |
| 57 | // reinitialize the tokenizer with the same delimiters/mode |
| 58 | void Reinit(const wxString& str); |
| 59 | |
| 60 | // tokens access |
| 61 | // return the number of remaining tokens |
| 62 | size_t CountTokens() const; |
| 63 | // did we reach the end of the string? |
| 64 | bool HasMoreTokens() const; |
| 65 | // get the next token, will return empty string if !HasMoreTokens() |
| 66 | wxString GetNextToken(); |
| 67 | // get the delimiter which terminated the token last retrieved by |
| 68 | // GetNextToken() or NUL if there had been no tokens yet or the last |
| 69 | // one wasn't terminated (but ran to the end of the string) |
| 70 | wxChar GetLastDelimiter() const { return m_lastDelim; } |
| 71 | |
| 72 | // get current tokenizer state |
| 73 | // returns the part of the string which remains to tokenize (*not* the |
| 74 | // initial string) |
| 75 | wxString GetString() const { return wxString(m_pos, m_string.end()); } |
| 76 | |
| 77 | // returns the current position (i.e. one index after the last |
| 78 | // returned token or 0 if GetNextToken() has never been called) in the |
| 79 | // original string |
| 80 | size_t GetPosition() const { return m_pos - m_string.begin(); } |
| 81 | |
| 82 | // misc |
| 83 | // get the current mode - can be different from the one passed to the |
| 84 | // ctor if it was wxTOKEN_DEFAULT |
| 85 | wxStringTokenizerMode GetMode() const { return m_mode; } |
| 86 | // do we return empty tokens? |
| 87 | bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; } |
| 88 | |
| 89 | |
| 90 | // backwards compatibility section from now on |
| 91 | // ------------------------------------------- |
| 92 | |
| 93 | // for compatibility only, use GetNextToken() instead |
| 94 | wxString NextToken() { return GetNextToken(); } |
| 95 | |
| 96 | // compatibility only, don't use |
| 97 | void SetString(const wxString& to_tokenize, |
| 98 | const wxString& delims, |
| 99 | bool WXUNUSED(ret_delim)) |
| 100 | { |
| 101 | SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS); |
| 102 | } |
| 103 | |
| 104 | wxStringTokenizer(const wxString& to_tokenize, |
| 105 | const wxString& delims, |
| 106 | bool ret_delim) |
| 107 | { |
| 108 | SetString(to_tokenize, delims, ret_delim); |
| 109 | } |
| 110 | |
| 111 | protected: |
| 112 | bool IsOk() const { return m_mode != wxTOKEN_INVALID; } |
| 113 | |
| 114 | bool DoHasMoreTokens() const; |
| 115 | |
| 116 | enum MoreTokensState |
| 117 | { |
| 118 | MoreTokens_Unknown, |
| 119 | MoreTokens_Yes, |
| 120 | MoreTokens_No |
| 121 | }; |
| 122 | |
| 123 | MoreTokensState m_hasMoreTokens; |
| 124 | |
| 125 | wxString m_string; // the string we tokenize |
| 126 | wxString::const_iterator m_stringEnd; |
| 127 | // FIXME-UTF8: use wxWcharBuffer |
| 128 | wxWxCharBuffer m_delims; // all possible delimiters |
| 129 | size_t m_delimsLen; |
| 130 | |
| 131 | wxString::const_iterator m_pos; // the current position in m_string |
| 132 | |
| 133 | wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values |
| 134 | |
| 135 | wxChar m_lastDelim; // delimiter after last token or '\0' |
| 136 | }; |
| 137 | |
| 138 | // ---------------------------------------------------------------------------- |
| 139 | // convenience function which returns all tokens at once |
| 140 | // ---------------------------------------------------------------------------- |
| 141 | |
| 142 | // the function takes the same parameters as wxStringTokenizer ctor and returns |
| 143 | // the array containing all tokens |
| 144 | wxArrayString WXDLLIMPEXP_BASE |
| 145 | wxStringTokenize(const wxString& str, |
| 146 | const wxString& delims = wxDEFAULT_DELIMITERS, |
| 147 | wxStringTokenizerMode mode = wxTOKEN_DEFAULT); |
| 148 | |
| 149 | #endif // _WX_TOKENZRH |