X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/2224580a415da12d7c453943c4ae4a1b58ecd369..10c2f98a3951e534ac608fb801dd44f795733b82:/include/wx/tokenzr.h diff --git a/include/wx/tokenzr.h b/include/wx/tokenzr.h index b1d8ed65cc..fca43655d9 100644 --- a/include/wx/tokenzr.h +++ b/include/wx/tokenzr.h @@ -1,8 +1,8 @@ ///////////////////////////////////////////////////////////////////////////// -// Name: tokenzr.h -// Purpose: String tokenizer +// Name: wx/tokenzr.h +// Purpose: String tokenizer - a C++ replacement for strtok(3) // Author: Guilhem Lavaux -// Modified by: Gregory Pietsch +// Modified by: (or rather rewritten by) Vadim Zeitlin // Created: 04/22/98 // RCS-ID: $Id$ // Copyright: (c) Guilhem Lavaux @@ -12,53 +12,138 @@ #ifndef _WX_TOKENZRH #define _WX_TOKENZRH -#ifdef __GNUG__ - #pragma interface "tokenzr.h" -#endif - #include "wx/object.h" #include "wx/string.h" -#include "wx/filefn.h" +#include "wx/arrstr.h" + +// ---------------------------------------------------------------------------- +// constants +// ---------------------------------------------------------------------------- + +// default: delimiters are usual white space characters +#define wxDEFAULT_DELIMITERS (_T(" \t\r\n")) + +// wxStringTokenizer mode flags which determine its behaviour +enum wxStringTokenizerMode +{ + wxTOKEN_INVALID = -1, // set by def ctor until SetString() is called + wxTOKEN_DEFAULT, // strtok() for whitespace delims, RET_EMPTY else + wxTOKEN_RET_EMPTY, // return empty token in the middle of the string + wxTOKEN_RET_EMPTY_ALL, // return trailing empty tokens too + wxTOKEN_RET_DELIMS, // return the delim with token (implies RET_EMPTY) + wxTOKEN_STRTOK // behave exactly like strtok(3) +}; + +// ---------------------------------------------------------------------------- +// wxStringTokenizer: replaces infamous strtok() and has some other features +// ---------------------------------------------------------------------------- -class WXDLLEXPORT wxStringTokenizer : public wxObject +class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject { public: - wxStringTokenizer(const wxString& to_tokenize, - const wxString& delims = " \t\r\n", - bool ret_delim = FALSE); - wxStringTokenizer() { m_retdelims = FALSE; m_pos = 0; } - virtual ~wxStringTokenizer(); + // ctors and initializers + // default ctor, call SetString() later + wxStringTokenizer() { m_mode = wxTOKEN_INVALID; } + // ctor which gives us the string + wxStringTokenizer(const wxString& str, + const wxString& delims = wxDEFAULT_DELIMITERS, + wxStringTokenizerMode mode = wxTOKEN_DEFAULT); + + // args are same as for the non default ctor above + void SetString(const wxString& str, + const wxString& delims = wxDEFAULT_DELIMITERS, + wxStringTokenizerMode mode = wxTOKEN_DEFAULT); + + // reinitialize the tokenizer with the same delimiters/mode + void Reinit(const wxString& str); + + // tokens access + // return the number of remaining tokens + size_t CountTokens() const; + // did we reach the end of the string? + bool HasMoreTokens() const; + // get the next token, will return empty string if !HasMoreTokens() + wxString GetNextToken(); + // get the delimiter which terminated the token last retrieved by + // GetNextToken() or NUL if there had been no tokens yet or the last + // one wasn't terminated (but ran to the end of the string) + wxChar GetLastDelimiter() const { return m_lastDelim; } + + // get current tokenizer state + // returns the part of the string which remains to tokenize (*not* the + // initial string) + wxString GetString() const { return wxString(m_pos, m_string.end()); } + + // returns the current position (i.e. one index after the last + // returned token or 0 if GetNextToken() has never been called) in the + // original string + size_t GetPosition() const { return m_pos - m_string.begin(); } + + // misc + // get the current mode - can be different from the one passed to the + // ctor if it was wxTOKEN_DEFAULT + wxStringTokenizerMode GetMode() const { return m_mode; } + // do we return empty tokens? + bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; } - int CountTokens() const; - bool HasMoreTokens(); - wxString NextToken(); - wxString GetNextToken() { return NextToken(); }; + // backwards compatibility section from now on + // ------------------------------------------- - wxString GetString() const { return m_string; } - // One note about GetString -- it returns the string - // remaining after the previous tokens have been removed, - // not the original string + // for compatibility only, use GetNextToken() instead + wxString NextToken() { return GetNextToken(); } + // compatibility only, don't use void SetString(const wxString& to_tokenize, - const wxString& delims = " \t\r\n", - bool ret_delim = FALSE) + const wxString& delims, + bool WXUNUSED(ret_delim)) { - m_string = to_tokenize; - m_delims = delims; - m_retdelims = ret_delim; - m_pos = 0; + SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS); } - // Here's the desired function. It returns the position - // of the next token in the original string by keeping track - // of everything that's been deleted by GetNextToken. - wxUint32 GetPosition() { return m_pos; } + wxStringTokenizer(const wxString& to_tokenize, + const wxString& delims, + bool ret_delim) + { + SetString(to_tokenize, delims, ret_delim); + } protected: - wxString m_string, m_delims; - bool m_retdelims; - wxUint32 m_pos; // the position + bool IsOk() const { return m_mode != wxTOKEN_INVALID; } + + bool DoHasMoreTokens() const; + + enum MoreTokensState + { + MoreTokens_Unknown, + MoreTokens_Yes, + MoreTokens_No + }; + + MoreTokensState m_hasMoreTokens; + + wxString m_string; // the string we tokenize + wxString::const_iterator m_stringEnd; + // FIXME-UTF8: use wxWcharBuffer + wxWxCharBuffer m_delims; // all possible delimiters + size_t m_delimsLen; + + wxString::const_iterator m_pos; // the current position in m_string + + wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values + + wxChar m_lastDelim; // delimiter after last token or '\0' }; +// ---------------------------------------------------------------------------- +// convenience function which returns all tokens at once +// ---------------------------------------------------------------------------- + +// the function takes the same parameters as wxStringTokenizer ctor and returns +// the array containing all tokens +wxArrayString WXDLLIMPEXP_BASE +wxStringTokenize(const wxString& str, + const wxString& delims = wxDEFAULT_DELIMITERS, + wxStringTokenizerMode mode = wxTOKEN_DEFAULT); + #endif // _WX_TOKENZRH