[wxWidgets.git] / include / wx / tokenzr.h

/////////////////////////////////////////////////////////////////////////////
// Name:        wx/tokenzr.h
// Purpose:     String tokenizer - a C++ replacement for strtok(3)
// Author:      Guilhem Lavaux
// Modified by: (or rather rewritten by) Vadim Zeitlin
// Created:     04/22/98
// RCS-ID:      $Id$
// Copyright:   (c) Guilhem Lavaux
// Licence:     wxWindows licence
/////////////////////////////////////////////////////////////////////////////

#ifndef _WX_TOKENZRH
#define _WX_TOKENZRH

#include "wx/object.h"
#include "wx/string.h"
#include "wx/arrstr.h"

// ----------------------------------------------------------------------------
// constants
// ----------------------------------------------------------------------------

// default delimiters: the usual white space characters, i.e. space, TAB,
// carriage return and line feed; this macro is used as the default value of
// the "delims" parameter of the tokenizer functions declared in this header
#define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))

// Modes selecting how wxStringTokenizer splits its input string
enum wxStringTokenizerMode
{
    // placeholder set by the default ctor until SetString() is called
    wxTOKEN_INVALID = -1,

    // strtok() behaviour for whitespace delimiters, wxTOKEN_RET_EMPTY
    // behaviour for any other ones
    wxTOKEN_DEFAULT = 0,

    // return the empty tokens found in the middle of the string
    wxTOKEN_RET_EMPTY = 1,

    // as above, but return the trailing empty tokens too
    wxTOKEN_RET_EMPTY_ALL = 2,

    // return the delimiter together with the token (implies
    // wxTOKEN_RET_EMPTY)
    wxTOKEN_RET_DELIMS = 3,

    // behave exactly like strtok(3)
    wxTOKEN_STRTOK = 4
};

// ----------------------------------------------------------------------------
// wxStringTokenizer: replaces infamous strtok() and has some other features
// ----------------------------------------------------------------------------

class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject
{
public:
    // ctors and initializers
        // default ctor: the tokenizer is invalid (see IsOk()) until
        // SetString() is called
        //
        // all scalar members are given well-defined values here so that the
        // inline accessors below (notably GetLastDelimiter(), which is
        // documented to return NUL before any token was retrieved) never
        // read indeterminate data on a tokenizer which wasn't set up yet
    wxStringTokenizer()
        : m_hasMoreTokens(MoreTokens_Unknown),
          m_delimsLen(0),
          m_mode(wxTOKEN_INVALID),
          m_lastDelim(_T('\0'))
    {
    }

        // ctor which gives us the string to tokenize, the set of delimiter
        // characters and the tokenization mode
    wxStringTokenizer(const wxString& str,
                      const wxString& delims = wxDEFAULT_DELIMITERS,
                      wxStringTokenizerMode mode = wxTOKEN_DEFAULT);

        // args are same as for the non default ctor above
    void SetString(const wxString& str,
                   const wxString& delims = wxDEFAULT_DELIMITERS,
                   wxStringTokenizerMode mode = wxTOKEN_DEFAULT);

        // reinitialize the tokenizer with the same delimiters/mode
    void Reinit(const wxString& str);

    // tokens access
        // return the number of remaining tokens
    size_t CountTokens() const;
        // did we reach the end of the string?
    bool HasMoreTokens() const;
        // get the next token, will return empty string if !HasMoreTokens()
    wxString GetNextToken();
        // get the delimiter which terminated the token last retrieved by
        // GetNextToken() or NUL if there had been no tokens yet or the last
        // one wasn't terminated (but ran to the end of the string)
    wxChar GetLastDelimiter() const { return m_lastDelim; }

    // get current tokenizer state
        // returns the part of the string which remains to tokenize (*not* the
        // initial string); an empty string is returned if the tokenizer
        // hasn't been given a string yet
    wxString GetString() const
    {
        // m_pos is only meaningful once a string has been set: don't build
        // a string from a default-constructed iterator
        if ( !IsOk() )
            return wxString();

        return wxString(m_pos, m_string.end());
    }

        // returns the current position (i.e. one index after the last
        // returned token or 0 if GetNextToken() has never been called) in the
        // original string; 0 is also returned if the tokenizer hasn't been
        // given a string yet
    size_t GetPosition() const
    {
        // same reasoning as in GetString(): m_pos can't be used before the
        // tokenizer is initialized
        if ( !IsOk() )
            return 0;

        return m_pos - m_string.begin();
    }

    // misc
        // get the current mode - can be different from the one passed to the
        // ctor if it was wxTOKEN_DEFAULT
    wxStringTokenizerMode GetMode() const { return m_mode; }
        // do we return empty tokens? (true for any mode but wxTOKEN_STRTOK)
    bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }


    // backwards compatibility section from now on
    // -------------------------------------------

    // for compatibility only, use GetNextToken() instead
    wxString NextToken() { return GetNextToken(); }

    // compatibility only, don't use
    //
    // NB: the ret_delim argument is ignored, wxTOKEN_RET_DELIMS mode is
    //     always used
    void SetString(const wxString& to_tokenize,
                   const wxString& delims,
                   bool WXUNUSED(ret_delim))
    {
        SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS);
    }

    // compatibility only, don't use
    //
    // NB: this forwards to the compatibility SetString() overload above and
    //     so ret_delim is ignored here too
    wxStringTokenizer(const wxString& to_tokenize,
                      const wxString& delims,
                      bool ret_delim)
    {
        SetString(to_tokenize, delims, ret_delim);
    }

protected:
    // was the tokenizer given a string, i.e. is its mode valid?
    bool IsOk() const { return m_mode != wxTOKEN_INVALID; }

    bool DoHasMoreTokens() const;

    // possible values of m_hasMoreTokens
    enum MoreTokensState
    {
        MoreTokens_Unknown,
        MoreTokens_Yes,
        MoreTokens_No
    };

    MoreTokensState m_hasMoreTokens;

    wxString m_string;              // the string we tokenize
    wxString::const_iterator m_stringEnd;
    // FIXME-UTF8: use wxWcharBuffer
    wxWxCharBuffer m_delims;        // all possible delimiters
    size_t m_delimsLen;

    wxString::const_iterator m_pos; // the current position in m_string

    wxStringTokenizerMode m_mode;   // see wxTOKEN_XXX values

    wxChar   m_lastDelim;           // delimiter after last token or '\0'
};

// ----------------------------------------------------------------------------
// convenience function which returns all tokens at once
// ----------------------------------------------------------------------------

// tokenize the whole string in one call: the function takes the same
// parameters as the wxStringTokenizer ctor (string to split, delimiter
// characters defaulting to wxDEFAULT_DELIMITERS and tokenization mode
// defaulting to wxTOKEN_DEFAULT) and returns the array containing all tokens
wxArrayString WXDLLIMPEXP_BASE
wxStringTokenize(const wxString& str,
                 const wxString& delims = wxDEFAULT_DELIMITERS,
                 wxStringTokenizerMode mode = wxTOKEN_DEFAULT);

#endif // _WX_TOKENZRH
Commit	Line	Data
f4ada568	1	/////////////////////////////////////////////////////////////////////////////
7c968cee VZ	2	// Name: wx/tokenzr.h
7c968cee VZ	3	// Purpose: String tokenizer - a C++ replacement for strtok(3)
f4ada568	4	// Author: Guilhem Lavaux
1e6feb95	5	// Modified by: (or rather rewritten by) Vadim Zeitlin
f4ada568 GL	6	// Created: 04/22/98
	7	// RCS-ID: $Id$
	8	// Copyright: (c) Guilhem Lavaux
65571936	9	// Licence: wxWindows licence
f4ada568 GL	10	/////////////////////////////////////////////////////////////////////////////
	11
	12	#ifndef _WX_TOKENZRH
	13	#define _WX_TOKENZRH
	14
f4ada568 GL	15	#include "wx/object.h"
f4ada568 GL	16	#include "wx/string.h"
df5168c4	17	#include "wx/arrstr.h"
bbf8fc53	18
7c968cee VZ	19	// ----------------------------------------------------------------------------
	20	// constants
	21	// ----------------------------------------------------------------------------
	22
bbf8fc53 VZ	23	// default: delimiters are usual white space characters
bbf8fc53 VZ	24	#define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
f4ada568	25
7c968cee VZ	26	// wxStringTokenizer mode flags which determine its behaviour
	27	enum wxStringTokenizerMode
	28	{
	29	wxTOKEN_INVALID = -1, // set by def ctor until SetString() is called
	30	wxTOKEN_DEFAULT, // strtok() for whitespace delims, RET_EMPTY else
	31	wxTOKEN_RET_EMPTY, // return empty token in the middle of the string
	32	wxTOKEN_RET_EMPTY_ALL, // return trailing empty tokens too
	33	wxTOKEN_RET_DELIMS, // return the delim with token (implies RET_EMPTY)
	34	wxTOKEN_STRTOK // behave exactly like strtok(3)
	35	};
	36
	37	// ----------------------------------------------------------------------------
	38	// wxStringTokenizer: replaces infamous strtok() and has some other features
	39	// ----------------------------------------------------------------------------
	40
bddd7a8d	41	class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject
85833f5c	42	{
f4ada568	43	public:
7c968cee VZ	44	// ctors and initializers
	45	// default ctor, call SetString() later
	46	wxStringTokenizer() { m_mode = wxTOKEN_INVALID; }
	47	// ctor which gives us the string
	48	wxStringTokenizer(const wxString& str,
bbf8fc53	49	const wxString& delims = wxDEFAULT_DELIMITERS,
7c968cee VZ	50	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
	51
	52	// args are same as for the non default ctor above
	53	void SetString(const wxString& str,
bbf8fc53	54	const wxString& delims = wxDEFAULT_DELIMITERS,
7c968cee VZ	55	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
	56
	57	// reinitialize the tokenizer with the same delimiters/mode
	58	void Reinit(const wxString& str);
85833f5c	59
7c968cee	60	// tokens access
4626c57c	61	// return the number of remaining tokens
bbf8fc53	62	size_t CountTokens() const;
7c968cee VZ	63	// did we reach the end of the string?
	64	bool HasMoreTokens() const;
	65	// get the next token, will return empty string if !HasMoreTokens()
bbf8fc53	66	wxString GetNextToken();
4626c57c VZ	67	// get the delimiter which terminated the token last retrieved by
	68	// GetNextToken() or NUL if there had been no tokens yet or the last
	69	// one wasn't terminated (but ran to the end of the string)
	70	wxChar GetLastDelimiter() const { return m_lastDelim; }
85833f5c	71
7c968cee VZ	72	// get current tokenizer state
	73	// returns the part of the string which remains to tokenize (not the
	74	// initial string)
f0dfc29c	75	wxString GetString() const { return wxString(m_pos, m_string.end()); }
85833f5c	76
7c968cee VZ	77	// returns the current position (i.e. one index after the last
	78	// returned token or 0 if GetNextToken() has never been called) in the
	79	// original string
f0dfc29c	80	size_t GetPosition() const { return m_pos - m_string.begin(); }
dbdb39b2	81
7c968cee VZ	82	// misc
	83	// get the current mode - can be different from the one passed to the
	84	// ctor if it was wxTOKEN_DEFAULT
	85	wxStringTokenizerMode GetMode() const { return m_mode; }
4626c57c VZ	86	// do we return empty tokens?
	87	bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }
	88
7c968cee VZ	89
	90	// backwards compatibility section from now on
	91	// -------------------------------------------
	92
bbf8fc53 VZ	93	// for compatibility only, use GetNextToken() instead
bbf8fc53 VZ	94	wxString NextToken() { return GetNextToken(); }
85833f5c	95
7c968cee VZ	96	// compatibility only, don't use
	97	void SetString(const wxString& to_tokenize,
	98	const wxString& delims,
06b466c7	99	bool WXUNUSED(ret_delim))
7c968cee VZ	100	{
	101	SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS);
	102	}
	103
	104	wxStringTokenizer(const wxString& to_tokenize,
	105	const wxString& delims,
	106	bool ret_delim)
	107	{
	108	SetString(to_tokenize, delims, ret_delim);
	109	}
	110
2224580a	111	protected:
7c968cee VZ	112	bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
7c968cee VZ	113
f0dfc29c	114	bool DoHasMoreTokens() const;
bbf8fc53	115
f0dfc29c VS	116	enum MoreTokensState
	117	{
	118	MoreTokens_Unknown,
	119	MoreTokens_Yes,
	120	MoreTokens_No
	121	};
	122
	123	MoreTokensState m_hasMoreTokens;
	124
	125	wxString m_string; // the string we tokenize
	126	wxString::const_iterator m_stringEnd;
	127	// FIXME-UTF8: use wxWcharBuffer
	128	wxWxCharBuffer m_delims; // all possible delimiters
	129	size_t m_delimsLen;
	130
	131	wxString::const_iterator m_pos; // the current position in m_string
bbf8fc53	132
7c968cee VZ	133	wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values
7c968cee VZ	134
4626c57c	135	wxChar m_lastDelim; // delimiter after last token or '\0'
f4ada568 GL	136	};
f4ada568 GL	137
1e6feb95 VZ	138	// ----------------------------------------------------------------------------
	139	// convenience function which returns all tokens at once
	140	// ----------------------------------------------------------------------------
	141
	142	// the function takes the same parameters as wxStringTokenizer ctor and returns
	143	// the array containing all tokens
bddd7a8d	144	wxArrayString WXDLLIMPEXP_BASE
1e6feb95 VZ	145	wxStringTokenize(const wxString& str,
	146	const wxString& delims = wxDEFAULT_DELIMITERS,
	147	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
	148
85833f5c	149	#endif // _WX_TOKENZRH