[wxWidgets.git] / interface / tokenzr.h

/////////////////////////////////////////////////////////////////////////////
// Name:        tokenzr.h
// Purpose:     interface of wxStringTokenizer
// Author:      wxWidgets team
// RCS-ID:      $Id$
// Licence:     wxWindows license
/////////////////////////////////////////////////////////////////////////////

/**
    Tokenization modes of wxStringTokenizer.

    The behaviour of wxStringTokenizer is governed by the @e mode parameter
    passed to wxStringTokenizer::wxStringTokenizer() or
    wxStringTokenizer::SetString(), which may be one of the following values:
*/
enum wxStringTokenizerMode
{
    wxTOKEN_INVALID = -1,   ///< Invalid tokenizer mode.

    /**
        Default behaviour: wxStringTokenizer will behave in the same way as
        @c strtok() (::wxTOKEN_STRTOK) if the delimiters string only contains
        white space characters but, unlike the standard function, it will
        behave like ::wxTOKEN_RET_EMPTY, returning empty tokens, if this is
        not the case. This is helpful for parsing strictly formatted data
        where the number of fields is fixed but some of them may be empty
        (e.g. @c TAB or comma delimited text files).
    */
    wxTOKEN_DEFAULT,

    /**
        In this mode, the empty tokens in the middle of the string will be
        returned, e.g. @c "a::b:" will be tokenized in three tokens: @c 'a',
        @c '' and @c 'b'. Notice that all trailing delimiters are ignored in
        this mode, not just the last one, i.e. a string @c "a::b::" would
        still result in the same set of tokens.
    */
    wxTOKEN_RET_EMPTY,

    /**
        In this mode, empty trailing tokens (including the one after the last
        delimiter character) will be returned as well. The string @c "a::b:"
        will be tokenized in four tokens: @c 'a', @c '', @c 'b' and another
        empty one as the last one; a string @c "a::b::" will have five tokens.
    */
    wxTOKEN_RET_EMPTY_ALL,

    /**
        In this mode, the delimiter character after the end of the current
        token (there may be none if this is the last token) is returned
        appended to the token. Otherwise, it is the same mode as
        ::wxTOKEN_RET_EMPTY. Notice that there is no mode like this one but
        behaving like ::wxTOKEN_RET_EMPTY_ALL instead of ::wxTOKEN_RET_EMPTY;
        use ::wxTOKEN_RET_EMPTY_ALL and wxStringTokenizer::GetLastDelimiter()
        to emulate it.
    */
    wxTOKEN_RET_DELIMS,

    /**
        In this mode the class behaves exactly like the standard @c strtok()
        function: the empty tokens are never returned.
    */
    wxTOKEN_STRTOK
};

/**
    @class wxStringTokenizer
    @wxheader{tokenzr.h}

    wxStringTokenizer helps you to break a string up into a number of tokens.
    It replaces the standard C function @c strtok() and also extends it in a
    number of ways.

    To use this class, you should create a wxStringTokenizer object, give it the
    string to tokenize and also the delimiters which separate tokens in the string
    (by default, white space characters will be used).

    Then wxStringTokenizer::GetNextToken() may be called repeatedly until
    wxStringTokenizer::HasMoreTokens() returns @false.

    For example:

    @code
    wxStringTokenizer tokenizer("first:second:third:fourth", ":");
    while ( tokenizer.HasMoreTokens() )
    {
        wxString token = tokenizer.GetNextToken();

        // process token here
    }
    @endcode

    @library{wxbase}
    @category{data}

    @see wxStringTokenize()
*/
class wxStringTokenizer : public wxObject
{
public:
    /**
        Default constructor. You must call SetString() before calling any other
        methods.
    */
    wxStringTokenizer();
    /**
        Constructor. Pass the string to tokenize, a string containing
        delimiters, and the @a mode specifying how the string should be
        tokenized.

        @param str
            The string to tokenize.
        @param delims
            String containing the delimiter characters; by default the white
            space characters (space, @c TAB, carriage return and new line).
        @param mode
            One of the ::wxStringTokenizerMode values specifying how the
            string should be tokenized, ::wxTOKEN_DEFAULT by default.

        @see SetString()
    */
    wxStringTokenizer(const wxString& str,
                      const wxString& delims = " \t\r\n",
                      wxStringTokenizerMode mode = wxTOKEN_DEFAULT);

    /**
        Returns the number of tokens remaining in the input string. The number
        of tokens returned by this function is decremented each time
        GetNextToken() is called and when it reaches 0, HasMoreTokens()
        returns @false.
    */
    int CountTokens() const;

    /**
        Returns the delimiter which ended the scan for the last token returned
        by GetNextToken(), or @c NUL if GetNextToken() has not been called yet
        or if it returned the trailing empty token in
        ::wxTOKEN_RET_EMPTY_ALL mode.

        @since 2.7.0
    */
    wxChar GetLastDelimiter();

    // NOTE(review): this method is declared const here although the
    // documentation of CountTokens() and GetPosition() describes every call
    // to it as changing the tokenizer state (remaining token count, current
    // position) -- confirm the const qualifier against the real header.
    /**
        Returns the next token or empty string if the end of string was reached.
    */
    wxString GetNextToken() const;

    /**
        Returns the current position (i.e. one index after the last returned
        token or 0 if GetNextToken() has never been called) in the original
        string.
    */
    size_t GetPosition() const;

    /**
        Returns the part of the starting string without all the tokens already
        extracted.
    */
    wxString GetString() const;

    /**
        Returns @true if the tokenizer has further tokens, @false if none are left.
    */
    bool HasMoreTokens() const;

    /**
        Initializes the tokenizer. Pass the string to tokenize, a string
        containing delimiters, and the @a mode specifying how the string
        should be tokenized.

        @param to_tokenize
            The string to tokenize.
        @param delims
            String containing the delimiter characters; by default the white
            space characters (space, @c TAB, carriage return and new line).
        @param mode
            One of the ::wxStringTokenizerMode values specifying how the
            string should be tokenized, ::wxTOKEN_DEFAULT by default.
    */
    void SetString(const wxString& to_tokenize,
                   const wxString& delims = " \t\r\n",
                   wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
};
Commit	Line	Data
23324ae1 FM	1	/////////////////////////////////////////////////////////////////////////////
23324ae1 FM	2	// Name: tokenzr.h
e54c96f1	3	// Purpose: interface of wxStringTokenizer
23324ae1 FM	4	// Author: wxWidgets team
	5	// RCS-ID: $Id$
	6	// Licence: wxWindows license
	7	/////////////////////////////////////////////////////////////////////////////
	8
977cf110 BP	9	/**
	10	The behaviour of wxStringTokenizer is governed by the
	11	wxStringTokenizer::wxStringTokenizer() or wxStringTokenizer::SetString()
	12	with the parameter @e mode, which may be one of the following:
	13	*/
	14	enum wxStringTokenizerMode
	15	{
	16	wxTOKEN_INVALID = -1, ///< Invalid tokenizer mode.
	17
	18	/**
	19	Default behaviour: wxStringTokenizer will behave in the same way as
	20	@c strtok() (::wxTOKEN_STRTOK) if the delimiters string only contains
	21	white space characters but, unlike the standard function, it will
	22	behave like ::wxTOKEN_RET_EMPTY, returning empty tokens if this is not
	23	the case. This is helpful for parsing strictly formatted data where
	24	the number of fields is fixed but some of them may be empty (i.e.
	25	@c TAB or comma delimited text files).
	26	*/
	27	wxTOKEN_DEFAULT,
	28
	29	/**
	30	In this mode, the empty tokens in the middle of the string will be returned,
	31	i.e. @c "a::b:" will be tokenized in three tokens @c 'a', " and @c 'b'. Notice
	32	that all trailing delimiters are ignored in this mode, not just the last one,
	33	i.e. a string @c "a::b::" would still result in the same set of tokens.
	34	*/
	35	wxTOKEN_RET_EMPTY,
	36
	37	/**
	38	In this mode, empty trailing tokens (including the one after the last delimiter
	39	character) will be returned as well. The string @c "a::b:" will be tokenized in
	40	four tokens: the already mentioned ones and another empty one as the last one
	41	and a string @c "a::b::" will have five tokens.
	42	*/
	43	wxTOKEN_RET_EMPTY_ALL,
	44
	45	/**
	46	In this mode, the delimiter character after the end of the current token (there
	47	may be none if this is the last token) is returned appended to the token.
	48	Otherwise, it is the same mode as ::wxTOKEN_RET_EMPTY. Notice that there is no
	49	mode like this one but behaving like ::wxTOKEN_RET_EMPTY_ALL instead of
	50	::wxTOKEN_RET_EMPTY, use ::wxTOKEN_RET_EMPTY_ALL and
	51	wxStringTokenizer::GetLastDelimiter() to emulate it.
	52	*/
	53	wxTOKEN_RET_DELIMS,
	54
	55	/**
	56	In this mode the class behaves exactly like the standard @c strtok() function:
	57	the empty tokens are never returned.
	58	*/
	59	wxTOKEN_STRTOK
	60	};
	61
23324ae1 FM	62	/**
	63	@class wxStringTokenizer
	64	@wxheader{tokenzr.h}
7c913512	65
977cf110 BP	66	wxStringTokenizer helps you to break a string up into a number of tokens.
977cf110 BP	67	It replaces the standard C function @c strtok() and also extends it in a
23324ae1	68	number of ways.
7c913512	69
23324ae1 FM	70	To use this class, you should create a wxStringTokenizer object, give it the
	71	string to tokenize and also the delimiters which separate tokens in the string
	72	(by default, white space characters will be used).
7c913512	73
977cf110 BP	74	Then wxStringTokenizer::GetNextToken() may be called repeatedly until
977cf110 BP	75	wxStringTokenizer::HasMoreTokens() returns @false.
7c913512	76
23324ae1	77	For example:
7c913512	78
23324ae1	79	@code
977cf110 BP	80	wxStringTokenizer tokenizer("first:second:third:fourth", ":");
977cf110 BP	81	while ( tokenizer.HasMoreTokens() )
23324ae1	82	{
977cf110	83	wxString token = tokenizer.GetNextToken();
7c913512	84
23324ae1 FM	85	// process token here
	86	}
	87	@endcode
7c913512	88
23324ae1 FM	89	@library{wxbase}
23324ae1 FM	90	@category{data}
7c913512	91
e54c96f1	92	@see wxStringTokenize()
23324ae1 FM	93	*/
	94	class wxStringTokenizer : public wxObject
	95	{
	96	public:
23324ae1	97	/**
977cf110 BP	98	Default constructor. You must call SetString() before calling any other
977cf110 BP	99	methods.
23324ae1 FM	100	*/
23324ae1 FM	101	wxStringTokenizer();
977cf110 BP	102	/**
	103	Constructor. Pass the string to tokenize, a string containing
	104	delimiters, and the @a mode specifying how the string should be
	105	tokenized.
	106
	107	@see SetString()
	108	*/
7c913512 FM	109	wxStringTokenizer(const wxString& str,
	110	const wxString& delims = " \t\r\n",
	111	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
23324ae1 FM	112
23324ae1 FM	113	/**
977cf110 BP	114	Returns the number of tokens remaining in the input string. The number
	115	of tokens returned by this function is decremented each time
	116	GetNextToken() is called and when it reaches 0, HasMoreTokens()
	117	returns @false.
23324ae1	118	*/
328f5751	119	int CountTokens() const;
23324ae1 FM	120
23324ae1 FM	121	/**
7c913512	122	Returns the delimiter which ended scan for the last token returned by
977cf110 BP	123	GetNextToken() or @c NUL if there had been no calls to this function
	124	yet or if it returned the trailing empty token in
	125	::wxTOKEN_RET_EMPTY_ALL mode.
3c4f71cc	126
1e24c2af	127	@since 2.7.0
23324ae1 FM	128	*/
	129	wxChar GetLastDelimiter();
	130
	131	/**
	132	Returns the next token or empty string if the end of string was reached.
	133	*/
328f5751	134	wxString GetNextToken() const;
23324ae1 FM	135
	136	/**
	137	Returns the current position (i.e. one index after the last returned
	138	token or 0 if GetNextToken() has never been called) in the original
	139	string.
	140	*/
328f5751	141	size_t GetPosition() const;
23324ae1 FM	142
	143	/**
	144	Returns the part of the starting string without all token already extracted.
	145	*/
328f5751	146	wxString GetString() const;
23324ae1 FM	147
	148	/**
	149	Returns @true if the tokenizer has further tokens, @false if none are left.
	150	*/
328f5751	151	bool HasMoreTokens() const;
23324ae1 FM	152
23324ae1 FM	153	/**
977cf110 BP	154	Initializes the tokenizer. Pass the string to tokenize, a string
	155	containing delimiters, and the @a mode specifying how the string
	156	should be tokenized.
23324ae1 FM	157	*/
	158	void SetString(const wxString& to_tokenize,
	159	const wxString& delims = " \t\r\n",
	160	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
	161	};