[wxWidgets.git] / interface / wx / tokenzr.h

/////////////////////////////////////////////////////////////////////////////
// Name:        tokenzr.h
// Purpose:     interface of wxStringTokenizer
// Author:      wxWidgets team
// Licence:     wxWindows licence
/////////////////////////////////////////////////////////////////////////////

/**
    The behaviour of wxStringTokenizer is governed by the @e mode parameter
    passed to wxStringTokenizer::wxStringTokenizer() or
    wxStringTokenizer::SetString(), which may be one of the following:
*/
enum wxStringTokenizerMode
{
    wxTOKEN_INVALID = -1,   ///< Invalid tokenizer mode.

    /**
        Default behaviour: wxStringTokenizer will behave in the same way as
        @c strtok() (::wxTOKEN_STRTOK) if the delimiters string only contains
        white space characters but, unlike the standard function, it will
        behave like ::wxTOKEN_RET_EMPTY, returning empty tokens, if this is
        not the case. This is helpful for parsing strictly formatted data
        where the number of fields is fixed but some of them may be empty
        (e.g. @c TAB or comma delimited text files).
    */
    wxTOKEN_DEFAULT,

    /**
        In this mode, the empty tokens in the middle of the string will be
        returned, e.g. @c "a::b:" will be tokenized into three tokens: @c 'a',
        @c '' and @c 'b'. Notice that all trailing delimiters are ignored in
        this mode, not just the last one, so the string @c "a::b::" would
        still result in the same set of tokens.
    */
    wxTOKEN_RET_EMPTY,

    /**
        In this mode, empty trailing tokens (including the one after the last
        delimiter character) will be returned as well. The string @c "a::b:"
        will be tokenized into four tokens: @c 'a', @c '', @c 'b' and another
        empty token @c '' as the last one, and the string @c "a::b::" will
        have five tokens.
    */
    wxTOKEN_RET_EMPTY_ALL,

    /**
        In this mode, the delimiter character after the end of the current
        token (there may be none if this is the last token) is returned
        appended to the token. Otherwise, it is the same mode as
        ::wxTOKEN_RET_EMPTY. Notice that there is no mode like this one but
        behaving like ::wxTOKEN_RET_EMPTY_ALL instead of ::wxTOKEN_RET_EMPTY;
        use ::wxTOKEN_RET_EMPTY_ALL together with
        wxStringTokenizer::GetLastDelimiter() to emulate it.
    */
    wxTOKEN_RET_DELIMS,

    /**
        In this mode the class behaves exactly like the standard @c strtok()
        function: empty tokens are never returned.
    */
    wxTOKEN_STRTOK
};

/// Default wxStringTokenizer delimiters: the usual white space characters
/// (space, tab, carriage return and line feed).
#define wxDEFAULT_DELIMITERS " \t\r\n"

/**
    @class wxStringTokenizer

    wxStringTokenizer helps you to break a string up into a number of tokens.
    It replaces the standard C function @c strtok() and also extends it in a
    number of ways.

    To use this class, you should create a wxStringTokenizer object, give it the
    string to tokenize and also the delimiters which separate tokens in the string
    (by default, white space characters will be used).

    Then wxStringTokenizer::GetNextToken() may be called repeatedly until
    wxStringTokenizer::HasMoreTokens() returns @false.

    For example:

    @code
    wxStringTokenizer tokenizer("first:second:third:fourth", ":");
    while ( tokenizer.HasMoreTokens() )
    {
        wxString token = tokenizer.GetNextToken();

        // process token here
    }
    @endcode

    @library{wxbase}
    @category{data}

    @see ::wxStringTokenize()
*/
class wxStringTokenizer : public wxObject
{
public:
    /**
        Default constructor. You must call SetString() before calling any other
        methods.
    */
    wxStringTokenizer();

    /**
        Constructor. Pass the string to tokenize, a string containing
        delimiters, and the @a mode specifying how the string should be
        tokenized.

        @param str
            The string to tokenize.
        @param delims
            String containing the delimiter characters; defaults to
            wxDEFAULT_DELIMITERS.
        @param mode
            One of the ::wxStringTokenizerMode values; defaults to
            ::wxTOKEN_DEFAULT.

        @see SetString()
    */
    wxStringTokenizer(const wxString& str,
                      const wxString& delims = wxDEFAULT_DELIMITERS,
                      wxStringTokenizerMode mode = wxTOKEN_DEFAULT);

    /**
        Returns the number of tokens remaining in the input string. The number
        of tokens returned by this function is decremented each time
        GetNextToken() is called and when it reaches 0, HasMoreTokens()
        returns @false.
    */
    size_t CountTokens() const;

    /**
        Returns the delimiter which ended the scan for the last token returned
        by GetNextToken() or @c NUL if there had been no calls to this function
        yet or if it returned the trailing empty token in
        ::wxTOKEN_RET_EMPTY_ALL mode.

        @since 2.7.0
    */
    wxChar GetLastDelimiter() const;

    /**
        Returns the next token or an empty string if the end of the string was
        reached.
    */
    wxString GetNextToken();

    /**
        Returns the current position (i.e.\ one index after the last returned
        token or 0 if GetNextToken() has never been called) in the original
        string.
    */
    size_t GetPosition() const;

    /**
        Returns the part of the original string which remains after removing
        all the tokens already extracted.
    */
    wxString GetString() const;

    /**
        Returns @true if the tokenizer has further tokens, @false if none are left.
    */
    bool HasMoreTokens() const;

    /**
        Initializes the tokenizer. Pass the string to tokenize, a string
        containing delimiters, and the @a mode specifying how the string
        should be tokenized.

        @param str
            The string to tokenize.
        @param delims
            String containing the delimiter characters; defaults to
            wxDEFAULT_DELIMITERS.
        @param mode
            One of the ::wxStringTokenizerMode values; defaults to
            ::wxTOKEN_DEFAULT.
    */
    void SetString(const wxString& str,
                   const wxString& delims = wxDEFAULT_DELIMITERS,
                   wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
};


/** @addtogroup group_funcmacro_string */
//@{

/**
    This is a convenience function wrapping wxStringTokenizer which simply
    returns all tokens found in the given @a str as an array.

    Please see wxStringTokenizer::wxStringTokenizer() for the description
    of the @a delims and @a mode parameters.

    @param str
        The string to tokenize.

    @return The array with the parsed tokens.

    @header{wx/tokenzr.h}
*/
wxArrayString 
wxStringTokenize(const wxString& str,
                 const wxString& delims = wxDEFAULT_DELIMITERS,
                 wxStringTokenizerMode mode = wxTOKEN_DEFAULT);

//@}
Commit	Line	Data
23324ae1 FM	1	/////////////////////////////////////////////////////////////////////////////
23324ae1 FM	2	// Name: tokenzr.h
e54c96f1	3	// Purpose: interface of wxStringTokenizer
23324ae1	4	// Author: wxWidgets team
526954c5	5	// Licence: wxWindows licence
23324ae1 FM	6	/////////////////////////////////////////////////////////////////////////////
23324ae1 FM	7
977cf110 BP	8	/**
	9	The behaviour of wxStringTokenizer is governed by the
	10	wxStringTokenizer::wxStringTokenizer() or wxStringTokenizer::SetString()
	11	with the parameter @e mode, which may be one of the following:
	12	*/
	13	enum wxStringTokenizerMode
	14	{
	15	wxTOKEN_INVALID = -1, ///< Invalid tokenizer mode.
	16
	17	/**
	18	Default behaviour: wxStringTokenizer will behave in the same way as
	19	@c strtok() (::wxTOKEN_STRTOK) if the delimiters string only contains
	20	white space characters but, unlike the standard function, it will
	21	behave like ::wxTOKEN_RET_EMPTY, returning empty tokens if this is not
	22	the case. This is helpful for parsing strictly formatted data where
	23	the number of fields is fixed but some of them may be empty (i.e.
	24	@c TAB or comma delimited text files).
	25	*/
	26	wxTOKEN_DEFAULT,
	27
	28	/**
	29	In this mode, the empty tokens in the middle of the string will be returned,
163bd4f7 FM	30	i.e. @c "a::b:" will be tokenized in three tokens @c 'a', @c '' and @c 'b'.
163bd4f7 FM	31	Notice that all trailing delimiters are ignored in this mode, not just the last one,
977cf110 BP	32	i.e. a string @c "a::b::" would still result in the same set of tokens.
	33	*/
	34	wxTOKEN_RET_EMPTY,
	35
	36	/**
	37	In this mode, empty trailing tokens (including the one after the last delimiter
	38	character) will be returned as well. The string @c "a::b:" will be tokenized in
	39	four tokens: the already mentioned ones and another empty one as the last one
	40	and a string @c "a::b::" will have five tokens.
	41	*/
	42	wxTOKEN_RET_EMPTY_ALL,
	43
	44	/**
	45	In this mode, the delimiter character after the end of the current token (there
	46	may be none if this is the last token) is returned appended to the token.
	47	Otherwise, it is the same mode as ::wxTOKEN_RET_EMPTY. Notice that there is no
	48	mode like this one but behaving like ::wxTOKEN_RET_EMPTY_ALL instead of
	49	::wxTOKEN_RET_EMPTY, use ::wxTOKEN_RET_EMPTY_ALL and
	50	wxStringTokenizer::GetLastDelimiter() to emulate it.
	51	*/
	52	wxTOKEN_RET_DELIMS,
	53
	54	/**
	55	In this mode the class behaves exactly like the standard @c strtok() function:
	56	the empty tokens are never returned.
	57	*/
	58	wxTOKEN_STRTOK
	59	};
	60
0b59366f VZ	61	/// Default wxStringTokenizer delimiters are the usual white space characters.
	62	#define wxDEFAULT_DELIMITERS " \t\r\n"
	63
23324ae1 FM	64	/**
23324ae1 FM	65	@class wxStringTokenizer
7c913512	66
977cf110 BP	67	wxStringTokenizer helps you to break a string up into a number of tokens.
977cf110 BP	68	It replaces the standard C function @c strtok() and also extends it in a
23324ae1	69	number of ways.
7c913512	70
23324ae1 FM	71	To use this class, you should create a wxStringTokenizer object, give it the
	72	string to tokenize and also the delimiters which separate tokens in the string
	73	(by default, white space characters will be used).
7c913512	74
977cf110 BP	75	Then wxStringTokenizer::GetNextToken() may be called repeatedly until
977cf110 BP	76	wxStringTokenizer::HasMoreTokens() returns @false.
7c913512	77
23324ae1	78	For example:
7c913512	79
23324ae1	80	@code
977cf110 BP	81	wxStringTokenizer tokenizer("first:second:third:fourth", ":");
977cf110 BP	82	while ( tokenizer.HasMoreTokens() )
23324ae1	83	{
977cf110	84	wxString token = tokenizer.GetNextToken();
7c913512	85
23324ae1 FM	86	// process token here
	87	}
	88	@endcode
7c913512	89
23324ae1 FM	90	@library{wxbase}
23324ae1 FM	91	@category{data}
7c913512	92
c631ad54	93	@see ::wxStringTokenize()
23324ae1 FM	94	*/
	95	class wxStringTokenizer : public wxObject
	96	{
	97	public:
23324ae1	98	/**
977cf110 BP	99	Default constructor. You must call SetString() before calling any other
977cf110 BP	100	methods.
23324ae1 FM	101	*/
23324ae1 FM	102	wxStringTokenizer();
977cf110 BP	103	/**
	104	Constructor. Pass the string to tokenize, a string containing
	105	delimiters, and the @a mode specifying how the string should be
	106	tokenized.
	107
	108	@see SetString()
	109	*/
7c913512	110	wxStringTokenizer(const wxString& str,
0b59366f	111	const wxString& delims = wxDEFAULT_DELIMITERS,
7c913512	112	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
23324ae1 FM	113
23324ae1 FM	114	/**
977cf110 BP	115	Returns the number of tokens remaining in the input string. The number
	116	of tokens returned by this function is decremented each time
	117	GetNextToken() is called and when it reaches 0, HasMoreTokens()
	118	returns @false.
23324ae1	119	*/
43c48e1e	120	size_t CountTokens() const;
23324ae1 FM	121
23324ae1 FM	122	/**
7c913512	123	Returns the delimiter which ended scan for the last token returned by
977cf110 BP	124	GetNextToken() or @c NUL if there had been no calls to this function
	125	yet or if it returned the trailing empty token in
	126	::wxTOKEN_RET_EMPTY_ALL mode.
3c4f71cc	127
1e24c2af	128	@since 2.7.0
23324ae1	129	*/
adaaa686	130	wxChar GetLastDelimiter() const;
23324ae1 FM	131
	132	/**
	133	Returns the next token or empty string if the end of string was reached.
	134	*/
adaaa686	135	wxString GetNextToken();
23324ae1 FM	136
23324ae1 FM	137	/**
0824e369	138	Returns the current position (i.e.\ one index after the last returned
23324ae1 FM	139	token or 0 if GetNextToken() has never been called) in the original
	140	string.
	141	*/
328f5751	142	size_t GetPosition() const;
23324ae1 FM	143
	144	/**
	145	Returns the part of the starting string without all token already extracted.
	146	*/
328f5751	147	wxString GetString() const;
23324ae1 FM	148
	149	/**
	150	Returns @true if the tokenizer has further tokens, @false if none are left.
	151	*/
328f5751	152	bool HasMoreTokens() const;
23324ae1 FM	153
23324ae1 FM	154	/**
977cf110 BP	155	Initializes the tokenizer. Pass the string to tokenize, a string
	156	containing delimiters, and the @a mode specifying how the string
	157	should be tokenized.
23324ae1	158	*/
0b59366f VZ	159	void SetString(const wxString& str,
0b59366f VZ	160	const wxString& delims = wxDEFAULT_DELIMITERS,
23324ae1 FM	161	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
23324ae1 FM	162	};
c631ad54 FM	163
	164
	165	/** @addtogroup group_funcmacro_string */
	166	//@{
	167
	168	/**
	169	This is a convenience function wrapping wxStringTokenizer which simply
	170	returns all tokens found in the given @a str as an array.
	171
	172	Please see wxStringTokenizer::wxStringTokenizer for the description
	173	of the other parameters.
	174
	175	@return The array with the parsed tokens.
	176
ea3883df	177	@header{wx/tokenzr.h}
c631ad54 FM	178	*/
	179	wxArrayString
	180	wxStringTokenize(const wxString& str,
	181	const wxString& delims = wxDEFAULT_DELIMITERS,
	182	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
0b59366f	183
c631ad54	184	//@}