[wxWidgets.git] / interface / tokenzr.h

/////////////////////////////////////////////////////////////////////////////
// Name:        tokenzr.h
// Purpose:     interface of wxStringTokenizer
// Author:      wxWidgets team
// RCS-ID:      $Id$
// Licence:     wxWindows license
/////////////////////////////////////////////////////////////////////////////

/**
    @class wxStringTokenizer
    @wxheader{tokenzr.h}

    wxStringTokenizer helps you to break a string up into a number of tokens. It
    replaces the standard C function @c strtok() and also extends it in a
    number of ways.

    To use this class, you should create a wxStringTokenizer object, give it the
    string to tokenize and also the delimiters which separate tokens in the string
    (by default, white space characters will be used).

    Then wxStringTokenizer::GetNextToken() may be called repeatedly until
    wxStringTokenizer::HasMoreTokens() returns @false.

    For example:

    @code
    wxStringTokenizer tkz(wxT("first:second:third:fourth"), wxT(":"));
    while ( tkz.HasMoreTokens() )
    {
        wxString token = tkz.GetNextToken();

        // process token here
    }
    @endcode

    By default, wxStringTokenizer will behave in the same way as @c strtok() if
    the delimiters string only contains white space characters but, unlike the
    standard function, it will return empty tokens if this is not the case. This
    is helpful for parsing strictly formatted data where the number of fields is
    fixed but some of them may be empty (e.g. @c TAB or comma delimited text
    files).

    The behaviour is governed by the last parameter, @c mode, of the
    wxStringTokenizer constructor and of wxStringTokenizer::SetString();
    it may be one of the following:


    @c wxTOKEN_DEFAULT


    Default behaviour (as described above):
    same as @c wxTOKEN_STRTOK if the delimiter string contains only
    whitespaces, same as @c wxTOKEN_RET_EMPTY otherwise


    @c wxTOKEN_RET_EMPTY


    In this mode, the empty tokens in the
    middle of the string will be returned, e.g. @c "a::b:" will be tokenized into
    three tokens 'a', '' and 'b'. Notice that all trailing delimiters are ignored
    in this mode, not just the last one, i.e. a string @c "a::b::" would
    still result in the same set of tokens.


    @c wxTOKEN_RET_EMPTY_ALL


    In this mode, empty trailing tokens
    (including the one after the last delimiter character) will be returned as
    well. The string @c "a::b:" will be tokenized into four tokens: the already
    mentioned ones and another empty one as the last one. Similarly, a string
    @c "a::b::" will have five tokens.


    @c wxTOKEN_RET_DELIMS


    In this mode, the delimiter character
    after the end of the current token (there may be none if this is the last
    token) is returned appended to the token. Otherwise, it is the same mode as
    @c wxTOKEN_RET_EMPTY. Notice that there is no mode like this one but
    behaving like @c wxTOKEN_RET_EMPTY_ALL instead of
    @c wxTOKEN_RET_EMPTY; use @c wxTOKEN_RET_EMPTY_ALL together with
    wxStringTokenizer::GetLastDelimiter() to emulate it.


    @c wxTOKEN_STRTOK


    In this mode the class behaves exactly like
    the standard @c strtok() function: the empty tokens are never returned.


    @library{wxbase}
    @category{data}

    @see wxStringTokenize()
*/
class wxStringTokenizer : public wxObject
{
public:
    /**
        Default constructor. No string to tokenize is associated with the
        object created this way: call SetString() to initialize it.
    */
    wxStringTokenizer();

    /**
        Constructor. Pass the string to tokenize, a string containing delimiters
        and the mode specifying how the string should be tokenized.

        @param str
            The string to break up into tokens.
        @param delims
            The characters separating the tokens; white space characters by
            default.
        @param mode
            One of the @c wxTOKEN_XXX values described in the class overview.
    */
    wxStringTokenizer(const wxString& str,
                      const wxString& delims = " \t\r\n",
                      wxStringTokenizerMode mode = wxTOKEN_DEFAULT);

    /**
        Returns the number of tokens remaining in the input string. The number of
        tokens returned by this function is decremented each time
        GetNextToken() is called and when it reaches 0 HasMoreTokens() returns
        @false.
    */
    int CountTokens() const;

    /**
        Returns the delimiter which ended scan for the last token returned by
        GetNextToken() or @c NUL if there had been no calls to this function
        yet or if it returned the trailing empty token in
        @c wxTOKEN_RET_EMPTY_ALL mode.

        This is a pure accessor and so can be called on a const tokenizer.

        @wxsince{2.7.0}
    */
    wxChar GetLastDelimiter() const;

    /**
        Returns the next token or empty string if the end of string was reached.

        Each call consumes one token (the value returned by CountTokens() is
        decremented), which is why this method is not const.
    */
    wxString GetNextToken();

    /**
        Returns the current position (i.e. one index after the last returned
        token or 0 if GetNextToken() has never been called) in the original
        string.
    */
    size_t GetPosition() const;

    /**
        Returns the part of the original string which remains to be tokenized,
        i.e. the string without the tokens already extracted from it.
    */
    wxString GetString() const;

    /**
        Returns @true if the tokenizer has further tokens, @false if none are left.
    */
    bool HasMoreTokens() const;

    /**
        Initializes the tokenizer.
        Pass the string to tokenize, a string containing delimiters,
        and the mode specifying how the string should be tokenized.

        @param to_tokenize
            The string to break up into tokens.
        @param delims
            The characters separating the tokens; white space characters by
            default.
        @param mode
            One of the @c wxTOKEN_XXX values described in the class overview.
    */
    void SetString(const wxString& to_tokenize,
                   const wxString& delims = " \t\r\n",
                   wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
};
Commit	Line	Data
23324ae1 FM	1	/////////////////////////////////////////////////////////////////////////////
23324ae1 FM	2	// Name: tokenzr.h
e54c96f1	3	// Purpose: interface of wxStringTokenizer
23324ae1 FM	4	// Author: wxWidgets team
	5	// RCS-ID: $Id$
	6	// Licence: wxWindows license
	7	/////////////////////////////////////////////////////////////////////////////
	8
	9	/**
	10	@class wxStringTokenizer
	11	@wxheader{tokenzr.h}
7c913512	12
23324ae1 FM	13	wxStringTokenizer helps you to break a string up into a number of tokens. It
	14	replaces the standard C function @c strtok() and also extends it in a
	15	number of ways.
7c913512	16
23324ae1 FM	17	To use this class, you should create a wxStringTokenizer object, give it the
	18	string to tokenize and also the delimiters which separate tokens in the string
	19	(by default, white space characters will be used).
7c913512	20
23324ae1	21	Then wxStringTokenizer::GetNextToken may be called
7c913512	22	repeatedly until it wxStringTokenizer::HasMoreTokens
23324ae1	23	returns @false.
7c913512	24
23324ae1	25	For example:
7c913512	26
23324ae1 FM	27	@code
	28	wxStringTokenizer tkz(wxT("first:second:third:fourth"), wxT(":"));
	29	while ( tkz.HasMoreTokens() )
	30	{
	31	wxString token = tkz.GetNextToken();
7c913512	32
23324ae1 FM	33	// process token here
	34	}
	35	@endcode
7c913512	36
23324ae1 FM	37	By default, wxStringTokenizer will behave in the same way as @c strtok() if
	38	the delimiters string only contains white space characters but, unlike the
	39	standard function, it will return empty tokens if this is not the case. This
	40	is helpful for parsing strictly formatted data where the number of fields is
	41	fixed but some of them may be empty (i.e. @c TAB or comma delimited text
	42	files).
7c913512 FM	43
7c913512 FM	44	The behaviour is governed by the last
23324ae1	45	@ref wxStringTokenizer::wxstringtokenizer
7c913512	46	constructor/wxStringTokenizer::SetString
23324ae1	47	parameter @c mode which may be one of the following:
7c913512 FM	48
	49
	50
23324ae1	51	@c wxTOKEN_DEFAULT
7c913512 FM	52
7c913512 FM	53
23324ae1 FM	54	Default behaviour (as described above):
	55	same as @c wxTOKEN_STRTOK if the delimiter string contains only
	56	whitespaces, same as @c wxTOKEN_RET_EMPTY otherwise
7c913512 FM	57
7c913512 FM	58
23324ae1	59	@c wxTOKEN_RET_EMPTY
7c913512 FM	60
7c913512 FM	61
23324ae1 FM	62	In this mode, the empty tokens in the
	63	middle of the string will be returned, i.e. @c "a::b:" will be tokenized in
	64	three tokens 'a', '' and 'b'. Notice that all trailing delimiters are ignored
	65	in this mode, not just the last one, i.e. a string @c "a::b::" would
	66	still result in the same set of tokens.
7c913512 FM	67
7c913512 FM	68
23324ae1	69	@c wxTOKEN_RET_EMPTY_ALL
7c913512 FM	70
7c913512 FM	71
23324ae1 FM	72	In this mode, empty trailing tokens
	73	(including the one after the last delimiter character) will be returned as
	74	well. The string @c "a::b:" will be tokenized in four tokens: the already
7c913512	75	mentioned ones and another empty one as the last one and a string
23324ae1	76	@c "a::b::" will have five tokens.
7c913512 FM	77
7c913512 FM	78
23324ae1	79	@c wxTOKEN_RET_DELIMS
7c913512 FM	80
7c913512 FM	81
23324ae1 FM	82	In this mode, the delimiter character
23324ae1 FM	83	after the end of the current token (there may be none if this is the last
7c913512	84	token) is returned appended to the token. Otherwise, it is the same mode as
23324ae1	85	@c wxTOKEN_RET_EMPTY. Notice that there is no mode like this one but
7c913512 FM	86	behaving like @c wxTOKEN_RET_EMPTY_ALL instead of
7c913512 FM	87	@c wxTOKEN_RET_EMPTY, use @c wxTOKEN_RET_EMPTY_ALL and
23324ae1	88	wxStringTokenizer::GetLastDelimiter to emulate it.
7c913512 FM	89
7c913512 FM	90
23324ae1	91	@c wxTOKEN_STRTOK
7c913512 FM	92
7c913512 FM	93
23324ae1 FM	94	In this mode the class behaves exactly like
23324ae1 FM	95	the standard @c strtok() function: the empty tokens are never returned.
7c913512 FM	96
	97
	98
23324ae1 FM	99	@library{wxbase}
23324ae1 FM	100	@category{data}
7c913512	101
e54c96f1	102	@see wxStringTokenize()
23324ae1 FM	103	*/
	104	class wxStringTokenizer : public wxObject
	105	{
	106	public:
	107	//@{
	108	/**
	109	Constructor. Pass the string to tokenize, a string containing delimiters
	110	and the mode specifying how the string should be tokenized.
	111	*/
	112	wxStringTokenizer();
7c913512 FM	113	wxStringTokenizer(const wxString& str,
	114	const wxString& delims = " \t\r\n",
	115	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
23324ae1 FM	116	//@}
	117
	118	/**
	119	Returns the number of tokens remaining in the input string. The number of
7c913512	120	tokens returned by this function is decremented each time
23324ae1 FM	121	GetNextToken() is called and when it
	122	reaches 0 HasMoreTokens() returns
	123	@false.
	124	*/
328f5751	125	int CountTokens() const;
23324ae1 FM	126
23324ae1 FM	127	/**
7c913512	128	Returns the delimiter which ended scan for the last token returned by
23324ae1 FM	129	GetNextToken() or @c NUL if
	130	there had been no calls to this function yet or if it returned the trailing
	131	empty token in @c wxTOKEN_RET_EMPTY_ALL mode.
e54c96f1 FM	132
e54c96f1 FM	133	@wxsince{2.7.0}
23324ae1 FM	134	*/
	135	wxChar GetLastDelimiter();
	136
	137	/**
	138	Returns the next token or empty string if the end of string was reached.
	139	*/
328f5751	140	wxString GetNextToken() const;
23324ae1 FM	141
	142	/**
	143	Returns the current position (i.e. one index after the last returned
	144	token or 0 if GetNextToken() has never been called) in the original
	145	string.
	146	*/
328f5751	147	size_t GetPosition() const;
23324ae1 FM	148
	149	/**
	150	Returns the part of the starting string without all token already extracted.
	151	*/
328f5751	152	wxString GetString() const;
23324ae1 FM	153
	154	/**
	155	Returns @true if the tokenizer has further tokens, @false if none are left.
	156	*/
328f5751	157	bool HasMoreTokens() const;
23324ae1 FM	158
	159	/**
	160	Initializes the tokenizer.
23324ae1 FM	161	Pass the string to tokenize, a string containing delimiters,
	162	and the mode specifying how the string should be tokenized.
	163	*/
	164	void SetString(const wxString& to_tokenize,
	165	const wxString& delims = " \t\r\n",
	166	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
	167	};
e54c96f1	168