[wxWidgets.git] / interface / tokenzr.h

/////////////////////////////////////////////////////////////////////////////
// Name:        tokenzr.h
// Purpose:     documentation for wxStringTokenizer class
// Author:      wxWidgets team
// RCS-ID:      $Id$
// Licence:     wxWindows license
/////////////////////////////////////////////////////////////////////////////

/**
    @class wxStringTokenizer
    @wxheader{tokenzr.h}
    
    wxStringTokenizer helps you to break a string up into a number of tokens. It
    replaces the standard C function @c strtok() and also extends it in a
    number of ways.
    
    To use this class, you should create a wxStringTokenizer object, give it the
    string to tokenize and also the delimiters which separate tokens in the string
    (by default, white space characters will be used).
    
    Then wxStringTokenizer::GetNextToken() may be called
    repeatedly until wxStringTokenizer::HasMoreTokens()
    returns @false.
    
    For example:
    
    @code
    wxStringTokenizer tkz(wxT("first:second:third:fourth"), wxT(":"));
    while ( tkz.HasMoreTokens() )
    {
        wxString token = tkz.GetNextToken();
    
        // process token here
    }
    @endcode
    
    By default, wxStringTokenizer will behave in the same way as @c strtok() if
    the delimiters string only contains white space characters but, unlike the
    standard function, it will return empty tokens if this is not the case. This
    is helpful for parsing strictly formatted data where the number of fields is
    fixed but some of them may be empty (e.g. @c TAB or comma delimited text
    files).
    
    The behaviour is governed by the last parameter, @c mode, of the
    wxStringTokenizer constructor and of wxStringTokenizer::SetString().
    It may be one of the following:
    
    
    
    @c wxTOKEN_DEFAULT
    
    
    Default behaviour (as described above):
    same as @c wxTOKEN_STRTOK if the delimiter string contains only
    white space characters, same as @c wxTOKEN_RET_EMPTY otherwise.
    
    
    @c wxTOKEN_RET_EMPTY
    
    
    In this mode, the empty tokens in the
    middle of the string will be returned, e.g. @c "a::b:" will be tokenized into
    three tokens: 'a', '' and 'b'. Notice that all trailing delimiters are ignored
    in this mode, not just the last one, so the string @c "a::b::" would
    still result in the same set of tokens.
    
    
    @c wxTOKEN_RET_EMPTY_ALL
    
    
    In this mode, empty trailing tokens
    (including the one after the last delimiter character) will be returned as
    well. The string @c "a::b:" will be tokenized into four tokens: the already
    mentioned ones and another empty one as the last one, and the string
    @c "a::b::" will have five tokens.
    
    
    @c wxTOKEN_RET_DELIMS
    
    
    In this mode, the delimiter character
    after the end of the current token (there may be none if this is the last
    token) is returned appended to the token. Otherwise, it is the same mode as
    @c wxTOKEN_RET_EMPTY. Notice that there is no mode like this one but
    behaving like @c wxTOKEN_RET_EMPTY_ALL instead of
    @c wxTOKEN_RET_EMPTY; use @c wxTOKEN_RET_EMPTY_ALL and
    wxStringTokenizer::GetLastDelimiter() to emulate it.
    
    
    @c wxTOKEN_STRTOK
    
    
    In this mode the class behaves exactly like
    the standard @c strtok() function: the empty tokens are never returned.
    
    
    
    @library{wxbase}
    @category{data}
    
    @seealso
    wxStringTokenize
*/
class wxStringTokenizer : public wxObject
{
public:
    //@{
    /**
        Constructor. Pass the string to tokenize, a string containing the
        delimiters and the mode specifying how the string should be tokenized.

        The default constructor takes none of these; use SetString() to
        supply them afterwards.

        @param str
            The string to tokenize.
        @param delims
            The characters separating the tokens; white space characters
            (space, TAB, CR and LF) by default.
        @param mode
            The tokenization mode, @c wxTOKEN_DEFAULT by default.
    */
    wxStringTokenizer();
    wxStringTokenizer(const wxString& str,
                      const wxString& delims = " \t\r\n",
                      wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
    //@}

    /**
        Returns the number of tokens remaining in the input string.

        The number of tokens returned by this function is decremented each
        time GetNextToken() is called and when it reaches 0 HasMoreTokens()
        returns @false.
    */
    int CountTokens() const;

    /**
        Returns the delimiter which ended the scan for the last token returned
        by GetNextToken(), or @c NUL if there had been no calls to that
        function yet or if it returned the trailing empty token in
        @c wxTOKEN_RET_EMPTY_ALL mode.

        @since 2.7.0
    */
    wxChar GetLastDelimiter() const;

    /**
        Returns the next token or an empty string if the end of the string was
        reached.
    */
    wxString GetNextToken();

    /**
        Returns the current position (i.e. one index after the last returned
        token or 0 if GetNextToken() has never been called) in the original
        string.
    */
    size_t GetPosition() const;

    /**
        Returns the part of the starting string without all the tokens already
        extracted.
    */
    wxString GetString() const;

    /**
        Returns @true if the tokenizer has further tokens, @false if none are
        left.
    */
    bool HasMoreTokens() const;

    /**
        Initializes the tokenizer.

        Pass the string to tokenize, a string containing the delimiters,
        and the mode specifying how the string should be tokenized.

        @param to_tokenize
            The string to tokenize.
        @param delims
            The characters separating the tokens; white space characters
            (space, TAB, CR and LF) by default.
        @param mode
            The tokenization mode, @c wxTOKEN_DEFAULT by default.
    */
    void SetString(const wxString& to_tokenize,
                   const wxString& delims = " \t\r\n",
                   wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
};
Commit	Line	Data
23324ae1 FM	1	/////////////////////////////////////////////////////////////////////////////
	2	// Name: tokenzr.h
	3	// Purpose: documentation for wxStringTokenizer class
	4	// Author: wxWidgets team
	5	// RCS-ID: $Id$
	6	// Licence: wxWindows license
	7	/////////////////////////////////////////////////////////////////////////////
	8
	9	/**
	10	@class wxStringTokenizer
	11	@wxheader{tokenzr.h}
	12
	13	wxStringTokenizer helps you to break a string up into a number of tokens. It
	14	replaces the standard C function @c strtok() and also extends it in a
	15	number of ways.
	16
	17	To use this class, you should create a wxStringTokenizer object, give it the
	18	string to tokenize and also the delimiters which separate tokens in the string
	19	(by default, white space characters will be used).
	20
	21	Then wxStringTokenizer::GetNextToken may be called
	22	repeatedly until it wxStringTokenizer::HasMoreTokens
	23	returns @false.
	24
	25	For example:
	26
	27	@code
	28	wxStringTokenizer tkz(wxT("first:second:third:fourth"), wxT(":"));
	29	while ( tkz.HasMoreTokens() )
	30	{
	31	wxString token = tkz.GetNextToken();
	32
	33	// process token here
	34	}
	35	@endcode
	36
	37	By default, wxStringTokenizer will behave in the same way as @c strtok() if
	38	the delimiters string only contains white space characters but, unlike the
	39	standard function, it will return empty tokens if this is not the case. This
	40	is helpful for parsing strictly formatted data where the number of fields is
	41	fixed but some of them may be empty (i.e. @c TAB or comma delimited text
	42	files).
	43
	44	The behaviour is governed by the last
	45	@ref wxStringTokenizer::wxstringtokenizer
	46	constructor/wxStringTokenizer::SetString
	47	parameter @c mode which may be one of the following:
	48
	49
	50
	51	@c wxTOKEN_DEFAULT
	52
	53
	54	Default behaviour (as described above):
	55	same as @c wxTOKEN_STRTOK if the delimiter string contains only
	56	whitespaces, same as @c wxTOKEN_RET_EMPTY otherwise
	57
	58
	59	@c wxTOKEN_RET_EMPTY
	60
	61
	62	In this mode, the empty tokens in the
	63	middle of the string will be returned, i.e. @c "a::b:" will be tokenized in
	64	three tokens 'a', '' and 'b'. Notice that all trailing delimiters are ignored
65	in this mode, not just the last one, i.e. a string @c "a::b::" would
66	still result in the same set of tokens.
67
68
69	@c wxTOKEN_RET_EMPTY_ALL
70
71
72	In this mode, empty trailing tokens
73	(including the one after the last delimiter character) will be returned as
74	well. The string @c "a::b:" will be tokenized in four tokens: the already
75	mentioned ones and another empty one as the last one and a string
76	@c "a::b::" will have five tokens.
77
78
79	@c wxTOKEN_RET_DELIMS
80
81
82	In this mode, the delimiter character
83	after the end of the current token (there may be none if this is the last
84	token) is returned appended to the token. Otherwise, it is the same mode as
85	@c wxTOKEN_RET_EMPTY. Notice that there is no mode like this one but
86	behaving like @c wxTOKEN_RET_EMPTY_ALL instead of
87	@c wxTOKEN_RET_EMPTY, use @c wxTOKEN_RET_EMPTY_ALL and
88	wxStringTokenizer::GetLastDelimiter to emulate it.
89
90
91	@c wxTOKEN_STRTOK
92
93
94	In this mode the class behaves exactly like
95	the standard @c strtok() function: the empty tokens are never returned.
96
97
98
99	@library{wxbase}
100	@category{data}
101
102	@seealso
103	wxStringTokenize
104	*/
105	class wxStringTokenizer : public wxObject
106	{
107	public:
108	//@{
109	/**
110	Constructor. Pass the string to tokenize, a string containing delimiters
111	and the mode specifying how the string should be tokenized.
112	*/
113	wxStringTokenizer();
114	wxStringTokenizer(const wxString& str,
115	const wxString& delims = " \t\r\n",
116	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
117	//@}
118
119	/**
120	Returns the number of tokens remaining in the input string. The number of
121	tokens returned by this function is decremented each time
122	GetNextToken() is called and when it
123	reaches 0 HasMoreTokens() returns
124	@false.
125	*/
126	int CountTokens();
127
128	/**
129	Returns the delimiter which ended scan for the last token returned by
130	GetNextToken() or @c NUL if
131	there had been no calls to this function yet or if it returned the trailing
132	empty token in @c wxTOKEN_RET_EMPTY_ALL mode.
133
134	This function is new since wxWidgets version 2.7.0
135	*/
136	wxChar GetLastDelimiter();
137
138	/**
139	Returns the next token or empty string if the end of string was reached.
140	*/
141	wxString GetNextToken();
142
143	/**
144	Returns the current position (i.e. one index after the last returned
145	token or 0 if GetNextToken() has never been called) in the original
146	string.
147	*/
148	size_t GetPosition();
149
150	/**
151	Returns the part of the starting string without all token already extracted.
152	*/
153	wxString GetString();
154
155	/**
156	Returns @true if the tokenizer has further tokens, @false if none are left.
157	*/
158	bool HasMoreTokens();
159
160	/**
161	Initializes the tokenizer.
162
163	Pass the string to tokenize, a string containing delimiters,
164	and the mode specifying how the string should be tokenized.
165	*/
166	void SetString(const wxString& to_tokenize,
167	const wxString& delims = " \t\r\n",
168	wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
169	};