\section{\class{wxStringTokenizer}}\label{wxstringtokenizer}
-wxStringTokenizer helps you to break a string up into a number of tokens.
+wxStringTokenizer helps you to break a string up into a number of tokens. It
+replaces the standard C function {\tt strtok()} and also extends it in a
+number of ways.
To use this class, you should create a wxStringTokenizer object, give it the
string to tokenize and also the delimiters which separate tokens in the string
\begin{verbatim}
-wxStringTokenizer tkz("first:second:third::fivth", ":");
+wxStringTokenizer tkz("first:second:third:fourth", ":");
while ( tkz.HasMoreTokens() )
{
wxString token = tkz.GetNextToken();
}
\end{verbatim}
-Another feature of this class is that it may return the delimiter which
-was found after the token with it. In a simple case like above, you are not
-interested in this because the delimiter is always {\tt ':'}, but if the
-delimiters string has several characters, you might need to know which of them
-follows the current token. In this case, pass {\tt TRUE} to wxStringTokenizer
-constructor or \helpref{SetString}{wxstringtokenizersetstring} method and
-the delimiter will be appended to each returned token (except for the last
-one).
+By default, wxStringTokenizer will behave in the same way as {\tt strtok()} if
+the delimiters string only contains white space characters but, unlike the
+standard function, it will return empty tokens if this is not the case. This
+is helpful for parsing strictly formatted data where the number of fields is
+fixed but some of them may be empty (e.g. {\tt TAB} or comma delimited text
+files).
+
+The behaviour is governed by the last
+\helpref{constructor}{wxstringtokenizerwxstringtokenizer}/\helpref{SetString}{wxstringtokenizersetstring}
+parameter {\tt mode} which may be one of the following:
+
+\twocolwidtha{5cm}%
+\begin{twocollist}\itemsep=0pt
+\twocolitem{{\tt wxTOKEN\_DEFAULT}}{Default behaviour (as described above):
+same as {\tt wxTOKEN\_STRTOK} if the delimiter string contains only
+whitespace characters, same as {\tt wxTOKEN\_RET\_EMPTY} otherwise.}
+\twocolitem{{\tt wxTOKEN\_RET\_EMPTY}}{In this mode, the empty tokens in the
+middle of the string will be returned, e.g. {\tt "a::b:"} will be tokenized
+into three tokens `a', `' and `b'.}
+\twocolitem{{\tt wxTOKEN\_RET\_EMPTY\_ALL}}{In this mode, the empty trailing
+token (after the last delimiter character) will be returned as well. The
+string above will be tokenized into four tokens: the already mentioned ones
+and another empty one as the last one.}
+\twocolitem{{\tt wxTOKEN\_RET\_DELIMS}}{In this mode, the delimiter character
+after the end of the current token (there may be none if this is the last
+token) is returned appended to the token. Otherwise, it is the same mode as
+{\tt wxTOKEN\_RET\_EMPTY}.}
+\twocolitem{{\tt wxTOKEN\_STRTOK}}{In this mode the class behaves exactly like
+the standard {\tt strtok()} function. The empty tokens are never returned.}
+\end{twocollist}
\wxheading{Derived from}
\func{}{wxStringTokenizer}{\void}
-Default constructor.
+Default constructor. You must call
+\helpref{SetString}{wxstringtokenizersetstring} before calling any other
+methods.
-\func{}{wxStringTokenizer}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{bool }{ret\_delim = FALSE}}
+\func{}{wxStringTokenizer}{\param{const wxString\& }{str}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{wxStringTokenizerMode }{mode = wxTOKEN\_DEFAULT}}
-Constructor. Pass the string to tokenize, a string containing delimiters,
-a flag specifying whether to return delimiters with tokens.
-
-\membersection{wxStringTokenizer::\destruct{wxStringTokenizer}}\label{wxstringtokenizerdtor}
-
-\func{}{\destruct{wxStringTokenizer}}{\void}
-
-Destructor.
+Constructor. Pass the string to tokenize, a string containing delimiters
+and the mode specifying how the string should be tokenized.
\membersection{wxStringTokenizer::CountTokens}\label{wxstringtokenizercounttokens}
\constfunc{bool}{HasMoreTokens}{\void}
-Returns TRUE if the tokenizer has further tokens.
+Returns TRUE if the tokenizer has further tokens, FALSE if none are left.
\membersection{wxStringTokenizer::GetNextToken}\label{wxstringtokenizergetnexttoken}
-\constfunc{wxString}{GetNextToken}{\void}
+\func{wxString}{GetNextToken}{\void}
Returns the next token or empty string if the end of string was reached.
\membersection{wxStringTokenizer::SetString}\label{wxstringtokenizersetstring}
-\func{void}{SetString}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{bool }{ret\_delim = FALSE}}
+\func{void}{SetString}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{wxStringTokenizerMode }{mode = wxTOKEN\_DEFAULT}}
Initializes the tokenizer.
Pass the string to tokenize, a string containing delimiters,
-a flag specifying whether to return delimiters with tokens.
+and the mode specifying how the string should be tokenized.
/////////////////////////////////////////////////////////////////////////////
-// Name: tokenzr.h
-// Purpose: String tokenizer
+// Name: wx/tokenzr.h
+// Purpose: String tokenizer - a C++ replacement for strtok(3)
// Author: Guilhem Lavaux
// Modified by: Vadim Zeitlin
// Created: 04/22/98
#include "wx/object.h"
#include "wx/string.h"
+// ----------------------------------------------------------------------------
+// constants
+// ----------------------------------------------------------------------------
+
// default: delimiters are usual white space characters
#define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
+// wxStringTokenizer mode flags which determine its behaviour
+enum wxStringTokenizerMode
+{
+ wxTOKEN_INVALID = -1, // set by the default ctor until SetString() is called
+ wxTOKEN_DEFAULT, // wxTOKEN_STRTOK if delims are all whitespace, wxTOKEN_RET_EMPTY otherwise
+ wxTOKEN_RET_EMPTY, // return empty tokens found in the middle of the string
+ wxTOKEN_RET_EMPTY_ALL, // as wxTOKEN_RET_EMPTY but return the trailing empty token too
+ wxTOKEN_RET_DELIMS, // return the delimiter appended to the token (implies RET_EMPTY)
+ wxTOKEN_STRTOK // behave exactly like strtok(3): never return empty tokens
+};
+
+// ----------------------------------------------------------------------------
+// wxStringTokenizer: replaces infamous strtok() and has some other features
+// ----------------------------------------------------------------------------
+
class WXDLLEXPORT wxStringTokenizer : public wxObject
{
public:
- // ctors and such
- wxStringTokenizer() { m_retdelims = FALSE; m_pos = 0; }
- wxStringTokenizer(const wxString& to_tokenize,
+ // ctors and initializers
+ // default ctor, call SetString() later: until then the tokenizer is
+ // invalid, but GetPosition() is still 0 and no token is pending
+ wxStringTokenizer() { m_mode = wxTOKEN_INVALID; m_pos = 0; m_hasMore = FALSE; }
+ // ctor which gives us the string, the delimiters and the tokenizing mode
+ wxStringTokenizer(const wxString& str,
const wxString& delims = wxDEFAULT_DELIMITERS,
- bool ret_delim = FALSE);
- void SetString(const wxString& to_tokenize,
+ wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
+
+ // args are same as for the non default ctor above
+ void SetString(const wxString& str,
const wxString& delims = wxDEFAULT_DELIMITERS,
- bool ret_delim = FALSE);
- virtual ~wxStringTokenizer();
+ wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
+
+ // reinitialize the tokenizer with the same delimiters/mode
+ void Reinit(const wxString& str);
- // count tokens/get next token
+ // tokens access
+ // count the tokens which remain to be extracted
size_t CountTokens() const;
- bool HasMoreTokens() { return m_hasMore; }
+ // are there any tokens left, i.e. did we not reach the end of the
+ // string yet?
+ bool HasMoreTokens() const;
+ // get the next token, will return empty string if !HasMoreTokens()
wxString GetNextToken();
- // One note about GetString -- it returns the string
- // remaining after the previous tokens have been removed,
- // not the original string
+ // get current tokenizer state
+ // returns the part of the string which remains to tokenize (*not* the
+ // initial string)
wxString GetString() const { return m_string; }
- // returns the current position (i.e. one index after the last returned
- // token or 0 if GetNextToken() has never been called) in the original
- // string
+ // returns the current position (i.e. one index after the last
+ // returned token or 0 if GetNextToken() has never been called) in the
+ // original string
size_t GetPosition() const { return m_pos; }
+ // misc
+ // get the current mode - can be different from the one passed to the
+ // ctor if it was wxTOKEN_DEFAULT
+ wxStringTokenizerMode GetMode() const { return m_mode; }
+
+ // backwards compatibility section from now on
+ // -------------------------------------------
+
// for compatibility only, use GetNextToken() instead
wxString NextToken() { return GetNextToken(); }
+ // compatibility only, don't use: ret_delim == TRUE selects
+ // wxTOKEN_RET_DELIMS, FALSE selects wxTOKEN_DEFAULT
+ void SetString(const wxString& to_tokenize,
+ const wxString& delims,
+ bool ret_delim)
+ {
+ SetString(to_tokenize, delims,
+ ret_delim ? wxTOKEN_RET_DELIMS : wxTOKEN_DEFAULT);
+ }
+
+ // compatibility only, don't use: forwards to the bool SetString() overload
+ wxStringTokenizer(const wxString& to_tokenize,
+ const wxString& delims,
+ bool ret_delim)
+ {
+ SetString(to_tokenize, delims, ret_delim);
+ }
+
protected:
+ // FALSE until SetString() has been called with a valid mode
+ bool IsOk() const { return m_mode != wxTOKEN_INVALID; }
+
wxString m_string, // the (rest of) string to tokenize
m_delims; // all delimiters
size_t m_pos; // the position in the original string
- bool m_retdelims; // if TRUE, return delims with tokens
- bool m_hasMore; // do we have more tokens?
+ wxStringTokenizerMode m_mode; // see wxTOKEN_XXX values
+
+ bool m_hasMore; // do we have more (possibly empty) tokens?
};
#endif // _WX_TOKENZRH
#if wxUSE_LONGLONG_NATIVE
wxASSERT_MSG( c == wxLongLongNative(a.GetHi(), a.GetLo()) +
wxLongLongNative(b.GetHi(), b.GetLo()),
- "addition failure" );
+ "addition failure" );
#else // !wxUSE_LONGLONG_NATIVE
wxASSERT_MSG( c - b == a, "addition failure" );
#endif // wxUSE_LONGLONG_NATIVE
if weekNumMonth < 0:
weekNumMonth = weekNumMonth + 53
return weekNumMonth
-
+
def GetLastSundayBefore(dt):
if dt.iso_week[2] == 7:
return dt
{
wxDateSpan span;
const char *name;
- } testArithmData[] =
+ } testArithmData[] =
{
{ wxDateSpan::Day(), "day" },
{ wxDateSpan::Week(), "week" },
{ wxDateSpan::Year(), "year" },
{ wxDateSpan(1, 2, 3, 4), "year, 2 months, 3 weeks, 4 days" },
};
-
+
wxDateTime dt(29, wxDateTime::Dec, 1999), dt1, dt2;
for ( size_t n = 0; n < WXSIZEOF(testArithmData); n++ )
#include "wx/timer.h"
#include "wx/tokenzr.h"
+// Exercises several wxString constructors: each string is built, printed and
+// compared with the expected value, printing "(ok)" or an error message.
+static void TestStringConstruction()
+{
+ puts("*** Testing wxString constructores ***");
+
+ // "args" is the parenthesized ctor argument list, "res" the expected value
+ #define TEST_CTOR(args, res) \
+ { \
+ wxString s args ; \
+ printf("wxString%s = %s ", #args, s.c_str()); \
+ if ( s == res ) \
+ { \
+ puts("(ok)"); \
+ } \
+ else \
+ { \
+ printf("(ERROR: should be %s)\n", res); \
+ } \
+ }
+
+ TEST_CTOR((_T('Z'), 4), _T("ZZZZ"));
+ TEST_CTOR((_T("Hello"), 4), _T("Hell"));
+ TEST_CTOR((_T("Hello"), 5), _T("Hello"));
+ // TEST_CTOR((_T("Hello"), 6), _T("Hello")); -- should give assert failure
+
+ // construct from a [start, end) pointer range inside a longer string
+ static const wxChar *s = _T("?really!");
+ const wxChar *start = wxStrchr(s, _T('r'));
+ const wxChar *end = wxStrchr(s, _T('!'));
+ TEST_CTOR((start, end), _T("really"));
+
+ puts("");
+
+ // the helper macro is local to this function, don't leak it further
+ #undef TEST_CTOR
+}
+
static void TestString()
{
wxStopWatch sw;
{
puts("*** Testing wxStringTokenizer ***");
+ static const wxChar *modeNames[] =
+ {
+ _T("default"),
+ _T("return empty"),
+ _T("return all empty"),
+ _T("with delims"),
+ _T("like strtok"),
+ };
+
static const struct StringTokenizerTest
{
- const wxChar *str; // string to tokenize
- const wxChar *delims; // delimiters to use
- size_t count; // count of token
- bool with; // return tokens with delimiters?
- } tokenizerTestData[] =
- {
- { _T(""), _T(" "), 0, FALSE },
- { _T("Hello, world"), _T(" "), 2, FALSE },
- { _T("Hello, world"), _T(","), 2, FALSE },
- { _T("Hello, world!"), _T(",!"), 3, TRUE },
- { _T("username:password:uid:gid:gecos:home:shell"), _T(":"), 7, FALSE },
- { _T("1 \t3\t4 6 "), wxDEFAULT_DELIMITERS, 9, TRUE },
- { _T("01/02/99"), _T("/-"), 3, FALSE },
+ const wxChar *str; // string to tokenize
+ const wxChar *delims; // delimiters to use
+ size_t count; // count of token
+ wxStringTokenizerMode mode; // how should we tokenize it
+ } tokenizerTestData[] =
+ {
+ { _T(""), _T(" "), 0 },
+ { _T("Hello, world"), _T(" "), 2 },
+ { _T("Hello, world "), _T(" "), 2 },
+ { _T("Hello, world"), _T(","), 2 },
+ { _T("Hello, world!"), _T(",!"), 2 },
+ { _T("Hello,, world!"), _T(",!"), 3 },
+ { _T("Hello, world!"), _T(",!"), 3, wxTOKEN_RET_EMPTY_ALL },
+ { _T("username:password:uid:gid:gecos:home:shell"), _T(":"), 7 },
+ { _T("1 \t3\t4 6 "), wxDEFAULT_DELIMITERS, 4 },
+ { _T("1 \t3\t4 6 "), wxDEFAULT_DELIMITERS, 6, wxTOKEN_RET_EMPTY },
+ { _T("1 \t3\t4 6 "), wxDEFAULT_DELIMITERS, 9, wxTOKEN_RET_EMPTY_ALL },
+ { _T("01/02/99"), _T("/-"), 3 },
+ { _T("01-02/99"), _T("/-"), 3, wxTOKEN_RET_DELIMS },
};
for ( size_t n = 0; n < WXSIZEOF(tokenizerTestData); n++ )
{
const StringTokenizerTest& tt = tokenizerTestData[n];
- wxStringTokenizer tkz(tt.str, tt.delims, tt.with);
+ wxStringTokenizer tkz(tt.str, tt.delims, tt.mode);
size_t count = tkz.CountTokens();
- printf(_T("String '%s' has %u tokens delimited by '%s' "),
- tt.str,
+ printf(_T("String '%s' has %u tokens delimited by '%s' (mode = %s) "),
+ MakePrintable(tt.str).c_str(),
count,
- MakePrintable(tt.delims).c_str());
+ MakePrintable(tt.delims).c_str(),
+ modeNames[tkz.GetMode()]);
if ( count == tt.count )
{
puts(_T("(ok)"));
continue;
}
+ // if we emulate strtok(), check that we do it correctly
+ wxChar *buf, *s, *last;
+
+ if ( tkz.GetMode() == wxTOKEN_STRTOK )
+ {
+ buf = new wxChar[wxStrlen(tt.str) + 1];
+ wxStrcpy(buf, tt.str);
+
+ s = wxStrtok(buf, tt.delims, &last);
+ }
+ else
+ {
+ buf = NULL;
+ }
+
// now show the tokens themselves
size_t count2 = 0;
while ( tkz.HasMoreTokens() )
{
- printf(_T("\ttoken %u: '%s'\n"),
+ wxString token = tkz.GetNextToken();
+
+ printf(_T("\ttoken %u: '%s'"),
++count2,
- MakePrintable(tkz.GetNextToken()).c_str());
+ MakePrintable(token).c_str());
+
+ if ( buf )
+ {
+ if ( token == s )
+ {
+ puts(" (ok)");
+ }
+ else
+ {
+ printf(" (ERROR: should be %s)\n", s);
+ }
+
+ s = wxStrtok(NULL, tt.delims, &last);
+ }
+ else
+ {
+ // nothing to compare with
+ puts("");
+ }
}
if ( count2 != count )
{
- puts(_T("ERROR: token count mismatch"));
+ puts(_T("\tERROR: token count mismatch"));
}
+
+ delete [] buf;
}
puts("");
}
if ( 0 )
{
+ TestStringConstruction();
TestStringSub();
TestStringFormat();
TestStringFind();
}
- TestStringTokenizer();
+ TestStringTokenizer();
#endif // TEST_STRINGS
#ifdef TEST_ARRAYS
// wxStringTokenizer construction
// ----------------------------------------------------------------------------
-wxStringTokenizer::wxStringTokenizer(const wxString& to_tokenize,
+wxStringTokenizer::wxStringTokenizer(const wxString& str,
const wxString& delims,
- bool ret_delims)
+ wxStringTokenizerMode mode)
{
- SetString(to_tokenize, delims, ret_delims);
+ SetString(str, delims, mode); // all the initialization is delegated to SetString()
}
-void wxStringTokenizer::SetString(const wxString& to_tokenize,
+void wxStringTokenizer::SetString(const wxString& str,
const wxString& delims,
- bool ret_delim)
+ wxStringTokenizerMode mode)
{
- m_string = to_tokenize;
+ if ( mode == wxTOKEN_DEFAULT )
+ {
+ // resolve the default mode: behave like strtok() if the delimiters are
+ // only whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
+ // whitespace delimiters, strtok() behaviour is better because we want
+ // to count consecutive spaces as one delimiter)
+ const wxChar *p;
+ for ( p = delims.c_str(); *p; p++ ) // stops at the first non-whitespace delimiter
+ {
+ if ( !wxIsspace(*p) )
+ break;
+ }
+
+ if ( *p )
+ {
+ // the loop stopped early: delims contain a non-whitespace character
+ mode = wxTOKEN_RET_EMPTY;
+ }
+ else
+ {
+ // the end of delims was reached: only whitespace (or no) delimiters
+ mode = wxTOKEN_STRTOK;
+ }
+ }
+
m_delims = delims;
- m_retdelims = ret_delim;
- m_pos = 0;
+ m_mode = mode;
- // empty string doesn't have any tokens
- m_hasMore = !m_string.empty();
+ Reinit(str); // stores the string and resets m_pos and m_hasMore
}
-wxStringTokenizer::~wxStringTokenizer()
+void wxStringTokenizer::Reinit(const wxString& str)
{
+ wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
+
+ m_string = str; // the delimiters and the mode are kept unchanged
+ m_pos = 0;
+
+ // an empty string doesn't have any tokens, not even an empty one
+ m_hasMore = !m_string.empty();
}
// ----------------------------------------------------------------------------
-// count the number of tokens in the string
+// access to the tokens
// ----------------------------------------------------------------------------
-size_t wxStringTokenizer::CountTokens() const
+// are there any tokens left to be returned by GetNextToken()?
+bool wxStringTokenizer::HasMoreTokens() const
{
- size_t pos = 0;
- size_t count = 0;
- for ( ;; )
+ wxCHECK_MSG( IsOk(), FALSE, _T("you should call SetString() first") );
+
+ if ( m_string.find_first_not_of(m_delims) == wxString::npos )
{
- pos = m_string.find_first_of(m_delims, pos);
- if ( pos == wxString::npos )
- break;
+ // no non empty tokens left, but in wxTOKEN_RET_EMPTY_ALL mode we
+ // may still return TRUE if GetNextToken() wasn't called yet for the
+ // last trailing empty token
+ return m_mode == wxTOKEN_RET_EMPTY_ALL ? m_hasMore : FALSE;
+ }
+ else
+ {
+ // there are non delimiter characters left, hence we do have more
+ // tokens
+ return TRUE;
+ }
+}
- count++; // one more token found
+// count the number of tokens remaining in the string; the tokenizer state
+// (remaining string, position and pending-token flag) is left unchanged
+size_t wxStringTokenizer::CountTokens() const
+{
+ wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
- pos++; // skip delimiter
- }
+ // VZ: this function is IMHO not very useful, so it's probably not very
+ // important if its implementation here is not as efficient as it
+ // could be - but OTOH like this we're sure to get the correct answer
+ // in all modes
+ wxStringTokenizer *self = (wxStringTokenizer *)this; // const_cast
+
+ // remember the complete state: Reinit() alone would reset m_pos to 0 and
+ // recompute m_hasMore, losing a pending trailing empty token in
+ // wxTOKEN_RET_EMPTY_ALL mode
+ wxString stringInitial = m_string;
+ size_t posInitial = m_pos;
+ bool hasMoreInitial = m_hasMore;
- // normally, we didn't count the last token in the loop above - so add it
- // unless the string was empty from the very beginning, in which case it
- // still has 0 (and not 1) tokens
- if ( !m_string.empty() )
+ size_t count = 0;
+ while ( self->HasMoreTokens() )
{
count++;
+
+ (void)self->GetNextToken();
}
+ // restore the state we had on entry
+ self->Reinit(stringInitial);
+ self->m_pos = posInitial;
+ self->m_hasMore = hasMoreInitial;
+
return count;
}
wxString wxStringTokenizer::GetNextToken()
{
+ // strtok() doesn't return empty tokens, all other modes do
+ bool allowEmpty = m_mode != wxTOKEN_STRTOK;
+
wxString token;
- if ( HasMoreTokens() )
+ do
{
- size_t pos = m_string.find_first_of(m_delims); // end of token
- size_t pos2; // start of the next one
- if ( pos != wxString::npos )
+ if ( !HasMoreTokens() )
{
- // return the delimiter too
- pos2 = pos + 1;
+ break;
}
- else
+ // find the end of this token, i.e. the first delimiter (if any)
+ size_t pos = m_string.find_first_of(m_delims);
+
+ // and the start of the next one
+ if ( pos == wxString::npos )
{
- pos2 = m_string.length();
+ // no more delimiters, the token is everything till the end of
+ // the string
+ token = m_string;
+
+ m_pos += m_string.length();
+ m_string.clear();
- // no more tokens in this string
+ // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
+ // mode (we will return the trailing one right now in this case)
m_hasMore = FALSE;
}
+ else
+ {
+ size_t pos2 = pos + 1;
- token = wxString(m_string, m_retdelims ? pos2 : pos);
+ // in wxTOKEN_RET_DELIMS mode we return the delimiter character
+ // together with the token
+ token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
+ : pos);
- // remove token with the following it delimiter from string
- m_string.erase(0, pos2);
+ // remove the token and the delimiter following it from the string
+ m_string.erase(0, pos2);
- // keep track of the position in the original string too
- m_pos += pos2;
+ // keep track of the position in the original string too
+ m_pos += pos2;
+ }
}
- //else: no more tokens, return empty token
+ while ( !allowEmpty && token.empty() );
return token;
}