src/common/tokenzr.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/tokenzr.cpp
   3 // Purpose:     String tokenizer
   4 // Author:      Guilhem Lavaux
   5 // Modified by: Vadim Zeitlin (almost full rewrite)
   6 // Created:     04/22/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) Guilhem Lavaux
   9 // Licence:     wxWindows licence
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 // For compilers that support precompilation, includes "wx.h".
  21 #include "wx/wxprec.h"
  22
  23 #ifdef __BORLANDC__
  24     #pragma hdrstop
  25 #endif
  26
  27 #include "wx/tokenzr.h"
  28
  29 #ifndef WX_PRECOMP
  30     #include "wx/arrstr.h"
  31     #include "wx/crt.h"
  32 #endif
  33
  34 // Required for wxIs... functions
  35 #include <ctype.h>
  36
  37 // ============================================================================
  38 // implementation
  39 // ============================================================================
  40
  41 // ----------------------------------------------------------------------------
  42 // wxStringTokenizer construction
  43 // ----------------------------------------------------------------------------
  44
  45 wxStringTokenizer::wxStringTokenizer(const wxString& str,
  46                                      const wxString& delims,
  47                                      wxStringTokenizerMode mode)
  48 {
  49     SetString(str, delims, mode);
  50 }
  51
  52 void wxStringTokenizer::SetString(const wxString& str,
  53                                   const wxString& delims,
  54                                   wxStringTokenizerMode mode)
  55 {
  56     if ( mode == wxTOKEN_DEFAULT )
  57     {
  58         // by default, we behave like strtok() if the delimiters are only
  59         // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
  60         // whitespace delimiters, strtok() behaviour is better because we want
  61         // to count consecutive spaces as one delimiter)
  62         const wxChar *p;
  63         for ( p = delims.c_str(); *p; p++ )
  64         {
  65             if ( !wxIsspace(*p) )
  66                 break;
  67         }
  68
  69         if ( *p )
  70         {
  71             // not whitespace char in delims
  72             mode = wxTOKEN_RET_EMPTY;
  73         }
  74         else
  75         {
  76             // only whitespaces
  77             mode = wxTOKEN_STRTOK;
  78         }
  79     }
  80
  81     m_delims = delims;
  82     m_mode = mode;
  83
  84     Reinit(str);
  85 }
  86
  87 void wxStringTokenizer::Reinit(const wxString& str)
  88 {
  89     wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
  90
  91     m_string = str;
  92     m_pos = 0;
  93     m_lastDelim = _T('\0');
  94 }
  95
  96 // ----------------------------------------------------------------------------
  97 // access to the tokens
  98 // ----------------------------------------------------------------------------
  99
 100 // do we have more of them?
 101 bool wxStringTokenizer::HasMoreTokens() const
 102 {
 103     wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
 104
 105     if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos )
 106     {
 107         // there are non delimiter characters left, so we do have more tokens
 108         return true;
 109     }
 110
 111     switch ( m_mode )
 112     {
 113         case wxTOKEN_RET_EMPTY:
 114         case wxTOKEN_RET_DELIMS:
 115             // special hack for wxTOKEN_RET_EMPTY: we should return the initial
 116             // empty token even if there are only delimiters after it
 117             return m_pos == 0 && !m_string.empty();
 118
 119         case wxTOKEN_RET_EMPTY_ALL:
 120             // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had
 121             // already returned the trailing empty token after the last
 122             // delimiter by examining m_lastDelim: it is set to NUL if we run
 123             // up to the end of the string in GetNextToken(), but if it is not
 124             // NUL yet we still have this last token to return even if m_pos is
 125             // already at m_string.length()
 126             return m_pos < m_string.length() || m_lastDelim != _T('\0');
 127
 128         case wxTOKEN_INVALID:
 129         case wxTOKEN_DEFAULT:
 130             wxFAIL_MSG( _T("unexpected tokenizer mode") );
 131             // fall through
 132
 133         case wxTOKEN_STRTOK:
 134             // never return empty delimiters
 135             break;
 136     }
 137
 138     return false;
 139 }
 140
 141 // count the number of (remaining) tokens in the string
 142 size_t wxStringTokenizer::CountTokens() const
 143 {
 144     wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
 145
 146     // VZ: this function is IMHO not very useful, so it's probably not very
 147     //     important if its implementation here is not as efficient as it
 148     //     could be -- but OTOH like this we're sure to get the correct answer
 149     //     in all modes
 150     wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode);
 151
 152     size_t count = 0;
 153     while ( tkz.HasMoreTokens() )
 154     {
 155         count++;
 156
 157         (void)tkz.GetNextToken();
 158     }
 159
 160     return count;
 161 }
 162
 163 // ----------------------------------------------------------------------------
 164 // token extraction
 165 // ----------------------------------------------------------------------------
 166
 167 wxString wxStringTokenizer::GetNextToken()
 168 {
 169     wxString token;
 170     do
 171     {
 172         if ( !HasMoreTokens() )
 173         {
 174             break;
 175         }
 176
 177         // find the end of this token
 178         size_t pos = m_string.find_first_of(m_delims, m_pos);
 179
 180         // and the start of the next one
 181         if ( pos == wxString::npos )
 182         {
 183             // no more delimiters, the token is everything till the end of
 184             // string
 185             token.assign(m_string, m_pos, wxString::npos);
 186
 187             // skip the token
 188             m_pos = m_string.length();
 189
 190             // it wasn't terminated
 191             m_lastDelim = _T('\0');
 192         }
 193         else // we found a delimiter at pos
 194         {
 195             // in wxTOKEN_RET_DELIMS mode we return the delimiter character
 196             // with token, otherwise leave it out
 197             size_t len = pos - m_pos;
 198             if ( m_mode == wxTOKEN_RET_DELIMS )
 199                 len++;
 200
 201             token.assign(m_string, m_pos, len);
 202
 203             // skip the token and the trailing delimiter
 204             m_pos = pos + 1;
 205
 206             m_lastDelim = m_string[pos];
 207         }
 208     }
 209     while ( !AllowEmpty() && token.empty() );
 210
 211     return token;
 212 }
 213
 214 // ----------------------------------------------------------------------------
 215 // public functions
 216 // ----------------------------------------------------------------------------
 217
 218 wxArrayString wxStringTokenize(const wxString& str,
 219                                const wxString& delims,
 220                                wxStringTokenizerMode mode)
 221 {
 222     wxArrayString tokens;
 223     wxStringTokenizer tk(str, delims, mode);
 224     while ( tk.HasMoreTokens() )
 225     {
 226         tokens.Add(tk.GetNextToken());
 227     }
 228
 229     return tokens;
 230 }