src/common/tokenzr.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/tokenzr.cpp
   3 // Purpose:     String tokenizer
   4 // Author:      Guilhem Lavaux
   5 // Modified by: Vadim Zeitlin (almost full rewrite)
   6 // Created:     04/22/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) Guilhem Lavaux
   9 // Licence:     wxWindows licence
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 // For compilers that support precompilation, includes "wx.h".
  21 #include "wx/wxprec.h"
  22
  23 #ifdef __BORLANDC__
  24     #pragma hdrstop
  25 #endif
  26
  27 #include "wx/tokenzr.h"
  28
  29 #ifndef WX_PRECOMP
  30     #include "wx/arrstr.h"
  31 #endif
  32
  33 // Required for wxIs... functions
  34 #include <ctype.h>
  35
  36 // ============================================================================
  37 // implementation
  38 // ============================================================================
  39
  40 // ----------------------------------------------------------------------------
  41 // wxStringTokenizer construction
  42 // ----------------------------------------------------------------------------
  43
  44 wxStringTokenizer::wxStringTokenizer(const wxString& str,
  45                                      const wxString& delims,
  46                                      wxStringTokenizerMode mode)
  47 {
  48     SetString(str, delims, mode);
  49 }
  50
  51 void wxStringTokenizer::SetString(const wxString& str,
  52                                   const wxString& delims,
  53                                   wxStringTokenizerMode mode)
  54 {
  55     if ( mode == wxTOKEN_DEFAULT )
  56     {
  57         // by default, we behave like strtok() if the delimiters are only
  58         // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
  59         // whitespace delimiters, strtok() behaviour is better because we want
  60         // to count consecutive spaces as one delimiter)
  61         const wxChar *p;
  62         for ( p = delims.c_str(); *p; p++ )
  63         {
  64             if ( !wxIsspace(*p) )
  65                 break;
  66         }
  67
  68         if ( *p )
  69         {
  70             // not whitespace char in delims
  71             mode = wxTOKEN_RET_EMPTY;
  72         }
  73         else
  74         {
  75             // only whitespaces
  76             mode = wxTOKEN_STRTOK;
  77         }
  78     }
  79
  80     m_delims = delims;
  81     m_mode = mode;
  82
  83     Reinit(str);
  84 }
  85
  86 void wxStringTokenizer::Reinit(const wxString& str)
  87 {
  88     wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
  89
  90     m_string = str;
  91     m_pos = 0;
  92     m_lastDelim = _T('\0');
  93 }
  94
  95 // ----------------------------------------------------------------------------
  96 // access to the tokens
  97 // ----------------------------------------------------------------------------
  98
  99 // do we have more of them?
 100 bool wxStringTokenizer::HasMoreTokens() const
 101 {
 102     wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
 103
 104     if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos )
 105     {
 106         // there are non delimiter characters left, so we do have more tokens
 107         return true;
 108     }
 109
 110     switch ( m_mode )
 111     {
 112         case wxTOKEN_RET_EMPTY:
 113         case wxTOKEN_RET_DELIMS:
 114             // special hack for wxTOKEN_RET_EMPTY: we should return the initial
 115             // empty token even if there are only delimiters after it
 116             return m_pos == 0 && !m_string.empty();
 117
 118         case wxTOKEN_RET_EMPTY_ALL:
 119             // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had
 120             // already returned the trailing empty token after the last
 121             // delimiter by examining m_lastDelim: it is set to NUL if we run
 122             // up to the end of the string in GetNextToken(), but if it is not
 123             // NUL yet we still have this last token to return even if m_pos is
 124             // already at m_string.length()
 125             return m_pos < m_string.length() || m_lastDelim != _T('\0');
 126
 127         case wxTOKEN_INVALID:
 128         case wxTOKEN_DEFAULT:
 129             wxFAIL_MSG( _T("unexpected tokenizer mode") );
 130             // fall through
 131
 132         case wxTOKEN_STRTOK:
 133             // never return empty delimiters
 134             break;
 135     }
 136
 137     return false;
 138 }
 139
 140 // count the number of (remaining) tokens in the string
 141 size_t wxStringTokenizer::CountTokens() const
 142 {
 143     wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
 144
 145     // VZ: this function is IMHO not very useful, so it's probably not very
 146     //     important if its implementation here is not as efficient as it
 147     //     could be -- but OTOH like this we're sure to get the correct answer
 148     //     in all modes
 149     wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode);
 150
 151     size_t count = 0;
 152     while ( tkz.HasMoreTokens() )
 153     {
 154         count++;
 155
 156         (void)tkz.GetNextToken();
 157     }
 158
 159     return count;
 160 }
 161
 162 // ----------------------------------------------------------------------------
 163 // token extraction
 164 // ----------------------------------------------------------------------------
 165
 166 wxString wxStringTokenizer::GetNextToken()
 167 {
 168     wxString token;
 169     do
 170     {
 171         if ( !HasMoreTokens() )
 172         {
 173             break;
 174         }
 175
 176         // find the end of this token
 177         size_t pos = m_string.find_first_of(m_delims, m_pos);
 178
 179         // and the start of the next one
 180         if ( pos == wxString::npos )
 181         {
 182             // no more delimiters, the token is everything till the end of
 183             // string
 184             token.assign(m_string, m_pos, wxString::npos);
 185
 186             // skip the token
 187             m_pos = m_string.length();
 188
 189             // it wasn't terminated
 190             m_lastDelim = _T('\0');
 191         }
 192         else // we found a delimiter at pos
 193         {
 194             // in wxTOKEN_RET_DELIMS mode we return the delimiter character
 195             // with token, otherwise leave it out
 196             size_t len = pos - m_pos;
 197             if ( m_mode == wxTOKEN_RET_DELIMS )
 198                 len++;
 199
 200             token.assign(m_string, m_pos, len);
 201
 202             // skip the token and the trailing delimiter
 203             m_pos = pos + 1;
 204
 205             m_lastDelim = m_string[pos];
 206         }
 207     }
 208     while ( !AllowEmpty() && token.empty() );
 209
 210     return token;
 211 }
 212
 213 // ----------------------------------------------------------------------------
 214 // public functions
 215 // ----------------------------------------------------------------------------
 216
 217 wxArrayString wxStringTokenize(const wxString& str,
 218                                const wxString& delims,
 219                                wxStringTokenizerMode mode)
 220 {
 221     wxArrayString tokens;
 222     wxStringTokenizer tk(str, delims, mode);
 223     while ( tk.HasMoreTokens() )
 224     {
 225         tokens.Add(tk.GetNextToken());
 226     }
 227
 228     return tokens;
 229 }