src/common/tokenzr.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        tokenzr.cpp
   3 // Purpose:     String tokenizer
   4 // Author:      Guilhem Lavaux
   5 // Modified by: Vadim Zeitlin (almost full rewrite)
   6 // Created:     04/22/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) Guilhem Lavaux
   9 // Licence:     wxWindows licence
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 // For compilers that support precompilation, includes "wx.h".
  21 #include "wx/wxprec.h"
  22
  23 #ifdef __BORLANDC__
  24     #pragma hdrstop
  25 #endif
  26
  27 #include "wx/tokenzr.h"
  28 #include "wx/arrstr.h"
  29
  30 // Required for wxIs... functions
  31 #include <ctype.h>
  32
  33 // ============================================================================
  34 // implementation
  35 // ============================================================================
  36
  37 // ----------------------------------------------------------------------------
  38 // wxStringTokenizer construction
  39 // ----------------------------------------------------------------------------
  40
  41 wxStringTokenizer::wxStringTokenizer(const wxString& str,
  42                                      const wxString& delims,
  43                                      wxStringTokenizerMode mode)
  44 {
  45     SetString(str, delims, mode);
  46 }
  47
  48 void wxStringTokenizer::SetString(const wxString& str,
  49                                   const wxString& delims,
  50                                   wxStringTokenizerMode mode)
  51 {
  52     if ( mode == wxTOKEN_DEFAULT )
  53     {
  54         // by default, we behave like strtok() if the delimiters are only
  55         // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
  56         // whitespace delimiters, strtok() behaviour is better because we want
  57         // to count consecutive spaces as one delimiter)
  58         const wxChar *p;
  59         for ( p = delims.c_str(); *p; p++ )
  60         {
  61             if ( !wxIsspace(*p) )
  62                 break;
  63         }
  64
  65         if ( *p )
  66         {
  67             // not whitespace char in delims
  68             mode = wxTOKEN_RET_EMPTY;
  69         }
  70         else
  71         {
  72             // only whitespaces
  73             mode = wxTOKEN_STRTOK;
  74         }
  75     }
  76
  77     m_delims = delims;
  78     m_mode = mode;
  79
  80     Reinit(str);
  81 }
  82
  83 void wxStringTokenizer::Reinit(const wxString& str)
  84 {
  85     wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
  86
  87     m_string = str;
  88     m_pos = 0;
  89     m_lastDelim = _T('\0');
  90 }
  91
  92 // ----------------------------------------------------------------------------
  93 // access to the tokens
  94 // ----------------------------------------------------------------------------
  95
  96 // do we have more of them?
  97 bool wxStringTokenizer::HasMoreTokens() const
  98 {
  99     wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
 100
 101     if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos )
 102     {
 103         // there are non delimiter characters left, so we do have more tokens
 104         return true;
 105     }
 106
 107     switch ( m_mode )
 108     {
 109         case wxTOKEN_RET_EMPTY:
 110         case wxTOKEN_RET_DELIMS:
 111             // special hack for wxTOKEN_RET_EMPTY: we should return the initial
 112             // empty token even if there are only delimiters after it
 113             return m_pos == 0 && !m_string.empty();
 114
 115         case wxTOKEN_RET_EMPTY_ALL:
 116             // special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had
 117             // already returned the trailing empty token after the last
 118             // delimiter by examining m_lastDelim: it is set to NUL if we run
 119             // up to the end of the string in GetNextToken(), but if it is not
 120             // NUL yet we still have this last token to return even if m_pos is
 121             // already at m_string.length()
 122             return m_pos < m_string.length() || m_lastDelim != _T('\0');
 123
 124         case wxTOKEN_INVALID:
 125         case wxTOKEN_DEFAULT:
 126             wxFAIL_MSG( _T("unexpected tokenizer mode") );
 127             // fall through
 128
 129         case wxTOKEN_STRTOK:
 130             // never return empty delimiters
 131             break;
 132     }
 133
 134     return false;
 135 }
 136
 137 // count the number of (remaining) tokens in the string
 138 size_t wxStringTokenizer::CountTokens() const
 139 {
 140     wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
 141
 142     // VZ: this function is IMHO not very useful, so it's probably not very
 143     //     important if its implementation here is not as efficient as it
 144     //     could be -- but OTOH like this we're sure to get the correct answer
 145     //     in all modes
 146     wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode);
 147
 148     size_t count = 0;
 149     while ( tkz.HasMoreTokens() )
 150     {
 151         count++;
 152
 153         (void)tkz.GetNextToken();
 154     }
 155
 156     return count;
 157 }
 158
 159 // ----------------------------------------------------------------------------
 160 // token extraction
 161 // ----------------------------------------------------------------------------
 162
 163 wxString wxStringTokenizer::GetNextToken()
 164 {
 165     wxString token;
 166     do
 167     {
 168         if ( !HasMoreTokens() )
 169         {
 170             break;
 171         }
 172
 173         // find the end of this token
 174         size_t pos = m_string.find_first_of(m_delims, m_pos);
 175
 176         // and the start of the next one
 177         if ( pos == wxString::npos )
 178         {
 179             // no more delimiters, the token is everything till the end of
 180             // string
 181             token.assign(m_string, m_pos, wxString::npos);
 182
 183             // skip the token
 184             m_pos = m_string.length();
 185
 186             // it wasn't terminated
 187             m_lastDelim = _T('\0');
 188         }
 189         else // we found a delimiter at pos
 190         {
 191             // in wxTOKEN_RET_DELIMS mode we return the delimiter character
 192             // with token, otherwise leave it out
 193             size_t len = pos - m_pos;
 194             if ( m_mode == wxTOKEN_RET_DELIMS )
 195                 len++;
 196
 197             token.assign(m_string, m_pos, len);
 198
 199             // skip the token and the trailing delimiter
 200             m_pos = pos + 1;
 201
 202             m_lastDelim = m_string[pos];
 203         }
 204     }
 205     while ( !AllowEmpty() && token.empty() );
 206
 207     return token;
 208 }
 209
 210 // ----------------------------------------------------------------------------
 211 // public functions
 212 // ----------------------------------------------------------------------------
 213
 214 wxArrayString wxStringTokenize(const wxString& str,
 215                                const wxString& delims,
 216                                wxStringTokenizerMode mode)
 217 {
 218     wxArrayString tokens;
 219     wxStringTokenizer tk(str, delims, mode);
 220     while ( tk.HasMoreTokens() )
 221     {
 222         tokens.Add(tk.GetNextToken());
 223     }
 224
 225     return tokens;
 226 }