src/common/tokenzr.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        tokenzr.cpp
   3 // Purpose:     String tokenizer
   4 // Author:      Guilhem Lavaux
   5 // Modified by: Vadim Zeitlin (almost full rewrite)
   6 // Created:     04/22/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) Guilhem Lavaux
   9 // Licence:     wxWindows licence
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  21     #pragma implementation "tokenzr.h"
  22 #endif
  23
  24 // For compilers that support precompilation, includes "wx.h".
  25 #include "wx/wxprec.h"
  26
  27 #ifdef __BORLANDC__
  28     #pragma hdrstop
  29 #endif
  30
  31 #include "wx/tokenzr.h"
  32 #include "wx/arrstr.h"
  33
  34 // Required for wxIs... functions
  35 #include <ctype.h>
  36
  37 // ============================================================================
  38 // implementation
  39 // ============================================================================
  40
  41 // ----------------------------------------------------------------------------
  42 // wxStringTokenizer construction
  43 // ----------------------------------------------------------------------------
  44
  45 wxStringTokenizer::wxStringTokenizer(const wxString& str,
  46                                      const wxString& delims,
  47                                      wxStringTokenizerMode mode)
  48 {
  49     SetString(str, delims, mode);
  50 }
  51
  52 void wxStringTokenizer::SetString(const wxString& str,
  53                                   const wxString& delims,
  54                                   wxStringTokenizerMode mode)
  55 {
  56     if ( mode == wxTOKEN_DEFAULT )
  57     {
  58         // by default, we behave like strtok() if the delimiters are only
  59         // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
  60         // whitespace delimiters, strtok() behaviour is better because we want
  61         // to count consecutive spaces as one delimiter)
  62         const wxChar *p;
  63         for ( p = delims.c_str(); *p; p++ )
  64         {
  65             if ( !wxIsspace(*p) )
  66                 break;
  67         }
  68
  69         if ( *p )
  70         {
  71             // not whitespace char in delims
  72             mode = wxTOKEN_RET_EMPTY;
  73         }
  74         else
  75         {
  76             // only whitespaces
  77             mode = wxTOKEN_STRTOK;
  78         }
  79     }
  80
  81     m_delims = delims;
  82     m_mode = mode;
  83
  84     Reinit(str);
  85 }
  86
  87 void wxStringTokenizer::Reinit(const wxString& str)
  88 {
  89     wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
  90
  91     m_string = str;
  92     m_pos = 0;
  93
  94     // empty string doesn't have any tokens
  95     m_hasMore = !m_string.empty();
  96 }
  97
  98 // ----------------------------------------------------------------------------
  99 // access to the tokens
 100 // ----------------------------------------------------------------------------
 101
 102 // do we have more of them?
 103 bool wxStringTokenizer::HasMoreTokens() const
 104 {
 105     wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
 106
 107     if ( m_string.find_first_not_of(m_delims) == wxString::npos )
 108     {
 109         // no non empty tokens left, but in 2 cases we still may return true if
 110         // GetNextToken() wasn't called yet for this empty token:
 111         //
 112         //   a) in wxTOKEN_RET_EMPTY_ALL mode we always do it
 113         //   b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a
 114         //      string containing only the delimiter: then there is an empty
 115         //      token just before it
 116         return (m_mode == wxTOKEN_RET_EMPTY_ALL) ||
 117                (m_mode == wxTOKEN_RET_EMPTY && m_pos == 0)
 118                     ? m_hasMore : false;
 119     }
 120     else
 121     {
 122         // there are non delimiter characters left, hence we do have more
 123         // tokens
 124         return true;
 125     }
 126 }
 127
 128 // count the number of tokens in the string
 129 size_t wxStringTokenizer::CountTokens() const
 130 {
 131     wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
 132
 133     // VZ: this function is IMHO not very useful, so it's probably not very
 134     //     important if it's implementation here is not as efficient as it
 135     //     could be - but OTOH like this we're sure to get the correct answer
 136     //     in all modes
 137     wxStringTokenizer *self = (wxStringTokenizer *)this;    // const_cast
 138     wxString stringInitial = m_string;
 139
 140     size_t count = 0;
 141     while ( self->HasMoreTokens() )
 142     {
 143         count++;
 144
 145         (void)self->GetNextToken();
 146     }
 147
 148     self->Reinit(stringInitial);
 149
 150     return count;
 151 }
 152
 153 // ----------------------------------------------------------------------------
 154 // token extraction
 155 // ----------------------------------------------------------------------------
 156
 157 wxString wxStringTokenizer::GetNextToken()
 158 {
 159     // strtok() doesn't return empty tokens, all other modes do
 160     bool allowEmpty = m_mode != wxTOKEN_STRTOK;
 161
 162     wxString token;
 163     do
 164     {
 165         if ( !HasMoreTokens() )
 166         {
 167             break;
 168         }
 169         // find the end of this token
 170         size_t pos = m_string.find_first_of(m_delims);
 171
 172         // and the start of the next one
 173         if ( pos == wxString::npos )
 174         {
 175             // no more delimiters, the token is everything till the end of
 176             // string
 177             token = m_string;
 178
 179             m_pos += m_string.length();
 180             m_string.clear();
 181
 182             // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
 183             // mode (we will return the trailing one right now in this case)
 184             m_hasMore = false;
 185         }
 186         else
 187         {
 188             size_t pos2 = pos + 1;
 189
 190             // in wxTOKEN_RET_DELIMS mode we return the delimiter character
 191             // with token
 192             token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
 193                                                                     : pos);
 194
 195             // remove token with the following it delimiter from string
 196             m_string.erase(0, pos2);
 197
 198             // keep track of the position in the original string too
 199             m_pos += pos2;
 200         }
 201     }
 202     while ( !allowEmpty && token.empty() );
 203
 204     return token;
 205 }
 206
 207 // ----------------------------------------------------------------------------
 208 // public functions
 209 // ----------------------------------------------------------------------------
 210
 211 wxArrayString wxStringTokenize(const wxString& str,
 212                                const wxString& delims,
 213                                wxStringTokenizerMode mode)
 214 {
 215     wxArrayString tokens;
 216     wxStringTokenizer tk(str, delims, mode);
 217     while ( tk.HasMoreTokens() )
 218     {
 219         tokens.Add(tk.GetNextToken());
 220     }
 221
 222     return tokens;
 223 }