src/common/tokenzr.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        tokenzr.cpp
   3 // Purpose:     String tokenizer
   4 // Author:      Guilhem Lavaux
   5 // Modified by: Vadim Zeitlin (almost full rewrite)
   6 // Created:     04/22/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) Guilhem Lavaux
   9 // Licence:     wxWindows licence
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 // For compilers that support precompilation, includes "wx.h".
  21 #include "wx/wxprec.h"
  22
  23 #ifdef __BORLANDC__
  24     #pragma hdrstop
  25 #endif
  26
  27 #include "wx/tokenzr.h"
  28 #include "wx/arrstr.h"
  29
  30 // Required for wxIs... functions
  31 #include <ctype.h>
  32
  33 // ============================================================================
  34 // implementation
  35 // ============================================================================
  36
  37 // ----------------------------------------------------------------------------
  38 // wxStringTokenizer construction
  39 // ----------------------------------------------------------------------------
  40
  41 wxStringTokenizer::wxStringTokenizer(const wxString& str,
  42                                      const wxString& delims,
  43                                      wxStringTokenizerMode mode)
  44 {
  45     SetString(str, delims, mode);
  46 }
  47
  48 void wxStringTokenizer::SetString(const wxString& str,
  49                                   const wxString& delims,
  50                                   wxStringTokenizerMode mode)
  51 {
  52     if ( mode == wxTOKEN_DEFAULT )
  53     {
  54         // by default, we behave like strtok() if the delimiters are only
  55         // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
  56         // whitespace delimiters, strtok() behaviour is better because we want
  57         // to count consecutive spaces as one delimiter)
  58         const wxChar *p;
  59         for ( p = delims.c_str(); *p; p++ )
  60         {
  61             if ( !wxIsspace(*p) )
  62                 break;
  63         }
  64
  65         if ( *p )
  66         {
  67             // not whitespace char in delims
  68             mode = wxTOKEN_RET_EMPTY;
  69         }
  70         else
  71         {
  72             // only whitespaces
  73             mode = wxTOKEN_STRTOK;
  74         }
  75     }
  76
  77     m_delims = delims;
  78     m_mode = mode;
  79
  80     Reinit(str);
  81 }
  82
  83 void wxStringTokenizer::Reinit(const wxString& str)
  84 {
  85     wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
  86
  87     m_string = str;
  88     m_pos = 0;
  89
  90     // empty string doesn't have any tokens
  91     m_hasMore = !m_string.empty();
  92 }
  93
  94 // ----------------------------------------------------------------------------
  95 // access to the tokens
  96 // ----------------------------------------------------------------------------
  97
  98 // do we have more of them?
  99 bool wxStringTokenizer::HasMoreTokens() const
 100 {
 101     wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") );
 102
 103     if ( m_string.find_first_not_of(m_delims) == wxString::npos )
 104     {
 105         // no non empty tokens left, but in 2 cases we still may return true if
 106         // GetNextToken() wasn't called yet for this empty token:
 107         //
 108         //   a) in wxTOKEN_RET_EMPTY_ALL mode we always do it
 109         //   b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a
 110         //      string containing only the delimiter: then there is an empty
 111         //      token just before it
 112         return (m_mode == wxTOKEN_RET_EMPTY_ALL) ||
 113                (m_mode == wxTOKEN_RET_EMPTY && m_pos == 0)
 114                     ? m_hasMore : false;
 115     }
 116     else
 117     {
 118         // there are non delimiter characters left, hence we do have more
 119         // tokens
 120         return true;
 121     }
 122 }
 123
 124 // count the number of tokens in the string
 125 size_t wxStringTokenizer::CountTokens() const
 126 {
 127     wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
 128
 129     // VZ: this function is IMHO not very useful, so it's probably not very
 130     //     important if it's implementation here is not as efficient as it
 131     //     could be - but OTOH like this we're sure to get the correct answer
 132     //     in all modes
 133     wxStringTokenizer *self = (wxStringTokenizer *)this;    // const_cast
 134     wxString stringInitial = m_string;
 135
 136     size_t count = 0;
 137     while ( self->HasMoreTokens() )
 138     {
 139         count++;
 140
 141         (void)self->GetNextToken();
 142     }
 143
 144     self->Reinit(stringInitial);
 145
 146     return count;
 147 }
 148
 149 // ----------------------------------------------------------------------------
 150 // token extraction
 151 // ----------------------------------------------------------------------------
 152
 153 wxString wxStringTokenizer::GetNextToken()
 154 {
 155     // strtok() doesn't return empty tokens, all other modes do
 156     bool allowEmpty = m_mode != wxTOKEN_STRTOK;
 157
 158     wxString token;
 159     do
 160     {
 161         if ( !HasMoreTokens() )
 162         {
 163             break;
 164         }
 165         // find the end of this token
 166         size_t pos = m_string.find_first_of(m_delims);
 167
 168         // and the start of the next one
 169         if ( pos == wxString::npos )
 170         {
 171             // no more delimiters, the token is everything till the end of
 172             // string
 173             token = m_string;
 174
 175             m_pos += m_string.length();
 176             m_string.clear();
 177
 178             // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
 179             // mode (we will return the trailing one right now in this case)
 180             m_hasMore = false;
 181         }
 182         else
 183         {
 184             size_t pos2 = pos + 1;
 185
 186             // in wxTOKEN_RET_DELIMS mode we return the delimiter character
 187             // with token
 188             token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
 189                                                                     : pos);
 190
 191             // remove token with the following it delimiter from string
 192             m_string.erase(0, pos2);
 193
 194             // keep track of the position in the original string too
 195             m_pos += pos2;
 196         }
 197     }
 198     while ( !allowEmpty && token.empty() );
 199
 200     return token;
 201 }
 202
 203 // ----------------------------------------------------------------------------
 204 // public functions
 205 // ----------------------------------------------------------------------------
 206
 207 wxArrayString wxStringTokenize(const wxString& str,
 208                                const wxString& delims,
 209                                wxStringTokenizerMode mode)
 210 {
 211     wxArrayString tokens;
 212     wxStringTokenizer tk(str, delims, mode);
 213     while ( tk.HasMoreTokens() )
 214     {
 215         tokens.Add(tk.GetNextToken());
 216     }
 217
 218     return tokens;
 219 }