src/common/tokenzr.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        tokenzr.cpp
   3 // Purpose:     String tokenizer
   4 // Author:      Guilhem Lavaux
   5 // Modified by: Vadim Zeitlin (almost full rewrite)
   6 // Created:     04/22/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) Guilhem Lavaux
   9 // Licence:     wxWindows licence
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ============================================================================
  13 // declarations
  14 // ============================================================================
  15
  16 // ----------------------------------------------------------------------------
  17 // headers
  18 // ----------------------------------------------------------------------------
  19
  20 #ifdef __GNUG__
  21     #pragma implementation "tokenzr.h"
  22 #endif
  23
  24 // For compilers that support precompilation, includes "wx.h".
  25 #include "wx/wxprec.h"
  26
  27 #ifdef __BORLANDC__
  28     #pragma hdrstop
  29 #endif
  30
  31 #include "wx/tokenzr.h"
  32
  33 // Required for wxIs... functions
  34 #include <ctype.h>
  35
  36 // ============================================================================
  37 // implementation
  38 // ============================================================================
  39
  40 // ----------------------------------------------------------------------------
  41 // wxStringTokenizer construction
  42 // ----------------------------------------------------------------------------
  43
  44 wxStringTokenizer::wxStringTokenizer(const wxString& str,
  45                                      const wxString& delims,
  46                                      wxStringTokenizerMode mode)
  47 {
  48     SetString(str, delims, mode);
  49 }
  50
  51 void wxStringTokenizer::SetString(const wxString& str,
  52                                   const wxString& delims,
  53                                   wxStringTokenizerMode mode)
  54 {
  55     if ( mode == wxTOKEN_DEFAULT )
  56     {
  57         // by default, we behave like strtok() if the delimiters are only
  58         // whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for
  59         // whitespace delimiters, strtok() behaviour is better because we want
  60         // to count consecutive spaces as one delimiter)
  61         const wxChar *p;
  62         for ( p = delims.c_str(); *p; p++ )
  63         {
  64             if ( !wxIsspace(*p) )
  65                 break;
  66         }
  67
  68         if ( *p )
  69         {
  70             // not whitespace char in delims
  71             mode = wxTOKEN_RET_EMPTY;
  72         }
  73         else
  74         {
  75             // only whitespaces
  76             mode = wxTOKEN_STRTOK;
  77         }
  78     }
  79
  80     m_delims = delims;
  81     m_mode = mode;
  82
  83     Reinit(str);
  84 }
  85
  86 void wxStringTokenizer::Reinit(const wxString& str)
  87 {
  88     wxASSERT_MSG( IsOk(), _T("you should call SetString() first") );
  89
  90     m_string = str;
  91     m_pos = 0;
  92
  93     // empty string doesn't have any tokens
  94     m_hasMore = !m_string.empty();
  95 }
  96
  97 // ----------------------------------------------------------------------------
  98 // access to the tokens
  99 // ----------------------------------------------------------------------------
 100
 101 // do we have more of them?
 102 bool wxStringTokenizer::HasMoreTokens() const
 103 {
 104     wxCHECK_MSG( IsOk(), FALSE, _T("you should call SetString() first") );
 105
 106     if ( m_string.find_first_not_of(m_delims) == wxString::npos )
 107     {
 108         // no non empty tokens left, but in 2 cases we still may return TRUE if
 109         // GetNextToken() wasn't called yet for this empty token:
 110         //
 111         //   a) in wxTOKEN_RET_EMPTY_ALL mode we always do it
 112         //   b) in wxTOKEN_RET_EMPTY mode we do it in the special case of a
 113         //      string containing only the delimiter: then there is an empty
 114         //      token just before it
 115         return (m_mode == wxTOKEN_RET_EMPTY_ALL) ||
 116                (m_mode == wxTOKEN_RET_EMPTY && m_pos == 0)
 117                     ? m_hasMore : FALSE;
 118     }
 119     else
 120     {
 121         // there are non delimiter characters left, hence we do have more
 122         // tokens
 123         return TRUE;
 124     }
 125 }
 126
 127 // count the number of tokens in the string
 128 size_t wxStringTokenizer::CountTokens() const
 129 {
 130     wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") );
 131
 132     // VZ: this function is IMHO not very useful, so it's probably not very
 133     //     important if it's implementation here is not as efficient as it
 134     //     could be - but OTOH like this we're sure to get the correct answer
 135     //     in all modes
 136     wxStringTokenizer *self = (wxStringTokenizer *)this;    // const_cast
 137     wxString stringInitial = m_string;
 138
 139     size_t count = 0;
 140     while ( self->HasMoreTokens() )
 141     {
 142         count++;
 143
 144         (void)self->GetNextToken();
 145     }
 146
 147     self->Reinit(stringInitial);
 148
 149     return count;
 150 }
 151
 152 // ----------------------------------------------------------------------------
 153 // token extraction
 154 // ----------------------------------------------------------------------------
 155
 156 wxString wxStringTokenizer::GetNextToken()
 157 {
 158     // strtok() doesn't return empty tokens, all other modes do
 159     bool allowEmpty = m_mode != wxTOKEN_STRTOK;
 160
 161     wxString token;
 162     do
 163     {
 164         if ( !HasMoreTokens() )
 165         {
 166             break;
 167         }
 168         // find the end of this token
 169         size_t pos = m_string.find_first_of(m_delims);
 170
 171         // and the start of the next one
 172         if ( pos == wxString::npos )
 173         {
 174             // no more delimiters, the token is everything till the end of
 175             // string
 176             token = m_string;
 177
 178             m_pos += m_string.length();
 179             m_string.clear();
 180
 181             // no more tokens in this string, even in wxTOKEN_RET_EMPTY_ALL
 182             // mode (we will return the trailing one right now in this case)
 183             m_hasMore = FALSE;
 184         }
 185         else
 186         {
 187             size_t pos2 = pos + 1;
 188
 189             // in wxTOKEN_RET_DELIMS mode we return the delimiter character
 190             // with token
 191             token = wxString(m_string, m_mode == wxTOKEN_RET_DELIMS ? pos2
 192                                                                     : pos);
 193
 194             // remove token with the following it delimiter from string
 195             m_string.erase(0, pos2);
 196
 197             // keep track of the position in the original string too
 198             m_pos += pos2;
 199         }
 200     }
 201     while ( !allowEmpty && token.empty() );
 202
 203     return token;
 204 }
 205
 206 // ----------------------------------------------------------------------------
 207 // public functions
 208 // ----------------------------------------------------------------------------
 209
 210 wxArrayString wxStringTokenize(const wxString& str,
 211                                const wxString& delims,
 212                                wxStringTokenizerMode mode)
 213 {
 214     wxArrayString tokens;
 215     wxStringTokenizer tk(str, delims, mode);
 216     while ( tk.HasMoreTokens() )
 217     {
 218         tokens.Add(tk.GetNextToken());
 219     }
 220
 221     return tokens;
 222 }