made wxTextInputStream Unicode safe and general cleanup (patch 653775)

author Vadim Zeitlin <vadim@wxwidgets.org>

Wed, 9 Jul 2003 22:55:57 +0000 (22:55 +0000)

committer Vadim Zeitlin <vadim@wxwidgets.org>

Wed, 9 Jul 2003 22:55:57 +0000 (22:55 +0000)
author Vadim Zeitlin <vadim@wxwidgets.org>
Wed, 9 Jul 2003 22:55:57 +0000 (22:55 +0000)
committer Vadim Zeitlin <vadim@wxwidgets.org>
Wed, 9 Jul 2003 22:55:57 +0000 (22:55 +0000)
diff --git a/docs/latex/wx/txtstrm.tex b/docs/latex/wx/txtstrm.tex

index 059b72277d3aaca8463dadf9dfac3d5fb29955c0..32004defb316128a51a4d657e75630da3d609de0 100644 (file)
--- a/docs/latex/wx/txtstrm.tex
+++ b/docs/latex/wx/txtstrm.tex
@@ -15,6 +15,12 @@ and on a typical 32-bit computer, none of these match to the "long" type (wxInt3
  is defined as int on 32-bit architectures) so that you cannot use long. To avoid
  problems (here and elsewhere), make use of wxInt32, wxUint32 and similar types.
  
+If you're scanning through a file using wxTextInputStream, you should check for EOF {\bf before}
+reading the next item (word / number), because otherwise the last item may get lost. 
+You should however be prepared to receive an empty item (empty string / zero number) at the
+end of file, especially on Windows systems. This is unavoidable because most (but not all) files end
+with whitespace (i.e. usually a newline).
+
  For example:
  
  \begin{verbatim}
@@ -37,14 +43,20 @@ For example:
  
  \membersection{wxTextInputStream::wxTextInputStream}\label{wxtextinputstreamconstr}
  
-\func{}{wxTextInputStream}{\param{wxInputStream\&}{ stream}}
+\func{}{wxTextInputStream}{\param{wxInputStream\&}{ stream}, \param{const wxString\&}{ sep=wxT(" \t")}, 
+  \param{wxMBConv\&}{ conv = wxConvUTF8} }
  
  Constructs a text stream object from an input stream. Only read methods will
  be available.
  
  \wxheading{Parameters}
  
-\docparam{stream}{The input stream.}
+\docparam{stream}{The underlying input stream.}
+
+\docparam{sep}{The initial string separator characters.}
+
+\docparam{conv}{{\it In Unicode build only:} The encoding converter used to convert the bytes in the
+  underlying input stream to characters.}
  
  \membersection{wxTextInputStream::\destruct{wxTextInputStream}}
  
@@ -52,23 +64,64 @@ be available.
  
  Destroys the wxTextInputStream object.
  
-\membersection{wxTextInputStream::Read8}
+\membersection{wxTextInputStream::Read8}\label{wxtextinputstreamread8}
+
+\func{wxUint8}{Read8}{\param{int}{ base = 10}}
+
+Reads a single unsigned byte from the stream, given in base {\it base}.
  
-\func{wxUint8}{Read8}{\void}
+The value of {\it base} must be between $2$ and $36$, inclusive, or
+be the special value $0$, which means that the usual rules for {\tt C} numbers
+are applied: if the number starts with {\tt 0x} it is considered to be in base
+$16$, if it starts with {\tt 0} it is in base $8$, and otherwise it is in base
+$10$. Note that you may not want to specify base $0$ if you are parsing
+numbers which may have leading zeroes, as they can yield results unexpected by
+users not familiar with C.
  
-Reads a single byte from the stream.
+\membersection{wxTextInputStream::Read8S}
+
+\func{wxInt8}{Read8S}{\param{int}{ base = 10}}
+
+Reads a single signed byte from the stream.
+
+See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
+description of the {\it base} parameter.
  
  \membersection{wxTextInputStream::Read16}
  
-\func{wxUint16}{Read16}{\void}
+\func{wxUint16}{Read16}{\param{int}{ base = 10}}
+
+Reads an unsigned 16 bit integer from the stream.
  
-Reads a 16 bit integer from the stream.
+See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
+description of the {\it base} parameter.
+
+\membersection{wxTextInputStream::Read16S}
+
+\func{wxInt16}{Read16S}{\param{int}{ base = 10}}
+
+Reads a signed 16 bit integer from the stream.
+
+See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
+description of the {\it base} parameter.
  
  \membersection{wxTextInputStream::Read32}
  
-\func{wxUint32}{Read32}{\void}
+\func{wxUint32}{Read32}{\param{int}{ base = 10}}
+
+Reads a 32 bit unsigned integer from the stream.
+
+See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
+description of the {\it base} parameter.
+
+\membersection{wxTextInputStream::Read32S}
+
+\func{wxInt32}{Read32S}{\param{int}{ base = 10}}
+
+Reads a 32 bit signed integer from the stream.
  
-Reads a 32 bit integer from the stream.
+See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
+description of the {\it base} parameter.
  
  \membersection{wxTextInputStream::ReadDouble}
  
diff --git a/include/wx/txtstrm.h b/include/wx/txtstrm.h

index a7317a5fac805d67c7859d638ecb6a2874bbd647..c974b6f12b93034291d3976c2b6a61cbc55270c1 100644 (file)
--- a/include/wx/txtstrm.h
+++ b/include/wx/txtstrm.h
@@ -29,6 +29,13 @@ typedef wxTextOutputStream& (*__wxTextOutputManip)(wxTextOutputStream&);
  WXDLLIMPEXP_BASE wxTextOutputStream &endl( wxTextOutputStream &stream );
  
  
+#define wxEOT wxT('\4') // the End-Of-Text control code (used only inside wxTextInputStream)
+
+// If you're scanning through a file using wxTextInputStream, you should check for EOF _before_
+// reading the next item (word / number), because otherwise the last item may get lost. 
+// You should however be prepared to receive an empty item (empty string / zero number) at the
+// end of file, especially on Windows systems. This is unavoidable because most (but not all) files end
+// with whitespace (i.e. usually a newline).
  class WXDLLIMPEXP_BASE wxTextInputStream
  {
  public:
@@ -39,11 +46,14 @@ public:
  #endif
      ~wxTextInputStream();
  
-    wxUint32 Read32();
-    wxUint16 Read16();
-    wxUint8  Read8();
+    wxUint32 Read32(int base = 10); // base may be between 2 and 36, inclusive, or the special 0 (= C format)
+    wxUint16 Read16(int base = 10);
+    wxUint8  Read8(int base = 10);
+    wxInt32  Read32S(int base = 10);
+    wxInt16  Read16S(int base = 10);
+    wxInt8   Read8S(int base = 10);
      double   ReadDouble();
-    wxString ReadString();  // deprecated use ReadLine or ReadWord instead
+    wxString ReadString();  // deprecated: use ReadLine or ReadWord instead
      wxString ReadLine();
      wxString ReadWord();
  
@@ -65,14 +75,17 @@ public:
  protected:
      wxInputStream &m_input;
      wxString m_separators;
+    char m_lastBytes[10]; // stores the bytes that were read for the last character
      
  #if wxUSE_UNICODE
      wxMBConv &m_conv;
  #endif
  
      bool   EatEOL(const wxChar &c);
+    void   UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues
+    // returns EOT (\4) if there is a stream error, or end of file
+    wxChar NextChar();   // this should be used instead of GetC() because of Unicode issues
      wxChar NextNonSeparators();
-    void   SkipIfEndOfLine( wxChar c );
  };
  
  typedef enum
diff --git a/src/common/txtstrm.cpp b/src/common/txtstrm.cpp

index 032474919c8427c224ac1885e7b469088007d56f..55a7ac35804f1f72fdd2eef494e5c94666e2255b 100644 (file)
--- a/src/common/txtstrm.cpp
+++ b/src/common/txtstrm.cpp
@@ -42,11 +42,13 @@
  wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv)
    : m_input(s), m_separators(sep), m_conv(conv)
  {
+    memset((void*)m_lastBytes, 0, 10);
  }
  #else
  wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
    : m_input(s), m_separators(sep)
  {
+    memset((void*)m_lastBytes, 0, 10);
  }
  #endif
  
@@ -54,13 +56,52 @@ wxTextInputStream::~wxTextInputStream()
  {
  }
  
+void wxTextInputStream::UngetLast()
+{
+    size_t byteCount = 0;
+    while(m_lastBytes[byteCount]) // pseudo ANSI strlen (even for Unicode!)
+        byteCount++;
+    m_input.Ungetch(m_lastBytes, byteCount);
+    memset((void*)m_lastBytes, 0, 10);
+}
+
+wxChar wxTextInputStream::NextChar()
+{
+#if wxUSE_UNICODE
+    wxChar wbuf[2];
+    memset((void*)m_lastBytes, 0, 10);
+    for(size_t inlen = 0; inlen < 9; inlen++) 
+    {
+        // actually read the next character
+        m_lastBytes[inlen] = m_input.GetC();
+
+        if(m_input.LastRead() <= 0) 
+            return wxEOT;
+        
+        int retlen = (int) m_conv.MB2WC(wbuf, m_lastBytes, 2); // returns -1 for failure
+        if(retlen >= 0) // res == 0 could happen for '\0' char
+            return wbuf[0];
+    }
+    // there should be no encoding which requires more than nine bytes for one character...
+    return wxEOT;
+#else
+    m_lastBytes[0] = m_input.GetC();
+    
+    if(m_input.LastRead() <= 0) 
+        return wxEOT;
+    
+    return m_lastBytes[0];
+#endif
+    
+}
+
  wxChar wxTextInputStream::NextNonSeparators()
  {
      wxChar c = (wxChar) 0;
      for (;;)
      {
-        if (!m_input) return (wxChar) 0;
-        c = m_input.GetC();
+        c = NextChar();
+        if (c == wxEOT) return (wxChar) 0;
  
          if (c != wxT('\n') &&
              c != wxT('\r') &&
@@ -76,162 +117,65 @@ bool wxTextInputStream::EatEOL(const wxChar &c)
  
      if (c == wxT('\r')) // eat on both Mac and DOS
      {
-        if (!m_input) return TRUE;
-        wxChar c2 = m_input.GetC();
+        wxChar c2 = NextChar();
+        if(c2 == wxEOT) return TRUE; // end of stream reached, had enough :-)
  
-        if (c2 != wxT('\n'))  m_input.Ungetch( c2 ); // Don't eat on Mac
+        if (c2 != wxT('\n')) UngetLast(); // Don't eat on Mac
          return TRUE;
      }
  
      return FALSE;
  }
  
-void wxTextInputStream::SkipIfEndOfLine( wxChar c )
+wxUint32 wxTextInputStream::Read32(int base)
  {
-    if (EatEOL(c)) return;
-    else m_input.Ungetch( c );  // no line terminator
+    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
+    if(!m_input) return 0;
+
+    wxString word = ReadWord();
+    if(word.IsEmpty())
+        return 0;
+    return wxStrtoul(word.c_str(), 0, base);
  }
  
-wxUint32 wxTextInputStream::Read32()
+wxUint16 wxTextInputStream::Read16(int base)
  {
-    /* I only implemented a simple integer parser */
-    // VZ: what about using strtol()?? (TODO)
+    return (wxUint16)Read32(base);
+}
  
-    int sign;
-    wxInt32 i;
+wxUint8 wxTextInputStream::Read8(int base)
+{
+    return (wxUint8)Read32(base);
+}
  
-    if (!m_input) return 0;
-    int c = NextNonSeparators();
-    if (c==(wxChar)0) return 0;
+wxInt32 wxTextInputStream::Read32S(int base)
+{
+    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
+    if(!m_input) return 0;
  
-    i = 0;
-    if (! (c == wxT('-') || c == wxT('+') || isdigit(c)) )
-    {
-        m_input.Ungetch(c);
+    wxString word = ReadWord();
+    if(word.IsEmpty())
          return 0;
-    }
-
-    if (c == wxT('-'))
-    {
-        sign = -1;
-        c = m_input.GetC();
-    } else
-    if (c == wxT('+'))
-    {
-        sign = 1;
-        c = m_input.GetC();
-    } else
-    {
-        sign = 1;
-    }
-
-    while (isdigit(c))
-    {
-        i = i*10 + (c - (int)wxT('0'));
-        c = m_input.GetC();
-    }
-
-    SkipIfEndOfLine( c );
-
-    i *= sign;
-
-    return (wxUint32)i;
+    return wxStrtol(word.c_str(), 0, base);
  }
  
-wxUint16 wxTextInputStream::Read16()
+wxInt16 wxTextInputStream::Read16S(int base)
  {
-    return (wxUint16)Read32();
+    return (wxInt16)Read32S(base);
  }
  
-wxUint8 wxTextInputStream::Read8()
+wxInt8 wxTextInputStream::Read8S(int base)
  {
-    return (wxUint8)Read32();
+    return (wxInt8)Read32S(base);
  }
  
  double wxTextInputStream::ReadDouble()
  {
-    /* I only implemented a simple float parser
-     * VZ: what about using strtod()?? (TODO)
-     */
-
-    double f;
-    int theSign;
-
-    if (!m_input)
+    if(!m_input) return 0;
+    wxString word = ReadWord();
+    if(word.IsEmpty())
          return 0;
-
-    int c = NextNonSeparators();
-    if (c==(wxChar)0) return 0;
-
-    f = 0.0;
-    if (! (c == wxT('.') || c == wxT(',') || c == wxT('-') || c == wxT('+') || isdigit(c)) )
-    {
-        m_input.Ungetch(c);
-        return 0;
-    }
-
-    if (c == wxT('-'))
-    {
-        theSign = -1;
-        c = m_input.GetC();
-    } else
-    if (c == wxT('+'))
-    {
-        theSign = 1;
-        c = m_input.GetC();
-    }
-    else
-    {
-        theSign = 1;
-    }
-
-    while (isdigit(c))
-    {
-        f = f*10 + (c - wxT('0'));
-        c = m_input.GetC();
-    }
-
-    if (c == wxT('.') || c == wxT(','))
-    {
-        double f_multiplicator = (double) 0.1;
-
-        c = m_input.GetC();
-
-        while (isdigit(c))
-        {
-            f += (c-wxT('0'))*f_multiplicator;
-            f_multiplicator /= 10;
-            c = m_input.GetC();
-        }
-
-        if (c == wxT('e'))
-        {
-            double f_multiplicator = 0.0;
-            int i, e;
-
-            c = m_input.GetC();
-
-            switch (c)
-            {
-                case wxT('-'): f_multiplicator = 0.1;  break;
-                case wxT('+'): f_multiplicator = 10.0; break;
-            }
-
-            e = Read8();  // why only max 256 ?
-
-            for (i=0;i<e;i++)
-                f *= f_multiplicator;
-        }
-        else
-            SkipIfEndOfLine( c );
-    }
-    else
-    {
-        m_input.Ungetch(c);
-    }
-
-    f *= theSign;
-    return f;
+    return wxStrtod(word.c_str(), 0);
  }
  
  wxString wxTextInputStream::ReadString()
@@ -245,19 +189,9 @@ wxString wxTextInputStream::ReadLine()
  
      while ( !m_input.Eof() )
      {
-#if wxUSE_UNICODE
-        // FIXME: this is only works for single byte encodings
-        // How-to read a single char in an unkown encoding???
-        char buf[10];
-        buf[0] = m_input.GetC();
-        buf[1] = 0;
-        
-        wxChar wbuf[2];
-        m_conv.MB2WC( wbuf, buf, 2 );
-        wxChar c = wbuf[0];
-#else
-        char c = m_input.GetC();
-#endif
+        wxChar c = NextChar();
+        if(c == wxEOT)
+            break;
          
          if ( !m_input )
              break;
@@ -286,9 +220,8 @@ wxString wxTextInputStream::ReadWord()
      
      while ( !m_input.Eof() )
      {
-        c = m_input.GetC();
-        
-        if (!m_input)
+        c = NextChar();
+        if(c == wxEOT)
              break;
              
          if (m_separators.Contains(c))
@@ -311,13 +244,8 @@ wxTextInputStream& wxTextInputStream::operator>>(wxString& word)
  
  wxTextInputStream& wxTextInputStream::operator>>(char& c)
  {
-    if (!m_input)
-    {
-        c = 0;
-        return *this;
-    }
-
      c = m_input.GetC();
+    if(m_input.LastRead() <= 0) c = 0;
  
      if (EatEOL(c))
          c = '\n';
author	Vadim Zeitlin <vadim@wxwidgets.org>
	Wed, 9 Jul 2003 22:55:57 +0000 (22:55 +0000)
committer	Vadim Zeitlin <vadim@wxwidgets.org>
	Wed, 9 Jul 2003 22:55:57 +0000 (22:55 +0000)
docs/latex/wx/txtstrm.tex		patch \| blob \| blame \| history
include/wx/txtstrm.h		patch \| blob \| blame \| history
src/common/txtstrm.cpp		patch \| blob \| blame \| history