From 2348a84293e52b9a701891bcdd801b3e8e541642 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Wed, 9 Jul 2003 22:55:57 +0000 Subject: [PATCH] made wxTextInputStream Unicode safe and general cleanup (patch 653775) git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@21825 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- docs/latex/wx/txtstrm.tex | 71 ++++++++++-- include/wx/txtstrm.h | 23 +++- src/common/txtstrm.cpp | 236 +++++++++++++------------------------- 3 files changed, 162 insertions(+), 168 deletions(-) diff --git a/docs/latex/wx/txtstrm.tex b/docs/latex/wx/txtstrm.tex index 059b72277d..32004defb3 100644 --- a/docs/latex/wx/txtstrm.tex +++ b/docs/latex/wx/txtstrm.tex @@ -15,6 +15,12 @@ and on a typical 32-bit computer, none of these match to the "long" type (wxInt3 is defined as int on 32-bit architectures) so that you cannot use long. To avoid problems (here and elsewhere), make use of wxInt32, wxUint32 and similar types. +If you're scanning through a file using wxTextInputStream, you should check for EOF {\bf before} +reading the next item (word / number), because otherwise the last item may get lost. +You should however be prepared to receive an empty item (empty string / zero number) at the +end of file, especially on Windows systems. This is unavoidable because most (but not all) files end +with whitespace (i.e. usually a newline). + For example: \begin{verbatim} @@ -37,14 +43,20 @@ For example: \membersection{wxTextInputStream::wxTextInputStream}\label{wxtextinputstreamconstr} -\func{}{wxTextInputStream}{\param{wxInputStream\&}{ stream}} +\func{}{wxTextInputStream}{\param{wxInputStream\&}{ stream}, \param{const wxString\&}{ sep=wxT(" \t")}, + \param{wxMBConv\&}{ conv = wxConvUTF8} } Constructs a text stream object from an input stream. Only read methods will be available. 
\wxheading{Parameters} -\docparam{stream}{The input stream.} +\docparam{stream}{The underlying input stream.} + +\docparam{sep}{The initial string separator characters.} + +\docparam{conv}{{\it In Unicode build only:} The encoding converter used to convert the bytes in the + underlying input stream to characters.} \membersection{wxTextInputStream::\destruct{wxTextInputStream}} @@ -52,23 +64,64 @@ be available. Destroys the wxTextInputStream object. -\membersection{wxTextInputStream::Read8} +\membersection{wxTextInputStream::Read8}\label{wxtextinputstreamread8} + +\func{wxUint8}{Read8}{\param{int}{ base = 10}} + +Reads a single unsigned byte from the stream, given in base {\it base}. -\func{wxUint8}{Read8}{\void} +The value of {\it base} must be between $2$ and $36$, inclusive, or +be a special value $0$ which means that the usual rules of {\tt C} numbers are +applied: if the number starts with {\tt 0x} it is considered to be in base +$16$, if it starts with {\tt 0} - in base $8$ and in base $10$ otherwise. Note +that you may not want to specify the base $0$ if you are parsing the numbers +which may have leading zeroes as they can yield unexpected (to the user not +familiar with C) results. -Reads a single byte from the stream. +\membersection{wxTextInputStream::Read8S} + +\func{wxInt8}{Read8S}{\param{int}{ base = 10}} + +Reads a single signed byte from the stream. + +See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the +description of the {\it base} parameter. \membersection{wxTextInputStream::Read16} -\func{wxUint16}{Read16}{\void} +\func{wxUint16}{Read16}{\param{int}{ base = 10}} + +Reads an unsigned 16 bit integer from the stream. -Reads a 16 bit integer from the stream. +See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the +description of the {\it base} parameter. + +\membersection{wxTextInputStream::Read16S} + +\func{wxInt16}{Read16S}{\param{int}{ base = 10}} + +Reads a signed 16 bit integer from the stream. 
+ +See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the +description of the {\it base} parameter. \membersection{wxTextInputStream::Read32} -\func{wxUint32}{Read32}{\void} +\func{wxUint32}{Read32}{\param{int}{ base = 10}} + +Reads a 32 bit unsigned integer from the stream. + +See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the +description of the {\it base} parameter. + +\membersection{wxTextInputStream::Read32S} + +\func{wxInt32}{Read32S}{\param{int}{ base = 10}} + +Reads a 32 bit signed integer from the stream. -Reads a 32 bit integer from the stream. +See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the +description of the {\it base} parameter. \membersection{wxTextInputStream::ReadDouble} diff --git a/include/wx/txtstrm.h b/include/wx/txtstrm.h index a7317a5fac..c974b6f12b 100644 --- a/include/wx/txtstrm.h +++ b/include/wx/txtstrm.h @@ -29,6 +29,13 @@ typedef wxTextOutputStream& (*__wxTextOutputManip)(wxTextOutputStream&); WXDLLIMPEXP_BASE wxTextOutputStream &endl( wxTextOutputStream &stream ); +#define wxEOT wxT('\4') // the End-Of-Text control code (used only inside wxTextInputStream) + +// If you're scanning through a file using wxTextInputStream, you should check for EOF _before_ +// reading the next item (word / number), because otherwise the last item may get lost. +// You should however be prepared to receive an empty item (empty string / zero number) at the +// end of file, especially on Windows systems. This is unavoidable because most (but not all) files end +// with whitespace (i.e. usually a newline). 
class WXDLLIMPEXP_BASE wxTextInputStream { public: @@ -39,11 +46,14 @@ public: #endif ~wxTextInputStream(); - wxUint32 Read32(); - wxUint16 Read16(); - wxUint8 Read8(); + wxUint32 Read32(int base = 10); // base may be between 2 and 36, inclusive, or the special 0 (= C format) + wxUint16 Read16(int base = 10); + wxUint8 Read8(int base = 10); + wxInt32 Read32S(int base = 10); + wxInt16 Read16S(int base = 10); + wxInt8 Read8S(int base = 10); double ReadDouble(); - wxString ReadString(); // deprecated use ReadLine or ReadWord instead + wxString ReadString(); // deprecated: use ReadLine or ReadWord instead wxString ReadLine(); wxString ReadWord(); @@ -65,14 +75,17 @@ public: protected: wxInputStream &m_input; wxString m_separators; + char m_lastBytes[10]; // stores the bytes that were read for the last character #if wxUSE_UNICODE wxMBConv &m_conv; #endif bool EatEOL(const wxChar &c); + void UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues + // returns EOT (\4) if there is a stream error, or end of file + wxChar NextChar(); // this should be used instead of GetC() because of Unicode issues wxChar NextNonSeparators(); - void SkipIfEndOfLine( wxChar c ); }; typedef enum diff --git a/src/common/txtstrm.cpp b/src/common/txtstrm.cpp index 032474919c..55a7ac3580 100644 --- a/src/common/txtstrm.cpp +++ b/src/common/txtstrm.cpp @@ -42,11 +42,13 @@ wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv) : m_input(s), m_separators(sep), m_conv(conv) { + memset((void*)m_lastBytes, 0, 10); } #else wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep) : m_input(s), m_separators(sep) { + memset((void*)m_lastBytes, 0, 10); } #endif @@ -54,13 +56,52 @@ wxTextInputStream::~wxTextInputStream() { } +void wxTextInputStream::UngetLast() +{ + size_t byteCount = 0; + while(m_lastBytes[byteCount]) // pseudo ANSI strlen (even for Unicode!) 
+ byteCount++; + m_input.Ungetch(m_lastBytes, byteCount); + memset((void*)m_lastBytes, 0, 10); +} + +wxChar wxTextInputStream::NextChar() +{ +#if wxUSE_UNICODE + wxChar wbuf[2]; + memset((void*)m_lastBytes, 0, 10); + for(size_t inlen = 0; inlen < 9; inlen++) + { + // actually read the next character + m_lastBytes[inlen] = m_input.GetC(); + + if(m_input.LastRead() <= 0) + return wxEOT; + + int retlen = (int) m_conv.MB2WC(wbuf, m_lastBytes, 2); // returns -1 for failure + if(retlen >= 0) // res == 0 could happen for '\0' char + return wbuf[0]; + } + // there should be no encoding which requires more than nine bytes for one character... + return wxEOT; +#else + m_lastBytes[0] = m_input.GetC(); + + if(m_input.LastRead() <= 0) + return wxEOT; + + return m_lastBytes[0]; +#endif + +} + wxChar wxTextInputStream::NextNonSeparators() { wxChar c = (wxChar) 0; for (;;) { - if (!m_input) return (wxChar) 0; - c = m_input.GetC(); + c = NextChar(); + if (c == wxEOT) return (wxChar) 0; if (c != wxT('\n') && c != wxT('\r') && @@ -76,162 +117,65 @@ bool wxTextInputStream::EatEOL(const wxChar &c) if (c == wxT('\r')) // eat on both Mac and DOS { - if (!m_input) return TRUE; - wxChar c2 = m_input.GetC(); + wxChar c2 = NextChar(); + if(c2 == wxEOT) return TRUE; // end of stream reached, had enough :-) - if (c2 != wxT('\n')) m_input.Ungetch( c2 ); // Don't eat on Mac + if (c2 != wxT('\n')) UngetLast(); // Don't eat on Mac return TRUE; } return FALSE; } -void wxTextInputStream::SkipIfEndOfLine( wxChar c ) +wxUint32 wxTextInputStream::Read32(int base) { - if (EatEOL(c)) return; - else m_input.Ungetch( c ); // no line terminator + wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + if(!m_input) return 0; + + wxString word = ReadWord(); + if(word.IsEmpty()) + return 0; + return wxStrtoul(word.c_str(), 0, base); } -wxUint32 wxTextInputStream::Read32() +wxUint16 wxTextInputStream::Read16(int base) { - /* I only implemented a simple integer parser */ - // VZ: what about 
using strtol()?? (TODO) + return (wxUint16)Read32(base); +} - int sign; - wxInt32 i; +wxUint8 wxTextInputStream::Read8(int base) +{ + return (wxUint8)Read32(base); +} - if (!m_input) return 0; - int c = NextNonSeparators(); - if (c==(wxChar)0) return 0; +wxInt32 wxTextInputStream::Read32S(int base) +{ + wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); + if(!m_input) return 0; - i = 0; - if (! (c == wxT('-') || c == wxT('+') || isdigit(c)) ) - { - m_input.Ungetch(c); + wxString word = ReadWord(); + if(word.IsEmpty()) return 0; - } - - if (c == wxT('-')) - { - sign = -1; - c = m_input.GetC(); - } else - if (c == wxT('+')) - { - sign = 1; - c = m_input.GetC(); - } else - { - sign = 1; - } - - while (isdigit(c)) - { - i = i*10 + (c - (int)wxT('0')); - c = m_input.GetC(); - } - - SkipIfEndOfLine( c ); - - i *= sign; - - return (wxUint32)i; + return wxStrtol(word.c_str(), 0, base); } -wxUint16 wxTextInputStream::Read16() +wxInt16 wxTextInputStream::Read16S(int base) { - return (wxUint16)Read32(); + return (wxInt16)Read32S(base); } -wxUint8 wxTextInputStream::Read8() +wxInt8 wxTextInputStream::Read8S(int base) { - return (wxUint8)Read32(); + return (wxInt8)Read32S(base); } double wxTextInputStream::ReadDouble() { - /* I only implemented a simple float parser - * VZ: what about using strtod()?? (TODO) - */ - - double f; - int theSign; - - if (!m_input) + if(!m_input) return 0; + wxString word = ReadWord(); + if(word.IsEmpty()) return 0; - - int c = NextNonSeparators(); - if (c==(wxChar)0) return 0; - - f = 0.0; - if (! 
(c == wxT('.') || c == wxT(',') || c == wxT('-') || c == wxT('+') || isdigit(c)) ) - { - m_input.Ungetch(c); - return 0; - } - - if (c == wxT('-')) - { - theSign = -1; - c = m_input.GetC(); - } else - if (c == wxT('+')) - { - theSign = 1; - c = m_input.GetC(); - } - else - { - theSign = 1; - } - - while (isdigit(c)) - { - f = f*10 + (c - wxT('0')); - c = m_input.GetC(); - } - - if (c == wxT('.') || c == wxT(',')) - { - double f_multiplicator = (double) 0.1; - - c = m_input.GetC(); - - while (isdigit(c)) - { - f += (c-wxT('0'))*f_multiplicator; - f_multiplicator /= 10; - c = m_input.GetC(); - } - - if (c == wxT('e')) - { - double f_multiplicator = 0.0; - int i, e; - - c = m_input.GetC(); - - switch (c) - { - case wxT('-'): f_multiplicator = 0.1; break; - case wxT('+'): f_multiplicator = 10.0; break; - } - - e = Read8(); // why only max 256 ? - - for (i=0;i>(wxString& word) wxTextInputStream& wxTextInputStream::operator>>(char& c) { - if (!m_input) - { - c = 0; - return *this; - } - c = m_input.GetC(); + if(m_input.LastRead() <= 0) c = 0; if (EatEOL(c)) c = '\n'; -- 2.47.2