#include "wx/textfile.h"
#include "wx/filename.h"
+#include "wx/buffer.h"
// ============================================================================
// wxTextFile class implementation
bool wxTextFile::OnRead(wxMBConv& conv)
{
// file should be opened and we must be at its beginning
- wxASSERT( m_file.IsOpened() &&
- (m_file.GetKind() != wxFILE_KIND_DISK || m_file.Tell() == 0) );
-
- static const size_t BUF_SIZE = 1024;
-#if wxUSE_UNICODE
- static const size_t NUL_SIZE = 4;
-#else
- static const size_t NUL_SIZE = 1;
-#endif
-
- char buf[BUF_SIZE + NUL_SIZE];
- wxChar chLast = '\0';
- wxString str;
-
- for ( ;; )
+ wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 );
+
+ // read the entire file in memory: this is not the most efficient thing to
+ // do but there is no good way to avoid it in Unicode build because if we
+ // read the file block by block we can't convert each block to Unicode
+ // separately (the last multibyte char in the block might be only partially
+ // read and so the conversion would fail) and, as the file contents are kept
+ // in memory by wxTextFile anyhow, it shouldn't be a big problem to read
+ // the file entirely
+ const size_t bufSize = m_file.Length() + 4 /* for trailing NULs */;
+ size_t bufPos = 0;
+ wxCharBuffer buf(bufSize - 1 /* it adds 1 internally */);
+
+ char block[1024];
+ for ( bool eof = false; !eof; )
{
- // leave space for trailing NUL
- ssize_t nRead = m_file.Read(buf, BUF_SIZE);
+ // try to read up to the size of the entire block
+ ssize_t nRead = m_file.Read(block, WXSIZEOF(block));
if ( nRead == wxInvalidOffset )
{
return false;
}
- if ( nRead == 0 )
- break;
+ eof = nRead == 0;
+ if ( eof )
+ {
+ // append 4 trailing NUL bytes: this is needed to ensure that the
+ // string is going to be NUL-terminated, whatever encoding is
+ // used (even UTF-32)
+ block[0] =
+ block[1] =
+ block[2] =
+ block[3] = '\0';
+ nRead = 4;
+ }
+
+ // this shouldn't happen but don't overwrite the buffer if it does
+ wxCHECK_MSG( bufPos + nRead <= bufSize, false,
+ _T("read more than file length?") );
+
+ // append to the buffer
+ memcpy(buf.data() + bufPos, block, nRead);
+ bufPos += nRead;
+ }
- // save the number characters which we already processed during the
- // last loop iteration
- const size_t lenOld = str.length();
+ const wxString str(buf, conv);
+ // this cannot happen in ANSI build
#if wxUSE_UNICODE
- // we have to properly NUL-terminate the string for any encoding it may
- // use -- 4 NULs should be enough for everyone (this is why we add 4
- // extra bytes to the buffer)
- buf[nRead] =
- buf[nRead + 1] =
- buf[nRead + 2] =
- buf[nRead + 3] = '\0';
-
- // append to the remains of the last block, don't overwrite
- wxString strbuf(buf, conv);
- if ( strbuf.empty() )
- {
- // conversion failed
- return false;
- }
+ if ( bufSize > 4 && str.empty() )
+ {
+ wxLogError(_("Failed to convert file contents to Unicode."));
+ return false;
+ }
+#endif // wxUSE_UNICODE
- str += strbuf;
-#else // ANSI
- wxUnusedVar(conv);
- buf[nRead] = '\0';
- str += buf;
-#endif // wxUSE_UNICODE/!wxUSE_UNICODE
+ free(buf.release()); // we don't need this memory any more
- // the beginning of the current line, changes inside the loop
- wxString::const_iterator lineStart = str.begin();
- const wxString::const_iterator end = str.end();
- for ( wxString::const_iterator p = lineStart + lenOld; p != end; p++ )
- {
- const wxChar ch = *p;
- switch ( ch )
- {
- case '\n':
- // could be a DOS or Unix EOL
- if ( chLast == '\r' )
- {
- AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
- }
- else // bare '\n', Unix style
- {
- AddLine(wxString(lineStart, p), wxTextFileType_Unix);
- }
+ // now break the buffer in lines
+
+ // last processed character, we need to know if it was a CR or not
+ wxChar chLast = '\0';
+ // the beginning of the current line, changes inside the loop
+ wxString::const_iterator lineStart = str.begin();
+ const wxString::const_iterator end = str.end();
+ for ( wxString::const_iterator p = lineStart; p != end; p++ )
+ {
+ const wxChar ch = *p;
+ switch ( ch )
+ {
+ case '\n':
+ // could be a DOS or Unix EOL
+ if ( chLast == '\r' )
+ {
+ AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
+ }
+ else // bare '\n', Unix style
+ {
+ AddLine(wxString(lineStart, p), wxTextFileType_Unix);
+ }
+
+ lineStart = p + 1;
+ break;
+
+ case '\r':
+ if ( chLast == '\r' )
+ {
+ // Mac empty line
+ AddLine(wxEmptyString, wxTextFileType_Mac);
lineStart = p + 1;
- break;
-
- case '\r':
- if ( chLast == '\r' )
- {
- // Mac empty line
- AddLine(wxEmptyString, wxTextFileType_Mac);
- lineStart = p + 1;
- }
- //else: we don't what this is yet -- could be a Mac EOL or
- // start of DOS EOL so wait for next char
- break;
-
- default:
- if ( chLast == '\r' )
- {
- // Mac line termination
- AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
- lineStart = p;
- }
- }
-
- chLast = ch;
+ }
+ //else: we don't know what this is yet -- could be a Mac EOL or
+ // start of DOS EOL so wait for next char
+ break;
+
+ default:
+ if ( chLast == '\r' )
+ {
+ // Mac line termination
+ AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
+ lineStart = p;
+ }
}
- // remove the part we already processed
- str.erase(0, lineStart - str.begin());
+ chLast = ch;
}
// anything in the last line?
- if ( !str.empty() )
+ if ( lineStart != end )
{
- AddLine(str, wxTextFileType_None); // no line terminator
+ // add unterminated last line
+ AddLine(wxString(lineStart, end), wxTextFileType_None);
}
return true;