X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/c1981a2fa408700243400606f93679b2befe1ac2..1819a5c1acd9cc3ac496eb13844dd25122a1533c:/src/common/textfile.cpp diff --git a/src/common/textfile.cpp b/src/common/textfile.cpp index f9cd34077d..1ba27db9f1 100644 --- a/src/common/textfile.cpp +++ b/src/common/textfile.cpp @@ -35,6 +35,7 @@ #include "wx/textfile.h" #include "wx/filename.h" +#include "wx/buffer.h" // ============================================================================ // wxTextFile class implementation @@ -88,24 +89,24 @@ bool wxTextFile::OnClose() bool wxTextFile::OnRead(wxMBConv& conv) { // file should be opened and we must be in it's beginning - wxASSERT( m_file.IsOpened() && - (m_file.GetKind() != wxFILE_KIND_DISK || m_file.Tell() == 0) ); - - static const size_t BUF_SIZE = 1024; -#if wxUSE_UNICODE - static const size_t NUL_SIZE = 4; -#else - static const size_t NUL_SIZE = 1; -#endif - - char buf[BUF_SIZE + NUL_SIZE]; - wxChar chLast = '\0'; - wxString str; - - for ( ;; ) + wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 ); + + // read the entire file in memory: this is not the most efficient thing to + // do but there is no good way to avoid it in Unicode build because if we + // read the file block by block we can't convert each block to Unicode + // separately (the last multibyte char in the block might be only partially + // read and so the conversion would fail) and, as the file contents is kept + // in memory by wxTextFile anyhow, it shouldn't be a big problem to read + // the file entirely + const size_t bufSize = m_file.Length() + 4 /* for trailing NULs */; + size_t bufPos = 0; + wxCharBuffer buf(bufSize - 1 /* it adds 1 internally */); + + char block[1024]; + for ( bool eof = false; !eof; ) { - // leave space for trailing NUL - ssize_t nRead = m_file.Read(buf, BUF_SIZE); + // try to read up to the size of the entire block + ssize_t nRead = m_file.Read(block, WXSIZEOF(block)); if ( nRead == wxInvalidOffset ) { @@ -113,91 +114,97 @@ bool wxTextFile::OnRead(wxMBConv& conv) return false; } - if ( nRead == 0 ) - break; + eof = nRead == 0; + if ( eof ) + { + // append 4 trailing NUL bytes: this is needed to ensure that the + // string is going to be NUL-terminated, whatever is the encoding + // used (even UTF-32) + block[0] = + block[1] = + block[2] = + block[3] = '\0'; + nRead = 4; + } + + // this shouldn't happen but don't overwrite the buffer if it does + wxCHECK_MSG( bufPos + nRead <= bufSize, false, + _T("read more than file length?") ); + + // append to the buffer + memcpy(buf.data() + bufPos, block, nRead); + bufPos += nRead; + } - // save the number characters which we already processed during the - // last loop iteration - const size_t lenOld = str.length(); + const wxString str(buf, conv); + // this doesn't risk to happen in ANSI build #if wxUSE_UNICODE - // we have to properly NUL-terminate the string for any encoding it may - // use -- 4 NULs should be enough for everyone (this is why we add 4 - // extra bytes to the buffer) - buf[nRead] = - buf[nRead + 1] = - buf[nRead + 2] = - buf[nRead + 3] = '\0'; - - // append to the remains of the last block, don't overwrite - wxString strbuf(buf, conv); - if ( strbuf.empty() ) - { - // conversion failed - return false; - } + if ( bufSize > 4 && str.empty() ) + { + wxLogError(_("Failed to convert file contents to Unicode.")); + return false; + } +#endif // wxUSE_UNICODE - str += strbuf; -#else // ANSI - wxUnusedVar(conv); - buf[nRead] = '\0'; - str += buf; -#endif // wxUSE_UNICODE/!wxUSE_UNICODE + free(buf.release()); // we don't need this memory any more - // the beginning of the current line, changes inside the loop - wxString::const_iterator lineStart = str.begin(); - const wxString::const_iterator end = str.end(); - for ( wxString::const_iterator p = lineStart + lenOld; p != end; p++ ) - { - const wxChar ch = *p; - switch ( ch ) - { - case '\n': - // could be a DOS or Unix EOL - if ( chLast == '\r' ) - { - AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos); - } - else // bare '\n', Unix style - { - AddLine(wxString(lineStart, p), wxTextFileType_Unix); - } + // now break the buffer in lines + + // last processed character, we need to know if it was a CR or not + wxChar chLast = '\0'; + // the beginning of the current line, changes inside the loop + wxString::const_iterator lineStart = str.begin(); + const wxString::const_iterator end = str.end(); + for ( wxString::const_iterator p = lineStart; p != end; p++ ) + { + const wxChar ch = *p; + switch ( ch ) + { + case '\n': + // could be a DOS or Unix EOL + if ( chLast == '\r' ) + { + AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos); + } + else // bare '\n', Unix style + { + AddLine(wxString(lineStart, p), wxTextFileType_Unix); + } + + lineStart = p + 1; + break; + + case '\r': + if ( chLast == '\r' ) + { + // Mac empty line + AddLine(wxEmptyString, wxTextFileType_Mac); lineStart = p + 1; - break; - - case '\r': - if ( chLast == '\r' ) - { - // Mac empty line - AddLine(wxEmptyString, wxTextFileType_Mac); - lineStart = p + 1; - } - //else: we don't what this is yet -- could be a Mac EOL or - // start of DOS EOL so wait for next char - break; - - default: - if ( chLast == '\r' ) - { - // Mac line termination - AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac); - lineStart = p; - } - } - - chLast = ch; + } + //else: we don't know what this is yet -- could be a Mac EOL or + // start of DOS EOL so wait for next char + break; + + default: + if ( chLast == '\r' ) + { + // Mac line termination + AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac); + lineStart = p; + } } - // remove the part we already processed - str.erase(0, lineStart - str.begin()); + chLast = ch; } // anything in the last line? - if ( !str.empty() ) + if ( lineStart != end ) { - AddLine(str, wxTextFileType_None); // no line terminator + // add unterminated last line + AddLine(wxString(lineStart, end), wxTextFileType_None); } return true;