X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/67c20e133ebe01b5b4bc2468661a8bcc4097c5a2..ba86da30cf1a2a8429f3af465cbb6c9f52b307fb:/src/common/textfile.cpp diff --git a/src/common/textfile.cpp b/src/common/textfile.cpp index 604bb54c4c..79d837f631 100644 --- a/src/common/textfile.cpp +++ b/src/common/textfile.cpp @@ -13,10 +13,6 @@ // headers // ============================================================================ -#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) - #pragma implementation "textfile.h" -#endif - #include "wx/wxprec.h" #ifdef __BORLANDC__ @@ -39,6 +35,7 @@ #include "wx/textfile.h" #include "wx/filename.h" +#include "wx/buffer.h" // ============================================================================ // wxTextFile class implementation @@ -89,98 +86,205 @@ bool wxTextFile::OnClose() } -bool wxTextFile::OnRead(wxMBConv& conv) +bool wxTextFile::OnRead(const wxMBConv& conv) { - // file should be opened and we must be in it's beginning - wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 ); + // file should be opened + wxASSERT_MSG( m_file.IsOpened(), _T("can't read closed file") ); + + // read the entire file in memory: this is not the most efficient thing to + // do it but there is no good way to avoid it in Unicode build because if + // we read the file block by block we can't convert each block to Unicode + // separately (the last multibyte char in the block might be only partially + // read and so the conversion would fail) and, as the file contents is kept + // in memory by wxTextFile anyhow, it shouldn't be a big problem to read + // the file entirely + size_t bufSize = 0; + + // number of bytes to (try to) read from disk at once + static const size_t BLOCK_SIZE = 4096; + + wxCharBuffer buf; + + // first determine if the file is seekable or not and so whether we can + // determine its length in advance + wxFileOffset fileLength; + { + wxLogNull logNull; + fileLength = m_file.Length(); + } - char *strBuf, *strPtr, *strEnd; - char ch, chLast = '\0'; - char buf[1024]; - wxFileSize_t nRead; + // some non-seekable files under /proc under Linux pretend that they're + // seekable but always return 0; others do return an error + const bool seekable = fileLength != wxInvalidOffset && fileLength != 0; + if ( seekable ) + { + // we know the required length, so set the buffer size in advance + bufSize = fileLength; + if ( !buf.extend(bufSize) ) + return false; - strPtr = strBuf = new char[1024]; - strEnd = strBuf + 1024; + // if the file is seekable, also check that we're at its beginning + wxASSERT_MSG( m_file.Tell() == 0, _T("should be at start of file") ); - do - { - nRead = m_file.Read(buf, WXSIZEOF(buf)); - if ( nRead == wxInvalidOffset ) + char *dst = buf.data(); + for ( size_t nRemaining = bufSize; nRemaining > 0; ) { - // read error (error message already given in wxFile::Read) - delete[] strBuf; - return false; + size_t nToRead = BLOCK_SIZE; + + // the file size could have changed, avoid overflowing the buffer + // even if it did + if ( nToRead > nRemaining ) + nToRead = nRemaining; + + ssize_t nRead = m_file.Read(dst, nToRead); + + if ( nRead == wxInvalidOffset ) + { + // read error (error message already given in wxFile::Read) + return false; + } + + if ( nRead == 0 ) + { + // this file can't be empty because we checked for this above + // so this must be the end of file + break; + } + + dst += nRead; + nRemaining -= nRead; } - for (wxFileSize_t n = 0; n < nRead; n++) + wxASSERT_MSG( dst - buf.data() == (wxFileOffset)bufSize, + _T("logic error") ); + } + else // file is not seekable + { + char block[BLOCK_SIZE]; + for ( ;; ) { - ch = buf[n]; - switch ( ch ) + ssize_t nRead = m_file.Read(block, WXSIZEOF(block)); + + if ( nRead == wxInvalidOffset ) + { + // read error (error message already given in wxFile::Read) + return false; + } + + if ( nRead == 0 ) { - case '\n': - // Dos/Unix line termination - *strPtr = '\0'; - AddLine(wxString(strBuf, conv), - chLast == '\r' ? wxTextFileType_Dos - : wxTextFileType_Unix); - strPtr = strBuf; - chLast = '\n'; - break; - - case '\r': - if ( chLast == '\r' ) + // if no bytes have been read, presumably this is a + // valid-but-empty file + if ( bufSize == 0 ) + return true; + + // otherwise we've finished reading the file + break; + } + + // extend the buffer for new data + if ( !buf.extend(bufSize + nRead) ) + return false; + + // and append it to the buffer + memcpy(buf.data() + bufSize, block, nRead); + bufSize += nRead; + } + } + + const wxString str(buf, conv, bufSize); + + // there's no risk of this happening in ANSI build +#if wxUSE_UNICODE + if ( bufSize > 4 && str.empty() ) + { + wxLogError(_("Failed to convert file \"%s\" to Unicode."), GetName()); + return false; + } +#endif // wxUSE_UNICODE + + // we don't need this memory any more + buf.reset(); + + + // now break the buffer in lines + + // last processed character, we need to know if it was a CR or not + wxChar chLast = '\0'; + + // the beginning of the current line, changes inside the loop + wxString::const_iterator lineStart = str.begin(); + const wxString::const_iterator end = str.end(); + for ( wxString::const_iterator p = lineStart; p != end; p++ ) + { + const wxChar ch = *p; + switch ( ch ) + { + case '\n': + // could be a DOS or Unix EOL + if ( chLast == '\r' ) + { + if ( p - 1 >= lineStart ) { - // Mac empty line - AddLine(wxEmptyString, wxTextFileType_Mac); + AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos); } else - chLast = '\r'; - break; - - default: - if ( chLast == '\r' ) { - // Mac line termination - *strPtr = '\0'; - AddLine(wxString(strBuf, conv), wxTextFileType_Mac); - chLast = ch; - strPtr = strBuf; - *(strPtr++) = ch; + // there were two line endings, so add an empty line: + AddLine(wxEmptyString, wxTextFileType_Dos); + } + } + else // bare '\n', Unix style + { + AddLine(wxString(lineStart, p), wxTextFileType_Unix); + } + + lineStart = p + 1; + break; + + case '\r': + if ( chLast == '\r' ) + { + // Mac empty line + AddLine(wxEmptyString, wxTextFileType_Mac); + lineStart = p + 1; + } + //else: we don't know what this is yet -- could be a Mac EOL or + // start of DOS EOL so wait for next char + break; + + default: + if ( chLast == '\r' ) + { + // Mac line termination + if ( p - 1 >= lineStart ) + { + AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac); } else { - // add to the current line - *(strPtr++) = ch; - if ( strPtr == strEnd ) - { - // we must allocate more memory - size_t size = strEnd - strBuf; - char *newBuf = new char[size + 1024]; - memcpy(newBuf, strBuf, size); - delete[] strBuf; - strBuf = newBuf; - strEnd = strBuf + size + 1024; - strPtr = strBuf + size; - } + // there were two line endings, so add an empty line: + AddLine(wxEmptyString, wxTextFileType_Mac); } - } + lineStart = p; + } } - } while ( nRead == WXSIZEOF(buf) ); + + chLast = ch; + } // anything in the last line? - if ( strPtr != strBuf ) + if ( lineStart != end ) { - *strPtr = '\0'; - AddLine(wxString(strBuf, conv), - wxTextFileType_None); // no line terminator + // add unterminated last line + AddLine(wxString(lineStart, end), wxTextFileType_None); } - delete[] strBuf; return true; } -bool wxTextFile::OnWrite(wxTextFileType typeNew, wxMBConv& conv) +bool wxTextFile::OnWrite(wxTextFileType typeNew, const wxMBConv& conv) { wxFileName fn = m_strBufferName; @@ -210,4 +314,3 @@ bool wxTextFile::OnWrite(wxTextFileType typeNew, wxMBConv& conv) } #endif // wxUSE_TEXTFILE -