X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/dbcf443c7d19d415c23678c16d3da5cd8fbb151d..64accea5fae6a4ad5303d2b513043fd39a3b383b:/src/common/textfile.cpp?ds=inline diff --git a/src/common/textfile.cpp b/src/common/textfile.cpp index c48a48d2ab..79d837f631 100644 --- a/src/common/textfile.cpp +++ b/src/common/textfile.cpp @@ -86,66 +86,125 @@ bool wxTextFile::OnClose() } -bool wxTextFile::OnRead(wxMBConv& conv) +bool wxTextFile::OnRead(const wxMBConv& conv) { - // file should be opened and we must be in it's beginning - wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 ); + // file should be opened + wxASSERT_MSG( m_file.IsOpened(), _T("can't read closed file") ); // read the entire file in memory: this is not the most efficient thing to - // do but there is no good way to avoid it in Unicode build because if we - // read the file block by block we can't convert each block to Unicode + // do it but there is no good way to avoid it in Unicode build because if + // we read the file block by block we can't convert each block to Unicode // separately (the last multibyte char in the block might be only partially // read and so the conversion would fail) and, as the file contents is kept // in memory by wxTextFile anyhow, it shouldn't be a big problem to read // the file entirely - const size_t bufSize = m_file.Length() + 4 /* for trailing NULs */; - size_t bufPos = 0; - wxCharBuffer buf(bufSize - 1 /* it adds 1 internally */); + size_t bufSize = 0; - char block[1024]; - for ( bool eof = false; !eof; ) + // number of bytes to (try to) read from disk at once + static const size_t BLOCK_SIZE = 4096; + + wxCharBuffer buf; + + // first determine if the file is seekable or not and so whether we can + // determine its length in advance + wxFileOffset fileLength; { - // try to read up to the size of the entire block - ssize_t nRead = m_file.Read(block, WXSIZEOF(block)); + wxLogNull logNull; + fileLength = m_file.Length(); + } - if ( nRead == wxInvalidOffset ) - { - // read error (error message already given in wxFile::Read) + // some non-seekable files under /proc under Linux pretend that they're + // seekable but always return 0; others do return an error + const bool seekable = fileLength != wxInvalidOffset && fileLength != 0; + if ( seekable ) + { + // we know the required length, so set the buffer size in advance + bufSize = fileLength; + if ( !buf.extend(bufSize) ) return false; - } - eof = nRead == 0; - if ( eof ) + // if the file is seekable, also check that we're at its beginning + wxASSERT_MSG( m_file.Tell() == 0, _T("should be at start of file") ); + + char *dst = buf.data(); + for ( size_t nRemaining = bufSize; nRemaining > 0; ) { - // append 4 trailing NUL bytes: this is needed to ensure that the - // string is going to be NUL-terminated, whatever is the encoding - // used (even UTF-32) - block[0] = - block[1] = - block[2] = - block[3] = '\0'; - nRead = 4; + size_t nToRead = BLOCK_SIZE; + + // the file size could have changed, avoid overflowing the buffer + // even if it did + if ( nToRead > nRemaining ) + nToRead = nRemaining; + + ssize_t nRead = m_file.Read(dst, nToRead); + + if ( nRead == wxInvalidOffset ) + { + // read error (error message already given in wxFile::Read) + return false; + } + + if ( nRead == 0 ) + { + // this file can't be empty because we checked for this above + // so this must be the end of file + break; + } + + dst += nRead; + nRemaining -= nRead; } - // this shouldn't happen but don't overwrite the buffer if it does - wxCHECK_MSG( bufPos + nRead <= bufSize, false, - _T("read more than file length?") ); + wxASSERT_MSG( dst - buf.data() == (wxFileOffset)bufSize, + _T("logic error") ); + } + else // file is not seekable + { + char block[BLOCK_SIZE]; + for ( ;; ) + { + ssize_t nRead = m_file.Read(block, WXSIZEOF(block)); + + if ( nRead == wxInvalidOffset ) + { + // read error (error message already given in wxFile::Read) + return false; + } + + if ( nRead == 0 ) + { + // if no bytes have been read, presumably this is a + // valid-but-empty file + if ( bufSize == 0 ) + return true; + + // otherwise we've finished reading the file + break; + } + + // extend the buffer for new data + if ( !buf.extend(bufSize + nRead) ) + return false; - // append to the buffer - memcpy(buf.data() + bufPos, block, nRead); - bufPos += nRead; + // and append it to the buffer + memcpy(buf.data() + bufSize, block, nRead); + bufSize += nRead; + } } - const wxString str(buf, conv); + const wxString str(buf, conv, bufSize); + + // there's no risk of this happening in ANSI build #if wxUSE_UNICODE - if ( str.empty() ) + if ( bufSize > 4 && str.empty() ) { - wxLogError(_("Failed to convert file contents to Unicode.")); + wxLogError(_("Failed to convert file \"%s\" to Unicode."), GetName()); return false; } #endif // wxUSE_UNICODE - free(buf.release()); // we don't need this memory any more + // we don't need this memory any more + buf.reset(); // now break the buffer in lines @@ -165,7 +224,15 @@ bool wxTextFile::OnRead(wxMBConv& conv) // could be a DOS or Unix EOL if ( chLast == '\r' ) { - AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos); + if ( p - 1 >= lineStart ) + { + AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos); + } + else + { + // there were two line endings, so add an empty line: + AddLine(wxEmptyString, wxTextFileType_Dos); + } } else // bare '\n', Unix style { @@ -190,7 +257,15 @@ bool wxTextFile::OnRead(wxMBConv& conv) if ( chLast == '\r' ) { // Mac line termination - AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac); + if ( p - 1 >= lineStart ) + { + AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac); + } + else + { + // there were two line endings, so add an empty line: + AddLine(wxEmptyString, wxTextFileType_Mac); + } lineStart = p; } } @@ -209,7 +284,7 @@ bool wxTextFile::OnRead(wxMBConv& conv) } -bool wxTextFile::OnWrite(wxTextFileType typeNew, wxMBConv& conv) +bool wxTextFile::OnWrite(wxTextFileType typeNew, const wxMBConv& conv) { wxFileName fn = m_strBufferName; @@ -239,4 +314,3 @@ bool wxTextFile::OnWrite(wxTextFileType typeNew, wxMBConv& conv) } #endif // wxUSE_TEXTFILE -