From 1a37c619614a9260968f1703d7c6aae5a9a139f7 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Fri, 6 Nov 2009 09:43:31 +0100 Subject: [PATCH] Finally adope the patch from Morten Hustveit to be able to optional use mmaps and iovec to increase patch speed - but as this increase memory usage we can always fall back to the "old" method which doesn't relay on mmaps. --- methods/rred.cc | 261 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 243 insertions(+), 18 deletions(-) diff --git a/methods/rred.cc b/methods/rred.cc index 9abb1b89c..7236efd03 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -1,11 +1,13 @@ // Includes /*{{{*/ #include +#include #include #include #include #include #include +#include #include #include #include @@ -29,7 +31,7 @@ class RredMethod : public pkgAcqMethod { // the supported ed commands enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'}; // return values - enum State {ED_OK=0, ED_ORDERING=1, ED_PARSER=2, ED_FAILURE=3}; + enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED}; State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, unsigned long &line, char *buffer, Hashes *hash) const; @@ -37,7 +39,8 @@ class RredMethod : public pkgAcqMethod { void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines, Hashes *hash, char *buffer) const; - State patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash) const; + State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const; + State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const; protected: // the methods main method @@ -67,7 +70,7 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ // get the current command and parse it if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) { if (Debug == true) - std::clog << "rred: encounter end of file - we can start patching now."; + std::clog << "rred: encounter end of file - we can start patching now." << std::endl; line = 0; return ED_OK; } @@ -189,22 +192,240 @@ void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/ } } /*}}}*/ -RredMethod::State RredMethod::patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, /*{{{*/ - Hashes *hash) const { +RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/ + FileFd &out_file, Hashes *hash) const { char buffer[BUF_SIZE]; - + FILE* fFrom = fdopen(From.Fd(), "r"); + FILE* fPatch = fdopen(Patch.Fd(), "r"); + FILE* fTo = fdopen(out_file.Fd(), "w"); + /* we do a tail recursion to read the commands in the right order */ unsigned long line = -1; // assign highest possible value - State result = applyFile(ed_cmds, in_file, out_file, line, buffer, hash); + State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash); /* read the rest from infile */ if (result == ED_OK) { - while (fgets(buffer, BUF_SIZE, in_file) != NULL) { - size_t const written = fwrite(buffer, 1, strlen(buffer), out_file); + while (fgets(buffer, BUF_SIZE, fFrom) != NULL) { + size_t const written = fwrite(buffer, 1, strlen(buffer), fTo); hash->Add((unsigned char*)buffer, written); } + fflush(fTo); } return result; +} + /*}}}*/ +struct EdCommand { /*{{{*/ + size_t data_start; + size_t data_end; + size_t data_lines; + size_t first_line; + size_t last_line; + char type; +}; +#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */ + /*}}}*/ +RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/ + FileFd &out_file, Hashes *hash) const { +#ifdef _POSIX_MAPPED_FILES + MMap ed_cmds(Patch, MMap::ReadOnly); + MMap in_file(From, MMap::ReadOnly); + FILE* fTo = fdopen(out_file.Fd(), "w"); + + if (ed_cmds.Size() == 0 || in_file.Size() == 0) + return MMAP_FAILED; + + EdCommand* commands = 0; + size_t command_count = 0; + size_t command_alloc = 0; + + const char* begin = (char*) ed_cmds.Data(); + const char* end = begin; + const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size(); + + const char* input = (char*) in_file.Data(); + const char* input_end = (char*) in_file.Data() + in_file.Size(); + + size_t i; + + /* 1. Parse entire script. It is executed in reverse order, so we cather it + * in the `commands' buffer first + */ + + for(;;) { + EdCommand cmd; + cmd.data_start = 0; + cmd.data_end = 0; + + while(begin != ed_end && *begin == '\n') + ++begin; + while(end != ed_end && *end != '\n') + ++end; + if(end == ed_end && begin == end) + break; + + /* Determine command range */ + const char* tmp = begin; + + for(;;) { + /* atoll is safe despite lacking NUL-termination; we know there's an + * alphabetic character at end[-1] + */ + if(tmp == end) { + cmd.first_line = atol(begin); + cmd.last_line = cmd.first_line; + break; + } + if(*tmp == ',') { + cmd.first_line = atol(begin); + cmd.last_line = atol(tmp + 1); + break; + } + ++tmp; + } + + // which command to execute on this line(s)? + switch (end[-1]) { + case MODE_CHANGED: + if (Debug == true) + std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl; + break; + case MODE_ADDED: + if (Debug == true) + std::clog << "Insert after line " << cmd.first_line << std::endl; + break; + case MODE_DELETED: + if (Debug == true) + std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl; + break; + default: + _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]); + free(commands); + return ED_PARSER; + } + cmd.type = end[-1]; + + /* Determine the size of the inserted text, so we don't have to scan this + * text again later. + */ + begin = end + 1; + end = begin; + cmd.data_lines = 0; + + if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) { + cmd.data_start = begin - (char*) ed_cmds.Data(); + while(end != ed_end) { + if(*end == '\n') { + if(end[-1] == '.' && end[-2] == '\n') + break; + ++cmd.data_lines; + } + ++end; + } + cmd.data_end = end - (char*) ed_cmds.Data() - 1; + begin = end + 1; + end = begin; + } + if(command_count == command_alloc) { + command_alloc = (command_alloc + 64) * 3 / 2; + commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand)); + } + commands[command_count++] = cmd; + } + + struct iovec* iov = new struct iovec[IOV_COUNT]; + size_t iov_size = 0; + + size_t amount, remaining; + size_t line = 1; + EdCommand* cmd; + + /* 2. Execute script. We gather writes in a `struct iov' array, and flush + * using writev to minimize the number of system calls. Data is read + * directly from the memory mappings of the input file and the script. + */ + + for(i = command_count; i-- > 0; ) { + cmd = &commands[i]; + if(cmd->type == MODE_ADDED) + amount = cmd->first_line + 1; + else + amount = cmd->first_line; + + if(line < amount) { + begin = input; + while(line != amount) { + input = (const char*) memchr(input, '\n', input_end - input); + if(!input) + break; + ++line; + ++input; + } + + iov[iov_size].iov_base = (void*) begin; + iov[iov_size].iov_len = input - begin; + hash->Add((const unsigned char*) begin, input - begin); + + if(++iov_size == IOV_COUNT) { + writev(out_file.Fd(), iov, IOV_COUNT); + iov_size = 0; + } + } + + if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) { + remaining = (cmd->last_line - cmd->first_line) + 1; + line += remaining; + while(remaining) { + input = (const char*) memchr(input, '\n', input_end - input); + if(!input) + break; + --remaining; + ++input; + } + } + + if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) { + if(cmd->data_end != cmd->data_start) { + iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start); + iov[iov_size].iov_len = cmd->data_end - cmd->data_start; + hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start), + iov[iov_size].iov_len); + + if(++iov_size == IOV_COUNT) { + writev(out_file.Fd(), iov, IOV_COUNT); + iov_size = 0; + } + } + } + } + + if(input != input_end) { + iov[iov_size].iov_base = (void*) input; + iov[iov_size].iov_len = input_end - input; + hash->Add((const unsigned char*) input, input_end - input); + ++iov_size; + } + + if(iov_size) { + writev(out_file.Fd(), iov, iov_size); + iov_size = 0; + } + + for(i = 0; i < iov_size; i += IOV_COUNT) { + if(iov_size - i < IOV_COUNT) + writev(out_file.Fd(), iov + i, iov_size - i); + else + writev(out_file.Fd(), iov + i, IOV_COUNT); + } + + delete [] iov; + free(commands); + + fflush(fTo); + + return ED_OK; +#else + return MMAP_FAILED; +#endif } /*}}}*/ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ @@ -234,19 +455,23 @@ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ return false; Hashes Hash; - FILE* fFrom = fdopen(From.Fd(), "r"); - FILE* fPatch = fdopen(Patch.Fd(), "r"); - FILE* fTo = fdopen(To.Fd(), "w"); // now do the actual patching - if (patchFile(fPatch, fFrom, fTo, &Hash) != ED_OK) { - _error->Errno("rred", _("Could not patch file")); - return false; + State const result = patchMMap(Patch, From, To, &Hash); + if (result == MMAP_FAILED) { + // retry with patchFile + lseek(Patch.Fd(), 0, SEEK_SET); + lseek(From.Fd(), 0, SEEK_SET); + To.Open(Itm->DestFile,FileFd::WriteEmpty); + if (_error->PendingError() == true) + return false; + if (patchFile(Patch, From, To, &Hash) != ED_OK) { + return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (1)").c_str()); + } + } else if (result != ED_OK) { + return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (2)").c_str()); } // write out the result - fflush(fFrom); - fflush(fPatch); - fflush(fTo); From.Close(); Patch.Close(); To.Close(); -- 2.45.2