X-Git-Url: https://git.saurik.com/apt.git/blobdiff_plain/c7139d8c8c04e69150ab975c0e2698d05ed6a298..2a49601f69e08f06fb2727d869d420daacdd09d5:/methods/rred.cc diff --git a/methods/rred.cc b/methods/rred.cc index 2d4dd768b..7c65f8f92 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -1,11 +1,16 @@ // Includes /*{{{*/ +#include + #include +#include #include #include #include #include +#include #include +#include #include #include #include @@ -29,22 +34,23 @@ class RredMethod : public pkgAcqMethod { // the supported ed commands enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'}; // return values - enum State {ED_OK=0, ED_ORDERING=1, ED_PARSER=2, ED_FAILURE=3}; + enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED}; - State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, + State applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file, unsigned long &line, char *buffer, Hashes *hash) const; - void ignoreLineInFile(FILE *fin, char *buffer) const; - void copyLineFromFileToFile(FILE *fin, FILE *fout, + void ignoreLineInFile(FileFd &fin, char *buffer) const; + void copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines, Hashes *hash, char *buffer) const; - State patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash) const; + State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const; + State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const; protected: // the methods main method virtual bool Fetch(FetchItem *Itm); public: - RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {}; + RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {}; }; /*}}}*/ /** \brief applyFile - in reverse order with a tail recursion {{{ @@ -62,12 +68,12 @@ public: * \param hash the created file for correctness * \return the success State of the ed command executor */ -RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, +RredMethod::State RredMethod::applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file, unsigned long &line, char *buffer, Hashes *hash) const { // get the current command and parse it - if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) { + if (ed_cmds.ReadLine(buffer, BUF_SIZE) == NULL) { if (Debug == true) - std::clog << "rred: encounter end of file - we can start patching now."; + std::clog << "rred: encounter end of file - we can start patching now." << std::endl; line = 0; return ED_OK; } @@ -120,13 +126,17 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ unsigned char mode = *idx; // save the current position - unsigned const long pos = ftell(ed_cmds); + unsigned const long long pos = ed_cmds.Tell(); // if this is add or change then go to the next full stop + unsigned int data_length = 0; if (mode == MODE_CHANGED || mode == MODE_ADDED) { - do + do { ignoreLineInFile(ed_cmds, buffer); + data_length++; + } while (strncmp(buffer, ".", 1) != 0); + data_length--; // the dot should not be copied } // do the recursive call - the last command is the one we need to execute at first @@ -140,10 +150,9 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ line++; // first wind to the current position and copy over all unchanged lines - while (line < startline) { - fgets(buffer, BUF_SIZE, in_file); - copyLineFromFileToFile(in_file, out_file, hash, buffer); - line++; + if (line < startline) { + copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer); + line = startline; } if (mode != MODE_ADDED) @@ -151,13 +160,8 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ // include data from ed script if (mode == MODE_CHANGED || mode == MODE_ADDED) { - fseek(ed_cmds, pos, SEEK_SET); - while(fgets(buffer, BUF_SIZE, ed_cmds) != NULL) { - if (strncmp(buffer, ".", 1) != 0) - copyLineFromFileToFile(ed_cmds, out_file, hash, buffer); - else - break; - } + ed_cmds.Seek(pos); + copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer); } // ignore the corresponding number of lines from input @@ -170,50 +174,288 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ return ED_OK; } /*}}}*/ -void RredMethod::copyLineFromFileToFile(FILE *fin, FILE *fout, /*{{{*/ +void RredMethod::copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,/*{{{*/ Hashes *hash, char *buffer) const { - size_t written = fwrite(buffer, 1, strlen(buffer), fout); - hash->Add((unsigned char*)buffer, written); - while (strlen(buffer) == (BUF_SIZE - 1) && - buffer[BUF_SIZE - 2] != '\n') { - fgets(buffer, BUF_SIZE, fin); - written = fwrite(buffer, 1, strlen(buffer), fout); - hash->Add((unsigned char*)buffer, written); + while (0 < lines--) { + do { + fin.ReadLine(buffer, BUF_SIZE); + unsigned long long const towrite = strlen(buffer); + fout.Write(buffer, towrite); + hash->Add((unsigned char*)buffer, towrite); + } while (strlen(buffer) == (BUF_SIZE - 1) && + buffer[BUF_SIZE - 2] != '\n'); } } /*}}}*/ -void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/ - fgets(buffer, BUF_SIZE, fin); +void RredMethod::ignoreLineInFile(FileFd &fin, char *buffer) const { /*{{{*/ + fin.ReadLine(buffer, BUF_SIZE); while (strlen(buffer) == (BUF_SIZE - 1) && buffer[BUF_SIZE - 2] != '\n') { - fgets(buffer, BUF_SIZE, fin); + fin.ReadLine(buffer, BUF_SIZE); buffer[0] = ' '; } } /*}}}*/ -RredMethod::State RredMethod::patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, /*{{{*/ - Hashes *hash) const { +RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/ + FileFd &out_file, Hashes *hash) const { char buffer[BUF_SIZE]; - + /* we do a tail recursion to read the commands in the right order */ unsigned long line = -1; // assign highest possible value - State result = applyFile(ed_cmds, in_file, out_file, line, buffer, hash); + State const result = applyFile(Patch, From, out_file, line, buffer, hash); /* read the rest from infile */ if (result == ED_OK) { - while (fgets(buffer, BUF_SIZE, in_file) != NULL) { - size_t const written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); + while (From.ReadLine(buffer, BUF_SIZE) != NULL) { + unsigned long long const towrite = strlen(buffer); + out_file.Write(buffer, towrite); + hash->Add((unsigned char*)buffer, towrite); } } return result; +} + /*}}}*/ +/* struct EdCommand {{{*/ +#ifdef _POSIX_MAPPED_FILES +struct EdCommand { + size_t data_start; + size_t data_end; + size_t data_lines; + size_t first_line; + size_t last_line; + char type; +}; +#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */ +static ssize_t retry_writev(int fd, const struct iovec *iov, int iovcnt) { + ssize_t Res; + errno = 0; + ssize_t i = 0; + do { + Res = writev(fd, iov + i, iovcnt); + if (Res < 0 && errno == EINTR) + continue; + if (Res < 0) + return _error->Errno("writev",_("Write error")); + iovcnt -= Res; + i += Res; + } while (Res > 0 && iovcnt > 0); + return i; +} +#endif + /*}}}*/ +RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/ + FileFd &out_file, Hashes *hash) const { +#ifdef _POSIX_MAPPED_FILES + MMap ed_cmds(Patch, MMap::ReadOnly); + MMap in_file(From, MMap::ReadOnly); + + unsigned long long const ed_size = ed_cmds.Size(); + unsigned long long const in_size = in_file.Size(); + if (ed_size == 0 || in_size == 0) + return MMAP_FAILED; + + EdCommand* commands = 0; + size_t command_count = 0; + size_t command_alloc = 0; + + const char* begin = (char*) ed_cmds.Data(); + const char* end = begin; + const char* ed_end = (char*) ed_cmds.Data() + ed_size; + + const char* input = (char*) in_file.Data(); + const char* input_end = (char*) in_file.Data() + in_size; + + size_t i; + + /* 1. Parse entire script. It is executed in reverse order, so we cather it + * in the `commands' buffer first + */ + + for(;;) { + EdCommand cmd; + cmd.data_start = 0; + cmd.data_end = 0; + + while(begin != ed_end && *begin == '\n') + ++begin; + while(end != ed_end && *end != '\n') + ++end; + if(end == ed_end && begin == end) + break; + + /* Determine command range */ + const char* tmp = begin; + + for(;;) { + /* atoll is safe despite lacking NUL-termination; we know there's an + * alphabetic character at end[-1] + */ + if(tmp == end) { + cmd.first_line = atol(begin); + cmd.last_line = cmd.first_line; + break; + } + if(*tmp == ',') { + cmd.first_line = atol(begin); + cmd.last_line = atol(tmp + 1); + break; + } + ++tmp; + } + + // which command to execute on this line(s)? + switch (end[-1]) { + case MODE_CHANGED: + if (Debug == true) + std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl; + break; + case MODE_ADDED: + if (Debug == true) + std::clog << "Insert after line " << cmd.first_line << std::endl; + break; + case MODE_DELETED: + if (Debug == true) + std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl; + break; + default: + _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]); + free(commands); + return ED_PARSER; + } + cmd.type = end[-1]; + + /* Determine the size of the inserted text, so we don't have to scan this + * text again later. + */ + begin = end + 1; + end = begin; + cmd.data_lines = 0; + + if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) { + cmd.data_start = begin - (char*) ed_cmds.Data(); + while(end != ed_end) { + if(*end == '\n') { + if(end[-1] == '.' && end[-2] == '\n') + break; + ++cmd.data_lines; + } + ++end; + } + cmd.data_end = end - (char*) ed_cmds.Data() - 1; + begin = end + 1; + end = begin; + } + if(command_count == command_alloc) { + command_alloc = (command_alloc + 64) * 3 / 2; + EdCommand* newCommands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand)); + if (newCommands == NULL) { + free(commands); + return MMAP_FAILED; + } + commands = newCommands; + } + commands[command_count++] = cmd; + } + + struct iovec* iov = new struct iovec[IOV_COUNT]; + size_t iov_size = 0; + + size_t amount, remaining; + size_t line = 1; + EdCommand* cmd; + + /* 2. Execute script. We gather writes in a `struct iov' array, and flush + * using writev to minimize the number of system calls. Data is read + * directly from the memory mappings of the input file and the script. + */ + + for(i = command_count; i-- > 0; ) { + cmd = &commands[i]; + if(cmd->type == MODE_ADDED) + amount = cmd->first_line + 1; + else + amount = cmd->first_line; + + if(line < amount) { + begin = input; + while(line != amount) { + input = (const char*) memchr(input, '\n', input_end - input); + if(!input) + break; + ++line; + ++input; + } + + iov[iov_size].iov_base = (void*) begin; + iov[iov_size].iov_len = input - begin; + hash->Add((const unsigned char*) begin, input - begin); + + if(++iov_size == IOV_COUNT) { + retry_writev(out_file.Fd(), iov, IOV_COUNT); + iov_size = 0; + } + } + + if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) { + remaining = (cmd->last_line - cmd->first_line) + 1; + line += remaining; + while(remaining) { + input = (const char*) memchr(input, '\n', input_end - input); + if(!input) + break; + --remaining; + ++input; + } + } + + if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) { + if(cmd->data_end != cmd->data_start) { + iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start); + iov[iov_size].iov_len = cmd->data_end - cmd->data_start; + hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start), + iov[iov_size].iov_len); + + if(++iov_size == IOV_COUNT) { + retry_writev(out_file.Fd(), iov, IOV_COUNT); + iov_size = 0; + } + } + } + } + + if(input != input_end) { + iov[iov_size].iov_base = (void*) input; + iov[iov_size].iov_len = input_end - input; + hash->Add((const unsigned char*) input, input_end - input); + ++iov_size; + } + + if(iov_size) { + retry_writev(out_file.Fd(), iov, iov_size); + iov_size = 0; + } + + for(i = 0; i < iov_size; i += IOV_COUNT) { + if(iov_size - i < IOV_COUNT) + retry_writev(out_file.Fd(), iov + i, iov_size - i); + else + retry_writev(out_file.Fd(), iov + i, IOV_COUNT); + } + + delete [] iov; + free(commands); + + return ED_OK; +#else + return MMAP_FAILED; +#endif } /*}}}*/ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ { Debug = _config->FindB("Debug::pkgAcquire::RRed", false); URI Get = Itm->Uri; - string Path = Get.Host + Get.Path; // To account for relative paths + std::string Path = Get.Host + Get.Path; // To account for relative paths FetchResult Res; Res.Filename = Itm->DestFile; @@ -229,51 +471,60 @@ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ // Open the source and destination files (the d'tor of FileFd will do // the cleanup/closing of the fds) FileFd From(Path,FileFd::ReadOnly); - FileFd Patch(Path+".ed",FileFd::ReadOnly); - FileFd To(Itm->DestFile,FileFd::WriteEmpty); + FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip); + FileFd To(Itm->DestFile,FileFd::WriteAtomic); To.EraseOnFailure(); if (_error->PendingError() == true) return false; Hashes Hash; - FILE* fFrom = fdopen(From.Fd(), "r"); - FILE* fPatch = fdopen(Patch.Fd(), "r"); - FILE* fTo = fdopen(To.Fd(), "w"); // now do the actual patching - if (patchFile(fPatch, fFrom, fTo, &Hash) != ED_OK) { - _error->Errno("rred", _("Could not patch file")); - return false; + State const result = patchMMap(Patch, From, To, &Hash); + if (result == MMAP_FAILED) { + // retry with patchFile + Patch.Seek(0); + From.Seek(0); + To.Open(Itm->DestFile,FileFd::WriteAtomic); + if (_error->PendingError() == true) + return false; + if (patchFile(Patch, From, To, &Hash) != ED_OK) { + return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str()); + } else if (Debug == true) { + std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl; + } + } else if (result != ED_OK) { + return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str()); + } else if (Debug == true) { + std::clog << "rred: finished mmap patching of " << Path << std::endl; } // write out the result - fflush(fFrom); - fflush(fPatch); - fflush(fTo); From.Close(); Patch.Close(); To.Close(); - // Transfer the modification times - struct stat Buf; - if (stat(Path.c_str(),&Buf) != 0) + /* Transfer the modification times from the patch file + to be able to see in which state the file should be + and use the access time from the "old" file */ + struct stat BufBase, BufPatch; + if (stat(Path.c_str(),&BufBase) != 0 || + stat(std::string(Path+".ed").c_str(),&BufPatch) != 0) return _error->Errno("stat",_("Failed to stat")); struct utimbuf TimeBuf; - TimeBuf.actime = Buf.st_atime; - TimeBuf.modtime = Buf.st_mtime; + TimeBuf.actime = BufBase.st_atime; + TimeBuf.modtime = BufPatch.st_mtime; if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0) return _error->Errno("utime",_("Failed to set modification time")); - if (stat(Itm->DestFile.c_str(),&Buf) != 0) + if (stat(Itm->DestFile.c_str(),&BufBase) != 0) return _error->Errno("stat",_("Failed to stat")); // return done - if (Itm->Uri.empty() == true) { - Res.LastModified = Buf.st_mtime; - Res.Size = Buf.st_size; - Res.TakeHashes(Hash); - URIDone(Res); - } + Res.LastModified = BufBase.st_mtime; + Res.Size = BufBase.st_size; + Res.TakeHashes(Hash); + URIDone(Res); return true; } @@ -307,7 +558,7 @@ public: * and will write the result to "Testfile.result". */ int main(int argc, char *argv[]) { - if (argc == 0) { + if (argc <= 1) { RredMethod Mth; return Mth.Run(); } else {