]> git.saurik.com Git - apt.git/blobdiff - methods/rred.cc
cleanup the hash iteration. unfortunately there is no 1:1 mapping from Hashes::Suppor...
[apt.git] / methods / rred.cc
index 2d4dd768bc82e993bee1d4494ca20bf633d437a4..7c65f8f9223a30d15d552d27d7dfb4ebdc90a852 100644 (file)
@@ -1,11 +1,16 @@
 // Includes                                                                    /*{{{*/
 // Includes                                                                    /*{{{*/
+#include <config.h>
+
 #include <apt-pkg/fileutl.h>
 #include <apt-pkg/fileutl.h>
+#include <apt-pkg/mmap.h>
 #include <apt-pkg/error.h>
 #include <apt-pkg/acquire-method.h>
 #include <apt-pkg/strutl.h>
 #include <apt-pkg/hashes.h>
 #include <apt-pkg/error.h>
 #include <apt-pkg/acquire-method.h>
 #include <apt-pkg/strutl.h>
 #include <apt-pkg/hashes.h>
+#include <apt-pkg/configuration.h>
 
 #include <sys/stat.h>
 
 #include <sys/stat.h>
+#include <sys/uio.h>
 #include <unistd.h>
 #include <utime.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <utime.h>
 #include <stdio.h>
@@ -29,22 +34,23 @@ class RredMethod : public pkgAcqMethod {
        // the supported ed commands
        enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
        // return values
        // the supported ed commands
        enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
        // return values
-       enum State {ED_OK=0, ED_ORDERING=1, ED_PARSER=2, ED_FAILURE=3};
+       enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
 
 
-       State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
+       State applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
                     unsigned long &line, char *buffer, Hashes *hash) const;
                     unsigned long &line, char *buffer, Hashes *hash) const;
-       void ignoreLineInFile(FILE *fin, char *buffer) const;
-       void copyLineFromFileToFile(FILE *fin, FILE *fout,
+       void ignoreLineInFile(FileFd &fin, char *buffer) const;
+       void copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,
                                    Hashes *hash, char *buffer) const;
 
                                    Hashes *hash, char *buffer) const;
 
-       State patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash) const;
+       State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
+       State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
 
 protected:
        // the methods main method
        virtual bool Fetch(FetchItem *Itm);
 
 public:
 
 protected:
        // the methods main method
        virtual bool Fetch(FetchItem *Itm);
 
 public:
-       RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {};
+       RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
 };
                                                                                /*}}}*/
 /** \brief applyFile - in reverse order with a tail recursion                  {{{
 };
                                                                                /*}}}*/
 /** \brief applyFile - in reverse order with a tail recursion                  {{{
@@ -62,12 +68,12 @@ public:
  *  \param hash the created file for correctness
  *  \return the success State of the ed command executor
  */
  *  \param hash the created file for correctness
  *  \return the success State of the ed command executor
  */
-RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
+RredMethod::State RredMethod::applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
                        unsigned long &line, char *buffer, Hashes *hash) const {
        // get the current command and parse it
                        unsigned long &line, char *buffer, Hashes *hash) const {
        // get the current command and parse it
-       if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) {
+       if (ed_cmds.ReadLine(buffer, BUF_SIZE) == NULL) {
                if (Debug == true)
                if (Debug == true)
-                       std::clog << "rred: encounter end of file - we can start patching now.";
+                       std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
                line = 0;
                return ED_OK;
        }
                line = 0;
                return ED_OK;
        }
@@ -120,13 +126,17 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_
        unsigned char mode = *idx;
 
        // save the current position
        unsigned char mode = *idx;
 
        // save the current position
-       unsigned const long pos = ftell(ed_cmds);
+       unsigned const long long pos = ed_cmds.Tell();
 
        // if this is add or change then go to the next full stop
 
        // if this is add or change then go to the next full stop
+       unsigned int data_length = 0;
        if (mode == MODE_CHANGED || mode == MODE_ADDED) {
        if (mode == MODE_CHANGED || mode == MODE_ADDED) {
-               do
+               do {
                        ignoreLineInFile(ed_cmds, buffer);
                        ignoreLineInFile(ed_cmds, buffer);
+                       data_length++;
+               }
                while (strncmp(buffer, ".", 1) != 0);
                while (strncmp(buffer, ".", 1) != 0);
+               data_length--; // the dot should not be copied
        }
 
        // do the recursive call - the last command is the one we need to execute at first
        }
 
        // do the recursive call - the last command is the one we need to execute at first
@@ -140,10 +150,9 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_
                line++;
 
        // first wind to the current position and copy over all unchanged lines
                line++;
 
        // first wind to the current position and copy over all unchanged lines
-       while (line < startline) {
-               fgets(buffer, BUF_SIZE, in_file);
-               copyLineFromFileToFile(in_file, out_file, hash, buffer);
-               line++;
+       if (line < startline) {
+               copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
+               line = startline;
        }
 
        if (mode != MODE_ADDED)
        }
 
        if (mode != MODE_ADDED)
@@ -151,13 +160,8 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_
 
        // include data from ed script
        if (mode == MODE_CHANGED || mode == MODE_ADDED) {
 
        // include data from ed script
        if (mode == MODE_CHANGED || mode == MODE_ADDED) {
-               fseek(ed_cmds, pos, SEEK_SET);
-               while(fgets(buffer, BUF_SIZE, ed_cmds) != NULL) {
-                       if (strncmp(buffer, ".", 1) != 0)
-                               copyLineFromFileToFile(ed_cmds, out_file, hash, buffer);
-                       else
-                               break;
-               }
+               ed_cmds.Seek(pos);
+               copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
        }
 
        // ignore the corresponding number of lines from input
        }
 
        // ignore the corresponding number of lines from input
@@ -170,50 +174,288 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_
        return ED_OK;
 }
                                                                                /*}}}*/
        return ED_OK;
 }
                                                                                /*}}}*/
-void RredMethod::copyLineFromFileToFile(FILE *fin, FILE *fout,                 /*{{{*/
+void RredMethod::copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,/*{{{*/
                                        Hashes *hash, char *buffer) const {
                                        Hashes *hash, char *buffer) const {
-       size_t written = fwrite(buffer, 1, strlen(buffer), fout);
-       hash->Add((unsigned char*)buffer, written);
-       while (strlen(buffer) == (BUF_SIZE - 1) &&
-              buffer[BUF_SIZE - 2] != '\n') {
-               fgets(buffer, BUF_SIZE, fin);
-               written = fwrite(buffer, 1, strlen(buffer), fout);
-               hash->Add((unsigned char*)buffer, written);
+       while (0 < lines--) {
+               do {
+                       fin.ReadLine(buffer, BUF_SIZE);
+                       unsigned long long const towrite = strlen(buffer);
+                       fout.Write(buffer, towrite);
+                       hash->Add((unsigned char*)buffer, towrite);
+               } while (strlen(buffer) == (BUF_SIZE - 1) &&
+                      buffer[BUF_SIZE - 2] != '\n');
        }
 }
                                                                                /*}}}*/
        }
 }
                                                                                /*}}}*/
-void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const {             /*{{{*/
-       fgets(buffer, BUF_SIZE, fin);
+void RredMethod::ignoreLineInFile(FileFd &fin, char *buffer) const {           /*{{{*/
+       fin.ReadLine(buffer, BUF_SIZE);
        while (strlen(buffer) == (BUF_SIZE - 1) &&
               buffer[BUF_SIZE - 2] != '\n') {
        while (strlen(buffer) == (BUF_SIZE - 1) &&
               buffer[BUF_SIZE - 2] != '\n') {
-               fgets(buffer, BUF_SIZE, fin);
+               fin.ReadLine(buffer, BUF_SIZE);
                buffer[0] = ' ';
        }
 }
                                                                                /*}}}*/
                buffer[0] = ' ';
        }
 }
                                                                                /*}}}*/
-RredMethod::State RredMethod::patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,          /*{{{*/
-      Hashes *hash) const {
+RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From,           /*{{{*/
+                                       FileFd &out_file, Hashes *hash) const {
    char buffer[BUF_SIZE];
    char buffer[BUF_SIZE];
-   
+
    /* we do a tail recursion to read the commands in the right order */
    unsigned long line = -1; // assign highest possible value
    /* we do a tail recursion to read the commands in the right order */
    unsigned long line = -1; // assign highest possible value
-   State result = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
+   State const result = applyFile(Patch, From, out_file, line, buffer, hash);
    
    /* read the rest from infile */
    if (result == ED_OK) {
    
    /* read the rest from infile */
    if (result == ED_OK) {
-      while (fgets(buffer, BUF_SIZE, in_file) != NULL) {
-         size_t const written = fwrite(buffer, 1, strlen(buffer), out_file);
-         hash->Add((unsigned char*)buffer, written);
+      while (From.ReadLine(buffer, BUF_SIZE) != NULL) {
+        unsigned long long const towrite = strlen(buffer);
+        out_file.Write(buffer, towrite);
+        hash->Add((unsigned char*)buffer, towrite);
       }
    }
    return result;
       }
    }
    return result;
+}
+                                                                               /*}}}*/
+/* struct EdCommand                                                            {{{*/
+#ifdef _POSIX_MAPPED_FILES
+struct EdCommand {
+  size_t data_start;
+  size_t data_end;
+  size_t data_lines;
+  size_t first_line;
+  size_t last_line;
+  char type;
+};
+#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
+static ssize_t retry_writev(int fd, const struct iovec *iov, int iovcnt) {
+       ssize_t Res;
+       errno = 0;
+       ssize_t i = 0;
+       do {
+               Res = writev(fd, iov + i, iovcnt);
+               if (Res < 0 && errno == EINTR)
+                       continue;
+               if (Res < 0)
+                       return _error->Errno("writev",_("Write error"));
+               iovcnt -= Res;
+               i += Res;
+       } while (Res > 0 && iovcnt > 0);
+       return i;
+}
+#endif
+                                                                               /*}}}*/
+RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From,           /*{{{*/
+                                       FileFd &out_file, Hashes *hash) const {
+#ifdef _POSIX_MAPPED_FILES
+       MMap ed_cmds(Patch, MMap::ReadOnly);
+       MMap in_file(From, MMap::ReadOnly);
+
+       unsigned long long const ed_size = ed_cmds.Size();
+       unsigned long long const in_size = in_file.Size();
+       if (ed_size == 0 || in_size == 0)
+               return MMAP_FAILED;
+
+       EdCommand* commands = 0;
+       size_t command_count = 0;
+       size_t command_alloc = 0;
+
+       const char* begin = (char*) ed_cmds.Data();
+       const char* end = begin;
+       const char* ed_end = (char*) ed_cmds.Data() + ed_size;
+
+       const char* input = (char*) in_file.Data();
+       const char* input_end = (char*) in_file.Data() + in_size;
+
+       size_t i;
+
+       /* 1. Parse entire script.  It is executed in reverse order, so we cather it
+        *    in the `commands' buffer first
+        */
+
+       for(;;) {
+               EdCommand cmd;
+               cmd.data_start = 0;
+               cmd.data_end = 0;
+
+               while(begin != ed_end && *begin == '\n')
+                       ++begin;
+               while(end != ed_end && *end != '\n')
+                       ++end;
+               if(end == ed_end && begin == end)
+                       break;
+
+               /* Determine command range */
+               const char* tmp = begin;
+
+               for(;;) {
+                       /* atoll is safe despite lacking NUL-termination; we know there's an
+                        * alphabetic character at end[-1]
+                        */
+                       if(tmp == end) {
+                               cmd.first_line = atol(begin);
+                               cmd.last_line = cmd.first_line;
+                               break;
+                       }
+                       if(*tmp == ',') {
+                               cmd.first_line = atol(begin);
+                               cmd.last_line = atol(tmp + 1);
+                               break;
+                       }
+                       ++tmp;
+               }
+
+               // which command to execute on this line(s)?
+               switch (end[-1]) {
+                       case MODE_CHANGED:
+                               if (Debug == true)
+                                       std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
+                               break;
+                       case MODE_ADDED:
+                               if (Debug == true)
+                                       std::clog << "Insert after line " << cmd.first_line << std::endl;
+                               break;
+                       case MODE_DELETED:
+                               if (Debug == true)
+                                       std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
+                               break;
+                       default:
+                               _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
+                               free(commands);
+                               return ED_PARSER;
+               }
+               cmd.type = end[-1];
+
+               /* Determine the size of the inserted text, so we don't have to scan this
+                * text again later.
+                */
+               begin = end + 1;
+               end = begin;
+               cmd.data_lines = 0;
+
+               if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
+                       cmd.data_start = begin - (char*) ed_cmds.Data();
+                       while(end != ed_end) {
+                               if(*end == '\n') {
+                                       if(end[-1] == '.' && end[-2] == '\n')
+                                               break;
+                                       ++cmd.data_lines;
+                               }
+                               ++end;
+                       }
+                       cmd.data_end = end - (char*) ed_cmds.Data() - 1;
+                       begin = end + 1;
+                       end = begin;
+               }
+               if(command_count == command_alloc) {
+                       command_alloc = (command_alloc + 64) * 3 / 2;
+                       EdCommand* newCommands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
+                       if (newCommands == NULL) {
+                               free(commands);
+                               return MMAP_FAILED;
+                       }
+                       commands = newCommands;
+               }
+               commands[command_count++] = cmd;
+       }
+
+       struct iovec* iov = new struct iovec[IOV_COUNT];
+       size_t iov_size = 0;
+
+       size_t amount, remaining;
+       size_t line = 1;
+       EdCommand* cmd;
+
+       /* 2. Execute script.  We gather writes in a `struct iov' array, and flush
+        *    using writev to minimize the number of system calls.  Data is read
+        *    directly from the memory mappings of the input file and the script.
+        */
+
+       for(i = command_count; i-- > 0; ) {
+               cmd = &commands[i];
+               if(cmd->type == MODE_ADDED)
+                       amount = cmd->first_line + 1;
+               else
+                       amount = cmd->first_line;
+
+               if(line < amount) {
+                       begin = input;
+                       while(line != amount) {
+                               input = (const char*) memchr(input, '\n', input_end - input);
+                               if(!input)
+                                       break;
+                               ++line;
+                               ++input;
+                       }
+
+                       iov[iov_size].iov_base = (void*) begin;
+                       iov[iov_size].iov_len = input - begin;
+                       hash->Add((const unsigned char*) begin, input - begin);
+
+                       if(++iov_size == IOV_COUNT) {
+                               retry_writev(out_file.Fd(), iov, IOV_COUNT);
+                               iov_size = 0;
+                       }
+               }
+
+               if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
+                       remaining = (cmd->last_line - cmd->first_line) + 1;
+                       line += remaining;
+                       while(remaining) {
+                               input = (const char*) memchr(input, '\n', input_end - input);
+                               if(!input)
+                                       break;
+                               --remaining;
+                               ++input;
+                       }
+               }
+
+               if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
+                       if(cmd->data_end != cmd->data_start) {
+                               iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
+                               iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
+                               hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
+                               iov[iov_size].iov_len);
+
+                               if(++iov_size == IOV_COUNT) {
+                                       retry_writev(out_file.Fd(), iov, IOV_COUNT);
+                                       iov_size = 0;
+                               }
+                       }
+               }
+       }
+
+       if(input != input_end) {
+               iov[iov_size].iov_base = (void*) input;
+               iov[iov_size].iov_len = input_end - input;
+               hash->Add((const unsigned char*) input, input_end - input);
+               ++iov_size;
+       }
+
+       if(iov_size) {
+               retry_writev(out_file.Fd(), iov, iov_size);
+               iov_size = 0;
+       }
+
+       for(i = 0; i < iov_size; i += IOV_COUNT) {
+               if(iov_size - i < IOV_COUNT)
+                       retry_writev(out_file.Fd(), iov + i, iov_size - i);
+               else
+                       retry_writev(out_file.Fd(), iov + i, IOV_COUNT);
+       }
+
+       delete [] iov;
+       free(commands);
+
+       return ED_OK;
+#else
+       return MMAP_FAILED;
+#endif
 }
                                                                                /*}}}*/
 bool RredMethod::Fetch(FetchItem *Itm)                                         /*{{{*/
 {
    Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
    URI Get = Itm->Uri;
 }
                                                                                /*}}}*/
 bool RredMethod::Fetch(FetchItem *Itm)                                         /*{{{*/
 {
    Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
    URI Get = Itm->Uri;
-   string Path = Get.Host + Get.Path; // To account for relative paths
+   std::string Path = Get.Host + Get.Path; // To account for relative paths
 
    FetchResult Res;
    Res.Filename = Itm->DestFile;
 
    FetchResult Res;
    Res.Filename = Itm->DestFile;
@@ -229,51 +471,60 @@ bool RredMethod::Fetch(FetchItem *Itm)                                            /*{{{*/
    // Open the source and destination files (the d'tor of FileFd will do 
    // the cleanup/closing of the fds)
    FileFd From(Path,FileFd::ReadOnly);
    // Open the source and destination files (the d'tor of FileFd will do 
    // the cleanup/closing of the fds)
    FileFd From(Path,FileFd::ReadOnly);
-   FileFd Patch(Path+".ed",FileFd::ReadOnly);
-   FileFd To(Itm->DestFile,FileFd::WriteEmpty);   
+   FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip);
+   FileFd To(Itm->DestFile,FileFd::WriteAtomic);   
    To.EraseOnFailure();
    if (_error->PendingError() == true)
       return false;
    
    Hashes Hash;
    To.EraseOnFailure();
    if (_error->PendingError() == true)
       return false;
    
    Hashes Hash;
-   FILE* fFrom = fdopen(From.Fd(), "r");
-   FILE* fPatch = fdopen(Patch.Fd(), "r");
-   FILE* fTo = fdopen(To.Fd(), "w");
    // now do the actual patching
    // now do the actual patching
-   if (patchFile(fPatch, fFrom, fTo, &Hash) != ED_OK) {
-     _error->Errno("rred", _("Could not patch file"));  
-      return false;
+   State const result = patchMMap(Patch, From, To, &Hash);
+   if (result == MMAP_FAILED) {
+      // retry with patchFile
+      Patch.Seek(0);
+      From.Seek(0);
+      To.Open(Itm->DestFile,FileFd::WriteAtomic);
+      if (_error->PendingError() == true)
+         return false;
+      if (patchFile(Patch, From, To, &Hash) != ED_OK) {
+        return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
+      } else if (Debug == true) {
+        std::clog << "rred: finished file patching of " << Path  << " after mmap failed." << std::endl;
+      }
+   } else if (result != ED_OK) {
+      return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
+   } else if (Debug == true) {
+      std::clog << "rred: finished mmap patching of " << Path << std::endl;
    }
 
    // write out the result
    }
 
    // write out the result
-   fflush(fFrom);
-   fflush(fPatch);
-   fflush(fTo);
    From.Close();
    Patch.Close();
    To.Close();
 
    From.Close();
    Patch.Close();
    To.Close();
 
-   // Transfer the modification times
-   struct stat Buf;
-   if (stat(Path.c_str(),&Buf) != 0)
+   /* Transfer the modification times from the patch file
+      to be able to see in which state the file should be
+      and use the access time from the "old" file */
+   struct stat BufBase, BufPatch;
+   if (stat(Path.c_str(),&BufBase) != 0 ||
+       stat(std::string(Path+".ed").c_str(),&BufPatch) != 0)
       return _error->Errno("stat",_("Failed to stat"));
 
    struct utimbuf TimeBuf;
       return _error->Errno("stat",_("Failed to stat"));
 
    struct utimbuf TimeBuf;
-   TimeBuf.actime = Buf.st_atime;
-   TimeBuf.modtime = Buf.st_mtime;
+   TimeBuf.actime = BufBase.st_atime;
+   TimeBuf.modtime = BufPatch.st_mtime;
    if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
       return _error->Errno("utime",_("Failed to set modification time"));
 
    if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
       return _error->Errno("utime",_("Failed to set modification time"));
 
-   if (stat(Itm->DestFile.c_str(),&Buf) != 0)
+   if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
       return _error->Errno("stat",_("Failed to stat"));
 
    // return done
       return _error->Errno("stat",_("Failed to stat"));
 
    // return done
-   if (Itm->Uri.empty() == true) {
-      Res.LastModified = Buf.st_mtime;
-      Res.Size = Buf.st_size;
-      Res.TakeHashes(Hash);
-      URIDone(Res);
-   }
+   Res.LastModified = BufBase.st_mtime;
+   Res.Size = BufBase.st_size;
+   Res.TakeHashes(Hash);
+   URIDone(Res);
 
    return true;
 }
 
    return true;
 }
@@ -307,7 +558,7 @@ public:
  *  and will write the result to "Testfile.result".
  */
 int main(int argc, char *argv[]) {
  *  and will write the result to "Testfile.result".
  */
 int main(int argc, char *argv[]) {
-       if (argc == 0) {
+       if (argc <= 1) {
                RredMethod Mth;
                return Mth.Run();
        } else {
                RredMethod Mth;
                return Mth.Run();
        } else {