]> git.saurik.com Git - apt.git/commitdiff
Finally adopt the patch from Morten Hustveit <morten@debian.org> to be
authorDavid Kalnischkies <kalnischkies@gmail.com>
Fri, 6 Nov 2009 08:43:31 +0000 (09:43 +0100)
committerDavid Kalnischkies <kalnischkies@gmail.com>
Fri, 6 Nov 2009 08:43:31 +0000 (09:43 +0100)
able to optionally use mmaps and iovec to increase patch speed -
but as this increases memory usage we can always fall back to the "old"
method which doesn't rely on mmaps.

methods/rred.cc

index 9abb1b89c6699e026f7f0566e46b0541a67a22aa..7236efd03743c56140ca807eeb0b3ede3b1a797c 100644 (file)
@@ -1,11 +1,13 @@
 // Includes                                                                    /*{{{*/
 #include <apt-pkg/fileutl.h>
+#include <apt-pkg/mmap.h>
 #include <apt-pkg/error.h>
 #include <apt-pkg/acquire-method.h>
 #include <apt-pkg/strutl.h>
 #include <apt-pkg/hashes.h>
 
 #include <sys/stat.h>
+#include <sys/uio.h>
 #include <unistd.h>
 #include <utime.h>
 #include <stdio.h>
@@ -29,7 +31,7 @@ class RredMethod : public pkgAcqMethod {
        // the supported ed commands
        enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
        // return values
-       enum State {ED_OK=0, ED_ORDERING=1, ED_PARSER=2, ED_FAILURE=3};
+       enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
 
        State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
                     unsigned long &line, char *buffer, Hashes *hash) const;
@@ -37,7 +39,8 @@ class RredMethod : public pkgAcqMethod {
        void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
                                    Hashes *hash, char *buffer) const;
 
-       State patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash) const;
+       State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
+       State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
 
 protected:
        // the methods main method
@@ -67,7 +70,7 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_
        // get the current command and parse it
        if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) {
                if (Debug == true)
-                       std::clog << "rred: encounter end of file - we can start patching now.";
+                       std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
                line = 0;
                return ED_OK;
        }
@@ -189,22 +192,240 @@ void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const {               /*{{{*/
        }
 }
                                                                                /*}}}*/
-RredMethod::State RredMethod::patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,          /*{{{*/
-      Hashes *hash) const {
+RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From,           /*{{{*/
+                                       FileFd &out_file, Hashes *hash) const {
    char buffer[BUF_SIZE];
-   
+   FILE* fFrom = fdopen(From.Fd(), "r");  // stdio read stream over From's descriptor; NOTE(review): fdopen results are not checked for NULL
+   FILE* fPatch = fdopen(Patch.Fd(), "r");  // stdio read stream over the ed script's descriptor
+   FILE* fTo = fdopen(out_file.Fd(), "w");  // stdio write stream over the output descriptor
+
    /* we do a tail recursion to read the commands in the right order */
    unsigned long line = -1; // assign highest possible value
-   State result = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
+   State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);  // any result other than ED_OK is returned to the caller unchanged
    
    /* read the rest from infile */
    if (result == ED_OK) {
-      while (fgets(buffer, BUF_SIZE, in_file) != NULL) {
-         size_t const written = fwrite(buffer, 1, strlen(buffer), out_file);
+      while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
+         size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);  // only the bytes actually written are hashed below
          hash->Add((unsigned char*)buffer, written);
       }
+      fflush(fTo);  // fTo is not closed in this function, so push its buffered output to the descriptor now
    }
    return result;
+}
+                                                                               /*}}}*/
+struct EdCommand {                                                             /*{{{*/
+  size_t data_start;  // byte offset into the mapped ed script where the text of an 'a'/'c' command begins; 0 otherwise
+  size_t data_end;    // byte offset where that text stops (the "." terminator when one was found); 0 otherwise
+  size_t data_lines;  // number of newlines counted in that text; 0 for commands without text
+  size_t first_line;  // first line number parsed from the command
+  size_t last_line;   // second line number of a "first,last" range; equals first_line when no ',' is present
+  char type;          // command letter: the last character of the command line
+};
+#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
+                                                                               /*}}}*/
+RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From,           /*{{{*/
+                                       FileFd &out_file, Hashes *hash) const {
+#ifdef _POSIX_MAPPED_FILES
+       MMap ed_cmds(Patch, MMap::ReadOnly);  // read-only mapping of the ed script
+       MMap in_file(From, MMap::ReadOnly);  // read-only mapping of the file to be patched
+       FILE* fTo = fdopen(out_file.Fd(), "w");  // only used for the final fflush(); the output itself goes through writev()
+
+       if (ed_cmds.Size() == 0 || in_file.Size() == 0)
+               return MMAP_FAILED;  // an empty mapping is reported as MMAP_FAILED; Fetch reacts by retrying with patchFile
+
+       EdCommand* commands = 0;  // growable array of parsed commands, (re)allocated with realloc below
+       size_t command_count = 0;  // number of entries in use
+       size_t command_alloc = 0;  // number of entries allocated
+
+       const char* begin = (char*) ed_cmds.Data();  // start of the command line being parsed
+       const char* end = begin;  // scans forward to the newline ending that line
+       const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();  // one past the last byte of the script
+
+       const char* input = (char*) in_file.Data();  // read cursor in the mapped input file
+       const char* input_end = (char*) in_file.Data() + in_file.Size();  // one past the last byte of the input
+
+       size_t i;
+
+       /* 1. Parse entire script.  It is executed in reverse order, so we gather it
+        *    in the `commands' buffer first
+        */
+
+       for(;;) {
+               EdCommand cmd;
+               cmd.data_start = 0;
+               cmd.data_end = 0;
+
+               while(begin != ed_end && *begin == '\n')  // skip newlines in front of the command
+                       ++begin;
+               while(end != ed_end && *end != '\n')  // move end to the newline terminating the command line
+                       ++end;
+               if(end == ed_end && begin == end)
+                       break;  // nothing left in the script
+
+               /* Determine command range */
+               const char* tmp = begin;
+
+               for(;;) {
+                       /* atol is safe despite lacking NUL-termination; we expect a
+                        * non-digit command letter at end[-1], which stops the conversion
+                        */
+                       if(tmp == end) {  // reached the end of the line without a ',': single line number
+                               cmd.first_line = atol(begin);
+                               cmd.last_line = cmd.first_line;
+                               break;
+                       }
+                       if(*tmp == ',') {  // "first,last" range
+                               cmd.first_line = atol(begin);
+                               cmd.last_line = atol(tmp + 1);
+                               break;
+                       }
+                       ++tmp;
+               }
+
+               // which command to execute on this line(s)?
+               switch (end[-1]) {
+                       case MODE_CHANGED:
+                               if (Debug == true)
+                                       std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
+                               break;
+                       case MODE_ADDED:
+                               if (Debug == true)
+                                       std::clog << "Insert after line " << cmd.first_line << std::endl;
+                               break;
+                       case MODE_DELETED:
+                               if (Debug == true)
+                                       std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
+                               break;
+                       default:
+                               _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
+                               free(commands);  // release the partially built command list before bailing out
+                               return ED_PARSER;
+               }
+               cmd.type = end[-1];
+
+               /* Determine the size of the inserted text, so we don't have to scan this
+                * text again later.
+                */
+               begin = end + 1;  // step past the newline that ended the command line
+               end = begin;
+               cmd.data_lines = 0;
+
+               if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
+                       cmd.data_start = begin - (char*) ed_cmds.Data();  // stored as an offset into the script mapping
+                       while(end != ed_end) {
+                               if(*end == '\n') {
+                                       if(end[-1] == '.' && end[-2] == '\n')  // a line holding only "." terminates the text
+                                               break;
+                                       ++cmd.data_lines;
+                               }
+                               ++end;
+                       }
+                       cmd.data_end = end - (char*) ed_cmds.Data() - 1;  // when the terminator was found this is the offset of its '.'
+                       begin = end + 1;  // continue parsing after the terminator line
+                       end = begin;
+               }
+               if(command_count == command_alloc) {  // array full: grow it
+                       command_alloc = (command_alloc + 64) * 3 / 2;
+                       commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));  // NOTE(review): realloc result is not checked for NULL
+               }
+               commands[command_count++] = cmd;
+       }
+
+       struct iovec* iov = new struct iovec[IOV_COUNT];  // batch of output segments waiting to be written
+       size_t iov_size = 0;  // number of segments currently queued in iov
+
+       size_t amount, remaining;
+       size_t line = 1;  // number of the input line that `input' currently points at (1-based)
+       EdCommand* cmd;
+
+       /* 2. Execute script.  We gather writes in a `struct iovec' array, and flush
+        *    using writev to minimize the number of system calls.  Data is read
+        *    directly from the memory mappings of the input file and the script.
+        */
+
+       for(i = command_count; i-- > 0; ) {  // walk the parsed commands from last to first
+               cmd = &commands[i];
+               if(cmd->type == MODE_ADDED)
+                       amount = cmd->first_line + 1;  // add: keep the input up to and including first_line
+               else
+                       amount = cmd->first_line;  // change/delete: keep the input before first_line
+
+               if(line < amount) {
+                       begin = input;  // start of the unchanged run that is copied to the output
+                       while(line != amount) {
+                               input = (const char*) memchr(input, '\n', input_end - input);
+                               if(!input)
+                                       break;  // no further newline in the input
+                               ++line;
+                               ++input;
+                       }
+
+                       iov[iov_size].iov_base = (void*) begin;
+                       iov[iov_size].iov_len = input - begin;  // NOTE(review): input is NULL here if the loop above left through the break
+                       hash->Add((const unsigned char*) begin, input - begin);
+
+                       if(++iov_size == IOV_COUNT) {  // batch full: write it out and start over
+                               writev(out_file.Fd(), iov, IOV_COUNT);  // NOTE(review): writev() return value is not checked
+                               iov_size = 0;
+                       }
+               }
+
+               if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
+                       remaining = (cmd->last_line - cmd->first_line) + 1;  // number of input lines to drop
+                       line += remaining;  // the line counter is advanced up front
+                       while(remaining) {
+                               input = (const char*) memchr(input, '\n', input_end - input);
+                               if(!input)
+                                       break;  // no further newline in the input
+                               --remaining;
+                               ++input;
+                       }
+               }
+
+               if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
+                       if(cmd->data_end != cmd->data_start) {  // nothing is queued for an empty text block
+                               iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
+                               iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
+                               hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
+                               iov[iov_size].iov_len);
+
+                               if(++iov_size == IOV_COUNT) {  // batch full: write it out and start over
+                                       writev(out_file.Fd(), iov, IOV_COUNT);  // NOTE(review): writev() return value is not checked
+                                       iov_size = 0;
+                               }
+                       }
+               }
+       }
+
+       if(input != input_end) {  // queue whatever input is left after the last command
+               iov[iov_size].iov_base = (void*) input;
+               iov[iov_size].iov_len = input_end - input;
+               hash->Add((const unsigned char*) input, input_end - input);
+               ++iov_size;
+       }
+
+       if(iov_size) {  // write out the final, partially filled batch
+               writev(out_file.Fd(), iov, iov_size);  // NOTE(review): writev() return value is not checked
+               iov_size = 0;
+       }
+
+       for(i = 0; i < iov_size; i += IOV_COUNT) {  // NOTE(review): iov_size is always 0 at this point, so this loop never runs
+               if(iov_size - i < IOV_COUNT)
+                       writev(out_file.Fd(), iov + i, iov_size - i);
+               else
+                       writev(out_file.Fd(), iov + i, IOV_COUNT);
+       }
+
+       delete [] iov;
+       free(commands);
+
+       fflush(fTo);  // nothing in this function writes through fTo; the stream is only flushed, never closed here
+
+       return ED_OK;
+#else
+       return MMAP_FAILED;  // built without _POSIX_MAPPED_FILES: always report MMAP_FAILED
+#endif
 }
                                                                                /*}}}*/
 bool RredMethod::Fetch(FetchItem *Itm)                                         /*{{{*/
@@ -234,19 +455,23 @@ bool RredMethod::Fetch(FetchItem *Itm)                                            /*{{{*/
       return false;
    
    Hashes Hash;
-   FILE* fFrom = fdopen(From.Fd(), "r");
-   FILE* fPatch = fdopen(Patch.Fd(), "r");
-   FILE* fTo = fdopen(To.Fd(), "w");
    // now do the actual patching
-   if (patchFile(fPatch, fFrom, fTo, &Hash) != ED_OK) {
-     _error->Errno("rred", _("Could not patch file"));  
-      return false;
+   State const result = patchMMap(Patch, From, To, &Hash);
+   if (result == MMAP_FAILED) {
+      // retry with patchFile
+      lseek(Patch.Fd(), 0, SEEK_SET);
+      lseek(From.Fd(), 0, SEEK_SET);
+      To.Open(Itm->DestFile,FileFd::WriteEmpty);
+      if (_error->PendingError() == true)
+         return false;
+      if (patchFile(Patch, From, To, &Hash) != ED_OK) {
+        return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (1)").c_str());
+      }
+   } else if (result != ED_OK) {
+      return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (2)").c_str());
    }
 
    // write out the result
-   fflush(fFrom);
-   fflush(fPatch);
-   fflush(fTo);
    From.Close();
    Patch.Close();
    To.Close();