methods/rred.cc

   1 // Includes                                                                     /*{{{*/
   2 #include <apt-pkg/fileutl.h>
   3 #include <apt-pkg/mmap.h>
   4 #include <apt-pkg/error.h>
   5 #include <apt-pkg/acquire-method.h>
   6 #include <apt-pkg/strutl.h>
   7 #include <apt-pkg/hashes.h>
   8
   9 #include <sys/stat.h>
  10 #include <sys/uio.h>
  11 #include <unistd.h>
  12 #include <utime.h>
  13 #include <stdio.h>
  14 #include <errno.h>
  15 #include <apti18n.h>
  16                                                                                 /*}}}*/
  17 /** \brief RredMethod - ed-style incremential patch method                      {{{
  18  *
  19  *  This method implements a patch functionality similar to "patch --ed" that is
  20  *  used by the "tiffany" incremental packages download stuff. It differs from
  21  *  "ed" insofar that it is way more restricted (and therefore secure).
  22  *  The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
  23  *  "<em>d</em>elete" (diff doesn't output any other).
  24  *  Additionally the records must be reverse sorted by line number and
  25  *  may not overlap (diff *seems* to produce this kind of output).
  26  * */
  27 class RredMethod : public pkgAcqMethod {
  28         bool Debug;
  29         // the size of this doesn't really matter (except for performance)
  30         const static int BUF_SIZE = 1024;
  31         // the supported ed commands
  32         enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
  33         // return values
  34         enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
  35
  36         State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
  37                      unsigned long &line, char *buffer, Hashes *hash) const;
  38         void ignoreLineInFile(FILE *fin, char *buffer) const;
  39         void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
  40                                     Hashes *hash, char *buffer) const;
  41
  42         State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
  43         State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
  44
  45 protected:
  46         // the methods main method
  47         virtual bool Fetch(FetchItem *Itm);
  48
  49 public:
  50         RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {};
  51 };
  52                                                                                 /*}}}*/
  53 /** \brief applyFile - in reverse order with a tail recursion                   {{{
  54  *
  55  *  As it is expected that the commands are in reversed order in the patch file
  56  *  we check in the first half if the command is valid, but doesn't execute it
  57  *  and move a step deeper. After reaching the end of the file we apply the
  58  *  patches in the correct order: last found command first.
  59  *
  60  *  \param ed_cmds patch file to apply
  61  *  \param in_file base file we want to patch
  62  *  \param out_file file to write the patched result to
  63  *  \param line of command operation
  64  *  \param buffer internal used read/write buffer
  65  *  \param hash the created file for correctness
  66  *  \return the success State of the ed command executor
  67  */
  68 RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
  69                         unsigned long &line, char *buffer, Hashes *hash) const {
  70         // get the current command and parse it
  71         if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) {
  72                 if (Debug == true)
  73                         std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
  74                 line = 0;
  75                 return ED_OK;
  76         }
  77
  78         // parse in the effected linenumbers
  79         char* idx;
  80         errno=0;
  81         unsigned long const startline = strtol(buffer, &idx, 10);
  82         if (errno == ERANGE || errno == EINVAL) {
  83                 _error->Errno("rred", "startline is an invalid number");
  84                 return ED_PARSER;
  85         }
  86         if (startline > line) {
  87                 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
  88                 return ED_ORDERING;
  89         }
  90         unsigned long stopline;
  91         if (*idx == ',') {
  92                 idx++;
  93                 errno=0;
  94                 stopline = strtol(idx, &idx, 10);
  95                 if (errno == ERANGE || errno == EINVAL) {
  96                         _error->Errno("rred", "stopline is an invalid number");
  97                         return ED_PARSER;
  98                 }
  99         }
 100         else {
 101                 stopline = startline;
 102         }
 103         line = startline;
 104
 105         // which command to execute on this line(s)?
 106         switch (*idx) {
 107                 case MODE_CHANGED:
 108                         if (Debug == true)
 109                                 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
 110                         break;
 111                 case MODE_ADDED:
 112                         if (Debug == true)
 113                                 std::clog << "Insert after line " << startline << std::endl;
 114                         break;
 115                 case MODE_DELETED:
 116                         if (Debug == true)
 117                                 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
 118                         break;
 119                 default:
 120                         _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
 121                         return ED_PARSER;
 122         }
 123         unsigned char mode = *idx;
 124
 125         // save the current position
 126         unsigned const long pos = ftell(ed_cmds);
 127
 128         // if this is add or change then go to the next full stop
 129         unsigned int data_length = 0;
 130         if (mode == MODE_CHANGED || mode == MODE_ADDED) {
 131                 do {
 132                         ignoreLineInFile(ed_cmds, buffer);
 133                         data_length++;
 134                 }
 135                 while (strncmp(buffer, ".", 1) != 0);
 136                 data_length--; // the dot should not be copied
 137         }
 138
 139         // do the recursive call - the last command is the one we need to execute at first
 140         const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
 141         if (child != ED_OK) {
 142                 return child;
 143         }
 144
 145         // change and delete are working on "line" - add is done after "line"
 146         if (mode != MODE_ADDED)
 147                 line++;
 148
 149         // first wind to the current position and copy over all unchanged lines
 150         if (line < startline) {
 151                 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
 152                 line = startline;
 153         }
 154
 155         if (mode != MODE_ADDED)
 156                 line--;
 157
 158         // include data from ed script
 159         if (mode == MODE_CHANGED || mode == MODE_ADDED) {
 160                 fseek(ed_cmds, pos, SEEK_SET);
 161                 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
 162         }
 163
 164         // ignore the corresponding number of lines from input
 165         if (mode == MODE_CHANGED || mode == MODE_DELETED) {
 166                 while (line < stopline) {
 167                         ignoreLineInFile(in_file, buffer);
 168                         line++;
 169                 }
 170         }
 171         return ED_OK;
 172 }
 173                                                                                 /*}}}*/
 174 void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
 175                                         Hashes *hash, char *buffer) const {
 176         while (0 < lines--) {
 177                 do {
 178                         fgets(buffer, BUF_SIZE, fin);
 179                         size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
 180                         hash->Add((unsigned char*)buffer, written);
 181                 } while (strlen(buffer) == (BUF_SIZE - 1) &&
 182                        buffer[BUF_SIZE - 2] != '\n');
 183         }
 184 }
 185                                                                                 /*}}}*/
 186 void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const {              /*{{{*/
 187         fgets(buffer, BUF_SIZE, fin);
 188         while (strlen(buffer) == (BUF_SIZE - 1) &&
 189                buffer[BUF_SIZE - 2] != '\n') {
 190                 fgets(buffer, BUF_SIZE, fin);
 191                 buffer[0] = ' ';
 192         }
 193 }
 194                                                                                 /*}}}*/
 195 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From,            /*{{{*/
 196                                         FileFd &out_file, Hashes *hash) const {
 197    char buffer[BUF_SIZE];
 198    FILE* fFrom = fdopen(From.Fd(), "r");
 199    FILE* fPatch = fdopen(Patch.Fd(), "r");
 200    FILE* fTo = fdopen(out_file.Fd(), "w");
 201
 202    /* we do a tail recursion to read the commands in the right order */
 203    unsigned long line = -1; // assign highest possible value
 204    State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
 205
 206    /* read the rest from infile */
 207    if (result == ED_OK) {
 208       while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
 209          size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
 210          hash->Add((unsigned char*)buffer, written);
 211       }
 212       fflush(fTo);
 213    }
 214    return result;
 215 }
 216                                                                                 /*}}}*/
 217 struct EdCommand {                                                              /*{{{*/
 218   size_t data_start;
 219   size_t data_end;
 220   size_t data_lines;
 221   size_t first_line;
 222   size_t last_line;
 223   char type;
 224 };
 225 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
 226                                                                                 /*}}}*/
 227 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From,            /*{{{*/
 228                                         FileFd &out_file, Hashes *hash) const {
 229 #ifdef _POSIX_MAPPED_FILES
 230         MMap ed_cmds(Patch, MMap::ReadOnly);
 231         MMap in_file(From, MMap::ReadOnly);
 232
 233         if (ed_cmds.Size() == 0 || in_file.Size() == 0)
 234                 return MMAP_FAILED;
 235
 236         EdCommand* commands = 0;
 237         size_t command_count = 0;
 238         size_t command_alloc = 0;
 239
 240         const char* begin = (char*) ed_cmds.Data();
 241         const char* end = begin;
 242         const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
 243
 244         const char* input = (char*) in_file.Data();
 245         const char* input_end = (char*) in_file.Data() + in_file.Size();
 246
 247         size_t i;
 248
 249         /* 1. Parse entire script.  It is executed in reverse order, so we cather it
 250          *    in the `commands' buffer first
 251          */
 252
 253         for(;;) {
 254                 EdCommand cmd;
 255                 cmd.data_start = 0;
 256                 cmd.data_end = 0;
 257
 258                 while(begin != ed_end && *begin == '\n')
 259                         ++begin;
 260                 while(end != ed_end && *end != '\n')
 261                         ++end;
 262                 if(end == ed_end && begin == end)
 263                         break;
 264
 265                 /* Determine command range */
 266                 const char* tmp = begin;
 267
 268                 for(;;) {
 269                         /* atoll is safe despite lacking NUL-termination; we know there's an
 270                          * alphabetic character at end[-1]
 271                          */
 272                         if(tmp == end) {
 273                                 cmd.first_line = atol(begin);
 274                                 cmd.last_line = cmd.first_line;
 275                                 break;
 276                         }
 277                         if(*tmp == ',') {
 278                                 cmd.first_line = atol(begin);
 279                                 cmd.last_line = atol(tmp + 1);
 280                                 break;
 281                         }
 282                         ++tmp;
 283                 }
 284
 285                 // which command to execute on this line(s)?
 286                 switch (end[-1]) {
 287                         case MODE_CHANGED:
 288                                 if (Debug == true)
 289                                         std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
 290                                 break;
 291                         case MODE_ADDED:
 292                                 if (Debug == true)
 293                                         std::clog << "Insert after line " << cmd.first_line << std::endl;
 294                                 break;
 295                         case MODE_DELETED:
 296                                 if (Debug == true)
 297                                         std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
 298                                 break;
 299                         default:
 300                                 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
 301                                 free(commands);
 302                                 return ED_PARSER;
 303                 }
 304                 cmd.type = end[-1];
 305
 306                 /* Determine the size of the inserted text, so we don't have to scan this
 307                  * text again later.
 308                  */
 309                 begin = end + 1;
 310                 end = begin;
 311                 cmd.data_lines = 0;
 312
 313                 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
 314                         cmd.data_start = begin - (char*) ed_cmds.Data();
 315                         while(end != ed_end) {
 316                                 if(*end == '\n') {
 317                                         if(end[-1] == '.' && end[-2] == '\n')
 318                                                 break;
 319                                         ++cmd.data_lines;
 320                                 }
 321                                 ++end;
 322                         }
 323                         cmd.data_end = end - (char*) ed_cmds.Data() - 1;
 324                         begin = end + 1;
 325                         end = begin;
 326                 }
 327                 if(command_count == command_alloc) {
 328                         command_alloc = (command_alloc + 64) * 3 / 2;
 329                         commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
 330                 }
 331                 commands[command_count++] = cmd;
 332         }
 333
 334         struct iovec* iov = new struct iovec[IOV_COUNT];
 335         size_t iov_size = 0;
 336
 337         size_t amount, remaining;
 338         size_t line = 1;
 339         EdCommand* cmd;
 340
 341         /* 2. Execute script.  We gather writes in a `struct iov' array, and flush
 342          *    using writev to minimize the number of system calls.  Data is read
 343          *    directly from the memory mappings of the input file and the script.
 344          */
 345
 346         for(i = command_count; i-- > 0; ) {
 347                 cmd = &commands[i];
 348                 if(cmd->type == MODE_ADDED)
 349                         amount = cmd->first_line + 1;
 350                 else
 351                         amount = cmd->first_line;
 352
 353                 if(line < amount) {
 354                         begin = input;
 355                         while(line != amount) {
 356                                 input = (const char*) memchr(input, '\n', input_end - input);
 357                                 if(!input)
 358                                         break;
 359                                 ++line;
 360                                 ++input;
 361                         }
 362
 363                         iov[iov_size].iov_base = (void*) begin;
 364                         iov[iov_size].iov_len = input - begin;
 365                         hash->Add((const unsigned char*) begin, input - begin);
 366
 367                         if(++iov_size == IOV_COUNT) {
 368                                 writev(out_file.Fd(), iov, IOV_COUNT);
 369                                 iov_size = 0;
 370                         }
 371                 }
 372
 373                 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
 374                         remaining = (cmd->last_line - cmd->first_line) + 1;
 375                         line += remaining;
 376                         while(remaining) {
 377                                 input = (const char*) memchr(input, '\n', input_end - input);
 378                                 if(!input)
 379                                         break;
 380                                 --remaining;
 381                                 ++input;
 382                         }
 383                 }
 384
 385                 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
 386                         if(cmd->data_end != cmd->data_start) {
 387                                 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
 388                                 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
 389                                 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
 390                                 iov[iov_size].iov_len);
 391
 392                                 if(++iov_size == IOV_COUNT) {
 393                                         writev(out_file.Fd(), iov, IOV_COUNT);
 394                                         iov_size = 0;
 395                                 }
 396                         }
 397                 }
 398         }
 399
 400         if(input != input_end) {
 401                 iov[iov_size].iov_base = (void*) input;
 402                 iov[iov_size].iov_len = input_end - input;
 403                 hash->Add((const unsigned char*) input, input_end - input);
 404                 ++iov_size;
 405         }
 406
 407         if(iov_size) {
 408                 writev(out_file.Fd(), iov, iov_size);
 409                 iov_size = 0;
 410         }
 411
 412         for(i = 0; i < iov_size; i += IOV_COUNT) {
 413                 if(iov_size - i < IOV_COUNT)
 414                         writev(out_file.Fd(), iov + i, iov_size - i);
 415                 else
 416                         writev(out_file.Fd(), iov + i, IOV_COUNT);
 417         }
 418
 419         delete [] iov;
 420         free(commands);
 421
 422         return ED_OK;
 423 #else
 424         return MMAP_FAILED;
 425 #endif
 426 }
 427                                                                                 /*}}}*/
 428 bool RredMethod::Fetch(FetchItem *Itm)                                          /*{{{*/
 429 {
 430    Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
 431    URI Get = Itm->Uri;
 432    string Path = Get.Host + Get.Path; // To account for relative paths
 433
 434    FetchResult Res;
 435    Res.Filename = Itm->DestFile;
 436    if (Itm->Uri.empty() == true) {
 437       Path = Itm->DestFile;
 438       Itm->DestFile.append(".result");
 439    } else
 440       URIStart(Res);
 441
 442    if (Debug == true)
 443       std::clog << "Patching " << Path << " with " << Path
 444          << ".ed and putting result into " << Itm->DestFile << std::endl;
 445    // Open the source and destination files (the d'tor of FileFd will do
 446    // the cleanup/closing of the fds)
 447    FileFd From(Path,FileFd::ReadOnly);
 448    FileFd Patch(Path+".ed",FileFd::ReadOnly);
 449    FileFd To(Itm->DestFile,FileFd::WriteAtomic);
 450    To.EraseOnFailure();
 451    if (_error->PendingError() == true)
 452       return false;
 453
 454    Hashes Hash;
 455    // now do the actual patching
 456    State const result = patchMMap(Patch, From, To, &Hash);
 457    if (result == MMAP_FAILED) {
 458       // retry with patchFile
 459       lseek(Patch.Fd(), 0, SEEK_SET);
 460       lseek(From.Fd(), 0, SEEK_SET);
 461       To.Open(Itm->DestFile,FileFd::WriteAtomic);
 462       if (_error->PendingError() == true)
 463          return false;
 464       if (patchFile(Patch, From, To, &Hash) != ED_OK) {
 465          return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
 466       } else if (Debug == true) {
 467          std::clog << "rred: finished file patching of " << Path  << " after mmap failed." << std::endl;
 468       }
 469    } else if (result != ED_OK) {
 470       return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
 471    } else if (Debug == true) {
 472       std::clog << "rred: finished mmap patching of " << Path << std::endl;
 473    }
 474
 475    // write out the result
 476    From.Close();
 477    Patch.Close();
 478    To.Close();
 479
 480    /* Transfer the modification times from the patch file
 481       to be able to see in which state the file should be
 482       and use the access time from the "old" file */
 483    struct stat BufBase, BufPatch;
 484    if (stat(Path.c_str(),&BufBase) != 0 ||
 485        stat(string(Path+".ed").c_str(),&BufPatch) != 0)
 486       return _error->Errno("stat",_("Failed to stat"));
 487
 488    struct utimbuf TimeBuf;
 489    TimeBuf.actime = BufBase.st_atime;
 490    TimeBuf.modtime = BufPatch.st_mtime;
 491    if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
 492       return _error->Errno("utime",_("Failed to set modification time"));
 493
 494    if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
 495       return _error->Errno("stat",_("Failed to stat"));
 496
 497    // return done
 498    Res.LastModified = BufBase.st_mtime;
 499    Res.Size = BufBase.st_size;
 500    Res.TakeHashes(Hash);
 501    URIDone(Res);
 502
 503    return true;
 504 }
 505                                                                                 /*}}}*/
 506 /** \brief Wrapper class for testing rred */                                    /*{{{*/
 507 class TestRredMethod : public RredMethod {
 508 public:
 509         /** \brief Run rred in debug test mode
 510          *
 511          *  This method can be used to run the rred method outside
 512          *  of the "normal" acquire environment for easier testing.
 513          *
 514          *  \param base basename of all files involved in this rred test
 515          */
 516         bool Run(char const *base) {
 517                 _config->CndSet("Debug::pkgAcquire::RRed", "true");
 518                 FetchItem *test = new FetchItem;
 519                 test->DestFile = base;
 520                 return Fetch(test);
 521         }
 522 };
 523                                                                                 /*}}}*/
 524 /** \brief Starter for the rred method (or its test method)                     {{{
 525  *
 526  *  Used without parameters is the normal behavior for methods for
 527  *  the APT acquire system. While this works great for the acquire system
 528  *  it is very hard to test the method and therefore the method also
 529  *  accepts one parameter which will switch it directly to debug test mode:
 530  *  The test mode expects that if "Testfile" is given as parameter
 531  *  the file "Testfile" should be ed-style patched with "Testfile.ed"
 532  *  and will write the result to "Testfile.result".
 533  */
 534 int main(int argc, char *argv[]) {
 535         if (argc <= 1) {
 536                 RredMethod Mth;
 537                 return Mth.Run();
 538         } else {
 539                 TestRredMethod Mth;
 540                 bool result = Mth.Run(argv[1]);
 541                 _error->DumpErrors();
 542                 return result;
 543         }
 544 }
 545                                                                                 /*}}}*/