X-Git-Url: https://git.saurik.com/apt.git/blobdiff_plain/40cdb71e715cf1c8e6d8201da307f992763f1654..3b5607fc31371190470074371793cb8500b5139e:/methods/rred.cc diff --git a/methods/rred.cc b/methods/rred.cc index 27d95bdde..cabb3c456 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -1,262 +1,659 @@ +// Copyright (c) 2014 Anthony Towns +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. + +#include + #include #include #include #include #include +#include -#include -#include -#include +#include +#include +#include +#include +#include + +#include #include -#include +#include +#include +#include +#include + #include -/* this method implements a patch functionality similar to "patch --ed" that is - * used by the "tiffany" incremental packages download stuff. it differs from - * "ed" insofar that it is way more restricted (and therefore secure). in the - * moment only the "c", "a" and "d" commands of ed are implemented (diff - * doesn't output any other). additionally the records must be reverse sorted - * by line number and may not overlap (diff *seems* to produce this kind of - * output). - * */ +#define BLOCK_SIZE (512*1024) -const char *Prog; +class MemBlock { + char *start; + size_t size; + char *free; + struct MemBlock *next; + + MemBlock(size_t size) : size(size), next(NULL) + { + free = start = new char[size]; + } + + size_t avail(void) { return size - (free - start); } -class RredMethod : public pkgAcqMethod -{ - bool Debug; - // the size of this doesn't really matter (except for performance) - const static int BUF_SIZE = 1024; - // the ed commands - enum Mode {MODE_CHANGED, MODE_DELETED, MODE_ADDED}; - // return values - enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE}; - // this applies a single hunk, it uses a tail recursion to - // reverse the hunks in the file - int ed_rec(FILE *ed_cmds, FILE *in_file, FILE *out_file, int line, - char *buffer, unsigned int bufsize, Hashes *hash); - // apply a patch file - int ed_file(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash); - // the methods main method - virtual bool Fetch(FetchItem *Itm); - public: - - RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {}; + + MemBlock(void) { + free = start = new char[BLOCK_SIZE]; + size = BLOCK_SIZE; + next = NULL; + } + + ~MemBlock() { + delete [] start; + delete next; + } + + void clear(void) { + free = start; + if (next) + next->clear(); + } + + char *add_easy(char *src, size_t len, char *last) + { + if (last) { + for (MemBlock *k = this; k; k = k->next) { + if (k->free == last) { + if (len <= k->avail()) { + char *n = k->add(src, len); + assert(last == n); + if (last == n) + return NULL; + return n; + } else { + break; + } + } else if (last >= start && last < free) { + break; + } + } + } + return add(src, len); + } + + char *add(char *src, size_t len) { + if (len > avail()) { + if (!next) { + if (len > BLOCK_SIZE) { + next = new MemBlock(len); + } else { + next = new MemBlock; + } + } + return next->add(src, len); + } + char *dst = free; + free += len; + memcpy(dst, src, len); + return dst; + } }; -int RredMethod::ed_rec(FILE *ed_cmds, FILE *in_file, FILE *out_file, int line, - char *buffer, unsigned int bufsize, Hashes *hash) { - int pos; - int startline; - int stopline; - int mode; - int written; - char *idx; - - /* get the current command and parse it*/ - if (fgets(buffer, bufsize, ed_cmds) == NULL) { - return line; +struct Change { + /* Ordering: + * + * 1. write out lines unchanged + * 2. skip lines from source + * 3. write out lines (/) + */ + size_t offset; + size_t del_cnt; + size_t add_cnt; /* lines */ + size_t add_len; /* bytes */ + char *add; + + Change(int off) + { + offset = off; + del_cnt = add_cnt = add_len = 0; + add = NULL; } - startline = strtol(buffer, &idx, 10); - if (startline < line) { - return ED_ORDERING; + + /* actually, don't write lines from */ + void skip_lines(size_t lines) + { + while (lines > 0) { + char *s = (char*) memchr(add, '\n', add_len); + assert(s != NULL); + s++; + add_len -= (s - add); + add_cnt--; + lines--; + if (add_len == 0) { + add = NULL; + assert(add_cnt == 0); + assert(lines == 0); + } else { + add = s; + assert(add_cnt > 0); + } + } } - if (*idx == ',') { - idx++; - stopline = strtol(idx, &idx, 10); +}; + +class FileChanges { + std::list changes; + std::list::iterator where; + size_t pos; // line number is as far left of iterator as possible + + bool pos_is_okay(void) + { +#ifdef POSDEBUG + size_t cpos = 0; + std::list::iterator x; + for (x = changes.begin(); x != where; ++x) { + assert(x != changes.end()); + cpos += x->offset + x->add_cnt; + } + return cpos == pos; +#else + return true; +#endif } - else { - stopline = startline; + + public: + FileChanges() { + where = changes.end(); + pos = 0; + } + + std::list::iterator begin(void) { return changes.begin(); } + std::list::iterator end(void) { return changes.end(); } + + std::list::reverse_iterator rbegin(void) { return changes.rbegin(); } + std::list::reverse_iterator rend(void) { return changes.rend(); } + + void add_change(Change c) { + assert(pos_is_okay()); + go_to_change_for(c.offset); + assert(pos + where->offset == c.offset); + if (c.del_cnt > 0) + delete_lines(c.del_cnt); + assert(pos + where->offset == c.offset); + if (c.add_len > 0) { + assert(pos_is_okay()); + if (where->add_len > 0) + new_change(); + assert(where->add_len == 0 && where->add_cnt == 0); + + where->add_len = c.add_len; + where->add_cnt = c.add_cnt; + where->add = c.add; + } + assert(pos_is_okay()); + merge(); + assert(pos_is_okay()); } - if (*idx == 'c') { - mode = MODE_CHANGED; - if (Debug == true) { - std::clog << "changing from line " << startline - << " to " << stopline << std::endl; - } + + private: + void merge(void) + { + while (where->offset == 0 && where != changes.begin()) { + left(); + } + std::list::iterator next = where; + ++next; + + while (next != changes.end() && next->offset == 0) { + where->del_cnt += next->del_cnt; + next->del_cnt = 0; + if (next->add == NULL) { + next = changes.erase(next); + } else if (where->add == NULL) { + where->add = next->add; + where->add_len = next->add_len; + where->add_cnt = next->add_cnt; + next = changes.erase(next); + } else { + ++next; + } + } } - else if (*idx == 'a') { - mode = MODE_ADDED; - if (Debug == true) { - std::clog << "adding after line " << startline << std::endl; - } + + void go_to_change_for(size_t line) + { + while(where != changes.end()) { + if (line < pos) { + left(); + continue; + } + if (pos + where->offset + where->add_cnt <= line) { + right(); + continue; + } + // line is somewhere in this slot + if (line < pos + where->offset) { + break; + } else if (line == pos + where->offset) { + return; + } else { + split(line - pos); + right(); + return; + } + } + /* it goes before this patch */ + insert(line-pos); } - else if (*idx == 'd') { - mode = MODE_DELETED; - if (Debug == true) { - std::clog << "deleting from line " << startline - << " to " << stopline << std::endl; - } + + void new_change(void) { insert(where->offset); } + + void insert(size_t offset) + { + assert(pos_is_okay()); + assert(where == changes.end() || offset <= where->offset); + if (where != changes.end()) + where->offset -= offset; + changes.insert(where, Change(offset)); + --where; + assert(pos_is_okay()); } - else { - return ED_PARSER; + + void split(size_t offset) + { + assert(pos_is_okay()); + + assert(where->offset < offset); + assert(offset < where->offset + where->add_cnt); + + size_t keep_lines = offset - where->offset; + + Change before(*where); + + where->del_cnt = 0; + where->offset = 0; + where->skip_lines(keep_lines); + + before.add_cnt = keep_lines; + before.add_len -= where->add_len; + + changes.insert(where, before); + --where; + assert(pos_is_okay()); } - /* get the current position */ - pos = ftell(ed_cmds); - /* if this is add or change then go to the next full stop */ - if ((mode == MODE_CHANGED) || (mode == MODE_ADDED)) { - do { - fgets(buffer, bufsize, ed_cmds); - while ((strlen(buffer) == (bufsize - 1)) - && (buffer[bufsize - 2] != '\n')) { - fgets(buffer, bufsize, ed_cmds); - buffer[0] = ' '; - } - } while (strncmp(buffer, ".", 1) != 0); + + void delete_lines(size_t cnt) + { + std::list::iterator x = where; + assert(pos_is_okay()); + while (cnt > 0) + { + size_t del; + del = x->add_cnt; + if (del > cnt) + del = cnt; + x->skip_lines(del); + cnt -= del; + + ++x; + if (x == changes.end()) { + del = cnt; + } else { + del = x->offset; + if (del > cnt) + del = cnt; + x->offset -= del; + } + where->del_cnt += del; + cnt -= del; + } + assert(pos_is_okay()); } - /* do the recursive call */ - line = ed_rec(ed_cmds, in_file, out_file, line, buffer, bufsize, - hash); - /* pass on errors */ - if (line < 0) { - return line; + + void left(void) { + assert(pos_is_okay()); + --where; + pos -= where->offset + where->add_cnt; + assert(pos_is_okay()); } - /* apply our hunk */ - fseek(ed_cmds, pos, SEEK_SET); - /* first wind to the current position */ - if (mode != MODE_ADDED) { - startline -= 1; + + void right(void) { + assert(pos_is_okay()); + pos += where->offset + where->add_cnt; + ++where; + assert(pos_is_okay()); } - while (line < startline) { - fgets(buffer, bufsize, in_file); - written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); - while ((strlen(buffer) == (bufsize - 1)) - && (buffer[bufsize - 2] != '\n')) { - fgets(buffer, bufsize, in_file); - written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); +}; + +class Patch { + FileChanges filechanges; + MemBlock add_text; + + static bool retry_fwrite(char *b, size_t l, FILE *f, Hashes *hash) + { + size_t r = 1; + while (r > 0 && l > 0) + { + r = fwrite(b, 1, l, f); + if (hash) + hash->Add((unsigned char*)b, r); + l -= r; + b += r; } - line++; + return l == 0; } - /* include from ed script */ - if ((mode == MODE_ADDED) || (mode == MODE_CHANGED)) { - do { - fgets(buffer, bufsize, ed_cmds); - if (strncmp(buffer, ".", 1) != 0) { - written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); - while ((strlen(buffer) == (bufsize - 1)) - && (buffer[bufsize - 2] != '\n')) { - fgets(buffer, bufsize, ed_cmds); - written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); - } - } - else { - break; - } - } while (1); + + static void dump_rest(FILE *o, FILE *i, Hashes *hash) + { + char buffer[BLOCK_SIZE]; + size_t l; + while (0 < (l = fread(buffer, 1, sizeof(buffer), i))) { + if (!retry_fwrite(buffer, l, o, hash)) + break; + } } - /* ignore the corresponding number of lines from input */ - if ((mode == MODE_DELETED) || (mode == MODE_CHANGED)) { - while (line < stopline) { - fgets(buffer, bufsize, in_file); - while ((strlen(buffer) == (bufsize - 1)) - && (buffer[bufsize - 2] != '\n')) { - fgets(buffer, bufsize, in_file); - } - line++; + + static void dump_lines(FILE *o, FILE *i, size_t n, Hashes *hash) + { + char buffer[BLOCK_SIZE]; + while (n > 0) { + if (fgets(buffer, sizeof(buffer), i) == 0) + buffer[0] = '\0'; + size_t const l = strlen(buffer); + if (l == 0 || buffer[l-1] == '\n') + n--; + retry_fwrite(buffer, l, o, hash); } } - return line; -} -int RredMethod::ed_file(FILE *ed_cmds, FILE *in_file, FILE *out_file, - Hashes *hash) { - char buffer[BUF_SIZE]; - int result; - int written; - - /* we do a tail recursion to read the commands in the right order */ - result = ed_rec(ed_cmds, in_file, out_file, 0, buffer, BUF_SIZE, - hash); - - /* read the rest from infile */ - if (result >= 0) { - while (fgets(buffer, BUF_SIZE, in_file) != NULL) { - written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); + static void skip_lines(FILE *i, int n) + { + char buffer[BLOCK_SIZE]; + while (n > 0) { + if (fgets(buffer, sizeof(buffer), i) == 0) + buffer[0] = '\0'; + size_t const l = strlen(buffer); + if (l == 0 || buffer[l-1] == '\n') + n--; } } - else { - return ED_FAILURE; + + static void dump_mem(FILE *o, char *p, size_t s, Hashes *hash) { + retry_fwrite(p, s, o, hash); } - return ED_OK; -} + public: -bool RredMethod::Fetch(FetchItem *Itm) -{ - Debug = _config->FindB("Debug::pkgAcquire::RRed",false); - URI Get = Itm->Uri; - string Path = Get.Host + Get.Path; // To account for relative paths - // Path contains the filename to patch - FetchResult Res; - Res.Filename = Itm->DestFile; - URIStart(Res); - // Res.Filename the destination filename - - if (Debug == true) - std::clog << "Patching " << Path << " with " << Path - << ".ed and putting result into " << Itm->DestFile << std::endl; - // Open the source and destination files (the d'tor of FileFd will do - // the cleanup/closing of the fds) - FileFd From(Path,FileFd::ReadOnly); - FileFd Patch(Path+".ed",FileFd::ReadOnly); - FileFd To(Itm->DestFile,FileFd::WriteEmpty); - To.EraseOnFailure(); - if (_error->PendingError() == true) - return false; - - Hashes Hash; - FILE* fFrom = fdopen(From.Fd(), "r"); - FILE* fPatch = fdopen(Patch.Fd(), "r"); - FILE* fTo = fdopen(To.Fd(), "w"); - // now do the actual patching - if (ed_file(fPatch, fFrom, fTo, &Hash) != ED_OK) { - _error->Errno("rred", _("Could not patch file")); - return false; + void read_diff(FileFd &f) + { + char buffer[BLOCK_SIZE]; + bool cmdwanted = true; + + Change ch(0); + while(f.ReadLine(buffer, sizeof(buffer))) + { + if (cmdwanted) { + char *m, *c; + size_t s, e; + s = strtol(buffer, &m, 10); + if (m == buffer) { + s = e = ch.offset + ch.add_cnt; + c = buffer; + } else if (*m == ',') { + m++; + e = strtol(m, &c, 10); + } else { + e = s; + c = m; + } + switch(*c) { + case 'a': + cmdwanted = false; + ch.add = NULL; + ch.add_cnt = 0; + ch.add_len = 0; + ch.offset = s; + ch.del_cnt = 0; + break; + case 'c': + cmdwanted = false; + ch.add = NULL; + ch.add_cnt = 0; + ch.add_len = 0; + ch.offset = s - 1; + ch.del_cnt = e - s + 1; + break; + case 'd': + ch.offset = s - 1; + ch.del_cnt = e - s + 1; + ch.add = NULL; + ch.add_cnt = 0; + ch.add_len = 0; + filechanges.add_change(ch); + break; + } + } else { /* !cmdwanted */ + if (buffer[0] == '.' && buffer[1] == '\n') { + cmdwanted = true; + filechanges.add_change(ch); + } else { + char *last = NULL; + char *add; + size_t l; + if (ch.add) + last = ch.add + ch.add_len; + l = strlen(buffer); + add = add_text.add_easy(buffer, l, last); + if (!add) { + ch.add_len += l; + ch.add_cnt++; + } else { + if (ch.add) { + filechanges.add_change(ch); + ch.del_cnt = 0; + } + ch.offset += ch.add_cnt; + ch.add = add; + ch.add_len = l; + ch.add_cnt = 1; + } + } + } + } } - // write out the result - fflush(fFrom); - fflush(fPatch); - fflush(fTo); - From.Close(); - Patch.Close(); - To.Close(); - - // Transfer the modification times - struct stat Buf; - if (stat(Path.c_str(),&Buf) != 0) - return _error->Errno("stat",_("Failed to stat")); - - struct utimbuf TimeBuf; - TimeBuf.actime = Buf.st_atime; - TimeBuf.modtime = Buf.st_mtime; - if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0) - return _error->Errno("utime",_("Failed to set modification time")); - - if (stat(Itm->DestFile.c_str(),&Buf) != 0) - return _error->Errno("stat",_("Failed to stat")); - - // return done - Res.LastModified = Buf.st_mtime; - Res.Size = Buf.st_size; - Res.TakeHashes(Hash); - URIDone(Res); - - return true; -} + void write_diff(FILE *f) + { + unsigned long long line = 0; + std::list::reverse_iterator ch; + for (ch = filechanges.rbegin(); ch != filechanges.rend(); ++ch) { + line += ch->offset + ch->del_cnt; + } + + for (ch = filechanges.rbegin(); ch != filechanges.rend(); ++ch) { + std::list::reverse_iterator mg_i, mg_e = ch; + while (ch->del_cnt == 0 && ch->offset == 0) + ++ch; + line -= ch->del_cnt; + if (ch->add_cnt > 0) { + if (ch->del_cnt == 0) { + fprintf(f, "%llua\n", line); + } else if (ch->del_cnt == 1) { + fprintf(f, "%lluc\n", line+1); + } else { + fprintf(f, "%llu,%lluc\n", line+1, line+ch->del_cnt); + } + + mg_i = ch; + do { + dump_mem(f, mg_i->add, mg_i->add_len, NULL); + } while (mg_i-- != mg_e); + + fprintf(f, ".\n"); + } else if (ch->del_cnt == 1) { + fprintf(f, "%llud\n", line+1); + } else if (ch->del_cnt > 1) { + fprintf(f, "%llu,%llud\n", line+1, line+ch->del_cnt); + } + line -= ch->offset; + } + } + + void apply_against_file(FILE *out, FILE *in, Hashes *hash = NULL) + { + std::list::iterator ch; + for (ch = filechanges.begin(); ch != filechanges.end(); ++ch) { + dump_lines(out, in, ch->offset, hash); + skip_lines(in, ch->del_cnt); + dump_mem(out, ch->add, ch->add_len, hash); + } + dump_rest(out, in, hash); + } +}; + +class RredMethod : public pkgAcqMethod { + private: + bool Debug; + + protected: + virtual bool Fetch(FetchItem *Itm) { + Debug = _config->FindB("Debug::pkgAcquire::RRed", false); + URI Get = Itm->Uri; + std::string Path = Get.Host + Get.Path; // rred:/path - no host + + FetchResult Res; + Res.Filename = Itm->DestFile; + if (Itm->Uri.empty()) + { + Path = Itm->DestFile; + Itm->DestFile.append(".result"); + } else + URIStart(Res); + + std::vector patchpaths; + Patch patch; -int main(int argc, char *argv[]) + if (FileExists(Path + ".ed") == true) + patchpaths.push_back(Path + ".ed"); + else + { + _error->PushToStack(); + std::vector patches = GetListOfFilesInDir(flNotFile(Path), "gz", true, false); + _error->RevertToStack(); + + std::string const baseName = Path + ".ed."; + for (std::vector::const_iterator p = patches.begin(); + p != patches.end(); ++p) + if (p->compare(0, baseName.length(), baseName) == 0) + patchpaths.push_back(*p); + } + + std::string patch_name; + for (std::vector::iterator I = patchpaths.begin(); + I != patchpaths.end(); + ++I) + { + patch_name = *I; + if (Debug == true) + std::clog << "Patching " << Path << " with " << patch_name + << std::endl; + + FileFd p; + // all patches are compressed, even if the name doesn't reflect it + if (p.Open(patch_name, FileFd::ReadOnly, FileFd::Gzip) == false) { + std::cerr << "Could not open patch file " << patch_name << std::endl; + _error->DumpErrors(std::cerr); + abort(); + } + patch.read_diff(p); + p.Close(); + } + + if (Debug == true) + std::clog << "Applying patches against " << Path + << " and writing results to " << Itm->DestFile + << std::endl; + + FILE *inp = fopen(Path.c_str(), "r"); + FILE *out = fopen(Itm->DestFile.c_str(), "w"); + + Hashes hash; + + patch.apply_against_file(out, inp, &hash); + + fclose(out); + fclose(inp); + + if (Debug == true) { + std::clog << "rred: finished file patching of " << Path << "." << std::endl; + } + + struct stat bufbase, bufpatch; + if (stat(Path.c_str(), &bufbase) != 0 || + stat(patch_name.c_str(), &bufpatch) != 0) + return _error->Errno("stat", _("Failed to stat")); + + struct timeval times[2]; + times[0].tv_sec = bufbase.st_atime; + times[1].tv_sec = bufpatch.st_mtime; + times[0].tv_usec = times[1].tv_usec = 0; + if (utimes(Itm->DestFile.c_str(), times) != 0) + return _error->Errno("utimes",_("Failed to set modification time")); + + if (stat(Itm->DestFile.c_str(), &bufbase) != 0) + return _error->Errno("stat", _("Failed to stat")); + + Res.LastModified = bufbase.st_mtime; + Res.Size = bufbase.st_size; + Res.TakeHashes(hash); + URIDone(Res); + + return true; + } + + public: + RredMethod() : pkgAcqMethod("2.0",SingleInstance | SendConfig), Debug(false) {} +}; + +int main(int argc, char **argv) { - RredMethod Mth; + int i; + bool just_diff = true; + Patch patch; + + if (argc <= 1) { + RredMethod Mth; + return Mth.Run(); + } - Prog = strrchr(argv[0],'/'); - Prog++; - - return Mth.Run(); + if (argc > 1 && strcmp(argv[1], "-f") == 0) { + just_diff = false; + i = 2; + } else { + i = 1; + } + + for (; i < argc; i++) { + FileFd p; + if (p.Open(argv[i], FileFd::ReadOnly) == false) { + _error->DumpErrors(std::cerr); + exit(1); + } + patch.read_diff(p); + } + + if (just_diff) { + patch.write_diff(stdout); + } else { + FILE *out, *inp; + out = stdout; + inp = stdin; + + patch.apply_against_file(out, inp); + } + return 0; }