]> git.saurik.com Git - apt.git/blame_incremental - methods/rred.cc
more explicit MarkRequired algorithm code
[apt.git] / methods / rred.cc
... / ...
CommitLineData
1// Copyright (c) 2014 Anthony Towns
2//
3// This program is free software; you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation; either version 2 of the License, or
6// (at your option) any later version.
7
8#include <config.h>
9
10#include <apt-pkg/init.h>
11#include <apt-pkg/fileutl.h>
12#include <apt-pkg/error.h>
13#include <apt-pkg/strutl.h>
14#include <apt-pkg/hashes.h>
15#include <apt-pkg/configuration.h>
16#include "aptmethod.h"
17
18#include <stddef.h>
19#include <iostream>
20#include <string>
21#include <list>
22#include <vector>
23
24#include <assert.h>
25#include <errno.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <sys/stat.h>
30#include <sys/time.h>
31
32#include <apti18n.h>
33
#define BLOCK_SIZE (512*1024)

/* Chain of character buffers used to store the text added by patches.
 *
 * Each block hands out memory by advancing <free> from <start>; when a
 * request does not fit, it is passed on to the <next> block, which is
 * created on demand. Memory is only released when the chain is destroyed;
 * clear() merely rewinds every block so the buffers can be reused.
 */
class MemBlock {
   char *start;      // beginning of this block's buffer
   size_t size;      // capacity of the buffer in bytes
   char *free;       // first unused byte in the buffer
   MemBlock *next;   // following block in the chain, or NULL

   // The class owns <start> and <next>; an implicit copy would delete both
   // twice. Declared but intentionally not defined.
   MemBlock(MemBlock const &);
   MemBlock &operator=(MemBlock const &);

   /* Create a block with a non-default capacity (used for oversized adds). */
   explicit MemBlock(size_t bytes) : size(bytes), next(NULL)
   {
      free = start = new char[bytes];
   }

   /* Number of bytes still unused in this block. */
   size_t avail(void) { return size - (free - start); }

   public:

   /* Create a block of BLOCK_SIZE bytes. */
   MemBlock(void) {
      free = start = new char[BLOCK_SIZE];
      size = BLOCK_SIZE;
      next = NULL;
   }

   /* Release this block's buffer and, recursively, the rest of the chain. */
   ~MemBlock() {
      delete [] start;
      delete next;
   }

   /* Mark every block in the chain as empty without releasing memory. */
   void clear(void) {
      for (MemBlock *k = this; k != NULL; k = k->next)
         k->free = k->start;
   }

   /* Store <len> bytes from <src>, trying to place them directly behind
    * <last> (the end of the text stored previously).
    *
    * Returns NULL if the bytes were appended contiguously at <last>, so the
    * caller can simply extend its existing region. Otherwise the bytes are
    * stored elsewhere via add() and the new location is returned.
    */
   char *add_easy(char *src, size_t len, char *last)
   {
      if (last) {
         for (MemBlock *k = this; k; k = k->next) {
            if (k->free == last) {
               if (len <= k->avail()) {
                  char *n = k->add(src, len);
                  assert(last == n);
                  if (last == n)
                     return NULL;
                  return n;
               } else {
                  break;
               }
            } else if (last >= k->start && last < k->free) {
               // <last> lies inside the used part of this block, so it
               // cannot be the free pointer of any block: stop searching.
               break;
            }
         }
      }
      return add(src, len);
   }

   /* Copy <len> bytes from <src> into the first block of the chain with
    * enough room, appending a new block (of at least <len> bytes) if none
    * has. Returns the address of the stored copy.
    */
   char *add(char *src, size_t len) {
      MemBlock *k = this;
      while (len > k->avail()) {
         if (!k->next) {
            if (len > BLOCK_SIZE) {
               k->next = new MemBlock(len);
            } else {
               k->next = new MemBlock;
            }
         }
         k = k->next;
      }
      char *dst = k->free;
      k->free += len;
      if (len > 0)
         memcpy(dst, src, len);
      return dst;
   }
};
107
/* One step of a merged patch. */
struct Change {
   /* Ordering:
    *
    *   1. write out <offset> lines unchanged
    *   2. skip <del_cnt> lines from source
    *   3. write out <add_cnt> lines (<add>/<add_len>)
    */
   size_t offset;
   size_t del_cnt;
   size_t add_cnt; /* lines */
   size_t add_len; /* bytes */
   char *add;

   /* Create a change that copies <off> lines and neither deletes nor adds. */
   explicit Change(size_t off)
   {
      offset = off;
      del_cnt = add_cnt = add_len = 0;
      add = NULL;
   }

   /* actually, don't write <lines> lines from <add>
    *
    * Drops the first <lines> lines of the stored text by advancing <add>
    * and shrinking <add_len>/<add_cnt>. Once no bytes remain, <add> is
    * reset to NULL.
    */
   void skip_lines(size_t lines)
   {
      while (lines > 0) {
         // Asking for more lines than are stored is a caller error; without
         // asserts, stop instead of searching through a NULL pointer.
         assert(add != NULL && add_cnt > 0);
         if (add == NULL || add_cnt == 0)
            return;

         char *s = (char*) memchr(add, '\n', add_len);
         if (s != NULL)
            s++;
         else
            s = add + add_len; // last line has no newline: consume the rest
         add_len -= (s - add);
         add_cnt--;
         lines--;
         if (add_len == 0) {
            add = NULL;
            assert(add_cnt == 0);
            assert(lines == 0);
         } else {
            add = s;
            assert(add_cnt > 0);
         }
      }
   }
};
149
150class FileChanges {
151 std::list<struct Change> changes;
152 std::list<struct Change>::iterator where;
153 size_t pos; // line number is as far left of iterator as possible
154
155 bool pos_is_okay(void) const
156 {
157#ifdef POSDEBUG
158 size_t cpos = 0;
159 std::list<struct Change>::const_iterator x;
160 for (x = changes.begin(); x != where; ++x) {
161 assert(x != changes.end());
162 cpos += x->offset + x->add_cnt;
163 }
164 return cpos == pos;
165#else
166 return true;
167#endif
168 }
169
170 public:
171 FileChanges() {
172 where = changes.end();
173 pos = 0;
174 }
175
176 std::list<struct Change>::iterator begin(void) { return changes.begin(); }
177 std::list<struct Change>::iterator end(void) { return changes.end(); }
178
179 std::list<struct Change>::reverse_iterator rbegin(void) { return changes.rbegin(); }
180 std::list<struct Change>::reverse_iterator rend(void) { return changes.rend(); }
181
182 void add_change(Change c) {
183 assert(pos_is_okay());
184 go_to_change_for(c.offset);
185 assert(pos + where->offset == c.offset);
186 if (c.del_cnt > 0)
187 delete_lines(c.del_cnt);
188 assert(pos + where->offset == c.offset);
189 if (c.add_len > 0) {
190 assert(pos_is_okay());
191 if (where->add_len > 0)
192 new_change();
193 assert(where->add_len == 0 && where->add_cnt == 0);
194
195 where->add_len = c.add_len;
196 where->add_cnt = c.add_cnt;
197 where->add = c.add;
198 }
199 assert(pos_is_okay());
200 merge();
201 assert(pos_is_okay());
202 }
203
204 private:
205 void merge(void)
206 {
207 while (where->offset == 0 && where != changes.begin()) {
208 left();
209 }
210 std::list<struct Change>::iterator next = where;
211 ++next;
212
213 while (next != changes.end() && next->offset == 0) {
214 where->del_cnt += next->del_cnt;
215 next->del_cnt = 0;
216 if (next->add == NULL) {
217 next = changes.erase(next);
218 } else if (where->add == NULL) {
219 where->add = next->add;
220 where->add_len = next->add_len;
221 where->add_cnt = next->add_cnt;
222 next = changes.erase(next);
223 } else {
224 ++next;
225 }
226 }
227 }
228
229 void go_to_change_for(size_t line)
230 {
231 while(where != changes.end()) {
232 if (line < pos) {
233 left();
234 continue;
235 }
236 if (pos + where->offset + where->add_cnt <= line) {
237 right();
238 continue;
239 }
240 // line is somewhere in this slot
241 if (line < pos + where->offset) {
242 break;
243 } else if (line == pos + where->offset) {
244 return;
245 } else {
246 split(line - pos);
247 right();
248 return;
249 }
250 }
251 /* it goes before this patch */
252 insert(line-pos);
253 }
254
255 void new_change(void) { insert(where->offset); }
256
257 void insert(size_t offset)
258 {
259 assert(pos_is_okay());
260 assert(where == changes.end() || offset <= where->offset);
261 if (where != changes.end())
262 where->offset -= offset;
263 changes.insert(where, Change(offset));
264 --where;
265 assert(pos_is_okay());
266 }
267
268 void split(size_t offset)
269 {
270 assert(pos_is_okay());
271
272 assert(where->offset < offset);
273 assert(offset < where->offset + where->add_cnt);
274
275 size_t keep_lines = offset - where->offset;
276
277 Change before(*where);
278
279 where->del_cnt = 0;
280 where->offset = 0;
281 where->skip_lines(keep_lines);
282
283 before.add_cnt = keep_lines;
284 before.add_len -= where->add_len;
285
286 changes.insert(where, before);
287 --where;
288 assert(pos_is_okay());
289 }
290
291 void delete_lines(size_t cnt)
292 {
293 std::list<struct Change>::iterator x = where;
294 assert(pos_is_okay());
295 while (cnt > 0)
296 {
297 size_t del;
298 del = x->add_cnt;
299 if (del > cnt)
300 del = cnt;
301 x->skip_lines(del);
302 cnt -= del;
303
304 ++x;
305 if (x == changes.end()) {
306 del = cnt;
307 } else {
308 del = x->offset;
309 if (del > cnt)
310 del = cnt;
311 x->offset -= del;
312 }
313 where->del_cnt += del;
314 cnt -= del;
315 }
316 assert(pos_is_okay());
317 }
318
319 void left(void) {
320 assert(pos_is_okay());
321 --where;
322 pos -= where->offset + where->add_cnt;
323 assert(pos_is_okay());
324 }
325
326 void right(void) {
327 assert(pos_is_okay());
328 pos += where->offset + where->add_cnt;
329 ++where;
330 assert(pos_is_okay());
331 }
332};
333
334class Patch {
335 FileChanges filechanges;
336 MemBlock add_text;
337
338 static bool retry_fwrite(char *b, size_t l, FileFd &f, Hashes *hash)
339 {
340 if (f.Write(b, l) == false)
341 return false;
342 if (hash)
343 hash->Add((unsigned char*)b, l);
344 return true;
345 }
346
347 static void dump_rest(FileFd &o, FileFd &i, Hashes *hash)
348 {
349 char buffer[BLOCK_SIZE];
350 unsigned long long l = 0;
351 while (i.Read(buffer, sizeof(buffer), &l)) {
352 if (l ==0 || !retry_fwrite(buffer, l, o, hash))
353 break;
354 }
355 }
356
357 static void dump_lines(FileFd &o, FileFd &i, size_t n, Hashes *hash)
358 {
359 char buffer[BLOCK_SIZE];
360 while (n > 0) {
361 if (i.ReadLine(buffer, sizeof(buffer)) == NULL)
362 buffer[0] = '\0';
363 size_t const l = strlen(buffer);
364 if (l == 0 || buffer[l-1] == '\n')
365 n--;
366 retry_fwrite(buffer, l, o, hash);
367 }
368 }
369
370 static void skip_lines(FileFd &i, int n)
371 {
372 char buffer[BLOCK_SIZE];
373 while (n > 0) {
374 if (i.ReadLine(buffer, sizeof(buffer)) == NULL)
375 buffer[0] = '\0';
376 size_t const l = strlen(buffer);
377 if (l == 0 || buffer[l-1] == '\n')
378 n--;
379 }
380 }
381
382 static void dump_mem(FileFd &o, char *p, size_t s, Hashes *hash) {
383 retry_fwrite(p, s, o, hash);
384 }
385
386 public:
387
388 bool read_diff(FileFd &f, Hashes * const h)
389 {
390 char buffer[BLOCK_SIZE];
391 bool cmdwanted = true;
392
393 Change ch(std::numeric_limits<size_t>::max());
394 if (f.ReadLine(buffer, sizeof(buffer)) == NULL)
395 return _error->Error("Reading first line of patchfile %s failed", f.Name().c_str());
396 do {
397 if (h != NULL)
398 h->Add(buffer);
399 if (cmdwanted) {
400 char *m, *c;
401 size_t s, e;
402 errno = 0;
403 s = strtoul(buffer, &m, 10);
404 if (unlikely(m == buffer || s == std::numeric_limits<unsigned long>::max() || errno != 0))
405 return _error->Error("Parsing patchfile %s failed: Expected an effected line start", f.Name().c_str());
406 else if (*m == ',') {
407 ++m;
408 e = strtol(m, &c, 10);
409 if (unlikely(m == c || e == std::numeric_limits<unsigned long>::max() || errno != 0))
410 return _error->Error("Parsing patchfile %s failed: Expected an effected line end", f.Name().c_str());
411 if (unlikely(e < s))
412 return _error->Error("Parsing patchfile %s failed: Effected lines end %lu is before start %lu", f.Name().c_str(), e, s);
413 } else {
414 e = s;
415 c = m;
416 }
417 if (s > ch.offset)
418 return _error->Error("Parsing patchfile %s failed: Effected line is after previous effected line", f.Name().c_str());
419 switch(*c) {
420 case 'a':
421 cmdwanted = false;
422 ch.add = NULL;
423 ch.add_cnt = 0;
424 ch.add_len = 0;
425 ch.offset = s;
426 ch.del_cnt = 0;
427 break;
428 case 'c':
429 if (unlikely(s == 0))
430 return _error->Error("Parsing patchfile %s failed: Change command can't effect line zero", f.Name().c_str());
431 cmdwanted = false;
432 ch.add = NULL;
433 ch.add_cnt = 0;
434 ch.add_len = 0;
435 ch.offset = s - 1;
436 ch.del_cnt = e - s + 1;
437 break;
438 case 'd':
439 if (unlikely(s == 0))
440 return _error->Error("Parsing patchfile %s failed: Delete command can't effect line zero", f.Name().c_str());
441 ch.offset = s - 1;
442 ch.del_cnt = e - s + 1;
443 ch.add = NULL;
444 ch.add_cnt = 0;
445 ch.add_len = 0;
446 filechanges.add_change(ch);
447 break;
448 default:
449 return _error->Error("Parsing patchfile %s failed: Unknown command", f.Name().c_str());
450 }
451 } else { /* !cmdwanted */
452 if (strcmp(buffer, ".\n") == 0) {
453 cmdwanted = true;
454 filechanges.add_change(ch);
455 } else {
456 char *last = NULL;
457 char *add;
458 size_t l;
459 if (ch.add)
460 last = ch.add + ch.add_len;
461 l = strlen(buffer);
462 add = add_text.add_easy(buffer, l, last);
463 if (!add) {
464 ch.add_len += l;
465 ch.add_cnt++;
466 } else {
467 if (ch.add) {
468 filechanges.add_change(ch);
469 ch.del_cnt = 0;
470 }
471 ch.offset += ch.add_cnt;
472 ch.add = add;
473 ch.add_len = l;
474 ch.add_cnt = 1;
475 }
476 }
477 }
478 } while(f.ReadLine(buffer, sizeof(buffer)));
479 return true;
480 }
481
482 void write_diff(FileFd &f)
483 {
484 unsigned long long line = 0;
485 std::list<struct Change>::reverse_iterator ch;
486 for (ch = filechanges.rbegin(); ch != filechanges.rend(); ++ch) {
487 line += ch->offset + ch->del_cnt;
488 }
489
490 for (ch = filechanges.rbegin(); ch != filechanges.rend(); ++ch) {
491 std::list<struct Change>::reverse_iterator mg_i, mg_e = ch;
492 while (ch->del_cnt == 0 && ch->offset == 0)
493 {
494 ++ch;
495 if (unlikely(ch == filechanges.rend()))
496 return;
497 }
498 line -= ch->del_cnt;
499 std::string buf;
500 if (ch->add_cnt > 0) {
501 if (ch->del_cnt == 0) {
502 strprintf(buf, "%llua\n", line);
503 } else if (ch->del_cnt == 1) {
504 strprintf(buf, "%lluc\n", line+1);
505 } else {
506 strprintf(buf, "%llu,%lluc\n", line+1, line+ch->del_cnt);
507 }
508 f.Write(buf.c_str(), buf.length());
509
510 mg_i = ch;
511 do {
512 dump_mem(f, mg_i->add, mg_i->add_len, NULL);
513 } while (mg_i-- != mg_e);
514
515 buf = ".\n";
516 f.Write(buf.c_str(), buf.length());
517 } else if (ch->del_cnt == 1) {
518 strprintf(buf, "%llud\n", line+1);
519 f.Write(buf.c_str(), buf.length());
520 } else if (ch->del_cnt > 1) {
521 strprintf(buf, "%llu,%llud\n", line+1, line+ch->del_cnt);
522 f.Write(buf.c_str(), buf.length());
523 }
524 line -= ch->offset;
525 }
526 }
527
528 void apply_against_file(FileFd &out, FileFd &in, Hashes *hash = NULL)
529 {
530 std::list<struct Change>::iterator ch;
531 for (ch = filechanges.begin(); ch != filechanges.end(); ++ch) {
532 dump_lines(out, in, ch->offset, hash);
533 skip_lines(in, ch->del_cnt);
534 dump_mem(out, ch->add, ch->add_len, hash);
535 }
536 dump_rest(out, in, hash);
537 out.Flush();
538 }
539};
540
541class RredMethod : public aptMethod {
542 private:
543 bool Debug;
544
545 struct PDiffFile {
546 std::string FileName;
547 HashStringList ExpectedHashes;
548 PDiffFile(std::string const &FileName, HashStringList const &ExpectedHashes) :
549 FileName(FileName), ExpectedHashes(ExpectedHashes) {}
550 };
551
552 HashStringList ReadExpectedHashesForPatch(unsigned int const patch, std::string const &Message)
553 {
554 HashStringList ExpectedHashes;
555 for (char const * const * type = HashString::SupportedHashes(); *type != NULL; ++type)
556 {
557 std::string tagname;
558 strprintf(tagname, "Patch-%d-%s-Hash", patch, *type);
559 std::string const hashsum = LookupTag(Message, tagname.c_str());
560 if (hashsum.empty() == false)
561 ExpectedHashes.push_back(HashString(*type, hashsum));
562 }
563 return ExpectedHashes;
564 }
565
566 protected:
567 virtual bool URIAcquire(std::string const &Message, FetchItem *Itm) APT_OVERRIDE {
568 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
569 URI Get = Itm->Uri;
570 std::string Path = Get.Host + Get.Path; // rred:/path - no host
571
572 FetchResult Res;
573 Res.Filename = Itm->DestFile;
574 if (Itm->Uri.empty())
575 {
576 Path = Itm->DestFile;
577 Itm->DestFile.append(".result");
578 } else
579 URIStart(Res);
580
581 std::vector<PDiffFile> patchfiles;
582 Patch patch;
583
584 if (FileExists(Path + ".ed") == true)
585 {
586 HashStringList const ExpectedHashes = ReadExpectedHashesForPatch(0, Message);
587 std::string const FileName = Path + ".ed";
588 if (ExpectedHashes.usable() == false)
589 return _error->Error("No hashes found for uncompressed patch: %s", FileName.c_str());
590 patchfiles.push_back(PDiffFile(FileName, ExpectedHashes));
591 }
592 else
593 {
594 _error->PushToStack();
595 std::vector<std::string> patches = GetListOfFilesInDir(flNotFile(Path), "gz", true, false);
596 _error->RevertToStack();
597
598 std::string const baseName = Path + ".ed.";
599 unsigned int seen_patches = 0;
600 for (std::vector<std::string>::const_iterator p = patches.begin();
601 p != patches.end(); ++p)
602 {
603 if (p->compare(0, baseName.length(), baseName) == 0)
604 {
605 HashStringList const ExpectedHashes = ReadExpectedHashesForPatch(seen_patches, Message);
606 if (ExpectedHashes.usable() == false)
607 return _error->Error("No hashes found for uncompressed patch %d: %s", seen_patches, p->c_str());
608 patchfiles.push_back(PDiffFile(*p, ExpectedHashes));
609 ++seen_patches;
610 }
611 }
612 }
613
614 std::string patch_name;
615 for (std::vector<PDiffFile>::iterator I = patchfiles.begin();
616 I != patchfiles.end();
617 ++I)
618 {
619 patch_name = I->FileName;
620 if (Debug == true)
621 std::clog << "Patching " << Path << " with " << patch_name
622 << std::endl;
623
624 FileFd p;
625 Hashes patch_hash(I->ExpectedHashes);
626 // all patches are compressed, even if the name doesn't reflect it
627 if (p.Open(patch_name, FileFd::ReadOnly, FileFd::Gzip) == false ||
628 patch.read_diff(p, &patch_hash) == false)
629 {
630 _error->DumpErrors(std::cerr, GlobalError::DEBUG, false);
631 return false;
632 }
633 p.Close();
634 HashStringList const hsl = patch_hash.GetHashStringList();
635 if (hsl != I->ExpectedHashes)
636 return _error->Error("Hash Sum mismatch for uncompressed patch %s", patch_name.c_str());
637 }
638
639 if (Debug == true)
640 std::clog << "Applying patches against " << Path
641 << " and writing results to " << Itm->DestFile
642 << std::endl;
643
644 FileFd inp, out;
645 if (inp.Open(Path, FileFd::ReadOnly, FileFd::Extension) == false)
646 {
647 std::cerr << "FAILED to open inp " << Path << std::endl;
648 return _error->Error("Failed to open inp %s", Path.c_str());
649 }
650 if (out.Open(Itm->DestFile, FileFd::WriteOnly | FileFd::Create | FileFd::BufferedWrite, FileFd::Extension) == false)
651 {
652 std::cerr << "FAILED to open out " << Itm->DestFile << std::endl;
653 return _error->Error("Failed to open out %s", Itm->DestFile.c_str());
654 }
655
656 Hashes hash(Itm->ExpectedHashes);
657 patch.apply_against_file(out, inp, &hash);
658
659 out.Close();
660 inp.Close();
661
662 if (_error->PendingError() == true) {
663 std::cerr << "FAILED to read or write files" << std::endl;
664 return false;
665 }
666
667 if (Debug == true) {
668 std::clog << "rred: finished file patching of " << Path << "." << std::endl;
669 }
670
671 struct stat bufbase, bufpatch;
672 if (stat(Path.c_str(), &bufbase) != 0 ||
673 stat(patch_name.c_str(), &bufpatch) != 0)
674 return _error->Errno("stat", _("Failed to stat %s"), Path.c_str());
675
676 struct timeval times[2];
677 times[0].tv_sec = bufbase.st_atime;
678 times[1].tv_sec = bufpatch.st_mtime;
679 times[0].tv_usec = times[1].tv_usec = 0;
680 if (utimes(Itm->DestFile.c_str(), times) != 0)
681 return _error->Errno("utimes",_("Failed to set modification time"));
682
683 if (stat(Itm->DestFile.c_str(), &bufbase) != 0)
684 return _error->Errno("stat", _("Failed to stat %s"), Itm->DestFile.c_str());
685
686 Res.LastModified = bufbase.st_mtime;
687 Res.Size = bufbase.st_size;
688 Res.TakeHashes(hash);
689 URIDone(Res);
690
691 return true;
692 }
693
694 public:
695 RredMethod() : aptMethod("rred", "2.0", SendConfig), Debug(false) {}
696};
697
698int main(int argc, char **argv)
699{
700 int i;
701 bool just_diff = true;
702 bool test = false;
703 Patch patch;
704
705 if (argc <= 1) {
706 return RredMethod().Run();
707 }
708
709 // Usage: rred -t input output diff ...
710 if (argc > 1 && strcmp(argv[1], "-t") == 0) {
711 // Read config files so we see compressors.
712 pkgInitConfig(*_config);
713 just_diff = false;
714 test = true;
715 i = 4;
716 } else if (argc > 1 && strcmp(argv[1], "-f") == 0) {
717 just_diff = false;
718 i = 2;
719 } else {
720 i = 1;
721 }
722
723 for (; i < argc; i++) {
724 FileFd p;
725 if (p.Open(argv[i], FileFd::ReadOnly) == false) {
726 _error->DumpErrors(std::cerr);
727 exit(1);
728 }
729 if (patch.read_diff(p, NULL) == false)
730 {
731 _error->DumpErrors(std::cerr);
732 exit(2);
733 }
734 }
735
736 if (test) {
737 FileFd out, inp;
738 std::cerr << "Patching " << argv[2] << " into " << argv[3] << "\n";
739 inp.Open(argv[2], FileFd::ReadOnly,FileFd::Extension);
740 out.Open(argv[3], FileFd::WriteOnly | FileFd::Create | FileFd::BufferedWrite, FileFd::Extension);
741 patch.apply_against_file(out, inp);
742 out.Close();
743 } else if (just_diff) {
744 FileFd out;
745 out.OpenDescriptor(STDOUT_FILENO, FileFd::WriteOnly | FileFd::Create);
746 patch.write_diff(out);
747 out.Close();
748 } else {
749 FileFd out, inp;
750 out.OpenDescriptor(STDOUT_FILENO, FileFd::WriteOnly | FileFd::Create | FileFd::BufferedWrite);
751 inp.OpenDescriptor(STDIN_FILENO, FileFd::ReadOnly);
752 patch.apply_against_file(out, inp);
753 out.Close();
754 }
755 return 0;
756}