]> git.saurik.com Git - apt.git/blob - methods/rred.cc
allow pdiff bootstrap from all supported compressors
[apt.git] / methods / rred.cc
1 // Copyright (c) 2014 Anthony Towns
2 //
3 // This program is free software; you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation; either version 2 of the License, or
6 // (at your option) any later version.
7
8 #include <config.h>
9
10 #include <apt-pkg/init.h>
11 #include <apt-pkg/fileutl.h>
12 #include <apt-pkg/error.h>
13 #include <apt-pkg/acquire-method.h>
14 #include <apt-pkg/strutl.h>
15 #include <apt-pkg/hashes.h>
16 #include <apt-pkg/configuration.h>
17 #include "aptmethod.h"
18
19 #include <stddef.h>
20 #include <iostream>
21 #include <string>
22 #include <list>
23 #include <vector>
24
25 #include <assert.h>
26 #include <errno.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/stat.h>
31 #include <sys/time.h>
32
33 #include <apti18n.h>
34
35 #define BLOCK_SIZE (512*1024)
36
37 class MemBlock {
38 char *start;
39 size_t size;
40 char *free;
41 MemBlock *next;
42
43 explicit MemBlock(size_t size) : size(size), next(NULL)
44 {
45 free = start = new char[size];
46 }
47
48 size_t avail(void) { return size - (free - start); }
49
50 public:
51
52 MemBlock(void) {
53 free = start = new char[BLOCK_SIZE];
54 size = BLOCK_SIZE;
55 next = NULL;
56 }
57
58 ~MemBlock() {
59 delete [] start;
60 delete next;
61 }
62
63 void clear(void) {
64 free = start;
65 if (next)
66 next->clear();
67 }
68
69 char *add_easy(char *src, size_t len, char *last)
70 {
71 if (last) {
72 for (MemBlock *k = this; k; k = k->next) {
73 if (k->free == last) {
74 if (len <= k->avail()) {
75 char *n = k->add(src, len);
76 assert(last == n);
77 if (last == n)
78 return NULL;
79 return n;
80 } else {
81 break;
82 }
83 } else if (last >= start && last < free) {
84 break;
85 }
86 }
87 }
88 return add(src, len);
89 }
90
91 char *add(char *src, size_t len) {
92 if (len > avail()) {
93 if (!next) {
94 if (len > BLOCK_SIZE) {
95 next = new MemBlock(len);
96 } else {
97 next = new MemBlock;
98 }
99 }
100 return next->add(src, len);
101 }
102 char *dst = free;
103 free += len;
104 memcpy(dst, src, len);
105 return dst;
106 }
107 };
108
109 struct Change {
110 /* Ordering:
111 *
112 * 1. write out <offset> lines unchanged
113 * 2. skip <del_cnt> lines from source
114 * 3. write out <add_cnt> lines (<add>/<add_len>)
115 */
116 size_t offset;
117 size_t del_cnt;
118 size_t add_cnt; /* lines */
119 size_t add_len; /* bytes */
120 char *add;
121
122 explicit Change(size_t off)
123 {
124 offset = off;
125 del_cnt = add_cnt = add_len = 0;
126 add = NULL;
127 }
128
129 /* actually, don't write <lines> lines from <add> */
130 void skip_lines(size_t lines)
131 {
132 while (lines > 0) {
133 char *s = (char*) memchr(add, '\n', add_len);
134 assert(s != NULL);
135 s++;
136 add_len -= (s - add);
137 add_cnt--;
138 lines--;
139 if (add_len == 0) {
140 add = NULL;
141 assert(add_cnt == 0);
142 assert(lines == 0);
143 } else {
144 add = s;
145 assert(add_cnt > 0);
146 }
147 }
148 }
149 };
150
151 class FileChanges {
152 std::list<struct Change> changes;
153 std::list<struct Change>::iterator where;
154 size_t pos; // line number is as far left of iterator as possible
155
156 bool pos_is_okay(void) const
157 {
158 #ifdef POSDEBUG
159 size_t cpos = 0;
160 std::list<struct Change>::const_iterator x;
161 for (x = changes.begin(); x != where; ++x) {
162 assert(x != changes.end());
163 cpos += x->offset + x->add_cnt;
164 }
165 return cpos == pos;
166 #else
167 return true;
168 #endif
169 }
170
171 public:
172 FileChanges() {
173 where = changes.end();
174 pos = 0;
175 }
176
177 std::list<struct Change>::iterator begin(void) { return changes.begin(); }
178 std::list<struct Change>::iterator end(void) { return changes.end(); }
179
180 std::list<struct Change>::reverse_iterator rbegin(void) { return changes.rbegin(); }
181 std::list<struct Change>::reverse_iterator rend(void) { return changes.rend(); }
182
183 void add_change(Change c) {
184 assert(pos_is_okay());
185 go_to_change_for(c.offset);
186 assert(pos + where->offset == c.offset);
187 if (c.del_cnt > 0)
188 delete_lines(c.del_cnt);
189 assert(pos + where->offset == c.offset);
190 if (c.add_len > 0) {
191 assert(pos_is_okay());
192 if (where->add_len > 0)
193 new_change();
194 assert(where->add_len == 0 && where->add_cnt == 0);
195
196 where->add_len = c.add_len;
197 where->add_cnt = c.add_cnt;
198 where->add = c.add;
199 }
200 assert(pos_is_okay());
201 merge();
202 assert(pos_is_okay());
203 }
204
205 private:
206 void merge(void)
207 {
208 while (where->offset == 0 && where != changes.begin()) {
209 left();
210 }
211 std::list<struct Change>::iterator next = where;
212 ++next;
213
214 while (next != changes.end() && next->offset == 0) {
215 where->del_cnt += next->del_cnt;
216 next->del_cnt = 0;
217 if (next->add == NULL) {
218 next = changes.erase(next);
219 } else if (where->add == NULL) {
220 where->add = next->add;
221 where->add_len = next->add_len;
222 where->add_cnt = next->add_cnt;
223 next = changes.erase(next);
224 } else {
225 ++next;
226 }
227 }
228 }
229
230 void go_to_change_for(size_t line)
231 {
232 while(where != changes.end()) {
233 if (line < pos) {
234 left();
235 continue;
236 }
237 if (pos + where->offset + where->add_cnt <= line) {
238 right();
239 continue;
240 }
241 // line is somewhere in this slot
242 if (line < pos + where->offset) {
243 break;
244 } else if (line == pos + where->offset) {
245 return;
246 } else {
247 split(line - pos);
248 right();
249 return;
250 }
251 }
252 /* it goes before this patch */
253 insert(line-pos);
254 }
255
256 void new_change(void) { insert(where->offset); }
257
258 void insert(size_t offset)
259 {
260 assert(pos_is_okay());
261 assert(where == changes.end() || offset <= where->offset);
262 if (where != changes.end())
263 where->offset -= offset;
264 changes.insert(where, Change(offset));
265 --where;
266 assert(pos_is_okay());
267 }
268
269 void split(size_t offset)
270 {
271 assert(pos_is_okay());
272
273 assert(where->offset < offset);
274 assert(offset < where->offset + where->add_cnt);
275
276 size_t keep_lines = offset - where->offset;
277
278 Change before(*where);
279
280 where->del_cnt = 0;
281 where->offset = 0;
282 where->skip_lines(keep_lines);
283
284 before.add_cnt = keep_lines;
285 before.add_len -= where->add_len;
286
287 changes.insert(where, before);
288 --where;
289 assert(pos_is_okay());
290 }
291
292 void delete_lines(size_t cnt)
293 {
294 std::list<struct Change>::iterator x = where;
295 assert(pos_is_okay());
296 while (cnt > 0)
297 {
298 size_t del;
299 del = x->add_cnt;
300 if (del > cnt)
301 del = cnt;
302 x->skip_lines(del);
303 cnt -= del;
304
305 ++x;
306 if (x == changes.end()) {
307 del = cnt;
308 } else {
309 del = x->offset;
310 if (del > cnt)
311 del = cnt;
312 x->offset -= del;
313 }
314 where->del_cnt += del;
315 cnt -= del;
316 }
317 assert(pos_is_okay());
318 }
319
320 void left(void) {
321 assert(pos_is_okay());
322 --where;
323 pos -= where->offset + where->add_cnt;
324 assert(pos_is_okay());
325 }
326
327 void right(void) {
328 assert(pos_is_okay());
329 pos += where->offset + where->add_cnt;
330 ++where;
331 assert(pos_is_okay());
332 }
333 };
334
335 class Patch {
336 FileChanges filechanges;
337 MemBlock add_text;
338
339 static bool retry_fwrite(char *b, size_t l, FileFd &f, Hashes *hash)
340 {
341 if (f.Write(b, l) == false)
342 return false;
343 if (hash)
344 hash->Add((unsigned char*)b, l);
345 return true;
346 }
347
348 static void dump_rest(FileFd &o, FileFd &i, Hashes *hash)
349 {
350 char buffer[BLOCK_SIZE];
351 unsigned long long l = 0;
352 while (i.Read(buffer, sizeof(buffer), &l)) {
353 if (l ==0 || !retry_fwrite(buffer, l, o, hash))
354 break;
355 }
356 }
357
358 static void dump_lines(FileFd &o, FileFd &i, size_t n, Hashes *hash)
359 {
360 char buffer[BLOCK_SIZE];
361 while (n > 0) {
362 if (i.ReadLine(buffer, sizeof(buffer)) == NULL)
363 buffer[0] = '\0';
364 size_t const l = strlen(buffer);
365 if (l == 0 || buffer[l-1] == '\n')
366 n--;
367 retry_fwrite(buffer, l, o, hash);
368 }
369 }
370
371 static void skip_lines(FileFd &i, int n)
372 {
373 char buffer[BLOCK_SIZE];
374 while (n > 0) {
375 if (i.ReadLine(buffer, sizeof(buffer)) == NULL)
376 buffer[0] = '\0';
377 size_t const l = strlen(buffer);
378 if (l == 0 || buffer[l-1] == '\n')
379 n--;
380 }
381 }
382
383 static void dump_mem(FileFd &o, char *p, size_t s, Hashes *hash) {
384 retry_fwrite(p, s, o, hash);
385 }
386
387 public:
388
389 bool read_diff(FileFd &f, Hashes * const h)
390 {
391 char buffer[BLOCK_SIZE];
392 bool cmdwanted = true;
393
394 Change ch(std::numeric_limits<size_t>::max());
395 if (f.ReadLine(buffer, sizeof(buffer)) == NULL)
396 return _error->Error("Reading first line of patchfile %s failed", f.Name().c_str());
397 do {
398 if (h != NULL)
399 h->Add(buffer);
400 if (cmdwanted) {
401 char *m, *c;
402 size_t s, e;
403 errno = 0;
404 s = strtoul(buffer, &m, 10);
405 if (unlikely(m == buffer || s == std::numeric_limits<unsigned long>::max() || errno != 0))
406 return _error->Error("Parsing patchfile %s failed: Expected an effected line start", f.Name().c_str());
407 else if (*m == ',') {
408 ++m;
409 e = strtol(m, &c, 10);
410 if (unlikely(m == c || e == std::numeric_limits<unsigned long>::max() || errno != 0))
411 return _error->Error("Parsing patchfile %s failed: Expected an effected line end", f.Name().c_str());
412 if (unlikely(e < s))
413 return _error->Error("Parsing patchfile %s failed: Effected lines end %lu is before start %lu", f.Name().c_str(), e, s);
414 } else {
415 e = s;
416 c = m;
417 }
418 if (s > ch.offset)
419 return _error->Error("Parsing patchfile %s failed: Effected line is after previous effected line", f.Name().c_str());
420 switch(*c) {
421 case 'a':
422 cmdwanted = false;
423 ch.add = NULL;
424 ch.add_cnt = 0;
425 ch.add_len = 0;
426 ch.offset = s;
427 ch.del_cnt = 0;
428 break;
429 case 'c':
430 if (unlikely(s == 0))
431 return _error->Error("Parsing patchfile %s failed: Change command can't effect line zero", f.Name().c_str());
432 cmdwanted = false;
433 ch.add = NULL;
434 ch.add_cnt = 0;
435 ch.add_len = 0;
436 ch.offset = s - 1;
437 ch.del_cnt = e - s + 1;
438 break;
439 case 'd':
440 if (unlikely(s == 0))
441 return _error->Error("Parsing patchfile %s failed: Delete command can't effect line zero", f.Name().c_str());
442 ch.offset = s - 1;
443 ch.del_cnt = e - s + 1;
444 ch.add = NULL;
445 ch.add_cnt = 0;
446 ch.add_len = 0;
447 filechanges.add_change(ch);
448 break;
449 default:
450 return _error->Error("Parsing patchfile %s failed: Unknown command", f.Name().c_str());
451 }
452 } else { /* !cmdwanted */
453 if (strcmp(buffer, ".\n") == 0) {
454 cmdwanted = true;
455 filechanges.add_change(ch);
456 } else {
457 char *last = NULL;
458 char *add;
459 size_t l;
460 if (ch.add)
461 last = ch.add + ch.add_len;
462 l = strlen(buffer);
463 add = add_text.add_easy(buffer, l, last);
464 if (!add) {
465 ch.add_len += l;
466 ch.add_cnt++;
467 } else {
468 if (ch.add) {
469 filechanges.add_change(ch);
470 ch.del_cnt = 0;
471 }
472 ch.offset += ch.add_cnt;
473 ch.add = add;
474 ch.add_len = l;
475 ch.add_cnt = 1;
476 }
477 }
478 }
479 } while(f.ReadLine(buffer, sizeof(buffer)));
480 return true;
481 }
482
483 void write_diff(FileFd &f)
484 {
485 unsigned long long line = 0;
486 std::list<struct Change>::reverse_iterator ch;
487 for (ch = filechanges.rbegin(); ch != filechanges.rend(); ++ch) {
488 line += ch->offset + ch->del_cnt;
489 }
490
491 for (ch = filechanges.rbegin(); ch != filechanges.rend(); ++ch) {
492 std::list<struct Change>::reverse_iterator mg_i, mg_e = ch;
493 while (ch->del_cnt == 0 && ch->offset == 0)
494 ++ch;
495 line -= ch->del_cnt;
496 std::string buf;
497 if (ch->add_cnt > 0) {
498 if (ch->del_cnt == 0) {
499 strprintf(buf, "%llua\n", line);
500 } else if (ch->del_cnt == 1) {
501 strprintf(buf, "%lluc\n", line+1);
502 } else {
503 strprintf(buf, "%llu,%lluc\n", line+1, line+ch->del_cnt);
504 }
505 f.Write(buf.c_str(), buf.length());
506
507 mg_i = ch;
508 do {
509 dump_mem(f, mg_i->add, mg_i->add_len, NULL);
510 } while (mg_i-- != mg_e);
511
512 buf = ".\n";
513 f.Write(buf.c_str(), buf.length());
514 } else if (ch->del_cnt == 1) {
515 strprintf(buf, "%llud\n", line+1);
516 f.Write(buf.c_str(), buf.length());
517 } else if (ch->del_cnt > 1) {
518 strprintf(buf, "%llu,%llud\n", line+1, line+ch->del_cnt);
519 f.Write(buf.c_str(), buf.length());
520 }
521 line -= ch->offset;
522 }
523 }
524
525 void apply_against_file(FileFd &out, FileFd &in, Hashes *hash = NULL)
526 {
527 std::list<struct Change>::iterator ch;
528 for (ch = filechanges.begin(); ch != filechanges.end(); ++ch) {
529 dump_lines(out, in, ch->offset, hash);
530 skip_lines(in, ch->del_cnt);
531 dump_mem(out, ch->add, ch->add_len, hash);
532 }
533 dump_rest(out, in, hash);
534 out.Flush();
535 }
536 };
537
538 class RredMethod : public aptMethod {
539 private:
540 bool Debug;
541
542 struct PDiffFile {
543 std::string FileName;
544 HashStringList ExpectedHashes;
545 PDiffFile(std::string const &FileName, HashStringList const &ExpectedHashes) :
546 FileName(FileName), ExpectedHashes(ExpectedHashes) {}
547 };
548
549 HashStringList ReadExpectedHashesForPatch(unsigned int const patch, std::string const &Message)
550 {
551 HashStringList ExpectedHashes;
552 for (char const * const * type = HashString::SupportedHashes(); *type != NULL; ++type)
553 {
554 std::string tagname;
555 strprintf(tagname, "Patch-%d-%s-Hash", patch, *type);
556 std::string const hashsum = LookupTag(Message, tagname.c_str());
557 if (hashsum.empty() == false)
558 ExpectedHashes.push_back(HashString(*type, hashsum));
559 }
560 return ExpectedHashes;
561 }
562
563 protected:
564 virtual bool URIAcquire(std::string const &Message, FetchItem *Itm) APT_OVERRIDE {
565 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
566 URI Get = Itm->Uri;
567 std::string Path = Get.Host + Get.Path; // rred:/path - no host
568
569 FetchResult Res;
570 Res.Filename = Itm->DestFile;
571 if (Itm->Uri.empty())
572 {
573 Path = Itm->DestFile;
574 Itm->DestFile.append(".result");
575 } else
576 URIStart(Res);
577
578 std::vector<PDiffFile> patchfiles;
579 Patch patch;
580
581 if (FileExists(Path + ".ed") == true)
582 {
583 HashStringList const ExpectedHashes = ReadExpectedHashesForPatch(0, Message);
584 std::string const FileName = Path + ".ed";
585 if (ExpectedHashes.usable() == false)
586 return _error->Error("No hashes found for uncompressed patch: %s", FileName.c_str());
587 patchfiles.push_back(PDiffFile(FileName, ExpectedHashes));
588 }
589 else
590 {
591 _error->PushToStack();
592 std::vector<std::string> patches = GetListOfFilesInDir(flNotFile(Path), "gz", true, false);
593 _error->RevertToStack();
594
595 std::string const baseName = Path + ".ed.";
596 unsigned int seen_patches = 0;
597 for (std::vector<std::string>::const_iterator p = patches.begin();
598 p != patches.end(); ++p)
599 {
600 if (p->compare(0, baseName.length(), baseName) == 0)
601 {
602 HashStringList const ExpectedHashes = ReadExpectedHashesForPatch(seen_patches, Message);
603 if (ExpectedHashes.usable() == false)
604 return _error->Error("No hashes found for uncompressed patch %d: %s", seen_patches, p->c_str());
605 patchfiles.push_back(PDiffFile(*p, ExpectedHashes));
606 ++seen_patches;
607 }
608 }
609 }
610
611 std::string patch_name;
612 for (std::vector<PDiffFile>::iterator I = patchfiles.begin();
613 I != patchfiles.end();
614 ++I)
615 {
616 patch_name = I->FileName;
617 if (Debug == true)
618 std::clog << "Patching " << Path << " with " << patch_name
619 << std::endl;
620
621 FileFd p;
622 Hashes patch_hash(I->ExpectedHashes);
623 // all patches are compressed, even if the name doesn't reflect it
624 if (p.Open(patch_name, FileFd::ReadOnly, FileFd::Gzip) == false ||
625 patch.read_diff(p, &patch_hash) == false)
626 {
627 _error->DumpErrors(std::cerr, GlobalError::DEBUG, false);
628 return false;
629 }
630 p.Close();
631 HashStringList const hsl = patch_hash.GetHashStringList();
632 if (hsl != I->ExpectedHashes)
633 return _error->Error("Hash Sum mismatch for uncompressed patch %s", patch_name.c_str());
634 }
635
636 if (Debug == true)
637 std::clog << "Applying patches against " << Path
638 << " and writing results to " << Itm->DestFile
639 << std::endl;
640
641 FileFd inp, out;
642 if (inp.Open(Path, FileFd::ReadOnly, FileFd::Extension) == false)
643 {
644 std::cerr << "FAILED to open inp " << Path << std::endl;
645 return _error->Error("Failed to open inp %s", Path.c_str());
646 }
647 if (out.Open(Itm->DestFile, FileFd::WriteOnly | FileFd::Create | FileFd::BufferedWrite, FileFd::Extension) == false)
648 {
649 std::cerr << "FAILED to open out " << Itm->DestFile << std::endl;
650 return _error->Error("Failed to open out %s", Itm->DestFile.c_str());
651 }
652
653 Hashes hash(Itm->ExpectedHashes);
654 patch.apply_against_file(out, inp, &hash);
655
656 out.Close();
657 inp.Close();
658
659 if (Debug == true) {
660 std::clog << "rred: finished file patching of " << Path << "." << std::endl;
661 }
662
663 struct stat bufbase, bufpatch;
664 if (stat(Path.c_str(), &bufbase) != 0 ||
665 stat(patch_name.c_str(), &bufpatch) != 0)
666 return _error->Errno("stat", _("Failed to stat %s"), Path.c_str());
667
668 struct timeval times[2];
669 times[0].tv_sec = bufbase.st_atime;
670 times[1].tv_sec = bufpatch.st_mtime;
671 times[0].tv_usec = times[1].tv_usec = 0;
672 if (utimes(Itm->DestFile.c_str(), times) != 0)
673 return _error->Errno("utimes",_("Failed to set modification time"));
674
675 if (stat(Itm->DestFile.c_str(), &bufbase) != 0)
676 return _error->Errno("stat", _("Failed to stat %s"), Itm->DestFile.c_str());
677
678 Res.LastModified = bufbase.st_mtime;
679 Res.Size = bufbase.st_size;
680 Res.TakeHashes(hash);
681 URIDone(Res);
682
683 return true;
684 }
685
686 public:
687 RredMethod() : aptMethod("rred", "2.0", SendConfig), Debug(false) {}
688 };
689
690 int main(int argc, char **argv)
691 {
692 int i;
693 bool just_diff = true;
694 bool test = false;
695 Patch patch;
696
697 if (argc <= 1) {
698 RredMethod Mth;
699 return Mth.Run();
700 }
701
702 // Usage: rred -t input output diff ...
703 if (argc > 1 && strcmp(argv[1], "-t") == 0) {
704 // Read config files so we see compressors.
705 pkgInitConfig(*_config);
706 just_diff = false;
707 test = true;
708 i = 4;
709 } else if (argc > 1 && strcmp(argv[1], "-f") == 0) {
710 just_diff = false;
711 i = 2;
712 } else {
713 i = 1;
714 }
715
716 for (; i < argc; i++) {
717 FileFd p;
718 if (p.Open(argv[i], FileFd::ReadOnly) == false) {
719 _error->DumpErrors(std::cerr);
720 exit(1);
721 }
722 if (patch.read_diff(p, NULL) == false)
723 {
724 _error->DumpErrors(std::cerr);
725 exit(2);
726 }
727 }
728
729 if (test) {
730 FileFd out, inp;
731 std::cerr << "Patching " << argv[2] << " into " << argv[3] << "\n";
732 inp.Open(argv[2], FileFd::ReadOnly,FileFd::Extension);
733 out.Open(argv[3], FileFd::WriteOnly | FileFd::Create | FileFd::BufferedWrite, FileFd::Extension);
734 patch.apply_against_file(out, inp);
735 out.Close();
736 } else if (just_diff) {
737 FileFd out;
738 out.OpenDescriptor(STDOUT_FILENO, FileFd::WriteOnly | FileFd::Create);
739 patch.write_diff(out);
740 out.Close();
741 } else {
742 FileFd out, inp;
743 out.OpenDescriptor(STDOUT_FILENO, FileFd::WriteOnly | FileFd::Create | FileFd::BufferedWrite);
744 inp.OpenDescriptor(STDIN_FILENO, FileFd::ReadOnly);
745 patch.apply_against_file(out, inp);
746 out.Close();
747 }
748 return 0;
749 }