]> git.saurik.com Git - apt.git/blob - methods/rred.cc
Do not buffer writes larger than the buffer if possible
[apt.git] / methods / rred.cc
1 // Copyright (c) 2014 Anthony Towns
2 //
3 // This program is free software; you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation; either version 2 of the License, or
6 // (at your option) any later version.
7
8 #include <config.h>
9
10 #include <apt-pkg/init.h>
11 #include <apt-pkg/fileutl.h>
12 #include <apt-pkg/error.h>
13 #include <apt-pkg/acquire-method.h>
14 #include <apt-pkg/strutl.h>
15 #include <apt-pkg/hashes.h>
16 #include <apt-pkg/configuration.h>
17 #include "aptmethod.h"
18
19 #include <stddef.h>
20 #include <iostream>
21 #include <string>
22 #include <list>
23 #include <vector>
24
25 #include <assert.h>
26 #include <errno.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/stat.h>
31 #include <sys/time.h>
32
33 #include <apti18n.h>
34
35 #define BLOCK_SIZE (512*1024)
36
37 class MemBlock {
38 char *start;
39 size_t size;
40 char *free;
41 MemBlock *next;
42
43 explicit MemBlock(size_t size) : size(size), next(NULL)
44 {
45 free = start = new char[size];
46 }
47
48 size_t avail(void) { return size - (free - start); }
49
50 public:
51
52 MemBlock(void) {
53 free = start = new char[BLOCK_SIZE];
54 size = BLOCK_SIZE;
55 next = NULL;
56 }
57
58 ~MemBlock() {
59 delete [] start;
60 delete next;
61 }
62
63 void clear(void) {
64 free = start;
65 if (next)
66 next->clear();
67 }
68
69 char *add_easy(char *src, size_t len, char *last)
70 {
71 if (last) {
72 for (MemBlock *k = this; k; k = k->next) {
73 if (k->free == last) {
74 if (len <= k->avail()) {
75 char *n = k->add(src, len);
76 assert(last == n);
77 if (last == n)
78 return NULL;
79 return n;
80 } else {
81 break;
82 }
83 } else if (last >= start && last < free) {
84 break;
85 }
86 }
87 }
88 return add(src, len);
89 }
90
91 char *add(char *src, size_t len) {
92 if (len > avail()) {
93 if (!next) {
94 if (len > BLOCK_SIZE) {
95 next = new MemBlock(len);
96 } else {
97 next = new MemBlock;
98 }
99 }
100 return next->add(src, len);
101 }
102 char *dst = free;
103 free += len;
104 memcpy(dst, src, len);
105 return dst;
106 }
107 };
108
109 struct Change {
110 /* Ordering:
111 *
112 * 1. write out <offset> lines unchanged
113 * 2. skip <del_cnt> lines from source
114 * 3. write out <add_cnt> lines (<add>/<add_len>)
115 */
116 size_t offset;
117 size_t del_cnt;
118 size_t add_cnt; /* lines */
119 size_t add_len; /* bytes */
120 char *add;
121
122 explicit Change(size_t off)
123 {
124 offset = off;
125 del_cnt = add_cnt = add_len = 0;
126 add = NULL;
127 }
128
129 /* actually, don't write <lines> lines from <add> */
130 void skip_lines(size_t lines)
131 {
132 while (lines > 0) {
133 char *s = (char*) memchr(add, '\n', add_len);
134 assert(s != NULL);
135 s++;
136 add_len -= (s - add);
137 add_cnt--;
138 lines--;
139 if (add_len == 0) {
140 add = NULL;
141 assert(add_cnt == 0);
142 assert(lines == 0);
143 } else {
144 add = s;
145 assert(add_cnt > 0);
146 }
147 }
148 }
149 };
150
151 class FileChanges {
152 std::list<struct Change> changes;
153 std::list<struct Change>::iterator where;
154 size_t pos; // line number is as far left of iterator as possible
155
156 bool pos_is_okay(void) const
157 {
158 #ifdef POSDEBUG
159 size_t cpos = 0;
160 std::list<struct Change>::const_iterator x;
161 for (x = changes.begin(); x != where; ++x) {
162 assert(x != changes.end());
163 cpos += x->offset + x->add_cnt;
164 }
165 return cpos == pos;
166 #else
167 return true;
168 #endif
169 }
170
171 public:
172 FileChanges() {
173 where = changes.end();
174 pos = 0;
175 }
176
177 std::list<struct Change>::iterator begin(void) { return changes.begin(); }
178 std::list<struct Change>::iterator end(void) { return changes.end(); }
179
180 std::list<struct Change>::reverse_iterator rbegin(void) { return changes.rbegin(); }
181 std::list<struct Change>::reverse_iterator rend(void) { return changes.rend(); }
182
183 void add_change(Change c) {
184 assert(pos_is_okay());
185 go_to_change_for(c.offset);
186 assert(pos + where->offset == c.offset);
187 if (c.del_cnt > 0)
188 delete_lines(c.del_cnt);
189 assert(pos + where->offset == c.offset);
190 if (c.add_len > 0) {
191 assert(pos_is_okay());
192 if (where->add_len > 0)
193 new_change();
194 assert(where->add_len == 0 && where->add_cnt == 0);
195
196 where->add_len = c.add_len;
197 where->add_cnt = c.add_cnt;
198 where->add = c.add;
199 }
200 assert(pos_is_okay());
201 merge();
202 assert(pos_is_okay());
203 }
204
205 private:
206 void merge(void)
207 {
208 while (where->offset == 0 && where != changes.begin()) {
209 left();
210 }
211 std::list<struct Change>::iterator next = where;
212 ++next;
213
214 while (next != changes.end() && next->offset == 0) {
215 where->del_cnt += next->del_cnt;
216 next->del_cnt = 0;
217 if (next->add == NULL) {
218 next = changes.erase(next);
219 } else if (where->add == NULL) {
220 where->add = next->add;
221 where->add_len = next->add_len;
222 where->add_cnt = next->add_cnt;
223 next = changes.erase(next);
224 } else {
225 ++next;
226 }
227 }
228 }
229
230 void go_to_change_for(size_t line)
231 {
232 while(where != changes.end()) {
233 if (line < pos) {
234 left();
235 continue;
236 }
237 if (pos + where->offset + where->add_cnt <= line) {
238 right();
239 continue;
240 }
241 // line is somewhere in this slot
242 if (line < pos + where->offset) {
243 break;
244 } else if (line == pos + where->offset) {
245 return;
246 } else {
247 split(line - pos);
248 right();
249 return;
250 }
251 }
252 /* it goes before this patch */
253 insert(line-pos);
254 }
255
256 void new_change(void) { insert(where->offset); }
257
258 void insert(size_t offset)
259 {
260 assert(pos_is_okay());
261 assert(where == changes.end() || offset <= where->offset);
262 if (where != changes.end())
263 where->offset -= offset;
264 changes.insert(where, Change(offset));
265 --where;
266 assert(pos_is_okay());
267 }
268
269 void split(size_t offset)
270 {
271 assert(pos_is_okay());
272
273 assert(where->offset < offset);
274 assert(offset < where->offset + where->add_cnt);
275
276 size_t keep_lines = offset - where->offset;
277
278 Change before(*where);
279
280 where->del_cnt = 0;
281 where->offset = 0;
282 where->skip_lines(keep_lines);
283
284 before.add_cnt = keep_lines;
285 before.add_len -= where->add_len;
286
287 changes.insert(where, before);
288 --where;
289 assert(pos_is_okay());
290 }
291
292 void delete_lines(size_t cnt)
293 {
294 std::list<struct Change>::iterator x = where;
295 assert(pos_is_okay());
296 while (cnt > 0)
297 {
298 size_t del;
299 del = x->add_cnt;
300 if (del > cnt)
301 del = cnt;
302 x->skip_lines(del);
303 cnt -= del;
304
305 ++x;
306 if (x == changes.end()) {
307 del = cnt;
308 } else {
309 del = x->offset;
310 if (del > cnt)
311 del = cnt;
312 x->offset -= del;
313 }
314 where->del_cnt += del;
315 cnt -= del;
316 }
317 assert(pos_is_okay());
318 }
319
320 void left(void) {
321 assert(pos_is_okay());
322 --where;
323 pos -= where->offset + where->add_cnt;
324 assert(pos_is_okay());
325 }
326
327 void right(void) {
328 assert(pos_is_okay());
329 pos += where->offset + where->add_cnt;
330 ++where;
331 assert(pos_is_okay());
332 }
333 };
334
335 class Patch {
336 FileChanges filechanges;
337 MemBlock add_text;
338
339 static bool retry_fwrite(char *b, size_t l, FileFd &f, Hashes *hash)
340 {
341 if (f.Write(b, l) == false)
342 return false;
343 if (hash)
344 hash->Add((unsigned char*)b, l);
345 return true;
346 }
347
348 static void dump_rest(FileFd &o, FileFd &i, Hashes *hash)
349 {
350 char buffer[BLOCK_SIZE];
351 unsigned long long l = 0;
352 while (i.Read(buffer, sizeof(buffer), &l)) {
353 if (l ==0 || !retry_fwrite(buffer, l, o, hash))
354 break;
355 }
356 }
357
358 static void dump_lines(FileFd &o, FileFd &i, size_t n, Hashes *hash)
359 {
360 char buffer[BLOCK_SIZE];
361 while (n > 0) {
362 if (i.ReadLine(buffer, sizeof(buffer)) == NULL)
363 buffer[0] = '\0';
364 size_t const l = strlen(buffer);
365 if (l == 0 || buffer[l-1] == '\n')
366 n--;
367 retry_fwrite(buffer, l, o, hash);
368 }
369 }
370
371 static void skip_lines(FileFd &i, int n)
372 {
373 char buffer[BLOCK_SIZE];
374 while (n > 0) {
375 if (i.ReadLine(buffer, sizeof(buffer)) == NULL)
376 buffer[0] = '\0';
377 size_t const l = strlen(buffer);
378 if (l == 0 || buffer[l-1] == '\n')
379 n--;
380 }
381 }
382
383 static void dump_mem(FileFd &o, char *p, size_t s, Hashes *hash) {
384 retry_fwrite(p, s, o, hash);
385 }
386
387 public:
388
389 bool read_diff(FileFd &f, Hashes * const h)
390 {
391 char buffer[BLOCK_SIZE];
392 bool cmdwanted = true;
393
394 Change ch(std::numeric_limits<size_t>::max());
395 if (f.ReadLine(buffer, sizeof(buffer)) == NULL)
396 return _error->Error("Reading first line of patchfile %s failed", f.Name().c_str());
397 do {
398 if (h != NULL)
399 h->Add(buffer);
400 if (cmdwanted) {
401 char *m, *c;
402 size_t s, e;
403 errno = 0;
404 s = strtoul(buffer, &m, 10);
405 if (unlikely(m == buffer || s == std::numeric_limits<unsigned long>::max() || errno != 0))
406 return _error->Error("Parsing patchfile %s failed: Expected an effected line start", f.Name().c_str());
407 else if (*m == ',') {
408 ++m;
409 e = strtol(m, &c, 10);
410 if (unlikely(m == c || e == std::numeric_limits<unsigned long>::max() || errno != 0))
411 return _error->Error("Parsing patchfile %s failed: Expected an effected line end", f.Name().c_str());
412 if (unlikely(e < s))
413 return _error->Error("Parsing patchfile %s failed: Effected lines end %lu is before start %lu", f.Name().c_str(), e, s);
414 } else {
415 e = s;
416 c = m;
417 }
418 if (s > ch.offset)
419 return _error->Error("Parsing patchfile %s failed: Effected line is after previous effected line", f.Name().c_str());
420 switch(*c) {
421 case 'a':
422 cmdwanted = false;
423 ch.add = NULL;
424 ch.add_cnt = 0;
425 ch.add_len = 0;
426 ch.offset = s;
427 ch.del_cnt = 0;
428 break;
429 case 'c':
430 if (unlikely(s == 0))
431 return _error->Error("Parsing patchfile %s failed: Change command can't effect line zero", f.Name().c_str());
432 cmdwanted = false;
433 ch.add = NULL;
434 ch.add_cnt = 0;
435 ch.add_len = 0;
436 ch.offset = s - 1;
437 ch.del_cnt = e - s + 1;
438 break;
439 case 'd':
440 if (unlikely(s == 0))
441 return _error->Error("Parsing patchfile %s failed: Delete command can't effect line zero", f.Name().c_str());
442 ch.offset = s - 1;
443 ch.del_cnt = e - s + 1;
444 ch.add = NULL;
445 ch.add_cnt = 0;
446 ch.add_len = 0;
447 filechanges.add_change(ch);
448 break;
449 default:
450 return _error->Error("Parsing patchfile %s failed: Unknown command", f.Name().c_str());
451 }
452 } else { /* !cmdwanted */
453 if (strcmp(buffer, ".\n") == 0) {
454 cmdwanted = true;
455 filechanges.add_change(ch);
456 } else {
457 char *last = NULL;
458 char *add;
459 size_t l;
460 if (ch.add)
461 last = ch.add + ch.add_len;
462 l = strlen(buffer);
463 add = add_text.add_easy(buffer, l, last);
464 if (!add) {
465 ch.add_len += l;
466 ch.add_cnt++;
467 } else {
468 if (ch.add) {
469 filechanges.add_change(ch);
470 ch.del_cnt = 0;
471 }
472 ch.offset += ch.add_cnt;
473 ch.add = add;
474 ch.add_len = l;
475 ch.add_cnt = 1;
476 }
477 }
478 }
479 } while(f.ReadLine(buffer, sizeof(buffer)));
480 return true;
481 }
482
483 void write_diff(FileFd &f)
484 {
485 unsigned long long line = 0;
486 std::list<struct Change>::reverse_iterator ch;
487 for (ch = filechanges.rbegin(); ch != filechanges.rend(); ++ch) {
488 line += ch->offset + ch->del_cnt;
489 }
490
491 for (ch = filechanges.rbegin(); ch != filechanges.rend(); ++ch) {
492 std::list<struct Change>::reverse_iterator mg_i, mg_e = ch;
493 while (ch->del_cnt == 0 && ch->offset == 0)
494 {
495 ++ch;
496 if (unlikely(ch == filechanges.rend()))
497 return;
498 }
499 line -= ch->del_cnt;
500 std::string buf;
501 if (ch->add_cnt > 0) {
502 if (ch->del_cnt == 0) {
503 strprintf(buf, "%llua\n", line);
504 } else if (ch->del_cnt == 1) {
505 strprintf(buf, "%lluc\n", line+1);
506 } else {
507 strprintf(buf, "%llu,%lluc\n", line+1, line+ch->del_cnt);
508 }
509 f.Write(buf.c_str(), buf.length());
510
511 mg_i = ch;
512 do {
513 dump_mem(f, mg_i->add, mg_i->add_len, NULL);
514 } while (mg_i-- != mg_e);
515
516 buf = ".\n";
517 f.Write(buf.c_str(), buf.length());
518 } else if (ch->del_cnt == 1) {
519 strprintf(buf, "%llud\n", line+1);
520 f.Write(buf.c_str(), buf.length());
521 } else if (ch->del_cnt > 1) {
522 strprintf(buf, "%llu,%llud\n", line+1, line+ch->del_cnt);
523 f.Write(buf.c_str(), buf.length());
524 }
525 line -= ch->offset;
526 }
527 }
528
529 void apply_against_file(FileFd &out, FileFd &in, Hashes *hash = NULL)
530 {
531 std::list<struct Change>::iterator ch;
532 for (ch = filechanges.begin(); ch != filechanges.end(); ++ch) {
533 dump_lines(out, in, ch->offset, hash);
534 skip_lines(in, ch->del_cnt);
535 dump_mem(out, ch->add, ch->add_len, hash);
536 }
537 dump_rest(out, in, hash);
538 out.Flush();
539 }
540 };
541
542 class RredMethod : public aptMethod {
543 private:
544 bool Debug;
545
546 struct PDiffFile {
547 std::string FileName;
548 HashStringList ExpectedHashes;
549 PDiffFile(std::string const &FileName, HashStringList const &ExpectedHashes) :
550 FileName(FileName), ExpectedHashes(ExpectedHashes) {}
551 };
552
553 HashStringList ReadExpectedHashesForPatch(unsigned int const patch, std::string const &Message)
554 {
555 HashStringList ExpectedHashes;
556 for (char const * const * type = HashString::SupportedHashes(); *type != NULL; ++type)
557 {
558 std::string tagname;
559 strprintf(tagname, "Patch-%d-%s-Hash", patch, *type);
560 std::string const hashsum = LookupTag(Message, tagname.c_str());
561 if (hashsum.empty() == false)
562 ExpectedHashes.push_back(HashString(*type, hashsum));
563 }
564 return ExpectedHashes;
565 }
566
567 protected:
568 virtual bool URIAcquire(std::string const &Message, FetchItem *Itm) APT_OVERRIDE {
569 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
570 URI Get = Itm->Uri;
571 std::string Path = Get.Host + Get.Path; // rred:/path - no host
572
573 FetchResult Res;
574 Res.Filename = Itm->DestFile;
575 if (Itm->Uri.empty())
576 {
577 Path = Itm->DestFile;
578 Itm->DestFile.append(".result");
579 } else
580 URIStart(Res);
581
582 std::vector<PDiffFile> patchfiles;
583 Patch patch;
584
585 if (FileExists(Path + ".ed") == true)
586 {
587 HashStringList const ExpectedHashes = ReadExpectedHashesForPatch(0, Message);
588 std::string const FileName = Path + ".ed";
589 if (ExpectedHashes.usable() == false)
590 return _error->Error("No hashes found for uncompressed patch: %s", FileName.c_str());
591 patchfiles.push_back(PDiffFile(FileName, ExpectedHashes));
592 }
593 else
594 {
595 _error->PushToStack();
596 std::vector<std::string> patches = GetListOfFilesInDir(flNotFile(Path), "gz", true, false);
597 _error->RevertToStack();
598
599 std::string const baseName = Path + ".ed.";
600 unsigned int seen_patches = 0;
601 for (std::vector<std::string>::const_iterator p = patches.begin();
602 p != patches.end(); ++p)
603 {
604 if (p->compare(0, baseName.length(), baseName) == 0)
605 {
606 HashStringList const ExpectedHashes = ReadExpectedHashesForPatch(seen_patches, Message);
607 if (ExpectedHashes.usable() == false)
608 return _error->Error("No hashes found for uncompressed patch %d: %s", seen_patches, p->c_str());
609 patchfiles.push_back(PDiffFile(*p, ExpectedHashes));
610 ++seen_patches;
611 }
612 }
613 }
614
615 std::string patch_name;
616 for (std::vector<PDiffFile>::iterator I = patchfiles.begin();
617 I != patchfiles.end();
618 ++I)
619 {
620 patch_name = I->FileName;
621 if (Debug == true)
622 std::clog << "Patching " << Path << " with " << patch_name
623 << std::endl;
624
625 FileFd p;
626 Hashes patch_hash(I->ExpectedHashes);
627 // all patches are compressed, even if the name doesn't reflect it
628 if (p.Open(patch_name, FileFd::ReadOnly, FileFd::Gzip) == false ||
629 patch.read_diff(p, &patch_hash) == false)
630 {
631 _error->DumpErrors(std::cerr, GlobalError::DEBUG, false);
632 return false;
633 }
634 p.Close();
635 HashStringList const hsl = patch_hash.GetHashStringList();
636 if (hsl != I->ExpectedHashes)
637 return _error->Error("Hash Sum mismatch for uncompressed patch %s", patch_name.c_str());
638 }
639
640 if (Debug == true)
641 std::clog << "Applying patches against " << Path
642 << " and writing results to " << Itm->DestFile
643 << std::endl;
644
645 FileFd inp, out;
646 if (inp.Open(Path, FileFd::ReadOnly, FileFd::Extension) == false)
647 {
648 std::cerr << "FAILED to open inp " << Path << std::endl;
649 return _error->Error("Failed to open inp %s", Path.c_str());
650 }
651 if (out.Open(Itm->DestFile, FileFd::WriteOnly | FileFd::Create | FileFd::BufferedWrite, FileFd::Extension) == false)
652 {
653 std::cerr << "FAILED to open out " << Itm->DestFile << std::endl;
654 return _error->Error("Failed to open out %s", Itm->DestFile.c_str());
655 }
656
657 Hashes hash(Itm->ExpectedHashes);
658 patch.apply_against_file(out, inp, &hash);
659
660 out.Close();
661 inp.Close();
662
663 if (Debug == true) {
664 std::clog << "rred: finished file patching of " << Path << "." << std::endl;
665 }
666
667 struct stat bufbase, bufpatch;
668 if (stat(Path.c_str(), &bufbase) != 0 ||
669 stat(patch_name.c_str(), &bufpatch) != 0)
670 return _error->Errno("stat", _("Failed to stat %s"), Path.c_str());
671
672 struct timeval times[2];
673 times[0].tv_sec = bufbase.st_atime;
674 times[1].tv_sec = bufpatch.st_mtime;
675 times[0].tv_usec = times[1].tv_usec = 0;
676 if (utimes(Itm->DestFile.c_str(), times) != 0)
677 return _error->Errno("utimes",_("Failed to set modification time"));
678
679 if (stat(Itm->DestFile.c_str(), &bufbase) != 0)
680 return _error->Errno("stat", _("Failed to stat %s"), Itm->DestFile.c_str());
681
682 Res.LastModified = bufbase.st_mtime;
683 Res.Size = bufbase.st_size;
684 Res.TakeHashes(hash);
685 URIDone(Res);
686
687 return true;
688 }
689
690 public:
691 RredMethod() : aptMethod("rred", "2.0", SendConfig), Debug(false) {}
692 };
693
694 int main(int argc, char **argv)
695 {
696 int i;
697 bool just_diff = true;
698 bool test = false;
699 Patch patch;
700
701 if (argc <= 1) {
702 RredMethod Mth;
703 return Mth.Run();
704 }
705
706 // Usage: rred -t input output diff ...
707 if (argc > 1 && strcmp(argv[1], "-t") == 0) {
708 // Read config files so we see compressors.
709 pkgInitConfig(*_config);
710 just_diff = false;
711 test = true;
712 i = 4;
713 } else if (argc > 1 && strcmp(argv[1], "-f") == 0) {
714 just_diff = false;
715 i = 2;
716 } else {
717 i = 1;
718 }
719
720 for (; i < argc; i++) {
721 FileFd p;
722 if (p.Open(argv[i], FileFd::ReadOnly) == false) {
723 _error->DumpErrors(std::cerr);
724 exit(1);
725 }
726 if (patch.read_diff(p, NULL) == false)
727 {
728 _error->DumpErrors(std::cerr);
729 exit(2);
730 }
731 }
732
733 if (test) {
734 FileFd out, inp;
735 std::cerr << "Patching " << argv[2] << " into " << argv[3] << "\n";
736 inp.Open(argv[2], FileFd::ReadOnly,FileFd::Extension);
737 out.Open(argv[3], FileFd::WriteOnly | FileFd::Create | FileFd::BufferedWrite, FileFd::Extension);
738 patch.apply_against_file(out, inp);
739 out.Close();
740 } else if (just_diff) {
741 FileFd out;
742 out.OpenDescriptor(STDOUT_FILENO, FileFd::WriteOnly | FileFd::Create);
743 patch.write_diff(out);
744 out.Close();
745 } else {
746 FileFd out, inp;
747 out.OpenDescriptor(STDOUT_FILENO, FileFd::WriteOnly | FileFd::Create | FileFd::BufferedWrite);
748 inp.OpenDescriptor(STDIN_FILENO, FileFd::ReadOnly);
749 patch.apply_against_file(out, inp);
750 out.Close();
751 }
752 return 0;
753 }