]> git.saurik.com Git - apt.git/blob - methods/rred.cc
reimplement rred to allow applying all the diffs in a single pass
[apt.git] / methods / rred.cc
1 // Copyright (c) 2014 Anthony Towns
2 //
3 // This program is free software; you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation; either version 2 of the License, or
6 // (at your option) any later version.
7
8 #include <config.h>
9
10 #include <apt-pkg/fileutl.h>
11 #include <apt-pkg/mmap.h>
12 #include <apt-pkg/error.h>
13 #include <apt-pkg/acquire-method.h>
14 #include <apt-pkg/strutl.h>
15 #include <apt-pkg/hashes.h>
16 #include <apt-pkg/configuration.h>
17
18 #include <string>
19 #include <list>
20 #include <vector>
21 #include <iterator>
22
23 #include <assert.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <sys/stat.h>
28 #include <utime.h>
29
30 #include <apti18n.h>
31
32 #define BLOCK_SIZE (512*1024)
33
34 class MemBlock {
35 char *start;
36 size_t size;
37 char *free;
38 struct MemBlock *next;
39
40 MemBlock(size_t size)
41 {
42 free = start = new char[size];
43 size = size;
44 next = NULL;
45 }
46
47 size_t avail(void) { return size - (free - start); }
48
49 public:
50
51 MemBlock(void) {
52 free = start = new char[BLOCK_SIZE];
53 size = BLOCK_SIZE;
54 next = NULL;
55 }
56
57 ~MemBlock() {
58 delete [] start;
59 delete next;
60 }
61
62 void clear(void) {
63 free = start;
64 if (next)
65 next->clear();
66 }
67
68 char *add_easy(char *src, size_t len, char *last)
69 {
70 if (last) {
71 for (MemBlock *k = this; k; k = k->next) {
72 if (k->free == last) {
73 if (len <= k->avail()) {
74 char *n = k->add(src, len);
75 assert(last == n);
76 if (last == n)
77 return NULL;
78 return n;
79 } else {
80 break;
81 }
82 } else if (last >= start && last < free) {
83 break;
84 }
85 }
86 }
87 return add(src, len);
88 }
89
90 char *add(char *src, size_t len) {
91 if (len > avail()) {
92 if (!next) {
93 if (len > BLOCK_SIZE) {
94 next = new MemBlock(len);
95 } else {
96 next = new MemBlock;
97 }
98 }
99 return next->add(src, len);
100 }
101 char *dst = free;
102 free += len;
103 memcpy(dst, src, len);
104 return dst;
105 }
106 };
107
108 struct Change {
109 /* Ordering:
110 *
111 * 1. write out <offset> lines unchanged
112 * 2. skip <del_cnt> lines from source
113 * 3. write out <add_cnt> lines (<add>/<add_len>)
114 */
115 size_t offset;
116 size_t del_cnt;
117 size_t add_cnt; /* lines */
118 size_t add_len; /* bytes */
119 char *add;
120
121 Change(int off)
122 {
123 offset = off;
124 del_cnt = add_cnt = add_len = 0;
125 add = NULL;
126 }
127
128 /* actually, don't write <lines> lines from <add> */
129 void skip_lines(size_t lines)
130 {
131 while (lines > 0) {
132 char *s = (char*) memchr(add, '\n', add_len);
133 assert(s != NULL);
134 s++;
135 add_len -= (s - add);
136 add_cnt--;
137 lines--;
138 if (add_len == 0) {
139 add = NULL;
140 assert(add_cnt == 0);
141 assert(lines == 0);
142 } else {
143 add = s;
144 assert(add_cnt > 0);
145 }
146 }
147 }
148 };
149
150 class FileChanges {
151 std::list<struct Change> changes;
152 std::list<struct Change>::iterator where;
153 size_t pos; // line number is as far left of iterator as possible
154
155 bool pos_is_okay(void)
156 {
157 #ifdef POSDEBUG
158 size_t cpos = 0;
159 std::list<struct Change>::iterator x;
160 for (x = changes.begin(); x != where; x++) {
161 assert(x != changes.end());
162 cpos += x->offset + x->add_cnt;
163 }
164 return cpos == pos;
165 #else
166 return true;
167 #endif
168 }
169
170 public:
171 FileChanges() {
172 where = changes.end();
173 pos = 0;
174 }
175
176 std::list<struct Change>::iterator begin(void) { return changes.begin(); }
177 std::list<struct Change>::iterator end(void) { return changes.end(); }
178
179 std::list<struct Change>::reverse_iterator rbegin(void) { return changes.rbegin(); }
180 std::list<struct Change>::reverse_iterator rend(void) { return changes.rend(); }
181
182 void add_change(Change c) {
183 assert(pos_is_okay());
184 go_to_change_for(c.offset);
185 assert(pos + where->offset == c.offset);
186 if (c.del_cnt > 0)
187 delete_lines(c.del_cnt);
188 assert(pos + where->offset == c.offset);
189 if (c.add_len > 0) {
190 assert(pos_is_okay());
191 if (where->add_len > 0)
192 new_change();
193 assert(where->add_len == 0 && where->add_cnt == 0);
194
195 where->add_len = c.add_len;
196 where->add_cnt = c.add_cnt;
197 where->add = c.add;
198 }
199 assert(pos_is_okay());
200 merge();
201 assert(pos_is_okay());
202 }
203
204 private:
205 void merge(void)
206 {
207 while (where->offset == 0 && where != changes.begin()) {
208 left();
209 }
210 std::list<struct Change>::iterator next = where;
211 next++;
212
213 while (next != changes.end() && next->offset == 0) {
214 where->del_cnt += next->del_cnt;
215 next->del_cnt = 0;
216 if (next->add == NULL) {
217 next = changes.erase(next);
218 } else if (where->add == NULL) {
219 where->add = next->add;
220 where->add_len = next->add_len;
221 where->add_cnt = next->add_cnt;
222 next = changes.erase(next);
223 } else {
224 next++;
225 }
226 }
227 }
228
229 void go_to_change_for(size_t line)
230 {
231 while(where != changes.end()) {
232 if (line < pos) {
233 left();
234 continue;
235 }
236 if (pos + where->offset + where->add_cnt <= line) {
237 right();
238 continue;
239 }
240 // line is somewhere in this slot
241 if (line < pos + where->offset) {
242 break;
243 } else if (line == pos + where->offset) {
244 return;
245 } else {
246 split(line - pos);
247 right();
248 return;
249 }
250 }
251 /* it goes before this patch */
252 insert(line-pos);
253 }
254
255 void new_change(void) { insert(where->offset); }
256
257 void insert(size_t offset)
258 {
259 assert(pos_is_okay());
260 assert(where == changes.end() || offset <= where->offset);
261 if (where != changes.end())
262 where->offset -= offset;
263 changes.insert(where, Change(offset));
264 where--;
265 assert(pos_is_okay());
266 }
267
268 void split(size_t offset)
269 {
270 assert(pos_is_okay());
271
272 assert(where->offset < offset);
273 assert(offset < where->offset + where->add_cnt);
274
275 size_t keep_lines = offset - where->offset;
276
277 Change before(*where);
278
279 where->del_cnt = 0;
280 where->offset = 0;
281 where->skip_lines(keep_lines);
282
283 before.add_cnt = keep_lines;
284 before.add_len -= where->add_len;
285
286 changes.insert(where, before);
287 where--;
288 assert(pos_is_okay());
289 }
290
291 size_t check_next_offset(size_t max)
292 {
293 assert(pos_is_okay());
294 if (max > 0)
295 {
296 where++;
297 if (where != changes.end()) {
298 if (where->offset < max)
299 max = where->offset;
300 }
301 where--;
302 assert(pos_is_okay());
303 }
304 return max;
305 }
306
307 void delete_lines(size_t cnt)
308 {
309 std::list<struct Change>::iterator x = where;
310 assert(pos_is_okay());
311 while (cnt > 0)
312 {
313 size_t del;
314 del = x->add_cnt;
315 if (del > cnt)
316 del = cnt;
317 x->skip_lines(del);
318 cnt -= del;
319
320 x++;
321 if (x == changes.end()) {
322 del = cnt;
323 } else {
324 del = x->offset;
325 if (del > cnt)
326 del = cnt;
327 x->offset -= del;
328 }
329 where->del_cnt += del;
330 cnt -= del;
331 }
332 assert(pos_is_okay());
333 }
334
335 void left(void) {
336 assert(pos_is_okay());
337 where--;
338 pos -= where->offset + where->add_cnt;
339 assert(pos_is_okay());
340 }
341
342 void right(void) {
343 assert(pos_is_okay());
344 pos += where->offset + where->add_cnt;
345 where++;
346 assert(pos_is_okay());
347 }
348 };
349
350 class Patch {
351 FileChanges filechanges;
352 MemBlock add_text;
353
354 static void dump_rest(FILE *o, FILE *i, Hashes *hash)
355 {
356 char buffer[BLOCK_SIZE];
357 size_t l;
358 while (0 < (l = fread(buffer, 1, sizeof(buffer), i))) {
359 fwrite(buffer, 1, l, o);
360 if (hash)
361 hash->Add((unsigned char*)buffer, l);
362 }
363 }
364
365 static void dump_lines(FILE *o, FILE *i, size_t n, Hashes *hash)
366 {
367 char buffer[BLOCK_SIZE];
368 size_t l;
369 while (n > 0) {
370 if (fgets(buffer, sizeof(buffer), i) == 0)
371 buffer[0] = '\0';
372 l = strlen(buffer);
373 if (l == 0 || buffer[l-1] == '\n')
374 n--;
375 fwrite(buffer, 1, l, o);
376
377 if (hash)
378 hash->Add((unsigned char*)buffer, l);
379 }
380 }
381
382 static void skip_lines(FILE *i, int n)
383 {
384 char buffer[BLOCK_SIZE];
385 size_t l;
386 while (n > 0) {
387 if (fgets(buffer, sizeof(buffer), i) == 0)
388 buffer[0] = '\0';
389 l = strlen(buffer);
390 if (l == 0 || buffer[l-1] == '\n')
391 n--;
392 }
393 }
394
395 static bool dump_mem(FILE *o, char *p, size_t s, Hashes *hash) {
396 size_t r;
397 while (s > 0) {
398 r = fwrite(p, 1, s, o);
399 if (hash)
400 hash->Add((unsigned char*)p, s);
401 s -= r;
402 p += r;
403 if (r == 0) return false;
404 }
405 return true;
406 }
407
408 public:
409
410 void read_diff(FILE *f)
411 {
412 char buffer[BLOCK_SIZE];
413 bool cmdwanted = true;
414
415 Change ch(0);
416 while(fgets(buffer, sizeof(buffer), f))
417 {
418 if (cmdwanted) {
419 char *m, *c;
420 size_t s, e;
421 s = strtol(buffer, &m, 10);
422 if (m == buffer) {
423 s = e = ch.offset + ch.add_cnt;
424 c = buffer;
425 } else if (*m == ',') {
426 m++;
427 e = strtol(m, &c, 10);
428 } else {
429 e = s;
430 c = m;
431 }
432 switch(*c) {
433 case 'a':
434 cmdwanted = false;
435 ch.add = NULL;
436 ch.add_cnt = 0;
437 ch.add_len = 0;
438 ch.offset = s;
439 ch.del_cnt = 0;
440 break;
441 case 'c':
442 cmdwanted = false;
443 ch.add = NULL;
444 ch.add_cnt = 0;
445 ch.add_len = 0;
446 ch.offset = s - 1;
447 ch.del_cnt = e - s + 1;
448 break;
449 case 'd':
450 ch.offset = s - 1;
451 ch.del_cnt = e - s + 1;
452 ch.add = NULL;
453 ch.add_cnt = 0;
454 ch.add_len = 0;
455 filechanges.add_change(ch);
456 break;
457 }
458 } else { /* !cmdwaanted */
459 if (buffer[0] == '.' && buffer[1] == '\n') {
460 cmdwanted = true;
461 filechanges.add_change(ch);
462 } else {
463 char *last = NULL;
464 char *add;
465 size_t l;
466 if (ch.add)
467 last = ch.add + ch.add_len;
468 l = strlen(buffer);
469 add = add_text.add_easy(buffer, l, last);
470 if (!add) {
471 ch.add_len += l;
472 ch.add_cnt++;
473 } else {
474 if (ch.add) {
475 filechanges.add_change(ch);
476 ch.del_cnt = 0;
477 }
478 ch.offset += ch.add_cnt;
479 ch.add = add;
480 ch.add_len = l;
481 ch.add_cnt = 1;
482 }
483 }
484 }
485 }
486 }
487
488 void write_diff(FILE *f)
489 {
490 size_t line = 0;
491 std::list<struct Change>::reverse_iterator ch;
492 for (ch = filechanges.rbegin(); ch != filechanges.rend(); ch++) {
493 line += ch->offset + ch->del_cnt;
494 }
495
496 for (ch = filechanges.rbegin(); ch != filechanges.rend(); ch++) {
497 std::list<struct Change>::reverse_iterator mg_i, mg_e = ch;
498 while (ch->del_cnt == 0 && ch->offset == 0)
499 ch++;
500 line -= ch->del_cnt;
501 if (ch->add_cnt > 0) {
502 if (ch->del_cnt == 0) {
503 fprintf(f, "%lua\n", line);
504 } else if (ch->del_cnt == 1) {
505 fprintf(f, "%luc\n", line+1);
506 } else {
507 fprintf(f, "%lu,%luc\n", line+1, line+ch->del_cnt);
508 }
509
510 mg_i = ch;
511 do {
512 dump_mem(f, mg_i->add, mg_i->add_len, NULL);
513 } while (mg_i-- != mg_e);
514
515 fprintf(f, ".\n");
516 } else if (ch->del_cnt == 1) {
517 fprintf(f, "%lud\n", line+1);
518 } else if (ch->del_cnt > 1) {
519 fprintf(f, "%lu,%lud\n", line+1, line+ch->del_cnt);
520 }
521 line -= ch->offset;
522 }
523 }
524
525 void apply_against_file(FILE *out, FILE *in, Hashes *hash = NULL)
526 {
527 std::list<struct Change>::iterator ch;
528 for (ch = filechanges.begin(); ch != filechanges.end(); ch++) {
529 dump_lines(out, in, ch->offset, hash);
530 skip_lines(in, ch->del_cnt);
531 dump_mem(out, ch->add, ch->add_len, hash);
532 }
533 dump_rest(out, in, hash);
534 }
535 };
536
537 bool LookupPatches(const std::string &Message, std::vector<std::string> &lines)
538 {
539 const char *Tag = "Patches";
540 const size_t Length = strlen(Tag);
541
542 std::string::const_iterator I, J;
543
544 std::clog << "Looking for \"Patches:\" section in message:\n\n" << Message << "\n\n";
545 std::clog.flush();
546
547 for (I = Message.begin(); I + Length < Message.end(); ++I)
548 {
549 if (I[Length] == ':' && stringcasecmp(I, I+Length, Tag) == 0)
550 {
551 // found the tag, now read the patches
552 for(;;) {
553 for (; I < Message.end() && *I != '\n'; ++I);
554 if (I < Message.end()) I++;
555 if (I == Message.end() || *I != ' ')
556 break;
557 while (I < Message.end() && isspace(*I)) I++;
558 for (J = I; J < Message.end() && *J != '\n'; ++J)
559 ;
560 do
561 J--;
562 while (I < J && isspace(*J));
563 if (I < J)
564 lines.push_back(std::string(I,++J));
565 else
566 break;
567 I = J;
568 }
569 std::clog << "Found " << lines.size() << " patches!\n";
570 std::clog.flush();
571 return true;
572 }
573 }
574 std::clog << "Found no patches! :(\n";
575 std::clog.flush();
576 return false;
577 }
578
579
580 class RredMethod : public pkgAcqMethod {
581 private:
582 bool Debug;
583 std::vector<std::string> patchpaths;
584
585 protected:
586 virtual bool HandleMessage(int Number, std::string Message) {
587 if (Number == 600)
588 {
589 patchpaths.clear();
590 LookupPatches(Message, patchpaths);
591 std::clog << "Ended up with " << patchpaths.size() << " patches!\n";
592 std::clog.flush();
593 }
594 return pkgAcqMethod::HandleMessage(Number, Message);
595 }
596
597 virtual bool Fetch(FetchItem *Itm) {
598 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
599 URI Get = Itm->Uri;
600 std::string Path = Get.Host + Get.Path; // rred:/path - no host
601
602 FetchResult Res;
603 Res.Filename = Itm->DestFile;
604 if (Itm->Uri.empty()) {
605 Path = Itm->DestFile;
606 Itm->DestFile.append(".result");
607 } else
608 URIStart(Res);
609
610 Patch patch;
611
612 if (patchpaths.empty())
613 {
614 patchpaths.push_back(Path + ".ed");
615 }
616
617 std::string patch_name;
618 for (std::vector<std::string>::iterator I = patchpaths.begin();
619 I != patchpaths.end();
620 I++)
621 {
622 patch_name = *I;
623 if (Debug == true)
624 std::clog << "Patching " << Path << " with " << patch_name
625 << std::endl;
626
627 FILE *p = fopen(patch_name.c_str(), "r");
628 if (p == NULL) {
629 std::clog << "Could not open patch file " << patch_name << std::endl;
630 abort();
631 }
632 patch.read_diff(p);
633 fclose(p);
634 }
635
636 if (Debug == true)
637 std::clog << "Applying patches against " << Path
638 << " and writing results to " << Itm->DestFile
639 << std::endl;
640
641 FILE *inp = fopen(Path.c_str(), "r");
642 FILE *out = fopen(Itm->DestFile.c_str(), "w");
643
644 Hashes hash;
645
646 patch.apply_against_file(out, inp, &hash);
647
648 fclose(out);
649 fclose(inp);
650
651 if (Debug == true) {
652 std::clog << "rred: finished file patching of " << Path << "." << std::endl;
653 }
654
655 struct stat bufbase, bufpatch;
656 if (stat(Path.c_str(), &bufbase) != 0 ||
657 stat(patch_name.c_str(), &bufpatch) != 0)
658 return _error->Errno("stat", _("Failed to stat"));
659
660 struct utimbuf timebuf;
661 timebuf.actime = bufbase.st_atime;
662 timebuf.modtime = bufpatch.st_mtime;
663 if (utime(Itm->DestFile.c_str(), &timebuf) != 0)
664 return _error->Errno("utime", _("Failed to set modification time"));
665
666 if (stat(Itm->DestFile.c_str(), &bufbase) != 0)
667 return _error->Errno("stat", _("Failed to stat"));
668
669 Res.LastModified = bufbase.st_mtime;
670 Res.Size = bufbase.st_size;
671 Res.TakeHashes(hash);
672 URIDone(Res);
673
674 return true;
675 }
676
677 public:
678 RredMethod() : pkgAcqMethod("2.0",SingleInstance | SendConfig) {}
679 };
680
681 int main(int argc, char **argv)
682 {
683 int i;
684 bool just_diff = true;
685 Patch patch;
686
687 if (argc <= 1) {
688 RredMethod Mth;
689 return Mth.Run();
690 }
691
692 if (argc > 1 && strcmp(argv[1], "-f") == 0) {
693 just_diff = false;
694 i = 2;
695 } else {
696 i = 1;
697 }
698
699 for (; i < argc; i++) {
700 FILE *p;
701 p = fopen(argv[i], "r");
702 if (!p) {
703 perror(argv[i]);
704 exit(1);
705 }
706 patch.read_diff(p);
707 }
708
709 if (just_diff) {
710 patch.write_diff(stdout);
711 } else {
712 FILE *out, *inp;
713 out = stdout;
714 inp = stdin;
715
716 patch.apply_against_file(out, inp);
717 }
718 return 0;
719 }