]> git.saurik.com Git - apt.git/blame - methods/rred.cc
enable FileFd to guess the compressor based on the filename if requested or
[apt.git] / methods / rred.cc
CommitLineData
bb1293d9 1// Includes /*{{{*/
ea542140
DK
2#include <config.h>
3
2e178d1c 4#include <apt-pkg/fileutl.h>
bb1293d9 5#include <apt-pkg/mmap.h>
2e178d1c
MV
6#include <apt-pkg/error.h>
7#include <apt-pkg/acquire-method.h>
8#include <apt-pkg/strutl.h>
9#include <apt-pkg/hashes.h>
472ff00e 10#include <apt-pkg/configuration.h>
2e178d1c
MV
11
12#include <sys/stat.h>
bb1293d9 13#include <sys/uio.h>
2e178d1c
MV
14#include <unistd.h>
15#include <utime.h>
16#include <stdio.h>
17#include <errno.h>
caffd480 18#include <zlib.h>
2e178d1c 19#include <apti18n.h>
bb1293d9
DK
20 /*}}}*/
21/** \brief RredMethod - ed-style incremential patch method {{{
22 *
23 * This method implements a patch functionality similar to "patch --ed" that is
24 * used by the "tiffany" incremental packages download stuff. It differs from
25 * "ed" insofar that it is way more restricted (and therefore secure).
26 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
27 * "<em>d</em>elete" (diff doesn't output any other).
28 * Additionally the records must be reverse sorted by line number and
29 * may not overlap (diff *seems* to produce this kind of output).
d84cd865 30 * */
bb1293d9
DK
31class RredMethod : public pkgAcqMethod {
32 bool Debug;
33 // the size of this doesn't really matter (except for performance)
34 const static int BUF_SIZE = 1024;
35 // the supported ed commands
36 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
37 // return values
38 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
d84cd865 39
caffd480 40 State applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
bb1293d9
DK
41 unsigned long &line, char *buffer, Hashes *hash) const;
42 void ignoreLineInFile(FILE *fin, char *buffer) const;
caffd480 43 void ignoreLineInFile(gzFile &fin, char *buffer) const;
bb1293d9
DK
44 void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
45 Hashes *hash, char *buffer) const;
caffd480
DK
46 void copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,
47 Hashes *hash, char *buffer) const;
2e178d1c 48
bb1293d9
DK
49 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
50 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
51
52protected:
53 // the methods main method
54 virtual bool Fetch(FetchItem *Itm);
55
56public:
f5a34606 57 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
2e178d1c 58};
bb1293d9
DK
59 /*}}}*/
60/** \brief applyFile - in reverse order with a tail recursion {{{
61 *
62 * As it is expected that the commands are in reversed order in the patch file
63 * we check in the first half if the command is valid, but doesn't execute it
64 * and move a step deeper. After reaching the end of the file we apply the
65 * patches in the correct order: last found command first.
66 *
67 * \param ed_cmds patch file to apply
68 * \param in_file base file we want to patch
69 * \param out_file file to write the patched result to
70 * \param line of command operation
71 * \param buffer internal used read/write buffer
72 * \param hash the created file for correctness
73 * \return the success State of the ed command executor
74 */
caffd480 75RredMethod::State RredMethod::applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
bb1293d9
DK
76 unsigned long &line, char *buffer, Hashes *hash) const {
77 // get the current command and parse it
caffd480 78 if (gzgets(ed_cmds, buffer, BUF_SIZE) == NULL) {
bb1293d9
DK
79 if (Debug == true)
80 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
81 line = 0;
82 return ED_OK;
83 }
2e178d1c 84
bb1293d9
DK
85 // parse in the effected linenumbers
86 char* idx;
87 errno=0;
88 unsigned long const startline = strtol(buffer, &idx, 10);
89 if (errno == ERANGE || errno == EINVAL) {
90 _error->Errno("rred", "startline is an invalid number");
91 return ED_PARSER;
92 }
93 if (startline > line) {
94 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
95 return ED_ORDERING;
96 }
97 unsigned long stopline;
98 if (*idx == ',') {
99 idx++;
100 errno=0;
101 stopline = strtol(idx, &idx, 10);
102 if (errno == ERANGE || errno == EINVAL) {
103 _error->Errno("rred", "stopline is an invalid number");
104 return ED_PARSER;
105 }
106 }
107 else {
108 stopline = startline;
109 }
110 line = startline;
111
112 // which command to execute on this line(s)?
113 switch (*idx) {
114 case MODE_CHANGED:
115 if (Debug == true)
116 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
117 break;
118 case MODE_ADDED:
119 if (Debug == true)
120 std::clog << "Insert after line " << startline << std::endl;
121 break;
122 case MODE_DELETED:
123 if (Debug == true)
124 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
125 break;
126 default:
127 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
128 return ED_PARSER;
129 }
130 unsigned char mode = *idx;
131
132 // save the current position
caffd480 133 unsigned const long pos = gztell(ed_cmds);
bb1293d9
DK
134
135 // if this is add or change then go to the next full stop
136 unsigned int data_length = 0;
137 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
138 do {
139 ignoreLineInFile(ed_cmds, buffer);
140 data_length++;
141 }
142 while (strncmp(buffer, ".", 1) != 0);
143 data_length--; // the dot should not be copied
144 }
145
146 // do the recursive call - the last command is the one we need to execute at first
147 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
148 if (child != ED_OK) {
149 return child;
150 }
151
152 // change and delete are working on "line" - add is done after "line"
153 if (mode != MODE_ADDED)
154 line++;
155
156 // first wind to the current position and copy over all unchanged lines
157 if (line < startline) {
158 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
159 line = startline;
160 }
2e178d1c 161
bb1293d9
DK
162 if (mode != MODE_ADDED)
163 line--;
164
165 // include data from ed script
166 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
caffd480 167 gzseek(ed_cmds, pos, SEEK_SET);
bb1293d9
DK
168 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
169 }
170
171 // ignore the corresponding number of lines from input
172 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
173 while (line < stopline) {
174 ignoreLineInFile(in_file, buffer);
175 line++;
176 }
177 }
178 return ED_OK;
179}
180 /*}}}*/
181void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
182 Hashes *hash, char *buffer) const {
183 while (0 < lines--) {
184 do {
185 fgets(buffer, BUF_SIZE, fin);
186 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
187 hash->Add((unsigned char*)buffer, written);
188 } while (strlen(buffer) == (BUF_SIZE - 1) &&
189 buffer[BUF_SIZE - 2] != '\n');
190 }
191}
192 /*}}}*/
caffd480
DK
193void RredMethod::copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,/*{{{*/
194 Hashes *hash, char *buffer) const {
195 while (0 < lines--) {
196 do {
197 gzgets(fin, buffer, BUF_SIZE);
198 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
199 hash->Add((unsigned char*)buffer, written);
200 } while (strlen(buffer) == (BUF_SIZE - 1) &&
201 buffer[BUF_SIZE - 2] != '\n');
202 }
203}
204 /*}}}*/
bb1293d9
DK
205void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
206 fgets(buffer, BUF_SIZE, fin);
207 while (strlen(buffer) == (BUF_SIZE - 1) &&
208 buffer[BUF_SIZE - 2] != '\n') {
209 fgets(buffer, BUF_SIZE, fin);
210 buffer[0] = ' ';
211 }
212}
213 /*}}}*/
caffd480
DK
214void RredMethod::ignoreLineInFile(gzFile &fin, char *buffer) const { /*{{{*/
215 gzgets(fin, buffer, BUF_SIZE);
216 while (strlen(buffer) == (BUF_SIZE - 1) &&
217 buffer[BUF_SIZE - 2] != '\n') {
218 gzgets(fin, buffer, BUF_SIZE);
219 buffer[0] = ' ';
220 }
221}
222 /*}}}*/
bb1293d9
DK
223RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
224 FileFd &out_file, Hashes *hash) const {
d84cd865 225 char buffer[BUF_SIZE];
bb1293d9 226 FILE* fFrom = fdopen(From.Fd(), "r");
caffd480 227 gzFile fPatch = Patch.gzFd();
bb1293d9
DK
228 FILE* fTo = fdopen(out_file.Fd(), "w");
229
d84cd865 230 /* we do a tail recursion to read the commands in the right order */
bb1293d9
DK
231 unsigned long line = -1; // assign highest possible value
232 State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
d84cd865
MV
233
234 /* read the rest from infile */
bb1293d9
DK
235 if (result == ED_OK) {
236 while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
237 size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
d84cd865
MV
238 hash->Add((unsigned char*)buffer, written);
239 }
bb1293d9 240 fflush(fTo);
d84cd865 241 }
bb1293d9 242 return result;
2e178d1c 243}
bb1293d9 244 /*}}}*/
f5a34606
DK
245/* struct EdCommand {{{*/
246#ifdef _POSIX_MAPPED_FILES
247struct EdCommand {
bb1293d9
DK
248 size_t data_start;
249 size_t data_end;
250 size_t data_lines;
251 size_t first_line;
252 size_t last_line;
253 char type;
254};
255#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
f5a34606 256#endif
bb1293d9
DK
257 /*}}}*/
258RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
259 FileFd &out_file, Hashes *hash) const {
260#ifdef _POSIX_MAPPED_FILES
37254316 261 MMap ed_cmds(MMap::ReadOnly);
caffd480 262 if (Patch.gzFd() != NULL) {
650faab0 263 unsigned long long mapSize = Patch.Size();
00542838 264 DynamicMMap* dyn = new DynamicMMap(0, mapSize, 0);
2a79d5b5 265 if (dyn->validData() == false) {
00542838
DK
266 delete dyn;
267 return MMAP_FAILED;
268 }
269 dyn->AddSize(mapSize);
270 gzread(Patch.gzFd(), dyn->Data(), mapSize);
271 ed_cmds = *dyn;
37254316
DK
272 } else
273 ed_cmds = MMap(Patch, MMap::ReadOnly);
274
bb1293d9
DK
275 MMap in_file(From, MMap::ReadOnly);
276
277 if (ed_cmds.Size() == 0 || in_file.Size() == 0)
278 return MMAP_FAILED;
279
280 EdCommand* commands = 0;
281 size_t command_count = 0;
282 size_t command_alloc = 0;
283
284 const char* begin = (char*) ed_cmds.Data();
285 const char* end = begin;
286 const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
287
288 const char* input = (char*) in_file.Data();
289 const char* input_end = (char*) in_file.Data() + in_file.Size();
290
291 size_t i;
292
293 /* 1. Parse entire script. It is executed in reverse order, so we cather it
294 * in the `commands' buffer first
295 */
296
297 for(;;) {
298 EdCommand cmd;
299 cmd.data_start = 0;
300 cmd.data_end = 0;
301
302 while(begin != ed_end && *begin == '\n')
303 ++begin;
304 while(end != ed_end && *end != '\n')
305 ++end;
306 if(end == ed_end && begin == end)
307 break;
308
309 /* Determine command range */
310 const char* tmp = begin;
311
312 for(;;) {
313 /* atoll is safe despite lacking NUL-termination; we know there's an
314 * alphabetic character at end[-1]
315 */
316 if(tmp == end) {
317 cmd.first_line = atol(begin);
318 cmd.last_line = cmd.first_line;
319 break;
320 }
321 if(*tmp == ',') {
322 cmd.first_line = atol(begin);
323 cmd.last_line = atol(tmp + 1);
324 break;
325 }
326 ++tmp;
327 }
328
329 // which command to execute on this line(s)?
330 switch (end[-1]) {
331 case MODE_CHANGED:
332 if (Debug == true)
333 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
334 break;
335 case MODE_ADDED:
336 if (Debug == true)
337 std::clog << "Insert after line " << cmd.first_line << std::endl;
338 break;
339 case MODE_DELETED:
340 if (Debug == true)
341 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
342 break;
343 default:
344 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
345 free(commands);
346 return ED_PARSER;
347 }
348 cmd.type = end[-1];
349
350 /* Determine the size of the inserted text, so we don't have to scan this
351 * text again later.
352 */
353 begin = end + 1;
354 end = begin;
355 cmd.data_lines = 0;
356
357 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
358 cmd.data_start = begin - (char*) ed_cmds.Data();
359 while(end != ed_end) {
360 if(*end == '\n') {
361 if(end[-1] == '.' && end[-2] == '\n')
362 break;
363 ++cmd.data_lines;
364 }
365 ++end;
366 }
367 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
368 begin = end + 1;
369 end = begin;
370 }
371 if(command_count == command_alloc) {
372 command_alloc = (command_alloc + 64) * 3 / 2;
373 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
374 }
375 commands[command_count++] = cmd;
376 }
377
378 struct iovec* iov = new struct iovec[IOV_COUNT];
379 size_t iov_size = 0;
380
381 size_t amount, remaining;
382 size_t line = 1;
383 EdCommand* cmd;
384
385 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
386 * using writev to minimize the number of system calls. Data is read
387 * directly from the memory mappings of the input file and the script.
388 */
389
390 for(i = command_count; i-- > 0; ) {
391 cmd = &commands[i];
392 if(cmd->type == MODE_ADDED)
393 amount = cmd->first_line + 1;
394 else
395 amount = cmd->first_line;
396
397 if(line < amount) {
398 begin = input;
399 while(line != amount) {
400 input = (const char*) memchr(input, '\n', input_end - input);
401 if(!input)
402 break;
403 ++line;
404 ++input;
405 }
2e178d1c 406
bb1293d9
DK
407 iov[iov_size].iov_base = (void*) begin;
408 iov[iov_size].iov_len = input - begin;
409 hash->Add((const unsigned char*) begin, input - begin);
2e178d1c 410
bb1293d9
DK
411 if(++iov_size == IOV_COUNT) {
412 writev(out_file.Fd(), iov, IOV_COUNT);
413 iov_size = 0;
414 }
415 }
416
417 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
418 remaining = (cmd->last_line - cmd->first_line) + 1;
419 line += remaining;
420 while(remaining) {
421 input = (const char*) memchr(input, '\n', input_end - input);
422 if(!input)
423 break;
424 --remaining;
425 ++input;
426 }
427 }
428
429 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
430 if(cmd->data_end != cmd->data_start) {
431 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
432 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
433 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
434 iov[iov_size].iov_len);
435
436 if(++iov_size == IOV_COUNT) {
437 writev(out_file.Fd(), iov, IOV_COUNT);
438 iov_size = 0;
439 }
440 }
441 }
442 }
443
444 if(input != input_end) {
445 iov[iov_size].iov_base = (void*) input;
446 iov[iov_size].iov_len = input_end - input;
447 hash->Add((const unsigned char*) input, input_end - input);
448 ++iov_size;
449 }
450
451 if(iov_size) {
452 writev(out_file.Fd(), iov, iov_size);
453 iov_size = 0;
454 }
455
456 for(i = 0; i < iov_size; i += IOV_COUNT) {
457 if(iov_size - i < IOV_COUNT)
458 writev(out_file.Fd(), iov + i, iov_size - i);
459 else
460 writev(out_file.Fd(), iov + i, IOV_COUNT);
461 }
462
463 delete [] iov;
464 free(commands);
465
466 return ED_OK;
467#else
468 return MMAP_FAILED;
469#endif
470}
471 /*}}}*/
472bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
2e178d1c 473{
bb1293d9 474 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
2e178d1c 475 URI Get = Itm->Uri;
8f3ba4e8 476 std::string Path = Get.Host + Get.Path; // To account for relative paths
bb1293d9 477
2e178d1c
MV
478 FetchResult Res;
479 Res.Filename = Itm->DestFile;
bb1293d9
DK
480 if (Itm->Uri.empty() == true) {
481 Path = Itm->DestFile;
482 Itm->DestFile.append(".result");
483 } else
484 URIStart(Res);
4a0a786f 485
6040f589
MV
486 if (Debug == true)
487 std::clog << "Patching " << Path << " with " << Path
488 << ".ed and putting result into " << Itm->DestFile << std::endl;
59a704f0
MV
489 // Open the source and destination files (the d'tor of FileFd will do
490 // the cleanup/closing of the fds)
2e178d1c 491 FileFd From(Path,FileFd::ReadOnly);
468720c5 492 FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip);
22041bd2 493 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
2e178d1c
MV
494 To.EraseOnFailure();
495 if (_error->PendingError() == true)
496 return false;
497
498 Hashes Hash;
2e178d1c 499 // now do the actual patching
bb1293d9
DK
500 State const result = patchMMap(Patch, From, To, &Hash);
501 if (result == MMAP_FAILED) {
502 // retry with patchFile
caffd480
DK
503 Patch.Seek(0);
504 From.Seek(0);
22041bd2 505 To.Open(Itm->DestFile,FileFd::WriteAtomic);
bb1293d9
DK
506 if (_error->PendingError() == true)
507 return false;
508 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
509 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
510 } else if (Debug == true) {
511 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
512 }
513 } else if (result != ED_OK) {
514 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
515 } else if (Debug == true) {
516 std::clog << "rred: finished mmap patching of " << Path << std::endl;
3de9ff77
MV
517 }
518
519 // write out the result
3de9ff77
MV
520 From.Close();
521 Patch.Close();
522 To.Close();
523
1082d4c7
DK
524 /* Transfer the modification times from the patch file
525 to be able to see in which state the file should be
526 and use the access time from the "old" file */
527 struct stat BufBase, BufPatch;
528 if (stat(Path.c_str(),&BufBase) != 0 ||
8f3ba4e8 529 stat(std::string(Path+".ed").c_str(),&BufPatch) != 0)
3de9ff77
MV
530 return _error->Errno("stat",_("Failed to stat"));
531
532 struct utimbuf TimeBuf;
1082d4c7
DK
533 TimeBuf.actime = BufBase.st_atime;
534 TimeBuf.modtime = BufPatch.st_mtime;
3de9ff77
MV
535 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
536 return _error->Errno("utime",_("Failed to set modification time"));
537
1082d4c7 538 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
3de9ff77
MV
539 return _error->Errno("stat",_("Failed to stat"));
540
541 // return done
1082d4c7
DK
542 Res.LastModified = BufBase.st_mtime;
543 Res.Size = BufBase.st_size;
2e178d1c
MV
544 Res.TakeHashes(Hash);
545 URIDone(Res);
3de9ff77 546
2e178d1c
MV
547 return true;
548}
bb1293d9
DK
549 /*}}}*/
550/** \brief Wrapper class for testing rred */ /*{{{*/
551class TestRredMethod : public RredMethod {
552public:
553 /** \brief Run rred in debug test mode
554 *
555 * This method can be used to run the rred method outside
556 * of the "normal" acquire environment for easier testing.
557 *
558 * \param base basename of all files involved in this rred test
559 */
560 bool Run(char const *base) {
561 _config->CndSet("Debug::pkgAcquire::RRed", "true");
562 FetchItem *test = new FetchItem;
563 test->DestFile = base;
564 return Fetch(test);
565 }
566};
567 /*}}}*/
568/** \brief Starter for the rred method (or its test method) {{{
569 *
570 * Used without parameters is the normal behavior for methods for
571 * the APT acquire system. While this works great for the acquire system
572 * it is very hard to test the method and therefore the method also
573 * accepts one parameter which will switch it directly to debug test mode:
574 * The test mode expects that if "Testfile" is given as parameter
575 * the file "Testfile" should be ed-style patched with "Testfile.ed"
576 * and will write the result to "Testfile.result".
577 */
578int main(int argc, char *argv[]) {
579 if (argc <= 1) {
580 RredMethod Mth;
581 return Mth.Run();
582 } else {
583 TestRredMethod Mth;
584 bool result = Mth.Run(argv[1]);
585 _error->DumpErrors();
586 return result;
587 }
2e178d1c 588}
bb1293d9 589 /*}}}*/