]> git.saurik.com Git - apt.git/blame - methods/rred.cc
merge fix from Matt Zimmerman, many thanks (LP: #741098)
[apt.git] / methods / rred.cc
CommitLineData
bb1293d9 1// Includes /*{{{*/
2e178d1c 2#include <apt-pkg/fileutl.h>
bb1293d9 3#include <apt-pkg/mmap.h>
2e178d1c
MV
4#include <apt-pkg/error.h>
5#include <apt-pkg/acquire-method.h>
6#include <apt-pkg/strutl.h>
7#include <apt-pkg/hashes.h>
8
9#include <sys/stat.h>
bb1293d9 10#include <sys/uio.h>
2e178d1c
MV
11#include <unistd.h>
12#include <utime.h>
13#include <stdio.h>
14#include <errno.h>
caffd480 15#include <zlib.h>
2e178d1c 16#include <apti18n.h>
bb1293d9
DK
17 /*}}}*/
18/** \brief RredMethod - ed-style incremential patch method {{{
19 *
20 * This method implements a patch functionality similar to "patch --ed" that is
21 * used by the "tiffany" incremental packages download stuff. It differs from
22 * "ed" insofar that it is way more restricted (and therefore secure).
23 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
24 * "<em>d</em>elete" (diff doesn't output any other).
25 * Additionally the records must be reverse sorted by line number and
26 * may not overlap (diff *seems* to produce this kind of output).
d84cd865 27 * */
bb1293d9
DK
28class RredMethod : public pkgAcqMethod {
29 bool Debug;
30 // the size of this doesn't really matter (except for performance)
31 const static int BUF_SIZE = 1024;
32 // the supported ed commands
33 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
34 // return values
35 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
d84cd865 36
caffd480 37 State applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
bb1293d9
DK
38 unsigned long &line, char *buffer, Hashes *hash) const;
39 void ignoreLineInFile(FILE *fin, char *buffer) const;
caffd480 40 void ignoreLineInFile(gzFile &fin, char *buffer) const;
bb1293d9
DK
41 void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
42 Hashes *hash, char *buffer) const;
caffd480
DK
43 void copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,
44 Hashes *hash, char *buffer) const;
2e178d1c 45
bb1293d9
DK
46 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
47 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
48
49protected:
50 // the methods main method
51 virtual bool Fetch(FetchItem *Itm);
52
53public:
54 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {};
2e178d1c 55};
bb1293d9
DK
56 /*}}}*/
57/** \brief applyFile - in reverse order with a tail recursion {{{
58 *
59 * As it is expected that the commands are in reversed order in the patch file
60 * we check in the first half if the command is valid, but doesn't execute it
61 * and move a step deeper. After reaching the end of the file we apply the
62 * patches in the correct order: last found command first.
63 *
64 * \param ed_cmds patch file to apply
65 * \param in_file base file we want to patch
66 * \param out_file file to write the patched result to
67 * \param line of command operation
68 * \param buffer internal used read/write buffer
69 * \param hash the created file for correctness
70 * \return the success State of the ed command executor
71 */
caffd480 72RredMethod::State RredMethod::applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
bb1293d9
DK
73 unsigned long &line, char *buffer, Hashes *hash) const {
74 // get the current command and parse it
caffd480 75 if (gzgets(ed_cmds, buffer, BUF_SIZE) == NULL) {
bb1293d9
DK
76 if (Debug == true)
77 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
78 line = 0;
79 return ED_OK;
80 }
2e178d1c 81
bb1293d9
DK
82 // parse in the effected linenumbers
83 char* idx;
84 errno=0;
85 unsigned long const startline = strtol(buffer, &idx, 10);
86 if (errno == ERANGE || errno == EINVAL) {
87 _error->Errno("rred", "startline is an invalid number");
88 return ED_PARSER;
89 }
90 if (startline > line) {
91 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
92 return ED_ORDERING;
93 }
94 unsigned long stopline;
95 if (*idx == ',') {
96 idx++;
97 errno=0;
98 stopline = strtol(idx, &idx, 10);
99 if (errno == ERANGE || errno == EINVAL) {
100 _error->Errno("rred", "stopline is an invalid number");
101 return ED_PARSER;
102 }
103 }
104 else {
105 stopline = startline;
106 }
107 line = startline;
108
109 // which command to execute on this line(s)?
110 switch (*idx) {
111 case MODE_CHANGED:
112 if (Debug == true)
113 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
114 break;
115 case MODE_ADDED:
116 if (Debug == true)
117 std::clog << "Insert after line " << startline << std::endl;
118 break;
119 case MODE_DELETED:
120 if (Debug == true)
121 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
122 break;
123 default:
124 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
125 return ED_PARSER;
126 }
127 unsigned char mode = *idx;
128
129 // save the current position
caffd480 130 unsigned const long pos = gztell(ed_cmds);
bb1293d9
DK
131
132 // if this is add or change then go to the next full stop
133 unsigned int data_length = 0;
134 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
135 do {
136 ignoreLineInFile(ed_cmds, buffer);
137 data_length++;
138 }
139 while (strncmp(buffer, ".", 1) != 0);
140 data_length--; // the dot should not be copied
141 }
142
143 // do the recursive call - the last command is the one we need to execute at first
144 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
145 if (child != ED_OK) {
146 return child;
147 }
148
149 // change and delete are working on "line" - add is done after "line"
150 if (mode != MODE_ADDED)
151 line++;
152
153 // first wind to the current position and copy over all unchanged lines
154 if (line < startline) {
155 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
156 line = startline;
157 }
2e178d1c 158
bb1293d9
DK
159 if (mode != MODE_ADDED)
160 line--;
161
162 // include data from ed script
163 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
caffd480 164 gzseek(ed_cmds, pos, SEEK_SET);
bb1293d9
DK
165 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
166 }
167
168 // ignore the corresponding number of lines from input
169 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
170 while (line < stopline) {
171 ignoreLineInFile(in_file, buffer);
172 line++;
173 }
174 }
175 return ED_OK;
176}
177 /*}}}*/
178void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
179 Hashes *hash, char *buffer) const {
180 while (0 < lines--) {
181 do {
182 fgets(buffer, BUF_SIZE, fin);
183 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
184 hash->Add((unsigned char*)buffer, written);
185 } while (strlen(buffer) == (BUF_SIZE - 1) &&
186 buffer[BUF_SIZE - 2] != '\n');
187 }
188}
189 /*}}}*/
caffd480
DK
190void RredMethod::copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,/*{{{*/
191 Hashes *hash, char *buffer) const {
192 while (0 < lines--) {
193 do {
194 gzgets(fin, buffer, BUF_SIZE);
195 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
196 hash->Add((unsigned char*)buffer, written);
197 } while (strlen(buffer) == (BUF_SIZE - 1) &&
198 buffer[BUF_SIZE - 2] != '\n');
199 }
200}
201 /*}}}*/
bb1293d9
DK
202void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
203 fgets(buffer, BUF_SIZE, fin);
204 while (strlen(buffer) == (BUF_SIZE - 1) &&
205 buffer[BUF_SIZE - 2] != '\n') {
206 fgets(buffer, BUF_SIZE, fin);
207 buffer[0] = ' ';
208 }
209}
210 /*}}}*/
caffd480
DK
211void RredMethod::ignoreLineInFile(gzFile &fin, char *buffer) const { /*{{{*/
212 gzgets(fin, buffer, BUF_SIZE);
213 while (strlen(buffer) == (BUF_SIZE - 1) &&
214 buffer[BUF_SIZE - 2] != '\n') {
215 gzgets(fin, buffer, BUF_SIZE);
216 buffer[0] = ' ';
217 }
218}
219 /*}}}*/
bb1293d9
DK
220RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
221 FileFd &out_file, Hashes *hash) const {
d84cd865 222 char buffer[BUF_SIZE];
bb1293d9 223 FILE* fFrom = fdopen(From.Fd(), "r");
caffd480 224 gzFile fPatch = Patch.gzFd();
bb1293d9
DK
225 FILE* fTo = fdopen(out_file.Fd(), "w");
226
d84cd865 227 /* we do a tail recursion to read the commands in the right order */
bb1293d9
DK
228 unsigned long line = -1; // assign highest possible value
229 State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
d84cd865
MV
230
231 /* read the rest from infile */
bb1293d9
DK
232 if (result == ED_OK) {
233 while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
234 size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
d84cd865
MV
235 hash->Add((unsigned char*)buffer, written);
236 }
bb1293d9 237 fflush(fTo);
d84cd865 238 }
bb1293d9 239 return result;
2e178d1c 240}
bb1293d9
DK
241 /*}}}*/
/* One parsed ed command as collected by patchMMap. The data_* members are
 * only meaningful for commands carrying text (add and change). */
struct EdCommand {								/*{{{*/
  // byte offset of the first text byte, relative to the start of the ed script
  size_t data_start;
  // byte offset of the dot line terminating the text (one behind the last text byte)
  size_t data_end;
  // number of newlines counted inside the text
  size_t data_lines;
  // first line number given in the command
  size_t first_line;
  // second line number given in the command, equal to first_line if there is none
  size_t last_line;
  // the command character found at the end of the command line
  char type;
};
/* number of buffers gathered before they are flushed with writev */
#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
251 /*}}}*/
252RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
253 FileFd &out_file, Hashes *hash) const {
254#ifdef _POSIX_MAPPED_FILES
37254316 255 MMap ed_cmds(MMap::ReadOnly);
caffd480
DK
256 if (Patch.gzFd() != NULL) {
257 unsigned long mapSize = Patch.Size();
00542838 258 DynamicMMap* dyn = new DynamicMMap(0, mapSize, 0);
2a79d5b5 259 if (dyn->validData() == false) {
00542838
DK
260 delete dyn;
261 return MMAP_FAILED;
262 }
263 dyn->AddSize(mapSize);
264 gzread(Patch.gzFd(), dyn->Data(), mapSize);
265 ed_cmds = *dyn;
37254316
DK
266 } else
267 ed_cmds = MMap(Patch, MMap::ReadOnly);
268
bb1293d9
DK
269 MMap in_file(From, MMap::ReadOnly);
270
271 if (ed_cmds.Size() == 0 || in_file.Size() == 0)
272 return MMAP_FAILED;
273
274 EdCommand* commands = 0;
275 size_t command_count = 0;
276 size_t command_alloc = 0;
277
278 const char* begin = (char*) ed_cmds.Data();
279 const char* end = begin;
280 const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
281
282 const char* input = (char*) in_file.Data();
283 const char* input_end = (char*) in_file.Data() + in_file.Size();
284
285 size_t i;
286
287 /* 1. Parse entire script. It is executed in reverse order, so we cather it
288 * in the `commands' buffer first
289 */
290
291 for(;;) {
292 EdCommand cmd;
293 cmd.data_start = 0;
294 cmd.data_end = 0;
295
296 while(begin != ed_end && *begin == '\n')
297 ++begin;
298 while(end != ed_end && *end != '\n')
299 ++end;
300 if(end == ed_end && begin == end)
301 break;
302
303 /* Determine command range */
304 const char* tmp = begin;
305
306 for(;;) {
307 /* atoll is safe despite lacking NUL-termination; we know there's an
308 * alphabetic character at end[-1]
309 */
310 if(tmp == end) {
311 cmd.first_line = atol(begin);
312 cmd.last_line = cmd.first_line;
313 break;
314 }
315 if(*tmp == ',') {
316 cmd.first_line = atol(begin);
317 cmd.last_line = atol(tmp + 1);
318 break;
319 }
320 ++tmp;
321 }
322
323 // which command to execute on this line(s)?
324 switch (end[-1]) {
325 case MODE_CHANGED:
326 if (Debug == true)
327 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
328 break;
329 case MODE_ADDED:
330 if (Debug == true)
331 std::clog << "Insert after line " << cmd.first_line << std::endl;
332 break;
333 case MODE_DELETED:
334 if (Debug == true)
335 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
336 break;
337 default:
338 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
339 free(commands);
340 return ED_PARSER;
341 }
342 cmd.type = end[-1];
343
344 /* Determine the size of the inserted text, so we don't have to scan this
345 * text again later.
346 */
347 begin = end + 1;
348 end = begin;
349 cmd.data_lines = 0;
350
351 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
352 cmd.data_start = begin - (char*) ed_cmds.Data();
353 while(end != ed_end) {
354 if(*end == '\n') {
355 if(end[-1] == '.' && end[-2] == '\n')
356 break;
357 ++cmd.data_lines;
358 }
359 ++end;
360 }
361 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
362 begin = end + 1;
363 end = begin;
364 }
365 if(command_count == command_alloc) {
366 command_alloc = (command_alloc + 64) * 3 / 2;
367 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
368 }
369 commands[command_count++] = cmd;
370 }
371
372 struct iovec* iov = new struct iovec[IOV_COUNT];
373 size_t iov_size = 0;
374
375 size_t amount, remaining;
376 size_t line = 1;
377 EdCommand* cmd;
378
379 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
380 * using writev to minimize the number of system calls. Data is read
381 * directly from the memory mappings of the input file and the script.
382 */
383
384 for(i = command_count; i-- > 0; ) {
385 cmd = &commands[i];
386 if(cmd->type == MODE_ADDED)
387 amount = cmd->first_line + 1;
388 else
389 amount = cmd->first_line;
390
391 if(line < amount) {
392 begin = input;
393 while(line != amount) {
394 input = (const char*) memchr(input, '\n', input_end - input);
395 if(!input)
396 break;
397 ++line;
398 ++input;
399 }
2e178d1c 400
bb1293d9
DK
401 iov[iov_size].iov_base = (void*) begin;
402 iov[iov_size].iov_len = input - begin;
403 hash->Add((const unsigned char*) begin, input - begin);
2e178d1c 404
bb1293d9
DK
405 if(++iov_size == IOV_COUNT) {
406 writev(out_file.Fd(), iov, IOV_COUNT);
407 iov_size = 0;
408 }
409 }
410
411 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
412 remaining = (cmd->last_line - cmd->first_line) + 1;
413 line += remaining;
414 while(remaining) {
415 input = (const char*) memchr(input, '\n', input_end - input);
416 if(!input)
417 break;
418 --remaining;
419 ++input;
420 }
421 }
422
423 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
424 if(cmd->data_end != cmd->data_start) {
425 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
426 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
427 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
428 iov[iov_size].iov_len);
429
430 if(++iov_size == IOV_COUNT) {
431 writev(out_file.Fd(), iov, IOV_COUNT);
432 iov_size = 0;
433 }
434 }
435 }
436 }
437
438 if(input != input_end) {
439 iov[iov_size].iov_base = (void*) input;
440 iov[iov_size].iov_len = input_end - input;
441 hash->Add((const unsigned char*) input, input_end - input);
442 ++iov_size;
443 }
444
445 if(iov_size) {
446 writev(out_file.Fd(), iov, iov_size);
447 iov_size = 0;
448 }
449
450 for(i = 0; i < iov_size; i += IOV_COUNT) {
451 if(iov_size - i < IOV_COUNT)
452 writev(out_file.Fd(), iov + i, iov_size - i);
453 else
454 writev(out_file.Fd(), iov + i, IOV_COUNT);
455 }
456
457 delete [] iov;
458 free(commands);
459
460 return ED_OK;
461#else
462 return MMAP_FAILED;
463#endif
464}
465 /*}}}*/
466bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
2e178d1c 467{
bb1293d9 468 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
2e178d1c
MV
469 URI Get = Itm->Uri;
470 string Path = Get.Host + Get.Path; // To account for relative paths
bb1293d9 471
2e178d1c
MV
472 FetchResult Res;
473 Res.Filename = Itm->DestFile;
bb1293d9
DK
474 if (Itm->Uri.empty() == true) {
475 Path = Itm->DestFile;
476 Itm->DestFile.append(".result");
477 } else
478 URIStart(Res);
4a0a786f 479
6040f589
MV
480 if (Debug == true)
481 std::clog << "Patching " << Path << " with " << Path
482 << ".ed and putting result into " << Itm->DestFile << std::endl;
59a704f0
MV
483 // Open the source and destination files (the d'tor of FileFd will do
484 // the cleanup/closing of the fds)
2e178d1c 485 FileFd From(Path,FileFd::ReadOnly);
caffd480 486 FileFd Patch(Path+".ed",FileFd::ReadOnlyGzip);
22041bd2 487 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
2e178d1c
MV
488 To.EraseOnFailure();
489 if (_error->PendingError() == true)
490 return false;
491
492 Hashes Hash;
2e178d1c 493 // now do the actual patching
bb1293d9
DK
494 State const result = patchMMap(Patch, From, To, &Hash);
495 if (result == MMAP_FAILED) {
496 // retry with patchFile
caffd480
DK
497 Patch.Seek(0);
498 From.Seek(0);
22041bd2 499 To.Open(Itm->DestFile,FileFd::WriteAtomic);
bb1293d9
DK
500 if (_error->PendingError() == true)
501 return false;
502 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
503 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
504 } else if (Debug == true) {
505 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
506 }
507 } else if (result != ED_OK) {
508 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
509 } else if (Debug == true) {
510 std::clog << "rred: finished mmap patching of " << Path << std::endl;
3de9ff77
MV
511 }
512
513 // write out the result
3de9ff77
MV
514 From.Close();
515 Patch.Close();
516 To.Close();
517
1082d4c7
DK
518 /* Transfer the modification times from the patch file
519 to be able to see in which state the file should be
520 and use the access time from the "old" file */
521 struct stat BufBase, BufPatch;
522 if (stat(Path.c_str(),&BufBase) != 0 ||
523 stat(string(Path+".ed").c_str(),&BufPatch) != 0)
3de9ff77
MV
524 return _error->Errno("stat",_("Failed to stat"));
525
526 struct utimbuf TimeBuf;
1082d4c7
DK
527 TimeBuf.actime = BufBase.st_atime;
528 TimeBuf.modtime = BufPatch.st_mtime;
3de9ff77
MV
529 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
530 return _error->Errno("utime",_("Failed to set modification time"));
531
1082d4c7 532 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
3de9ff77
MV
533 return _error->Errno("stat",_("Failed to stat"));
534
535 // return done
1082d4c7
DK
536 Res.LastModified = BufBase.st_mtime;
537 Res.Size = BufBase.st_size;
2e178d1c
MV
538 Res.TakeHashes(Hash);
539 URIDone(Res);
3de9ff77 540
2e178d1c
MV
541 return true;
542}
bb1293d9
DK
543 /*}}}*/
544/** \brief Wrapper class for testing rred */ /*{{{*/
545class TestRredMethod : public RredMethod {
546public:
547 /** \brief Run rred in debug test mode
548 *
549 * This method can be used to run the rred method outside
550 * of the "normal" acquire environment for easier testing.
551 *
552 * \param base basename of all files involved in this rred test
553 */
554 bool Run(char const *base) {
555 _config->CndSet("Debug::pkgAcquire::RRed", "true");
556 FetchItem *test = new FetchItem;
557 test->DestFile = base;
558 return Fetch(test);
559 }
560};
561 /*}}}*/
562/** \brief Starter for the rred method (or its test method) {{{
563 *
564 * Used without parameters is the normal behavior for methods for
565 * the APT acquire system. While this works great for the acquire system
566 * it is very hard to test the method and therefore the method also
567 * accepts one parameter which will switch it directly to debug test mode:
568 * The test mode expects that if "Testfile" is given as parameter
569 * the file "Testfile" should be ed-style patched with "Testfile.ed"
570 * and will write the result to "Testfile.result".
571 */
572int main(int argc, char *argv[]) {
573 if (argc <= 1) {
574 RredMethod Mth;
575 return Mth.Run();
576 } else {
577 TestRredMethod Mth;
578 bool result = Mth.Run(argv[1]);
579 _error->DumpErrors();
580 return result;
581 }
2e178d1c 582}
bb1293d9 583 /*}}}*/