]>
Commit | Line | Data |
---|---|---|
1 | // Includes /*{{{*/ | |
2 | #include <apt-pkg/fileutl.h> | |
3 | #include <apt-pkg/mmap.h> | |
4 | #include <apt-pkg/error.h> | |
5 | #include <apt-pkg/acquire-method.h> | |
6 | #include <apt-pkg/strutl.h> | |
7 | #include <apt-pkg/hashes.h> | |
8 | ||
9 | #include <sys/stat.h> | |
10 | #include <sys/uio.h> | |
11 | #include <unistd.h> | |
12 | #include <utime.h> | |
13 | #include <stdio.h> | |
14 | #include <errno.h> | |
15 | #include <apti18n.h> | |
16 | /*}}}*/ | |
17 | /** \brief RredMethod - ed-style incremential patch method {{{ | |
18 | * | |
19 | * This method implements a patch functionality similar to "patch --ed" that is | |
20 | * used by the "tiffany" incremental packages download stuff. It differs from | |
21 | * "ed" insofar that it is way more restricted (and therefore secure). | |
22 | * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and | |
23 | * "<em>d</em>elete" (diff doesn't output any other). | |
24 | * Additionally the records must be reverse sorted by line number and | |
25 | * may not overlap (diff *seems* to produce this kind of output). | |
26 | * */ | |
27 | class RredMethod : public pkgAcqMethod { | |
28 | bool Debug; | |
29 | // the size of this doesn't really matter (except for performance) | |
30 | const static int BUF_SIZE = 1024; | |
31 | // the supported ed commands | |
32 | enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'}; | |
33 | // return values | |
34 | enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED}; | |
35 | ||
36 | State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, | |
37 | unsigned long &line, char *buffer, Hashes *hash) const; | |
38 | void ignoreLineInFile(FILE *fin, char *buffer) const; | |
39 | void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines, | |
40 | Hashes *hash, char *buffer) const; | |
41 | ||
42 | State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const; | |
43 | State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const; | |
44 | ||
45 | protected: | |
46 | // the methods main method | |
47 | virtual bool Fetch(FetchItem *Itm); | |
48 | ||
49 | public: | |
50 | RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {}; | |
51 | }; | |
52 | /*}}}*/ | |
53 | /** \brief applyFile - in reverse order with a tail recursion {{{ | |
54 | * | |
55 | * As it is expected that the commands are in reversed order in the patch file | |
56 | * we check in the first half if the command is valid, but doesn't execute it | |
57 | * and move a step deeper. After reaching the end of the file we apply the | |
58 | * patches in the correct order: last found command first. | |
59 | * | |
60 | * \param ed_cmds patch file to apply | |
61 | * \param in_file base file we want to patch | |
62 | * \param out_file file to write the patched result to | |
63 | * \param line of command operation | |
64 | * \param buffer internal used read/write buffer | |
65 | * \param hash the created file for correctness | |
66 | * \return the success State of the ed command executor | |
67 | */ | |
68 | RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, | |
69 | unsigned long &line, char *buffer, Hashes *hash) const { | |
70 | // get the current command and parse it | |
71 | if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) { | |
72 | if (Debug == true) | |
73 | std::clog << "rred: encounter end of file - we can start patching now." << std::endl; | |
74 | line = 0; | |
75 | return ED_OK; | |
76 | } | |
77 | ||
78 | // parse in the effected linenumbers | |
79 | char* idx; | |
80 | errno=0; | |
81 | unsigned long const startline = strtol(buffer, &idx, 10); | |
82 | if (errno == ERANGE || errno == EINVAL) { | |
83 | _error->Errno("rred", "startline is an invalid number"); | |
84 | return ED_PARSER; | |
85 | } | |
86 | if (startline > line) { | |
87 | _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line); | |
88 | return ED_ORDERING; | |
89 | } | |
90 | unsigned long stopline; | |
91 | if (*idx == ',') { | |
92 | idx++; | |
93 | errno=0; | |
94 | stopline = strtol(idx, &idx, 10); | |
95 | if (errno == ERANGE || errno == EINVAL) { | |
96 | _error->Errno("rred", "stopline is an invalid number"); | |
97 | return ED_PARSER; | |
98 | } | |
99 | } | |
100 | else { | |
101 | stopline = startline; | |
102 | } | |
103 | line = startline; | |
104 | ||
105 | // which command to execute on this line(s)? | |
106 | switch (*idx) { | |
107 | case MODE_CHANGED: | |
108 | if (Debug == true) | |
109 | std::clog << "Change from line " << startline << " to " << stopline << std::endl; | |
110 | break; | |
111 | case MODE_ADDED: | |
112 | if (Debug == true) | |
113 | std::clog << "Insert after line " << startline << std::endl; | |
114 | break; | |
115 | case MODE_DELETED: | |
116 | if (Debug == true) | |
117 | std::clog << "Delete from line " << startline << " to " << stopline << std::endl; | |
118 | break; | |
119 | default: | |
120 | _error->Error("rred: Unknown ed command '%c'. Abort.", *idx); | |
121 | return ED_PARSER; | |
122 | } | |
123 | unsigned char mode = *idx; | |
124 | ||
125 | // save the current position | |
126 | unsigned const long pos = ftell(ed_cmds); | |
127 | ||
128 | // if this is add or change then go to the next full stop | |
129 | unsigned int data_length = 0; | |
130 | if (mode == MODE_CHANGED || mode == MODE_ADDED) { | |
131 | do { | |
132 | ignoreLineInFile(ed_cmds, buffer); | |
133 | data_length++; | |
134 | } | |
135 | while (strncmp(buffer, ".", 1) != 0); | |
136 | data_length--; // the dot should not be copied | |
137 | } | |
138 | ||
139 | // do the recursive call - the last command is the one we need to execute at first | |
140 | const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash); | |
141 | if (child != ED_OK) { | |
142 | return child; | |
143 | } | |
144 | ||
145 | // change and delete are working on "line" - add is done after "line" | |
146 | if (mode != MODE_ADDED) | |
147 | line++; | |
148 | ||
149 | // first wind to the current position and copy over all unchanged lines | |
150 | if (line < startline) { | |
151 | copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer); | |
152 | line = startline; | |
153 | } | |
154 | ||
155 | if (mode != MODE_ADDED) | |
156 | line--; | |
157 | ||
158 | // include data from ed script | |
159 | if (mode == MODE_CHANGED || mode == MODE_ADDED) { | |
160 | fseek(ed_cmds, pos, SEEK_SET); | |
161 | copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer); | |
162 | } | |
163 | ||
164 | // ignore the corresponding number of lines from input | |
165 | if (mode == MODE_CHANGED || mode == MODE_DELETED) { | |
166 | while (line < stopline) { | |
167 | ignoreLineInFile(in_file, buffer); | |
168 | line++; | |
169 | } | |
170 | } | |
171 | return ED_OK; | |
172 | } | |
173 | /*}}}*/ | |
174 | void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/ | |
175 | Hashes *hash, char *buffer) const { | |
176 | while (0 < lines--) { | |
177 | do { | |
178 | fgets(buffer, BUF_SIZE, fin); | |
179 | size_t const written = fwrite(buffer, 1, strlen(buffer), fout); | |
180 | hash->Add((unsigned char*)buffer, written); | |
181 | } while (strlen(buffer) == (BUF_SIZE - 1) && | |
182 | buffer[BUF_SIZE - 2] != '\n'); | |
183 | } | |
184 | } | |
185 | /*}}}*/ | |
186 | void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/ | |
187 | fgets(buffer, BUF_SIZE, fin); | |
188 | while (strlen(buffer) == (BUF_SIZE - 1) && | |
189 | buffer[BUF_SIZE - 2] != '\n') { | |
190 | fgets(buffer, BUF_SIZE, fin); | |
191 | buffer[0] = ' '; | |
192 | } | |
193 | } | |
194 | /*}}}*/ | |
195 | RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/ | |
196 | FileFd &out_file, Hashes *hash) const { | |
197 | char buffer[BUF_SIZE]; | |
198 | FILE* fFrom = fdopen(From.Fd(), "r"); | |
199 | FILE* fPatch = fdopen(Patch.Fd(), "r"); | |
200 | FILE* fTo = fdopen(out_file.Fd(), "w"); | |
201 | ||
202 | /* we do a tail recursion to read the commands in the right order */ | |
203 | unsigned long line = -1; // assign highest possible value | |
204 | State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash); | |
205 | ||
206 | /* read the rest from infile */ | |
207 | if (result == ED_OK) { | |
208 | while (fgets(buffer, BUF_SIZE, fFrom) != NULL) { | |
209 | size_t const written = fwrite(buffer, 1, strlen(buffer), fTo); | |
210 | hash->Add((unsigned char*)buffer, written); | |
211 | } | |
212 | fflush(fTo); | |
213 | } | |
214 | return result; | |
215 | } | |
216 | /*}}}*/ | |
/** \brief one parsed command of an ed script (used by the mmap patcher) */	/*{{{*/
struct EdCommand {
    size_t data_start;	// offset of the first data byte inside the ed script
    size_t data_end;	// offset one past the last data byte inside the ed script
    size_t data_lines;	// number of data lines belonging to the command
    size_t first_line;	// first line of the base file the command refers to
    size_t last_line;	// last line of the base file the command refers to
    char type;		// command character ('a', 'c' or 'd')
};
/* Number of io vectors gathered before they are flushed.
   Don't really want IOV_MAX since it can be arbitrarily large */
#define IOV_COUNT 1024
226 | /*}}}*/ | |
227 | RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/ | |
228 | FileFd &out_file, Hashes *hash) const { | |
229 | #ifdef _POSIX_MAPPED_FILES | |
230 | MMap ed_cmds(Patch, MMap::ReadOnly); | |
231 | MMap in_file(From, MMap::ReadOnly); | |
232 | ||
233 | if (ed_cmds.Size() == 0 || in_file.Size() == 0) | |
234 | return MMAP_FAILED; | |
235 | ||
236 | EdCommand* commands = 0; | |
237 | size_t command_count = 0; | |
238 | size_t command_alloc = 0; | |
239 | ||
240 | const char* begin = (char*) ed_cmds.Data(); | |
241 | const char* end = begin; | |
242 | const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size(); | |
243 | ||
244 | const char* input = (char*) in_file.Data(); | |
245 | const char* input_end = (char*) in_file.Data() + in_file.Size(); | |
246 | ||
247 | size_t i; | |
248 | ||
249 | /* 1. Parse entire script. It is executed in reverse order, so we cather it | |
250 | * in the `commands' buffer first | |
251 | */ | |
252 | ||
253 | for(;;) { | |
254 | EdCommand cmd; | |
255 | cmd.data_start = 0; | |
256 | cmd.data_end = 0; | |
257 | ||
258 | while(begin != ed_end && *begin == '\n') | |
259 | ++begin; | |
260 | while(end != ed_end && *end != '\n') | |
261 | ++end; | |
262 | if(end == ed_end && begin == end) | |
263 | break; | |
264 | ||
265 | /* Determine command range */ | |
266 | const char* tmp = begin; | |
267 | ||
268 | for(;;) { | |
269 | /* atoll is safe despite lacking NUL-termination; we know there's an | |
270 | * alphabetic character at end[-1] | |
271 | */ | |
272 | if(tmp == end) { | |
273 | cmd.first_line = atol(begin); | |
274 | cmd.last_line = cmd.first_line; | |
275 | break; | |
276 | } | |
277 | if(*tmp == ',') { | |
278 | cmd.first_line = atol(begin); | |
279 | cmd.last_line = atol(tmp + 1); | |
280 | break; | |
281 | } | |
282 | ++tmp; | |
283 | } | |
284 | ||
285 | // which command to execute on this line(s)? | |
286 | switch (end[-1]) { | |
287 | case MODE_CHANGED: | |
288 | if (Debug == true) | |
289 | std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl; | |
290 | break; | |
291 | case MODE_ADDED: | |
292 | if (Debug == true) | |
293 | std::clog << "Insert after line " << cmd.first_line << std::endl; | |
294 | break; | |
295 | case MODE_DELETED: | |
296 | if (Debug == true) | |
297 | std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl; | |
298 | break; | |
299 | default: | |
300 | _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]); | |
301 | free(commands); | |
302 | return ED_PARSER; | |
303 | } | |
304 | cmd.type = end[-1]; | |
305 | ||
306 | /* Determine the size of the inserted text, so we don't have to scan this | |
307 | * text again later. | |
308 | */ | |
309 | begin = end + 1; | |
310 | end = begin; | |
311 | cmd.data_lines = 0; | |
312 | ||
313 | if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) { | |
314 | cmd.data_start = begin - (char*) ed_cmds.Data(); | |
315 | while(end != ed_end) { | |
316 | if(*end == '\n') { | |
317 | if(end[-1] == '.' && end[-2] == '\n') | |
318 | break; | |
319 | ++cmd.data_lines; | |
320 | } | |
321 | ++end; | |
322 | } | |
323 | cmd.data_end = end - (char*) ed_cmds.Data() - 1; | |
324 | begin = end + 1; | |
325 | end = begin; | |
326 | } | |
327 | if(command_count == command_alloc) { | |
328 | command_alloc = (command_alloc + 64) * 3 / 2; | |
329 | commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand)); | |
330 | } | |
331 | commands[command_count++] = cmd; | |
332 | } | |
333 | ||
334 | struct iovec* iov = new struct iovec[IOV_COUNT]; | |
335 | size_t iov_size = 0; | |
336 | ||
337 | size_t amount, remaining; | |
338 | size_t line = 1; | |
339 | EdCommand* cmd; | |
340 | ||
341 | /* 2. Execute script. We gather writes in a `struct iov' array, and flush | |
342 | * using writev to minimize the number of system calls. Data is read | |
343 | * directly from the memory mappings of the input file and the script. | |
344 | */ | |
345 | ||
346 | for(i = command_count; i-- > 0; ) { | |
347 | cmd = &commands[i]; | |
348 | if(cmd->type == MODE_ADDED) | |
349 | amount = cmd->first_line + 1; | |
350 | else | |
351 | amount = cmd->first_line; | |
352 | ||
353 | if(line < amount) { | |
354 | begin = input; | |
355 | while(line != amount) { | |
356 | input = (const char*) memchr(input, '\n', input_end - input); | |
357 | if(!input) | |
358 | break; | |
359 | ++line; | |
360 | ++input; | |
361 | } | |
362 | ||
363 | iov[iov_size].iov_base = (void*) begin; | |
364 | iov[iov_size].iov_len = input - begin; | |
365 | hash->Add((const unsigned char*) begin, input - begin); | |
366 | ||
367 | if(++iov_size == IOV_COUNT) { | |
368 | writev(out_file.Fd(), iov, IOV_COUNT); | |
369 | iov_size = 0; | |
370 | } | |
371 | } | |
372 | ||
373 | if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) { | |
374 | remaining = (cmd->last_line - cmd->first_line) + 1; | |
375 | line += remaining; | |
376 | while(remaining) { | |
377 | input = (const char*) memchr(input, '\n', input_end - input); | |
378 | if(!input) | |
379 | break; | |
380 | --remaining; | |
381 | ++input; | |
382 | } | |
383 | } | |
384 | ||
385 | if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) { | |
386 | if(cmd->data_end != cmd->data_start) { | |
387 | iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start); | |
388 | iov[iov_size].iov_len = cmd->data_end - cmd->data_start; | |
389 | hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start), | |
390 | iov[iov_size].iov_len); | |
391 | ||
392 | if(++iov_size == IOV_COUNT) { | |
393 | writev(out_file.Fd(), iov, IOV_COUNT); | |
394 | iov_size = 0; | |
395 | } | |
396 | } | |
397 | } | |
398 | } | |
399 | ||
400 | if(input != input_end) { | |
401 | iov[iov_size].iov_base = (void*) input; | |
402 | iov[iov_size].iov_len = input_end - input; | |
403 | hash->Add((const unsigned char*) input, input_end - input); | |
404 | ++iov_size; | |
405 | } | |
406 | ||
407 | if(iov_size) { | |
408 | writev(out_file.Fd(), iov, iov_size); | |
409 | iov_size = 0; | |
410 | } | |
411 | ||
412 | for(i = 0; i < iov_size; i += IOV_COUNT) { | |
413 | if(iov_size - i < IOV_COUNT) | |
414 | writev(out_file.Fd(), iov + i, iov_size - i); | |
415 | else | |
416 | writev(out_file.Fd(), iov + i, IOV_COUNT); | |
417 | } | |
418 | ||
419 | delete [] iov; | |
420 | free(commands); | |
421 | ||
422 | return ED_OK; | |
423 | #else | |
424 | return MMAP_FAILED; | |
425 | #endif | |
426 | } | |
427 | /*}}}*/ | |
428 | bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ | |
429 | { | |
430 | Debug = _config->FindB("Debug::pkgAcquire::RRed", false); | |
431 | URI Get = Itm->Uri; | |
432 | string Path = Get.Host + Get.Path; // To account for relative paths | |
433 | ||
434 | FetchResult Res; | |
435 | Res.Filename = Itm->DestFile; | |
436 | if (Itm->Uri.empty() == true) { | |
437 | Path = Itm->DestFile; | |
438 | Itm->DestFile.append(".result"); | |
439 | } else | |
440 | URIStart(Res); | |
441 | ||
442 | if (Debug == true) | |
443 | std::clog << "Patching " << Path << " with " << Path | |
444 | << ".ed and putting result into " << Itm->DestFile << std::endl; | |
445 | // Open the source and destination files (the d'tor of FileFd will do | |
446 | // the cleanup/closing of the fds) | |
447 | FileFd From(Path,FileFd::ReadOnly); | |
448 | FileFd Patch(Path+".ed",FileFd::ReadOnly); | |
449 | FileFd To(Itm->DestFile,FileFd::WriteAtomic); | |
450 | To.EraseOnFailure(); | |
451 | if (_error->PendingError() == true) | |
452 | return false; | |
453 | ||
454 | Hashes Hash; | |
455 | // now do the actual patching | |
456 | State const result = patchMMap(Patch, From, To, &Hash); | |
457 | if (result == MMAP_FAILED) { | |
458 | // retry with patchFile | |
459 | lseek(Patch.Fd(), 0, SEEK_SET); | |
460 | lseek(From.Fd(), 0, SEEK_SET); | |
461 | To.Open(Itm->DestFile,FileFd::WriteAtomic); | |
462 | if (_error->PendingError() == true) | |
463 | return false; | |
464 | if (patchFile(Patch, From, To, &Hash) != ED_OK) { | |
465 | return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str()); | |
466 | } else if (Debug == true) { | |
467 | std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl; | |
468 | } | |
469 | } else if (result != ED_OK) { | |
470 | return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str()); | |
471 | } else if (Debug == true) { | |
472 | std::clog << "rred: finished mmap patching of " << Path << std::endl; | |
473 | } | |
474 | ||
475 | // write out the result | |
476 | From.Close(); | |
477 | Patch.Close(); | |
478 | To.Close(); | |
479 | ||
480 | /* Transfer the modification times from the patch file | |
481 | to be able to see in which state the file should be | |
482 | and use the access time from the "old" file */ | |
483 | struct stat BufBase, BufPatch; | |
484 | if (stat(Path.c_str(),&BufBase) != 0 || | |
485 | stat(string(Path+".ed").c_str(),&BufPatch) != 0) | |
486 | return _error->Errno("stat",_("Failed to stat")); | |
487 | ||
488 | struct utimbuf TimeBuf; | |
489 | TimeBuf.actime = BufBase.st_atime; | |
490 | TimeBuf.modtime = BufPatch.st_mtime; | |
491 | if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0) | |
492 | return _error->Errno("utime",_("Failed to set modification time")); | |
493 | ||
494 | if (stat(Itm->DestFile.c_str(),&BufBase) != 0) | |
495 | return _error->Errno("stat",_("Failed to stat")); | |
496 | ||
497 | // return done | |
498 | Res.LastModified = BufBase.st_mtime; | |
499 | Res.Size = BufBase.st_size; | |
500 | Res.TakeHashes(Hash); | |
501 | URIDone(Res); | |
502 | ||
503 | return true; | |
504 | } | |
505 | /*}}}*/ | |
506 | /** \brief Wrapper class for testing rred */ /*{{{*/ | |
507 | class TestRredMethod : public RredMethod { | |
508 | public: | |
509 | /** \brief Run rred in debug test mode | |
510 | * | |
511 | * This method can be used to run the rred method outside | |
512 | * of the "normal" acquire environment for easier testing. | |
513 | * | |
514 | * \param base basename of all files involved in this rred test | |
515 | */ | |
516 | bool Run(char const *base) { | |
517 | _config->CndSet("Debug::pkgAcquire::RRed", "true"); | |
518 | FetchItem *test = new FetchItem; | |
519 | test->DestFile = base; | |
520 | return Fetch(test); | |
521 | } | |
522 | }; | |
523 | /*}}}*/ | |
524 | /** \brief Starter for the rred method (or its test method) {{{ | |
525 | * | |
526 | * Used without parameters is the normal behavior for methods for | |
527 | * the APT acquire system. While this works great for the acquire system | |
528 | * it is very hard to test the method and therefore the method also | |
529 | * accepts one parameter which will switch it directly to debug test mode: | |
530 | * The test mode expects that if "Testfile" is given as parameter | |
531 | * the file "Testfile" should be ed-style patched with "Testfile.ed" | |
532 | * and will write the result to "Testfile.result". | |
533 | */ | |
534 | int main(int argc, char *argv[]) { | |
535 | if (argc <= 1) { | |
536 | RredMethod Mth; | |
537 | return Mth.Run(); | |
538 | } else { | |
539 | TestRredMethod Mth; | |
540 | bool result = Mth.Run(argv[1]); | |
541 | _error->DumpErrors(); | |
542 | return result; | |
543 | } | |
544 | } | |
545 | /*}}}*/ |