]>
git.saurik.com Git - apt.git/blob - methods/rred.cc
2a05acce1f8275536277712a4e22b4b502feb25c
4 #include <apt-pkg/fileutl.h>
5 #include <apt-pkg/mmap.h>
6 #include <apt-pkg/error.h>
7 #include <apt-pkg/acquire-method.h>
8 #include <apt-pkg/strutl.h>
9 #include <apt-pkg/hashes.h>
20 /** \brief RredMethod - ed-style incremental patch method {{{
22 * This method implements a patch functionality similar to "patch --ed" that is
23 * used by the "tiffany" incremental packages download stuff. It differs from
24 * "ed" insofar that it is way more restricted (and therefore secure).
25 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
26 * "<em>d</em>elete" (diff doesn't output any other).
27 * Additionally the records must be reverse sorted by line number and
28 * may not overlap (diff *seems* to produce this kind of output).
30 class RredMethod
: public pkgAcqMethod
{
32 // the size of this doesn't really matter (except for performance)
33 const static int BUF_SIZE
= 1024;
34 // the supported ed commands
35 enum Mode
{MODE_CHANGED
='c', MODE_DELETED
='d', MODE_ADDED
='a'};
37 enum State
{ED_OK
, ED_ORDERING
, ED_PARSER
, ED_FAILURE
, MMAP_FAILED
};
39 State
applyFile(gzFile
&ed_cmds
, FILE *in_file
, FILE *out_file
,
40 unsigned long &line
, char *buffer
, Hashes
*hash
) const;
41 void ignoreLineInFile(FILE *fin
, char *buffer
) const;
42 void ignoreLineInFile(gzFile
&fin
, char *buffer
) const;
43 void copyLinesFromFileToFile(FILE *fin
, FILE *fout
, unsigned int lines
,
44 Hashes
*hash
, char *buffer
) const;
45 void copyLinesFromFileToFile(gzFile
&fin
, FILE *fout
, unsigned int lines
,
46 Hashes
*hash
, char *buffer
) const;
48 State
patchFile(FileFd
&Patch
, FileFd
&From
, FileFd
&out_file
, Hashes
*hash
) const;
49 State
patchMMap(FileFd
&Patch
, FileFd
&From
, FileFd
&out_file
, Hashes
*hash
) const;
52 // the method's main method
53 virtual bool Fetch(FetchItem
*Itm
);
56 RredMethod() : pkgAcqMethod("1.1",SingleInstance
| SendConfig
), Debug(false) {};
59 /** \brief applyFile - in reverse order with a tail recursion {{{
61 * As it is expected that the commands are in reversed order in the patch file
62 * we check in the first half if the command is valid, but don't execute it
63 * and instead move one step deeper. After reaching the end of the file we apply the
64 * patches in the correct order: last found command first.
66 * \param ed_cmds patch file to apply
67 * \param in_file base file we want to patch
68 * \param out_file file to write the patched result to
69 * \param line of command operation
70 * \param buffer internal used read/write buffer
71 * \param hash hashes fed with the data written to the created file (for correctness checks)
72 * \return the success State of the ed command executor
74 RredMethod::State
RredMethod::applyFile(gzFile
&ed_cmds
, FILE *in_file
, FILE *out_file
,
75 unsigned long &line
, char *buffer
, Hashes
*hash
) const {
76 // get the current command and parse it
77 if (gzgets(ed_cmds
, buffer
, BUF_SIZE
) == NULL
) {
79 std::clog
<< "rred: encounter end of file - we can start patching now." << std::endl
;
84 // parse the affected line numbers
87 unsigned long const startline
= strtol(buffer
, &idx
, 10);
88 if (errno
== ERANGE
|| errno
== EINVAL
) {
89 _error
->Errno("rred", "startline is an invalid number");
92 if (startline
> line
) {
93 _error
->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline
, line
);
96 unsigned long stopline
;
100 stopline
= strtol(idx
, &idx
, 10);
101 if (errno
== ERANGE
|| errno
== EINVAL
) {
102 _error
->Errno("rred", "stopline is an invalid number");
107 stopline
= startline
;
111 // which command to execute on this line(s)?
115 std::clog
<< "Change from line " << startline
<< " to " << stopline
<< std::endl
;
119 std::clog
<< "Insert after line " << startline
<< std::endl
;
123 std::clog
<< "Delete from line " << startline
<< " to " << stopline
<< std::endl
;
126 _error
->Error("rred: Unknown ed command '%c'. Abort.", *idx
);
129 unsigned char mode
= *idx
;
131 // save the current position
132 unsigned const long pos
= gztell(ed_cmds
);
134 // if this is add or change then go to the next full stop
135 unsigned int data_length
= 0;
136 if (mode
== MODE_CHANGED
|| mode
== MODE_ADDED
) {
138 ignoreLineInFile(ed_cmds
, buffer
);
141 while (strncmp(buffer
, ".", 1) != 0);
142 data_length
--; // the dot should not be copied
145 // do the recursive call - the last command is the one we need to execute first
146 const State child
= applyFile(ed_cmds
, in_file
, out_file
, line
, buffer
, hash
);
147 if (child
!= ED_OK
) {
151 // change and delete are working on "line" - add is done after "line"
152 if (mode
!= MODE_ADDED
)
155 // first wind to the current position and copy over all unchanged lines
156 if (line
< startline
) {
157 copyLinesFromFileToFile(in_file
, out_file
, (startline
- line
), hash
, buffer
);
161 if (mode
!= MODE_ADDED
)
164 // include data from ed script
165 if (mode
== MODE_CHANGED
|| mode
== MODE_ADDED
) {
166 gzseek(ed_cmds
, pos
, SEEK_SET
);
167 copyLinesFromFileToFile(ed_cmds
, out_file
, data_length
, hash
, buffer
);
170 // ignore the corresponding number of lines from input
171 if (mode
== MODE_CHANGED
|| mode
== MODE_DELETED
) {
172 while (line
< stopline
) {
173 ignoreLineInFile(in_file
, buffer
);
180 void RredMethod::copyLinesFromFileToFile(FILE *fin
, FILE *fout
, unsigned int lines
,/*{{{*/
181 Hashes
*hash
, char *buffer
) const {
182 while (0 < lines
--) {
184 fgets(buffer
, BUF_SIZE
, fin
);
185 size_t const written
= fwrite(buffer
, 1, strlen(buffer
), fout
);
186 hash
->Add((unsigned char*)buffer
, written
);
187 } while (strlen(buffer
) == (BUF_SIZE
- 1) &&
188 buffer
[BUF_SIZE
- 2] != '\n');
192 void RredMethod::copyLinesFromFileToFile(gzFile
&fin
, FILE *fout
, unsigned int lines
,/*{{{*/
193 Hashes
*hash
, char *buffer
) const {
194 while (0 < lines
--) {
196 gzgets(fin
, buffer
, BUF_SIZE
);
197 size_t const written
= fwrite(buffer
, 1, strlen(buffer
), fout
);
198 hash
->Add((unsigned char*)buffer
, written
);
199 } while (strlen(buffer
) == (BUF_SIZE
- 1) &&
200 buffer
[BUF_SIZE
- 2] != '\n');
204 void RredMethod::ignoreLineInFile(FILE *fin
, char *buffer
) const { /*{{{*/
205 fgets(buffer
, BUF_SIZE
, fin
);
206 while (strlen(buffer
) == (BUF_SIZE
- 1) &&
207 buffer
[BUF_SIZE
- 2] != '\n') {
208 fgets(buffer
, BUF_SIZE
, fin
);
213 void RredMethod::ignoreLineInFile(gzFile
&fin
, char *buffer
) const { /*{{{*/
214 gzgets(fin
, buffer
, BUF_SIZE
);
215 while (strlen(buffer
) == (BUF_SIZE
- 1) &&
216 buffer
[BUF_SIZE
- 2] != '\n') {
217 gzgets(fin
, buffer
, BUF_SIZE
);
222 RredMethod::State
RredMethod::patchFile(FileFd
&Patch
, FileFd
&From
, /*{{{*/
223 FileFd
&out_file
, Hashes
*hash
) const {
224 char buffer
[BUF_SIZE
];
225 FILE* fFrom
= fdopen(From
.Fd(), "r");
226 gzFile fPatch
= Patch
.gzFd();
227 FILE* fTo
= fdopen(out_file
.Fd(), "w");
229 /* we do a tail recursion to read the commands in the right order */
230 unsigned long line
= -1; // assign highest possible value
231 State
const result
= applyFile(fPatch
, fFrom
, fTo
, line
, buffer
, hash
);
233 /* read the rest from infile */
234 if (result
== ED_OK
) {
235 while (fgets(buffer
, BUF_SIZE
, fFrom
) != NULL
) {
236 size_t const written
= fwrite(buffer
, 1, strlen(buffer
), fTo
);
237 hash
->Add((unsigned char*)buffer
, written
);
244 /* struct EdCommand {{{*/
245 #ifdef _POSIX_MAPPED_FILES
254 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
257 RredMethod::State
RredMethod::patchMMap(FileFd
&Patch
, FileFd
&From
, /*{{{*/
258 FileFd
&out_file
, Hashes
*hash
) const {
259 #ifdef _POSIX_MAPPED_FILES
260 MMap
ed_cmds(MMap::ReadOnly
);
261 if (Patch
.gzFd() != NULL
) {
262 unsigned long long mapSize
= Patch
.Size();
263 DynamicMMap
* dyn
= new DynamicMMap(0, mapSize
, 0);
264 if (dyn
->validData() == false) {
268 dyn
->AddSize(mapSize
);
269 gzread(Patch
.gzFd(), dyn
->Data(), mapSize
);
272 ed_cmds
= MMap(Patch
, MMap::ReadOnly
);
274 MMap
in_file(From
, MMap::ReadOnly
);
276 if (ed_cmds
.Size() == 0 || in_file
.Size() == 0)
279 EdCommand
* commands
= 0;
280 size_t command_count
= 0;
281 size_t command_alloc
= 0;
283 const char* begin
= (char*) ed_cmds
.Data();
284 const char* end
= begin
;
285 const char* ed_end
= (char*) ed_cmds
.Data() + ed_cmds
.Size();
287 const char* input
= (char*) in_file
.Data();
288 const char* input_end
= (char*) in_file
.Data() + in_file
.Size();
292 /* 1. Parse entire script. It is executed in reverse order, so we gather it
293 * in the `commands' buffer first
301 while(begin
!= ed_end
&& *begin
== '\n')
303 while(end
!= ed_end
&& *end
!= '\n')
305 if(end
== ed_end
&& begin
== end
)
308 /* Determine command range */
309 const char* tmp
= begin
;
312 /* atol is safe despite lacking NUL-termination; we know there's an
313 * alphabetic character at end[-1]
316 cmd
.first_line
= atol(begin
);
317 cmd
.last_line
= cmd
.first_line
;
321 cmd
.first_line
= atol(begin
);
322 cmd
.last_line
= atol(tmp
+ 1);
328 // which command to execute on this line(s)?
332 std::clog
<< "Change from line " << cmd
.first_line
<< " to " << cmd
.last_line
<< std::endl
;
336 std::clog
<< "Insert after line " << cmd
.first_line
<< std::endl
;
340 std::clog
<< "Delete from line " << cmd
.first_line
<< " to " << cmd
.last_line
<< std::endl
;
343 _error
->Error("rred: Unknown ed command '%c'. Abort.", end
[-1]);
349 /* Determine the size of the inserted text, so we don't have to scan this
356 if(cmd
.type
== MODE_ADDED
|| cmd
.type
== MODE_CHANGED
) {
357 cmd
.data_start
= begin
- (char*) ed_cmds
.Data();
358 while(end
!= ed_end
) {
360 if(end
[-1] == '.' && end
[-2] == '\n')
366 cmd
.data_end
= end
- (char*) ed_cmds
.Data() - 1;
370 if(command_count
== command_alloc
) {
371 command_alloc
= (command_alloc
+ 64) * 3 / 2;
372 commands
= (EdCommand
*) realloc(commands
, command_alloc
* sizeof(EdCommand
));
374 commands
[command_count
++] = cmd
;
377 struct iovec
* iov
= new struct iovec
[IOV_COUNT
];
380 size_t amount
, remaining
;
384 /* 2. Execute script. We gather writes in a `struct iovec' array, and flush
385 * using writev to minimize the number of system calls. Data is read
386 * directly from the memory mappings of the input file and the script.
389 for(i
= command_count
; i
-- > 0; ) {
391 if(cmd
->type
== MODE_ADDED
)
392 amount
= cmd
->first_line
+ 1;
394 amount
= cmd
->first_line
;
398 while(line
!= amount
) {
399 input
= (const char*) memchr(input
, '\n', input_end
- input
);
406 iov
[iov_size
].iov_base
= (void*) begin
;
407 iov
[iov_size
].iov_len
= input
- begin
;
408 hash
->Add((const unsigned char*) begin
, input
- begin
);
410 if(++iov_size
== IOV_COUNT
) {
411 writev(out_file
.Fd(), iov
, IOV_COUNT
);
416 if(cmd
->type
== MODE_DELETED
|| cmd
->type
== MODE_CHANGED
) {
417 remaining
= (cmd
->last_line
- cmd
->first_line
) + 1;
420 input
= (const char*) memchr(input
, '\n', input_end
- input
);
428 if(cmd
->type
== MODE_CHANGED
|| cmd
->type
== MODE_ADDED
) {
429 if(cmd
->data_end
!= cmd
->data_start
) {
430 iov
[iov_size
].iov_base
= (void*) ((char*)ed_cmds
.Data() + cmd
->data_start
);
431 iov
[iov_size
].iov_len
= cmd
->data_end
- cmd
->data_start
;
432 hash
->Add((const unsigned char*) ((char*)ed_cmds
.Data() + cmd
->data_start
),
433 iov
[iov_size
].iov_len
);
435 if(++iov_size
== IOV_COUNT
) {
436 writev(out_file
.Fd(), iov
, IOV_COUNT
);
443 if(input
!= input_end
) {
444 iov
[iov_size
].iov_base
= (void*) input
;
445 iov
[iov_size
].iov_len
= input_end
- input
;
446 hash
->Add((const unsigned char*) input
, input_end
- input
);
451 writev(out_file
.Fd(), iov
, iov_size
);
455 for(i
= 0; i
< iov_size
; i
+= IOV_COUNT
) {
456 if(iov_size
- i
< IOV_COUNT
)
457 writev(out_file
.Fd(), iov
+ i
, iov_size
- i
);
459 writev(out_file
.Fd(), iov
+ i
, IOV_COUNT
);
471 bool RredMethod::Fetch(FetchItem
*Itm
) /*{{{*/
473 Debug
= _config
->FindB("Debug::pkgAcquire::RRed", false);
475 std::string Path
= Get
.Host
+ Get
.Path
; // To account for relative paths
478 Res
.Filename
= Itm
->DestFile
;
479 if (Itm
->Uri
.empty() == true) {
480 Path
= Itm
->DestFile
;
481 Itm
->DestFile
.append(".result");
486 std::clog
<< "Patching " << Path
<< " with " << Path
487 << ".ed and putting result into " << Itm
->DestFile
<< std::endl
;
488 // Open the source and destination files (the d'tor of FileFd will do
489 // the cleanup/closing of the fds)
490 FileFd
From(Path
,FileFd::ReadOnly
);
491 FileFd
Patch(Path
+".ed",FileFd::ReadOnlyGzip
);
492 FileFd
To(Itm
->DestFile
,FileFd::WriteAtomic
);
494 if (_error
->PendingError() == true)
498 // now do the actual patching
499 State
const result
= patchMMap(Patch
, From
, To
, &Hash
);
500 if (result
== MMAP_FAILED
) {
501 // retry with patchFile
504 To
.Open(Itm
->DestFile
,FileFd::WriteAtomic
);
505 if (_error
->PendingError() == true)
507 if (patchFile(Patch
, From
, To
, &Hash
) != ED_OK
) {
508 return _error
->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path
.c_str());
509 } else if (Debug
== true) {
510 std::clog
<< "rred: finished file patching of " << Path
<< " after mmap failed." << std::endl
;
512 } else if (result
!= ED_OK
) {
513 return _error
->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path
.c_str());
514 } else if (Debug
== true) {
515 std::clog
<< "rred: finished mmap patching of " << Path
<< std::endl
;
518 // write out the result
523 /* Transfer the modification times from the patch file
524 to be able to see in which state the file should be
525 and use the access time from the "old" file */
526 struct stat BufBase
, BufPatch
;
527 if (stat(Path
.c_str(),&BufBase
) != 0 ||
528 stat(std::string(Path
+".ed").c_str(),&BufPatch
) != 0)
529 return _error
->Errno("stat",_("Failed to stat"));
531 struct utimbuf TimeBuf
;
532 TimeBuf
.actime
= BufBase
.st_atime
;
533 TimeBuf
.modtime
= BufPatch
.st_mtime
;
534 if (utime(Itm
->DestFile
.c_str(),&TimeBuf
) != 0)
535 return _error
->Errno("utime",_("Failed to set modification time"));
537 if (stat(Itm
->DestFile
.c_str(),&BufBase
) != 0)
538 return _error
->Errno("stat",_("Failed to stat"));
541 Res
.LastModified
= BufBase
.st_mtime
;
542 Res
.Size
= BufBase
.st_size
;
543 Res
.TakeHashes(Hash
);
549 /** \brief Wrapper class for testing rred */ /*{{{*/
550 class TestRredMethod
: public RredMethod
{
552 /** \brief Run rred in debug test mode
554 * This method can be used to run the rred method outside
555 * of the "normal" acquire environment for easier testing.
557 * \param base basename of all files involved in this rred test
559 bool Run(char const *base
) {
560 _config
->CndSet("Debug::pkgAcquire::RRed", "true");
561 FetchItem
*test
= new FetchItem
;
562 test
->DestFile
= base
;
567 /** \brief Starter for the rred method (or its test method) {{{
569 * Used without parameters is the normal behavior for methods for
570 * the APT acquire system. While this works great for the acquire system
571 * it is very hard to test the method and therefore the method also
572 * accepts one parameter which will switch it directly to debug test mode:
573 * The test mode expects that if "Testfile" is given as parameter
574 * the file "Testfile" should be ed-style patched with "Testfile.ed"
575 * and will write the result to "Testfile.result".
577 int main(int argc
, char *argv
[]) {
583 bool result
= Mth
.Run(argv
[1]);
584 _error
->DumpErrors();