]>
git.saurik.com Git - apt.git/blob - methods/rred.cc
2 #include <apt-pkg/fileutl.h>
3 #include <apt-pkg/mmap.h>
4 #include <apt-pkg/error.h>
5 #include <apt-pkg/acquire-method.h>
6 #include <apt-pkg/strutl.h>
7 #include <apt-pkg/hashes.h>
18 /** \brief RredMethod - ed-style incremental patch method {{{
20 * This method implements a patch functionality similar to "patch --ed" that is
21 * used by the "tiffany" incremental packages download stuff. It differs from
22 * "ed" insofar as it is way more restricted (and therefore secure).
23 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
24 * "<em>d</em>elete" (diff doesn't output any others).
25 * Additionally the records must be reverse sorted by line number and
26 * may not overlap (diff *seems* to produce this kind of output).
// Acquire method that applies an ed-style patch to a file; derives from pkgAcqMethod.
// NOTE(review): this listing is an excerpt - some member declarations, the access
// specifiers and the closing brace of the class are not visible here.
28 class RredMethod
: public pkgAcqMethod
{
30 // the size of this doesn't really matter (except for performance)
// BUF_SIZE is the chunk size handed to fgets and gzgets by the line helpers below.
31 const static int BUF_SIZE
= 1024;
32 // the supported ed commands
// Each enumerator has the value of the ed command letter it stands for.
33 enum Mode
{MODE_CHANGED
='c', MODE_DELETED
='d', MODE_ADDED
='a'};
// Result codes returned by applyFile, patchFile and patchMMap.
// Fetch retries with patchFile when patchMMap reports MMAP_FAILED.
35 enum State
{ED_OK
, ED_ORDERING
, ED_PARSER
, ED_FAILURE
, MMAP_FAILED
};
// Recursive worker of patchFile: parses one ed command per call and applies it
// after the recursive call for the following commands has returned.
37 State
applyFile(gzFile
&ed_cmds
, FILE *in_file
, FILE *out_file
,
38 unsigned long &line
, char *buffer
, Hashes
*hash
) const;
// Read and discard one line from a stdio stream, using buffer as scratch space.
39 void ignoreLineInFile(FILE *fin
, char *buffer
) const;
// Same as above for a gzip stream.
40 void ignoreLineInFile(gzFile
&fin
, char *buffer
) const;
// Copy the given number of lines from fin to fout and add the written bytes to hash.
41 void copyLinesFromFileToFile(FILE *fin
, FILE *fout
, unsigned int lines
,
42 Hashes
*hash
, char *buffer
) const;
// Same as above, reading from a gzip stream.
43 void copyLinesFromFileToFile(gzFile
&fin
, FILE *fout
, unsigned int lines
,
44 Hashes
*hash
, char *buffer
) const;
// Patch implementation based on stdio and gzip streams (used as fallback by Fetch).
46 State
patchFile(FileFd
&Patch
, FileFd
&From
, FileFd
&out_file
, Hashes
*hash
) const;
// Patch implementation based on memory maps and writev (tried first by Fetch).
47 State
patchMMap(FileFd
&Patch
, FileFd
&From
, FileFd
&out_file
, Hashes
*hash
) const;
50 // the methods main method
51 virtual bool Fetch(FetchItem
*Itm
);
// Hands the string 1.1 (presumably the method version - confirm against pkgAcqMethod)
// and the SingleInstance and SendConfig flags to the base class.
54 RredMethod() : pkgAcqMethod("1.1",SingleInstance
| SendConfig
) {};
57 /** \brief applyFile - apply the commands in reverse order via recursion {{{
59 * As the commands are expected to be in reversed order in the patch file,
60 * we first check whether the command is valid, but don't execute it yet,
61 * and move a step deeper. After reaching the end of the file we apply the
62 * patches in the correct order: last found command first.
64 * \param ed_cmds patch file to apply
65 * \param in_file base file we want to patch
66 * \param out_file file to write the patched result to
67 * \param line line number of the last processed command; the start line of the next command must not exceed it
68 * \param buffer internally used read/write buffer
69 * \param hash hash calculator fed with the data written to out_file
70 * \return the success State of the ed command executor
// Reads one ed command from ed_cmds, recurses for the remaining commands and only
// afterwards applies its own command, so the command found last is executed first.
// NOTE(review): this listing is an excerpt - several statements (returns, closing
// braces, declarations such as idx) are not visible, so the notes below are hedged.
// NOTE(review): one recursion level is used per ed command; confirm that the stack
// depth is acceptable for very large patches.
72 RredMethod::State
RredMethod::applyFile(gzFile
&ed_cmds
, FILE *in_file
, FILE *out_file
,
73 unsigned long &line
, char *buffer
, Hashes
*hash
) const {
74 // get the current command and parse it
// gzgets returning NULL is treated as the end of the patch and ends the descent.
75 if (gzgets(ed_cmds
, buffer
, BUF_SIZE
) == NULL
) {
77 std::clog
<< "rred: encounter end of file - we can start patching now." << std::endl
;
82 // parse the affected line numbers
// NOTE(review): strtol yields a signed long that is stored in an unsigned long, so a
// negative number in the patch would wrap to a huge value - confirm this is rejected.
85 unsigned long const startline
= strtol(buffer
, &idx
, 10);
86 if (errno
== ERANGE
|| errno
== EINVAL
) {
87 _error
->Errno("rred", "startline is an invalid number");
// Commands must come in descending line order: a start line above the line of the
// previously parsed command is reported as an error.
90 if (startline
> line
) {
91 _error
->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline
, line
);
94 unsigned long stopline
;
// Presumably taken when the command carries an explicit end of range - the guarding
// condition is not visible in this excerpt.
98 stopline
= strtol(idx
, &idx
, 10);
99 if (errno
== ERANGE
|| errno
== EINVAL
) {
100 _error
->Errno("rred", "stopline is an invalid number");
// Otherwise the command affects a single line.
105 stopline
= startline
;
109 // which command to execute on this line(s)?
113 std::clog
<< "Change from line " << startline
<< " to " << stopline
<< std::endl
;
117 std::clog
<< "Insert after line " << startline
<< std::endl
;
121 std::clog
<< "Delete from line " << startline
<< " to " << stopline
<< std::endl
;
124 _error
->Error("rred: Unknown ed command '%c'. Abort.", *idx
);
127 unsigned char mode
= *idx
;
129 // save the current position
// The saved offset is used further down to seek back to the data of this command.
// NOTE(review): the gztell result is stored unsigned, so an error return could not
// be told apart from a valid offset afterwards - confirm.
130 unsigned const long pos
= gztell(ed_cmds
);
132 // if this is add or change then go to the next full stop
133 unsigned int data_length
= 0;
134 if (mode
== MODE_CHANGED
|| mode
== MODE_ADDED
) {
136 ignoreLineInFile(ed_cmds
, buffer
);
// Only the first character is compared, so any chunk starting with a dot ends the
// data section.
139 while (strncmp(buffer
, ".", 1) != 0);
140 data_length
--; // the dot should not be copied
143 // do the recursive call - the last command is the one we need to execute at first
144 const State child
= applyFile(ed_cmds
, in_file
, out_file
, line
, buffer
, hash
);
145 if (child
!= ED_OK
) {
149 // change and delete are working on "line" - add is done after "line"
150 if (mode
!= MODE_ADDED
)
153 // first wind to the current position and copy over all unchanged lines
154 if (line
< startline
) {
155 copyLinesFromFileToFile(in_file
, out_file
, (startline
- line
), hash
, buffer
);
159 if (mode
!= MODE_ADDED
)
162 // include data from ed script
// Seek back to the position saved above and copy the data lines of this command.
163 if (mode
== MODE_CHANGED
|| mode
== MODE_ADDED
) {
164 gzseek(ed_cmds
, pos
, SEEK_SET
);
165 copyLinesFromFileToFile(ed_cmds
, out_file
, data_length
, hash
, buffer
);
168 // ignore the corresponding number of lines from input
169 if (mode
== MODE_CHANGED
|| mode
== MODE_DELETED
) {
170 while (line
< stopline
) {
171 ignoreLineInFile(in_file
, buffer
);
178 void RredMethod::copyLinesFromFileToFile(FILE *fin
, FILE *fout
, unsigned int lines
,/*{{{*/
179 Hashes
*hash
, char *buffer
) const {
180 while (0 < lines
--) {
182 fgets(buffer
, BUF_SIZE
, fin
);
183 size_t const written
= fwrite(buffer
, 1, strlen(buffer
), fout
);
184 hash
->Add((unsigned char*)buffer
, written
);
185 } while (strlen(buffer
) == (BUF_SIZE
- 1) &&
186 buffer
[BUF_SIZE
- 2] != '\n');
190 void RredMethod::copyLinesFromFileToFile(gzFile
&fin
, FILE *fout
, unsigned int lines
,/*{{{*/
191 Hashes
*hash
, char *buffer
) const {
192 while (0 < lines
--) {
194 gzgets(fin
, buffer
, BUF_SIZE
);
195 size_t const written
= fwrite(buffer
, 1, strlen(buffer
), fout
);
196 hash
->Add((unsigned char*)buffer
, written
);
197 } while (strlen(buffer
) == (BUF_SIZE
- 1) &&
198 buffer
[BUF_SIZE
- 2] != '\n');
// Reads one line from fin into buffer and discards it; a line longer than the
// buffer is consumed in several fgets calls.
// NOTE(review): the fgets results are not checked. On end of file the buffer keeps
// its old content, so the loop condition is evaluated on stale data - confirm that
// callers never reach end of file here.
// NOTE(review): this listing is an excerpt - the end of the loop body is not visible.
202 void RredMethod::ignoreLineInFile(FILE *fin
, char *buffer
) const { /*{{{*/
203 fgets(buffer
, BUF_SIZE
, fin
);
// A completely filled buffer without a trailing newline means the line continues.
204 while (strlen(buffer
) == (BUF_SIZE
- 1) &&
205 buffer
[BUF_SIZE
- 2] != '\n') {
206 fgets(buffer
, BUF_SIZE
, fin
);
// Reads one line from the gzip stream fin into buffer and discards it; a line longer
// than the buffer is consumed in several gzgets calls.
// NOTE(review): the gzgets results are not checked, so after a failed read the loop
// condition is evaluated on stale buffer content - confirm that callers never reach
// end of file here.
// NOTE(review): this listing is an excerpt - the end of the loop body is not visible.
211 void RredMethod::ignoreLineInFile(gzFile
&fin
, char *buffer
) const { /*{{{*/
212 gzgets(fin
, buffer
, BUF_SIZE
);
// A completely filled buffer without a trailing newline means the line continues.
213 while (strlen(buffer
) == (BUF_SIZE
- 1) &&
214 buffer
[BUF_SIZE
- 2] != '\n') {
215 gzgets(fin
, buffer
, BUF_SIZE
);
// Stream based patching: wraps the descriptors of From and out_file in stdio
// streams, lets applyFile execute the ed script from Patch and, on success, copies
// the remaining input lines to the output while feeding them to hash.
// NOTE(review): this listing is an excerpt - the end of the function (return value,
// any flushing of fTo) is not visible.
220 RredMethod::State
RredMethod::patchFile(FileFd
&Patch
, FileFd
&From
, /*{{{*/
221 FileFd
&out_file
, Hashes
*hash
) const {
222 char buffer
[BUF_SIZE
];
// NOTE(review): the fdopen results are used without a NULL check in the visible lines.
223 FILE* fFrom
= fdopen(From
.Fd(), "r");
224 gzFile fPatch
= Patch
.gzFd();
225 FILE* fTo
= fdopen(out_file
.Fd(), "w");
227 /* applyFile recurses to read the commands in the right order */
// The conversion of -1 to unsigned long yields the largest representable value, so
// the ordering check in applyFile accepts any start line for the first command.
228 unsigned long line
= -1; // assign highest possible value
229 State
const result
= applyFile(fPatch
, fFrom
, fTo
, line
, buffer
, hash
);
231 /* read the rest from infile */
232 if (result
== ED_OK
) {
233 while (fgets(buffer
, BUF_SIZE
, fFrom
) != NULL
) {
// Only the bytes that fwrite reports as written are added to the hash.
234 size_t const written
= fwrite(buffer
, 1, strlen(buffer
), fTo
);
235 hash
->Add((unsigned char*)buffer
, written
);
242 struct EdCommand
{ /*{{{*/
250 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
// Memory map based patching: parses the whole ed script from Patch into an array of
// EdCommand entries, then walks that array backwards and emits the result to
// out_file with writev, feeding every emitted byte range to hash as well.
// NOTE(review): this listing is an excerpt - many statements (returns, closing
// braces, declarations such as cmd, i, line and iov_size) are not visible, so the
// notes below are hedged.
252 RredMethod::State
RredMethod::patchMMap(FileFd
&Patch
, FileFd
&From
, /*{{{*/
253 FileFd
&out_file
, Hashes
*hash
) const {
254 #ifdef _POSIX_MAPPED_FILES
255 MMap
ed_cmds(MMap::ReadOnly
);
// A patch opened through gzip is read via gzread into a DynamicMMap.
256 if (Patch
.gzFd() != NULL
) {
257 unsigned long mapSize
= Patch
.Size();
// NOTE(review): dyn is allocated with new and no matching delete is visible in this
// excerpt - confirm ownership.
258 DynamicMMap
* dyn
= new DynamicMMap(0, mapSize
, 0);
259 if (dyn
->validData() == false) {
263 dyn
->AddSize(mapSize
);
// NOTE(review): the gzread result is discarded, so a short or failed read goes
// unnoticed in the visible lines.
264 gzread(Patch
.gzFd(), dyn
->Data(), mapSize
);
267 ed_cmds
= MMap(Patch
, MMap::ReadOnly
);
269 MMap
in_file(From
, MMap::ReadOnly
);
271 if (ed_cmds
.Size() == 0 || in_file
.Size() == 0)
// Growable array of parsed commands, managed with realloc further down.
274 EdCommand
* commands
= 0;
275 size_t command_count
= 0;
276 size_t command_alloc
= 0;
// begin and end delimit the script line being parsed, ed_end marks the end of the
// script; input and input_end do the same for the file to patch.
278 const char* begin
= (char*) ed_cmds
.Data();
279 const char* end
= begin
;
280 const char* ed_end
= (char*) ed_cmds
.Data() + ed_cmds
.Size();
282 const char* input
= (char*) in_file
.Data();
283 const char* input_end
= (char*) in_file
.Data() + in_file
.Size();
287 /* 1. Parse entire script. It is executed in reverse order, so we gather it
288 * in the `commands' buffer first
296 while(begin
!= ed_end
&& *begin
== '\n')
298 while(end
!= ed_end
&& *end
!= '\n')
300 if(end
== ed_end
&& begin
== end
)
303 /* Determine command range */
304 const char* tmp
= begin
;
307 /* atol is safe despite lacking NUL-termination; we know there's an
308 * alphabetic character at end[-1]
311 cmd
.first_line
= atol(begin
);
312 cmd
.last_line
= cmd
.first_line
;
316 cmd
.first_line
= atol(begin
);
317 cmd
.last_line
= atol(tmp
+ 1);
323 // which command to execute on this line(s)?
327 std::clog
<< "Change from line " << cmd
.first_line
<< " to " << cmd
.last_line
<< std::endl
;
331 std::clog
<< "Insert after line " << cmd
.first_line
<< std::endl
;
335 std::clog
<< "Delete from line " << cmd
.first_line
<< " to " << cmd
.last_line
<< std::endl
;
338 _error
->Error("rred: Unknown ed command '%c'. Abort.", end
[-1]);
344 /* Determine the size of the inserted text, so we don't have to scan this
351 if(cmd
.type
== MODE_ADDED
|| cmd
.type
== MODE_CHANGED
) {
// data_start and data_end are stored as offsets into the script mapping.
352 cmd
.data_start
= begin
- (char*) ed_cmds
.Data();
353 while(end
!= ed_end
) {
// Looks for a dot that directly follows a newline.
// NOTE(review): end[-2] is read here; confirm that end is always at least two bytes
// past the start of the mapping at this point.
355 if(end
[-1] == '.' && end
[-2] == '\n')
361 cmd
.data_end
= end
- (char*) ed_cmds
.Data() - 1;
// Grow the command array when it is full; the first allocation holds 96 entries.
365 if(command_count
== command_alloc
) {
366 command_alloc
= (command_alloc
+ 64) * 3 / 2;
// NOTE(review): the realloc result replaces commands directly and no NULL check is
// visible before the array is written to.
367 commands
= (EdCommand
*) realloc(commands
, command_alloc
* sizeof(EdCommand
));
369 commands
[command_count
++] = cmd
;
// NOTE(review): iov is allocated with new[] and no matching delete[] is visible in
// this excerpt - confirm it is released on every return path.
372 struct iovec
* iov
= new struct iovec
[IOV_COUNT
];
375 size_t amount
, remaining
;
379 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
380 * using writev to minimize the number of system calls. Data is read
381 * directly from the memory mappings of the input file and the script.
// Commands are executed from the last parsed one down to the first.
384 for(i
= command_count
; i
-- > 0; ) {
386 if(cmd
->type
== MODE_ADDED
)
387 amount
= cmd
->first_line
+ 1;
389 amount
= cmd
->first_line
;
393 while(line
!= amount
) {
// NOTE(review): memchr yields NULL when no newline is left in the input; the
// handling of that case is not visible in this excerpt.
394 input
= (const char*) memchr(input
, '\n', input_end
- input
);
401 iov
[iov_size
].iov_base
= (void*) begin
;
402 iov
[iov_size
].iov_len
= input
- begin
;
403 hash
->Add((const unsigned char*) begin
, input
- begin
);
// Flush once the vector is full.
// NOTE(review): the writev result is discarded in every call of this function, so
// short or failed writes go unnoticed in the visible lines.
405 if(++iov_size
== IOV_COUNT
) {
406 writev(out_file
.Fd(), iov
, IOV_COUNT
);
411 if(cmd
->type
== MODE_DELETED
|| cmd
->type
== MODE_CHANGED
) {
412 remaining
= (cmd
->last_line
- cmd
->first_line
) + 1;
415 input
= (const char*) memchr(input
, '\n', input_end
- input
);
423 if(cmd
->type
== MODE_CHANGED
|| cmd
->type
== MODE_ADDED
) {
// Empty data sections are skipped; otherwise the text is emitted straight from the
// script mapping.
424 if(cmd
->data_end
!= cmd
->data_start
) {
425 iov
[iov_size
].iov_base
= (void*) ((char*)ed_cmds
.Data() + cmd
->data_start
);
426 iov
[iov_size
].iov_len
= cmd
->data_end
- cmd
->data_start
;
427 hash
->Add((const unsigned char*) ((char*)ed_cmds
.Data() + cmd
->data_start
),
428 iov
[iov_size
].iov_len
);
430 if(++iov_size
== IOV_COUNT
) {
431 writev(out_file
.Fd(), iov
, IOV_COUNT
);
// Whatever is left of the input after the last command is emitted unchanged.
438 if(input
!= input_end
) {
439 iov
[iov_size
].iov_base
= (void*) input
;
440 iov
[iov_size
].iov_len
= input_end
- input
;
441 hash
->Add((const unsigned char*) input
, input_end
- input
);
446 writev(out_file
.Fd(), iov
, iov_size
);
// Writes the vector in batches of at most IOV_COUNT entries.
450 for(i
= 0; i
< iov_size
; i
+= IOV_COUNT
) {
451 if(iov_size
- i
< IOV_COUNT
)
452 writev(out_file
.Fd(), iov
+ i
, iov_size
- i
);
454 writev(out_file
.Fd(), iov
+ i
, IOV_COUNT
);
// Entry point of the method: patches the file Path with the script Path.ed and
// writes the result to Itm->DestFile. patchMMap is tried first; patchFile is the
// fallback when patchMMap reports MMAP_FAILED. Afterwards the timestamps are
// transferred and size, modification time and hashes are stored in Res.
// NOTE(review): this listing is an excerpt - several statements (declarations of
// Get, Res and Hash, returns, closing braces) are not visible.
466 bool RredMethod::Fetch(FetchItem
*Itm
) /*{{{*/
468 Debug
= _config
->FindB("Debug::pkgAcquire::RRed", false);
470 string Path
= Get
.Host
+ Get
.Path
; // To account for relative paths
473 Res
.Filename
= Itm
->DestFile
;
// Without a Uri the destination name itself is the file to patch and the result
// goes to a file with the suffix .result appended.
474 if (Itm
->Uri
.empty() == true) {
475 Path
= Itm
->DestFile
;
476 Itm
->DestFile
.append(".result");
481 std::clog
<< "Patching " << Path
<< " with " << Path
482 << ".ed and putting result into " << Itm
->DestFile
<< std::endl
;
483 // Open the source and destination files (the d'tor of FileFd will do
484 // the cleanup/closing of the fds)
485 FileFd
From(Path
,FileFd::ReadOnly
);
486 FileFd
Patch(Path
+".ed",FileFd::ReadOnlyGzip
);
487 FileFd
To(Itm
->DestFile
,FileFd::WriteAtomic
);
489 if (_error
->PendingError() == true)
493 // now do the actual patching
494 State
const result
= patchMMap(Patch
, From
, To
, &Hash
);
495 if (result
== MMAP_FAILED
) {
496 // retry with patchFile
// NOTE(review): the same Hash object is handed to both attempts; confirm that
// nothing was added to it before patchMMap returned MMAP_FAILED.
499 To
.Open(Itm
->DestFile
,FileFd::WriteAtomic
);
500 if (_error
->PendingError() == true)
502 if (patchFile(Patch
, From
, To
, &Hash
) != ED_OK
) {
503 return _error
->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path
.c_str());
504 } else if (Debug
== true) {
505 std::clog
<< "rred: finished file patching of " << Path
<< " after mmap failed." << std::endl
;
// Any other failure of patchMMap is reported without a retry.
507 } else if (result
!= ED_OK
) {
508 return _error
->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path
.c_str());
509 } else if (Debug
== true) {
510 std::clog
<< "rred: finished mmap patching of " << Path
<< std::endl
;
513 // write out the result
518 /* Transfer the modification times from the patch file
519 to be able to see in which state the file should be
520 and use the access time from the "old" file */
521 struct stat BufBase
, BufPatch
;
522 if (stat(Path
.c_str(),&BufBase
) != 0 ||
523 stat(string(Path
+".ed").c_str(),&BufPatch
) != 0)
524 return _error
->Errno("stat",_("Failed to stat"));
// Access time comes from the base file, modification time from the patch file.
526 struct utimbuf TimeBuf
;
527 TimeBuf
.actime
= BufBase
.st_atime
;
528 TimeBuf
.modtime
= BufPatch
.st_mtime
;
529 if (utime(Itm
->DestFile
.c_str(),&TimeBuf
) != 0)
530 return _error
->Errno("utime",_("Failed to set modification time"));
// BufBase is reused for the stat of the freshly written destination file.
532 if (stat(Itm
->DestFile
.c_str(),&BufBase
) != 0)
533 return _error
->Errno("stat",_("Failed to stat"));
536 Res
.LastModified
= BufBase
.st_mtime
;
537 Res
.Size
= BufBase
.st_size
;
538 Res
.TakeHashes(Hash
);
544 /** \brief Wrapper class for testing rred */ /*{{{*/
// Subclass of RredMethod whose Run member drives the method for a single file
// given by name.
// NOTE(review): this listing is an excerpt - the rest of Run and the end of the
// class are not visible.
545 class TestRredMethod
: public RredMethod
{
547 /** \brief Run rred in debug test mode
549 * This method can be used to run the rred method outside
550 * of the "normal" acquire environment for easier testing.
552 * \param base basename of all files involved in this rred test
554 bool Run(char const *base
) {
// Switches the debug option on unless it is already configured.
555 _config
->CndSet("Debug::pkgAcquire::RRed", "true");
// NOTE(review): the FetchItem is allocated with new and no delete is visible in
// this excerpt - confirm ownership.
556 FetchItem
*test
= new FetchItem
;
557 test
->DestFile
= base
;
562 /** \brief Starter for the rred method (or its test method) {{{
564 * Running without parameters is the normal behavior for methods of
565 * the APT acquire system. While this works great for the acquire system,
566 * it makes the method very hard to test; therefore the method also
567 * accepts one parameter which switches it directly to debug test mode:
568 * The test mode expects that if "Testfile" is given as parameter
569 * the file "Testfile" should be ed-style patched with "Testfile.ed"
570 * and will write the result to "Testfile.result".
// Program entry point. In the visible lines the first command line argument is
// handed to Mth.Run and pending errors are dumped afterwards.
// NOTE(review): this listing is an excerpt - the declaration of Mth, the check of
// argc and the code path for the regular acquire mode are not visible.
572 int main(int argc
, char *argv
[]) {
578 bool result
= Mth
.Run(argv
[1]);
579 _error
->DumpErrors();