]>
git.saurik.com Git - apt.git/blob - methods/rred.cc
4 #include <apt-pkg/fileutl.h>
5 #include <apt-pkg/mmap.h>
6 #include <apt-pkg/error.h>
7 #include <apt-pkg/acquire-method.h>
8 #include <apt-pkg/strutl.h>
9 #include <apt-pkg/hashes.h>
10 #include <apt-pkg/configuration.h>
21 /** \brief RredMethod - ed-style incremental patch method {{{
23 * This method implements a patch functionality similar to "patch --ed" that is
24 * used by the "tiffany" incremental package download mechanism. It differs from
25 * "ed" insofar as it is far more restricted (and therefore more secure).
26 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
27 * "<em>d</em>elete" (diff doesn't output any other).
28 * Additionally the records must be reverse sorted by line number and
29 * may not overlap (diff *seems* to produce this kind of output).
31 class RredMethod
: public pkgAcqMethod
{
33 // the size of this doesn't really matter (except for performance)
34 const static int BUF_SIZE
= 1024;
35 // the supported ed commands
36 enum Mode
{MODE_CHANGED
='c', MODE_DELETED
='d', MODE_ADDED
='a'};
38 enum State
{ED_OK
, ED_ORDERING
, ED_PARSER
, ED_FAILURE
, MMAP_FAILED
};
40 State
applyFile(gzFile
&ed_cmds
, FILE *in_file
, FILE *out_file
,
41 unsigned long &line
, char *buffer
, Hashes
*hash
) const;
42 void ignoreLineInFile(FILE *fin
, char *buffer
) const;
43 void ignoreLineInFile(gzFile
&fin
, char *buffer
) const;
44 void copyLinesFromFileToFile(FILE *fin
, FILE *fout
, unsigned int lines
,
45 Hashes
*hash
, char *buffer
) const;
46 void copyLinesFromFileToFile(gzFile
&fin
, FILE *fout
, unsigned int lines
,
47 Hashes
*hash
, char *buffer
) const;
49 State
patchFile(FileFd
&Patch
, FileFd
&From
, FileFd
&out_file
, Hashes
*hash
) const;
50 State
patchMMap(FileFd
&Patch
, FileFd
&From
, FileFd
&out_file
, Hashes
*hash
) const;
53 // the methods main method
54 virtual bool Fetch(FetchItem
*Itm
);
57 RredMethod() : pkgAcqMethod("1.1",SingleInstance
| SendConfig
), Debug(false) {};
60 /** \brief applyFile - in reverse order with a tail recursion {{{
62 * As the commands are expected to be in reverse order in the patch file,
63 * we first check whether each command is valid, but do not execute it yet,
64 * and recurse one step deeper. After reaching the end of the file we apply
65 * the patches in the correct order: last found command first.
67 * \param ed_cmds patch file to apply
68 * \param in_file base file we want to patch
69 * \param out_file file to write the patched result to
70 * \param line of command operation
71 * \param buffer internal used read/write buffer
72 * \param hash the created file for correctness
73 * \return the success State of the ed command executor
75 RredMethod::State
RredMethod::applyFile(gzFile
&ed_cmds
, FILE *in_file
, FILE *out_file
,
76 unsigned long &line
, char *buffer
, Hashes
*hash
) const {
77 // get the current command and parse it
78 if (gzgets(ed_cmds
, buffer
, BUF_SIZE
) == NULL
) {
80 std::clog
<< "rred: encounter end of file - we can start patching now." << std::endl
;
85 // parse the affected line numbers
88 unsigned long const startline
= strtol(buffer
, &idx
, 10);
89 if (errno
== ERANGE
|| errno
== EINVAL
) {
90 _error
->Errno("rred", "startline is an invalid number");
93 if (startline
> line
) {
94 _error
->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline
, line
);
97 unsigned long stopline
;
101 stopline
= strtol(idx
, &idx
, 10);
102 if (errno
== ERANGE
|| errno
== EINVAL
) {
103 _error
->Errno("rred", "stopline is an invalid number");
108 stopline
= startline
;
112 // which command to execute on this line(s)?
116 std::clog
<< "Change from line " << startline
<< " to " << stopline
<< std::endl
;
120 std::clog
<< "Insert after line " << startline
<< std::endl
;
124 std::clog
<< "Delete from line " << startline
<< " to " << stopline
<< std::endl
;
127 _error
->Error("rred: Unknown ed command '%c'. Abort.", *idx
);
130 unsigned char mode
= *idx
;
132 // save the current position
133 unsigned const long pos
= gztell(ed_cmds
);
135 // if this is add or change then go to the next full stop
136 unsigned int data_length
= 0;
137 if (mode
== MODE_CHANGED
|| mode
== MODE_ADDED
) {
139 ignoreLineInFile(ed_cmds
, buffer
);
142 while (strncmp(buffer
, ".", 1) != 0);
143 data_length
--; // the dot should not be copied
146 // do the recursive call - the last command is the one we need to execute at first
147 const State child
= applyFile(ed_cmds
, in_file
, out_file
, line
, buffer
, hash
);
148 if (child
!= ED_OK
) {
152 // change and delete are working on "line" - add is done after "line"
153 if (mode
!= MODE_ADDED
)
156 // first wind to the current position and copy over all unchanged lines
157 if (line
< startline
) {
158 copyLinesFromFileToFile(in_file
, out_file
, (startline
- line
), hash
, buffer
);
162 if (mode
!= MODE_ADDED
)
165 // include data from ed script
166 if (mode
== MODE_CHANGED
|| mode
== MODE_ADDED
) {
167 gzseek(ed_cmds
, pos
, SEEK_SET
);
168 copyLinesFromFileToFile(ed_cmds
, out_file
, data_length
, hash
, buffer
);
171 // ignore the corresponding number of lines from input
172 if (mode
== MODE_CHANGED
|| mode
== MODE_DELETED
) {
173 while (line
< stopline
) {
174 ignoreLineInFile(in_file
, buffer
);
181 void RredMethod::copyLinesFromFileToFile(FILE *fin
, FILE *fout
, unsigned int lines
,/*{{{*/
182 Hashes
*hash
, char *buffer
) const {
183 while (0 < lines
--) {
185 fgets(buffer
, BUF_SIZE
, fin
);
186 size_t const written
= fwrite(buffer
, 1, strlen(buffer
), fout
);
187 hash
->Add((unsigned char*)buffer
, written
);
188 } while (strlen(buffer
) == (BUF_SIZE
- 1) &&
189 buffer
[BUF_SIZE
- 2] != '\n');
193 void RredMethod::copyLinesFromFileToFile(gzFile
&fin
, FILE *fout
, unsigned int lines
,/*{{{*/
194 Hashes
*hash
, char *buffer
) const {
195 while (0 < lines
--) {
197 gzgets(fin
, buffer
, BUF_SIZE
);
198 size_t const written
= fwrite(buffer
, 1, strlen(buffer
), fout
);
199 hash
->Add((unsigned char*)buffer
, written
);
200 } while (strlen(buffer
) == (BUF_SIZE
- 1) &&
201 buffer
[BUF_SIZE
- 2] != '\n');
205 void RredMethod::ignoreLineInFile(FILE *fin
, char *buffer
) const { /*{{{*/
206 fgets(buffer
, BUF_SIZE
, fin
);
207 while (strlen(buffer
) == (BUF_SIZE
- 1) &&
208 buffer
[BUF_SIZE
- 2] != '\n') {
209 fgets(buffer
, BUF_SIZE
, fin
);
214 void RredMethod::ignoreLineInFile(gzFile
&fin
, char *buffer
) const { /*{{{*/
215 gzgets(fin
, buffer
, BUF_SIZE
);
216 while (strlen(buffer
) == (BUF_SIZE
- 1) &&
217 buffer
[BUF_SIZE
- 2] != '\n') {
218 gzgets(fin
, buffer
, BUF_SIZE
);
223 RredMethod::State
RredMethod::patchFile(FileFd
&Patch
, FileFd
&From
, /*{{{*/
224 FileFd
&out_file
, Hashes
*hash
) const {
225 char buffer
[BUF_SIZE
];
226 FILE* fFrom
= fdopen(From
.Fd(), "r");
227 gzFile fPatch
= Patch
.gzFd();
228 FILE* fTo
= fdopen(out_file
.Fd(), "w");
230 /* we do a tail recursion to read the commands in the right order */
231 unsigned long line
= -1; // assign highest possible value
232 State
const result
= applyFile(fPatch
, fFrom
, fTo
, line
, buffer
, hash
);
234 /* read the rest from infile */
235 if (result
== ED_OK
) {
236 while (fgets(buffer
, BUF_SIZE
, fFrom
) != NULL
) {
237 size_t const written
= fwrite(buffer
, 1, strlen(buffer
), fTo
);
238 hash
->Add((unsigned char*)buffer
, written
);
245 /* struct EdCommand {{{*/
246 #ifdef _POSIX_MAPPED_FILES
255 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
258 RredMethod::State
RredMethod::patchMMap(FileFd
&Patch
, FileFd
&From
, /*{{{*/
259 FileFd
&out_file
, Hashes
*hash
) const {
260 #ifdef _POSIX_MAPPED_FILES
261 MMap
ed_cmds(MMap::ReadOnly
);
262 if (Patch
.gzFd() != NULL
) {
263 unsigned long long mapSize
= Patch
.Size();
264 DynamicMMap
* dyn
= new DynamicMMap(0, mapSize
, 0);
265 if (dyn
->validData() == false) {
269 dyn
->AddSize(mapSize
);
270 gzread(Patch
.gzFd(), dyn
->Data(), mapSize
);
273 ed_cmds
= MMap(Patch
, MMap::ReadOnly
);
275 MMap
in_file(From
, MMap::ReadOnly
);
277 if (ed_cmds
.Size() == 0 || in_file
.Size() == 0)
280 EdCommand
* commands
= 0;
281 size_t command_count
= 0;
282 size_t command_alloc
= 0;
284 const char* begin
= (char*) ed_cmds
.Data();
285 const char* end
= begin
;
286 const char* ed_end
= (char*) ed_cmds
.Data() + ed_cmds
.Size();
288 const char* input
= (char*) in_file
.Data();
289 const char* input_end
= (char*) in_file
.Data() + in_file
.Size();
293 /* 1. Parse entire script. It is executed in reverse order, so we gather it
294 * in the `commands' buffer first
302 while(begin
!= ed_end
&& *begin
== '\n')
304 while(end
!= ed_end
&& *end
!= '\n')
306 if(end
== ed_end
&& begin
== end
)
309 /* Determine command range */
310 const char* tmp
= begin
;
313 /* atol is safe despite lacking NUL-termination; we know there's an
314 * alphabetic character at end[-1]
317 cmd
.first_line
= atol(begin
);
318 cmd
.last_line
= cmd
.first_line
;
322 cmd
.first_line
= atol(begin
);
323 cmd
.last_line
= atol(tmp
+ 1);
329 // which command to execute on this line(s)?
333 std::clog
<< "Change from line " << cmd
.first_line
<< " to " << cmd
.last_line
<< std::endl
;
337 std::clog
<< "Insert after line " << cmd
.first_line
<< std::endl
;
341 std::clog
<< "Delete from line " << cmd
.first_line
<< " to " << cmd
.last_line
<< std::endl
;
344 _error
->Error("rred: Unknown ed command '%c'. Abort.", end
[-1]);
350 /* Determine the size of the inserted text, so we don't have to scan this
357 if(cmd
.type
== MODE_ADDED
|| cmd
.type
== MODE_CHANGED
) {
358 cmd
.data_start
= begin
- (char*) ed_cmds
.Data();
359 while(end
!= ed_end
) {
361 if(end
[-1] == '.' && end
[-2] == '\n')
367 cmd
.data_end
= end
- (char*) ed_cmds
.Data() - 1;
371 if(command_count
== command_alloc
) {
372 command_alloc
= (command_alloc
+ 64) * 3 / 2;
373 commands
= (EdCommand
*) realloc(commands
, command_alloc
* sizeof(EdCommand
));
375 commands
[command_count
++] = cmd
;
378 struct iovec
* iov
= new struct iovec
[IOV_COUNT
];
381 size_t amount
, remaining
;
385 /* 2. Execute script. We gather writes in a `struct iovec' array, and flush
386 * using writev to minimize the number of system calls. Data is read
387 * directly from the memory mappings of the input file and the script.
390 for(i
= command_count
; i
-- > 0; ) {
392 if(cmd
->type
== MODE_ADDED
)
393 amount
= cmd
->first_line
+ 1;
395 amount
= cmd
->first_line
;
399 while(line
!= amount
) {
400 input
= (const char*) memchr(input
, '\n', input_end
- input
);
407 iov
[iov_size
].iov_base
= (void*) begin
;
408 iov
[iov_size
].iov_len
= input
- begin
;
409 hash
->Add((const unsigned char*) begin
, input
- begin
);
411 if(++iov_size
== IOV_COUNT
) {
412 writev(out_file
.Fd(), iov
, IOV_COUNT
);
417 if(cmd
->type
== MODE_DELETED
|| cmd
->type
== MODE_CHANGED
) {
418 remaining
= (cmd
->last_line
- cmd
->first_line
) + 1;
421 input
= (const char*) memchr(input
, '\n', input_end
- input
);
429 if(cmd
->type
== MODE_CHANGED
|| cmd
->type
== MODE_ADDED
) {
430 if(cmd
->data_end
!= cmd
->data_start
) {
431 iov
[iov_size
].iov_base
= (void*) ((char*)ed_cmds
.Data() + cmd
->data_start
);
432 iov
[iov_size
].iov_len
= cmd
->data_end
- cmd
->data_start
;
433 hash
->Add((const unsigned char*) ((char*)ed_cmds
.Data() + cmd
->data_start
),
434 iov
[iov_size
].iov_len
);
436 if(++iov_size
== IOV_COUNT
) {
437 writev(out_file
.Fd(), iov
, IOV_COUNT
);
444 if(input
!= input_end
) {
445 iov
[iov_size
].iov_base
= (void*) input
;
446 iov
[iov_size
].iov_len
= input_end
- input
;
447 hash
->Add((const unsigned char*) input
, input_end
- input
);
452 writev(out_file
.Fd(), iov
, iov_size
);
456 for(i
= 0; i
< iov_size
; i
+= IOV_COUNT
) {
457 if(iov_size
- i
< IOV_COUNT
)
458 writev(out_file
.Fd(), iov
+ i
, iov_size
- i
);
460 writev(out_file
.Fd(), iov
+ i
, IOV_COUNT
);
472 bool RredMethod::Fetch(FetchItem
*Itm
) /*{{{*/
474 Debug
= _config
->FindB("Debug::pkgAcquire::RRed", false);
476 std::string Path
= Get
.Host
+ Get
.Path
; // To account for relative paths
479 Res
.Filename
= Itm
->DestFile
;
480 if (Itm
->Uri
.empty() == true) {
481 Path
= Itm
->DestFile
;
482 Itm
->DestFile
.append(".result");
487 std::clog
<< "Patching " << Path
<< " with " << Path
488 << ".ed and putting result into " << Itm
->DestFile
<< std::endl
;
489 // Open the source and destination files (the d'tor of FileFd will do
490 // the cleanup/closing of the fds)
491 FileFd
From(Path
,FileFd::ReadOnly
);
492 FileFd
Patch(Path
+".ed",FileFd::ReadOnly
, FileFd::Gzip
);
493 FileFd
To(Itm
->DestFile
,FileFd::WriteAtomic
);
495 if (_error
->PendingError() == true)
499 // now do the actual patching
500 State
const result
= patchMMap(Patch
, From
, To
, &Hash
);
501 if (result
== MMAP_FAILED
) {
502 // retry with patchFile
505 To
.Open(Itm
->DestFile
,FileFd::WriteAtomic
);
506 if (_error
->PendingError() == true)
508 if (patchFile(Patch
, From
, To
, &Hash
) != ED_OK
) {
509 return _error
->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path
.c_str());
510 } else if (Debug
== true) {
511 std::clog
<< "rred: finished file patching of " << Path
<< " after mmap failed." << std::endl
;
513 } else if (result
!= ED_OK
) {
514 return _error
->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path
.c_str());
515 } else if (Debug
== true) {
516 std::clog
<< "rred: finished mmap patching of " << Path
<< std::endl
;
519 // write out the result
524 /* Transfer the modification times from the patch file
525 to be able to see in which state the file should be
526 and use the access time from the "old" file */
527 struct stat BufBase
, BufPatch
;
528 if (stat(Path
.c_str(),&BufBase
) != 0 ||
529 stat(std::string(Path
+".ed").c_str(),&BufPatch
) != 0)
530 return _error
->Errno("stat",_("Failed to stat"));
532 struct utimbuf TimeBuf
;
533 TimeBuf
.actime
= BufBase
.st_atime
;
534 TimeBuf
.modtime
= BufPatch
.st_mtime
;
535 if (utime(Itm
->DestFile
.c_str(),&TimeBuf
) != 0)
536 return _error
->Errno("utime",_("Failed to set modification time"));
538 if (stat(Itm
->DestFile
.c_str(),&BufBase
) != 0)
539 return _error
->Errno("stat",_("Failed to stat"));
542 Res
.LastModified
= BufBase
.st_mtime
;
543 Res
.Size
= BufBase
.st_size
;
544 Res
.TakeHashes(Hash
);
550 /** \brief Wrapper class for testing rred */ /*{{{*/
551 class TestRredMethod
: public RredMethod
{
553 /** \brief Run rred in debug test mode
555 * This method can be used to run the rred method outside
556 * of the "normal" acquire environment for easier testing.
558 * \param base basename of all files involved in this rred test
560 bool Run(char const *base
) {
561 _config
->CndSet("Debug::pkgAcquire::RRed", "true");
562 FetchItem
*test
= new FetchItem
;
563 test
->DestFile
= base
;
568 /** \brief Starter for the rred method (or its test method) {{{
570 * Running it without parameters is the normal behavior for methods of
571 * the APT acquire system. While this works great for the acquire system
572 * it is very hard to test the method and therefore the method also
573 * accepts one parameter which will switch it directly to debug test mode:
574 * The test mode expects that if "Testfile" is given as parameter
575 * the file "Testfile" should be ed-style patched with "Testfile.ed"
576 * and will write the result to "Testfile.result".
578 int main(int argc
, char *argv
[]) {
584 bool result
= Mth
.Run(argv
[1]);
585 _error
->DumpErrors();