]> git.saurik.com Git - apt.git/blame - ftparchive/multicompress.cc
* merged with otavios branch
[apt.git] / ftparchive / multicompress.cc
CommitLineData
b2e465d6
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
dc738e7a 3// $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
b2e465d6
AL
4/* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16// Include Files /*{{{*/
17#ifdef __GNUG__
18#pragma implementation "multicompress.h"
19#endif
20
#include "multicompress.h"

#include <apti18n.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <utime.h>
#include <unistd.h>
#include <iostream>
b2e465d6
AL
33 /*}}}*/
34
812f4169
AL
35using namespace std;
36
b2e465d6
AL
37const MultiCompress::CompType MultiCompress::Compressors[] =
38 {{".","",0,0,0,1},
39 {"gzip",".gz","gzip","-9n","-d",2},
40 {"bzip2",".bz2","bzip2","-9","-d",3},
41 {}};
42
43// MultiCompress::MultiCompress - Constructor /*{{{*/
44// ---------------------------------------------------------------------
45/* Setup the file outputs, compression modes and fork the writer child */
46MultiCompress::MultiCompress(string Output,string Compress,
47 mode_t Permissions,bool Write)
48{
49 Outputs = 0;
50 Outputter = -1;
51 Input = 0;
52 UpdateMTime = 0;
53 this->Permissions = Permissions;
54
55 /* Parse the compression string, a space separated lists of compresison
56 types */
57 string::const_iterator I = Compress.begin();
58 for (; I != Compress.end();)
59 {
60 for (; I != Compress.end() && isspace(*I); I++);
61
62 // Grab a word
63 string::const_iterator Start = I;
64 for (; I != Compress.end() && !isspace(*I); I++);
65
66 // Find the matching compressor
67 const CompType *Comp = Compressors;
68 for (; Comp->Name != 0; Comp++)
69 if (stringcmp(Start,I,Comp->Name) == 0)
70 break;
71
72 // Hmm.. unknown.
73 if (Comp->Name == 0)
74 {
db0db9fe 75 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
b2e465d6
AL
76 continue;
77 }
78
79 // Create and link in a new output
80 Files *NewOut = new Files;
81 NewOut->Next = Outputs;
82 Outputs = NewOut;
83 NewOut->CompressProg = Comp;
84 NewOut->Output = Output+Comp->Extension;
85
86 struct stat St;
87 if (stat(NewOut->Output.c_str(),&St) == 0)
88 NewOut->OldMTime = St.st_mtime;
89 else
90 NewOut->OldMTime = 0;
91 }
92
93 if (Write == false)
94 return;
95
96 /* Open all the temp files now so we can report any errors. File is
97 made unreable to prevent people from touching it during creating. */
98 for (Files *I = Outputs; I != 0; I = I->Next)
99 I->TmpFile.Open(I->Output + ".new",FileFd::WriteEmpty,0600);
100 if (_error->PendingError() == true)
101 return;
102
103 if (Outputs == 0)
104 {
dc738e7a 105 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
b2e465d6
AL
106 return;
107 }
108
109 Start();
110}
111 /*}}}*/
112// MultiCompress::~MultiCompress - Destructor /*{{{*/
113// ---------------------------------------------------------------------
114/* Just erase the file linked list. */
115MultiCompress::~MultiCompress()
116{
117 Die();
118
119 for (; Outputs != 0;)
120 {
121 Files *Tmp = Outputs->Next;
122 delete Outputs;
123 Outputs = Tmp;
124 }
125}
126 /*}}}*/
127// MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
128// ---------------------------------------------------------------------
129/* This checks each compressed file to make sure it exists and returns
130 stat information for a random file from the collection. False means
131 one or more of the files is missing. */
132bool MultiCompress::GetStat(string Output,string Compress,struct stat &St)
133{
134 /* Parse the compression string, a space separated lists of compresison
135 types */
136 string::const_iterator I = Compress.begin();
137 bool DidStat = false;
138 for (; I != Compress.end();)
139 {
140 for (; I != Compress.end() && isspace(*I); I++);
141
142 // Grab a word
143 string::const_iterator Start = I;
144 for (; I != Compress.end() && !isspace(*I); I++);
145
146 // Find the matching compressor
147 const CompType *Comp = Compressors;
148 for (; Comp->Name != 0; Comp++)
149 if (stringcmp(Start,I,Comp->Name) == 0)
150 break;
151
152 // Hmm.. unknown.
153 if (Comp->Name == 0)
154 continue;
155
156 string Name = Output+Comp->Extension;
157 if (stat(Name.c_str(),&St) != 0)
158 return false;
159 DidStat = true;
160 }
161 return DidStat;
162}
163 /*}}}*/
164// MultiCompress::Start - Start up the writer child /*{{{*/
165// ---------------------------------------------------------------------
166/* Fork a child and setup the communication pipe. */
167bool MultiCompress::Start()
168{
169 // Create a data pipe
170 int Pipe[2] = {-1,-1};
171 if (pipe(Pipe) != 0)
dc738e7a 172 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
b2e465d6
AL
173 for (int I = 0; I != 2; I++)
174 SetCloseExec(Pipe[I],true);
175
176 // The child..
177 Outputter = fork();
178 if (Outputter == 0)
179 {
180 close(Pipe[1]);
181 Child(Pipe[0]);
182 if (_error->PendingError() == true)
183 {
184 _error->DumpErrors();
185 _exit(100);
186 }
187 _exit(0);
188 };
189
190 /* Tidy up the temp files, we open them in the constructor so as to
191 get proper error reporting. Close them now. */
192 for (Files *I = Outputs; I != 0; I = I->Next)
193 I->TmpFile.Close();
194
195 close(Pipe[0]);
196 Input = fdopen(Pipe[1],"w");
197 if (Input == 0)
dc738e7a 198 return _error->Errno("fdopen",_("Failed to create FILE*"));
b2e465d6
AL
199
200 if (Outputter == -1)
dc738e7a 201 return _error->Errno("fork",_("Failed to fork"));
b2e465d6
AL
202 return true;
203}
204 /*}}}*/
205// MultiCompress::Die - Clean up the writer /*{{{*/
206// ---------------------------------------------------------------------
207/* */
208bool MultiCompress::Die()
209{
210 if (Input == 0)
211 return true;
212
213 fclose(Input);
214 Input = 0;
db0db9fe 215 bool Res = ExecWait(Outputter,_("Compress child"),false);
b2e465d6
AL
216 Outputter = -1;
217 return Res;
218}
219 /*}}}*/
220// MultiCompress::Finalize - Finish up writing /*{{{*/
221// ---------------------------------------------------------------------
222/* This is only necessary for statistics reporting. */
223bool MultiCompress::Finalize(unsigned long &OutSize)
224{
225 OutSize = 0;
226 if (Input == 0 || Die() == false)
227 return false;
228
229 time_t Now;
230 time(&Now);
231
232 // Check the mtimes to see if the files were replaced.
233 bool Changed = false;
234 for (Files *I = Outputs; I != 0; I = I->Next)
235 {
236 struct stat St;
237 if (stat(I->Output.c_str(),&St) != 0)
db0db9fe 238 return _error->Error(_("Internal error, failed to create %s"),
b2e465d6
AL
239 I->Output.c_str());
240
241 if (I->OldMTime != St.st_mtime)
242 Changed = true;
243 else
244 {
245 // Update the mtime if necessary
246 if (UpdateMTime > 0 &&
247 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
248 {
249 struct utimbuf Buf;
250 Buf.actime = Buf.modtime = Now;
251 utime(I->Output.c_str(),&Buf);
252 Changed = true;
253 }
254 }
255
256 // Force the file permissions
257 if (St.st_mode != Permissions)
258 chmod(I->Output.c_str(),Permissions);
259
260 OutSize += St.st_size;
261 }
262
263 if (Changed == false)
264 OutSize = 0;
265
266 return true;
267}
268 /*}}}*/
269// MultiCompress::OpenCompress - Open the compressor /*{{{*/
270// ---------------------------------------------------------------------
271/* This opens the compressor, either in compress mode or decompress
272 mode. FileFd is always the compressor input/output file,
273 OutFd is the created pipe, Input for Compress, Output for Decompress. */
3826564e 274bool MultiCompress::OpenCompress(const CompType *Prog,pid_t &Pid,int FileFd,
b2e465d6
AL
275 int &OutFd,bool Comp)
276{
277 Pid = -1;
278
279 // No compression
280 if (Prog->Binary == 0)
281 {
282 OutFd = dup(FileFd);
283 return true;
284 }
285
286 // Create a data pipe
287 int Pipe[2] = {-1,-1};
288 if (pipe(Pipe) != 0)
dc738e7a 289 return _error->Errno("pipe",_("Failed to create subprocess IPC"));
b2e465d6
AL
290 for (int J = 0; J != 2; J++)
291 SetCloseExec(Pipe[J],true);
292
293 if (Comp == true)
294 OutFd = Pipe[1];
295 else
296 OutFd = Pipe[0];
297
298 // The child..
299 Pid = ExecFork();
300 if (Pid == 0)
301 {
302 if (Comp == true)
303 {
304 dup2(FileFd,STDOUT_FILENO);
305 dup2(Pipe[0],STDIN_FILENO);
306 }
307 else
308 {
309 dup2(FileFd,STDIN_FILENO);
310 dup2(Pipe[1],STDOUT_FILENO);
311 }
312
313 SetCloseExec(STDOUT_FILENO,false);
314 SetCloseExec(STDIN_FILENO,false);
315
316 const char *Args[3];
317 Args[0] = Prog->Binary;
318 if (Comp == true)
319 Args[1] = Prog->CompArgs;
320 else
321 Args[1] = Prog->UnCompArgs;
322 Args[2] = 0;
323 execvp(Args[0],(char **)Args);
dc738e7a 324 cerr << _("Failed to exec compressor ") << Args[0] << endl;
b2e465d6
AL
325 _exit(100);
326 };
327 if (Comp == true)
328 close(Pipe[0]);
329 else
330 close(Pipe[1]);
331 return true;
332}
333 /*}}}*/
334// MultiCompress::OpenOld - Open an old file /*{{{*/
335// ---------------------------------------------------------------------
336/* This opens one of the original output files, possibly decompressing it. */
3826564e 337bool MultiCompress::OpenOld(int &Fd,pid_t &Proc)
b2e465d6
AL
338{
339 Files *Best = Outputs;
340 for (Files *I = Outputs; I != 0; I = I->Next)
341 if (Best->CompressProg->Cost > I->CompressProg->Cost)
342 Best = I;
343
344 // Open the file
345 FileFd F(Best->Output,FileFd::ReadOnly);
346 if (_error->PendingError() == true)
347 return false;
348
349 // Decompress the file so we can read it
350 if (OpenCompress(Best->CompressProg,Proc,F.Fd(),Fd,false) == false)
351 return false;
352
353 return true;
354}
355 /*}}}*/
356// MultiCompress::CloseOld - Close the old file /*{{{*/
357// ---------------------------------------------------------------------
358/* */
3826564e 359bool MultiCompress::CloseOld(int Fd,pid_t Proc)
b2e465d6
AL
360{
361 close(Fd);
362 if (Proc != -1)
dc738e7a 363 if (ExecWait(Proc,_("decompressor"),false) == false)
b2e465d6
AL
364 return false;
365 return true;
366}
367 /*}}}*/
368// MultiCompress::Child - The writer child /*{{{*/
369// ---------------------------------------------------------------------
370/* The child process forks a bunch of compression children and takes
371 input on FD and passes it to all the compressor childer. On the way it
372 computes the MD5 of the raw data. After this the raw data in the
373 original files is compared to see if this data is new. If the data
374 is new then the temp files are renamed, otherwise they are erased. */
375bool MultiCompress::Child(int FD)
376{
377 // Start the compression children.
378 for (Files *I = Outputs; I != 0; I = I->Next)
379 {
380 if (OpenCompress(I->CompressProg,I->CompressProc,I->TmpFile.Fd(),
381 I->Fd,true) == false)
382 return false;
383 }
384
385 /* Okay, now we just feed data from FD to all the other FDs. Also
386 stash a hash of the data to use later. */
387 SetNonBlock(FD,false);
388 unsigned char Buffer[32*1024];
389 unsigned long FileSize = 0;
390 MD5Summation MD5;
391 while (1)
392 {
393 WaitFd(FD,false);
394 int Res = read(FD,Buffer,sizeof(Buffer));
395 if (Res == 0)
396 break;
397 if (Res < 0)
398 continue;
399
400 MD5.Add(Buffer,Res);
401 FileSize += Res;
402 for (Files *I = Outputs; I != 0; I = I->Next)
403 {
404 if (write(I->Fd,Buffer,Res) != Res)
405 {
dc738e7a 406 _error->Errno("write",_("IO to subprocess/file failed"));
b2e465d6
AL
407 break;
408 }
409 }
410 }
411
412 // Close all the writers
413 for (Files *I = Outputs; I != 0; I = I->Next)
414 close(I->Fd);
415
416 // Wait for the compressors to exit
417 for (Files *I = Outputs; I != 0; I = I->Next)
418 {
419 if (I->CompressProc != -1)
420 ExecWait(I->CompressProc,I->CompressProg->Binary,false);
421 }
422
423 if (_error->PendingError() == true)
424 return false;
425
426 /* Now we have to copy the files over, or erase them if they
427 have not changed. First find the cheapest decompressor */
428 bool Missing = false;
429 for (Files *I = Outputs; I != 0; I = I->Next)
430 {
431 if (I->OldMTime == 0)
432 {
433 Missing = true;
434 break;
435 }
436 }
437
438 // Check the MD5 of the lowest cost entity.
439 while (Missing == false)
440 {
441 int CompFd = -1;
3826564e 442 pid_t Proc = -1;
b2e465d6
AL
443 if (OpenOld(CompFd,Proc) == false)
444 {
445 _error->Discard();
446 break;
447 }
448
449 // Compute the hash
450 MD5Summation OldMD5;
451 unsigned long NewFileSize = 0;
452 while (1)
453 {
454 int Res = read(CompFd,Buffer,sizeof(Buffer));
455 if (Res == 0)
456 break;
457 if (Res < 0)
dc738e7a 458 return _error->Errno("read",_("Failed to read while computing MD5"));
b2e465d6
AL
459 NewFileSize += Res;
460 OldMD5.Add(Buffer,Res);
461 }
462
463 // Tidy the compressor
464 if (CloseOld(CompFd,Proc) == false)
465 return false;
466
467 // Check the hash
468 if (OldMD5.Result() == MD5.Result() &&
469 FileSize == NewFileSize)
470 {
471 for (Files *I = Outputs; I != 0; I = I->Next)
472 {
473 I->TmpFile.Close();
474 if (unlink(I->TmpFile.Name().c_str()) != 0)
dc738e7a 475 _error->Errno("unlink",_("Problem unlinking %s"),
b2e465d6
AL
476 I->TmpFile.Name().c_str());
477 }
478 return !_error->PendingError();
479 }
480 break;
481 }
482
483 // Finalize
484 for (Files *I = Outputs; I != 0; I = I->Next)
485 {
486 // Set the correct file modes
487 fchmod(I->TmpFile.Fd(),Permissions);
488
489 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
dc738e7a 490 _error->Errno("rename",_("Failed to rename %s to %s"),
b2e465d6
AL
491 I->TmpFile.Name().c_str(),I->Output.c_str());
492 I->TmpFile.Close();
493 }
494
495 return !_error->PendingError();
496}
497 /*}}}*/
498