]> git.saurik.com Git - apt.git/blame - ftparchive/multicompress.cc
apt-pkg/pkgcachegen.cc: copy Arch to avoid segfault on cache remap
[apt.git] / ftparchive / multicompress.cc
CommitLineData
b2e465d6
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
dc738e7a 3// $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
b2e465d6
AL
4/* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16// Include Files /*{{{*/
b2e465d6
AL
#include "multicompress.h"

#include <apti18n.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <utime.h>
#include <unistd.h>
#include <iostream>
b2e465d6
AL
29 /*}}}*/
30
812f4169
AL
31using namespace std;
32
b2e465d6
AL
33
34// MultiCompress::MultiCompress - Constructor /*{{{*/
35// ---------------------------------------------------------------------
36/* Setup the file outputs, compression modes and fork the writer child */
9209ec47
DK
37MultiCompress::MultiCompress(string const &Output,string const &Compress,
38 mode_t const &Permissions,bool const &Write) :
39 Permissions(Permissions)
b2e465d6
AL
40{
41 Outputs = 0;
42 Outputter = -1;
43 Input = 0;
44 UpdateMTime = 0;
03bef784 45
b2e465d6
AL
46 /* Parse the compression string, a space separated lists of compresison
47 types */
48 string::const_iterator I = Compress.begin();
49 for (; I != Compress.end();)
50 {
51 for (; I != Compress.end() && isspace(*I); I++);
52
53 // Grab a word
54 string::const_iterator Start = I;
55 for (; I != Compress.end() && !isspace(*I); I++);
56
57 // Find the matching compressor
03bef784
DK
58 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
59 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
60 for (; Comp != Compressors.end(); ++Comp)
61 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
b2e465d6
AL
62 break;
63
64 // Hmm.. unknown.
03bef784 65 if (Comp == Compressors.end())
b2e465d6 66 {
db0db9fe 67 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
b2e465d6
AL
68 continue;
69 }
70
71 // Create and link in a new output
72 Files *NewOut = new Files;
73 NewOut->Next = Outputs;
74 Outputs = NewOut;
03bef784 75 NewOut->CompressProg = *Comp;
b2e465d6
AL
76 NewOut->Output = Output+Comp->Extension;
77
78 struct stat St;
79 if (stat(NewOut->Output.c_str(),&St) == 0)
80 NewOut->OldMTime = St.st_mtime;
81 else
82 NewOut->OldMTime = 0;
83 }
84
85 if (Write == false)
86 return;
87
88 /* Open all the temp files now so we can report any errors. File is
89 made unreable to prevent people from touching it during creating. */
90 for (Files *I = Outputs; I != 0; I = I->Next)
91 I->TmpFile.Open(I->Output + ".new",FileFd::WriteEmpty,0600);
92 if (_error->PendingError() == true)
93 return;
94
95 if (Outputs == 0)
96 {
dc738e7a 97 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
b2e465d6
AL
98 return;
99 }
100
101 Start();
102}
103 /*}}}*/
104// MultiCompress::~MultiCompress - Destructor /*{{{*/
105// ---------------------------------------------------------------------
106/* Just erase the file linked list. */
107MultiCompress::~MultiCompress()
108{
109 Die();
110
111 for (; Outputs != 0;)
112 {
113 Files *Tmp = Outputs->Next;
114 delete Outputs;
115 Outputs = Tmp;
116 }
117}
118 /*}}}*/
119// MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
120// ---------------------------------------------------------------------
121/* This checks each compressed file to make sure it exists and returns
122 stat information for a random file from the collection. False means
123 one or more of the files is missing. */
9209ec47 124bool MultiCompress::GetStat(string const &Output,string const &Compress,struct stat &St)
b2e465d6
AL
125{
126 /* Parse the compression string, a space separated lists of compresison
127 types */
128 string::const_iterator I = Compress.begin();
129 bool DidStat = false;
130 for (; I != Compress.end();)
131 {
132 for (; I != Compress.end() && isspace(*I); I++);
133
134 // Grab a word
135 string::const_iterator Start = I;
136 for (; I != Compress.end() && !isspace(*I); I++);
137
138 // Find the matching compressor
03bef784
DK
139 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
140 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
141 for (; Comp != Compressors.end(); ++Comp)
142 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
b2e465d6
AL
143 break;
144
145 // Hmm.. unknown.
03bef784 146 if (Comp == Compressors.end())
b2e465d6
AL
147 continue;
148
149 string Name = Output+Comp->Extension;
150 if (stat(Name.c_str(),&St) != 0)
151 return false;
152 DidStat = true;
153 }
154 return DidStat;
155}
156 /*}}}*/
157// MultiCompress::Start - Start up the writer child /*{{{*/
158// ---------------------------------------------------------------------
159/* Fork a child and setup the communication pipe. */
160bool MultiCompress::Start()
161{
162 // Create a data pipe
163 int Pipe[2] = {-1,-1};
164 if (pipe(Pipe) != 0)
dc738e7a 165 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
b2e465d6
AL
166 for (int I = 0; I != 2; I++)
167 SetCloseExec(Pipe[I],true);
168
169 // The child..
170 Outputter = fork();
171 if (Outputter == 0)
172 {
173 close(Pipe[1]);
174 Child(Pipe[0]);
175 if (_error->PendingError() == true)
176 {
177 _error->DumpErrors();
178 _exit(100);
179 }
180 _exit(0);
181 };
182
183 /* Tidy up the temp files, we open them in the constructor so as to
184 get proper error reporting. Close them now. */
185 for (Files *I = Outputs; I != 0; I = I->Next)
186 I->TmpFile.Close();
187
188 close(Pipe[0]);
189 Input = fdopen(Pipe[1],"w");
190 if (Input == 0)
dc738e7a 191 return _error->Errno("fdopen",_("Failed to create FILE*"));
b2e465d6
AL
192
193 if (Outputter == -1)
dc738e7a 194 return _error->Errno("fork",_("Failed to fork"));
b2e465d6
AL
195 return true;
196}
197 /*}}}*/
198// MultiCompress::Die - Clean up the writer /*{{{*/
199// ---------------------------------------------------------------------
200/* */
201bool MultiCompress::Die()
202{
203 if (Input == 0)
204 return true;
205
206 fclose(Input);
207 Input = 0;
db0db9fe 208 bool Res = ExecWait(Outputter,_("Compress child"),false);
b2e465d6
AL
209 Outputter = -1;
210 return Res;
211}
212 /*}}}*/
213// MultiCompress::Finalize - Finish up writing /*{{{*/
214// ---------------------------------------------------------------------
215/* This is only necessary for statistics reporting. */
216bool MultiCompress::Finalize(unsigned long &OutSize)
217{
218 OutSize = 0;
219 if (Input == 0 || Die() == false)
220 return false;
221
222 time_t Now;
223 time(&Now);
224
225 // Check the mtimes to see if the files were replaced.
226 bool Changed = false;
227 for (Files *I = Outputs; I != 0; I = I->Next)
228 {
229 struct stat St;
230 if (stat(I->Output.c_str(),&St) != 0)
db0db9fe 231 return _error->Error(_("Internal error, failed to create %s"),
b2e465d6
AL
232 I->Output.c_str());
233
234 if (I->OldMTime != St.st_mtime)
235 Changed = true;
236 else
237 {
238 // Update the mtime if necessary
239 if (UpdateMTime > 0 &&
240 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
241 {
242 struct utimbuf Buf;
243 Buf.actime = Buf.modtime = Now;
244 utime(I->Output.c_str(),&Buf);
245 Changed = true;
246 }
247 }
248
249 // Force the file permissions
250 if (St.st_mode != Permissions)
251 chmod(I->Output.c_str(),Permissions);
252
253 OutSize += St.st_size;
254 }
255
256 if (Changed == false)
257 OutSize = 0;
258
259 return true;
260}
261 /*}}}*/
262// MultiCompress::OpenCompress - Open the compressor /*{{{*/
263// ---------------------------------------------------------------------
264/* This opens the compressor, either in compress mode or decompress
265 mode. FileFd is always the compressor input/output file,
266 OutFd is the created pipe, Input for Compress, Output for Decompress. */
03bef784
DK
267bool MultiCompress::OpenCompress(APT::Configuration::Compressor const &Prog,
268 pid_t &Pid,int const &FileFd,int &OutFd,bool const &Comp)
b2e465d6
AL
269{
270 Pid = -1;
271
272 // No compression
03bef784 273 if (Prog.Binary.empty() == true)
b2e465d6
AL
274 {
275 OutFd = dup(FileFd);
276 return true;
277 }
278
279 // Create a data pipe
280 int Pipe[2] = {-1,-1};
281 if (pipe(Pipe) != 0)
dc738e7a 282 return _error->Errno("pipe",_("Failed to create subprocess IPC"));
b2e465d6
AL
283 for (int J = 0; J != 2; J++)
284 SetCloseExec(Pipe[J],true);
285
286 if (Comp == true)
287 OutFd = Pipe[1];
288 else
289 OutFd = Pipe[0];
290
291 // The child..
292 Pid = ExecFork();
293 if (Pid == 0)
294 {
295 if (Comp == true)
296 {
297 dup2(FileFd,STDOUT_FILENO);
298 dup2(Pipe[0],STDIN_FILENO);
299 }
300 else
301 {
302 dup2(FileFd,STDIN_FILENO);
303 dup2(Pipe[1],STDOUT_FILENO);
304 }
305
306 SetCloseExec(STDOUT_FILENO,false);
307 SetCloseExec(STDIN_FILENO,false);
03bef784
DK
308
309 std::vector<char const*> Args;
310 Args.push_back(Prog.Binary.c_str());
311 std::vector<std::string> const * const addArgs =
312 (Comp == true) ? &(Prog.CompressArgs) : &(Prog.UncompressArgs);
313 for (std::vector<std::string>::const_iterator a = addArgs->begin();
314 a != addArgs->end(); ++a)
315 Args.push_back(a->c_str());
316 Args.push_back(NULL);
317
318 execvp(Args[0],(char **)&Args[0]);
dc738e7a 319 cerr << _("Failed to exec compressor ") << Args[0] << endl;
b2e465d6
AL
320 _exit(100);
321 };
322 if (Comp == true)
323 close(Pipe[0]);
324 else
325 close(Pipe[1]);
326 return true;
327}
328 /*}}}*/
329// MultiCompress::OpenOld - Open an old file /*{{{*/
330// ---------------------------------------------------------------------
331/* This opens one of the original output files, possibly decompressing it. */
3826564e 332bool MultiCompress::OpenOld(int &Fd,pid_t &Proc)
b2e465d6
AL
333{
334 Files *Best = Outputs;
335 for (Files *I = Outputs; I != 0; I = I->Next)
03bef784 336 if (Best->CompressProg.Cost > I->CompressProg.Cost)
b2e465d6
AL
337 Best = I;
338
339 // Open the file
340 FileFd F(Best->Output,FileFd::ReadOnly);
341 if (_error->PendingError() == true)
342 return false;
343
344 // Decompress the file so we can read it
345 if (OpenCompress(Best->CompressProg,Proc,F.Fd(),Fd,false) == false)
346 return false;
347
348 return true;
349}
350 /*}}}*/
351// MultiCompress::CloseOld - Close the old file /*{{{*/
352// ---------------------------------------------------------------------
353/* */
3826564e 354bool MultiCompress::CloseOld(int Fd,pid_t Proc)
b2e465d6
AL
355{
356 close(Fd);
357 if (Proc != -1)
dc738e7a 358 if (ExecWait(Proc,_("decompressor"),false) == false)
b2e465d6
AL
359 return false;
360 return true;
361}
362 /*}}}*/
363// MultiCompress::Child - The writer child /*{{{*/
364// ---------------------------------------------------------------------
365/* The child process forks a bunch of compression children and takes
c6474fb6 366 input on FD and passes it to all the compressor child. On the way it
b2e465d6
AL
367 computes the MD5 of the raw data. After this the raw data in the
368 original files is compared to see if this data is new. If the data
369 is new then the temp files are renamed, otherwise they are erased. */
9209ec47 370bool MultiCompress::Child(int const &FD)
b2e465d6
AL
371{
372 // Start the compression children.
373 for (Files *I = Outputs; I != 0; I = I->Next)
374 {
375 if (OpenCompress(I->CompressProg,I->CompressProc,I->TmpFile.Fd(),
376 I->Fd,true) == false)
377 return false;
378 }
379
380 /* Okay, now we just feed data from FD to all the other FDs. Also
381 stash a hash of the data to use later. */
382 SetNonBlock(FD,false);
383 unsigned char Buffer[32*1024];
384 unsigned long FileSize = 0;
385 MD5Summation MD5;
386 while (1)
387 {
388 WaitFd(FD,false);
389 int Res = read(FD,Buffer,sizeof(Buffer));
390 if (Res == 0)
391 break;
392 if (Res < 0)
393 continue;
394
395 MD5.Add(Buffer,Res);
396 FileSize += Res;
397 for (Files *I = Outputs; I != 0; I = I->Next)
398 {
399 if (write(I->Fd,Buffer,Res) != Res)
400 {
dc738e7a 401 _error->Errno("write",_("IO to subprocess/file failed"));
b2e465d6
AL
402 break;
403 }
404 }
405 }
406
407 // Close all the writers
408 for (Files *I = Outputs; I != 0; I = I->Next)
409 close(I->Fd);
410
411 // Wait for the compressors to exit
412 for (Files *I = Outputs; I != 0; I = I->Next)
413 {
414 if (I->CompressProc != -1)
03bef784 415 ExecWait(I->CompressProc, I->CompressProg.Binary.c_str(), false);
b2e465d6
AL
416 }
417
418 if (_error->PendingError() == true)
419 return false;
420
421 /* Now we have to copy the files over, or erase them if they
422 have not changed. First find the cheapest decompressor */
423 bool Missing = false;
424 for (Files *I = Outputs; I != 0; I = I->Next)
425 {
426 if (I->OldMTime == 0)
427 {
428 Missing = true;
429 break;
430 }
431 }
432
433 // Check the MD5 of the lowest cost entity.
434 while (Missing == false)
435 {
436 int CompFd = -1;
3826564e 437 pid_t Proc = -1;
b2e465d6
AL
438 if (OpenOld(CompFd,Proc) == false)
439 {
440 _error->Discard();
441 break;
442 }
443
444 // Compute the hash
445 MD5Summation OldMD5;
446 unsigned long NewFileSize = 0;
447 while (1)
448 {
449 int Res = read(CompFd,Buffer,sizeof(Buffer));
450 if (Res == 0)
451 break;
452 if (Res < 0)
dc738e7a 453 return _error->Errno("read",_("Failed to read while computing MD5"));
b2e465d6
AL
454 NewFileSize += Res;
455 OldMD5.Add(Buffer,Res);
456 }
457
458 // Tidy the compressor
459 if (CloseOld(CompFd,Proc) == false)
460 return false;
461
462 // Check the hash
463 if (OldMD5.Result() == MD5.Result() &&
464 FileSize == NewFileSize)
465 {
466 for (Files *I = Outputs; I != 0; I = I->Next)
467 {
468 I->TmpFile.Close();
469 if (unlink(I->TmpFile.Name().c_str()) != 0)
dc738e7a 470 _error->Errno("unlink",_("Problem unlinking %s"),
b2e465d6
AL
471 I->TmpFile.Name().c_str());
472 }
473 return !_error->PendingError();
474 }
475 break;
476 }
477
478 // Finalize
479 for (Files *I = Outputs; I != 0; I = I->Next)
480 {
481 // Set the correct file modes
482 fchmod(I->TmpFile.Fd(),Permissions);
483
484 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
dc738e7a 485 _error->Errno("rename",_("Failed to rename %s to %s"),
b2e465d6
AL
486 I->TmpFile.Name().c_str(),I->Output.c_str());
487 I->TmpFile.Close();
488 }
489
490 return !_error->PendingError();
491}
492 /*}}}*/
493