]> git.saurik.com Git - apt.git/blob - ftparchive/multicompress.cc
add options to disable specific checksums for Indexes
[apt.git] / ftparchive / multicompress.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
4 /* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16 // Include Files /*{{{*/
#include "multicompress.h"

#include <apti18n.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <utime.h>
#include <unistd.h>
#include <iostream>
29 /*}}}*/
30
31 using namespace std;
32
33 const MultiCompress::CompType MultiCompress::Compressors[] =
34 {{".","",0,0,0,1},
35 {"gzip",".gz","gzip","-9n","-d",2},
36 {"bzip2",".bz2","bzip2","-9","-d",3},
37 {"lzma",".lzma","lzma","-9","-d",4},
38 {}};
39
40 // MultiCompress::MultiCompress - Constructor /*{{{*/
41 // ---------------------------------------------------------------------
42 /* Setup the file outputs, compression modes and fork the writer child */
43 MultiCompress::MultiCompress(string const &Output,string const &Compress,
44 mode_t const &Permissions,bool const &Write) :
45 Permissions(Permissions)
46 {
47 Outputs = 0;
48 Outputter = -1;
49 Input = 0;
50 UpdateMTime = 0;
51
52 /* Parse the compression string, a space separated lists of compresison
53 types */
54 string::const_iterator I = Compress.begin();
55 for (; I != Compress.end();)
56 {
57 for (; I != Compress.end() && isspace(*I); I++);
58
59 // Grab a word
60 string::const_iterator Start = I;
61 for (; I != Compress.end() && !isspace(*I); I++);
62
63 // Find the matching compressor
64 const CompType *Comp = Compressors;
65 for (; Comp->Name != 0; Comp++)
66 if (stringcmp(Start,I,Comp->Name) == 0)
67 break;
68
69 // Hmm.. unknown.
70 if (Comp->Name == 0)
71 {
72 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
73 continue;
74 }
75
76 // Create and link in a new output
77 Files *NewOut = new Files;
78 NewOut->Next = Outputs;
79 Outputs = NewOut;
80 NewOut->CompressProg = Comp;
81 NewOut->Output = Output+Comp->Extension;
82
83 struct stat St;
84 if (stat(NewOut->Output.c_str(),&St) == 0)
85 NewOut->OldMTime = St.st_mtime;
86 else
87 NewOut->OldMTime = 0;
88 }
89
90 if (Write == false)
91 return;
92
93 /* Open all the temp files now so we can report any errors. File is
94 made unreable to prevent people from touching it during creating. */
95 for (Files *I = Outputs; I != 0; I = I->Next)
96 I->TmpFile.Open(I->Output + ".new",FileFd::WriteEmpty,0600);
97 if (_error->PendingError() == true)
98 return;
99
100 if (Outputs == 0)
101 {
102 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
103 return;
104 }
105
106 Start();
107 }
108 /*}}}*/
109 // MultiCompress::~MultiCompress - Destructor /*{{{*/
110 // ---------------------------------------------------------------------
111 /* Just erase the file linked list. */
112 MultiCompress::~MultiCompress()
113 {
114 Die();
115
116 for (; Outputs != 0;)
117 {
118 Files *Tmp = Outputs->Next;
119 delete Outputs;
120 Outputs = Tmp;
121 }
122 }
123 /*}}}*/
124 // MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
125 // ---------------------------------------------------------------------
126 /* This checks each compressed file to make sure it exists and returns
127 stat information for a random file from the collection. False means
128 one or more of the files is missing. */
129 bool MultiCompress::GetStat(string const &Output,string const &Compress,struct stat &St)
130 {
131 /* Parse the compression string, a space separated lists of compresison
132 types */
133 string::const_iterator I = Compress.begin();
134 bool DidStat = false;
135 for (; I != Compress.end();)
136 {
137 for (; I != Compress.end() && isspace(*I); I++);
138
139 // Grab a word
140 string::const_iterator Start = I;
141 for (; I != Compress.end() && !isspace(*I); I++);
142
143 // Find the matching compressor
144 const CompType *Comp = Compressors;
145 for (; Comp->Name != 0; Comp++)
146 if (stringcmp(Start,I,Comp->Name) == 0)
147 break;
148
149 // Hmm.. unknown.
150 if (Comp->Name == 0)
151 continue;
152
153 string Name = Output+Comp->Extension;
154 if (stat(Name.c_str(),&St) != 0)
155 return false;
156 DidStat = true;
157 }
158 return DidStat;
159 }
160 /*}}}*/
161 // MultiCompress::Start - Start up the writer child /*{{{*/
162 // ---------------------------------------------------------------------
163 /* Fork a child and setup the communication pipe. */
164 bool MultiCompress::Start()
165 {
166 // Create a data pipe
167 int Pipe[2] = {-1,-1};
168 if (pipe(Pipe) != 0)
169 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
170 for (int I = 0; I != 2; I++)
171 SetCloseExec(Pipe[I],true);
172
173 // The child..
174 Outputter = fork();
175 if (Outputter == 0)
176 {
177 close(Pipe[1]);
178 Child(Pipe[0]);
179 if (_error->PendingError() == true)
180 {
181 _error->DumpErrors();
182 _exit(100);
183 }
184 _exit(0);
185 };
186
187 /* Tidy up the temp files, we open them in the constructor so as to
188 get proper error reporting. Close them now. */
189 for (Files *I = Outputs; I != 0; I = I->Next)
190 I->TmpFile.Close();
191
192 close(Pipe[0]);
193 Input = fdopen(Pipe[1],"w");
194 if (Input == 0)
195 return _error->Errno("fdopen",_("Failed to create FILE*"));
196
197 if (Outputter == -1)
198 return _error->Errno("fork",_("Failed to fork"));
199 return true;
200 }
201 /*}}}*/
202 // MultiCompress::Die - Clean up the writer /*{{{*/
203 // ---------------------------------------------------------------------
204 /* */
205 bool MultiCompress::Die()
206 {
207 if (Input == 0)
208 return true;
209
210 fclose(Input);
211 Input = 0;
212 bool Res = ExecWait(Outputter,_("Compress child"),false);
213 Outputter = -1;
214 return Res;
215 }
216 /*}}}*/
217 // MultiCompress::Finalize - Finish up writing /*{{{*/
218 // ---------------------------------------------------------------------
219 /* This is only necessary for statistics reporting. */
220 bool MultiCompress::Finalize(unsigned long &OutSize)
221 {
222 OutSize = 0;
223 if (Input == 0 || Die() == false)
224 return false;
225
226 time_t Now;
227 time(&Now);
228
229 // Check the mtimes to see if the files were replaced.
230 bool Changed = false;
231 for (Files *I = Outputs; I != 0; I = I->Next)
232 {
233 struct stat St;
234 if (stat(I->Output.c_str(),&St) != 0)
235 return _error->Error(_("Internal error, failed to create %s"),
236 I->Output.c_str());
237
238 if (I->OldMTime != St.st_mtime)
239 Changed = true;
240 else
241 {
242 // Update the mtime if necessary
243 if (UpdateMTime > 0 &&
244 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
245 {
246 struct utimbuf Buf;
247 Buf.actime = Buf.modtime = Now;
248 utime(I->Output.c_str(),&Buf);
249 Changed = true;
250 }
251 }
252
253 // Force the file permissions
254 if (St.st_mode != Permissions)
255 chmod(I->Output.c_str(),Permissions);
256
257 OutSize += St.st_size;
258 }
259
260 if (Changed == false)
261 OutSize = 0;
262
263 return true;
264 }
265 /*}}}*/
266 // MultiCompress::OpenCompress - Open the compressor /*{{{*/
267 // ---------------------------------------------------------------------
268 /* This opens the compressor, either in compress mode or decompress
269 mode. FileFd is always the compressor input/output file,
270 OutFd is the created pipe, Input for Compress, Output for Decompress. */
271 bool MultiCompress::OpenCompress(const CompType *Prog,pid_t &Pid,int const &FileFd,
272 int &OutFd,bool const &Comp)
273 {
274 Pid = -1;
275
276 // No compression
277 if (Prog->Binary == 0)
278 {
279 OutFd = dup(FileFd);
280 return true;
281 }
282
283 // Create a data pipe
284 int Pipe[2] = {-1,-1};
285 if (pipe(Pipe) != 0)
286 return _error->Errno("pipe",_("Failed to create subprocess IPC"));
287 for (int J = 0; J != 2; J++)
288 SetCloseExec(Pipe[J],true);
289
290 if (Comp == true)
291 OutFd = Pipe[1];
292 else
293 OutFd = Pipe[0];
294
295 // The child..
296 Pid = ExecFork();
297 if (Pid == 0)
298 {
299 if (Comp == true)
300 {
301 dup2(FileFd,STDOUT_FILENO);
302 dup2(Pipe[0],STDIN_FILENO);
303 }
304 else
305 {
306 dup2(FileFd,STDIN_FILENO);
307 dup2(Pipe[1],STDOUT_FILENO);
308 }
309
310 SetCloseExec(STDOUT_FILENO,false);
311 SetCloseExec(STDIN_FILENO,false);
312
313 const char *Args[3];
314 Args[0] = Prog->Binary;
315 if (Comp == true)
316 Args[1] = Prog->CompArgs;
317 else
318 Args[1] = Prog->UnCompArgs;
319 Args[2] = 0;
320 execvp(Args[0],(char **)Args);
321 cerr << _("Failed to exec compressor ") << Args[0] << endl;
322 _exit(100);
323 };
324 if (Comp == true)
325 close(Pipe[0]);
326 else
327 close(Pipe[1]);
328 return true;
329 }
330 /*}}}*/
331 // MultiCompress::OpenOld - Open an old file /*{{{*/
332 // ---------------------------------------------------------------------
333 /* This opens one of the original output files, possibly decompressing it. */
334 bool MultiCompress::OpenOld(int &Fd,pid_t &Proc)
335 {
336 Files *Best = Outputs;
337 for (Files *I = Outputs; I != 0; I = I->Next)
338 if (Best->CompressProg->Cost > I->CompressProg->Cost)
339 Best = I;
340
341 // Open the file
342 FileFd F(Best->Output,FileFd::ReadOnly);
343 if (_error->PendingError() == true)
344 return false;
345
346 // Decompress the file so we can read it
347 if (OpenCompress(Best->CompressProg,Proc,F.Fd(),Fd,false) == false)
348 return false;
349
350 return true;
351 }
352 /*}}}*/
353 // MultiCompress::CloseOld - Close the old file /*{{{*/
354 // ---------------------------------------------------------------------
355 /* */
356 bool MultiCompress::CloseOld(int Fd,pid_t Proc)
357 {
358 close(Fd);
359 if (Proc != -1)
360 if (ExecWait(Proc,_("decompressor"),false) == false)
361 return false;
362 return true;
363 }
364 /*}}}*/
365 // MultiCompress::Child - The writer child /*{{{*/
366 // ---------------------------------------------------------------------
367 /* The child process forks a bunch of compression children and takes
368 input on FD and passes it to all the compressor child. On the way it
369 computes the MD5 of the raw data. After this the raw data in the
370 original files is compared to see if this data is new. If the data
371 is new then the temp files are renamed, otherwise they are erased. */
372 bool MultiCompress::Child(int const &FD)
373 {
374 // Start the compression children.
375 for (Files *I = Outputs; I != 0; I = I->Next)
376 {
377 if (OpenCompress(I->CompressProg,I->CompressProc,I->TmpFile.Fd(),
378 I->Fd,true) == false)
379 return false;
380 }
381
382 /* Okay, now we just feed data from FD to all the other FDs. Also
383 stash a hash of the data to use later. */
384 SetNonBlock(FD,false);
385 unsigned char Buffer[32*1024];
386 unsigned long FileSize = 0;
387 MD5Summation MD5;
388 while (1)
389 {
390 WaitFd(FD,false);
391 int Res = read(FD,Buffer,sizeof(Buffer));
392 if (Res == 0)
393 break;
394 if (Res < 0)
395 continue;
396
397 MD5.Add(Buffer,Res);
398 FileSize += Res;
399 for (Files *I = Outputs; I != 0; I = I->Next)
400 {
401 if (write(I->Fd,Buffer,Res) != Res)
402 {
403 _error->Errno("write",_("IO to subprocess/file failed"));
404 break;
405 }
406 }
407 }
408
409 // Close all the writers
410 for (Files *I = Outputs; I != 0; I = I->Next)
411 close(I->Fd);
412
413 // Wait for the compressors to exit
414 for (Files *I = Outputs; I != 0; I = I->Next)
415 {
416 if (I->CompressProc != -1)
417 ExecWait(I->CompressProc,I->CompressProg->Binary,false);
418 }
419
420 if (_error->PendingError() == true)
421 return false;
422
423 /* Now we have to copy the files over, or erase them if they
424 have not changed. First find the cheapest decompressor */
425 bool Missing = false;
426 for (Files *I = Outputs; I != 0; I = I->Next)
427 {
428 if (I->OldMTime == 0)
429 {
430 Missing = true;
431 break;
432 }
433 }
434
435 // Check the MD5 of the lowest cost entity.
436 while (Missing == false)
437 {
438 int CompFd = -1;
439 pid_t Proc = -1;
440 if (OpenOld(CompFd,Proc) == false)
441 {
442 _error->Discard();
443 break;
444 }
445
446 // Compute the hash
447 MD5Summation OldMD5;
448 unsigned long NewFileSize = 0;
449 while (1)
450 {
451 int Res = read(CompFd,Buffer,sizeof(Buffer));
452 if (Res == 0)
453 break;
454 if (Res < 0)
455 return _error->Errno("read",_("Failed to read while computing MD5"));
456 NewFileSize += Res;
457 OldMD5.Add(Buffer,Res);
458 }
459
460 // Tidy the compressor
461 if (CloseOld(CompFd,Proc) == false)
462 return false;
463
464 // Check the hash
465 if (OldMD5.Result() == MD5.Result() &&
466 FileSize == NewFileSize)
467 {
468 for (Files *I = Outputs; I != 0; I = I->Next)
469 {
470 I->TmpFile.Close();
471 if (unlink(I->TmpFile.Name().c_str()) != 0)
472 _error->Errno("unlink",_("Problem unlinking %s"),
473 I->TmpFile.Name().c_str());
474 }
475 return !_error->PendingError();
476 }
477 break;
478 }
479
480 // Finalize
481 for (Files *I = Outputs; I != 0; I = I->Next)
482 {
483 // Set the correct file modes
484 fchmod(I->TmpFile.Fd(),Permissions);
485
486 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
487 _error->Errno("rename",_("Failed to rename %s to %s"),
488 I->TmpFile.Name().c_str(),I->Output.c_str());
489 I->TmpFile.Close();
490 }
491
492 return !_error->PendingError();
493 }
494 /*}}}*/
495