]> git.saurik.com Git - apt.git/blob - ftparchive/multicompress.cc
Add new Acquire::MaxReleaseFileSize=10*1000*1000 option
[apt.git] / ftparchive / multicompress.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
4 /* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16 // Include Files /*{{{*/
17 #include <config.h>
18
19 #include <apt-pkg/fileutl.h>
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/error.h>
22 #include <apt-pkg/md5.h>
23 #include <apt-pkg/aptconfiguration.h>
24 #include <apt-pkg/hashsum_template.h>
25
#include <ctype.h>
#include <errno.h>
#include <vector>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
32
33 #include "multicompress.h"
34 #include <apti18n.h>
35 /*}}}*/
36
37 using namespace std;
38
39
40 // MultiCompress::MultiCompress - Constructor /*{{{*/
41 // ---------------------------------------------------------------------
42 /* Setup the file outputs, compression modes and fork the writer child */
43 MultiCompress::MultiCompress(string const &Output,string const &Compress,
44 mode_t const &Permissions,bool const &Write) :
45 Permissions(Permissions)
46 {
47 Outputs = 0;
48 Outputter = -1;
49 Input = 0;
50 UpdateMTime = 0;
51
52 /* Parse the compression string, a space separated lists of compresison
53 types */
54 string::const_iterator I = Compress.begin();
55 for (; I != Compress.end();)
56 {
57 for (; I != Compress.end() && isspace(*I); ++I);
58
59 // Grab a word
60 string::const_iterator Start = I;
61 for (; I != Compress.end() && !isspace(*I); ++I);
62
63 // Find the matching compressor
64 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
65 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
66 for (; Comp != Compressors.end(); ++Comp)
67 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
68 break;
69
70 // Hmm.. unknown.
71 if (Comp == Compressors.end())
72 {
73 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
74 continue;
75 }
76
77 // Create and link in a new output
78 Files *NewOut = new Files;
79 NewOut->Next = Outputs;
80 Outputs = NewOut;
81 NewOut->CompressProg = *Comp;
82 NewOut->Output = Output+Comp->Extension;
83
84 struct stat St;
85 if (stat(NewOut->Output.c_str(),&St) == 0)
86 NewOut->OldMTime = St.st_mtime;
87 else
88 NewOut->OldMTime = 0;
89 }
90
91 if (Write == false)
92 return;
93
94 /* Open all the temp files now so we can report any errors. File is
95 made unreable to prevent people from touching it during creating. */
96 for (Files *I = Outputs; I != 0; I = I->Next)
97 I->TmpFile.Open(I->Output + ".new", FileFd::WriteOnly | FileFd::Create | FileFd::Empty, FileFd::Extension, 0600);
98 if (_error->PendingError() == true)
99 return;
100
101 if (Outputs == 0)
102 {
103 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
104 return;
105 }
106
107 Start();
108 }
109 /*}}}*/
110 // MultiCompress::~MultiCompress - Destructor /*{{{*/
111 // ---------------------------------------------------------------------
112 /* Just erase the file linked list. */
113 MultiCompress::~MultiCompress()
114 {
115 Die();
116
117 for (; Outputs != 0;)
118 {
119 Files *Tmp = Outputs->Next;
120 delete Outputs;
121 Outputs = Tmp;
122 }
123 }
124 /*}}}*/
125 // MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
126 // ---------------------------------------------------------------------
127 /* This checks each compressed file to make sure it exists and returns
128 stat information for a random file from the collection. False means
129 one or more of the files is missing. */
130 bool MultiCompress::GetStat(string const &Output,string const &Compress,struct stat &St)
131 {
132 /* Parse the compression string, a space separated lists of compresison
133 types */
134 string::const_iterator I = Compress.begin();
135 bool DidStat = false;
136 for (; I != Compress.end();)
137 {
138 for (; I != Compress.end() && isspace(*I); ++I);
139
140 // Grab a word
141 string::const_iterator Start = I;
142 for (; I != Compress.end() && !isspace(*I); ++I);
143
144 // Find the matching compressor
145 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
146 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
147 for (; Comp != Compressors.end(); ++Comp)
148 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
149 break;
150
151 // Hmm.. unknown.
152 if (Comp == Compressors.end())
153 continue;
154
155 string Name = Output+Comp->Extension;
156 if (stat(Name.c_str(),&St) != 0)
157 return false;
158 DidStat = true;
159 }
160 return DidStat;
161 }
162 /*}}}*/
163 // MultiCompress::Start - Start up the writer child /*{{{*/
164 // ---------------------------------------------------------------------
165 /* Fork a child and setup the communication pipe. */
166 bool MultiCompress::Start()
167 {
168 // Create a data pipe
169 int Pipe[2] = {-1,-1};
170 if (pipe(Pipe) != 0)
171 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
172 for (int I = 0; I != 2; I++)
173 SetCloseExec(Pipe[I],true);
174
175 // The child..
176 Outputter = fork();
177 if (Outputter == 0)
178 {
179 close(Pipe[1]);
180 Child(Pipe[0]);
181 if (_error->PendingError() == true)
182 {
183 _error->DumpErrors();
184 _exit(100);
185 }
186 _exit(0);
187 };
188
189 close(Pipe[0]);
190 Input = fdopen(Pipe[1],"w");
191 if (Input == 0)
192 return _error->Errno("fdopen",_("Failed to create FILE*"));
193
194 if (Outputter == -1)
195 return _error->Errno("fork",_("Failed to fork"));
196 return true;
197 }
198 /*}}}*/
199 // MultiCompress::Die - Clean up the writer /*{{{*/
200 // ---------------------------------------------------------------------
201 /* */
202 bool MultiCompress::Die()
203 {
204 if (Input == 0)
205 return true;
206
207 fclose(Input);
208 Input = 0;
209 bool Res = ExecWait(Outputter,_("Compress child"),false);
210 Outputter = -1;
211 return Res;
212 }
213 /*}}}*/
214 // MultiCompress::Finalize - Finish up writing /*{{{*/
215 // ---------------------------------------------------------------------
216 /* This is only necessary for statistics reporting. */
217 bool MultiCompress::Finalize(unsigned long long &OutSize)
218 {
219 OutSize = 0;
220 if (Input == 0 || Die() == false)
221 return false;
222
223 time_t Now;
224 time(&Now);
225
226 // Check the mtimes to see if the files were replaced.
227 bool Changed = false;
228 for (Files *I = Outputs; I != 0; I = I->Next)
229 {
230 struct stat St;
231 if (stat(I->Output.c_str(),&St) != 0)
232 return _error->Error(_("Internal error, failed to create %s"),
233 I->Output.c_str());
234
235 if (I->OldMTime != St.st_mtime)
236 Changed = true;
237 else
238 {
239 // Update the mtime if necessary
240 if (UpdateMTime > 0 &&
241 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
242 {
243 utimes(I->Output.c_str(), NULL);
244 Changed = true;
245 }
246 }
247
248 // Force the file permissions
249 if (St.st_mode != Permissions)
250 chmod(I->Output.c_str(),Permissions);
251
252 OutSize += St.st_size;
253 }
254
255 if (Changed == false)
256 OutSize = 0;
257
258 return true;
259 }
260 /*}}}*/
261 // MultiCompress::OpenOld - Open an old file /*{{{*/
262 // ---------------------------------------------------------------------
263 /* This opens one of the original output files, possibly decompressing it. */
264 bool MultiCompress::OpenOld(FileFd &Fd)
265 {
266 Files *Best = Outputs;
267 for (Files *I = Outputs; I != 0; I = I->Next)
268 if (Best->CompressProg.Cost > I->CompressProg.Cost)
269 Best = I;
270
271 // Open the file
272 return Fd.Open(Best->Output, FileFd::ReadOnly, FileFd::Extension);
273 }
274 /*}}}*/
275 // MultiCompress::Child - The writer child /*{{{*/
276 // ---------------------------------------------------------------------
277 /* The child process forks a bunch of compression children and takes
278 input on FD and passes it to all the compressor child. On the way it
279 computes the MD5 of the raw data. After this the raw data in the
280 original files is compared to see if this data is new. If the data
281 is new then the temp files are renamed, otherwise they are erased. */
282 bool MultiCompress::Child(int const &FD)
283 {
284 /* Okay, now we just feed data from FD to all the other FDs. Also
285 stash a hash of the data to use later. */
286 SetNonBlock(FD,false);
287 unsigned char Buffer[32*1024];
288 unsigned long long FileSize = 0;
289 MD5Summation MD5;
290 while (1)
291 {
292 WaitFd(FD,false);
293 int Res = read(FD,Buffer,sizeof(Buffer));
294 if (Res == 0)
295 break;
296 if (Res < 0)
297 continue;
298
299 MD5.Add(Buffer,Res);
300 FileSize += Res;
301 for (Files *I = Outputs; I != 0; I = I->Next)
302 {
303 if (I->TmpFile.Write(Buffer, Res) == false)
304 {
305 _error->Errno("write",_("IO to subprocess/file failed"));
306 break;
307 }
308 }
309 }
310
311 if (_error->PendingError() == true)
312 return false;
313
314 /* Now we have to copy the files over, or erase them if they
315 have not changed. First find the cheapest decompressor */
316 bool Missing = false;
317 for (Files *I = Outputs; I != 0; I = I->Next)
318 {
319 if (I->OldMTime == 0)
320 {
321 Missing = true;
322 break;
323 }
324 }
325
326 // Check the MD5 of the lowest cost entity.
327 while (Missing == false)
328 {
329 FileFd CompFd;
330 if (OpenOld(CompFd) == false)
331 {
332 _error->Discard();
333 break;
334 }
335
336 // Compute the hash
337 MD5Summation OldMD5;
338 unsigned long long NewFileSize = 0;
339 while (1)
340 {
341 unsigned long long Res = 0;
342 if (CompFd.Read(Buffer,sizeof(Buffer), &Res) == false)
343 return _error->Errno("read",_("Failed to read while computing MD5"));
344 if (Res == 0)
345 break;
346 NewFileSize += Res;
347 OldMD5.Add(Buffer,Res);
348 }
349 CompFd.Close();
350
351 // Check the hash
352 if (OldMD5.Result() == MD5.Result() &&
353 FileSize == NewFileSize)
354 {
355 for (Files *I = Outputs; I != 0; I = I->Next)
356 {
357 I->TmpFile.Close();
358 if (unlink(I->TmpFile.Name().c_str()) != 0)
359 _error->Errno("unlink",_("Problem unlinking %s"),
360 I->TmpFile.Name().c_str());
361 }
362 return !_error->PendingError();
363 }
364 break;
365 }
366
367 // Finalize
368 for (Files *I = Outputs; I != 0; I = I->Next)
369 {
370 // Set the correct file modes
371 chmod(I->TmpFile.Name().c_str(),Permissions);
372
373 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
374 _error->Errno("rename",_("Failed to rename %s to %s"),
375 I->TmpFile.Name().c_str(),I->Output.c_str());
376 I->TmpFile.Close();
377 }
378
379 return !_error->PendingError();
380 }
381 /*}}}*/
382