]> git.saurik.com Git - apt.git/blame - apt-inst/contrib/extracttar.cc
Do not buffer writes larger than the buffer if possible
[apt.git] / apt-inst / contrib / extracttar.cc
CommitLineData
b2e465d6
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
7db98ffc 3// $Id: extracttar.cc,v 1.8.2.1 2004/01/16 18:58:50 mdz Exp $
b2e465d6
AL
4/* ######################################################################
5
6 Extract a Tar - Tar Extractor
7
8 Some performance measurements showed that zlib performed quite poorly
1e3f4083 9 in comparison to a forked gzip process. This tar extractor makes use
b2e465d6
AL
10 of the fact that dup'd file descriptors have the same seek pointer
11 and that gzip will not read past the end of a compressed stream,
12 even if there is more data. We use the dup property to track extraction
13 progress and the gzip feature to just feed gzip a fd in the middle
14 of an AR file.
15
16 ##################################################################### */
17 /*}}}*/
18// Include Files /*{{{*/
ea542140 19#include<config.h>
b2e465d6 20
472ff00e 21#include <apt-pkg/dirstream.h>
ea542140 22#include <apt-pkg/extracttar.h>
b2e465d6
AL
23#include <apt-pkg/error.h>
24#include <apt-pkg/strutl.h>
25#include <apt-pkg/configuration.h>
453b82a3 26#include <apt-pkg/fileutl.h>
b2e465d6 27
453b82a3
DK
28#include <string.h>
29#include <algorithm>
30#include <string>
b2e465d6
AL
31#include <unistd.h>
32#include <signal.h>
33#include <fcntl.h>
90f057fd 34#include <iostream>
ea542140 35
d77559ac 36#include <apti18n.h>
b2e465d6 37 /*}}}*/
d77559ac 38
584e4558 39using namespace std;
4520bfdf 40
b2e465d6
AL
41// The on disk header for a tar file.
42struct ExtractTar::TarHeader
43{
44 char Name[100];
45 char Mode[8];
46 char UserID[8];
47 char GroupID[8];
48 char Size[12];
49 char MTime[12];
50 char Checksum[8];
51 char LinkFlag;
52 char LinkName[100];
53 char MagicNumber[8];
54 char UserName[32];
55 char GroupName[32];
56 char Major[8];
57 char Minor[8];
58};
59
60// ExtractTar::ExtractTar - Constructor /*{{{*/
61// ---------------------------------------------------------------------
62/* */
3621b1c7
GJ
63ExtractTar::ExtractTar(FileFd &Fd,unsigned long long Max,string DecompressionProgram)
64 : File(Fd), MaxInSize(Max), DecompressProg(DecompressionProgram)
b2e465d6
AL
65{
66 GZPid = -1;
b2e465d6
AL
67 Eof = false;
68}
69 /*}}}*/
70// ExtractTar::ExtractTar - Destructor /*{{{*/
71// ---------------------------------------------------------------------
72/* */
73ExtractTar::~ExtractTar()
74{
4520bfdf 75 // Error close
b830f576 76 Done();
b2e465d6
AL
77}
78 /*}}}*/
79// ExtractTar::Done - Reap the gzip sub process /*{{{*/
b830f576
DK
80bool ExtractTar::Done(bool)
81{
82 return Done();
83}
84bool ExtractTar::Done()
b2e465d6 85{
3564c2f4 86 return InFd.Close();
b2e465d6
AL
87}
88 /*}}}*/
89// ExtractTar::StartGzip - Startup gzip /*{{{*/
90// ---------------------------------------------------------------------
91/* This creates a gzip sub process that has its input as the file itself.
92 If this tar file is embedded into something like an ar file then
93 gzip will efficiently ignore the extra bits. */
94bool ExtractTar::StartGzip()
95{
3255db2d
GJ
96 if (DecompressProg.empty())
97 {
98 InFd.OpenDescriptor(File.Fd(), FileFd::ReadOnly, FileFd::None, false);
99 return true;
100 }
101
3564c2f4
JAK
102 std::vector<APT::Configuration::Compressor> const compressors = APT::Configuration::getCompressors();
103 std::vector<APT::Configuration::Compressor>::const_iterator compressor = compressors.begin();
258b9e51 104 for (; compressor != compressors.end(); ++compressor) {
3564c2f4
JAK
105 if (compressor->Name == DecompressProg) {
106 return InFd.OpenDescriptor(File.Fd(), FileFd::ReadOnly, *compressor, false);
107 }
b2e465d6
AL
108 }
109
3564c2f4
JAK
110 return _error->Error(_("Cannot find a configured compressor for '%s'"),
111 DecompressProg.c_str());
112
b2e465d6
AL
113}
114 /*}}}*/
115// ExtractTar::Go - Perform extraction /*{{{*/
116// ---------------------------------------------------------------------
117/* This reads each 512 byte block from the archive and extracts the header
118 information into the Item structure. Then it resolves the UID/GID and
119 invokes the correct processing function. */
120bool ExtractTar::Go(pkgDirStream &Stream)
121{
122 if (StartGzip() == false)
123 return false;
124
125 // Loop over all blocks
a07b81e8
OS
126 string LastLongLink, ItemLink;
127 string LastLongName, ItemName;
b2e465d6
AL
128 while (1)
129 {
130 bool BadRecord = false;
131 unsigned char Block[512];
132 if (InFd.Read(Block,sizeof(Block),true) == false)
133 return false;
134
135 if (InFd.Eof() == true)
136 break;
137
138 // Get the checksum
139 TarHeader *Tar = (TarHeader *)Block;
140 unsigned long CheckSum;
141 if (StrToNum(Tar->Checksum,CheckSum,sizeof(Tar->Checksum),8) == false)
05eb7df0 142 return _error->Error(_("Corrupted archive"));
b2e465d6
AL
143
144 /* Compute the checksum field. The actual checksum is blanked out
145 with spaces so it is not included in the computation */
146 unsigned long NewSum = 0;
147 memset(Tar->Checksum,' ',sizeof(Tar->Checksum));
148 for (int I = 0; I != sizeof(Block); I++)
149 NewSum += Block[I];
150
151 /* Check for a block of nulls - in this case we kill gzip, GNU tar
152 does this.. */
153 if (NewSum == ' '*sizeof(Tar->Checksum))
b830f576 154 return Done();
b2e465d6
AL
155
156 if (NewSum != CheckSum)
db0db9fe 157 return _error->Error(_("Tar checksum failed, archive corrupted"));
b2e465d6
AL
158
159 // Decode all of the fields
160 pkgDirStream::Item Itm;
b2e465d6 161 if (StrToNum(Tar->Mode,Itm.Mode,sizeof(Tar->Mode),8) == false ||
54f2f0a3
NH
162 (Base256ToNum(Tar->UserID,Itm.UID,8) == false &&
163 StrToNum(Tar->UserID,Itm.UID,sizeof(Tar->UserID),8) == false) ||
164 (Base256ToNum(Tar->GroupID,Itm.GID,8) == false &&
165 StrToNum(Tar->GroupID,Itm.GID,sizeof(Tar->GroupID),8) == false) ||
166 (Base256ToNum(Tar->Size,Itm.Size,12) == false &&
167 StrToNum(Tar->Size,Itm.Size,sizeof(Tar->Size),8) == false) ||
168 (Base256ToNum(Tar->MTime,Itm.MTime,12) == false &&
169 StrToNum(Tar->MTime,Itm.MTime,sizeof(Tar->MTime),8) == false) ||
b2e465d6
AL
170 StrToNum(Tar->Major,Itm.Major,sizeof(Tar->Major),8) == false ||
171 StrToNum(Tar->Minor,Itm.Minor,sizeof(Tar->Minor),8) == false)
05eb7df0 172 return _error->Error(_("Corrupted archive"));
a07b81e8
OS
173
174 // Grab the filename and link target: use last long name if one was
175 // set, otherwise use the header value as-is, but remember that it may
176 // fill the entire 100-byte block and needs to be zero-terminated.
177 // See Debian Bug #689582.
b2e465d6
AL
178 if (LastLongName.empty() == false)
179 Itm.Name = (char *)LastLongName.c_str();
180 else
a07b81e8 181 Itm.Name = (char *)ItemName.assign(Tar->Name, sizeof(Tar->Name)).c_str();
b2e465d6
AL
182 if (Itm.Name[0] == '.' && Itm.Name[1] == '/' && Itm.Name[2] != 0)
183 Itm.Name += 2;
b2e465d6
AL
184
185 if (LastLongLink.empty() == false)
186 Itm.LinkTarget = (char *)LastLongLink.c_str();
a07b81e8
OS
187 else
188 Itm.LinkTarget = (char *)ItemLink.assign(Tar->LinkName, sizeof(Tar->LinkName)).c_str();
189
b2e465d6
AL
190 // Convert the type over
191 switch (Tar->LinkFlag)
192 {
193 case NormalFile0:
194 case NormalFile:
195 Itm.Type = pkgDirStream::Item::File;
196 break;
197
198 case HardLink:
199 Itm.Type = pkgDirStream::Item::HardLink;
200 break;
201
202 case SymbolicLink:
203 Itm.Type = pkgDirStream::Item::SymbolicLink;
204 break;
205
206 case CharacterDevice:
207 Itm.Type = pkgDirStream::Item::CharDevice;
208 break;
209
210 case BlockDevice:
211 Itm.Type = pkgDirStream::Item::BlockDevice;
212 break;
213
214 case Directory:
215 Itm.Type = pkgDirStream::Item::Directory;
216 break;
217
218 case FIFO:
219 Itm.Type = pkgDirStream::Item::FIFO;
220 break;
221
222 case GNU_LongLink:
223 {
3621b1c7 224 unsigned long long Length = Itm.Size;
b2e465d6
AL
225 unsigned char Block[512];
226 while (Length > 0)
227 {
228 if (InFd.Read(Block,sizeof(Block),true) == false)
229 return false;
230 if (Length <= sizeof(Block))
231 {
232 LastLongLink.append(Block,Block+sizeof(Block));
233 break;
234 }
235 LastLongLink.append(Block,Block+sizeof(Block));
236 Length -= sizeof(Block);
237 }
238 continue;
239 }
240
241 case GNU_LongName:
242 {
3621b1c7 243 unsigned long long Length = Itm.Size;
b2e465d6
AL
244 unsigned char Block[512];
245 while (Length > 0)
246 {
247 if (InFd.Read(Block,sizeof(Block),true) == false)
248 return false;
249 if (Length < sizeof(Block))
250 {
251 LastLongName.append(Block,Block+sizeof(Block));
252 break;
253 }
254 LastLongName.append(Block,Block+sizeof(Block));
255 Length -= sizeof(Block);
256 }
257 continue;
258 }
259
260 default:
261 BadRecord = true;
4c6a9fad 262 _error->Warning(_("Unknown TAR header type %u, member %s"),(unsigned)Tar->LinkFlag,Tar->Name);
b2e465d6
AL
263 break;
264 }
265
266 int Fd = -1;
267 if (BadRecord == false)
268 if (Stream.DoItem(Itm,Fd) == false)
269 return false;
270
271 // Copy the file over the FD
3621b1c7 272 unsigned long long Size = Itm.Size;
b2e465d6
AL
273 while (Size != 0)
274 {
275 unsigned char Junk[32*1024];
3621b1c7 276 unsigned long Read = min(Size, (unsigned long long)sizeof(Junk));
b2e465d6
AL
277 if (InFd.Read(Junk,((Read+511)/512)*512) == false)
278 return false;
279
280 if (BadRecord == false)
281 {
282 if (Fd > 0)
283 {
284 if (write(Fd,Junk,Read) != (signed)Read)
285 return Stream.Fail(Itm,Fd);
286 }
287 else
288 {
289 /* An Fd of -2 means to send to a special processing
290 function */
291 if (Fd == -2)
292 if (Stream.Process(Itm,Junk,Read,Itm.Size - Size) == false)
293 return Stream.Fail(Itm,Fd);
294 }
295 }
296
297 Size -= Read;
298 }
299
300 // And finish up
59152cdb 301 if (BadRecord == false)
b2e465d6
AL
302 if (Stream.FinishedFile(Itm,Fd) == false)
303 return false;
304
305 LastLongName.erase();
306 LastLongLink.erase();
307 }
308
b830f576 309 return Done();
b2e465d6
AL
310}
311 /*}}}*/