]> git.saurik.com Git - apt.git/blob - ftparchive/contents.cc
Version update
[apt.git] / ftparchive / contents.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: contents.cc,v 1.2 2001/02/20 07:03:18 jgg Exp $
4 /* ######################################################################
5
6 contents - Archive contents generator
7
8 The GenContents class is a back end for an archive contents generator.
9 It takes a list of per-deb file name and merges it into a memory
10 database of all previous output. This database is stored as a set
11 of binary trees linked across directories to form a tree of all files+dirs
12 given to it. The tree will also be sorted as it is built up thus
13 removing the massive sort time overhead.
14
15 By breaking all the pathnames into components and storing them
16 separately a space savings is realized by not duplicating the string
17 over and over again. Ultimately this saving is sacrificed to storage of
18 the tree structure itself but the tree structure yields a speed gain
19 in the sorting and processing. Ultimately it takes about 5 seconds to
20 do 141000 nodes and about 5 meg of ram.
21
22 The tree looks something like:
23
24 usr/
25 / \ / libslang
26 bin/ lib/ --> libc6
27 / \ \ libfoo
28 games/ sbin/
29
30 The ---> is the DirDown link
31
32
33 ##################################################################### */
34 /*}}}*/
35 // Include Files /*{{{*/
36 #include "contents.h"
37
38 #include <apt-pkg/extracttar.h>
39 #include <apt-pkg/error.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <malloc.h>
44 /*}}}*/
45
46 // GenContents::~GenContents - Free allocated memory /*{{{*/
47 // ---------------------------------------------------------------------
48 /* Since all our allocations are static big-block allocations all that is
49 needed is to free all of them. */
50 GenContents::~GenContents()
51 {
52 while (BlockList != 0)
53 {
54 BigBlock *Old = BlockList;
55 BlockList = Old->Next;
56 free(Old->Block);
57 delete Old;
58 }
59 }
60 /*}}}*/
61 // GenContents::Mystrdup - Custom strdup /*{{{*/
62 // ---------------------------------------------------------------------
63 /* This strdup also uses a large block allocator to eliminate glibc
64 overhead */
65 char *GenContents::Mystrdup(const char *From)
66 {
67 unsigned int Len = strlen(From) + 1;
68 if (StrLeft <= Len)
69 {
70 StrLeft = 4096*10;
71 StrPool = (char *)malloc(StrLeft);
72
73 BigBlock *Block = new BigBlock;
74 Block->Block = StrPool;
75 Block->Next = BlockList;
76 BlockList = Block;
77 }
78
79 memcpy(StrPool,From,Len);
80 StrLeft -= Len;
81
82 char *Res = StrPool;
83 StrPool += Len;
84 return Res;
85 }
86 /*}}}*/
87 // GenContents::Node::operator new - Big block allocator /*{{{*/
88 // ---------------------------------------------------------------------
89 /* This eliminates glibc's malloc overhead by allocating large blocks and
90 having a continuous set of Nodes. This takes about 8 bytes off each nodes
91 space needs. Freeing is not supported. */
92 void *GenContents::Node::operator new(size_t Amount,GenContents *Owner)
93 {
94 if (Owner->NodeLeft == 0)
95 {
96 Owner->NodeLeft = 10000;
97 Owner->NodePool = (Node *)malloc(Amount*Owner->NodeLeft);
98 BigBlock *Block = new BigBlock;
99 Block->Block = Owner->NodePool;
100 Block->Next = Owner->BlockList;
101 Owner->BlockList = Block;
102 }
103
104 Owner->NodeLeft--;
105 return Owner->NodePool++;
106 }
107 /*}}}*/
108 // GenContents::Grab - Grab a new node representing Name under Top /*{{{*/
109 // ---------------------------------------------------------------------
110 /* This grabs a new node representing the pathname component Name under
111 the node Top. The node is given the name Package. It is assumed that Name
112 is inside of top. If a duplicate already entered name is found then
113 a note is made on the Dup list and the previous in-tree node is returned. */
114 GenContents::Node *GenContents::Grab(GenContents::Node *Top,const char *Name,
115 const char *Package)
116 {
117 /* We drop down to the next dir level each call. This simplifies
118 the calling routine */
119 if (Top->DirDown == 0)
120 {
121 Node *Item = new(this) Node;
122 Item->Path = Mystrdup(Name);
123 Item->Package = Package;
124 Top->DirDown = Item;
125 return Item;
126 }
127 Top = Top->DirDown;
128
129 int Res;
130 while (1)
131 {
132 Res = strcmp(Name,Top->Path);
133
134 // Collision!
135 if (Res == 0)
136 {
137 // See if this the the same package (multi-version dup)
138 if (Top->Package == Package ||
139 strcasecmp(Top->Package,Package) == 0)
140 return Top;
141
142 // Look for an already existing Dup
143 for (Node *I = Top->Dups; I != 0; I = I->Dups)
144 if (I->Package == Package ||
145 strcasecmp(I->Package,Package) == 0)
146 return Top;
147
148 // Add the dup in
149 Node *Item = new(this) Node;
150 Item->Path = Top->Path;
151 Item->Package = Package;
152 Item->Dups = Top->Dups;
153 Top->Dups = Item;
154 return Top;
155 }
156
157 // Continue to traverse the tree
158 if (Res < 0)
159 {
160 if (Top->BTreeLeft == 0)
161 break;
162 Top = Top->BTreeLeft;
163 }
164 else
165 {
166 if (Top->BTreeRight == 0)
167 break;
168 Top = Top->BTreeRight;
169 }
170 }
171
172 // The item was not found in the tree
173 Node *Item = new(this) Node;
174 Item->Path = Mystrdup(Name);
175 Item->Package = Package;
176
177 // Link it into the tree
178 if (Res < 0)
179 {
180 Item->BTreeLeft = Top->BTreeLeft;
181 Top->BTreeLeft = Item;
182 }
183 else
184 {
185 Item->BTreeRight = Top->BTreeRight;
186 Top->BTreeRight = Item;
187 }
188
189 return Item;
190 }
191 /*}}}*/
192 // GenContents::Add - Add a path to the tree /*{{{*/
193 // ---------------------------------------------------------------------
194 /* This takes a full pathname and adds it into the tree. We split the
195 pathname into directory fragments adding each one as we go. Technically
196 in output from tar this should result in hitting previous items. */
197 void GenContents::Add(const char *Dir,const char *Package)
198 {
199 Node *Root = &this->Root;
200
201 // Drop leading slashes
202 while (*Dir == '/' && *Dir != 0)
203 Dir++;
204
205 // Run over the string and grab out each bit up to and including a /
206 const char *Start = Dir;
207 const char *I = Dir;
208 while (*I != 0)
209 {
210 if (*I != '/' || I - Start <= 1)
211 {
212 I++;
213 continue;
214 }
215 I++;
216
217 // Copy the path fragment over
218 char Tmp[1024];
219 strncpy(Tmp,Start,I - Start);
220 Tmp[I - Start] = 0;
221
222 // Grab a node for it
223 Root = Grab(Root,Tmp,Package);
224
225 Start = I;
226 }
227
228 // The final component if it does not have a trailing /
229 if (I - Start >= 1)
230 Root = Grab(Root,Start,Package);
231 }
232 /*}}}*/
233 // GenContents::WriteSpace - Write a given number of white space chars /*{{{*/
234 // ---------------------------------------------------------------------
235 /* We mod 8 it and write tabs where possible. */
236 void GenContents::WriteSpace(FILE *Out,unsigned int Current,unsigned int Target)
237 {
238 if (Target <= Current)
239 Target = Current + 1;
240
241 /* Now we write tabs so long as the next tab stop would not pass
242 the target */
243 for (; (Current/8 + 1)*8 < Target; Current = (Current/8 + 1)*8)
244 fputc('\t',Out);
245
246 // Fill the last bit with spaces
247 for (; Current < Target; Current++)
248 fputc(' ',Out);
249 }
250 /*}}}*/
251 // GenContents::Print - Display the tree /*{{{*/
252 // ---------------------------------------------------------------------
253 /* This is the final result function. It takes the tree and recursively
254 calls itself and runs over each section of the tree printing out
255 the pathname and the hit packages. We use Buf to build the pathname
256 summed over all the directory parents of this node. */
257 void GenContents::Print(FILE *Out)
258 {
259 char Buffer[1024];
260 DoPrint(Out,&Root,Buffer);
261 }
262 void GenContents::DoPrint(FILE *Out,GenContents::Node *Top, char *Buf)
263 {
264 if (Top == 0)
265 return;
266
267 // Go left
268 DoPrint(Out,Top->BTreeLeft,Buf);
269
270 // Print the current dir location and then descend to lower dirs
271 char *OldEnd = Buf + strlen(Buf);
272 if (Top->Path != 0)
273 {
274 strcat(Buf,Top->Path);
275
276 // Do not show the item if it is a directory with dups
277 if (Top->Path[strlen(Top->Path)-1] != '/' /*|| Top->Dups == 0*/)
278 {
279 fputs(Buf,Out);
280 WriteSpace(Out,strlen(Buf),60);
281 for (Node *I = Top; I != 0; I = I->Dups)
282 {
283 if (I != Top)
284 fputc(',',Out);
285 fputs(I->Package,Out);
286 }
287 fputc('\n',Out);
288 }
289 }
290
291 // Go along the directory link
292 DoPrint(Out,Top->DirDown,Buf);
293 *OldEnd = 0;
294
295 // Go right
296 DoPrint(Out,Top->BTreeRight,Buf);
297 }
298 /*}}}*/
299
300 // ContentsExtract::Read - Read the archive /*{{{*/
301 // ---------------------------------------------------------------------
302 /* */
303 bool ContentsExtract::Read(debDebFile &Deb)
304 {
305 Reset();
306
307 // Get the archive member and positition the file
308 const ARArchive::Member *Member = Deb.GotoMember("data.tar.gz");
309 if (Member == 0)
310 return false;
311
312 // Extract it.
313 ExtractTar Tar(Deb.GetFile(),Member->Size);
314 if (Tar.Go(*this) == false)
315 return false;
316 return true;
317 }
318 /*}}}*/
319 // ContentsExtract::DoItem - Extract an item /*{{{*/
320 // ---------------------------------------------------------------------
321 /* This just tacks the name onto the end of our memory buffer */
322 bool ContentsExtract::DoItem(Item &Itm,int &Fd)
323 {
324 unsigned long Len = strlen(Itm.Name);
325
326 // Strip leading ./'s
327 if (Itm.Name[0] == '.' && Itm.Name[1] == '/')
328 {
329 // == './'
330 if (Len == 2)
331 return true;
332
333 Len -= 2;
334 Itm.Name += 2;
335 }
336
337 // Allocate more storage for the string list
338 if (CurSize + Len + 2 >= MaxSize || Data == 0)
339 {
340 if (MaxSize == 0)
341 MaxSize = 512*1024/2;
342 char *NewData = (char *)realloc(Data,MaxSize*2);
343 if (NewData == 0)
344 return _error->Error("realloc - Failed to allocate memory");
345 Data = NewData;
346 MaxSize *= 2;
347 }
348
349 strcpy(Data+CurSize,Itm.Name);
350 CurSize += Len + 1;
351 return true;
352 }
353 /*}}}*/
354 // ContentsExtract::TakeContents - Load the contents data /*{{{*/
355 // ---------------------------------------------------------------------
356 /* */
357 bool ContentsExtract::TakeContents(const void *NewData,unsigned long Length)
358 {
359 if (Length == 0)
360 {
361 CurSize = 0;
362 return true;
363 }
364
365 // Allocate more storage for the string list
366 if (Length + 2 >= MaxSize || Data == 0)
367 {
368 if (MaxSize == 0)
369 MaxSize = 512*1024/2;
370 while (MaxSize*2 <= Length)
371 MaxSize *= 2;
372
373 char *NewData = (char *)realloc(Data,MaxSize*2);
374 if (NewData == 0)
375 return _error->Error("realloc - Failed to allocate memory");
376 Data = NewData;
377 MaxSize *= 2;
378 }
379 memcpy(Data,NewData,Length);
380 CurSize = Length;
381
382 return Data[CurSize-1] == 0;
383 }
384 /*}}}*/
385 // ContentsExtract::Add - Read the contents data into the sorter /*{{{*/
386 // ---------------------------------------------------------------------
387 /* */
388 void ContentsExtract::Add(GenContents &Contents,string Package)
389 {
390 const char *Start = Data;
391 char *Pkg = Contents.Mystrdup(Package.c_str());
392 for (const char *I = Data; I < Data + CurSize; I++)
393 {
394 if (*I == 0)
395 {
396 Contents.Add(Start,Pkg);
397 Start = ++I;
398 }
399 }
400 }
401 /*}}}*/