]> git.saurik.com Git - apt.git/blob - ftparchive/contents.cc
WIP start randomizing
[apt.git] / ftparchive / contents.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: contents.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
4 /* ######################################################################
5
6 contents - Archive contents generator
7
8 The GenContents class is a back end for an archive contents generator.
9 It takes a list of per-deb file name and merges it into a memory
10 database of all previous output. This database is stored as a set
11 of binary trees linked across directories to form a tree of all files+dirs
12 given to it. The tree will also be sorted as it is built up thus
13 removing the massive sort time overhead.
14
15 By breaking all the pathnames into components and storing them
16 separately a space saving is realized by not duplicating the string
17 over and over again. Ultimately this saving is sacrificed to storage of
18 the tree structure itself but the tree structure yields a speed gain
19 in the sorting and processing. Ultimately it takes about 5 seconds to
20 do 141000 nodes and about 5 meg of ram.
21
22 The tree looks something like:
23
24 usr/
25 / \ / libslang
26 bin/ lib/ --> libc6
27 / \ \ libfoo
28 games/ sbin/
29
30 The ---> is the DirDown link
31
32
33 ##################################################################### */
34 /*}}}*/
35 // Include Files /*{{{*/
36 #include <config.h>
37
38 #include <apt-pkg/debfile.h>
39 #include <apt-pkg/dirstream.h>
40 #include <apt-pkg/error.h>
41
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45
46 #include "contents.h"
47
48 #include <apti18n.h>
49 /*}}}*/
50
51 // GenContents::~GenContents - Free allocated memory /*{{{*/
52 // ---------------------------------------------------------------------
53 /* Since all our allocations are static big-block allocations all that is
54 needed is to free all of them. */
55 GenContents::~GenContents()
56 {
57 while (BlockList != 0)
58 {
59 BigBlock *Old = BlockList;
60 BlockList = Old->Next;
61 free(Old->Block);
62 delete Old;
63 }
64 }
65 /*}}}*/
66 // GenContents::Mystrdup - Custom strdup /*{{{*/
67 // ---------------------------------------------------------------------
68 /* This strdup also uses a large block allocator to eliminate glibc
69 overhead */
70 char *GenContents::Mystrdup(const char *From)
71 {
72 unsigned int Len = strlen(From) + 1;
73 if (StrLeft <= Len)
74 {
75 StrLeft = 4096*10;
76 StrPool = (char *)malloc(StrLeft);
77
78 BigBlock *Block = new BigBlock;
79 Block->Block = StrPool;
80 Block->Next = BlockList;
81 BlockList = Block;
82 }
83
84 memcpy(StrPool,From,Len);
85 StrLeft -= Len;
86
87 char *Res = StrPool;
88 StrPool += Len;
89 return Res;
90 }
91 /*}}}*/
92 // GenContents::Node::operator new - Big block allocator /*{{{*/
93 // ---------------------------------------------------------------------
94 /* This eliminates glibc's malloc overhead by allocating large blocks and
95 having a continuous set of Nodes. This takes about 8 bytes off each nodes
96 space needs. Freeing is not supported. */
97 void *GenContents::Node::operator new(size_t Amount,GenContents *Owner)
98 {
99 if (Owner->NodeLeft == 0)
100 {
101 Owner->NodeLeft = 10000;
102 Owner->NodePool = (Node *)malloc(Amount*Owner->NodeLeft);
103 BigBlock *Block = new BigBlock;
104 Block->Block = Owner->NodePool;
105 Block->Next = Owner->BlockList;
106 Owner->BlockList = Block;
107 }
108
109 Owner->NodeLeft--;
110 return Owner->NodePool++;
111 }
112 /*}}}*/
113 // GenContents::Grab - Grab a new node representing Name under Top /*{{{*/
114 // ---------------------------------------------------------------------
115 /* This grabs a new node representing the pathname component Name under
116 the node Top. The node is given the name Package. It is assumed that Name
117 is inside of top. If a duplicate already entered name is found then
118 a note is made on the Dup list and the previous in-tree node is returned. */
119 GenContents::Node *GenContents::Grab(GenContents::Node *Top,const char *Name,
120 const char *Package)
121 {
122 /* We drop down to the next dir level each call. This simplifies
123 the calling routine */
124 if (Top->DirDown == 0)
125 {
126 Node *Item = new(this) Node;
127 Item->Path = Mystrdup(Name);
128 Item->Package = Package;
129 Top->DirDown = Item;
130 return Item;
131 }
132 Top = Top->DirDown;
133
134 int Res;
135 while (1)
136 {
137 Res = strcmp(Name,Top->Path);
138
139 // Collision!
140 if (Res == 0)
141 {
142 // See if this the the same package (multi-version dup)
143 if (Top->Package == Package ||
144 strcasecmp(Top->Package,Package) == 0)
145 return Top;
146
147 // Look for an already existing Dup
148 for (Node *I = Top->Dups; I != 0; I = I->Dups)
149 if (I->Package == Package ||
150 strcasecmp(I->Package,Package) == 0)
151 return Top;
152
153 // Add the dup in
154 Node *Item = new(this) Node;
155 Item->Path = Top->Path;
156 Item->Package = Package;
157 Item->Dups = Top->Dups;
158 Top->Dups = Item;
159 return Top;
160 }
161
162 // Continue to traverse the tree
163 if (Res < 0)
164 {
165 if (Top->BTreeLeft == 0)
166 break;
167 Top = Top->BTreeLeft;
168 }
169 else
170 {
171 if (Top->BTreeRight == 0)
172 break;
173 Top = Top->BTreeRight;
174 }
175 }
176
177 // The item was not found in the tree
178 Node *Item = new(this) Node;
179 Item->Path = Mystrdup(Name);
180 Item->Package = Package;
181
182 // Link it into the tree
183 if (Res < 0)
184 {
185 Item->BTreeLeft = Top->BTreeLeft;
186 Top->BTreeLeft = Item;
187 }
188 else
189 {
190 Item->BTreeRight = Top->BTreeRight;
191 Top->BTreeRight = Item;
192 }
193
194 return Item;
195 }
196 /*}}}*/
197 // GenContents::Add - Add a path to the tree /*{{{*/
198 // ---------------------------------------------------------------------
199 /* This takes a full pathname and adds it into the tree. We split the
200 pathname into directory fragments adding each one as we go. Technically
201 in output from tar this should result in hitting previous items. */
202 void GenContents::Add(const char *Dir,const char *Package)
203 {
204 Node *Root = &this->Root;
205
206 // Drop leading slashes
207 while (*Dir == '/' && *Dir != 0)
208 Dir++;
209
210 // Run over the string and grab out each bit up to and including a /
211 const char *Start = Dir;
212 const char *I = Dir;
213 while (*I != 0)
214 {
215 if (*I != '/' || I - Start <= 1)
216 {
217 I++;
218 continue;
219 }
220 I++;
221
222 // Copy the path fragment over
223 char Tmp[1024];
224 strncpy(Tmp,Start,I - Start);
225 Tmp[I - Start] = 0;
226
227 // Grab a node for it
228 Root = Grab(Root,Tmp,Package);
229
230 Start = I;
231 }
232
233 // The final component if it does not have a trailing /
234 if (I - Start >= 1)
235 Grab(Root,Start,Package);
236 }
237 /*}}}*/
238 // GenContents::WriteSpace - Write a given number of white space chars /*{{{*/
239 // ---------------------------------------------------------------------
240 /* We mod 8 it and write tabs where possible. */
241 void GenContents::WriteSpace(FILE *Out,unsigned int Current,unsigned int Target)
242 {
243 if (Target <= Current)
244 Target = Current + 1;
245
246 /* Now we write tabs so long as the next tab stop would not pass
247 the target */
248 for (; (Current/8 + 1)*8 < Target; Current = (Current/8 + 1)*8)
249 fputc('\t',Out);
250
251 // Fill the last bit with spaces
252 for (; Current < Target; Current++)
253 fputc(' ',Out);
254 }
255 /*}}}*/
256 // GenContents::Print - Display the tree /*{{{*/
257 // ---------------------------------------------------------------------
258 /* This is the final result function. It takes the tree and recursively
259 calls itself and runs over each section of the tree printing out
260 the pathname and the hit packages. We use Buf to build the pathname
261 summed over all the directory parents of this node. */
262 void GenContents::Print(FILE *Out)
263 {
264 char Buffer[1024];
265 Buffer[0] = 0;
266 DoPrint(Out,&Root,Buffer);
267 }
268 void GenContents::DoPrint(FILE *Out,GenContents::Node *Top, char *Buf)
269 {
270 if (Top == 0)
271 return;
272
273 // Go left
274 DoPrint(Out,Top->BTreeLeft,Buf);
275
276 // Print the current dir location and then descend to lower dirs
277 char *OldEnd = Buf + strlen(Buf);
278 if (Top->Path != 0)
279 {
280 strcat(Buf,Top->Path);
281
282 // Do not show the item if it is a directory with dups
283 if (Top->Path[strlen(Top->Path)-1] != '/' /*|| Top->Dups == 0*/)
284 {
285 fputs(Buf,Out);
286 WriteSpace(Out,strlen(Buf),60);
287 for (Node *I = Top; I != 0; I = I->Dups)
288 {
289 if (I != Top)
290 fputc(',',Out);
291 fputs(I->Package,Out);
292 }
293 fputc('\n',Out);
294 }
295 }
296
297 // Go along the directory link
298 DoPrint(Out,Top->DirDown,Buf);
299 *OldEnd = 0;
300
301 // Go right
302 DoPrint(Out,Top->BTreeRight,Buf);
303 }
304 /*}}}*/
305
306 // ContentsExtract::Read - Read the archive /*{{{*/
307 // ---------------------------------------------------------------------
308 /* */
309 bool ContentsExtract::Read(debDebFile &Deb)
310 {
311 Reset();
312 return Deb.ExtractArchive(*this);
313 }
314 /*}}}*/
315 // ContentsExtract::DoItem - Extract an item /*{{{*/
316 // ---------------------------------------------------------------------
317 /* This just tacks the name onto the end of our memory buffer */
318 bool ContentsExtract::DoItem(Item &Itm, int &/*Fd*/)
319 {
320 unsigned long Len = strlen(Itm.Name);
321
322 // Strip leading ./'s
323 if (Itm.Name[0] == '.' && Itm.Name[1] == '/')
324 {
325 // == './'
326 if (Len == 2)
327 return true;
328
329 Len -= 2;
330 Itm.Name += 2;
331 }
332
333 // Allocate more storage for the string list
334 if (CurSize + Len + 2 >= MaxSize || Data == 0)
335 {
336 if (MaxSize == 0)
337 MaxSize = 512*1024/2;
338 char *NewData = (char *)realloc(Data,MaxSize*2);
339 if (NewData == 0)
340 return _error->Error(_("realloc - Failed to allocate memory"));
341 Data = NewData;
342 MaxSize *= 2;
343 }
344
345 strcpy(Data+CurSize,Itm.Name);
346 CurSize += Len + 1;
347 return true;
348 }
349 /*}}}*/
350 // ContentsExtract::TakeContents - Load the contents data /*{{{*/
351 // ---------------------------------------------------------------------
352 /* */
353 bool ContentsExtract::TakeContents(const void *NewData,unsigned long long Length)
354 {
355 if (Length == 0)
356 {
357 CurSize = 0;
358 return true;
359 }
360
361 // Allocate more storage for the string list
362 if (Length + 2 >= MaxSize || Data == 0)
363 {
364 if (MaxSize == 0)
365 MaxSize = 512*1024/2;
366 while (MaxSize*2 <= Length)
367 MaxSize *= 2;
368
369 char *NewData = (char *)realloc(Data,MaxSize*2);
370 if (NewData == 0)
371 return _error->Error(_("realloc - Failed to allocate memory"));
372 Data = NewData;
373 MaxSize *= 2;
374 }
375 memcpy(Data,NewData,Length);
376 CurSize = Length;
377
378 return Data[CurSize-1] == 0;
379 }
380 /*}}}*/
381 // ContentsExtract::Add - Read the contents data into the sorter /*{{{*/
382 // ---------------------------------------------------------------------
383 /* */
384 void ContentsExtract::Add(GenContents &Contents,std::string const &Package)
385 {
386 const char *Start = Data;
387 char *Pkg = Contents.Mystrdup(Package.c_str());
388 for (const char *I = Data; I < Data + CurSize; I++)
389 {
390 if (*I == 0)
391 {
392 Contents.Add(Start,Pkg);
393 Start = ++I;
394 }
395 }
396 }
397 /*}}}*/