]> git.saurik.com Git - apt.git/blame_incremental - apt-pkg/tagfile.cc
increase Pkg/Grp hash table size from 2k to 64k
[apt.git] / apt-pkg / tagfile.cc
... / ...
CommitLineData
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
3// $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4/* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13// Include Files /*{{{*/
14#include<config.h>
15
16#include <apt-pkg/tagfile.h>
17#include <apt-pkg/error.h>
18#include <apt-pkg/strutl.h>
19#include <apt-pkg/fileutl.h>
20
21#include <string>
22#include <stdio.h>
23#include <ctype.h>
24#include <stdlib.h>
25#include <string.h>
26
27#include <apti18n.h>
28 /*}}}*/
29
30using std::string;
31
32class pkgTagFilePrivate
33{
34public:
35 pkgTagFilePrivate(FileFd *pFd, unsigned long long Size) : Fd(*pFd), Buffer(NULL),
36 Start(NULL), End(NULL),
37 Done(false), iOffset(0),
38 Size(Size)
39 {
40 }
41 FileFd &Fd;
42 char *Buffer;
43 char *Start;
44 char *End;
45 bool Done;
46 unsigned long long iOffset;
47 unsigned long long Size;
48};
49
50// TagFile::pkgTagFile - Constructor /*{{{*/
51// ---------------------------------------------------------------------
52/* */
53pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long long Size)
54{
55 /* The size is increased by 4 because if we start with the Size of the
56 filename we need to try to read 1 char more to see an EOF faster, 1
57 char the end-pointer can be on and maybe 2 newlines need to be added
58 to the end of the file -> 4 extra chars */
59 Size += 4;
60 d = new pkgTagFilePrivate(pFd, Size);
61
62 if (d->Fd.IsOpen() == false)
63 d->Start = d->End = d->Buffer = 0;
64 else
65 d->Buffer = (char*)malloc(sizeof(char) * Size);
66
67 if (d->Buffer == NULL)
68 d->Done = true;
69 else
70 d->Done = false;
71
72 d->Start = d->End = d->Buffer;
73 d->iOffset = 0;
74 if (d->Done == false)
75 Fill();
76}
77 /*}}}*/
78// TagFile::~pkgTagFile - Destructor /*{{{*/
79// ---------------------------------------------------------------------
80/* */
81pkgTagFile::~pkgTagFile()
82{
83 free(d->Buffer);
84 delete d;
85}
86 /*}}}*/
87// TagFile::Offset - Return the current offset in the buffer /*{{{*/
88APT_PURE unsigned long pkgTagFile::Offset()
89{
90 return d->iOffset;
91}
92 /*}}}*/
93// TagFile::Resize - Resize the internal buffer /*{{{*/
94// ---------------------------------------------------------------------
/* Resize the internal buffer (double it in size). Fail if the maximum
 * size is reached.
 */
98bool pkgTagFile::Resize()
99{
100 // fail is the buffer grows too big
101 if(d->Size > 1024*1024+1)
102 return false;
103
104 return Resize(d->Size * 2);
105}
106bool pkgTagFile::Resize(unsigned long long const newSize)
107{
108 unsigned long long const EndSize = d->End - d->Start;
109
110 // get new buffer and use it
111 char* newBuffer = (char*)realloc(d->Buffer, sizeof(char) * newSize);
112 if (newBuffer == NULL)
113 return false;
114 d->Buffer = newBuffer;
115 d->Size = newSize;
116
117 // update the start/end pointers to the new buffer
118 d->Start = d->Buffer;
119 d->End = d->Start + EndSize;
120 return true;
121}
122 /*}}}*/
123// TagFile::Step - Advance to the next section /*{{{*/
124// ---------------------------------------------------------------------
125/* If the Section Scanner fails we refill the buffer and try again.
126 * If that fails too, double the buffer size and try again until a
127 * maximum buffer is reached.
128 */
129bool pkgTagFile::Step(pkgTagSection &Tag)
130{
131 while (Tag.Scan(d->Start,d->End - d->Start) == false)
132 {
133 if (Fill() == false)
134 return false;
135
136 if(Tag.Scan(d->Start,d->End - d->Start))
137 break;
138
139 if (Resize() == false)
140 return _error->Error(_("Unable to parse package file %s (1)"),
141 d->Fd.Name().c_str());
142 }
143 d->Start += Tag.size();
144 d->iOffset += Tag.size();
145
146 Tag.Trim();
147 return true;
148}
149 /*}}}*/
150// TagFile::Fill - Top up the buffer /*{{{*/
151// ---------------------------------------------------------------------
152/* This takes the bit at the end of the buffer and puts it at the start
153 then fills the rest from the file */
154bool pkgTagFile::Fill()
155{
156 unsigned long long EndSize = d->End - d->Start;
157 unsigned long long Actual = 0;
158
159 memmove(d->Buffer,d->Start,EndSize);
160 d->Start = d->Buffer;
161 d->End = d->Buffer + EndSize;
162
163 if (d->Done == false)
164 {
165 // See if only a bit of the file is left
166 unsigned long long const dataSize = d->Size - ((d->End - d->Buffer) + 1);
167 if (d->Fd.Read(d->End, dataSize, &Actual) == false)
168 return false;
169 if (Actual != dataSize)
170 d->Done = true;
171 d->End += Actual;
172 }
173
174 if (d->Done == true)
175 {
176 if (EndSize <= 3 && Actual == 0)
177 return false;
178 if (d->Size - (d->End - d->Buffer) < 4)
179 return true;
180
181 // Append a double new line if one does not exist
182 unsigned int LineCount = 0;
183 for (const char *E = d->End - 1; E - d->End < 6 && (*E == '\n' || *E == '\r'); E--)
184 if (*E == '\n')
185 LineCount++;
186 if (LineCount < 2)
187 {
188 if ((unsigned)(d->End - d->Buffer) >= d->Size)
189 Resize(d->Size + 3);
190 for (; LineCount < 2; LineCount++)
191 *d->End++ = '\n';
192 }
193
194 return true;
195 }
196
197 return true;
198}
199 /*}}}*/
200// TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
201// ---------------------------------------------------------------------
202/* This jumps to a pre-recorded file location and reads the record
203 that is there */
204bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long long Offset)
205{
206 // We are within a buffer space of the next hit..
207 if (Offset >= d->iOffset && d->iOffset + (d->End - d->Start) > Offset)
208 {
209 unsigned long long Dist = Offset - d->iOffset;
210 d->Start += Dist;
211 d->iOffset += Dist;
212 // if we have seen the end, don't ask for more
213 if (d->Done == true)
214 return Tag.Scan(d->Start, d->End - d->Start);
215 else
216 return Step(Tag);
217 }
218
219 // Reposition and reload..
220 d->iOffset = Offset;
221 d->Done = false;
222 if (d->Fd.Seek(Offset) == false)
223 return false;
224 d->End = d->Start = d->Buffer;
225
226 if (Fill() == false)
227 return false;
228
229 if (Tag.Scan(d->Start, d->End - d->Start) == true)
230 return true;
231
232 // This appends a double new line (for the real eof handling)
233 if (Fill() == false)
234 return false;
235
236 if (Tag.Scan(d->Start, d->End - d->Start) == false)
237 return _error->Error(_("Unable to parse package file %s (2)"),d->Fd.Name().c_str());
238
239 return true;
240}
241 /*}}}*/
242// pkgTagSection::pkgTagSection - Constructor /*{{{*/
243// ---------------------------------------------------------------------
244/* */
245pkgTagSection::pkgTagSection()
246 : Section(0), TagCount(0), d(NULL), Stop(0)
247{
248 memset(&Indexes, 0, sizeof(Indexes));
249 memset(&AlphaIndexes, 0, sizeof(AlphaIndexes));
250}
251 /*}}}*/
252// TagSection::Scan - Scan for the end of the header information /*{{{*/
253// ---------------------------------------------------------------------
254/* This looks for the first double new line in the data stream.
255 It also indexes the tags in the section. */
256bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
257{
258 const char *End = Start + MaxLength;
259 Stop = Section = Start;
260 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
261
262 if (Stop == 0)
263 return false;
264
265 TagCount = 0;
266 while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
267 {
268 TrimRecord(true,End);
269
270 // this can happen when TrimRecord trims away the entire Record
271 // (e.g. because it just contains comments)
272 if(Stop == End)
273 return true;
274
275 // Start a new index and add it to the hash
276 if (isspace(Stop[0]) == 0)
277 {
278 Indexes[TagCount++] = Stop - Section;
279 AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
280 }
281
282 Stop = (const char *)memchr(Stop,'\n',End - Stop);
283
284 if (Stop == 0)
285 return false;
286
287 for (; Stop+1 < End && Stop[1] == '\r'; Stop++)
288 /* nothing */
289 ;
290
291 // Double newline marks the end of the record
292 if (Stop+1 < End && Stop[1] == '\n')
293 {
294 Indexes[TagCount] = Stop - Section;
295 TrimRecord(false,End);
296 return true;
297 }
298
299 Stop++;
300 }
301
302 return false;
303}
304 /*}}}*/
305// TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
306// ---------------------------------------------------------------------
307/* There should be exactly 2 newline at the end of the record, no more. */
308void pkgTagSection::TrimRecord(bool BeforeRecord, const char*& End)
309{
310 if (BeforeRecord == true)
311 return;
312 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
313}
314 /*}}}*/
315// TagSection::Trim - Trim off any trailing garbage /*{{{*/
316// ---------------------------------------------------------------------
317/* There should be exactly 1 newline at the end of the buffer, no more. */
318void pkgTagSection::Trim()
319{
320 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
321}
322 /*}}}*/
323// TagSection::Exists - return True if a tag exists /*{{{*/
324bool pkgTagSection::Exists(const char* const Tag)
325{
326 unsigned int tmp;
327 return Find(Tag, tmp);
328}
329 /*}}}*/
330// TagSection::Find - Locate a tag /*{{{*/
331// ---------------------------------------------------------------------
332/* This searches the section for a tag that matches the given string. */
333bool pkgTagSection::Find(const char *Tag,unsigned int &Pos) const
334{
335 unsigned int Length = strlen(Tag);
336 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
337 if (I == 0)
338 return false;
339 I--;
340
341 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
342 I = (I+1)%TagCount)
343 {
344 const char *St;
345 St = Section + Indexes[I];
346 if (strncasecmp(Tag,St,Length) != 0)
347 continue;
348
349 // Make sure the colon is in the right place
350 const char *C = St + Length;
351 for (; isspace(*C) != 0; C++);
352 if (*C != ':')
353 continue;
354 Pos = I;
355 return true;
356 }
357
358 Pos = 0;
359 return false;
360}
361 /*}}}*/
362// TagSection::Find - Locate a tag /*{{{*/
363// ---------------------------------------------------------------------
364/* This searches the section for a tag that matches the given string. */
365bool pkgTagSection::Find(const char *Tag,const char *&Start,
366 const char *&End) const
367{
368 unsigned int Length = strlen(Tag);
369 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
370 if (I == 0)
371 return false;
372 I--;
373
374 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
375 I = (I+1)%TagCount)
376 {
377 const char *St;
378 St = Section + Indexes[I];
379 if (strncasecmp(Tag,St,Length) != 0)
380 continue;
381
382 // Make sure the colon is in the right place
383 const char *C = St + Length;
384 for (; isspace(*C) != 0; C++);
385 if (*C != ':')
386 continue;
387
388 // Strip off the gunk from the start end
389 Start = C;
390 End = Section + Indexes[I+1];
391 if (Start >= End)
392 return _error->Error("Internal parsing error");
393
394 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
395 for (; isspace(End[-1]) != 0 && End > Start; End--);
396
397 return true;
398 }
399
400 Start = End = 0;
401 return false;
402}
403 /*}}}*/
404// TagSection::FindS - Find a string /*{{{*/
405// ---------------------------------------------------------------------
406/* */
407string pkgTagSection::FindS(const char *Tag) const
408{
409 const char *Start;
410 const char *End;
411 if (Find(Tag,Start,End) == false)
412 return string();
413 return string(Start,End);
414}
415 /*}}}*/
416// TagSection::FindI - Find an integer /*{{{*/
417// ---------------------------------------------------------------------
418/* */
419signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
420{
421 const char *Start;
422 const char *Stop;
423 if (Find(Tag,Start,Stop) == false)
424 return Default;
425
426 // Copy it into a temp buffer so we can use strtol
427 char S[300];
428 if ((unsigned)(Stop - Start) >= sizeof(S))
429 return Default;
430 strncpy(S,Start,Stop-Start);
431 S[Stop - Start] = 0;
432
433 char *End;
434 signed long Result = strtol(S,&End,10);
435 if (S == End)
436 return Default;
437 return Result;
438}
439 /*}}}*/
440// TagSection::FindULL - Find an unsigned long long integer /*{{{*/
441// ---------------------------------------------------------------------
442/* */
443unsigned long long pkgTagSection::FindULL(const char *Tag, unsigned long long const &Default) const
444{
445 const char *Start;
446 const char *Stop;
447 if (Find(Tag,Start,Stop) == false)
448 return Default;
449
450 // Copy it into a temp buffer so we can use strtoull
451 char S[100];
452 if ((unsigned)(Stop - Start) >= sizeof(S))
453 return Default;
454 strncpy(S,Start,Stop-Start);
455 S[Stop - Start] = 0;
456
457 char *End;
458 unsigned long long Result = strtoull(S,&End,10);
459 if (S == End)
460 return Default;
461 return Result;
462}
463 /*}}}*/
464// TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
465// ---------------------------------------------------------------------
466/* The bits marked in Flag are masked on/off in Flags */
467bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
468 unsigned long Flag) const
469{
470 const char *Start;
471 const char *Stop;
472 if (Find(Tag,Start,Stop) == false)
473 return true;
474 return FindFlag(Flags, Flag, Start, Stop);
475}
476bool pkgTagSection::FindFlag(unsigned long &Flags, unsigned long Flag,
477 char const* Start, char const* Stop)
478{
479 switch (StringToBool(string(Start, Stop)))
480 {
481 case 0:
482 Flags &= ~Flag;
483 return true;
484
485 case 1:
486 Flags |= Flag;
487 return true;
488
489 default:
490 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
491 return true;
492 }
493 return true;
494}
495 /*}}}*/
496// TFRewrite - Rewrite a control record /*{{{*/
497// ---------------------------------------------------------------------
/* This writes the control record to Output, rewriting it as necessary. The
   override map item specifies the rewriting rules to follow. This also
   takes the time to sort the field list. */
501
502/* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
503 array. */
static const char *iTFRewritePackageOrder[] =
{
   "Package",
   "Essential",
   "Status",
   "Priority",
   "Section",
   "Installed-Size",
   "Maintainer",
   "Original-Maintainer",
   "Architecture",
   "Source",
   "Version",
   "Revision",              // Obsolete
   "Config-Version",        // Obsolete
   "Replaces",
   "Provides",
   "Depends",
   "Pre-Depends",
   "Recommends",
   "Suggests",
   "Conflicts",
   "Breaks",
   "Conffiles",
   "Filename",
   "Size",
   "MD5Sum",
   "SHA1",
   "SHA256",
   "SHA512",
   "MSDOS-Filename",        // Obsolete
   "Description",
   0                        // end of list
};
static const char *iTFRewriteSourceOrder[] =
{
   "Package",
   "Source",
   "Binary",
   "Version",
   "Priority",
   "Section",
   "Maintainer",
   "Original-Maintainer",
   "Build-Depends",
   "Build-Depends-Indep",
   "Build-Conflicts",
   "Build-Conflicts-Indep",
   "Architecture",
   "Standards-Version",
   "Format",
   "Directory",
   "Files",
   0                        // end of list
};
554
555/* Two levels of initialization are used because gcc will set the symbol
556 size of an array to the length of the array, causing dynamic relinking
557 errors. Doing this makes the symbol size constant */
558const char **TFRewritePackageOrder = iTFRewritePackageOrder;
559const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
560
561bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
562 TFRewriteData *Rewrite)
563{
564 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
565 for (unsigned I = 0; I != 256; I++)
566 Visited[I] = 0;
567
568 // Set new tag up as necessary.
569 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
570 {
571 if (Rewrite[J].NewTag == 0)
572 Rewrite[J].NewTag = Rewrite[J].Tag;
573 }
574
575 // Write all all of the tags, in order.
576 if (Order != NULL)
577 {
578 for (unsigned int I = 0; Order[I] != 0; I++)
579 {
580 bool Rewritten = false;
581
582 // See if this is a field that needs to be rewritten
583 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
584 {
585 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
586 {
587 Visited[J] |= 2;
588 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
589 {
590 if (isspace(Rewrite[J].Rewrite[0]))
591 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
592 else
593 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
594 }
595 Rewritten = true;
596 break;
597 }
598 }
599
600 // See if it is in the fragment
601 unsigned Pos;
602 if (Tags.Find(Order[I],Pos) == false)
603 continue;
604 Visited[Pos] |= 1;
605
606 if (Rewritten == true)
607 continue;
608
609 /* Write out this element, taking a moment to rewrite the tag
610 in case of changes of case. */
611 const char *Start;
612 const char *Stop;
613 Tags.Get(Start,Stop,Pos);
614
615 if (fputs(Order[I],Output) < 0)
616 return _error->Errno("fputs","IO Error to output");
617 Start += strlen(Order[I]);
618 if (fwrite(Start,Stop - Start,1,Output) != 1)
619 return _error->Errno("fwrite","IO Error to output");
620 if (Stop[-1] != '\n')
621 fprintf(Output,"\n");
622 }
623 }
624
625 // Now write all the old tags that were missed.
626 for (unsigned int I = 0; I != Tags.Count(); I++)
627 {
628 if ((Visited[I] & 1) == 1)
629 continue;
630
631 const char *Start;
632 const char *Stop;
633 Tags.Get(Start,Stop,I);
634 const char *End = Start;
635 for (; End < Stop && *End != ':'; End++);
636
637 // See if this is a field that needs to be rewritten
638 bool Rewritten = false;
639 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
640 {
641 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
642 {
643 Visited[J] |= 2;
644 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
645 {
646 if (isspace(Rewrite[J].Rewrite[0]))
647 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
648 else
649 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
650 }
651
652 Rewritten = true;
653 break;
654 }
655 }
656
657 if (Rewritten == true)
658 continue;
659
660 // Write out this element
661 if (fwrite(Start,Stop - Start,1,Output) != 1)
662 return _error->Errno("fwrite","IO Error to output");
663 if (Stop[-1] != '\n')
664 fprintf(Output,"\n");
665 }
666
667 // Now write all the rewrites that were missed
668 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
669 {
670 if ((Visited[J] & 2) == 2)
671 continue;
672
673 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
674 {
675 if (isspace(Rewrite[J].Rewrite[0]))
676 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
677 else
678 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
679 }
680 }
681
682 return true;
683}
684 /*}}}*/