]> git.saurik.com Git - apt.git/blob - apt-pkg/tagfile.cc
Merge remote-tracking branch 'mvo/feature/hash-stats' into debian/experimental
[apt.git] / apt-pkg / tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13 // Include Files /*{{{*/
#include<config.h>

#include <apt-pkg/tagfile.h>
#include <apt-pkg/error.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/fileutl.h>

#include <string>
#include <vector>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include <apti18n.h>
28 /*}}}*/
29
30 using std::string;
31
32 class pkgTagFilePrivate
33 {
34 public:
35 pkgTagFilePrivate(FileFd *pFd, unsigned long long Size) : Fd(*pFd), Buffer(NULL),
36 Start(NULL), End(NULL),
37 Done(false), iOffset(0),
38 Size(Size)
39 {
40 }
41 FileFd &Fd;
42 char *Buffer;
43 char *Start;
44 char *End;
45 bool Done;
46 unsigned long long iOffset;
47 unsigned long long Size;
48 };
49
static unsigned long AlphaHash(const char *Text, size_t Length)		/*{{{*/
{
   /* Hash of (at most) the last eight characters of a tag name. Bit 0x20 is
      masked out of every character, so ASCII upper and lower case letters
      hash alike. The result always fits into 0..255.
      This very simple function gives very good performance on the debian
      package files. */
   size_t const Window = 8;
   const char *Cur = Text;
   size_t Remaining = Length;
   if (Remaining > Window)
   {
      // only the tail of a long name takes part in the hash
      Cur += (Remaining - Window);
      Remaining = Window;
   }

   unsigned long Hash = 0;
   for (const char * const Last = Cur + Remaining; Cur != Last; ++Cur)
      Hash = (Hash << 1) ^ ((unsigned long)(*Cur) & 0xDF);
   return Hash & 0xFF;
}
64 /*}}}*/
65
66 // TagFile::pkgTagFile - Constructor /*{{{*/
67 // ---------------------------------------------------------------------
68 /* */
69 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long long Size)
70 : d(NULL)
71 {
72 Init(pFd, Size);
73 }
74
75 void pkgTagFile::Init(FileFd *pFd,unsigned long long Size)
76 {
77 /* The size is increased by 4 because if we start with the Size of the
78 filename we need to try to read 1 char more to see an EOF faster, 1
79 char the end-pointer can be on and maybe 2 newlines need to be added
80 to the end of the file -> 4 extra chars */
81 Size += 4;
82 if(d != NULL)
83 {
84 free(d->Buffer);
85 delete d;
86 }
87 d = new pkgTagFilePrivate(pFd, Size);
88
89 if (d->Fd.IsOpen() == false)
90 d->Start = d->End = d->Buffer = 0;
91 else
92 d->Buffer = (char*)malloc(sizeof(char) * Size);
93
94 if (d->Buffer == NULL)
95 d->Done = true;
96 else
97 d->Done = false;
98
99 d->Start = d->End = d->Buffer;
100 d->iOffset = 0;
101 if (d->Done == false)
102 Fill();
103 }
104 /*}}}*/
105 // TagFile::~pkgTagFile - Destructor /*{{{*/
106 // ---------------------------------------------------------------------
107 /* */
108 pkgTagFile::~pkgTagFile()
109 {
110 free(d->Buffer);
111 delete d;
112 }
113 /*}}}*/
114 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
115 APT_PURE unsigned long pkgTagFile::Offset()
116 {
117 return d->iOffset;
118 }
119 /*}}}*/
120 // TagFile::Resize - Resize the internal buffer /*{{{*/
121 // ---------------------------------------------------------------------
/* Resize the internal buffer (doubling it in size). Fails if the maximum
 * size is reached.
 */
125 bool pkgTagFile::Resize()
126 {
127 // fail is the buffer grows too big
128 if(d->Size > 1024*1024+1)
129 return false;
130
131 return Resize(d->Size * 2);
132 }
133 bool pkgTagFile::Resize(unsigned long long const newSize)
134 {
135 unsigned long long const EndSize = d->End - d->Start;
136
137 // get new buffer and use it
138 char* newBuffer = (char*)realloc(d->Buffer, sizeof(char) * newSize);
139 if (newBuffer == NULL)
140 return false;
141 d->Buffer = newBuffer;
142 d->Size = newSize;
143
144 // update the start/end pointers to the new buffer
145 d->Start = d->Buffer;
146 d->End = d->Start + EndSize;
147 return true;
148 }
149 /*}}}*/
150 // TagFile::Step - Advance to the next section /*{{{*/
151 // ---------------------------------------------------------------------
152 /* If the Section Scanner fails we refill the buffer and try again.
153 * If that fails too, double the buffer size and try again until a
154 * maximum buffer is reached.
155 */
156 bool pkgTagFile::Step(pkgTagSection &Tag)
157 {
158 if(Tag.Scan(d->Start,d->End - d->Start) == false)
159 {
160 do
161 {
162 if (Fill() == false)
163 return false;
164
165 if(Tag.Scan(d->Start,d->End - d->Start, false))
166 break;
167
168 if (Resize() == false)
169 return _error->Error(_("Unable to parse package file %s (1)"),
170 d->Fd.Name().c_str());
171
172 } while (Tag.Scan(d->Start,d->End - d->Start, false) == false);
173 }
174
175 d->Start += Tag.size();
176 d->iOffset += Tag.size();
177
178 Tag.Trim();
179 return true;
180 }
181 /*}}}*/
182 // TagFile::Fill - Top up the buffer /*{{{*/
183 // ---------------------------------------------------------------------
184 /* This takes the bit at the end of the buffer and puts it at the start
185 then fills the rest from the file */
186 bool pkgTagFile::Fill()
187 {
188 unsigned long long EndSize = d->End - d->Start;
189 unsigned long long Actual = 0;
190
191 memmove(d->Buffer,d->Start,EndSize);
192 d->Start = d->Buffer;
193 d->End = d->Buffer + EndSize;
194
195 if (d->Done == false)
196 {
197 // See if only a bit of the file is left
198 unsigned long long const dataSize = d->Size - ((d->End - d->Buffer) + 1);
199 if (d->Fd.Read(d->End, dataSize, &Actual) == false)
200 return false;
201 if (Actual != dataSize)
202 d->Done = true;
203 d->End += Actual;
204 }
205
206 if (d->Done == true)
207 {
208 if (EndSize <= 3 && Actual == 0)
209 return false;
210 if (d->Size - (d->End - d->Buffer) < 4)
211 return true;
212
213 // Append a double new line if one does not exist
214 unsigned int LineCount = 0;
215 for (const char *E = d->End - 1; E - d->End < 6 && (*E == '\n' || *E == '\r'); E--)
216 if (*E == '\n')
217 LineCount++;
218 if (LineCount < 2)
219 {
220 if ((unsigned)(d->End - d->Buffer) >= d->Size)
221 Resize(d->Size + 3);
222 for (; LineCount < 2; LineCount++)
223 *d->End++ = '\n';
224 }
225
226 return true;
227 }
228
229 return true;
230 }
231 /*}}}*/
232 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
233 // ---------------------------------------------------------------------
234 /* This jumps to a pre-recorded file location and reads the record
235 that is there */
236 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long long Offset)
237 {
238 // We are within a buffer space of the next hit..
239 if (Offset >= d->iOffset && d->iOffset + (d->End - d->Start) > Offset)
240 {
241 unsigned long long Dist = Offset - d->iOffset;
242 d->Start += Dist;
243 d->iOffset += Dist;
244 // if we have seen the end, don't ask for more
245 if (d->Done == true)
246 return Tag.Scan(d->Start, d->End - d->Start);
247 else
248 return Step(Tag);
249 }
250
251 // Reposition and reload..
252 d->iOffset = Offset;
253 d->Done = false;
254 if (d->Fd.Seek(Offset) == false)
255 return false;
256 d->End = d->Start = d->Buffer;
257
258 if (Fill() == false)
259 return false;
260
261 if (Tag.Scan(d->Start, d->End - d->Start) == true)
262 return true;
263
264 // This appends a double new line (for the real eof handling)
265 if (Fill() == false)
266 return false;
267
268 if (Tag.Scan(d->Start, d->End - d->Start, false) == false)
269 return _error->Error(_("Unable to parse package file %s (2)"),d->Fd.Name().c_str());
270
271 return true;
272 }
273 /*}}}*/
274 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
275 // ---------------------------------------------------------------------
276 /* */
277 pkgTagSection::pkgTagSection()
278 : Section(0), d(NULL), Stop(0)
279 {
280 memset(&LookupTable, 0, sizeof(LookupTable));
281 }
282 /*}}}*/
283 // TagSection::Scan - Scan for the end of the header information /*{{{*/
284 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength, bool const Restart)
285 {
286 Section = Start;
287 const char *End = Start + MaxLength;
288
289 if (Restart == false && Tags.empty() == false)
290 {
291 Stop = Section + Tags.back().StartTag;
292 if (End <= Stop)
293 return false;
294 Stop = (const char *)memchr(Stop,'\n',End - Stop);
295 if (Stop == NULL)
296 return false;
297 ++Stop;
298 }
299 else
300 {
301 Stop = Section;
302 if (Tags.empty() == false)
303 {
304 memset(&LookupTable, 0, sizeof(LookupTable));
305 Tags.clear();
306 }
307 Tags.reserve(0x100);
308 }
309 size_t TagCount = Tags.size();
310
311 if (Stop == 0)
312 return false;
313
314 TagData lastTagData(0);
315 lastTagData.EndTag = 0;
316 unsigned long lastTagHash = 0;
317 while (Stop < End)
318 {
319 TrimRecord(true,End);
320
321 // this can happen when TrimRecord trims away the entire Record
322 // (e.g. because it just contains comments)
323 if(Stop == End)
324 return true;
325
326 // Start a new index and add it to the hash
327 if (isspace(Stop[0]) == 0)
328 {
329 // store the last found tag
330 if (lastTagData.EndTag != 0)
331 {
332 if (LookupTable[lastTagHash] != 0)
333 lastTagData.NextInBucket = LookupTable[lastTagHash];
334 LookupTable[lastTagHash] = TagCount;
335 Tags.push_back(lastTagData);
336 }
337
338 ++TagCount;
339 lastTagData = TagData(Stop - Section);
340 // find the colon separating tag and value
341 char const * Colon = (char const *) memchr(Stop, ':', End - Stop);
342 if (Colon == NULL)
343 return false;
344 // find the end of the tag (which might or might not be the colon)
345 char const * EndTag = Colon;
346 --EndTag;
347 for (; EndTag > Stop && isspace(*EndTag) != 0; --EndTag)
348 ;
349 ++EndTag;
350 lastTagData.EndTag = EndTag - Section;
351 lastTagHash = AlphaHash(Stop, EndTag - Stop);
352 // find the beginning of the value
353 Stop = Colon + 1;
354 for (; isspace(*Stop) != 0; ++Stop);
355 if (Stop >= End)
356 return false;
357 lastTagData.StartValue = Stop - Section;
358 }
359
360 Stop = (const char *)memchr(Stop,'\n',End - Stop);
361
362 if (Stop == 0)
363 return false;
364
365 for (; Stop+1 < End && Stop[1] == '\r'; Stop++)
366 /* nothing */
367 ;
368
369 // Double newline marks the end of the record
370 if (Stop+1 < End && Stop[1] == '\n')
371 {
372 if (lastTagData.EndTag != 0)
373 {
374 if (LookupTable[lastTagHash] != 0)
375 lastTagData.NextInBucket = LookupTable[lastTagHash];
376 LookupTable[lastTagHash] = TagCount;
377 Tags.push_back(lastTagData);
378 }
379
380 TagData const td(Stop - Section);
381 Tags.push_back(td);
382 TrimRecord(false,End);
383 return true;
384 }
385
386 Stop++;
387 }
388
389 return false;
390 }
391 /*}}}*/
392 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
393 // ---------------------------------------------------------------------
394 /* There should be exactly 2 newline at the end of the record, no more. */
395 void pkgTagSection::TrimRecord(bool BeforeRecord, const char*& End)
396 {
397 if (BeforeRecord == true)
398 return;
399 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
400 }
401 /*}}}*/
402 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
403 // ---------------------------------------------------------------------
404 /* There should be exactly 1 newline at the end of the buffer, no more. */
405 void pkgTagSection::Trim()
406 {
407 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
408 }
409 /*}}}*/
410 // TagSection::Exists - return True if a tag exists /*{{{*/
411 bool pkgTagSection::Exists(const char* const Tag) const
412 {
413 unsigned int tmp;
414 return Find(Tag, tmp);
415 }
416 /*}}}*/
417 // TagSection::Find - Locate a tag /*{{{*/
418 // ---------------------------------------------------------------------
419 /* This searches the section for a tag that matches the given string. */
420 bool pkgTagSection::Find(const char *Tag,unsigned int &Pos) const
421 {
422 size_t const Length = strlen(Tag);
423 unsigned int Bucket = LookupTable[AlphaHash(Tag, Length)];
424 if (Bucket == 0)
425 return false;
426
427 for (; Bucket != 0; Bucket = Tags[Bucket - 1].NextInBucket)
428 {
429 if ((Tags[Bucket - 1].EndTag - Tags[Bucket - 1].StartTag) != Length)
430 continue;
431
432 char const * const St = Section + Tags[Bucket - 1].StartTag;
433 if (strncasecmp(Tag,St,Length) != 0)
434 continue;
435
436 Pos = Bucket - 1;
437 return true;
438 }
439
440 Pos = 0;
441 return false;
442 }
443 bool pkgTagSection::Find(const char *Tag,const char *&Start,
444 const char *&End) const
445 {
446 unsigned int Pos;
447 if (Find(Tag, Pos) == false)
448 return false;
449
450 Start = Section + Tags[Pos].StartValue;
451 // Strip off the gunk from the end
452 End = Section + Tags[Pos + 1].StartTag;
453 if (unlikely(Start > End))
454 return _error->Error("Internal parsing error");
455
456 for (; isspace(End[-1]) != 0 && End > Start; --End);
457
458 return true;
459 }
460 /*}}}*/
461 // TagSection::FindS - Find a string /*{{{*/
462 // ---------------------------------------------------------------------
463 /* */
464 string pkgTagSection::FindS(const char *Tag) const
465 {
466 const char *Start;
467 const char *End;
468 if (Find(Tag,Start,End) == false)
469 return string();
470 return string(Start,End);
471 }
472 /*}}}*/
473 // TagSection::FindI - Find an integer /*{{{*/
474 // ---------------------------------------------------------------------
475 /* */
476 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
477 {
478 const char *Start;
479 const char *Stop;
480 if (Find(Tag,Start,Stop) == false)
481 return Default;
482
483 // Copy it into a temp buffer so we can use strtol
484 char S[300];
485 if ((unsigned)(Stop - Start) >= sizeof(S))
486 return Default;
487 strncpy(S,Start,Stop-Start);
488 S[Stop - Start] = 0;
489
490 char *End;
491 signed long Result = strtol(S,&End,10);
492 if (S == End)
493 return Default;
494 return Result;
495 }
496 /*}}}*/
497 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
498 // ---------------------------------------------------------------------
499 /* */
500 unsigned long long pkgTagSection::FindULL(const char *Tag, unsigned long long const &Default) const
501 {
502 const char *Start;
503 const char *Stop;
504 if (Find(Tag,Start,Stop) == false)
505 return Default;
506
507 // Copy it into a temp buffer so we can use strtoull
508 char S[100];
509 if ((unsigned)(Stop - Start) >= sizeof(S))
510 return Default;
511 strncpy(S,Start,Stop-Start);
512 S[Stop - Start] = 0;
513
514 char *End;
515 unsigned long long Result = strtoull(S,&End,10);
516 if (S == End)
517 return Default;
518 return Result;
519 }
520 /*}}}*/
521 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
522 // ---------------------------------------------------------------------
523 /* The bits marked in Flag are masked on/off in Flags */
524 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
525 unsigned long Flag) const
526 {
527 const char *Start;
528 const char *Stop;
529 if (Find(Tag,Start,Stop) == false)
530 return true;
531 return FindFlag(Flags, Flag, Start, Stop);
532 }
533 bool pkgTagSection::FindFlag(unsigned long &Flags, unsigned long Flag,
534 char const* Start, char const* Stop)
535 {
536 switch (StringToBool(string(Start, Stop)))
537 {
538 case 0:
539 Flags &= ~Flag;
540 return true;
541
542 case 1:
543 Flags |= Flag;
544 return true;
545
546 default:
547 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
548 return true;
549 }
550 return true;
551 }
552 /*}}}*/
553 APT_PURE unsigned int pkgTagSection::Count() const { /*{{{*/
554 if (Tags.empty() == true)
555 return 0;
556 // the last element is just marking the end and isn't a real one
557 return Tags.size() - 1;
558 }
559 /*}}}*/
560 // TFRewrite - Rewrite a control record /*{{{*/
561 // ---------------------------------------------------------------------
/* This writes the control record to stdout, rewriting it as necessary. The
   override map item specifies the rewriting rules to follow. This also
   takes the time to sort the field list. */
565
566 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
567 array. */
/* Field order for binary package records, terminated by a null pointer. */
static const char *iTFRewritePackageOrder[] = {
   "Package", "Essential", "Status", "Priority", "Section",
   "Installed-Size", "Maintainer", "Original-Maintainer",
   "Architecture", "Source", "Version",
   "Revision",               // Obsolete
   "Config-Version",         // Obsolete
   "Replaces", "Provides", "Depends", "Pre-Depends",
   "Recommends", "Suggests", "Conflicts", "Breaks",
   "Conffiles", "Filename", "Size",
   "MD5Sum", "SHA1", "SHA256", "SHA512",
   "MSDOS-Filename",         // Obsolete
   "Description",
   0};
/* Field order for source package records, terminated by a null pointer. */
static const char *iTFRewriteSourceOrder[] = {
   "Package", "Source", "Binary", "Version",
   "Priority", "Section",
   "Maintainer", "Original-Maintainer",
   "Build-Depends", "Build-Depends-Indep",
   "Build-Conflicts", "Build-Conflicts-Indep",
   "Architecture", "Standards-Version", "Format",
   "Directory", "Files",
   0};
618
619 /* Two levels of initialization are used because gcc will set the symbol
620 size of an array to the length of the array, causing dynamic relinking
621 errors. Doing this makes the symbol size constant */
622 const char **TFRewritePackageOrder = iTFRewritePackageOrder;
623 const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
624
625 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
626 TFRewriteData *Rewrite)
627 {
628 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
629 for (unsigned I = 0; I != 256; I++)
630 Visited[I] = 0;
631
632 // Set new tag up as necessary.
633 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
634 {
635 if (Rewrite[J].NewTag == 0)
636 Rewrite[J].NewTag = Rewrite[J].Tag;
637 }
638
639 // Write all all of the tags, in order.
640 if (Order != NULL)
641 {
642 for (unsigned int I = 0; Order[I] != 0; I++)
643 {
644 bool Rewritten = false;
645
646 // See if this is a field that needs to be rewritten
647 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
648 {
649 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
650 {
651 Visited[J] |= 2;
652 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
653 {
654 if (isspace(Rewrite[J].Rewrite[0]))
655 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
656 else
657 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
658 }
659 Rewritten = true;
660 break;
661 }
662 }
663
664 // See if it is in the fragment
665 unsigned Pos;
666 if (Tags.Find(Order[I],Pos) == false)
667 continue;
668 Visited[Pos] |= 1;
669
670 if (Rewritten == true)
671 continue;
672
673 /* Write out this element, taking a moment to rewrite the tag
674 in case of changes of case. */
675 const char *Start;
676 const char *Stop;
677 Tags.Get(Start,Stop,Pos);
678
679 if (fputs(Order[I],Output) < 0)
680 return _error->Errno("fputs","IO Error to output");
681 Start += strlen(Order[I]);
682 if (fwrite(Start,Stop - Start,1,Output) != 1)
683 return _error->Errno("fwrite","IO Error to output");
684 if (Stop[-1] != '\n')
685 fprintf(Output,"\n");
686 }
687 }
688
689 // Now write all the old tags that were missed.
690 for (unsigned int I = 0; I != Tags.Count(); I++)
691 {
692 if ((Visited[I] & 1) == 1)
693 continue;
694
695 const char *Start;
696 const char *Stop;
697 Tags.Get(Start,Stop,I);
698 const char *End = Start;
699 for (; End < Stop && *End != ':'; End++);
700
701 // See if this is a field that needs to be rewritten
702 bool Rewritten = false;
703 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
704 {
705 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
706 {
707 Visited[J] |= 2;
708 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
709 {
710 if (isspace(Rewrite[J].Rewrite[0]))
711 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
712 else
713 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
714 }
715
716 Rewritten = true;
717 break;
718 }
719 }
720
721 if (Rewritten == true)
722 continue;
723
724 // Write out this element
725 if (fwrite(Start,Stop - Start,1,Output) != 1)
726 return _error->Errno("fwrite","IO Error to output");
727 if (Stop[-1] != '\n')
728 fprintf(Output,"\n");
729 }
730
731 // Now write all the rewrites that were missed
732 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
733 {
734 if ((Visited[J] & 2) == 2)
735 continue;
736
737 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
738 {
739 if (isspace(Rewrite[J].Rewrite[0]))
740 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
741 else
742 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
743 }
744 }
745
746 return true;
747 }
748 /*}}}*/