1 // -*- mode: cpp; mode: fold -*-
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
6 Fast scanner for RFC-822 type header information
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
11 ##################################################################### */
13 // Include Files /*{{{*/
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
// Private implementation data of pkgTagFile: the input file plus the
// bookkeeping for the in-memory scan buffer.
32 class pkgTagFilePrivate
// Constructor: binds Fd to *pFd and starts with no buffer (Buffer, Start and
// End are NULL), Done == false and iOffset == 0.
// NOTE(review): the tail of the initializer list is missing from this
// extract; presumably Size is stored there as well -- confirm.
35 pkgTagFilePrivate(FileFd
*pFd
, unsigned long long Size
) : Fd(*pFd
), Buffer(NULL
),
36 Start(NULL
), End(NULL
),
37 Done(false), iOffset(0),
// Advanced in step with Start by pkgTagFile::Step() and compared with the
// requested file offset in pkgTagFile::Jump().
46 unsigned long long iOffset
;
// Checked against a roughly 1 MiB ceiling and doubled by pkgTagFile::Resize().
47 unsigned long long Size
;
// Private implementation data of pkgTagSection: one TagData entry per tag
// recorded by Scan(), kept in the Tags vector.
50 class pkgTagSectionPrivate
53 pkgTagSectionPrivate()
// Offset of the tag name; Scan() stores it relative to Section.
57 unsigned int StartTag
;
// Offset where the value begins (set by Scan() after skipping whitespace),
// relative to Section.
59 unsigned int StartValue
;
// 1-based index of the next entry chained in the same AlphaIndexes bucket;
// 0 terminates the chain (Find() follows it via Tags[Bucket - 1]).
60 unsigned int NextInBucket
;
// A new entry only knows where its tag starts; every other field is zeroed
// and filled in later by Scan().
62 TagData(unsigned int const StartTag
) : StartTag(StartTag
), EndTag(0), StartValue(0), NextInBucket(0) {}
// All tags of the current section; Scan() appends one extra end marker entry.
64 std::vector
<TagData
> Tags
;
// Hash a tag name for the AlphaIndexes bucket table.
//   Text   - start of the tag name; only Length bytes are read, so it does
//            not need to be NUL terminated
//   Length - number of bytes of Text to hash
// NOTE(review): the truncation to the last 8 letters mentioned below and the
// return statement are not visible in this extract.
67 static unsigned long AlphaHash(const char *Text
, size_t Length
) /*{{{*/
69 /* This very simple hash function for the last 8 letters gives
70 very good performance on the debian package files */
76 unsigned long Res
= 0;
// Masking with 0xDF clears bit 0x20, so ASCII upper- and lower-case letters
// contribute the same value; the running hash is shifted left by one bit and
// XORed with each masked character.
77 for (size_t i
= 0; i
< Length
; ++i
)
78 Res
= ((unsigned long)(Text
[i
]) & 0xDF) ^ (Res
<< 1);
83 // TagFile::pkgTagFile - Constructor /*{{{*/
84 // ---------------------------------------------------------------------
86 pkgTagFile::pkgTagFile(FileFd
*pFd
,unsigned long long Size
)
// Set up the private state for reading tag sections from pFd.
//   pFd  - file to read; handed to the pkgTagFilePrivate constructor
//   Size - requested buffer size in bytes; passed to malloc() below
// NOTE(review): several lines of this function (the size adjustment described
// in the comment below, braces, else branches and returns) are missing from
// this extract.
92 void pkgTagFile::Init(FileFd
*pFd
,unsigned long long Size
)
94 /* The size is increased by 4 because if we start with the Size of the
95 filename we need to try to read 1 char more to see an EOF faster, 1
96 char the end-pointer can be on and maybe 2 newlines need to be added
97 to the end of the file -> 4 extra chars */
104 d
= new pkgTagFilePrivate(pFd
, Size
);
// When the file is not open, Start, End and Buffer are all set to 0.
106 if (d
->Fd
.IsOpen() == false)
107 d
->Start
= d
->End
= d
->Buffer
= 0;
// Allocate Size bytes for the scan buffer (presumably the else branch of the
// check above -- confirm).
109 d
->Buffer
= (char*)malloc(sizeof(char) * Size
);
// Allocation failure check; the handling itself is not visible here.
111 if (d
->Buffer
== NULL
)
// Start with an empty window at the beginning of the buffer.
116 d
->Start
= d
->End
= d
->Buffer
;
// The body of this branch is not visible; presumably it primes the buffer
// via Fill() -- confirm.
118 if (d
->Done
== false)
122 // TagFile::~pkgTagFile - Destructor /*{{{*/
123 // ---------------------------------------------------------------------
125 pkgTagFile::~pkgTagFile()
131 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
132 APT_PURE
unsigned long pkgTagFile::Offset()
137 // TagFile::Resize - Resize the internal buffer /*{{{*/
138 // ---------------------------------------------------------------------
139 /* Resize the internal buffer (double it in size). Fail if a maximum size is reached. */
// Grow the internal buffer to twice its current size by delegating to
// Resize(newSize) with d->Size * 2.
// The statement executed when the size limit below is exceeded is not
// visible in this extract.
142 bool pkgTagFile::Resize()
144 // fail if the buffer grows too big
145 if(d
->Size
> 1024*1024+1)
148 return Resize(d
->Size
* 2);
// Reallocate the internal buffer to newSize bytes and re-point Start/End at
// the new storage, keeping the amount of unread data (End - Start) unchanged.
//   newSize - requested buffer size in bytes, passed to realloc()
// NOTE(review): Start is reset to the beginning of the buffer, so this relies
// on the unread data already sitting at the front of the buffer (Fill() moves
// it there with memmove) -- confirm that every caller guarantees this.
150 bool pkgTagFile::Resize(unsigned long long const newSize
)
// Remember how many unread bytes there are before the pointers change.
152 unsigned long long const EndSize
= d
->End
- d
->Start
;
154 // get new buffer and use it
155 char* newBuffer
= (char*)realloc(d
->Buffer
, sizeof(char) * newSize
);
// realloc failure check; the handling itself is not visible here.
156 if (newBuffer
== NULL
)
158 d
->Buffer
= newBuffer
;
161 // update the start/end pointers to the new buffer
162 d
->Start
= d
->Buffer
;
163 d
->End
= d
->Start
+ EndSize
;
167 // TagFile::Step - Advance to the next section /*{{{*/
168 // ---------------------------------------------------------------------
169 /* If the Section Scanner fails we refill the buffer and try again.
170 * If that fails too, double the buffer size and try again until a
171 * maximum buffer size is reached. */
// Parse the next section from the buffer into Tag and advance past it.
//   Tag - section object that scans the window [d->Start, d->End)
// Reports "Unable to parse package file %s (1)" through _error when Resize()
// fails while retrying.
// NOTE(review): the refill calls and most of the control structure between
// the Scan attempts are missing from this extract.
173 bool pkgTagFile::Step(pkgTagSection
&Tag
)
// First attempt on the data currently in the buffer.
175 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
) == false)
// Retry; passing false as the third argument lets Scan() continue from the
// tags it has already recorded instead of restarting.
182 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false))
// Still no complete section: enlarge the buffer, giving up with an error
// once Resize() refuses.
185 if (Resize() == false)
186 return _error
->Error(_("Unable to parse package file %s (1)"),
187 d
->Fd
.Name().c_str());
189 } while (Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false) == false);
// Consume the section: move the read pointer and the tracked offset forward
// by the same amount.
192 d
->Start
+= Tag
.size();
193 d
->iOffset
+= Tag
.size();
199 // TagFile::Fill - Top up the buffer /*{{{*/
200 // ---------------------------------------------------------------------
201 /* This takes the bit at the end of the buffer and puts it at the start
202 then fills the rest from the file */
// Move the unread tail of the buffer to the front and read more data from
// the file behind it.
// NOTE(review): the bodies of most branches below are missing from this
// extract, so only the visible conditions are described.
203 bool pkgTagFile::Fill()
// Number of unread bytes currently in the buffer.
205 unsigned long long EndSize
= d
->End
- d
->Start
;
206 unsigned long long Actual
= 0;
// Compact: copy the unread bytes to the start of the buffer (memmove copes
// with the overlap) and re-point Start/End at them.
208 memmove(d
->Buffer
,d
->Start
,EndSize
);
209 d
->Start
= d
->Buffer
;
210 d
->End
= d
->Buffer
+ EndSize
;
212 if (d
->Done
== false)
214 // See if only a bit of the file is left
// Ask for as many bytes as fit behind End while leaving one byte of the
// buffer unused.
215 unsigned long long const dataSize
= d
->Size
- ((d
->End
- d
->Buffer
) + 1);
216 if (d
->Fd
.Read(d
->End
, dataSize
, &Actual
) == false)
// A short read; the handling is not visible (presumably marks the file as
// done -- confirm).
218 if (Actual
!= dataSize
)
// At most three bytes left over and nothing new was read.
225 if (EndSize
<= 3 && Actual
== 0)
// Fewer than four bytes of free space remain behind End.
227 if (d
->Size
- (d
->End
- d
->Buffer
) < 4)
230 // Append a double new line if one does not exist
231 unsigned int LineCount
= 0;
// Walk backwards from the last byte over trailing CR/LF characters.
// NOTE(review): E starts at End - 1 and only decreases, so E - d->End is
// always negative and the "< 6" test never stops the loop; only the
// character test does. If a bound of six bytes was intended it would need
// d->End - E -- confirm.
232 for (const char *E
= d
->End
- 1; E
- d
->End
< 6 && (*E
== '\n' || *E
== '\r'); E
--)
// Buffer completely full check before the newlines are appended.
237 if ((unsigned)(d
->End
- d
->Buffer
) >= d
->Size
)
// Runs until LineCount reaches two; the loop body is not visible here.
239 for (; LineCount
< 2; LineCount
++)
249 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
250 // ---------------------------------------------------------------------
251 /* This jumps to a pre-recorded file location and reads the record found there. */
// Position the reader at file offset Offset and scan the section there.
//   Tag    - section object that receives the scanned record
//   Offset - file offset to jump to
// Reports "Unable to parse package file %s (2)" through _error when the
// final Scan attempt fails.
// NOTE(review): several statements (pointer adjustments, refill calls,
// returns) are missing from this extract.
253 bool pkgTagFile::Jump(pkgTagSection
&Tag
,unsigned long long Offset
)
255 // We are within a buffer space of the next hit..
// True when Offset lies inside the data currently buffered, i.e. within
// [iOffset, iOffset + (End - Start)).
256 if (Offset
>= d
->iOffset
&& d
->iOffset
+ (d
->End
- d
->Start
) > Offset
)
// Distance from the current position to the target inside the buffer.
258 unsigned long long Dist
= Offset
- d
->iOffset
;
261 // if we have seen the end, don't ask for more
263 return Tag
.Scan(d
->Start
, d
->End
- d
->Start
);
268 // Reposition and reload..
271 if (d
->Fd
.Seek(Offset
) == false)
// Discard the buffered data: empty window at the start of the buffer.
273 d
->End
= d
->Start
= d
->Buffer
;
278 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
) == true)
281 // This appends a double new line (for the real eof handling)
// Last attempt, continuing from the tags already recorded (third argument
// false); failure is reported as a parse error.
285 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
, false) == false)
286 return _error
->Error(_("Unable to parse package file %s (2)"),d
->Fd
.Name().c_str());
291 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
292 // ---------------------------------------------------------------------
// Construct an empty section: Section and Stop start as 0, the private data
// is allocated with new, and the index tables are zeroed.
// The APT_IGNORE_DEPRECATED_PUSH / POP pair brackets the whole constructor.
294 APT_IGNORE_DEPRECATED_PUSH
295 pkgTagSection::pkgTagSection()
296 : Section(0), d(NULL
), Stop(0)
298 d
= new pkgTagSectionPrivate();
// The Indexes table is only cleared for APT_PKG_ABI below 413.
299 #if APT_PKG_ABI < 413
301 memset(&Indexes
, 0, sizeof(Indexes
));
// Zero means "empty bucket" for the hash table consulted by Find().
303 memset(&AlphaIndexes
, 0, sizeof(AlphaIndexes
));
305 APT_IGNORE_DEPRECATED_POP
307 // TagSection::Scan - Scan for the end of the header information /*{{{*/
// Two-argument overload, only compiled for APT_PKG_ABI below 413: forwards
// to the three-argument Scan() with Restart set to true.
//   Start     - first byte of the data to scan
//   MaxLength - number of bytes available from Start
308 #if APT_PKG_ABI < 413
309 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
)
311 return Scan(Start
, MaxLength
, true);
// Scan one section (a block of "Tag: value" lines ended by a double newline)
// and record every tag in d->Tags and the AlphaIndexes hash table.
//   Start     - first byte of the data to scan
//   MaxLength - number of bytes available from Start
//   Restart   - false lets the scan resume from the last tag recorded by a
//               previous call instead of starting over
// NOTE(review): braces, returns and several assignments (for example where
// Section and Stop are initialised) are missing from this extract, so the
// comments below describe only the visible statements.
314 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
, bool const Restart
)
// One past the last byte that may be read.
317 const char *End
= Start
+ MaxLength
;
// Resuming: continue from the start of the last tag recorded so far.
319 if (Restart
== false && d
->Tags
.empty() == false)
321 Stop
= Section
+ d
->Tags
.back().StartTag
;
// Advance to the next newline within the remaining data.
324 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
// Tags left over from an earlier scan: the hash table is cleared here (the
// matching reset of the Tags vector is not visible in this extract).
332 if (d
->Tags
.empty() == false)
334 memset(&AlphaIndexes
, 0, sizeof(AlphaIndexes
));
// Pre-allocate room for 0x100 tags.
337 d
->Tags
.reserve(0x100);
// TagCount starts at the number of tags already stored, under either ABI.
339 #if APT_PKG_ABI >= 413
340 unsigned int TagCount
= d
->Tags
.size();
342 APT_IGNORE_DEPRECATED(TagCount
= d
->Tags
.size();)
// lastTagData holds the tag currently being parsed; it is only stored once
// the next tag (or the end of the record) is reached. EndTag == 0 means that
// no tag is pending.
348 pkgTagSectionPrivate::TagData
lastTagData(0);
349 lastTagData
.EndTag
= 0;
350 unsigned long lastTagHash
= 0;
// Skip newline characters in front of the record.
353 TrimRecord(true,End
);
355 // this can happen when TrimRecord trims away the entire Record
356 // (e.g. because it just contains comments)
360 // Start a new index and add it to the hash
// A line whose first character is not whitespace begins a new tag.
361 if (isspace(Stop
[0]) == 0)
363 // store the last found tag
364 if (lastTagData
.EndTag
!= 0)
// Chain the pending tag in front of whatever already occupies its bucket.
366 if (AlphaIndexes
[lastTagHash
] != 0)
367 lastTagData
.NextInBucket
= AlphaIndexes
[lastTagHash
];
368 APT_IGNORE_DEPRECATED_PUSH
// The bucket head stores TagCount, which Find() treats as a 1-based index
// into Tags.
369 AlphaIndexes
[lastTagHash
] = TagCount
;
// Legacy Indexes table: written only while the slot is inside the array.
370 #if APT_PKG_ABI < 413
371 if (d
->Tags
.size() < sizeof(Indexes
)/sizeof(Indexes
[0]))
372 Indexes
[d
->Tags
.size()] = lastTagData
.StartTag
;
374 APT_IGNORE_DEPRECATED_POP
375 d
->Tags
.push_back(lastTagData
);
378 APT_IGNORE_DEPRECATED(++TagCount
;)
// Begin a fresh pending entry at the current position.
379 lastTagData
= pkgTagSectionPrivate::TagData(Stop
- Section
);
380 // find the colon separating tag and value
381 char const * Colon
= (char const *) memchr(Stop
, ':', End
- Stop
);
384 // find the end of the tag (which might or might not be the colon)
385 char const * EndTag
= Colon
;
// Step backwards over whitespace, never moving before the tag start.
387 for (; EndTag
> Stop
&& isspace(*EndTag
) != 0; --EndTag
)
// Record where the tag name ends and hash the name for the bucket table.
390 lastTagData
.EndTag
= EndTag
- Section
;
391 lastTagHash
= AlphaHash(Stop
, EndTag
- Stop
);
392 // find the beginning of the value
// NOTE(review): this loop has no visible bound against End; it stops only at
// the first non-space character -- confirm that the data always contains one.
394 for (; isspace(*Stop
) != 0; ++Stop
);
397 lastTagData
.StartValue
= Stop
- Section
;
// Move on to the end of the current line.
400 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
// Skip carriage returns that directly follow the newline.
405 for (; Stop
+1 < End
&& Stop
[1] == '\r'; Stop
++)
409 // Double newline marks the end of the record
410 if (Stop
+1 < End
&& Stop
[1] == '\n')
// Store the tag that was still pending when the record ended.
412 if (lastTagData
.EndTag
!= 0)
414 if (AlphaIndexes
[lastTagHash
] != 0)
415 lastTagData
.NextInBucket
= AlphaIndexes
[lastTagHash
];
416 APT_IGNORE_DEPRECATED(AlphaIndexes
[lastTagHash
] = TagCount
;)
// NOTE(review): unlike the store earlier in this function, the two Indexes
// writes below have no visible check that d->Tags.size() is inside the
// Indexes array -- confirm this cannot overflow for sections with many tags.
417 #if APT_PKG_ABI < 413
418 APT_IGNORE_DEPRECATED(Indexes
[d
->Tags
.size()] = lastTagData
.StartTag
;)
420 d
->Tags
.push_back(lastTagData
);
// Append an end marker entry: Find() and Get() read Tags[Pos + 1].StartTag to
// learn where a field ends, and Count() subtracts this entry again.
423 pkgTagSectionPrivate::TagData
const td(Stop
- Section
);
424 #if APT_PKG_ABI < 413
425 APT_IGNORE_DEPRECATED(Indexes
[d
->Tags
.size()] = td
.StartTag
;)
427 d
->Tags
.push_back(td
);
// Called with BeforeRecord == false, i.e. for the end of the record; the
// branch of TrimRecord() that handles this case is not visible in this extract.
428 TrimRecord(false,End
);
438 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
439 // ---------------------------------------------------------------------
440 /* There should be exactly 2 newlines at the end of the record, no more. */
// Advance Stop past surplus newline characters around a record.
//   BeforeRecord - true when trimming in front of the record
//   End          - end of the readable data; Stop never moves past it
// Only the leading-newline loop is visible in this extract; the handling for
// BeforeRecord == false is missing.
441 void pkgTagSection::TrimRecord(bool BeforeRecord
, const char*& End
)
443 if (BeforeRecord
== true)
// Skip every '\n' and '\r' in front of the record.
445 for (; Stop
< End
&& (Stop
[0] == '\n' || Stop
[0] == '\r'); Stop
++);
448 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
449 // ---------------------------------------------------------------------
450 /* There should be exactly 1 newline at the end of the buffer, no more. */
// Move Stop backwards while the character two positions before it is a
// newline or carriage return, never going below Section + 2; this leaves a
// single line ending in front of Stop.
451 void pkgTagSection::Trim()
453 for (; Stop
> Section
+ 2 && (Stop
[-2] == '\n' || Stop
[-2] == '\r'); Stop
--);
456 // TagSection::Exists - return True if a tag exists /*{{{*/
// Report whether the section contains Tag by delegating to Find(Tag, tmp);
// the position written to tmp is not used further in the visible code.
// The declaration is const for APT_PKG_ABI 413 and later, non-const otherwise.
//   Tag - NUL-terminated tag name to look for
457 #if APT_PKG_ABI >= 413
458 bool pkgTagSection::Exists(const char* const Tag
) const
460 bool pkgTagSection::Exists(const char* const Tag
)
464 return Find(Tag
, tmp
);
467 // TagSection::Find - Locate a tag /*{{{*/
468 // ---------------------------------------------------------------------
469 /* This searches the section for a tag that matches the given string. */
// Look up a tag by name through the AlphaIndexes hash table.
//   Tag - NUL-terminated tag name; compared case-insensitively
//   Pos - receives the index of the matching entry (the assignment itself is
//         not visible in this extract)
470 bool pkgTagSection::Find(const char *Tag
,unsigned int &Pos
) const
472 size_t const Length
= strlen(Tag
);
// Bucket is a 1-based index into d->Tags; 0 means the bucket is empty.
473 unsigned int Bucket
= AlphaIndexes
[AlphaHash(Tag
, Length
)];
// Follow the collision chain of this bucket.
477 for (; Bucket
!= 0; Bucket
= d
->Tags
[Bucket
- 1].NextInBucket
)
// Cheap rejection: the stored tag name has a different length.
479 if ((d
->Tags
[Bucket
- 1].EndTag
- d
->Tags
[Bucket
- 1].StartTag
) != Length
)
// Compare the stored name with Tag, ignoring case.
482 char const * const St
= Section
+ d
->Tags
[Bucket
- 1].StartTag
;
483 if (strncasecmp(Tag
,St
,Length
) != 0)
// Locate the value of a tag.
//   Tag   - NUL-terminated tag name
//   Start - receives the first byte of the value
//   End   - receives one past the last byte of the value, with trailing
//           whitespace stripped
// Raises "Internal parsing error" through _error when the recorded offsets
// would put Start behind End.
493 bool pkgTagSection::Find(const char *Tag
,const char *&Start
,
494 const char *&End
) const
497 if (Find(Tag
, Pos
) == false)
500 Start
= Section
+ d
->Tags
[Pos
].StartValue
;
501 // Strip off the gunk from the end
// The value runs up to the start of the following entry.
502 End
= Section
+ d
->Tags
[Pos
+ 1].StartTag
;
503 if (unlikely(Start
> End
))
504 return _error
->Error("Internal parsing error");
// NOTE(review): End[-1] is read before the End > Start test, so one byte in
// front of the value is inspected even when the value is empty; swapping the
// two conditions would avoid that read -- confirm and fix upstream.
506 for (; isspace(End
[-1]) != 0 && End
> Start
; --End
);
511 // TagSection::FindS - Find a string /*{{{*/
512 // ---------------------------------------------------------------------
// Return the value of Tag as a string built from the [Start, End) range
// reported by Find(). The value returned when the tag is missing is not
// visible in this extract.
//   Tag - NUL-terminated tag name
514 string
pkgTagSection::FindS(const char *Tag
) const
518 if (Find(Tag
,Start
,End
) == false)
520 return string(Start
,End
);
523 // TagSection::FindI - Find an integer /*{{{*/
524 // ---------------------------------------------------------------------
// Parse the value of Tag as a base-10 signed integer with strtol().
//   Tag     - NUL-terminated tag name
//   Default - presumably returned when the tag is missing, too long for the
//             temporary buffer or not numeric; those return statements are
//             not visible in this extract -- confirm
526 signed int pkgTagSection::FindI(const char *Tag
,signed long Default
) const
530 if (Find(Tag
,Start
,Stop
) == false)
533 // Copy it into a temp buffer so we can use strtol
// Reject values that would not fit into S together with a terminator.
535 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
// strncpy copies exactly Stop - Start bytes and adds no terminator of its
// own here; the terminating statement is not visible in this extract.
537 strncpy(S
,Start
,Stop
-Start
);
541 signed long Result
= strtol(S
,&End
,10);
547 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
548 // ---------------------------------------------------------------------
// Parse the value of Tag as a base-10 unsigned long long with strtoull().
//   Tag     - NUL-terminated tag name
//   Default - presumably returned when the tag is missing, too long for the
//             temporary buffer or not numeric; those return statements are
//             not visible in this extract -- confirm
550 unsigned long long pkgTagSection::FindULL(const char *Tag
, unsigned long long const &Default
) const
554 if (Find(Tag
,Start
,Stop
) == false)
557 // Copy it into a temp buffer so we can use strtoull
// Reject values that would not fit into S together with a terminator.
559 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
// strncpy copies exactly Stop - Start bytes and adds no terminator of its
// own here; the terminating statement is not visible in this extract.
561 strncpy(S
,Start
,Stop
-Start
);
565 unsigned long long Result
= strtoull(S
,&End
,10);
571 // TagSection::FindB - Find boolean value /*{{{*/
572 // ---------------------------------------------------------------------
// Interpret the value of Tag as a boolean by passing it to StringToBool().
//   Tag     - NUL-terminated tag name
//   Default - presumably returned when the tag is missing; that return
//             statement is not visible in this extract -- confirm
574 bool pkgTagSection::FindB(const char *Tag
, bool const &Default
) const
576 const char *Start
, *Stop
;
577 if (Find(Tag
, Start
, Stop
) == false)
579 return StringToBool(string(Start
, Stop
));
582 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
583 // ---------------------------------------------------------------------
584 /* The bits marked in Flag are masked on/off in Flags */
// Look up Tag and hand its value range to the four-argument FindFlag(),
// which updates Flags.
//   Tag   - NUL-terminated tag name
//   Flags - flag word that is modified
//   Flag  - bit(s) within Flags affected by the tag's value
// What is returned when the tag is missing is not visible in this extract.
585 bool pkgTagSection::FindFlag(const char *Tag
,unsigned long &Flags
,
586 unsigned long Flag
) const
590 if (Find(Tag
,Start
,Stop
) == false)
592 return FindFlag(Flags
, Flag
, Start
, Stop
);
// Update Flags from the text in [Start, Stop), which is interpreted by
// StringToBool(); the individual switch cases are not visible in this
// extract. A value that is not recognised produces the warning
// "Unknown flag value: %s" through _error.
//   Flags - flag word that is modified
//   Flag  - bit(s) within Flags to change
//   Start - first byte of the value text
//   Stop  - one past the last byte of the value text
594 bool pkgTagSection::FindFlag(unsigned long &Flags
, unsigned long Flag
,
595 char const* Start
, char const* Stop
)
597 switch (StringToBool(string(Start
, Stop
)))
608 _error
->Warning("Unknown flag value: %s",string(Start
,Stop
).c_str());
// Return the raw text of field number I, tag name included.
//   Start - receives the first byte of the field (start of its tag name)
//   Stop  - receives the start of the following entry
//   I     - field index; entry I + 1 must exist, so I has to be below Count()
614 void pkgTagSection::Get(const char *&Start
,const char *&Stop
,unsigned int I
) const
616 Start
= Section
+ d
->Tags
[I
].StartTag
;
617 Stop
= Section
+ d
->Tags
[I
+1].StartTag
;
// Number of real tags in the section: the size of d->Tags minus the trailing
// end marker. The value returned for an empty vector is not visible in this
// extract.
619 APT_PURE
unsigned int pkgTagSection::Count() const { /*{{{*/
620 if (d
->Tags
.empty() == true)
622 // the last element is just marking the end and isn't a real one
623 return d
->Tags
.size() - 1;
626 // TFRewrite - Rewrite a control record /*{{{*/
627 // ---------------------------------------------------------------------
628 /* This writes the control record to Output, rewriting it as necessary. The
629 override map item specifies the rewriting rules to follow. This also
630 takes the time to sort the field list. */
632 /* The order of this list is taken from the fieldinfos in the dpkg source (lib/parse.c). */
// Field names in the order TFRewrite() should emit them when this list is
// passed as its Order argument. Only part of the list is visible in this
// extract; TFRewrite() walks Order until it finds a null entry.
634 static const char *iTFRewritePackageOrder
[] = {
642 "Original-Maintainer",
646 "Revision", // Obsolete
647 "Config-Version", // Obsolete
663 "MSDOS-Filename", // Obsolete
// Field order list for source records, used the same way as the package
// order list. Only part of the list is visible in this extract.
666 static const char *iTFRewriteSourceOrder
[] = {"Package",
673 "Original-Maintainer",
675 "Build-Depends-Indep",
677 "Build-Conflicts-Indep",
685 /* Two levels of initialization are used because gcc will set the symbol
686 size of an array to the length of the array, causing dynamic relinking
687 errors. Doing this makes the symbol size constant */
// Exported pointers to the two file-local order arrays, so that the public
// symbols are plain pointers rather than arrays.
688 const char **TFRewritePackageOrder
= iTFRewritePackageOrder
;
689 const char **TFRewriteSourceOrder
= iTFRewriteSourceOrder
;
// Write the fields of Tags to Output, applying the rules in Rewrite.
//   Output  - stream the record is written to
//   Tags    - scanned section to write
//   Order   - null-terminated list of field names giving the output order
//   Rewrite - rule list ending at an entry whose Tag is 0, or 0 for no rules;
//             entries whose NewTag is 0 get NewTag set to Tag
// Output happens in three passes: fields named in Order, then the remaining
// fields of Tags, then the remaining rewrite rules. Write failures are
// reported through _error->Errno().
// NOTE(review): braces, continue/return statements and the updates of
// Visited are missing from this extract.
691 bool TFRewrite(FILE *Output
,pkgTagSection
const &Tags
,const char *Order
[],
692 TFRewriteData
*Rewrite
)
// NOTE(review): Visited has 256 slots but is indexed below by field index (up
// to Tags.Count()) and by rule index with no visible bound check -- confirm
// that neither can reach 256.
694 unsigned char Visited
[256]; // Bit 1 is Order, Bit 2 is Rewrite
695 for (unsigned I
= 0; I
!= 256; I
++)
698 // Set new tag up as necessary.
699 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
// A rule without an explicit new name keeps the original tag name.
701 if (Rewrite
[J
].NewTag
== 0)
702 Rewrite
[J
].NewTag
= Rewrite
[J
].Tag
;
705 // Write all of the tags, in order.
708 for (unsigned int I
= 0; Order
[I
] != 0; I
++)
710 bool Rewritten
= false;
712 // See if this is a field that needs to be rewritten
713 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
715 if (strcasecmp(Rewrite
[J
].Tag
,Order
[I
]) == 0)
// Only rules with a non-empty replacement text produce output.
718 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
// A replacement that already starts with whitespace is written directly
// after the colon; otherwise a single space is inserted.
720 if (isspace(Rewrite
[J
].Rewrite
[0]))
721 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
723 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
730 // See if it is in the fragment
732 if (Tags
.Find(Order
[I
],Pos
) == false)
736 if (Rewritten
== true)
739 /* Write out this element, taking a moment to rewrite the tag
740 in case of changes of case. */
743 Tags
.Get(Start
,Stop
,Pos
);
// Emit the tag name as spelled in Order, then skip the same number of
// characters of the original field so only the remainder is copied.
745 if (fputs(Order
[I
],Output
) < 0)
746 return _error
->Errno("fputs","IO Error to output");
747 Start
+= strlen(Order
[I
]);
748 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
749 return _error
->Errno("fwrite","IO Error to output");
// Make sure the field ends with a newline.
750 if (Stop
[-1] != '\n')
751 fprintf(Output
,"\n");
755 // Now write all the old tags that were missed.
756 for (unsigned int I
= 0; I
!= Tags
.Count(); I
++)
// Bit 1 set: this field was already handled by the Order pass.
758 if ((Visited
[I
] & 1) == 1)
763 Tags
.Get(Start
,Stop
,I
);
// Find the end of the tag name (the colon) inside the raw field text.
764 const char *End
= Start
;
765 for (; End
< Stop
&& *End
!= ':'; End
++);
767 // See if this is a field that needs to be rewritten
768 bool Rewritten
= false;
769 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
771 if (stringcasecmp(Start
,End
,Rewrite
[J
].Tag
) == 0)
774 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
776 if (isspace(Rewrite
[J
].Rewrite
[0]))
777 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
779 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
787 if (Rewritten
== true)
790 // Write out this element
791 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
792 return _error
->Errno("fwrite","IO Error to output");
793 if (Stop
[-1] != '\n')
794 fprintf(Output
,"\n");
797 // Now write all the rewrites that were missed
798 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
// Bit 2 set: this rule was already applied in one of the earlier passes.
800 if ((Visited
[J
] & 2) == 2)
803 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
805 if (isspace(Rewrite
[J
].Rewrite
[0]))
806 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
808 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
// Destructor: releases the private data object that the constructor
// allocated with new.
816 pkgTagSection::~pkgTagSection() { delete d
; }