1 // -*- mode: cpp; mode: fold -*-
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
6 Fast scanner for RFC-822 type header information
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
11 ##################################################################### */
13 // Include Files /*{{{*/
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
// Private state of pkgTagFile: the file handle, the buffer pointers and the
// offset and size bookkeeping used by Init, Fill, Step, Jump and Resize.
// NOTE(review): this listing omits lines (braces, access specifiers, several
// member declarations and the tail of the initializer list), so only the
// visible declarations are described here.
32 class pkgTagFilePrivate
// Binds Fd to *pFd and starts without a buffer: Buffer, Start and End are
// NULL, Done is false and iOffset is 0.
35 pkgTagFilePrivate(FileFd
*pFd
, unsigned long long Size
) : Fd(*pFd
), Buffer(NULL
),
36 Start(NULL
), End(NULL
),
37 Done(false), iOffset(0),
// Running offset; pkgTagFile::Step adds the size of each consumed section.
46 unsigned long long iOffset
;
// Buffer size; pkgTagFile::Resize() and pkgTagFile::Fill compare against it.
// NOTE(review): its initializer is on a line not shown here, presumably the
// Size constructor argument; confirm.
47 unsigned long long Size
;
// Private state of pkgTagSection: the list of tags found by Scan.
// NOTE(review): this listing omits lines (braces, the TagData header and its
// EndTag member declaration), so only the visible declarations are described.
50 class pkgTagSectionPrivate
53 pkgTagSectionPrivate()
// Offset of the tag name; the code adds it to Section to get a pointer.
57 unsigned int StartTag
;
// Offset of the value; Scan stores it as Stop - Section.
59 unsigned int StartValue
;
// Link to the next tag in the same hash bucket. pkgTagSection::Find stops at 0
// and otherwise indexes Tags with this value minus one.
60 unsigned int NextInBucket
;
// Creates an entry that only knows where its tag starts; all other fields are 0.
62 TagData(unsigned int const StartTag
) : StartTag(StartTag
), EndTag(0), StartValue(0), NextInBucket(0) {}
// All entries of the current section. Count() treats the last entry as an end
// marker rather than a real tag.
64 std::vector
<TagData
> Tags
;
// Hashes a tag name; the result is used as an index into AlphaIndexes.
// Text:   first character of the tag name.
// Length: number of characters to hash; the visible loop reads exactly
//         Length characters, so no terminating NUL is needed.
// NOTE(review): the existing comment mentions the last 8 letters, but the
// truncation and the return statement are not visible in this listing.
67 static unsigned long AlphaHash(const char *Text
, size_t Length
) /*{{{*/
69 /* This very simple hash function for the last 8 letters gives
70 very good performance on the debian package files */
76 unsigned long Res
= 0;
// Masking with 0xDF clears bit 0x20, so upper and lower case ASCII letters
// contribute the same value; the running hash shifts left by one per character.
77 for (size_t i
= 0; i
< Length
; ++i
)
78 Res
= ((unsigned long)(Text
[i
]) & 0xDF) ^ (Res
<< 1);
83 // TagFile::pkgTagFile - Constructor /*{{{*/
84 // ---------------------------------------------------------------------
// Takes the file to read from (pFd) and a buffer size (Size).
// NOTE(review): the body is not visible in this listing; it presumably
// forwards both arguments to Init; confirm.
86 pkgTagFile::pkgTagFile(FileFd
*pFd
,unsigned long long Size
)
// Creates the private state for pFd and allocates the read buffer.
// pFd:  file to read from.
// Size: requested buffer size in bytes.
// NOTE(review): braces, the size adjustment described in the comment below
// and the else and return lines are not visible in this listing.
92 void pkgTagFile::Init(FileFd
*pFd
,unsigned long long Size
)
94 /* The size is increased by 4 because if we start with the Size of the
95 filename we need to try to read 1 char more to see an EOF faster, 1
96 char the end-pointer can be on and maybe 2 newlines need to be added
97 to the end of the file -> 4 extra chars */
104 d
= new pkgTagFilePrivate(pFd
, Size
);
// Without an open file no buffer is used: all three pointers are zeroed.
106 if (d
->Fd
.IsOpen() == false)
107 d
->Start
= d
->End
= d
->Buffer
= 0;
// Allocate Size bytes for the buffer.
// NOTE(review): presumably the else branch of the check above; confirm.
109 d
->Buffer
= (char*)malloc(sizeof(char) * Size
);
// Allocation failure check; its handling is on lines not shown here.
111 if (d
->Buffer
== NULL
)
// Start and End both point at the buffer start, so it holds no data yet.
116 d
->Start
= d
->End
= d
->Buffer
;
// Runs only while Done is false; the guarded statement is not visible
// (presumably an initial Fill; TODO confirm).
118 if (d
->Done
== false)
122 // TagFile::~pkgTagFile - Destructor /*{{{*/
123 // ---------------------------------------------------------------------
// NOTE(review): the body is not visible in this listing. Init allocates
// d with new and d->Buffer with malloc, so both presumably need releasing
// here; confirm.
125 pkgTagFile::~pkgTagFile()
131 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
// NOTE(review): the body is not visible in this listing. The return type is
// unsigned long while the private iOffset is unsigned long long, so the value
// may be truncated where unsigned long is narrower; confirm.
132 APT_PURE
unsigned long pkgTagFile::Offset()
// Overload without arguments: asks Resize(newSize) for twice the current
// buffer size. The visible guard compares d->Size against 1024*1024+1 first;
// the statement it guards is on a line not shown in this listing.
137 // TagFile::Resize - Resize the internal buffer /*{{{*/
138 // ---------------------------------------------------------------------
139 /* Resize the internal buffer (double it in size). Fail if a maximum size
142 bool pkgTagFile::Resize()
144 // fail if the buffer grows too big
145 if(d
->Size
> 1024*1024+1)
148 return Resize(d
->Size
* 2);
// Reallocates the buffer to newSize bytes and re-points Start and End at it.
// newSize: requested buffer size in bytes.
// NOTE(review): braces, the failure handling, the returns and any update of
// d->Size are not visible in this listing.
150 bool pkgTagFile::Resize(unsigned long long const newSize
)
// Number of bytes between Start and End before the buffer is reallocated.
152 unsigned long long const EndSize
= d
->End
- d
->Start
;
154 // get new buffer and use it
155 char* newBuffer
= (char*)realloc(d
->Buffer
, sizeof(char) * newSize
);
// realloc failed; the handling is on a line not shown here.
156 if (newBuffer
== NULL
)
158 d
->Buffer
= newBuffer
;
161 // update the start/end pointers to the new buffer
// NOTE(review): Start is reset to the buffer start, which keeps the data only
// if Start already equalled Buffer before the call; confirm with callers.
162 d
->Start
= d
->Buffer
;
163 d
->End
= d
->Start
+ EndSize
;
167 // TagFile::Step - Advance to the next section /*{{{*/
168 // ---------------------------------------------------------------------
// Scans the next section from the buffer into Tag, then advances Start and
// iOffset by the size of that section.
// NOTE(review): braces, the do keyword, the refill calls and several return
// statements are not visible in this listing.
169 /* If the Section Scanner fails we refill the buffer and try again.
170 * If that fails too, double the buffer size and try again until a
171 * maximum buffer is reached.
173 bool pkgTagFile::Step(pkgTagSection
&Tag
)
// First attempt on the data currently between Start and End.
175 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
) == false)
// Retry with Restart == false, which lets Scan resume from the last tag it
// recorded instead of starting over.
182 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false))
// Grow the buffer; report an error once Resize refuses.
185 if (Resize() == false)
186 return _error
->Error(_("Unable to parse package file %s (%d)"),
187 d
->Fd
.Name().c_str(), 1);
// Keep going until a scan succeeds.
189 } while (Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false) == false);
// Consume the section: move the read position and the offset past it.
192 d
->Start
+= Tag
.size();
193 d
->iOffset
+= Tag
.size();
199 // TagFile::Fill - Top up the buffer /*{{{*/
200 // ---------------------------------------------------------------------
201 /* This takes the bit at the end of the buffer and puts it at the start
202 then fills the rest from the file */
// NOTE(review): braces, returns and the bodies of most conditions below are
// not visible in this listing; comments describe the visible statements only.
203 bool pkgTagFile::Fill()
// Bytes still between Start and End.
205 unsigned long long EndSize
= d
->End
- d
->Start
;
206 unsigned long long Actual
= 0;
// Move those bytes to the front of the buffer (memmove because the ranges
// may overlap) and re-point Start and End.
208 memmove(d
->Buffer
,d
->Start
,EndSize
);
209 d
->Start
= d
->Buffer
;
210 d
->End
= d
->Buffer
+ EndSize
;
212 if (d
->Done
== false)
214 // See if only a bit of the file is left
// Free space after End minus one byte.
215 unsigned long long const dataSize
= d
->Size
- ((d
->End
- d
->Buffer
) + 1);
// Read up to dataSize bytes to End; Actual receives the count actually read.
216 if (d
->Fd
.Read(d
->End
, dataSize
, &Actual
) == false)
// A short read; presumably marks the end of the file (handling not visible).
218 if (Actual
!= dataSize
)
// Hardly anything left over and nothing new was read.
225 if (EndSize
<= 3 && Actual
== 0)
// Fewer than 4 bytes of room left after End.
227 if (d
->Size
- (d
->End
- d
->Buffer
) < 4)
230 // Append a double new line if one does not exist
231 unsigned int LineCount
= 0;
// Walks backwards from the last byte over trailing newline characters.
// NOTE(review): E starts at End - 1 and only decreases, so E - d->End is
// always negative and the bound of 6 never ends the loop; the intended
// expression is possibly d->End - E. Nothing visible stops E at Buffer
// either. Confirm.
232 for (const char *E
= d
->End
- 1; E
- d
->End
< 6 && (*E
== '\n' || *E
== '\r'); E
--)
// End has reached or passed the buffer capacity.
237 if ((unsigned)(d
->End
- d
->Buffer
) >= d
->Size
)
// Runs until LineCount reaches two; the loop body is not visible here.
239 for (; LineCount
< 2; LineCount
++)
// Positions the reader at Offset and scans the section found there into Tag.
// Tag:    receives the scanned section.
// Offset: target position, compared against the running iOffset.
// NOTE(review): braces, the pointer adjustments, the refill calls and several
// returns are not visible in this listing.
249 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
250 // ---------------------------------------------------------------------
251 /* This jumps to a pre-recorded file location and reads the record
253 bool pkgTagFile::Jump(pkgTagSection
&Tag
,unsigned long long Offset
)
255 // We are within a buffer space of the next hit..
// True when Offset lies in the range covered by the bytes currently buffered.
256 if (Offset
>= d
->iOffset
&& d
->iOffset
+ (d
->End
- d
->Start
) > Offset
)
// Distance from the current position to the target.
258 unsigned long long Dist
= Offset
- d
->iOffset
;
261 // if we have seen the end, don't ask for more
263 return Tag
.Scan(d
->Start
, d
->End
- d
->Start
);
268 // Reposition and reload..
271 if (d
->Fd
.Seek(Offset
) == false)
// Empty the buffer after the seek.
273 d
->End
= d
->Start
= d
->Buffer
;
278 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
) == true)
281 // This appends a double new line (for the real eof handling)
// Last attempt with Restart == false; report an error if it fails as well.
285 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
, false) == false)
286 return _error
->Error(_("Unable to parse package file %s (%d)"),d
->Fd
.Name().c_str(), 2);
291 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
292 // ---------------------------------------------------------------------
// Creates an empty section: Section and Stop are null, the private data is
// allocated and the index tables are zeroed.
// NOTE(review): braces and the matching endif are not visible in this listing.
294 APT_IGNORE_DEPRECATED_PUSH
295 pkgTagSection::pkgTagSection()
296 : Section(0), d(NULL
), Stop(0)
298 d
= new pkgTagSectionPrivate();
// Indexes is only cleared for ABI versions below 413.
299 #if APT_PKG_ABI < 413
301 memset(&Indexes
, 0, sizeof(Indexes
));
303 memset(&AlphaIndexes
, 0, sizeof(AlphaIndexes
));
305 APT_IGNORE_DEPRECATED_POP
307 // TagSection::Scan - Scan for the end of the header information /*{{{*/
// Two-argument overload, compiled only for ABI versions below 413; it
// forwards to the three-argument form with Restart set to true.
308 #if APT_PKG_ABI < 413
309 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
)
311 return Scan(Start
, MaxLength
, true);
// Scans the bytes from Start up to Start + MaxLength for one section. Each
// tag found is recorded in d->Tags with offsets relative to Section and is
// linked into a hash bucket chain through AlphaIndexes.
// Start:     first byte to scan.
// MaxLength: number of bytes available.
// Restart:   when false and tags were already recorded, scanning resumes at
//            the last recorded tag.
// NOTE(review): braces, returns, else and endif lines and several assignments
// are not visible in this listing; comments cover the visible statements only.
314 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
, bool const Restart
)
// One past the last byte that may be read.
317 const char *End
= Start
+ MaxLength
;
// Continuation: go back to where the last recorded tag starts.
319 if (Restart
== false && d
->Tags
.empty() == false)
321 Stop
= Section
+ d
->Tags
.back().StartTag
;
// Look for the next newline from Stop; memchr yields null when there is none.
324 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
// Tags are already recorded: clear the bucket table.
332 if (d
->Tags
.empty() == false)
334 memset(&AlphaIndexes
, 0, sizeof(AlphaIndexes
));
337 d
->Tags
.reserve(0x100);
// TagCount starts at the number of tags recorded so far.
339 #if APT_PKG_ABI >= 413
340 unsigned int TagCount
= d
->Tags
.size();
342 APT_IGNORE_DEPRECATED(TagCount
= d
->Tags
.size();)
// lastTagData is the tag currently being parsed; EndTag == 0 means that no
// tag is pending.
348 pkgTagSectionPrivate::TagData
lastTagData(0);
349 lastTagData
.EndTag
= 0;
350 unsigned long lastTagHash
= 0;
// Skip newline characters in front of the record.
353 TrimRecord(true,End
);
355 // this can happen when TrimRecord trims away the entire Record
356 // (e.g. because it just contains comments)
360 // Start a new index and add it to the hash
// A line that does not begin with whitespace starts a new tag.
361 if (isspace(Stop
[0]) == 0)
363 // store the last found tag
364 if (lastTagData
.EndTag
!= 0)
// Chain the pending tag in front of whatever its bucket already holds.
366 if (AlphaIndexes
[lastTagHash
] != 0)
367 lastTagData
.NextInBucket
= AlphaIndexes
[lastTagHash
];
368 APT_IGNORE_DEPRECATED_PUSH
369 AlphaIndexes
[lastTagHash
] = TagCount
;
// Old ABI only: also fill the Indexes array, guarded by its element count.
370 #if APT_PKG_ABI < 413
371 if (d
->Tags
.size() < sizeof(Indexes
)/sizeof(Indexes
[0]))
372 Indexes
[d
->Tags
.size()] = lastTagData
.StartTag
;
374 APT_IGNORE_DEPRECATED_POP
375 d
->Tags
.push_back(lastTagData
);
378 APT_IGNORE_DEPRECATED(++TagCount
;)
// Begin a new pending tag at the current position.
379 lastTagData
= pkgTagSectionPrivate::TagData(Stop
- Section
);
380 // find the colon separating tag and value
// memchr yields null when no colon is found before End; the handling of that
// case is not visible here.
381 char const * Colon
= (char const *) memchr(Stop
, ':', End
- Stop
);
384 // find the end of the tag (which might or might not be the colon)
385 char const * EndTag
= Colon
;
// Step back over whitespace, but never before the start of the tag.
387 for (; EndTag
> Stop
&& isspace(*EndTag
) != 0; --EndTag
)
390 lastTagData
.EndTag
= EndTag
- Section
;
// Hash the tag name only, from Stop up to EndTag.
391 lastTagHash
= AlphaHash(Stop
, EndTag
- Stop
);
392 // find the beginning of the value
// NOTE(review): this loop has no visible bound against End; confirm.
394 for (; isspace(*Stop
) != 0; ++Stop
);
397 lastTagData
.StartValue
= Stop
- Section
;
// Advance to the end of the current line.
400 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
// Step over carriage returns that follow the newline.
405 for (; Stop
+1 < End
&& Stop
[1] == '\r'; Stop
++)
409 // Double newline marks the end of the record
410 if (Stop
+1 < End
&& Stop
[1] == '\n')
// Store the pending tag exactly as is done when a new tag starts.
412 if (lastTagData
.EndTag
!= 0)
414 if (AlphaIndexes
[lastTagHash
] != 0)
415 lastTagData
.NextInBucket
= AlphaIndexes
[lastTagHash
];
416 APT_IGNORE_DEPRECATED(AlphaIndexes
[lastTagHash
] = TagCount
;)
// NOTE(review): unlike the store above, this write to Indexes has no visible
// bounds check; confirm.
417 #if APT_PKG_ABI < 413
418 APT_IGNORE_DEPRECATED(Indexes
[d
->Tags
.size()] = lastTagData
.StartTag
;)
420 d
->Tags
.push_back(lastTagData
);
// Append a final entry that only marks where the record ends; Find and Get
// read the StartTag of the following entry to delimit a tag.
423 pkgTagSectionPrivate::TagData
const td(Stop
- Section
);
424 #if APT_PKG_ABI < 413
425 APT_IGNORE_DEPRECATED(Indexes
[d
->Tags
.size()] = td
.StartTag
;)
427 d
->Tags
.push_back(td
);
428 TrimRecord(false,End
);
438 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
439 // ---------------------------------------------------------------------
440 /* There should be exactly 2 newline at the end of the record, no more. */
// BeforeRecord: the visible branch handles the true case.
// End:          upper bound for Stop.
// NOTE(review): braces and the BeforeRecord == false part are not visible in
// this listing.
441 void pkgTagSection::TrimRecord(bool BeforeRecord
, const char*& End
)
443 if (BeforeRecord
== true)
// Advance Stop over newline and carriage return characters, up to End.
445 for (; Stop
< End
&& (Stop
[0] == '\n' || Stop
[0] == '\r'); Stop
++);
448 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
449 // ---------------------------------------------------------------------
450 /* There should be exactly 1 newline at the end of the buffer, no more. */
451 void pkgTagSection::Trim()
// Move Stop backwards while the character two positions before it is a
// newline or carriage return, never going below Section + 2.
453 for (; Stop
> Section
+ 2 && (Stop
[-2] == '\n' || Stop
[-2] == '\r'); Stop
--);
456 // TagSection::Exists - return True if a tag exists /*{{{*/
// Two signatures are listed: a const one for ABI 413 and later and a
// non-const one (presumably the else branch; that line is not visible).
// The result is whatever Find(Tag, tmp) returns.
// NOTE(review): the declaration of tmp is not visible in this listing.
457 #if APT_PKG_ABI >= 413
458 bool pkgTagSection::Exists(const char* const Tag
) const
460 bool pkgTagSection::Exists(const char* const Tag
)
464 return Find(Tag
, tmp
);
467 // TagSection::Find - Locate a tag /*{{{*/
468 // ---------------------------------------------------------------------
469 /* This searches the section for a tag that matches the given string. */
// Tag: NUL terminated tag name to look for (its length comes from strlen).
// Pos: output reference; the assignment is on a line not shown here.
// NOTE(review): braces, the continue statements, the write to Pos and the
// returns are not visible in this listing.
470 bool pkgTagSection::Find(const char *Tag
,unsigned int &Pos
) const
472 size_t const Length
= strlen(Tag
);
// Head of the bucket chain for this name; 0 means the bucket is empty.
473 unsigned int Bucket
= AlphaIndexes
[AlphaHash(Tag
, Length
)];
// Bucket values are one higher than the index into Tags, hence Bucket - 1.
477 for (; Bucket
!= 0; Bucket
= d
->Tags
[Bucket
- 1].NextInBucket
)
// Compare the stored tag length with Length before comparing characters.
479 if ((d
->Tags
[Bucket
- 1].EndTag
- d
->Tags
[Bucket
- 1].StartTag
) != Length
)
482 char const * const St
= Section
+ d
->Tags
[Bucket
- 1].StartTag
;
// Case-insensitive comparison of the candidate with Tag.
483 if (strncasecmp(Tag
,St
,Length
) != 0)
// Looks up Tag and sets Start and End to delimit its value within Section.
// Tag:   NUL terminated tag name.
// Start: set to the start of the value.
// End:   set to the end of the value with trailing whitespace removed.
// NOTE(review): braces, the declaration of Pos and the plain returns are not
// visible in this listing.
493 bool pkgTagSection::Find(const char *Tag
,const char *&Start
,
494 const char *&End
) const
497 if (Find(Tag
, Pos
) == false)
500 Start
= Section
+ d
->Tags
[Pos
].StartValue
;
501 // Strip off the gunk from the end
// The value ends where the next entry begins.
502 End
= Section
+ d
->Tags
[Pos
+ 1].StartTag
;
503 if (unlikely(Start
> End
))
504 return _error
->Error("Internal parsing error");
// NOTE(review): End[-1] is read before End > Start is checked; swapping the
// two operands would avoid the read once End has reached Start. Confirm.
506 for (; isspace(End
[-1]) != 0 && End
> Start
; --End
);
511 // TagSection::FindS - Find a string /*{{{*/
512 // ---------------------------------------------------------------------
// Returns the value of Tag as a string built from the range Start to End.
// NOTE(review): the declarations of Start and End and the result returned
// when the tag is missing are not visible in this listing.
514 string
pkgTagSection::FindS(const char *Tag
) const
518 if (Find(Tag
,Start
,End
) == false)
520 return string(Start
,End
);
523 // TagSection::FindI - Find an integer /*{{{*/
524 // ---------------------------------------------------------------------
// Looks up Tag and converts its value with strtol in base 10.
// Default: presumably the fallback result; the returns are not visible here.
// NOTE(review): the declarations of Start, Stop, S and End and the string
// termination after strncpy are not visible in this listing. The result is
// computed as signed long but the return type is signed int; confirm.
526 signed int pkgTagSection::FindI(const char *Tag
,signed long Default
) const
530 if (Find(Tag
,Start
,Stop
) == false)
533 // Copy it into a temp buffer so we can use strtol
// Values that do not fit into S are caught by this check.
535 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
537 strncpy(S
,Start
,Stop
-Start
);
541 signed long Result
= strtol(S
,&End
,10);
547 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
548 // ---------------------------------------------------------------------
// Looks up Tag and converts its value with strtoull in base 10.
// Default: presumably the fallback result; the returns are not visible here.
// NOTE(review): the declarations of Start, Stop, S and End and the string
// termination after strncpy are not visible in this listing.
550 unsigned long long pkgTagSection::FindULL(const char *Tag
, unsigned long long const &Default
) const
554 if (Find(Tag
,Start
,Stop
) == false)
557 // Copy it into a temp buffer so we can use strtoull
// Values that do not fit into S are caught by this check.
559 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
561 strncpy(S
,Start
,Stop
-Start
);
565 unsigned long long Result
= strtoull(S
,&End
,10);
571 // TagSection::FindB - Find boolean value /*{{{*/
572 // ---------------------------------------------------------------------
// Looks up Tag and returns StringToBool applied to its value.
// Default: presumably returned when the tag is missing; that return is not
// visible in this listing.
574 bool pkgTagSection::FindB(const char *Tag
, bool const &Default
) const
576 const char *Start
, *Stop
;
577 if (Find(Tag
, Start
, Stop
) == false)
579 return StringToBool(string(Start
, Stop
));
582 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
583 // ---------------------------------------------------------------------
584 /* The bits marked in Flag are masked on/off in Flags */
// Looks up Tag and passes its value range to the overload taking Start and
// Stop.
// NOTE(review): the declarations of Start and Stop and the result returned
// when the tag is missing are not visible in this listing.
585 bool pkgTagSection::FindFlag(const char *Tag
,unsigned long &Flags
,
586 unsigned long Flag
) const
590 if (Find(Tag
,Start
,Stop
) == false)
592 return FindFlag(Flags
, Flag
, Start
, Stop
);
// Interprets the text from Start to Stop with StringToBool and switches on
// the result. A warning naming the value is issued on one of the paths.
// NOTE(review): the case labels and the updates to Flags are not visible in
// this listing, so which result triggers the warning cannot be told from here.
594 bool pkgTagSection::FindFlag(unsigned long &Flags
, unsigned long Flag
,
595 char const* Start
, char const* Stop
)
597 switch (StringToBool(string(Start
, Stop
)))
608 _error
->Warning("Unknown flag value: %s",string(Start
,Stop
).c_str());
// Returns the raw extent of entry I: Start points at its tag name and Stop at
// the start of the following entry.
// NOTE(review): no range check on I is visible; entry I + 1 must exist.
614 void pkgTagSection::Get(const char *&Start
,const char *&Stop
,unsigned int I
) const
616 Start
= Section
+ d
->Tags
[I
].StartTag
;
617 Stop
= Section
+ d
->Tags
[I
+1].StartTag
;
// Number of real tags in the section.
// NOTE(review): the result for an empty tag list is on a line not shown here.
619 APT_PURE
unsigned int pkgTagSection::Count() const { /*{{{*/
620 if (d
->Tags
.empty() == true)
622 // the last element is just marking the end and isn't a real one
623 return d
->Tags
.size() - 1;
627 #include "tagfile-order.c"
629 // TFRewrite - Rewrite a control record /*{{{*/
630 // ---------------------------------------------------------------------
631 /* This writes the control record to Output, rewriting it as necessary. The
632 override map item specifies the rewriting rules to follow. This also
633 takes the time to sort the field list. */
// Output:  stream that receives the rewritten record.
// Tags:    the parsed section to rewrite.
// Order:   null terminated list of tag names giving the output order.
// Rewrite: optional list of rewrite rules, ended by an entry whose Tag is
//          null; may itself be null.
// NOTE(review): braces, the updates to Visited, the continue and break
// statements, several declarations and the final return are not visible in
// this listing; comments describe the visible statements only.
634 bool TFRewrite(FILE *Output
,pkgTagSection
const &Tags
,const char *Order
[],
635 TFRewriteData
*Rewrite
)
// NOTE(review): Visited has 256 entries but is indexed below by tag index
// (up to Tags.Count()) and by rewrite index J without a visible bound check;
// confirm that neither can reach 256.
637 unsigned char Visited
[256]; // Bit 1 is Order, Bit 2 is Rewrite
638 for (unsigned I
= 0; I
!= 256; I
++)
641 // Set new tag up as necessary.
// A rule without NewTag keeps the original tag name.
642 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
644 if (Rewrite
[J
].NewTag
== 0)
645 Rewrite
[J
].NewTag
= Rewrite
[J
].Tag
;
648 // Write all of the tags, in order.
651 for (unsigned int I
= 0; Order
[I
] != 0; I
++)
653 bool Rewritten
= false;
655 // See if this is a field that needs to be rewritten
656 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
658 if (strcasecmp(Rewrite
[J
].Tag
,Order
[I
]) == 0)
// Only a rule with non-empty replacement text produces output.
661 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
// Replacement text starting with whitespace is written directly after the
// colon; otherwise a single space is inserted.
663 if (isspace(Rewrite
[J
].Rewrite
[0]))
664 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
666 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
673 // See if it is in the fragment
675 if (Tags
.Find(Order
[I
],Pos
) == false)
679 if (Rewritten
== true)
682 /* Write out this element, taking a moment to rewrite the tag
683 in case of changes of case. */
686 Tags
.Get(Start
,Stop
,Pos
);
// The tag name is written as spelled in Order; the original name is then
// skipped by advancing Start by the same length.
688 if (fputs(Order
[I
],Output
) < 0)
689 return _error
->Errno("fputs","IO Error to output");
690 Start
+= strlen(Order
[I
]);
// NOTE(review): fwrite with a size of zero returns 0, so an empty remainder
// would be reported as an error here; confirm whether that can occur.
691 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
692 return _error
->Errno("fwrite","IO Error to output");
// Make sure the element ends with a newline.
693 if (Stop
[-1] != '\n')
694 fprintf(Output
,"\n");
698 // Now write all the old tags that were missed.
699 for (unsigned int I
= 0; I
!= Tags
.Count(); I
++)
// Bit 1 set: this tag was already handled through Order.
701 if ((Visited
[I
] & 1) == 1)
706 Tags
.Get(Start
,Stop
,I
);
// Find the end of the tag name, i.e. the first colon before Stop.
707 const char *End
= Start
;
708 for (; End
< Stop
&& *End
!= ':'; End
++);
710 // See if this is a field that needs to be rewritten
711 bool Rewritten
= false;
712 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
714 if (stringcasecmp(Start
,End
,Rewrite
[J
].Tag
) == 0)
717 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
719 if (isspace(Rewrite
[J
].Rewrite
[0]))
720 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
722 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
730 if (Rewritten
== true)
733 // Write out this element
734 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
735 return _error
->Errno("fwrite","IO Error to output");
736 if (Stop
[-1] != '\n')
737 fprintf(Output
,"\n");
740 // Now write all the rewrites that were missed
741 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
// Bit 2 set: this rule was already applied above.
743 if ((Visited
[J
] & 2) == 2)
746 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
748 if (isspace(Rewrite
[J
].Rewrite
[0]))
749 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
751 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
// Releases the private data allocated with new in the constructor.
759 pkgTagSection::~pkgTagSection() { delete d
; }