]>
git.saurik.com Git - apt.git/blob - apt-pkg/tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
6 Fast scanner for RFC-822 type header information
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
11 ##################################################################### */
13 // Include Files /*{{{*/
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
32 class pkgTagFilePrivate
35 pkgTagFilePrivate(FileFd
*pFd
, unsigned long long Size
) : Fd(*pFd
), Buffer(NULL
),
36 Start(NULL
), End(NULL
),
37 Done(false), iOffset(0),
46 unsigned long long iOffset
;
47 unsigned long long Size
;
50 static unsigned long AlphaHash(const char *Text
, size_t Length
) /*{{{*/
52 /* This very simple hash function for the last 8 letters gives
53 very good performance on the debian package files */
59 unsigned long Res
= 0;
60 for (size_t i
= 0; i
< Length
; ++i
)
61 Res
= ((unsigned long)(Text
[i
]) & 0xDF) ^ (Res
<< 1);
66 // TagFile::pkgTagFile - Constructor /*{{{*/
67 // ---------------------------------------------------------------------
69 pkgTagFile::pkgTagFile(FileFd
*pFd
,unsigned long long Size
)
75 void pkgTagFile::Init(FileFd
*pFd
,unsigned long long Size
)
77 /* The size is increased by 4 because if we start with the Size of the
78 filename we need to try to read 1 char more to see an EOF faster, 1
79 char the end-pointer can be on and maybe 2 newlines need to be added
80 to the end of the file -> 4 extra chars */
87 d
= new pkgTagFilePrivate(pFd
, Size
);
89 if (d
->Fd
.IsOpen() == false)
90 d
->Start
= d
->End
= d
->Buffer
= 0;
92 d
->Buffer
= (char*)malloc(sizeof(char) * Size
);
94 if (d
->Buffer
== NULL
)
99 d
->Start
= d
->End
= d
->Buffer
;
101 if (d
->Done
== false)
105 // TagFile::~pkgTagFile - Destructor /*{{{*/
106 // ---------------------------------------------------------------------
108 pkgTagFile::~pkgTagFile()
114 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
115 APT_PURE
unsigned long pkgTagFile::Offset()
120 // TagFile::Resize - Resize the internal buffer /*{{{*/
121 // ---------------------------------------------------------------------
122 /* Resize the internal buffer (double it in size). Fail if a maximum size
125 bool pkgTagFile::Resize()
// fail if the buffer grows too big
128 if(d
->Size
> 1024*1024+1)
131 return Resize(d
->Size
* 2);
133 bool pkgTagFile::Resize(unsigned long long const newSize
)
135 unsigned long long const EndSize
= d
->End
- d
->Start
;
137 // get new buffer and use it
138 char* newBuffer
= (char*)realloc(d
->Buffer
, sizeof(char) * newSize
);
139 if (newBuffer
== NULL
)
141 d
->Buffer
= newBuffer
;
144 // update the start/end pointers to the new buffer
145 d
->Start
= d
->Buffer
;
146 d
->End
= d
->Start
+ EndSize
;
150 // TagFile::Step - Advance to the next section /*{{{*/
151 // ---------------------------------------------------------------------
152 /* If the Section Scanner fails we refill the buffer and try again.
153 * If that fails too, double the buffer size and try again until a
154 * maximum buffer is reached.
156 bool pkgTagFile::Step(pkgTagSection
&Tag
)
158 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
) == false)
165 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false))
168 if (Resize() == false)
169 return _error
->Error(_("Unable to parse package file %s (1)"),
170 d
->Fd
.Name().c_str());
172 } while (Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false) == false);
175 d
->Start
+= Tag
.size();
176 d
->iOffset
+= Tag
.size();
182 // TagFile::Fill - Top up the buffer /*{{{*/
183 // ---------------------------------------------------------------------
184 /* This takes the bit at the end of the buffer and puts it at the start
185 then fills the rest from the file */
186 bool pkgTagFile::Fill()
188 unsigned long long EndSize
= d
->End
- d
->Start
;
189 unsigned long long Actual
= 0;
191 memmove(d
->Buffer
,d
->Start
,EndSize
);
192 d
->Start
= d
->Buffer
;
193 d
->End
= d
->Buffer
+ EndSize
;
195 if (d
->Done
== false)
197 // See if only a bit of the file is left
198 unsigned long long const dataSize
= d
->Size
- ((d
->End
- d
->Buffer
) + 1);
199 if (d
->Fd
.Read(d
->End
, dataSize
, &Actual
) == false)
201 if (Actual
!= dataSize
)
208 if (EndSize
<= 3 && Actual
== 0)
210 if (d
->Size
- (d
->End
- d
->Buffer
) < 4)
213 // Append a double new line if one does not exist
214 unsigned int LineCount
= 0;
215 for (const char *E
= d
->End
- 1; E
- d
->End
< 6 && (*E
== '\n' || *E
== '\r'); E
--)
220 if ((unsigned)(d
->End
- d
->Buffer
) >= d
->Size
)
222 for (; LineCount
< 2; LineCount
++)
232 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
233 // ---------------------------------------------------------------------
234 /* This jumps to a pre-recorded file location and reads the record
236 bool pkgTagFile::Jump(pkgTagSection
&Tag
,unsigned long long Offset
)
238 // We are within a buffer space of the next hit..
239 if (Offset
>= d
->iOffset
&& d
->iOffset
+ (d
->End
- d
->Start
) > Offset
)
241 unsigned long long Dist
= Offset
- d
->iOffset
;
244 // if we have seen the end, don't ask for more
246 return Tag
.Scan(d
->Start
, d
->End
- d
->Start
);
251 // Reposition and reload..
254 if (d
->Fd
.Seek(Offset
) == false)
256 d
->End
= d
->Start
= d
->Buffer
;
261 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
) == true)
264 // This appends a double new line (for the real eof handling)
268 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
, false) == false)
269 return _error
->Error(_("Unable to parse package file %s (2)"),d
->Fd
.Name().c_str());
274 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
275 // ---------------------------------------------------------------------
277 pkgTagSection::pkgTagSection()
278 : Section(0), d(NULL
), Stop(0)
280 memset(&LookupTable
, 0, sizeof(LookupTable
));
283 // TagSection::Scan - Scan for the end of the header information /*{{{*/
284 #if APT_PKG_ABI < 413
285 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
)
287 return Scan(Start
, MaxLength
, true);
290 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
, bool const Restart
)
293 const char *End
= Start
+ MaxLength
;
295 if (Restart
== false && Tags
.empty() == false)
297 Stop
= Section
+ Tags
.back().StartTag
;
300 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
308 if (Tags
.empty() == false)
310 memset(&LookupTable
, 0, sizeof(LookupTable
));
315 size_t TagCount
= Tags
.size();
320 TagData
lastTagData(0);
321 lastTagData
.EndTag
= 0;
322 unsigned long lastTagHash
= 0;
325 TrimRecord(true,End
);
327 // this can happen when TrimRecord trims away the entire Record
328 // (e.g. because it just contains comments)
332 // Start a new index and add it to the hash
333 if (isspace(Stop
[0]) == 0)
335 // store the last found tag
336 if (lastTagData
.EndTag
!= 0)
338 if (LookupTable
[lastTagHash
] != 0)
339 lastTagData
.NextInBucket
= LookupTable
[lastTagHash
];
340 LookupTable
[lastTagHash
] = TagCount
;
341 Tags
.push_back(lastTagData
);
345 lastTagData
= TagData(Stop
- Section
);
346 // find the colon separating tag and value
347 char const * Colon
= (char const *) memchr(Stop
, ':', End
- Stop
);
350 // find the end of the tag (which might or might not be the colon)
351 char const * EndTag
= Colon
;
353 for (; EndTag
> Stop
&& isspace(*EndTag
) != 0; --EndTag
)
356 lastTagData
.EndTag
= EndTag
- Section
;
357 lastTagHash
= AlphaHash(Stop
, EndTag
- Stop
);
358 // find the beginning of the value
360 for (; isspace(*Stop
) != 0; ++Stop
);
363 lastTagData
.StartValue
= Stop
- Section
;
366 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
371 for (; Stop
+1 < End
&& Stop
[1] == '\r'; Stop
++)
375 // Double newline marks the end of the record
376 if (Stop
+1 < End
&& Stop
[1] == '\n')
378 if (lastTagData
.EndTag
!= 0)
380 if (LookupTable
[lastTagHash
] != 0)
381 lastTagData
.NextInBucket
= LookupTable
[lastTagHash
];
382 LookupTable
[lastTagHash
] = TagCount
;
383 Tags
.push_back(lastTagData
);
386 TagData
const td(Stop
- Section
);
388 TrimRecord(false,End
);
398 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
399 // ---------------------------------------------------------------------
/* There should be exactly 2 newlines at the end of the record, no more. */
401 void pkgTagSection::TrimRecord(bool BeforeRecord
, const char*& End
)
403 if (BeforeRecord
== true)
405 for (; Stop
< End
&& (Stop
[0] == '\n' || Stop
[0] == '\r'); Stop
++);
408 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
409 // ---------------------------------------------------------------------
410 /* There should be exactly 1 newline at the end of the buffer, no more. */
411 void pkgTagSection::Trim()
413 for (; Stop
> Section
+ 2 && (Stop
[-2] == '\n' || Stop
[-2] == '\r'); Stop
--);
416 // TagSection::Exists - return True if a tag exists /*{{{*/
417 #if APT_PKG_ABI >= 413
418 bool pkgTagSection::Exists(const char* const Tag
) const
420 bool pkgTagSection::Exists(const char* const Tag
)
424 return Find(Tag
, tmp
);
427 // TagSection::Find - Locate a tag /*{{{*/
428 // ---------------------------------------------------------------------
429 /* This searches the section for a tag that matches the given string. */
430 bool pkgTagSection::Find(const char *Tag
,unsigned int &Pos
) const
432 size_t const Length
= strlen(Tag
);
433 unsigned int Bucket
= LookupTable
[AlphaHash(Tag
, Length
)];
437 for (; Bucket
!= 0; Bucket
= Tags
[Bucket
- 1].NextInBucket
)
439 if ((Tags
[Bucket
- 1].EndTag
- Tags
[Bucket
- 1].StartTag
) != Length
)
442 char const * const St
= Section
+ Tags
[Bucket
- 1].StartTag
;
443 if (strncasecmp(Tag
,St
,Length
) != 0)
453 bool pkgTagSection::Find(const char *Tag
,const char *&Start
,
454 const char *&End
) const
457 if (Find(Tag
, Pos
) == false)
460 Start
= Section
+ Tags
[Pos
].StartValue
;
461 // Strip off the gunk from the end
462 End
= Section
+ Tags
[Pos
+ 1].StartTag
;
463 if (unlikely(Start
> End
))
464 return _error
->Error("Internal parsing error");
466 for (; isspace(End
[-1]) != 0 && End
> Start
; --End
);
471 // TagSection::FindS - Find a string /*{{{*/
472 // ---------------------------------------------------------------------
474 string
pkgTagSection::FindS(const char *Tag
) const
478 if (Find(Tag
,Start
,End
) == false)
480 return string(Start
,End
);
483 // TagSection::FindI - Find an integer /*{{{*/
484 // ---------------------------------------------------------------------
486 signed int pkgTagSection::FindI(const char *Tag
,signed long Default
) const
490 if (Find(Tag
,Start
,Stop
) == false)
493 // Copy it into a temp buffer so we can use strtol
495 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
497 strncpy(S
,Start
,Stop
-Start
);
501 signed long Result
= strtol(S
,&End
,10);
507 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
508 // ---------------------------------------------------------------------
510 unsigned long long pkgTagSection::FindULL(const char *Tag
, unsigned long long const &Default
) const
514 if (Find(Tag
,Start
,Stop
) == false)
517 // Copy it into a temp buffer so we can use strtoull
519 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
521 strncpy(S
,Start
,Stop
-Start
);
525 unsigned long long Result
= strtoull(S
,&End
,10);
531 // TagSection::FindB - Find boolean value /*{{{*/
532 // ---------------------------------------------------------------------
534 bool pkgTagSection::FindB(const char *Tag
, bool const &Default
) const
536 const char *Start
, *Stop
;
537 if (Find(Tag
, Start
, Stop
) == false)
539 return StringToBool(string(Start
, Stop
));
542 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
543 // ---------------------------------------------------------------------
544 /* The bits marked in Flag are masked on/off in Flags */
545 bool pkgTagSection::FindFlag(const char *Tag
,unsigned long &Flags
,
546 unsigned long Flag
) const
550 if (Find(Tag
,Start
,Stop
) == false)
552 return FindFlag(Flags
, Flag
, Start
, Stop
);
554 bool pkgTagSection::FindFlag(unsigned long &Flags
, unsigned long Flag
,
555 char const* Start
, char const* Stop
)
557 switch (StringToBool(string(Start
, Stop
)))
568 _error
->Warning("Unknown flag value: %s",string(Start
,Stop
).c_str());
574 APT_PURE
unsigned int pkgTagSection::Count() const { /*{{{*/
575 if (Tags
.empty() == true)
577 // the last element is just marking the end and isn't a real one
578 return Tags
.size() - 1;
581 // TFRewrite - Rewrite a control record /*{{{*/
582 // ---------------------------------------------------------------------
/* This writes the control record to the Output stream, rewriting it as
   necessary. The override map item specifies the rewriting rules to follow.
   This also takes the time to sort the field list. */
587 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
589 static const char *iTFRewritePackageOrder
[] = {
597 "Original-Maintainer",
601 "Revision", // Obsolete
602 "Config-Version", // Obsolete
618 "MSDOS-Filename", // Obsolete
621 static const char *iTFRewriteSourceOrder
[] = {"Package",
628 "Original-Maintainer",
630 "Build-Depends-Indep",
632 "Build-Conflicts-Indep",
640 /* Two levels of initialization are used because gcc will set the symbol
641 size of an array to the length of the array, causing dynamic relinking
642 errors. Doing this makes the symbol size constant */
643 const char **TFRewritePackageOrder
= iTFRewritePackageOrder
;
644 const char **TFRewriteSourceOrder
= iTFRewriteSourceOrder
;
646 bool TFRewrite(FILE *Output
,pkgTagSection
const &Tags
,const char *Order
[],
647 TFRewriteData
*Rewrite
)
649 unsigned char Visited
[256]; // Bit 1 is Order, Bit 2 is Rewrite
650 for (unsigned I
= 0; I
!= 256; I
++)
653 // Set new tag up as necessary.
654 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
656 if (Rewrite
[J
].NewTag
== 0)
657 Rewrite
[J
].NewTag
= Rewrite
[J
].Tag
;
// Write all of the tags, in order.
663 for (unsigned int I
= 0; Order
[I
] != 0; I
++)
665 bool Rewritten
= false;
667 // See if this is a field that needs to be rewritten
668 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
670 if (strcasecmp(Rewrite
[J
].Tag
,Order
[I
]) == 0)
673 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
675 if (isspace(Rewrite
[J
].Rewrite
[0]))
676 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
678 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
685 // See if it is in the fragment
687 if (Tags
.Find(Order
[I
],Pos
) == false)
691 if (Rewritten
== true)
694 /* Write out this element, taking a moment to rewrite the tag
695 in case of changes of case. */
698 Tags
.Get(Start
,Stop
,Pos
);
700 if (fputs(Order
[I
],Output
) < 0)
701 return _error
->Errno("fputs","IO Error to output");
702 Start
+= strlen(Order
[I
]);
703 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
704 return _error
->Errno("fwrite","IO Error to output");
705 if (Stop
[-1] != '\n')
706 fprintf(Output
,"\n");
710 // Now write all the old tags that were missed.
711 for (unsigned int I
= 0; I
!= Tags
.Count(); I
++)
713 if ((Visited
[I
] & 1) == 1)
718 Tags
.Get(Start
,Stop
,I
);
719 const char *End
= Start
;
720 for (; End
< Stop
&& *End
!= ':'; End
++);
722 // See if this is a field that needs to be rewritten
723 bool Rewritten
= false;
724 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
726 if (stringcasecmp(Start
,End
,Rewrite
[J
].Tag
) == 0)
729 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
731 if (isspace(Rewrite
[J
].Rewrite
[0]))
732 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
734 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
742 if (Rewritten
== true)
745 // Write out this element
746 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
747 return _error
->Errno("fwrite","IO Error to output");
748 if (Stop
[-1] != '\n')
749 fprintf(Output
,"\n");
752 // Now write all the rewrites that were missed
753 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
755 if ((Visited
[J
] & 2) == 2)
758 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
760 if (isspace(Rewrite
[J
].Rewrite
[0]))
761 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
763 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
771 pkgTagSection::~pkgTagSection() {}