]>
git.saurik.com Git - apt.git/blob - apt-pkg/tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
6 Fast scanner for RFC-822 type header information
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
11 ##################################################################### */
13 // Include Files /*{{{*/
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
32 class pkgTagFilePrivate
35 pkgTagFilePrivate(FileFd
*pFd
, unsigned long long Size
) : Fd(*pFd
), Buffer(NULL
),
36 Start(NULL
), End(NULL
),
37 Done(false), iOffset(0),
46 unsigned long long iOffset
;
47 unsigned long long Size
;
50 static unsigned long AlphaHash(const char *Text
, size_t Length
) /*{{{*/
52 /* This very simple hash function for the last 8 letters gives
53 very good performance on the Debian package files */
59 unsigned long Res
= 0;
60 for (size_t i
= 0; i
< Length
; ++i
)
61 Res
= ((unsigned long)(Text
[i
]) & 0xDF) ^ (Res
<< 1);
66 // TagFile::pkgTagFile - Constructor /*{{{*/
67 // ---------------------------------------------------------------------
69 pkgTagFile::pkgTagFile(FileFd
*pFd
,unsigned long long Size
)
75 void pkgTagFile::Init(FileFd
*pFd
,unsigned long long Size
)
77 /* The size is increased by 4 because, if we start with the size of the
78 file, we need to try to read 1 char more to see an EOF faster, 1
79 char the end-pointer can be on, and maybe 2 newlines need to be added
80 to the end of the file -> 4 extra chars */
87 d
= new pkgTagFilePrivate(pFd
, Size
);
89 if (d
->Fd
.IsOpen() == false)
90 d
->Start
= d
->End
= d
->Buffer
= 0;
92 d
->Buffer
= (char*)malloc(sizeof(char) * Size
);
94 if (d
->Buffer
== NULL
)
99 d
->Start
= d
->End
= d
->Buffer
;
101 if (d
->Done
== false)
105 // TagFile::~pkgTagFile - Destructor /*{{{*/
106 // ---------------------------------------------------------------------
108 pkgTagFile::~pkgTagFile()
114 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
115 APT_PURE
unsigned long pkgTagFile::Offset()
120 // TagFile::Resize - Resize the internal buffer /*{{{*/
121 // ---------------------------------------------------------------------
122 /* Resize the internal buffer (double it in size). Fail if a maximum size
125 bool pkgTagFile::Resize()
127 // fail if the buffer grows too big
128 if(d
->Size
> 1024*1024+1)
131 return Resize(d
->Size
* 2);
133 bool pkgTagFile::Resize(unsigned long long const newSize
)
135 unsigned long long const EndSize
= d
->End
- d
->Start
;
137 // get new buffer and use it
138 char* newBuffer
= (char*)realloc(d
->Buffer
, sizeof(char) * newSize
);
139 if (newBuffer
== NULL
)
141 d
->Buffer
= newBuffer
;
144 // update the start/end pointers to the new buffer
145 d
->Start
= d
->Buffer
;
146 d
->End
= d
->Start
+ EndSize
;
150 // TagFile::Step - Advance to the next section /*{{{*/
151 // ---------------------------------------------------------------------
152 /* If the Section Scanner fails we refill the buffer and try again.
153 * If that fails too, double the buffer size and try again until a
154 * maximum buffer is reached.
156 bool pkgTagFile::Step(pkgTagSection
&Tag
)
158 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
) == false)
165 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false))
168 if (Resize() == false)
169 return _error
->Error(_("Unable to parse package file %s (1)"),
170 d
->Fd
.Name().c_str());
172 } while (Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false) == false);
175 d
->Start
+= Tag
.size();
176 d
->iOffset
+= Tag
.size();
182 // TagFile::Fill - Top up the buffer /*{{{*/
183 // ---------------------------------------------------------------------
184 /* This takes the bit at the end of the buffer and puts it at the start
185 then fills the rest from the file */
186 bool pkgTagFile::Fill()
188 unsigned long long EndSize
= d
->End
- d
->Start
;
189 unsigned long long Actual
= 0;
191 memmove(d
->Buffer
,d
->Start
,EndSize
);
192 d
->Start
= d
->Buffer
;
193 d
->End
= d
->Buffer
+ EndSize
;
195 if (d
->Done
== false)
197 // See if only a bit of the file is left
198 unsigned long long const dataSize
= d
->Size
- ((d
->End
- d
->Buffer
) + 1);
199 if (d
->Fd
.Read(d
->End
, dataSize
, &Actual
) == false)
201 if (Actual
!= dataSize
)
208 if (EndSize
<= 3 && Actual
== 0)
210 if (d
->Size
- (d
->End
- d
->Buffer
) < 4)
213 // Append a double new line if one does not exist
214 unsigned int LineCount
= 0;
215 for (const char *E
= d
->End
- 1; E
- d
->End
< 6 && (*E
== '\n' || *E
== '\r'); E
--)
220 if ((unsigned)(d
->End
- d
->Buffer
) >= d
->Size
)
222 for (; LineCount
< 2; LineCount
++)
232 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
233 // ---------------------------------------------------------------------
234 /* This jumps to a pre-recorded file location and reads the record
236 bool pkgTagFile::Jump(pkgTagSection
&Tag
,unsigned long long Offset
)
238 // We are within a buffer space of the next hit..
239 if (Offset
>= d
->iOffset
&& d
->iOffset
+ (d
->End
- d
->Start
) > Offset
)
241 unsigned long long Dist
= Offset
- d
->iOffset
;
244 // if we have seen the end, don't ask for more
246 return Tag
.Scan(d
->Start
, d
->End
- d
->Start
);
251 // Reposition and reload..
254 if (d
->Fd
.Seek(Offset
) == false)
256 d
->End
= d
->Start
= d
->Buffer
;
261 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
) == true)
264 // This appends a double new line (for the real eof handling)
268 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
, false) == false)
269 return _error
->Error(_("Unable to parse package file %s (2)"),d
->Fd
.Name().c_str());
274 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
275 // ---------------------------------------------------------------------
277 pkgTagSection::pkgTagSection()
278 : Section(0), d(NULL
), Stop(0)
280 memset(&LookupTable
, 0, sizeof(LookupTable
));
283 // TagSection::Scan - Scan for the end of the header information /*{{{*/
284 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
, bool const Restart
)
287 const char *End
= Start
+ MaxLength
;
289 if (Restart
== false && Tags
.empty() == false)
291 Stop
= Section
+ Tags
.back().StartTag
;
294 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
302 if (Tags
.empty() == false)
304 memset(&LookupTable
, 0, sizeof(LookupTable
));
309 size_t TagCount
= Tags
.size();
314 TagData
lastTagData(0);
315 lastTagData
.EndTag
= 0;
316 unsigned long lastTagHash
= 0;
319 TrimRecord(true,End
);
321 // this can happen when TrimRecord trims away the entire Record
322 // (e.g. because it just contains comments)
326 // Start a new index and add it to the hash
327 if (isspace(Stop
[0]) == 0)
329 // store the last found tag
330 if (lastTagData
.EndTag
!= 0)
332 if (LookupTable
[lastTagHash
] != 0)
333 lastTagData
.NextInBucket
= LookupTable
[lastTagHash
];
334 LookupTable
[lastTagHash
] = TagCount
;
335 Tags
.push_back(lastTagData
);
339 lastTagData
= TagData(Stop
- Section
);
340 // find the colon separating tag and value
341 char const * Colon
= (char const *) memchr(Stop
, ':', End
- Stop
);
344 // find the end of the tag (which might or might not be the colon)
345 char const * EndTag
= Colon
;
347 for (; EndTag
> Stop
&& isspace(*EndTag
) != 0; --EndTag
)
350 lastTagData
.EndTag
= EndTag
- Section
;
351 lastTagHash
= AlphaHash(Stop
, EndTag
- Stop
);
352 // find the beginning of the value
354 for (; isspace(*Stop
) != 0; ++Stop
);
357 lastTagData
.StartValue
= Stop
- Section
;
360 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
365 for (; Stop
+1 < End
&& Stop
[1] == '\r'; Stop
++)
369 // Double newline marks the end of the record
370 if (Stop
+1 < End
&& Stop
[1] == '\n')
372 if (lastTagData
.EndTag
!= 0)
374 if (LookupTable
[lastTagHash
] != 0)
375 lastTagData
.NextInBucket
= LookupTable
[lastTagHash
];
376 LookupTable
[lastTagHash
] = TagCount
;
377 Tags
.push_back(lastTagData
);
380 TagData
const td(Stop
- Section
);
382 TrimRecord(false,End
);
392 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
393 // ---------------------------------------------------------------------
394 /* There should be exactly 2 newlines at the end of the record, no more. */
395 void pkgTagSection::TrimRecord(bool BeforeRecord
, const char*& End
)
397 if (BeforeRecord
== true)
399 for (; Stop
< End
&& (Stop
[0] == '\n' || Stop
[0] == '\r'); Stop
++);
402 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
403 // ---------------------------------------------------------------------
404 /* There should be exactly 1 newline at the end of the buffer, no more. */
405 void pkgTagSection::Trim()
407 for (; Stop
> Section
+ 2 && (Stop
[-2] == '\n' || Stop
[-2] == '\r'); Stop
--);
410 // TagSection::Exists - return True if a tag exists /*{{{*/
411 bool pkgTagSection::Exists(const char* const Tag
) const
414 return Find(Tag
, tmp
);
417 // TagSection::Find - Locate a tag /*{{{*/
418 // ---------------------------------------------------------------------
419 /* This searches the section for a tag that matches the given string. */
420 bool pkgTagSection::Find(const char *Tag
,unsigned int &Pos
) const
422 size_t const Length
= strlen(Tag
);
423 unsigned int Bucket
= LookupTable
[AlphaHash(Tag
, Length
)];
427 for (; Bucket
!= 0; Bucket
= Tags
[Bucket
- 1].NextInBucket
)
429 if ((Tags
[Bucket
- 1].EndTag
- Tags
[Bucket
- 1].StartTag
) != Length
)
432 char const * const St
= Section
+ Tags
[Bucket
- 1].StartTag
;
433 if (strncasecmp(Tag
,St
,Length
) != 0)
443 bool pkgTagSection::Find(const char *Tag
,const char *&Start
,
444 const char *&End
) const
447 if (Find(Tag
, Pos
) == false)
450 Start
= Section
+ Tags
[Pos
].StartValue
;
451 // Strip off the gunk from the end
452 End
= Section
+ Tags
[Pos
+ 1].StartTag
;
453 if (unlikely(Start
> End
))
454 return _error
->Error("Internal parsing error");
456 for (; isspace(End
[-1]) != 0 && End
> Start
; --End
);
461 // TagSection::FindS - Find a string /*{{{*/
462 // ---------------------------------------------------------------------
464 string
pkgTagSection::FindS(const char *Tag
) const
468 if (Find(Tag
,Start
,End
) == false)
470 return string(Start
,End
);
473 // TagSection::FindI - Find an integer /*{{{*/
474 // ---------------------------------------------------------------------
476 signed int pkgTagSection::FindI(const char *Tag
,signed long Default
) const
480 if (Find(Tag
,Start
,Stop
) == false)
483 // Copy it into a temp buffer so we can use strtol
485 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
487 strncpy(S
,Start
,Stop
-Start
);
491 signed long Result
= strtol(S
,&End
,10);
497 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
498 // ---------------------------------------------------------------------
500 unsigned long long pkgTagSection::FindULL(const char *Tag
, unsigned long long const &Default
) const
504 if (Find(Tag
,Start
,Stop
) == false)
507 // Copy it into a temp buffer so we can use strtoull
509 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
511 strncpy(S
,Start
,Stop
-Start
);
515 unsigned long long Result
= strtoull(S
,&End
,10);
521 // TagSection::FindB - Find boolean value /*{{{*/
522 // ---------------------------------------------------------------------
524 bool pkgTagSection::FindB(const char *Tag
, bool const &Default
) const
526 const char *Start
, *Stop
;
527 if (Find(Tag
, Start
, Stop
) == false)
529 return StringToBool(string(Start
, Stop
));
532 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
533 // ---------------------------------------------------------------------
534 /* The bits marked in Flag are masked on/off in Flags */
535 bool pkgTagSection::FindFlag(const char *Tag
,unsigned long &Flags
,
536 unsigned long Flag
) const
540 if (Find(Tag
,Start
,Stop
) == false)
542 return FindFlag(Flags
, Flag
, Start
, Stop
);
544 bool pkgTagSection::FindFlag(unsigned long &Flags
, unsigned long Flag
,
545 char const* Start
, char const* Stop
)
547 switch (StringToBool(string(Start
, Stop
)))
558 _error
->Warning("Unknown flag value: %s",string(Start
,Stop
).c_str());
564 APT_PURE
unsigned int pkgTagSection::Count() const { /*{{{*/
565 if (Tags
.empty() == true)
567 // the last element is just marking the end and isn't a real one
568 return Tags
.size() - 1;
571 // TFRewrite - Rewrite a control record /*{{{*/
572 // ---------------------------------------------------------------------
573 /* This writes the control record to Output, rewriting it as necessary. The
574 override map item specifies the rewriting rules to follow. This also
575 takes the time to sort the field list. */
577 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
579 static const char *iTFRewritePackageOrder
[] = {
587 "Original-Maintainer",
591 "Revision", // Obsolete
592 "Config-Version", // Obsolete
608 "MSDOS-Filename", // Obsolete
611 static const char *iTFRewriteSourceOrder
[] = {"Package",
618 "Original-Maintainer",
620 "Build-Depends-Indep",
622 "Build-Conflicts-Indep",
630 /* Two levels of initialization are used because gcc will set the symbol
631 size of an array to the length of the array, causing dynamic relinking
632 errors. Doing this makes the symbol size constant */
633 const char **TFRewritePackageOrder
= iTFRewritePackageOrder
;
634 const char **TFRewriteSourceOrder
= iTFRewriteSourceOrder
;
636 bool TFRewrite(FILE *Output
,pkgTagSection
const &Tags
,const char *Order
[],
637 TFRewriteData
*Rewrite
)
639 unsigned char Visited
[256]; // Bit 1 is Order, Bit 2 is Rewrite
640 for (unsigned I
= 0; I
!= 256; I
++)
643 // Set new tag up as necessary.
644 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
646 if (Rewrite
[J
].NewTag
== 0)
647 Rewrite
[J
].NewTag
= Rewrite
[J
].Tag
;
650 // Write all of the tags, in order.
653 for (unsigned int I
= 0; Order
[I
] != 0; I
++)
655 bool Rewritten
= false;
657 // See if this is a field that needs to be rewritten
658 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
660 if (strcasecmp(Rewrite
[J
].Tag
,Order
[I
]) == 0)
663 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
665 if (isspace(Rewrite
[J
].Rewrite
[0]))
666 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
668 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
675 // See if it is in the fragment
677 if (Tags
.Find(Order
[I
],Pos
) == false)
681 if (Rewritten
== true)
684 /* Write out this element, taking a moment to rewrite the tag
685 in case of changes of case. */
688 Tags
.Get(Start
,Stop
,Pos
);
690 if (fputs(Order
[I
],Output
) < 0)
691 return _error
->Errno("fputs","IO Error to output");
692 Start
+= strlen(Order
[I
]);
693 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
694 return _error
->Errno("fwrite","IO Error to output");
695 if (Stop
[-1] != '\n')
696 fprintf(Output
,"\n");
700 // Now write all the old tags that were missed.
701 for (unsigned int I
= 0; I
!= Tags
.Count(); I
++)
703 if ((Visited
[I
] & 1) == 1)
708 Tags
.Get(Start
,Stop
,I
);
709 const char *End
= Start
;
710 for (; End
< Stop
&& *End
!= ':'; End
++);
712 // See if this is a field that needs to be rewritten
713 bool Rewritten
= false;
714 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
716 if (stringcasecmp(Start
,End
,Rewrite
[J
].Tag
) == 0)
719 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
721 if (isspace(Rewrite
[J
].Rewrite
[0]))
722 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
724 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
732 if (Rewritten
== true)
735 // Write out this element
736 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
737 return _error
->Errno("fwrite","IO Error to output");
738 if (Stop
[-1] != '\n')
739 fprintf(Output
,"\n");
742 // Now write all the rewrites that were missed
743 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
745 if ((Visited
[J
] & 2) == 2)
748 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
750 if (isspace(Rewrite
[J
].Rewrite
[0]))
751 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
753 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);