]>
git.saurik.com Git - apt.git/blob - apt-pkg/tagfile.cc
e667c495fd0fc68caade79dbb5ac957790fab914
1 // -*- mode: cpp; mode: fold -*-
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
6 Fast scanner for RFC-822 type header information
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
11 ##################################################################### */
13 // Include Files /*{{{*/
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
32 class pkgTagFilePrivate
35 pkgTagFilePrivate(FileFd
*pFd
, unsigned long long Size
) : Fd(*pFd
), Buffer(NULL
),
36 Start(NULL
), End(NULL
),
37 Done(false), iOffset(0),
46 unsigned long long iOffset
;
47 unsigned long long Size
;
50 static unsigned long AlphaHash(const char *Text
, size_t Length
) /*{{{*/
52 /* This very simple hash function for the last 8 letters gives
53 very good performance on the Debian package files */
59 unsigned long Res
= 0;
60 for (size_t i
= 0; i
< Length
; ++i
)
61 Res
= ((unsigned long)(Text
[i
]) & 0xDF) ^ (Res
<< 1);
66 // TagFile::pkgTagFile - Constructor /*{{{*/
67 // ---------------------------------------------------------------------
69 pkgTagFile::pkgTagFile(FileFd
*pFd
,unsigned long long Size
)
75 void pkgTagFile::Init(FileFd
*pFd
,unsigned long long Size
)
77 /* The size is increased by 4 because if we start with the Size of the
78 filename we need to try to read 1 char more to see an EOF faster, 1
79 char the end-pointer can be on and maybe 2 newlines need to be added
80 to the end of the file -> 4 extra chars */
87 d
= new pkgTagFilePrivate(pFd
, Size
);
89 if (d
->Fd
.IsOpen() == false)
90 d
->Start
= d
->End
= d
->Buffer
= 0;
92 d
->Buffer
= (char*)malloc(sizeof(char) * Size
);
94 if (d
->Buffer
== NULL
)
99 d
->Start
= d
->End
= d
->Buffer
;
101 if (d
->Done
== false)
105 // TagFile::~pkgTagFile - Destructor /*{{{*/
106 // ---------------------------------------------------------------------
108 pkgTagFile::~pkgTagFile()
114 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
115 APT_PURE
unsigned long pkgTagFile::Offset()
120 // TagFile::Resize - Resize the internal buffer /*{{{*/
121 // ---------------------------------------------------------------------
122 /* Resize the internal buffer (double it in size). Fail if a maximum size
125 bool pkgTagFile::Resize()
127 // fail if the buffer grows too big
128 if(d
->Size
> 1024*1024+1)
131 return Resize(d
->Size
* 2);
133 bool pkgTagFile::Resize(unsigned long long const newSize
)
135 unsigned long long const EndSize
= d
->End
- d
->Start
;
137 // get new buffer and use it
138 char* newBuffer
= (char*)realloc(d
->Buffer
, sizeof(char) * newSize
);
139 if (newBuffer
== NULL
)
141 d
->Buffer
= newBuffer
;
144 // update the start/end pointers to the new buffer
145 d
->Start
= d
->Buffer
;
146 d
->End
= d
->Start
+ EndSize
;
150 // TagFile::Step - Advance to the next section /*{{{*/
151 // ---------------------------------------------------------------------
152 /* If the Section Scanner fails we refill the buffer and try again.
153 * If that fails too, double the buffer size and try again until a
154 * maximum buffer is reached.
156 bool pkgTagFile::Step(pkgTagSection
&Tag
)
158 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
) == false)
165 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false))
168 if (Resize() == false)
169 return _error
->Error(_("Unable to parse package file %s (1)"),
170 d
->Fd
.Name().c_str());
172 } while (Tag
.Scan(d
->Start
,d
->End
- d
->Start
, false) == false);
175 d
->Start
+= Tag
.size();
176 d
->iOffset
+= Tag
.size();
182 // TagFile::Fill - Top up the buffer /*{{{*/
183 // ---------------------------------------------------------------------
184 /* This takes the bit at the end of the buffer and puts it at the start
185 then fills the rest from the file */
186 bool pkgTagFile::Fill()
188 unsigned long long EndSize
= d
->End
- d
->Start
;
189 unsigned long long Actual
= 0;
191 memmove(d
->Buffer
,d
->Start
,EndSize
);
192 d
->Start
= d
->Buffer
;
193 d
->End
= d
->Buffer
+ EndSize
;
195 if (d
->Done
== false)
197 // See if only a bit of the file is left
198 unsigned long long const dataSize
= d
->Size
- ((d
->End
- d
->Buffer
) + 1);
199 if (d
->Fd
.Read(d
->End
, dataSize
, &Actual
) == false)
201 if (Actual
!= dataSize
)
208 if (EndSize
<= 3 && Actual
== 0)
210 if (d
->Size
- (d
->End
- d
->Buffer
) < 4)
213 // Append a double new line if one does not exist
214 unsigned int LineCount
= 0;
215 for (const char *E
= d
->End
- 1; E
- d
->End
< 6 && (*E
== '\n' || *E
== '\r'); E
--)
220 if ((unsigned)(d
->End
- d
->Buffer
) >= d
->Size
)
222 for (; LineCount
< 2; LineCount
++)
232 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
233 // ---------------------------------------------------------------------
234 /* This jumps to a pre-recorded file location and reads the record
236 bool pkgTagFile::Jump(pkgTagSection
&Tag
,unsigned long long Offset
)
238 // We are within a buffer space of the next hit..
239 if (Offset
>= d
->iOffset
&& d
->iOffset
+ (d
->End
- d
->Start
) > Offset
)
241 unsigned long long Dist
= Offset
- d
->iOffset
;
244 // if we have seen the end, don't ask for more
246 return Tag
.Scan(d
->Start
, d
->End
- d
->Start
);
251 // Reposition and reload..
254 if (d
->Fd
.Seek(Offset
) == false)
256 d
->End
= d
->Start
= d
->Buffer
;
261 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
) == true)
264 // This appends a double new line (for the real eof handling)
268 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
, false) == false)
269 return _error
->Error(_("Unable to parse package file %s (2)"),d
->Fd
.Name().c_str());
274 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
275 // ---------------------------------------------------------------------
277 pkgTagSection::pkgTagSection()
278 : Section(0), d(NULL
), Stop(0)
280 memset(&LookupTable
, 0, sizeof(LookupTable
));
283 // TagSection::Scan - Scan for the end of the header information /*{{{*/
284 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
, bool const Restart
)
287 const char *End
= Start
+ MaxLength
;
289 if (Restart
== false && Tags
.empty() == false)
291 Stop
= Section
+ Tags
.back().StartTag
;
294 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
302 if (Tags
.empty() == false)
304 memset(&LookupTable
, 0, sizeof(LookupTable
));
309 size_t TagCount
= Tags
.size();
314 TagData
lastTagData(0);
315 lastTagData
.EndTag
= 0;
316 unsigned long lastTagHash
= 0;
319 TrimRecord(true,End
);
321 // this can happen when TrimRecord trims away the entire Record
322 // (e.g. because it just contains comments)
326 // Start a new index and add it to the hash
327 if (isspace(Stop
[0]) == 0)
329 // store the last found tag
330 if (lastTagData
.EndTag
!= 0)
332 if (LookupTable
[lastTagHash
] != 0)
333 lastTagData
.NextInBucket
= LookupTable
[lastTagHash
];
334 LookupTable
[lastTagHash
] = TagCount
;
335 Tags
.push_back(lastTagData
);
339 lastTagData
= TagData(Stop
- Section
);
340 // find the colon separating tag and value
341 char const * Colon
= (char const *) memchr(Stop
, ':', End
- Stop
);
344 // find the end of the tag (which might or might not be the colon)
345 char const * EndTag
= Colon
;
347 for (; EndTag
> Stop
&& isspace(*EndTag
) != 0; --EndTag
)
350 lastTagData
.EndTag
= EndTag
- Section
;
351 lastTagHash
= AlphaHash(Stop
, EndTag
- Stop
);
352 // find the beginning of the value
354 for (; isspace(*Stop
) != 0; ++Stop
);
357 lastTagData
.StartValue
= Stop
- Section
;
360 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
365 for (; Stop
+1 < End
&& Stop
[1] == '\r'; Stop
++)
369 // Double newline marks the end of the record
370 if (Stop
+1 < End
&& Stop
[1] == '\n')
372 if (lastTagData
.EndTag
!= 0)
374 if (LookupTable
[lastTagHash
] != 0)
375 lastTagData
.NextInBucket
= LookupTable
[lastTagHash
];
376 LookupTable
[lastTagHash
] = TagCount
;
377 Tags
.push_back(lastTagData
);
380 TagData
const td(Stop
- Section
);
382 TrimRecord(false,End
);
392 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
393 // ---------------------------------------------------------------------
394 /* There should be exactly 2 newlines at the end of the record, no more. */
395 void pkgTagSection::TrimRecord(bool BeforeRecord
, const char*& End
)
397 if (BeforeRecord
== true)
399 for (; Stop
< End
&& (Stop
[0] == '\n' || Stop
[0] == '\r'); Stop
++);
402 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
403 // ---------------------------------------------------------------------
404 /* There should be exactly 1 newline at the end of the buffer, no more. */
405 void pkgTagSection::Trim()
407 for (; Stop
> Section
+ 2 && (Stop
[-2] == '\n' || Stop
[-2] == '\r'); Stop
--);
410 // TagSection::Exists - return True if a tag exists /*{{{*/
411 #if APT_PKG_ABI >= 413
412 bool pkgTagSection::Exists(const char* const Tag
) const
414 bool pkgTagSection::Exists(const char* const Tag
)
418 return Find(Tag
, tmp
);
421 // TagSection::Find - Locate a tag /*{{{*/
422 // ---------------------------------------------------------------------
423 /* This searches the section for a tag that matches the given string. */
424 bool pkgTagSection::Find(const char *Tag
,unsigned int &Pos
) const
426 size_t const Length
= strlen(Tag
);
427 unsigned int Bucket
= LookupTable
[AlphaHash(Tag
, Length
)];
431 for (; Bucket
!= 0; Bucket
= Tags
[Bucket
- 1].NextInBucket
)
433 if ((Tags
[Bucket
- 1].EndTag
- Tags
[Bucket
- 1].StartTag
) != Length
)
436 char const * const St
= Section
+ Tags
[Bucket
- 1].StartTag
;
437 if (strncasecmp(Tag
,St
,Length
) != 0)
447 bool pkgTagSection::Find(const char *Tag
,const char *&Start
,
448 const char *&End
) const
451 if (Find(Tag
, Pos
) == false)
454 Start
= Section
+ Tags
[Pos
].StartValue
;
455 // Strip off the gunk from the end
456 End
= Section
+ Tags
[Pos
+ 1].StartTag
;
457 if (unlikely(Start
> End
))
458 return _error
->Error("Internal parsing error");
460 for (; isspace(End
[-1]) != 0 && End
> Start
; --End
);
465 // TagSection::FindS - Find a string /*{{{*/
466 // ---------------------------------------------------------------------
468 string
pkgTagSection::FindS(const char *Tag
) const
472 if (Find(Tag
,Start
,End
) == false)
474 return string(Start
,End
);
477 // TagSection::FindI - Find an integer /*{{{*/
478 // ---------------------------------------------------------------------
480 signed int pkgTagSection::FindI(const char *Tag
,signed long Default
) const
484 if (Find(Tag
,Start
,Stop
) == false)
487 // Copy it into a temp buffer so we can use strtol
489 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
491 strncpy(S
,Start
,Stop
-Start
);
495 signed long Result
= strtol(S
,&End
,10);
501 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
502 // ---------------------------------------------------------------------
504 unsigned long long pkgTagSection::FindULL(const char *Tag
, unsigned long long const &Default
) const
508 if (Find(Tag
,Start
,Stop
) == false)
511 // Copy it into a temp buffer so we can use strtoull
513 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
515 strncpy(S
,Start
,Stop
-Start
);
519 unsigned long long Result
= strtoull(S
,&End
,10);
525 // TagSection::FindB - Find boolean value /*{{{*/
526 // ---------------------------------------------------------------------
528 bool pkgTagSection::FindB(const char *Tag
, bool const &Default
) const
530 const char *Start
, *Stop
;
531 if (Find(Tag
, Start
, Stop
) == false)
533 return StringToBool(string(Start
, Stop
));
536 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
537 // ---------------------------------------------------------------------
538 /* The bits marked in Flag are masked on/off in Flags */
539 bool pkgTagSection::FindFlag(const char *Tag
,unsigned long &Flags
,
540 unsigned long Flag
) const
544 if (Find(Tag
,Start
,Stop
) == false)
546 return FindFlag(Flags
, Flag
, Start
, Stop
);
548 bool pkgTagSection::FindFlag(unsigned long &Flags
, unsigned long Flag
,
549 char const* Start
, char const* Stop
)
551 switch (StringToBool(string(Start
, Stop
)))
562 _error
->Warning("Unknown flag value: %s",string(Start
,Stop
).c_str());
568 APT_PURE
unsigned int pkgTagSection::Count() const { /*{{{*/
569 if (Tags
.empty() == true)
571 // the last element is just marking the end and isn't a real one
572 return Tags
.size() - 1;
575 // TFRewrite - Rewrite a control record /*{{{*/
576 // ---------------------------------------------------------------------
577 /* This writes the control record to stdout rewriting it as necessary. The
578 override map item specifies the rewriting rules to follow. This also
579 takes the time to sort the field list. */
581 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
583 static const char *iTFRewritePackageOrder
[] = {
591 "Original-Maintainer",
595 "Revision", // Obsolete
596 "Config-Version", // Obsolete
612 "MSDOS-Filename", // Obsolete
615 static const char *iTFRewriteSourceOrder
[] = {"Package",
622 "Original-Maintainer",
624 "Build-Depends-Indep",
626 "Build-Conflicts-Indep",
634 /* Two levels of initialization are used because gcc will set the symbol
635 size of an array to the length of the array, causing dynamic relinking
636 errors. Doing this makes the symbol size constant */
637 const char **TFRewritePackageOrder
= iTFRewritePackageOrder
;
638 const char **TFRewriteSourceOrder
= iTFRewriteSourceOrder
;
640 bool TFRewrite(FILE *Output
,pkgTagSection
const &Tags
,const char *Order
[],
641 TFRewriteData
*Rewrite
)
643 unsigned char Visited
[256]; // Bit 1 is Order, Bit 2 is Rewrite
644 for (unsigned I
= 0; I
!= 256; I
++)
647 // Set new tag up as necessary.
648 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
650 if (Rewrite
[J
].NewTag
== 0)
651 Rewrite
[J
].NewTag
= Rewrite
[J
].Tag
;
654 // Write all of the tags, in order.
657 for (unsigned int I
= 0; Order
[I
] != 0; I
++)
659 bool Rewritten
= false;
661 // See if this is a field that needs to be rewritten
662 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
664 if (strcasecmp(Rewrite
[J
].Tag
,Order
[I
]) == 0)
667 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
669 if (isspace(Rewrite
[J
].Rewrite
[0]))
670 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
672 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
679 // See if it is in the fragment
681 if (Tags
.Find(Order
[I
],Pos
) == false)
685 if (Rewritten
== true)
688 /* Write out this element, taking a moment to rewrite the tag
689 in case of changes of case. */
692 Tags
.Get(Start
,Stop
,Pos
);
694 if (fputs(Order
[I
],Output
) < 0)
695 return _error
->Errno("fputs","IO Error to output");
696 Start
+= strlen(Order
[I
]);
697 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
698 return _error
->Errno("fwrite","IO Error to output");
699 if (Stop
[-1] != '\n')
700 fprintf(Output
,"\n");
704 // Now write all the old tags that were missed.
705 for (unsigned int I
= 0; I
!= Tags
.Count(); I
++)
707 if ((Visited
[I
] & 1) == 1)
712 Tags
.Get(Start
,Stop
,I
);
713 const char *End
= Start
;
714 for (; End
< Stop
&& *End
!= ':'; End
++);
716 // See if this is a field that needs to be rewritten
717 bool Rewritten
= false;
718 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
720 if (stringcasecmp(Start
,End
,Rewrite
[J
].Tag
) == 0)
723 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
725 if (isspace(Rewrite
[J
].Rewrite
[0]))
726 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
728 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
736 if (Rewritten
== true)
739 // Write out this element
740 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
741 return _error
->Errno("fwrite","IO Error to output");
742 if (Stop
[-1] != '\n')
743 fprintf(Output
,"\n");
746 // Now write all the rewrites that were missed
747 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
749 if ((Visited
[J
] & 2) == 2)
752 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
754 if (isspace(Rewrite
[J
].Rewrite
[0]))
755 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
757 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
// pkgTagSection destructor: the body is empty, so no explicit cleanup is done here.
765 pkgTagSection::~pkgTagSection() {}