]>
git.saurik.com Git - apt.git/blob - apt-pkg/tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
6 Fast scanner for RFC-822 type header information
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
11 ##################################################################### */
13 // Include Files /*{{{*/
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
// Private state holder for pkgTagFile. The visible constructor binds Fd to
// the FileFd passed in, starts Buffer/Start/End as NULL, Done as false and
// iOffset as 0.
// NOTE(review): several lines of this class (braces, remaining initialisers
// and member declarations) are absent from this listing -- confirm against
// the upstream file before relying on the layout shown here.
30 class pkgTagFilePrivate
33 pkgTagFilePrivate(FileFd
*pFd
, unsigned long long Size
) : Fd(*pFd
), Buffer(NULL
),
34 Start(NULL
), End(NULL
),
35 Done(false), iOffset(0),
// Running offset counter, initialised to 0 above.
44 unsigned long long iOffset
;
// presumably the capacity of Buffer in bytes -- TODO confirm
45 unsigned long long Size
;
48 // TagFile::pkgTagFile - Constructor /*{{{*/
49 // ---------------------------------------------------------------------
// Creates the private state for pFd and allocates a scan buffer of Size
// chars with malloc; on success Start and End both point at the (empty)
// buffer start.
// NOTE(review): the statements guarded by the two `if` tests below, and the
// size adjustment the first comment describes, are on lines missing from
// this listing.
51 pkgTagFile::pkgTagFile(FileFd
*pFd
,unsigned long long Size
)
53 /* The size is increased by 4: if we start with the size of the file
54 itself we need to try to read 1 char more to see an EOF faster, 1
55 char the end-pointer can be on and maybe 2 newlines need to be added
56 to the end of the file -> 4 extra chars */
58 d
= new pkgTagFilePrivate(pFd
, Size
);
// Without an open file there is nothing to buffer: clear all three pointers.
60 if (d
->Fd
.IsOpen() == false)
61 d
->Start
= d
->End
= d
->Buffer
= 0;
63 d
->Buffer
= (char*)malloc(sizeof(char) * Size
);
// Allocation-failure check; its handling is not visible here.
65 if (d
->Buffer
== NULL
)
70 d
->Start
= d
->End
= d
->Buffer
;
76 // TagFile::~pkgTagFile - Destructor /*{{{*/
77 // ---------------------------------------------------------------------
79 pkgTagFile::~pkgTagFile()
85 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
86 unsigned long pkgTagFile::Offset()
91 // TagFile::Resize - Resize the internal buffer /*{{{*/
92 // ---------------------------------------------------------------------
93 /* Resize the internal buffer (double it in size). Fail if a maximum size is reached. */
// Grows the internal buffer by delegating to Resize(d->Size * 2) and returns
// that call's result.
// NOTE(review): the statement controlled by the size guard is missing from
// this listing -- presumably it returns false; confirm against upstream.
96 bool pkgTagFile::Resize()
98 // fail if the buffer grows too big
99 if(d
->Size
> 1024*1024+1)
102 return Resize(d
->Size
* 2);
// Reallocates d->Buffer to newSize chars and re-anchors Start/End inside the
// new allocation.
// NOTE(review): the handling of a failed realloc and the final return are on
// lines missing from this listing.
104 bool pkgTagFile::Resize(unsigned long long const newSize
)
// Remember how much unread data is pending before realloc, because realloc
// may move the block and leave the old Start/End pointers dangling.
106 unsigned long long const EndSize
= d
->End
- d
->Start
;
108 // get new buffer and use it
109 char* newBuffer
= (char*)realloc(d
->Buffer
, sizeof(char) * newSize
);
110 if (newBuffer
== NULL
)
112 d
->Buffer
= newBuffer
;
115 // update the start/end pointers to the new buffer
// NOTE(review): Start is reset to the buffer start, which keeps the pending
// data only if Start already equalled Buffer before the call (Fill() leaves
// it that way) -- confirm every caller guarantees this.
116 d
->Start
= d
->Buffer
;
117 d
->End
= d
->Start
+ EndSize
;
121 // TagFile::Step - Advance to the next section /*{{{*/
122 // ---------------------------------------------------------------------
123 /* If the Section Scanner fails we refill the buffer and try again.
124 * If that fails too, double the buffer size and try again until a
125 * maximum buffer size is reached. */
// Scans the next section from the buffered data [Start, End) into Tag.
// While Tag.Scan fails, the loop body retries the scan and, if Resize()
// fails, reports a parse error naming the file. Afterwards Start and
// iOffset are both advanced by Tag.size().
// NOTE(review): the lines between 129 and 134 (presumably the buffer refill)
// and the final return are missing from this listing.
127 bool pkgTagFile::Step(pkgTagSection
&Tag
)
129 while (Tag
.Scan(d
->Start
,d
->End
- d
->Start
) == false)
// Second attempt on the same range.
134 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
))
137 if (Resize() == false)
138 return _error
->Error(_("Unable to parse package file %s (1)"),
139 d
->Fd
.Name().c_str());
// Consume the section just scanned: move the read pointer and the running
// offset forward by the same amount.
141 d
->Start
+= Tag
.size();
142 d
->iOffset
+= Tag
.size();
148 // TagFile::Fill - Top up the buffer /*{{{*/
149 // ---------------------------------------------------------------------
150 /* This takes the bit at the end of the buffer and puts it at the start
151 then fills the rest from the file */
// Moves the unread tail [Start, End) to the front of the buffer and, unless
// Done is set, reads more data from the file into the space after End.
// NOTE(review): most branch bodies and all return statements of this
// function are on lines missing from this listing.
152 bool pkgTagFile::Fill()
154 unsigned long long EndSize
= d
->End
- d
->Start
;
155 unsigned long long Actual
= 0;
// memmove rather than memcpy: source and destination lie in the same buffer
// and may overlap.
157 memmove(d
->Buffer
,d
->Start
,EndSize
);
158 d
->Start
= d
->Buffer
;
159 d
->End
= d
->Buffer
+ EndSize
;
161 if (d
->Done
== false)
163 // See if only a bit of the file is left
// Free space after End, less one char held in reserve.
164 unsigned long long const dataSize
= d
->Size
- ((d
->End
- d
->Buffer
) + 1);
165 if (d
->Fd
.Read(d
->End
, dataSize
, &Actual
) == false)
// A short read; presumably treated as end of file -- TODO confirm.
167 if (Actual
!= dataSize
)
174 if (EndSize
<= 3 && Actual
== 0)
// Fewer than 4 chars of room left after End.
176 if (d
->Size
- (d
->End
- d
->Buffer
) < 4)
179 // Append a double new line if one does not exist
180 unsigned int LineCount
= 0;
// Walk backwards from the last buffered char over trailing '\n'/'\r'.
// NOTE(review): E only decreases, so `E - d->End < 6` is always true and
// does not bound the scan; nothing visible here stops E from moving below
// d->Buffer when the buffer holds only newlines -- confirm.
181 for (const char *E
= d
->End
- 1; E
- d
->End
< 6 && (*E
== '\n' || *E
== '\r'); E
--)
// Buffer-full check made before the newline-appending loop below.
186 if ((unsigned)(d
->End
- d
->Buffer
) >= d
->Size
)
188 for (; LineCount
< 2; LineCount
++)
198 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
199 // ---------------------------------------------------------------------
200 /* This jumps to a pre-recorded file location and reads the record found there. */
// Positions the reader at file offset Offset and scans the section there
// into Tag. If Offset falls inside the data already buffered, the scan runs
// on the buffer; otherwise the file is seeked and the buffer emptied first.
// NOTE(review): several statements (pointer adjustment by Dist, the refill
// calls, the returns) are on lines missing from this listing.
202 bool pkgTagFile::Jump(pkgTagSection
&Tag
,unsigned long long Offset
)
204 // We are within a buffer space of the next hit..
205 if (Offset
>= d
->iOffset
&& d
->iOffset
+ (d
->End
- d
->Start
) > Offset
)
// Distance from the current buffer position to the requested offset.
207 unsigned long long Dist
= Offset
- d
->iOffset
;
210 // if we have seen the end, don't ask for more
212 return Tag
.Scan(d
->Start
, d
->End
- d
->Start
);
217 // Reposition and reload..
220 if (d
->Fd
.Seek(Offset
) == false)
// Discard everything buffered so far.
222 d
->End
= d
->Start
= d
->Buffer
;
227 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
) == true)
230 // This appends a double new line (for the real eof handling)
// Final attempt; failure is reported as a parse error naming the file.
234 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
) == false)
235 return _error
->Error(_("Unable to parse package file %s (2)"),d
->Fd
.Name().c_str());
240 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
241 // ---------------------------------------------------------------------
// Builds an empty section: Section, Stop and d are null, TagCount is 0, and
// both index tables (Indexes, AlphaIndexes) are zero-filled.
243 pkgTagSection::pkgTagSection()
244 : Section(0), TagCount(0), d(NULL
), Stop(0)
246 memset(&Indexes
, 0, sizeof(Indexes
));
247 memset(&AlphaIndexes
, 0, sizeof(AlphaIndexes
));
250 // TagSection::Scan - Scan for the end of the header information /*{{{*/
251 // ---------------------------------------------------------------------
252 /* This looks for the first double new line in the data stream.
253 It also indexes the tags in the section. */
// Scans at most MaxLength chars starting at Start, recording where each tag
// line begins (as an offset from Section) in Indexes and hashing it into
// AlphaIndexes, until a blank line ends the record.
// NOTE(review): the TagCount reset, several guards and every return
// statement are on lines missing from this listing.
254 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
)
256 const char *End
= Start
+ MaxLength
;
257 Stop
= Section
= Start
;
258 memset(AlphaIndexes
,0,sizeof(AlphaIndexes
));
// `TagCount+1 <` leaves one slot free for the end-of-record entry that is
// written to Indexes[TagCount] further down.
264 while (TagCount
+1 < sizeof(Indexes
)/sizeof(Indexes
[0]) && Stop
< End
)
266 TrimRecord(true,End
);
268 // this can happen when TrimRecord trims away the entire Record
269 // (e.g. because it just contains comments)
273 // Start a new index and add it to the hash
// A line that begins with whitespace does not start a new tag.
274 if (isspace(Stop
[0]) == 0)
276 Indexes
[TagCount
++] = Stop
- Section
;
// TagCount was already incremented, so the hash table holds index+1;
// presumably 0 (the memset value) then means "no entry" -- TODO confirm.
277 AlphaIndexes
[AlphaHash(Stop
,End
)] = TagCount
;
// Jump to the end of the current line; memchr yields NULL if no newline
// remains in [Stop, End).
280 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
// Step over carriage returns that directly follow the newline.
285 for (; Stop
+1 < End
&& Stop
[1] == '\r'; Stop
++)
289 // Double newline marks the end of the record
290 if (Stop
+1 < End
&& Stop
[1] == '\n')
// Closing entry: offset at which the last tag's data ends.
292 Indexes
[TagCount
] = Stop
- Section
;
293 TrimRecord(false,End
);
303 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
304 // ---------------------------------------------------------------------
305 /* There should be exactly 2 newlines at the end of the record, no more. */
// Advances Stop over a run of '\n' / '\r' characters, never moving past End.
// NOTE(review): the statement that belongs to the BeforeRecord test (line
// 309 of the original) is missing from this listing, so it cannot be told
// from here for which value of BeforeRecord the loop actually runs.
306 void pkgTagSection::TrimRecord(bool BeforeRecord
, const char*& End
)
308 if (BeforeRecord
== true)
310 for (; Stop
< End
&& (Stop
[0] == '\n' || Stop
[0] == '\r'); Stop
++);
313 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
314 // ---------------------------------------------------------------------
315 /* There should be exactly 1 newline at the end of the buffer, no more. */
// Moves Stop backwards for as long as the char two positions before it is
// '\n' or '\r', and never below Section + 2.
316 void pkgTagSection::Trim()
318 for (; Stop
> Section
+ 2 && (Stop
[-2] == '\n' || Stop
[-2] == '\r'); Stop
--);
321 // TagSection::Exists - return True if a tag exists /*{{{*/
// Returns whatever Find(Tag, tmp) returns.
// NOTE(review): the declaration of tmp is on a line missing from this
// listing; presumably a local unsigned int that receives the position and
// is then discarded.
322 bool pkgTagSection::Exists(const char* const Tag
)
325 return Find(Tag
, tmp
);
328 // TagSection::Find - Locate a tag /*{{{*/
329 // ---------------------------------------------------------------------
330 /* This searches the section for a tag that matches the given string. */
// Looks up Tag in the section index; Pos is the output parameter.
// NOTE(review): the early-out for an empty hash slot, the declaration of St,
// the colon test, the assignment to Pos and the returns are on lines
// missing from this listing.
331 bool pkgTagSection::Find(const char *Tag
,unsigned int &Pos
) const
333 unsigned int Length
= strlen(Tag
);
// Start the search at the slot the tag hashes to.
334 unsigned int I
= AlphaIndexes
[AlphaHash(Tag
)];
// Visit at most TagCount entries.
339 for (unsigned int Counter
= 0; Counter
!= TagCount
; Counter
++,
// St is the start of candidate tag line I within the section text.
343 St
= Section
+ Indexes
[I
];
// Case-insensitive comparison of the first Length chars only.
344 if (strncasecmp(Tag
,St
,Length
) != 0)
347 // Make sure the colon is in the right place
348 const char *C
= St
+ Length
;
// Skip whitespace between the tag name and what follows it.
349 for (; isspace(*C
) != 0; C
++);
360 // TagSection::Find - Locate a tag /*{{{*/
361 // ---------------------------------------------------------------------
362 /* This searches the section for a tag that matches the given string. */
// Looks up Tag and reports its value as the range [Start, End), with the
// leading colon/whitespace and the trailing whitespace stripped.
// NOTE(review): the early-out for an empty hash slot, the declaration of St,
// the colon test, the assignment to Start and the returns are on lines
// missing from this listing.
363 bool pkgTagSection::Find(const char *Tag
,const char *&Start
,
364 const char *&End
) const
366 unsigned int Length
= strlen(Tag
);
// Start the search at the slot the tag hashes to.
367 unsigned int I
= AlphaIndexes
[AlphaHash(Tag
)];
// Visit at most TagCount entries.
372 for (unsigned int Counter
= 0; Counter
!= TagCount
; Counter
++,
376 St
= Section
+ Indexes
[I
];
// Case-insensitive comparison of the first Length chars only.
377 if (strncasecmp(Tag
,St
,Length
) != 0)
380 // Make sure the colon is in the right place
381 const char *C
= St
+ Length
;
382 for (; isspace(*C
) != 0; C
++);
386 // Strip off the gunk from the start and end
// The value ends where the next index entry begins.
388 End
= Section
+ Indexes
[I
+1];
390 return _error
->Error("Internal parsing error");
// NOTE(review): in both loops the pointer is dereferenced before the bounds
// test in the same condition (`*Start` before `Start < End`, `End[-1]`
// before `End > Start`) -- confirm the reads are always in range.
392 for (; (isspace(*Start
) != 0 || *Start
== ':') && Start
< End
; Start
++);
393 for (; isspace(End
[-1]) != 0 && End
> Start
; End
--);
402 // TagSection::FindS - Find a string /*{{{*/
403 // ---------------------------------------------------------------------
// Returns the value of Tag as a string built from the [Start, End) range
// that Find() reports.
// NOTE(review): the declarations of Start/End and the value returned when
// Find() fails are on lines missing from this listing.
405 string
pkgTagSection::FindS(const char *Tag
) const
409 if (Find(Tag
,Start
,End
) == false)
411 return string(Start
,End
);
414 // TagSection::FindI - Find an integer /*{{{*/
415 // ---------------------------------------------------------------------
// Parses the value of Tag as a base-10 integer using strtol.
// NOTE(review): Default and Result are signed long while the function
// returns signed int, so a value outside the int range is narrowed on
// return. The declarations of Start/Stop/S/End, the fallback to Default and
// the returns are on lines missing from this listing.
417 signed int pkgTagSection::FindI(const char *Tag
,signed long Default
) const
421 if (Find(Tag
,Start
,Stop
) == false)
424 // Copy it into a temp buffer so we can use strtol
// Guard: the value must fit in S with room left for a terminator.
426 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
// strncpy does not add a terminating NUL when it copies exactly this many
// chars; presumably S is terminated on the following (missing) line.
428 strncpy(S
,Start
,Stop
-Start
);
432 signed long Result
= strtol(S
,&End
,10);
438 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
439 // ---------------------------------------------------------------------
// Parses the value of Tag as a base-10 unsigned long long using strtoull.
// NOTE(review): the declarations of Start/Stop/S/End, the fallback to
// Default and the returns are on lines missing from this listing.
441 unsigned long long pkgTagSection::FindULL(const char *Tag
, unsigned long long const &Default
) const
445 if (Find(Tag
,Start
,Stop
) == false)
448 // Copy it into a temp buffer so we can use strtoull
// Guard: the value must fit in S with room left for a terminator.
450 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
// strncpy does not add a terminating NUL when it copies exactly this many
// chars; presumably S is terminated on the following (missing) line.
452 strncpy(S
,Start
,Stop
-Start
);
456 unsigned long long Result
= strtoull(S
,&End
,10);
462 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
463 // ---------------------------------------------------------------------
464 /* The bits marked in Flag are masked on/off in Flags */
// Locates Tag with Find() and hands its [Start, Stop) value range to the
// four-argument FindFlag overload, returning that overload's result.
// NOTE(review): the declarations of Start/Stop and the return taken when
// Find() fails are on lines missing from this listing.
465 bool pkgTagSection::FindFlag(const char *Tag
,unsigned long &Flags
,
466 unsigned long Flag
) const
470 if (Find(Tag
,Start
,Stop
) == false)
472 return FindFlag(Flags
, Flag
, Start
, Stop
);
// Interprets the text in [Start, Stop) with StringToBool and switches on the
// result; a warning quoting the text is emitted for an unknown flag value.
// NOTE(review): the case labels, the updates to Flags and the returns are on
// lines missing from this listing. The `const` on the by-value bool return
// type has no effect for callers.
474 bool const pkgTagSection::FindFlag(unsigned long &Flags
, unsigned long Flag
,
475 char const* Start
, char const* Stop
)
477 switch (StringToBool(string(Start
, Stop
)))
488 _error
->Warning("Unknown flag value: %s",string(Start
,Stop
).c_str());
494 // TFRewrite - Rewrite a control record /*{{{*/
495 // ---------------------------------------------------------------------
496 /* This writes the control record to the Output stream, rewriting it as
497 necessary. The override map item specifies the rewriting rules to follow.
498 This also takes the time to sort the field list. */
500 /* The order of this list is taken from the fieldinfos list in the dpkg source file lib/parse.c. */
502 static const char *iTFRewritePackageOrder
[] = {
510 "Original-Maintainer",
514 "Revision", // Obsolete
515 "Config-Version", // Obsolete
531 "MSDOS-Filename", // Obsolete
534 static const char *iTFRewriteSourceOrder
[] = {"Package",
541 "Original-Maintainer",
543 "Build-Depends-Indep",
545 "Build-Conflicts-Indep",
553 /* Two levels of initialization are used because gcc will set the symbol
554 size of an array to the length of the array, causing dynamic relinking
555 errors. Doing this makes the symbol size constant */
556 const char **TFRewritePackageOrder
= iTFRewritePackageOrder
;
557 const char **TFRewriteSourceOrder
= iTFRewriteSourceOrder
;
559 bool TFRewrite(FILE *Output
,pkgTagSection
const &Tags
,const char *Order
[],
560 TFRewriteData
*Rewrite
)
562 unsigned char Visited
[256]; // Bit 1 is Order, Bit 2 is Rewrite
563 for (unsigned I
= 0; I
!= 256; I
++)
566 // Set new tag up as necessary.
567 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
569 if (Rewrite
[J
].NewTag
== 0)
570 Rewrite
[J
].NewTag
= Rewrite
[J
].Tag
;
573 // Write all of the tags, in order.
576 for (unsigned int I
= 0; Order
[I
] != 0; I
++)
578 bool Rewritten
= false;
580 // See if this is a field that needs to be rewritten
581 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
583 if (strcasecmp(Rewrite
[J
].Tag
,Order
[I
]) == 0)
586 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
588 if (isspace(Rewrite
[J
].Rewrite
[0]))
589 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
591 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
598 // See if it is in the fragment
600 if (Tags
.Find(Order
[I
],Pos
) == false)
604 if (Rewritten
== true)
607 /* Write out this element, taking a moment to rewrite the tag
608 in case of changes of case. */
611 Tags
.Get(Start
,Stop
,Pos
);
613 if (fputs(Order
[I
],Output
) < 0)
614 return _error
->Errno("fputs","IO Error to output");
615 Start
+= strlen(Order
[I
]);
616 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
617 return _error
->Errno("fwrite","IO Error to output");
618 if (Stop
[-1] != '\n')
619 fprintf(Output
,"\n");
623 // Now write all the old tags that were missed.
624 for (unsigned int I
= 0; I
!= Tags
.Count(); I
++)
626 if ((Visited
[I
] & 1) == 1)
631 Tags
.Get(Start
,Stop
,I
);
632 const char *End
= Start
;
633 for (; End
< Stop
&& *End
!= ':'; End
++);
635 // See if this is a field that needs to be rewritten
636 bool Rewritten
= false;
637 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
639 if (stringcasecmp(Start
,End
,Rewrite
[J
].Tag
) == 0)
642 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
644 if (isspace(Rewrite
[J
].Rewrite
[0]))
645 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
647 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
655 if (Rewritten
== true)
658 // Write out this element
659 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
660 return _error
->Errno("fwrite","IO Error to output");
661 if (Stop
[-1] != '\n')
662 fprintf(Output
,"\n");
665 // Now write all the rewrites that were missed
666 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
668 if ((Visited
[J
] & 2) == 2)
671 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
673 if (isspace(Rewrite
[J
].Rewrite
[0]))
674 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
676 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);