1 // -*- mode: cpp; mode: fold -*-
3 // $Id: tagfile.cc,v 1.35 2002/11/06 06:43:14 jgg Exp $
4 /* ######################################################################
6 Fast scanner for RFC-822 type header information
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
11 ##################################################################### */
13 // Include Files /*{{{*/
15 #pragma implementation "apt-pkg/tagfile.h"
18 #include <apt-pkg/tagfile.h>
19 #include <apt-pkg/error.h>
20 #include <apt-pkg/strutl.h>
31 // TagFile::pkgTagFile - Constructor /*{{{*/
32 // ---------------------------------------------------------------------
// Constructs a tag-file reader: Fd is initialised from *pFd and the Size
// member from the Size argument.
// NOTE(review): this text is a lossy extract - the embedded numbering jumps
// 34 -> 36 -> 39 -> 45, so the braces and several statements of the body are
// not shown here.
34 pkgTagFile::pkgTagFile(FileFd
*pFd
,unsigned long Size
) : Fd(*pFd
), Size(Size
)
// The visible statement under this test zeroes all three buffer pointers
// when the file is not open.
36 if (Fd
.IsOpen() == false)
39 Start
= End
= Buffer
= 0;
// Scan buffer of Size bytes, allocated with new[].
// NOTE(review): presumably released by the destructor, whose body is not
// visible in this extract - confirm.
45 Buffer
= new char[Size
];
52 // TagFile::~pkgTagFile - Destructor /*{{{*/
53 // ---------------------------------------------------------------------
// NOTE(review): only the signature survives in this extract; the body, and
// with it whatever releases Buffer, is not shown - confirm against the full
// file.
55 pkgTagFile::~pkgTagFile()
60 // TagFile::Step - Advance to the next section /*{{{*/
61 // ---------------------------------------------------------------------
62 /* If the Section Scanner fails we refill the buffer and try again. */
// Parses the section that begins at Start into Tag.
//   Tag - section object that is asked to scan the window [Start, End),
//         passed as a pointer plus a length
// A failed first scan is followed by a second attempt; if that fails too an
// error naming the package file is raised through _error->Error and its
// result is returned.
// NOTE(review): lossy extract - the lines between the two Scan calls
// (numbered 66-69) are missing; the header comment says the buffer is
// refilled there. The last argument of the Error call, the Start update and
// the final return are not shown either.
63 bool pkgTagFile::Step(pkgTagSection
&Tag
)
65 if (Tag
.Scan(Start
,End
- Start
) == false)
// Second attempt on the same [Start, End) expression.
70 if (Tag
.Scan(Start
,End
- Start
) == false)
71 return _error
->Error(_("Unable to parse package file %s (1)"),
// The running offset is advanced by the size of the section just scanned.
75 iOffset
+= Tag
.size();
81 // TagFile::Fill - Top up the buffer /*{{{*/
82 // ---------------------------------------------------------------------
83 /* This takes the bit at the end of the buffer and puts it at the start
84 then fills the rest from the file */
// Moves the unread bytes [Start, End) to the front of Buffer, then reads
// from Fd into the free space behind them. The visible comments say the
// tail of the data is then given a double newline if it lacks one.
// NOTE(review): lossy extract - braces, the conditions that guard the read
// and the end-of-file handling, the statement that appends the newline and
// every return are missing from this text.
85 bool pkgTagFile::Fill()
87 unsigned long EndSize
= End
- Start
;
88 unsigned long Actual
= 0;
// Slide the unread tail down to the start of the buffer.
90 memmove(Buffer
,Start
,EndSize
);
92 End
= Buffer
+ EndSize
;
96 // See if only a bit of the file is left
// The request covers all the free space after End; presumably Actual
// receives the number of bytes really read - confirm against FileFd::Read.
97 if (Fd
.Read(End
,Size
- (End
- Buffer
),&Actual
) == false)
// A read that did not deliver the full request is detected here.
99 if (Actual
!= Size
- (End
- Buffer
))
// At most 3 bytes carried over and nothing newly read.
106 if (EndSize
<= 3 && Actual
== 0)
// Fewer than 4 bytes of free space remain in the buffer.
108 if (Size
- (End
- Buffer
) < 4)
111 // Append a double new line if one does not exist
112 unsigned int LineCount
= 0;
// Walks backwards from the last byte over trailing '\n' / '\r' characters.
// NOTE(review): E starts at End - 1 and only decreases, so E - End is always
// negative and the "< 6" test can never end the loop; End - E < 6 looks
// like the intent. No lower bound against Buffer is visible either.
113 for (const char *E
= End
- 1; E
- End
< 6 && (*E
== '\n' || *E
== '\r'); E
--)
// Runs once for each newline still needed to reach two.
116 for (; LineCount
< 2; LineCount
++)
125 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
126 // ---------------------------------------------------------------------
127 /* This jumps to a pre-recorded file location and reads the record found there. */
// Positions the reader at file offset Offset and scans the section found
// there into Tag.
//   Tag    - section object that receives the scanned record
//   Offset - file offset of the wanted record
// Two paths are visible: a shortcut when Offset lies inside the data that is
// already buffered, and a seek-and-rescan path otherwise. If the last scan
// fails an error naming the file is raised through _error->Error.
// NOTE(review): lossy extract - the bodies of most branches, the buffer
// refills and the success returns are missing from this text.
129 bool pkgTagFile::Jump(pkgTagSection
&Tag
,unsigned long Offset
)
131 // We are within a buffer space of the next hit..
// True when Offset falls in [iOffset, iOffset + (End - Start)).
132 if (Offset
>= iOffset
&& iOffset
+ (End
- Start
) > Offset
)
// Distance from the current offset to the requested one.
134 unsigned long Dist
= Offset
- iOffset
;
140 // Reposition and reload..
143 if (Fd
.Seek(Offset
) == false)
// The buffered window is emptied: both ends point at the buffer start.
145 End
= Start
= Buffer
;
150 if (Tag
.Scan(Start
,End
- Start
) == true)
153 // This appends a double new line (for the real eof handling)
157 if (Tag
.Scan(Start
,End
- Start
) == false)
158 return _error
->Error(_("Unable to parse package file %s (2)"),Fd
.Name().c_str());
163 // TagSection::Scan - Scan for the end of the header information /*{{{*/
164 // ---------------------------------------------------------------------
165 /* This looks for the first double new line in the data stream. It also
166 indexes the tags in the section. The very simple hash function over the
167 tag name gives very good performance on the debian package files. */
/* Hashes a tag name into a small bucket number for the AlphaIndexes lookup
   table. Characters are consumed until End is reached (when one is given)
   or a ':' or NUL is seen.

   Text - start of the tag name
   End  - optional one-past-the-end limit; 0 means stop only on ':' or NUL

   Returns the bucket number.

   The 0x20 bit is cleared from every character before it is mixed in, so
   names that differ only in ASCII letter case land in the same bucket. The
   Find() methods compare names with strncasecmp; a case-sensitive hash
   would make them look in the wrong bucket for a tag that the comparison
   itself would accept.

   NOTE(review): the return statement is absent from the extract this was
   rebuilt from; masking to 8 bits assumes AlphaIndexes has 256 entries -
   confirm against apt-pkg/tagfile.h. */
inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
{
   unsigned long Res = 0;
   for (; Text != End && *Text != ':' && *Text != 0; Text++)
      Res = ((unsigned long)(unsigned char)(*Text) & 0xDF) ^ (Res << 2);
   return Res & 0xFF;
}
// Indexes one section of header text.
//   Start     - first byte of the text to scan
//   MaxLength - number of bytes available from Start
// Section and Stop are both set to Start, the hash table is cleared, and the
// text is walked line by line. Each line that does not begin with whitespace
// has its offset stored in Indexes and its slot entered in AlphaIndexes. A
// newline directly followed by another newline ends the record.
// NOTE(review): lossy extract - braces, the TagCount reset, the handling of
// a failed memchr and every return statement are missing from this text.
176 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
)
178 const char *End
= Start
+ MaxLength
;
179 Stop
= Section
= Start
;
180 memset(AlphaIndexes
,0,sizeof(AlphaIndexes
));
// One slot of Indexes is kept free: the end-of-record offset is stored at
// Indexes[TagCount] further down.
186 while (TagCount
+1 < sizeof(Indexes
)/sizeof(Indexes
[0]) && Stop
< End
)
188 // Start a new index and add it to the hash
// NOTE(review): Stop[0] is passed to isspace as a plain char - confirm that
// bytes above 0x7f cannot occur here, or cast to unsigned char.
189 if (isspace(Stop
[0]) == 0)
191 Indexes
[TagCount
++] = Stop
- Section
;
// TagCount has already been incremented, so the stored value is the tag's
// index plus one; the 0 left by the memset can then stand for "no tag".
192 AlphaIndexes
[AlphaHash(Stop
,End
)] = TagCount
;
// Move to the end of the current line.
// NOTE(review): memchr returns null when no '\n' is left; the check is
// presumably on the missing lines 196-199 - confirm.
195 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
// Step over carriage returns that follow the newline.
200 for (; Stop
+1 < End
&& Stop
[1] == '\r'; Stop
++);
202 // Double newline marks the end of the record
203 if (Stop
+1 < End
&& Stop
[1] == '\n')
// Offset of the record end, stored after the last tag offset.
205 Indexes
[TagCount
] = Stop
- Section
;
// Consume the run of '\n' / '\r' that separates this record from the next.
206 for (; Stop
< End
&& (Stop
[0] == '\n' || Stop
[0] == '\r'); Stop
++);
216 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
217 // ---------------------------------------------------------------------
218 /* There should be exactly 1 newline at the end of the buffer, no more. */
219 void pkgTagSection::Trim()
221 for (; Stop
> Section
+ 2 && (Stop
[-2] == '\n' || Stop
[-2] == '\r'); Stop
--);
224 // TagSection::Find - Locate a tag /*{{{*/
225 // ---------------------------------------------------------------------
226 /* This searches the section for a tag that matches the given string. */
// Looks up the field named Tag and reports its position.
//   Tag - NUL-terminated field name, without the colon
//   Pos - reference that is meant to receive the index of the match; the
//         assignment itself is on a line missing from this extract
// The starting slot comes from AlphaIndexes via AlphaHash(Tag); candidates
// are then compared against Tag with strncasecmp over strlen(Tag) bytes, at
// most TagCount times.
// NOTE(review): lossy extract - the empty-bucket check, the rest of the loop
// header (how I advances), the colon test itself and all return statements
// are missing from this text.
227 bool pkgTagSection::Find(const char *Tag
,unsigned &Pos
) const
229 unsigned int Length
= strlen(Tag
);
230 unsigned int I
= AlphaIndexes
[AlphaHash(Tag
)];
235 for (unsigned int Counter
= 0; Counter
!= TagCount
; Counter
++,
// Start of the candidate field inside the section text.
239 St
= Section
+ Indexes
[I
];
240 if (strncasecmp(Tag
,St
,Length
) != 0)
243 // Make sure the colon is in the right place
// C starts just past the name and skips any whitespace after it.
244 const char *C
= St
+ Length
;
245 for (; isspace(*C
) != 0; C
++);
256 // TagSection::Find - Locate a tag /*{{{*/
257 // ---------------------------------------------------------------------
258 /* This searches the section for a tag that matches the given string. */
// Looks up the field named Tag and reports the extent of its value.
//   Tag   - NUL-terminated field name, without the colon
//   Start - reference that is trimmed forward over leading whitespace and
//           ':' characters
//   End   - reference set to the start of the following index entry and then
//           trimmed back over trailing whitespace
// The search mirrors the (Tag, Pos) overload: a starting slot from
// AlphaIndexes, then strncasecmp against candidates, at most TagCount times.
// NOTE(review): lossy extract - the empty-bucket check, the rest of the loop
// header, the colon test, the assignment to Start, the condition that guards
// the "Internal parsing error" return and the other returns are missing.
259 bool pkgTagSection::Find(const char *Tag
,const char *&Start
,
260 const char *&End
) const
262 unsigned int Length
= strlen(Tag
);
263 unsigned int I
= AlphaIndexes
[AlphaHash(Tag
)];
268 for (unsigned int Counter
= 0; Counter
!= TagCount
; Counter
++,
// Start of the candidate field inside the section text.
272 St
= Section
+ Indexes
[I
];
273 if (strncasecmp(Tag
,St
,Length
) != 0)
276 // Make sure the colon is in the right place
277 const char *C
= St
+ Length
;
278 for (; isspace(*C
) != 0; C
++);
282 // Strip off the gunk from the start and end
// The value ends where the next index entry begins.
284 End
= Section
+ Indexes
[I
+1];
286 return _error
->Error("Internal parsing error");
// NOTE(review): both loops below read the character before testing the
// bound (*Start before Start < End, End[-1] before End > Start); putting the
// bound first would avoid touching a byte outside the range.
288 for (; (isspace(*Start
) != 0 || *Start
== ':') && Start
< End
; Start
++);
289 for (; isspace(End
[-1]) != 0 && End
> Start
; End
--);
298 // TagSection::FindS - Find a string /*{{{*/
299 // ---------------------------------------------------------------------
// Returns the value of the field named Tag as a string built from the
// [Start, End) range that Find() reports.
//   Tag - NUL-terminated field name
// NOTE(review): lossy extract - the declarations of Start and End and the
// value returned when Find() fails (line 306) are missing from this text.
301 string
pkgTagSection::FindS(const char *Tag
) const
305 if (Find(Tag
,Start
,End
) == false)
307 return string(Start
,End
);
310 // TagSection::FindI - Find an integer /*{{{*/
311 // ---------------------------------------------------------------------
// Reads the field named Tag as a base-10 integer.
//   Tag     - NUL-terminated field name
//   Default - fallback value; the lines that return it are not shown here
// The value text is copied into the local buffer S and parsed with strtol.
// NOTE(review): lossy extract - the declarations of Start, Stop, S and End,
// the statement that terminates S after the copy, and all return statements
// are missing from this text.
// NOTE(review): Default and Result are signed long while the return type is
// signed int, so the value may be narrowed on return - confirm that is
// acceptable.
313 signed int pkgTagSection::FindI(const char *Tag
,signed long Default
) const
317 if (Find(Tag
,Start
,Stop
) == false)
320 // Copy it into a temp buffer so we can use strtol
// Values that would not fit in S together with a terminator are caught here.
322 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
// strncpy writes no terminator when the count is shorter than the source;
// presumably the missing line 325 adds it - confirm.
324 strncpy(S
,Start
,Stop
-Start
);
328 signed long Result
= strtol(S
,&End
,10);
334 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
335 // ---------------------------------------------------------------------
336 /* The bits marked in Flag are masked on/off in Flags */
// Interprets the field named Tag as a yes/no value.
//   Tag   - NUL-terminated field name
//   Flags - flag word; per the header comment the bits in Flag are masked on
//           or off in it
//   Flag  - the bit(s) to change
// The value text is passed through StringToBool and the result is switched
// on; a value that is not recognised produces a warning through
// _error->Warning.
// NOTE(review): lossy extract - the declarations of Start and Stop, the case
// labels with the statements that change Flags, and all return statements
// are missing from this text.
337 bool pkgTagSection::FindFlag(const char *Tag
,unsigned long &Flags
,
338 unsigned long Flag
) const
342 if (Find(Tag
,Start
,Stop
) == false)
345 switch (StringToBool(string(Start
,Stop
)))
356 _error
->Warning("Unknown flag value: %s",string(Start
,Stop
).c_str());
363 // TFRewrite - Rewrite a control record /*{{{*/
364 // ---------------------------------------------------------------------
365 /* This writes the control record to Output, rewriting it as necessary. The
366 override map item specifies the rewriting rules to follow. This also
367 takes the time to sort the field list. */
369 /* The order of this list is taken from dpkg source lib/parse.c, the fieldinfos array. */
// Field-name order tables passed to TFRewrite() as its Order argument, plus
// the exported pointers that alias them.
// NOTE(review): lossy extract - most entries and the closing lines of both
// initialisers are missing (the embedded numbering runs 371-398 and 399-415
// but only a handful of lines survive), so neither list is complete as shown.

// Field order for binary package records.
371 static const char *iTFRewritePackageOrder
[] = {
382 "Revision", // Obsolete
383 "Config-Version", // Obsolete
396 "MSDOS-Filename", // Obsolete
// Field order for source package records.
399 static const char *iTFRewriteSourceOrder
[] = {"Package",
407 "Build-Depends-Indep",
409 "Build-Conflicts-Indep",
417 /* Two levels of initialization are used because gcc will set the symbol
418 size of an array to the length of the array, causing dynamic relinking
419 errors. Doing this makes the symbol size constant */
// Exported names: plain pointers to the file-local arrays above.
420 const char **TFRewritePackageOrder
= iTFRewritePackageOrder
;
421 const char **TFRewriteSourceOrder
= iTFRewriteSourceOrder
;
// Writes the fields of the control record Tags to Output, applying the
// replacement rules in Rewrite.
//   Output  - stdio stream the record is written to
//   Tags    - parsed section supplying the existing fields
//   Order   - null-terminated list of field names, handled first and in
//             this order
//   Rewrite - rule table, may be null; it ends at the entry whose Tag is
//             null. Entries with a null NewTag get NewTag = Tag written
//             back, so the caller's table is modified.
// Three passes are visible: the fields named in Order, then the section's
// fields not yet marked as visited, then the rules not yet marked as used.
// Failed fputs/fwrite calls are reported through _error->Errno and that
// result is returned; the fprintf results are not checked.
// NOTE(review): lossy extract - braces, the else/continue/break lines, the
// statements that set bits in Visited and the final return are missing.
// NOTE(review): Visited has 256 slots and is indexed both by field position
// and by rule index; no bound check on either is visible here - confirm.
423 bool TFRewrite(FILE *Output
,pkgTagSection
const &Tags
,const char *Order
[],
424 TFRewriteData
*Rewrite
)
426 unsigned char Visited
[256]; // Bit 1 is Order, Bit 2 is Rewrite
427 for (unsigned I
= 0; I
!= 256; I
++)
430 // Set new tag up as necessary.
431 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
433 if (Rewrite
[J
].NewTag
== 0)
434 Rewrite
[J
].NewTag
= Rewrite
[J
].Tag
;
437 // Write all of the tags, in order.
438 for (unsigned int I
= 0; Order
[I
] != 0; I
++)
440 bool Rewritten
= false;
442 // See if this is a field that needs to be rewritten
443 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
445 if (strcasecmp(Rewrite
[J
].Tag
,Order
[I
]) == 0)
// A rule with a null or empty replacement text writes nothing.
448 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
// A replacement that already starts with whitespace is written right after
// the colon; the second fprintf (presumably the else branch, whose line is
// missing) inserts a space.
450 if (isspace(Rewrite
[J
].Rewrite
[0]))
451 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
453 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
461 // See if it is in the fragment
463 if (Tags
.Find(Order
[I
],Pos
) == false)
467 if (Rewritten
== true)
470 /* Write out this element, taking a moment to rewrite the tag
471 in case of changes of case. */
474 Tags
.Get(Start
,Stop
,Pos
);
// The name is written with the spelling from Order, then the original text
// is written from just past a name of that length.
476 if (fputs(Order
[I
],Output
) < 0)
477 return _error
->Errno("fputs","IO Error to output");
478 Start
+= strlen(Order
[I
]);
479 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
480 return _error
->Errno("fwrite","IO Error to output");
// Make sure the field ends with a newline.
481 if (Stop
[-1] != '\n')
482 fprintf(Output
,"\n");
485 // Now write all the old tags that were missed.
486 for (unsigned int I
= 0; I
!= Tags
.Count(); I
++)
// Bit 1 marks fields already handled in the Order pass.
488 if ((Visited
[I
] & 1) == 1)
493 Tags
.Get(Start
,Stop
,I
);
// End is advanced to the colon after the field name, or to Stop if there is
// none.
494 const char *End
= Start
;
495 for (; End
< Stop
&& *End
!= ':'; End
++);
497 // See if this is a field that needs to be rewritten
498 bool Rewritten
= false;
499 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
501 if (stringcasecmp(Start
,End
,Rewrite
[J
].Tag
) == 0)
504 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
506 if (isspace(Rewrite
[J
].Rewrite
[0]))
507 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
509 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
517 if (Rewritten
== true)
520 // Write out this element
521 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
522 return _error
->Errno("fwrite","IO Error to output");
523 if (Stop
[-1] != '\n')
524 fprintf(Output
,"\n");
527 // Now write all the rewrites that were missed
528 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
// Bit 2 marks rules already applied in one of the earlier passes.
530 if ((Visited
[J
] & 2) == 2)
533 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
535 if (isspace(Rewrite
[J
].Rewrite
[0]))
536 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
538 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);