]>
git.saurik.com Git - apt.git/blob - apt-pkg/tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
6 Fast scanner for RFC-822 type header information
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
11 ##################################################################### */
13 // Include Files /*{{{*/
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
32 class pkgTagFilePrivate
35 pkgTagFilePrivate(FileFd
*pFd
, unsigned long long Size
) : Fd(*pFd
), Buffer(NULL
),
36 Start(NULL
), End(NULL
),
37 Done(false), iOffset(0),
46 unsigned long long iOffset
;
47 unsigned long long Size
;
50 // TagFile::pkgTagFile - Constructor /*{{{*/
51 // ---------------------------------------------------------------------
53 pkgTagFile::pkgTagFile(FileFd
*pFd
,unsigned long long Size
)
55 /* The size is increased by 4 because if we start with the size of the
56 file we need to try to read 1 char more to see an EOF faster, 1
57 char the end-pointer can be on and maybe 2 newlines need to be added
58 to the end of the file -> 4 extra chars */
60 d
= new pkgTagFilePrivate(pFd
, Size
);
62 if (d
->Fd
.IsOpen() == false)
63 d
->Start
= d
->End
= d
->Buffer
= 0;
65 d
->Buffer
= (char*)malloc(sizeof(char) * Size
);
67 if (d
->Buffer
== NULL
)
72 d
->Start
= d
->End
= d
->Buffer
;
78 // TagFile::~pkgTagFile - Destructor /*{{{*/
79 // ---------------------------------------------------------------------
81 pkgTagFile::~pkgTagFile()
87 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
88 APT_PURE
unsigned long pkgTagFile::Offset()
93 // TagFile::Resize - Resize the internal buffer /*{{{*/
94 // ---------------------------------------------------------------------
95 /* Resize the internal buffer (double it in size). Fail if a maximum size
98 bool pkgTagFile::Resize()
100 // fail if the buffer grows too big
101 if(d
->Size
> 1024*1024+1)
104 return Resize(d
->Size
* 2);
106 bool pkgTagFile::Resize(unsigned long long const newSize
)
108 unsigned long long const EndSize
= d
->End
- d
->Start
;
110 // get new buffer and use it
111 char* newBuffer
= (char*)realloc(d
->Buffer
, sizeof(char) * newSize
);
112 if (newBuffer
== NULL
)
114 d
->Buffer
= newBuffer
;
117 // update the start/end pointers to the new buffer
118 d
->Start
= d
->Buffer
;
119 d
->End
= d
->Start
+ EndSize
;
123 // TagFile::Step - Advance to the next section /*{{{*/
124 // ---------------------------------------------------------------------
125 /* If the Section Scanner fails we refill the buffer and try again.
126 * If that fails too, double the buffer size and try again until a
127 * maximum buffer is reached.
129 bool pkgTagFile::Step(pkgTagSection
&Tag
)
131 while (Tag
.Scan(d
->Start
,d
->End
- d
->Start
) == false)
136 if(Tag
.Scan(d
->Start
,d
->End
- d
->Start
))
139 if (Resize() == false)
140 return _error
->Error(_("Unable to parse package file %s (1)"),
141 d
->Fd
.Name().c_str());
143 d
->Start
+= Tag
.size();
144 d
->iOffset
+= Tag
.size();
150 // TagFile::Fill - Top up the buffer /*{{{*/
151 // ---------------------------------------------------------------------
152 /* This takes the bit at the end of the buffer and puts it at the start
153 then fills the rest from the file */
154 bool pkgTagFile::Fill()
156 unsigned long long EndSize
= d
->End
- d
->Start
;
157 unsigned long long Actual
= 0;
159 memmove(d
->Buffer
,d
->Start
,EndSize
);
160 d
->Start
= d
->Buffer
;
161 d
->End
= d
->Buffer
+ EndSize
;
163 if (d
->Done
== false)
165 // See if only a bit of the file is left
166 unsigned long long const dataSize
= d
->Size
- ((d
->End
- d
->Buffer
) + 1);
167 if (d
->Fd
.Read(d
->End
, dataSize
, &Actual
) == false)
169 if (Actual
!= dataSize
)
176 if (EndSize
<= 3 && Actual
== 0)
178 if (d
->Size
- (d
->End
- d
->Buffer
) < 4)
181 // Append a double new line if one does not exist
182 unsigned int LineCount
= 0;
183 for (const char *E
= d
->End
- 1; E
- d
->End
< 6 && (*E
== '\n' || *E
== '\r'); E
--)
188 if ((unsigned)(d
->End
- d
->Buffer
) >= d
->Size
)
190 for (; LineCount
< 2; LineCount
++)
200 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
201 // ---------------------------------------------------------------------
202 /* This jumps to a pre-recorded file location and reads the record
204 bool pkgTagFile::Jump(pkgTagSection
&Tag
,unsigned long long Offset
)
206 // We are within a buffer space of the next hit..
207 if (Offset
>= d
->iOffset
&& d
->iOffset
+ (d
->End
- d
->Start
) > Offset
)
209 unsigned long long Dist
= Offset
- d
->iOffset
;
212 // if we have seen the end, don't ask for more
214 return Tag
.Scan(d
->Start
, d
->End
- d
->Start
);
219 // Reposition and reload..
222 if (d
->Fd
.Seek(Offset
) == false)
224 d
->End
= d
->Start
= d
->Buffer
;
229 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
) == true)
232 // This appends a double new line (for the real eof handling)
236 if (Tag
.Scan(d
->Start
, d
->End
- d
->Start
) == false)
237 return _error
->Error(_("Unable to parse package file %s (2)"),d
->Fd
.Name().c_str());
242 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
243 // ---------------------------------------------------------------------
245 pkgTagSection::pkgTagSection()
246 : Section(0), TagCount(0), d(NULL
), Stop(0)
248 memset(&Indexes
, 0, sizeof(Indexes
));
249 memset(&AlphaIndexes
, 0, sizeof(AlphaIndexes
));
252 // TagSection::Scan - Scan for the end of the header information /*{{{*/
253 // ---------------------------------------------------------------------
254 /* This looks for the first double new line in the data stream.
255 It also indexes the tags in the section. */
256 bool pkgTagSection::Scan(const char *Start
,unsigned long MaxLength
)
258 const char *End
= Start
+ MaxLength
;
259 Stop
= Section
= Start
;
260 memset(AlphaIndexes
,0,sizeof(AlphaIndexes
));
266 while (TagCount
+1 < sizeof(Indexes
)/sizeof(Indexes
[0]) && Stop
< End
)
268 TrimRecord(true,End
);
270 // this can happen when TrimRecord trims away the entire Record
271 // (e.g. because it just contains comments)
275 // Start a new index and add it to the hash
276 if (isspace(Stop
[0]) == 0)
278 Indexes
[TagCount
++] = Stop
- Section
;
279 AlphaIndexes
[AlphaHash(Stop
,End
)] = TagCount
;
282 Stop
= (const char *)memchr(Stop
,'\n',End
- Stop
);
287 for (; Stop
+1 < End
&& Stop
[1] == '\r'; Stop
++)
291 // Double newline marks the end of the record
292 if (Stop
+1 < End
&& Stop
[1] == '\n')
294 Indexes
[TagCount
] = Stop
- Section
;
295 TrimRecord(false,End
);
305 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
306 // ---------------------------------------------------------------------
307 /* There should be exactly 2 newlines at the end of the record, no more. */
308 void pkgTagSection::TrimRecord(bool BeforeRecord
, const char*& End
)
310 if (BeforeRecord
== true)
312 for (; Stop
< End
&& (Stop
[0] == '\n' || Stop
[0] == '\r'); Stop
++);
315 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
316 // ---------------------------------------------------------------------
317 /* There should be exactly 1 newline at the end of the buffer, no more. */
318 void pkgTagSection::Trim()
320 for (; Stop
> Section
+ 2 && (Stop
[-2] == '\n' || Stop
[-2] == '\r'); Stop
--);
323 // TagSection::Exists - return True if a tag exists /*{{{*/
324 bool pkgTagSection::Exists(const char* const Tag
)
327 return Find(Tag
, tmp
);
330 // TagSection::Find - Locate a tag /*{{{*/
331 // ---------------------------------------------------------------------
332 /* This searches the section for a tag that matches the given string. */
333 bool pkgTagSection::Find(const char *Tag
,unsigned int &Pos
) const
335 unsigned int Length
= strlen(Tag
);
336 unsigned int I
= AlphaIndexes
[AlphaHash(Tag
)];
341 for (unsigned int Counter
= 0; Counter
!= TagCount
; Counter
++,
345 St
= Section
+ Indexes
[I
];
346 if (strncasecmp(Tag
,St
,Length
) != 0)
349 // Make sure the colon is in the right place
350 const char *C
= St
+ Length
;
351 for (; isspace(*C
) != 0; C
++);
362 // TagSection::Find - Locate a tag /*{{{*/
363 // ---------------------------------------------------------------------
364 /* This searches the section for a tag that matches the given string. */
365 bool pkgTagSection::Find(const char *Tag
,const char *&Start
,
366 const char *&End
) const
368 unsigned int Length
= strlen(Tag
);
369 unsigned int I
= AlphaIndexes
[AlphaHash(Tag
)];
374 for (unsigned int Counter
= 0; Counter
!= TagCount
; Counter
++,
378 St
= Section
+ Indexes
[I
];
379 if (strncasecmp(Tag
,St
,Length
) != 0)
382 // Make sure the colon is in the right place
383 const char *C
= St
+ Length
;
384 for (; isspace(*C
) != 0; C
++);
388 // Strip off the gunk from the start and end
390 End
= Section
+ Indexes
[I
+1];
392 return _error
->Error("Internal parsing error");
394 for (; (isspace(*Start
) != 0 || *Start
== ':') && Start
< End
; Start
++);
395 for (; isspace(End
[-1]) != 0 && End
> Start
; End
--);
404 // TagSection::FindS - Find a string /*{{{*/
405 // ---------------------------------------------------------------------
407 string
pkgTagSection::FindS(const char *Tag
) const
411 if (Find(Tag
,Start
,End
) == false)
413 return string(Start
,End
);
416 // TagSection::FindI - Find an integer /*{{{*/
417 // ---------------------------------------------------------------------
419 signed int pkgTagSection::FindI(const char *Tag
,signed long Default
) const
423 if (Find(Tag
,Start
,Stop
) == false)
426 // Copy it into a temp buffer so we can use strtol
428 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
430 strncpy(S
,Start
,Stop
-Start
);
434 signed long Result
= strtol(S
,&End
,10);
440 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
441 // ---------------------------------------------------------------------
443 unsigned long long pkgTagSection::FindULL(const char *Tag
, unsigned long long const &Default
) const
447 if (Find(Tag
,Start
,Stop
) == false)
450 // Copy it into a temp buffer so we can use strtoull
452 if ((unsigned)(Stop
- Start
) >= sizeof(S
))
454 strncpy(S
,Start
,Stop
-Start
);
458 unsigned long long Result
= strtoull(S
,&End
,10);
464 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
465 // ---------------------------------------------------------------------
466 /* The bits marked in Flag are masked on/off in Flags */
467 bool pkgTagSection::FindFlag(const char *Tag
,unsigned long &Flags
,
468 unsigned long Flag
) const
472 if (Find(Tag
,Start
,Stop
) == false)
474 return FindFlag(Flags
, Flag
, Start
, Stop
);
476 bool pkgTagSection::FindFlag(unsigned long &Flags
, unsigned long Flag
,
477 char const* Start
, char const* Stop
)
479 switch (StringToBool(string(Start
, Stop
)))
490 _error
->Warning("Unknown flag value: %s",string(Start
,Stop
).c_str());
496 // TFRewrite - Rewrite a control record /*{{{*/
497 // ---------------------------------------------------------------------
498 /* This writes the control record to Output, rewriting it as necessary. The
499 override map item specifies the rewriting rules to follow. This also
500 takes the time to sort the field list. */
502 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
504 static const char *iTFRewritePackageOrder
[] = {
512 "Original-Maintainer",
516 "Revision", // Obsolete
517 "Config-Version", // Obsolete
533 "MSDOS-Filename", // Obsolete
536 static const char *iTFRewriteSourceOrder
[] = {"Package",
543 "Original-Maintainer",
545 "Build-Depends-Indep",
547 "Build-Conflicts-Indep",
555 /* Two levels of initialization are used because gcc will set the symbol
556 size of an array to the length of the array, causing dynamic relinking
557 errors. Doing this makes the symbol size constant */
558 const char **TFRewritePackageOrder
= iTFRewritePackageOrder
;
559 const char **TFRewriteSourceOrder
= iTFRewriteSourceOrder
;
561 bool TFRewrite(FILE *Output
,pkgTagSection
const &Tags
,const char *Order
[],
562 TFRewriteData
*Rewrite
)
564 unsigned char Visited
[256]; // Bit 1 is Order, Bit 2 is Rewrite
565 for (unsigned I
= 0; I
!= 256; I
++)
568 // Set new tag up as necessary.
569 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
571 if (Rewrite
[J
].NewTag
== 0)
572 Rewrite
[J
].NewTag
= Rewrite
[J
].Tag
;
575 // Write all of the tags, in order.
578 for (unsigned int I
= 0; Order
[I
] != 0; I
++)
580 bool Rewritten
= false;
582 // See if this is a field that needs to be rewritten
583 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
585 if (strcasecmp(Rewrite
[J
].Tag
,Order
[I
]) == 0)
588 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
590 if (isspace(Rewrite
[J
].Rewrite
[0]))
591 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
593 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
600 // See if it is in the fragment
602 if (Tags
.Find(Order
[I
],Pos
) == false)
606 if (Rewritten
== true)
609 /* Write out this element, taking a moment to rewrite the tag
610 in case of changes of case. */
613 Tags
.Get(Start
,Stop
,Pos
);
615 if (fputs(Order
[I
],Output
) < 0)
616 return _error
->Errno("fputs","IO Error to output");
617 Start
+= strlen(Order
[I
]);
618 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
619 return _error
->Errno("fwrite","IO Error to output");
620 if (Stop
[-1] != '\n')
621 fprintf(Output
,"\n");
625 // Now write all the old tags that were missed.
626 for (unsigned int I
= 0; I
!= Tags
.Count(); I
++)
628 if ((Visited
[I
] & 1) == 1)
633 Tags
.Get(Start
,Stop
,I
);
634 const char *End
= Start
;
635 for (; End
< Stop
&& *End
!= ':'; End
++);
637 // See if this is a field that needs to be rewritten
638 bool Rewritten
= false;
639 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
641 if (stringcasecmp(Start
,End
,Rewrite
[J
].Tag
) == 0)
644 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
646 if (isspace(Rewrite
[J
].Rewrite
[0]))
647 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
649 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
657 if (Rewritten
== true)
660 // Write out this element
661 if (fwrite(Start
,Stop
- Start
,1,Output
) != 1)
662 return _error
->Errno("fwrite","IO Error to output");
663 if (Stop
[-1] != '\n')
664 fprintf(Output
,"\n");
667 // Now write all the rewrites that were missed
668 for (unsigned int J
= 0; Rewrite
!= 0 && Rewrite
[J
].Tag
!= 0; J
++)
670 if ((Visited
[J
] & 2) == 2)
673 if (Rewrite
[J
].Rewrite
!= 0 && Rewrite
[J
].Rewrite
[0] != 0)
675 if (isspace(Rewrite
[J
].Rewrite
[0]))
676 fprintf(Output
,"%s:%s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);
678 fprintf(Output
,"%s: %s\n",Rewrite
[J
].NewTag
,Rewrite
[J
].Rewrite
);