]> git.saurik.com Git - apt.git/blame - apt-pkg/tagfile.cc
Fix nother end of file parsing bug
[apt.git] / apt-pkg / tagfile.cc
CommitLineData
578bfd0a
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
613f9499 3// $Id: tagfile.cc,v 1.34 2002/07/08 03:39:06 jgg Exp $
578bfd0a
AL
4/* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
ad00ae81 8 This uses a rotating buffer to load the package information into.
578bfd0a
AL
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13// Include Files /*{{{*/
6c139d6e 14#ifdef __GNUG__
094a497d 15#pragma implementation "apt-pkg/tagfile.h"
6c139d6e
AL
16#endif
17
094a497d
AL
18#include <apt-pkg/tagfile.h>
19#include <apt-pkg/error.h>
cdcc6d34 20#include <apt-pkg/strutl.h>
578bfd0a 21
b2e465d6
AL
22#include <apti18n.h>
23
578bfd0a
AL
24#include <string>
25#include <stdio.h>
851a45a8 26#include <ctype.h>
578bfd0a
AL
27 /*}}}*/
28
851a45a8
AL
29using std::string;
30
578bfd0a
AL
31// TagFile::pkgTagFile - Constructor /*{{{*/
32// ---------------------------------------------------------------------
33/* */
b2e465d6 34pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long Size) : Fd(*pFd), Size(Size)
578bfd0a 35{
0e72dd52
AL
36 if (Fd.IsOpen() == false)
37 {
38 Buffer = 0;
39 Start = End = Buffer = 0;
f604cf55 40 Done = true;
0e72dd52
AL
41 iOffset = 0;
42 return;
43 }
44
ad00ae81
AL
45 Buffer = new char[Size];
46 Start = End = Buffer;
f604cf55 47 Done = false;
dcb79bae 48 iOffset = 0;
578bfd0a
AL
49 Fill();
50}
51 /*}}}*/
b2e465d6 52// TagFile::~pkgTagFile - Destructor /*{{{*/
29f7b36c
AL
53// ---------------------------------------------------------------------
54/* */
55pkgTagFile::~pkgTagFile()
56{
57 delete [] Buffer;
58}
59 /*}}}*/
578bfd0a
AL
60// TagFile::Step - Advance to the next section /*{{{*/
61// ---------------------------------------------------------------------
62/* If the Section Scanner fails we refill the buffer and try again. */
63bool pkgTagFile::Step(pkgTagSection &Tag)
64{
65 if (Tag.Scan(Start,End - Start) == false)
66 {
67 if (Fill() == false)
68 return false;
69
70 if (Tag.Scan(Start,End - Start) == false)
fe06d72c
AL
71 return _error->Error(_("Unable to parse package file %s (1)"),
72 Fd.Name().c_str());
613f9499 73 }
dcb79bae
AL
74 Start += Tag.size();
75 iOffset += Tag.size();
b2e465d6
AL
76
77 Tag.Trim();
578bfd0a
AL
78 return true;
79}
80 /*}}}*/
81// TagFile::Fill - Top up the buffer /*{{{*/
82// ---------------------------------------------------------------------
83/* This takes the bit at the end of the buffer and puts it at the start
84 then fills the rest from the file */
85bool pkgTagFile::Fill()
86{
ad00ae81 87 unsigned long EndSize = End - Start;
613f9499 88 unsigned long Actual = 0;
578bfd0a 89
c7b5ce1c
AL
90 memmove(Buffer,Start,EndSize);
91 Start = Buffer;
92 End = Buffer + EndSize;
93
fe06d72c
AL
94 if (Done == false)
95 {
96 // See if only a bit of the file is left
fe06d72c
AL
97 if (Fd.Read(End,Size - (End - Buffer),&Actual) == false)
98 return false;
99 if (Actual != Size - (End - Buffer))
100 Done = true;
101 End += Actual;
102 }
103
f604cf55 104 if (Done == true)
578bfd0a 105 {
f5ec3b68 106 if (EndSize <= 3 && Actual == 0)
578bfd0a 107 return false;
c7b5ce1c
AL
108 if (Size - (End - Buffer) < 4)
109 return true;
110
111 // Append a double new line if one does not exist
112 unsigned int LineCount = 0;
113 for (const char *E = End - 1; E - End < 6 && (*E == '\n' || *E == '\r'); E--)
114 if (*E == '\n')
115 LineCount++;
116 for (; LineCount < 2; LineCount++)
117 *End++ = '\n';
118
578bfd0a
AL
119 return true;
120 }
121
578bfd0a
AL
122 return true;
123}
124 /*}}}*/
ad00ae81
AL
125// TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
126// ---------------------------------------------------------------------
03e39e59
AL
127/* This jumps to a pre-recorded file location and reads the record
128 that is there */
ad00ae81
AL
129bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset)
130{
b2e465d6
AL
131 // We are within a buffer space of the next hit..
132 if (Offset >= iOffset && iOffset + (End - Start) > Offset)
133 {
134 unsigned long Dist = Offset - iOffset;
135 Start += Dist;
136 iOffset += Dist;
137 return Step(Tag);
138 }
139
140 // Reposition and reload..
ad00ae81 141 iOffset = Offset;
f604cf55 142 Done = false;
ad00ae81
AL
143 if (Fd.Seek(Offset) == false)
144 return false;
145 End = Start = Buffer;
146
138d4b3d
AL
147 if (Fill() == false)
148 return false;
149
150 if (Tag.Scan(Start,End - Start) == true)
151 return true;
152
153 // This appends a double new line (for the real eof handling)
ad00ae81
AL
154 if (Fill() == false)
155 return false;
156
157 if (Tag.Scan(Start,End - Start) == false)
b2e465d6 158 return _error->Error(_("Unable to parse package file %s (2)"),Fd.Name().c_str());
06bba740 159
ad00ae81
AL
160 return true;
161}
162 /*}}}*/
578bfd0a
AL
163// TagSection::Scan - Scan for the end of the header information /*{{{*/
164// ---------------------------------------------------------------------
165/* This looks for the first double new line in the data stream. It also
c1a22377
AL
166 indexes the tags in the section. This very simple hash function for the
167 first 3 letters gives very good performance on the debian package files */
b2e465d6
AL
/* Folds a tag name into a bucket number in the range 0..255. Hashing stops
   at End, at a ':' or at a NUL byte, whichever comes first. Bit 5 of every
   byte is cleared before mixing so that ASCII upper and lower case letters
   land in the same bucket; Find() compares names with strncasecmp(), so a
   case-sensitive hash could miss a tag that the comparison would accept. */
inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
{
   unsigned long Res = 0;
   for (; Text != End && *Text != ':' && *Text != 0; Text++)
      Res = (static_cast<unsigned char>(*Text) & 0xDFul) ^ (Res << 2);
   return Res & 0xFF;
}
175
578bfd0a
AL
176bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
177{
178 const char *End = Start + MaxLength;
179 Stop = Section = Start;
c1a22377 180 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
c7b5ce1c
AL
181
182 if (Stop == 0)
183 return false;
578bfd0a
AL
184
185 TagCount = 0;
fd71171a 186 while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
578bfd0a 187 {
90d64280 188 // Start a new index and add it to the hash
c1a22377
AL
189 if (isspace(Stop[0]) == 0)
190 {
191 Indexes[TagCount++] = Stop - Section;
b2e465d6 192 AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
c1a22377 193 }
0a8e3465 194
c1a22377 195 Stop = (const char *)memchr(Stop,'\n',End - Stop);
0a8e3465 196
c1a22377
AL
197 if (Stop == 0)
198 return false;
138d4b3d 199
fd71171a 200 for (; Stop+1 < End && Stop[1] == '\r'; Stop++);
c1a22377 201
f3bcc383
AL
202 // Double newline marks the end of the record
203 if (Stop+1 < End && Stop[1] == '\n')
578bfd0a 204 {
578bfd0a 205 Indexes[TagCount] = Stop - Section;
fd71171a 206 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
578bfd0a 207 return true;
578bfd0a
AL
208 }
209
c1a22377
AL
210 Stop++;
211 }
138d4b3d 212
578bfd0a
AL
213 return false;
214}
215 /*}}}*/
b2e465d6
AL
216// TagSection::Trim - Trim off any trailing garbage /*{{{*/
217// ---------------------------------------------------------------------
218/* There should be exactly 1 newline at the end of the buffer, no more. */
219void pkgTagSection::Trim()
220{
221 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
222}
223 /*}}}*/
578bfd0a
AL
224// TagSection::Find - Locate a tag /*{{{*/
225// ---------------------------------------------------------------------
226/* This searches the section for a tag that matches the given string. */
b2e465d6 227bool pkgTagSection::Find(const char *Tag,unsigned &Pos) const
578bfd0a
AL
228{
229 unsigned int Length = strlen(Tag);
b2e465d6 230 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
c1a22377
AL
231 if (I == 0)
232 return false;
233 I--;
234
235 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
236 I = (I+1)%TagCount)
578bfd0a 237 {
c1a22377
AL
238 const char *St;
239 St = Section + Indexes[I];
240 if (strncasecmp(Tag,St,Length) != 0)
578bfd0a
AL
241 continue;
242
b2e465d6
AL
243 // Make sure the colon is in the right place
244 const char *C = St + Length;
245 for (; isspace(*C) != 0; C++);
246 if (*C != ':')
247 continue;
248 Pos = I;
249 return true;
250 }
251
252 Pos = 0;
253 return false;
254}
255 /*}}}*/
256// TagSection::Find - Locate a tag /*{{{*/
257// ---------------------------------------------------------------------
258/* This searches the section for a tag that matches the given string. */
259bool pkgTagSection::Find(const char *Tag,const char *&Start,
260 const char *&End) const
261{
262 unsigned int Length = strlen(Tag);
263 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
264 if (I == 0)
265 return false;
266 I--;
267
268 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
269 I = (I+1)%TagCount)
270 {
271 const char *St;
272 St = Section + Indexes[I];
273 if (strncasecmp(Tag,St,Length) != 0)
274 continue;
275
578bfd0a 276 // Make sure the colon is in the right place
c1a22377 277 const char *C = St + Length;
578bfd0a
AL
278 for (; isspace(*C) != 0; C++);
279 if (*C != ':')
280 continue;
281
282 // Strip off the gunk from the start end
283 Start = C;
284 End = Section + Indexes[I+1];
06bba740
AL
285 if (Start >= End)
286 return _error->Error("Internal parsing error");
287
578bfd0a
AL
288 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
289 for (; isspace(End[-1]) != 0 && End > Start; End--);
06bba740 290
578bfd0a
AL
291 return true;
292 }
c1a22377 293
578bfd0a
AL
294 Start = End = 0;
295 return false;
296}
297 /*}}}*/
0e66b144 298// TagSection::FindS - Find a string /*{{{*/
a05599f1
AL
299// ---------------------------------------------------------------------
300/* */
b2e465d6 301string pkgTagSection::FindS(const char *Tag) const
a05599f1
AL
302{
303 const char *Start;
304 const char *End;
305 if (Find(Tag,Start,End) == false)
306 return string();
307 return string(Start,End);
308}
309 /*}}}*/
310// TagSection::FindI - Find an integer /*{{{*/
311// ---------------------------------------------------------------------
312/* */
b2e465d6 313signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
a05599f1
AL
314{
315 const char *Start;
b0b4efb9
AL
316 const char *Stop;
317 if (Find(Tag,Start,Stop) == false)
318 return Default;
319
320 // Copy it into a temp buffer so we can use strtol
321 char S[300];
322 if ((unsigned)(Stop - Start) >= sizeof(S))
323 return Default;
324 strncpy(S,Start,Stop-Start);
325 S[Stop - Start] = 0;
326
327 char *End;
328 signed long Result = strtol(S,&End,10);
329 if (S == End)
330 return Default;
331 return Result;
332}
333 /*}}}*/
334// TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
335// ---------------------------------------------------------------------
336/* The bits marked in Flag are masked on/off in Flags */
337bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
b2e465d6 338 unsigned long Flag) const
b0b4efb9
AL
339{
340 const char *Start;
341 const char *Stop;
342 if (Find(Tag,Start,Stop) == false)
343 return true;
a05599f1 344
b0b4efb9
AL
345 switch (StringToBool(string(Start,Stop)))
346 {
347 case 0:
348 Flags &= ~Flag;
349 return true;
350
351 case 1:
352 Flags |= Flag;
353 return true;
354
355 default:
b2e465d6 356 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
b0b4efb9
AL
357 return true;
358 }
359 return true;
a05599f1
AL
360}
361 /*}}}*/
b2e465d6
AL
362
363// TFRewrite - Rewrite a control record /*{{{*/
364// ---------------------------------------------------------------------
/* This writes the control record to the Output stream, rewriting it as
   necessary. The Rewrite list specifies the rewriting rules to follow. This
   also takes the time to sort the field list. */
368
369/* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
370 array. */
// Output order for the fields of a binary package record; the list is
// terminated by a null entry.
static const char *iTFRewritePackageOrder[] = {
   // Identity and state
   "Package", "Essential", "Status", "Priority", "Section",
   "Installed-Size", "Maintainer", "Architecture", "Source", "Version",
   "Revision",         // Obsolete
   "Config-Version",   // Obsolete
   // Relationships
   "Replaces", "Provides", "Depends", "Pre-Depends", "Recommends",
   "Suggests", "Conflicts", "Conffiles",
   // Archive details
   "Filename", "Size", "MD5Sum", "SHA1Sum",
   "MSDOS-Filename",   // Obsolete
   "Description",
   0
};
// Output order for the fields of a source package record; the list is
// terminated by a null entry.
static const char *iTFRewriteSourceOrder[] = {
   "Package", "Source", "Binary", "Version",
   "Priority", "Section", "Maintainer",
   "Build-Depends", "Build-Depends-Indep",
   "Build-Conflicts", "Build-Conflicts-Indep",
   "Architecture", "Standards-Version", "Format",
   "Directory", "Files",
   0
};
416
417/* Two levels of initialization are used because gcc will set the symbol
418 size of an array to the length of the array, causing dynamic relinking
419 errors. Doing this makes the symbol size constant */
420const char **TFRewritePackageOrder = iTFRewritePackageOrder;
421const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
422
423bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
424 TFRewriteData *Rewrite)
425{
426 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
427 for (unsigned I = 0; I != 256; I++)
428 Visited[I] = 0;
429
430 // Set new tag up as necessary.
431 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
432 {
433 if (Rewrite[J].NewTag == 0)
434 Rewrite[J].NewTag = Rewrite[J].Tag;
435 }
436
437 // Write all all of the tags, in order.
438 for (unsigned int I = 0; Order[I] != 0; I++)
439 {
440 bool Rewritten = false;
441
442 // See if this is a field that needs to be rewritten
443 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
444 {
445 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
446 {
447 Visited[J] |= 2;
448 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
449 {
450 if (isspace(Rewrite[J].Rewrite[0]))
451 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
452 else
453 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
454 }
455
456 Rewritten = true;
457 break;
458 }
459 }
460
461 // See if it is in the fragment
462 unsigned Pos;
463 if (Tags.Find(Order[I],Pos) == false)
464 continue;
465 Visited[Pos] |= 1;
466
467 if (Rewritten == true)
468 continue;
469
470 /* Write out this element, taking a moment to rewrite the tag
471 in case of changes of case. */
472 const char *Start;
473 const char *Stop;
474 Tags.Get(Start,Stop,Pos);
475
476 if (fputs(Order[I],Output) < 0)
477 return _error->Errno("fputs","IO Error to output");
478 Start += strlen(Order[I]);
479 if (fwrite(Start,Stop - Start,1,Output) != 1)
480 return _error->Errno("fwrite","IO Error to output");
481 if (Stop[-1] != '\n')
482 fprintf(Output,"\n");
483 }
484
485 // Now write all the old tags that were missed.
486 for (unsigned int I = 0; I != Tags.Count(); I++)
487 {
488 if ((Visited[I] & 1) == 1)
489 continue;
490
491 const char *Start;
492 const char *Stop;
493 Tags.Get(Start,Stop,I);
494 const char *End = Start;
495 for (; End < Stop && *End != ':'; End++);
496
497 // See if this is a field that needs to be rewritten
498 bool Rewritten = false;
499 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
500 {
501 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
502 {
503 Visited[J] |= 2;
504 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
505 {
506 if (isspace(Rewrite[J].Rewrite[0]))
507 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
508 else
509 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
510 }
511
512 Rewritten = true;
513 break;
514 }
515 }
516
517 if (Rewritten == true)
518 continue;
519
520 // Write out this element
521 if (fwrite(Start,Stop - Start,1,Output) != 1)
522 return _error->Errno("fwrite","IO Error to output");
523 if (Stop[-1] != '\n')
524 fprintf(Output,"\n");
525 }
526
527 // Now write all the rewrites that were missed
528 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
529 {
530 if ((Visited[J] & 2) == 2)
531 continue;
532
533 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
534 {
535 if (isspace(Rewrite[J].Rewrite[0]))
536 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
537 else
538 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
539 }
540 }
541
542 return true;
543}
544 /*}}}*/