]> git.saurik.com Git - apt-legacy.git/blame - apt-pkg/tagfile.cc
I feel... vindicated?
[apt-legacy.git] / apt-pkg / tagfile.cc
CommitLineData
da6ee469
JF
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
3// $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4/* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13// Include Files /*{{{*/
da6ee469
JF
14#include <apt-pkg/tagfile.h>
15#include <apt-pkg/error.h>
16#include <apt-pkg/strutl.h>
17
18#include <apti18n.h>
19
20#include <string>
21#include <stdio.h>
22#include <ctype.h>
23 /*}}}*/
24
25using std::string;
26
27// TagFile::pkgTagFile - Constructor /*{{{*/
28// ---------------------------------------------------------------------
29/* */
30pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long Size) :
acdafb44 31 Fd(*pFd)
da6ee469 32{
acdafb44 33 if (Fd.IsOpen() == false || Fd.Size() == 0)
da6ee469 34 {
260efb63 35 _error->Discard();
acdafb44 36 Map = NULL;
da6ee469
JF
37 Buffer = 0;
38 Start = End = Buffer = 0;
39 Done = true;
40 iOffset = 0;
41 return;
42 }
43
acdafb44
JF
44 Map = new MMap(*pFd, MMap::ReadOnly);
45 Buffer = reinterpret_cast<char *>(Map->Data());
da6ee469
JF
46 Start = End = Buffer;
47 Done = false;
48 iOffset = 0;
49 Fill();
50}
51 /*}}}*/
52// TagFile::~pkgTagFile - Destructor /*{{{*/
53// ---------------------------------------------------------------------
54/* */
55pkgTagFile::~pkgTagFile()
56{
acdafb44 57 delete Map;
da6ee469
JF
58}
59 /*}}}*/
da6ee469
JF
60// TagFile::Step - Advance to the next section /*{{{*/
61// ---------------------------------------------------------------------
62/* If the Section Scanner fails we refill the buffer and try again.
63 * If that fails too, double the buffer size and try again until a
64 * maximum buffer is reached.
65 */
66bool pkgTagFile::Step(pkgTagSection &Tag)
67{
acdafb44 68 if (Tag.Scan(Start,End - Start) == false)
da6ee469 69 {
acdafb44
JF
70 if (Start == End)
71 return false;
72 else
da6ee469
JF
73 return _error->Error(_("Unable to parse package file %s (1)"),
74 Fd.Name().c_str());
75 }
76 Start += Tag.size();
77 iOffset += Tag.size();
78
79 Tag.Trim();
80 return true;
81}
82 /*}}}*/
83// TagFile::Fill - Top up the buffer /*{{{*/
84// ---------------------------------------------------------------------
85/* This takes the bit at the end of the buffer and puts it at the start
86 then fills the rest from the file */
87bool pkgTagFile::Fill()
88{
acdafb44
JF
89 unsigned int Size(Map->Size());
90 End = Buffer + Size;
91 if (iOffset >= Size)
92 return false;
93 Start = Buffer + iOffset;
da6ee469
JF
94 return true;
95}
96 /*}}}*/
97// TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
98// ---------------------------------------------------------------------
99/* This jumps to a pre-recorded file location and reads the record
100 that is there */
101bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset)
102{
103 // We are within a buffer space of the next hit..
104 if (Offset >= iOffset && iOffset + (End - Start) > Offset)
105 {
106 unsigned long Dist = Offset - iOffset;
107 Start += Dist;
108 iOffset += Dist;
109 return Step(Tag);
110 }
111
112 // Reposition and reload..
113 iOffset = Offset;
114 Done = false;
da6ee469
JF
115 End = Start = Buffer;
116
117 if (Fill() == false)
118 return false;
119
da6ee469
JF
120 if (Tag.Scan(Start,End - Start) == false)
121 return _error->Error(_("Unable to parse package file %s (2)"),Fd.Name().c_str());
122
123 return true;
124}
125 /*}}}*/
// TagSection::Scan - Scan for the end of the header information /*{{{*/
// ---------------------------------------------------------------------
/* This looks for the first double new line in the data stream. It also
   indexes the tags in the section. This very simple hash function for the
   last 8 letters gives very good performance on the debian package files */

/* Hashes a tag name into the range 0..255. Hashing stops at End, at the
   first ':' or at a NUL byte, whichever comes first. Each character is
   masked with 0xDF, which clears the ASCII case bit so that upper and
   lower case letters hash alike. Because the accumulator is shifted left
   by one bit per character and only the low 8 bits are returned, only
   the last 8 characters influence the result. */
inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
{
   unsigned long Res = 0;
   for (; Text != End && *Text != ':' && *Text != 0; Text++)
      Res = (static_cast<unsigned long>(*Text) & 0xDF) ^ (Res << 1);
   return Res & 0xFF;
}
138
139bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
140{
141 const char *End = Start + MaxLength;
142 Stop = Section = Start;
143 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
144
145 if (Stop == 0)
146 return false;
147
148 TagCount = 0;
149 while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
150 {
151 // Start a new index and add it to the hash
152 if (isspace(Stop[0]) == 0)
153 {
154 Indexes[TagCount++] = Stop - Section;
acdafb44
JF
155 unsigned long hash(AlphaHash(Stop, End));
156 while (AlphaIndexes[hash] != 0)
157 hash = (hash + 1) % (sizeof(AlphaIndexes) / sizeof(AlphaIndexes[0]));
158 AlphaIndexes[hash] = TagCount;
da6ee469
JF
159 }
160
161 Stop = (const char *)memchr(Stop,'\n',End - Stop);
162
acdafb44
JF
163 if (Stop == 0) {
164 Stop = End;
165 goto end;
166 }
da6ee469
JF
167
168 for (; Stop+1 < End && Stop[1] == '\r'; Stop++);
169
170 // Double newline marks the end of the record
acdafb44
JF
171 if (Stop+1 == End || Stop[1] == '\n')
172 end: {
da6ee469
JF
173 Indexes[TagCount] = Stop - Section;
174 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
175 return true;
176 }
177
178 Stop++;
179 }
180
181 return false;
182}
183 /*}}}*/
184// TagSection::Trim - Trim off any trailing garbage /*{{{*/
185// ---------------------------------------------------------------------
186/* There should be exactly 1 newline at the end of the buffer, no more. */
187void pkgTagSection::Trim()
188{
189 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
190}
191 /*}}}*/
192// TagSection::Find - Locate a tag /*{{{*/
193// ---------------------------------------------------------------------
194/* This searches the section for a tag that matches the given string. */
195bool pkgTagSection::Find(const char *Tag,unsigned &Pos) const
196{
197 unsigned int Length = strlen(Tag);
acdafb44 198 unsigned int J = AlphaHash(Tag);
da6ee469 199
acdafb44
JF
200 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
201 J = (J+1)%(sizeof(AlphaIndexes)/sizeof(AlphaIndexes[0])))
da6ee469 202 {
acdafb44
JF
203 unsigned int I = AlphaIndexes[J];
204 if (I == 0)
205 return false;
206 I--;
207
da6ee469
JF
208 const char *St;
209 St = Section + Indexes[I];
210 if (strncasecmp(Tag,St,Length) != 0)
211 continue;
212
213 // Make sure the colon is in the right place
214 const char *C = St + Length;
215 for (; isspace(*C) != 0; C++);
216 if (*C != ':')
217 continue;
218 Pos = I;
219 return true;
220 }
221
222 Pos = 0;
223 return false;
224}
225 /*}}}*/
226// TagSection::Find - Locate a tag /*{{{*/
227// ---------------------------------------------------------------------
228/* This searches the section for a tag that matches the given string. */
229bool pkgTagSection::Find(const char *Tag,const char *&Start,
230 const char *&End) const
231{
232 unsigned int Length = strlen(Tag);
acdafb44 233 unsigned int J = AlphaHash(Tag);
da6ee469 234
acdafb44
JF
235 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
236 J = (J+1)%(sizeof(AlphaIndexes)/sizeof(AlphaIndexes[0])))
da6ee469 237 {
acdafb44
JF
238 unsigned int I = AlphaIndexes[J];
239 if (I == 0)
240 return false;
241 I--;
242
da6ee469
JF
243 const char *St;
244 St = Section + Indexes[I];
245 if (strncasecmp(Tag,St,Length) != 0)
246 continue;
247
248 // Make sure the colon is in the right place
249 const char *C = St + Length;
250 for (; isspace(*C) != 0; C++);
251 if (*C != ':')
252 continue;
253
254 // Strip off the gunk from the start end
255 Start = C;
256 End = Section + Indexes[I+1];
257 if (Start >= End)
258 return _error->Error("Internal parsing error");
259
260 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
261 for (; isspace(End[-1]) != 0 && End > Start; End--);
262
263 return true;
264 }
265
266 Start = End = 0;
267 return false;
268}
269 /*}}}*/
270// TagSection::FindS - Find a string /*{{{*/
271// ---------------------------------------------------------------------
272/* */
273string pkgTagSection::FindS(const char *Tag) const
274{
275 const char *Start;
276 const char *End;
277 if (Find(Tag,Start,End) == false)
278 return string();
279 return string(Start,End);
280}
281 /*}}}*/
282// TagSection::FindI - Find an integer /*{{{*/
283// ---------------------------------------------------------------------
284/* */
285signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
286{
287 const char *Start;
288 const char *Stop;
289 if (Find(Tag,Start,Stop) == false)
290 return Default;
291
292 // Copy it into a temp buffer so we can use strtol
293 char S[300];
294 if ((unsigned)(Stop - Start) >= sizeof(S))
295 return Default;
296 strncpy(S,Start,Stop-Start);
297 S[Stop - Start] = 0;
298
299 char *End;
300 signed long Result = strtol(S,&End,10);
301 if (S == End)
302 return Default;
303 return Result;
304}
305 /*}}}*/
306// TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
307// ---------------------------------------------------------------------
308/* The bits marked in Flag are masked on/off in Flags */
309bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
310 unsigned long Flag) const
311{
312 const char *Start;
313 const char *Stop;
314 if (Find(Tag,Start,Stop) == false)
315 return true;
316
317 switch (StringToBool(string(Start,Stop)))
318 {
319 case 0:
320 Flags &= ~Flag;
321 return true;
322
323 case 1:
324 Flags |= Flag;
325 return true;
326
327 default:
328 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
329 return true;
330 }
331 return true;
332}
333 /*}}}*/
334
// TFRewrite - Rewrite a control record /*{{{*/
// ---------------------------------------------------------------------
/* This writes the control record to the Output stream, rewriting it as
   necessary. The override map item specifies the rewriting rules to
   follow. This also takes the time to sort the field list. */
340
/* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
   array. The list is terminated by a null pointer. */
static const char *iTFRewritePackageOrder[] = {
                          "Package",
                          "Essential",
                          "Status",
                          "Priority",
                          "Section",
                          "Installed-Size",
                          "Maintainer",
                          "Architecture",
                          "Source",
                          "Version",
                          "Revision",         // Obsolete
                          "Config-Version",   // Obsolete
                          "Replaces",
                          "Provides",
                          "Depends",
                          "Pre-Depends",
                          "Recommends",
                          "Suggests",
                          "Conflicts",
                          "Breaks",
                          "Conffiles",
                          "Filename",
                          "Size",
                          "MD5Sum",
                          "SHA1",
                          "SHA256",
                          "MSDOS-Filename",   // Obsolete
                          "Description",
                          0};
/* Field order used for source records. The list is terminated by a null
   pointer. */
static const char *iTFRewriteSourceOrder[] = {
                          "Package",
                          "Source",
                          "Binary",
                          "Version",
                          "Priority",
                          "Section",
                          "Maintainer",
                          "Build-Depends",
                          "Build-Depends-Indep",
                          "Build-Conflicts",
                          "Build-Conflicts-Indep",
                          "Architecture",
                          "Standards-Version",
                          "Format",
                          "Directory",
                          "Files",
                          0};
390
391/* Two levels of initialization are used because gcc will set the symbol
392 size of an array to the length of the array, causing dynamic relinking
393 errors. Doing this makes the symbol size constant */
394const char **TFRewritePackageOrder = iTFRewritePackageOrder;
395const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
396
397bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
398 TFRewriteData *Rewrite)
399{
400 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
401 for (unsigned I = 0; I != 256; I++)
402 Visited[I] = 0;
403
404 // Set new tag up as necessary.
405 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
406 {
407 if (Rewrite[J].NewTag == 0)
408 Rewrite[J].NewTag = Rewrite[J].Tag;
409 }
410
411 // Write all all of the tags, in order.
412 for (unsigned int I = 0; Order[I] != 0; I++)
413 {
414 bool Rewritten = false;
415
416 // See if this is a field that needs to be rewritten
417 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
418 {
419 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
420 {
421 Visited[J] |= 2;
422 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
423 {
424 if (isspace(Rewrite[J].Rewrite[0]))
425 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
426 else
427 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
428 }
429
430 Rewritten = true;
431 break;
432 }
433 }
434
435 // See if it is in the fragment
436 unsigned Pos;
437 if (Tags.Find(Order[I],Pos) == false)
438 continue;
439 Visited[Pos] |= 1;
440
441 if (Rewritten == true)
442 continue;
443
444 /* Write out this element, taking a moment to rewrite the tag
445 in case of changes of case. */
446 const char *Start;
447 const char *Stop;
448 Tags.Get(Start,Stop,Pos);
449
450 if (fputs(Order[I],Output) < 0)
451 return _error->Errno("fputs","IO Error to output");
452 Start += strlen(Order[I]);
453 if (fwrite(Start,Stop - Start,1,Output) != 1)
454 return _error->Errno("fwrite","IO Error to output");
455 if (Stop[-1] != '\n')
456 fprintf(Output,"\n");
457 }
458
459 // Now write all the old tags that were missed.
460 for (unsigned int I = 0; I != Tags.Count(); I++)
461 {
462 if ((Visited[I] & 1) == 1)
463 continue;
464
465 const char *Start;
466 const char *Stop;
467 Tags.Get(Start,Stop,I);
468 const char *End = Start;
469 for (; End < Stop && *End != ':'; End++);
470
471 // See if this is a field that needs to be rewritten
472 bool Rewritten = false;
473 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
474 {
475 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
476 {
477 Visited[J] |= 2;
478 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
479 {
480 if (isspace(Rewrite[J].Rewrite[0]))
481 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
482 else
483 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
484 }
485
486 Rewritten = true;
487 break;
488 }
489 }
490
491 if (Rewritten == true)
492 continue;
493
494 // Write out this element
495 if (fwrite(Start,Stop - Start,1,Output) != 1)
496 return _error->Errno("fwrite","IO Error to output");
497 if (Stop[-1] != '\n')
498 fprintf(Output,"\n");
499 }
500
501 // Now write all the rewrites that were missed
502 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
503 {
504 if ((Visited[J] & 2) == 2)
505 continue;
506
507 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
508 {
509 if (isspace(Rewrite[J].Rewrite[0]))
510 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
511 else
512 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
513 }
514 }
515
516 return true;
517}
518 /*}}}*/