]> git.saurik.com Git - apt-legacy.git/blob - apt-pkg/tagfile.cc
7ef5df38e808ab5f03ebb957bf23ddc63a738612
[apt-legacy.git] / apt-pkg / tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13 // Include Files /*{{{*/
14 #include <apt-pkg/tagfile.h>
15 #include <apt-pkg/error.h>
16 #include <apt-pkg/strutl.h>
17
18 #include <apti18n.h>
19
20 #include <string>
21 #include <stdio.h>
22 #include <ctype.h>
23 /*}}}*/
24
25 using std::string;
26
27 // TagFile::pkgTagFile - Constructor /*{{{*/
28 // ---------------------------------------------------------------------
29 /* */
30 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long Size) :
31 Fd(*pFd)
32 {
33 if (Fd.IsOpen() == false || Fd.Size() == 0)
34 {
35 _error->Discard();
36 Map = NULL;
37 Buffer = 0;
38 Start = End = Buffer = 0;
39 Done = true;
40 iOffset = 0;
41 return;
42 }
43
44 Map = new MMap(*pFd, MMap::ReadOnly);
45 Buffer = reinterpret_cast<char *>(Map->Data());
46 Start = End = Buffer;
47 Done = false;
48 iOffset = 0;
49 Fill();
50 }
51 /*}}}*/
52 // TagFile::~pkgTagFile - Destructor /*{{{*/
53 // ---------------------------------------------------------------------
54 /* */
55 pkgTagFile::~pkgTagFile()
56 {
57 delete Map;
58 }
59 /*}}}*/
60 // TagFile::Step - Advance to the next section /*{{{*/
61 // ---------------------------------------------------------------------
/* The section scanner is run over the unread part of the buffer. On
   success the read position is moved past the section. A failed scan
   with no data left means end of file; a failed scan with data still
   pending is reported as a parse error. */
66 bool pkgTagFile::Step(pkgTagSection &Tag)
67 {
68 if (Tag.Scan(Start,End - Start) == false)
69 {
70 if (Start == End)
71 return false;
72 else
73 return _error->Error(_("Unable to parse package file %s (1)"),
74 Fd.Name().c_str());
75 }
76 Start += Tag.size();
77 iOffset += Tag.size();
78
79 Tag.Trim();
80 return true;
81 }
82 /*}}}*/
83 // TagFile::Fill - Top up the buffer /*{{{*/
84 // ---------------------------------------------------------------------
/* This points End at the end of the mapped file and Start at the current
   file offset; it fails when the offset is at or beyond the end. */
87 bool pkgTagFile::Fill()
88 {
89 unsigned int Size(Map->Size());
90 End = Buffer + Size;
91 if (iOffset >= Size)
92 return false;
93 Start = Buffer + iOffset;
94 return true;
95 }
96 /*}}}*/
97 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
98 // ---------------------------------------------------------------------
99 /* This jumps to a pre-recorded file location and reads the record
100 that is there */
101 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset)
102 {
103 // We are within a buffer space of the next hit..
104 if (Offset >= iOffset && iOffset + (End - Start) > Offset)
105 {
106 unsigned long Dist = Offset - iOffset;
107 Start += Dist;
108 iOffset += Dist;
109 return Step(Tag);
110 }
111
112 // Reposition and reload..
113 iOffset = Offset;
114 Done = false;
115 End = Start = Buffer;
116
117 if (Fill() == false)
118 return false;
119
120 if (Tag.Scan(Start,End - Start) == false)
121 return _error->Error(_("Unable to parse package file %s (2)"),Fd.Name().c_str());
122
123 return true;
124 }
125 /*}}}*/
126 // TagSection::Scan - Scan for the end of the header information /*{{{*/
127 // ---------------------------------------------------------------------
128 /* This looks for the first double new line in the data stream. It also
129 indexes the tags in the section. This very simple hash function for the
130 last 8 letters gives very good performance on the debian package files */
// Case-folding hash of a tag name, reduced to the range 0..255. Hashing
// stops at End, at a ':' or at a NUL byte, whichever comes first. Masking
// each character with 0xDF clears the bit that separates upper from lower
// case ASCII letters, so names differing only in case hash identically.
inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
{
   unsigned long Hash = 0;
   while (Text != End && *Text != ':' && *Text != 0)
   {
      unsigned long const Folded = static_cast<unsigned long>(*Text) & 0xDF;
      Hash = Folded ^ (Hash << 1);
      ++Text;
   }
   return Hash & 0xFF;
}
138
139 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
140 {
141 const char *End = Start + MaxLength;
142 Stop = Section = Start;
143 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
144
145 if (Stop == 0)
146 return false;
147
148 TagCount = 0;
149 while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
150 {
151 // Start a new index and add it to the hash
152 if (isspace(Stop[0]) == 0)
153 {
154 Indexes[TagCount++] = Stop - Section;
155 unsigned long hash(AlphaHash(Stop, End));
156 while (AlphaIndexes[hash] != 0)
157 hash = (hash + 1) % (sizeof(AlphaIndexes) / sizeof(AlphaIndexes[0]));
158 AlphaIndexes[hash] = TagCount;
159 }
160
161 Stop = (const char *)memchr(Stop,'\n',End - Stop);
162
163 if (Stop == 0) {
164 Stop = End;
165 goto end;
166 }
167
168 for (; Stop+1 < End && Stop[1] == '\r'; Stop++);
169
170 // Double newline marks the end of the record
171 if (Stop+1 == End || Stop[1] == '\n')
172 end: {
173 Indexes[TagCount] = Stop - Section;
174 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
175 return true;
176 }
177
178 Stop++;
179 }
180
181 return false;
182 }
183 /*}}}*/
184 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
185 // ---------------------------------------------------------------------
186 /* There should be exactly 1 newline at the end of the buffer, no more. */
187 void pkgTagSection::Trim()
188 {
189 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
190 }
191 /*}}}*/
192 // TagSection::Find - Locate a tag /*{{{*/
193 // ---------------------------------------------------------------------
194 /* This searches the section for a tag that matches the given string. */
195 bool pkgTagSection::Find(const char *Tag,unsigned &Pos) const
196 {
197 unsigned int Length = strlen(Tag);
198 unsigned int J = AlphaHash(Tag);
199
200 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
201 J = (J+1)%(sizeof(AlphaIndexes)/sizeof(AlphaIndexes[0])))
202 {
203 unsigned int I = AlphaIndexes[J];
204 if (I == 0)
205 return false;
206 I--;
207
208 const char *St;
209 St = Section + Indexes[I];
210 if (strncasecmp(Tag,St,Length) != 0)
211 continue;
212
213 // Make sure the colon is in the right place
214 const char *C = St + Length;
215 for (; isspace(*C) != 0; C++);
216 if (*C != ':')
217 continue;
218 Pos = I;
219 return true;
220 }
221
222 Pos = 0;
223 return false;
224 }
225 /*}}}*/
226 // TagSection::Find - Locate a tag /*{{{*/
227 // ---------------------------------------------------------------------
228 /* This searches the section for a tag that matches the given string. */
229 bool pkgTagSection::Find(const char *Tag,const char *&Start,
230 const char *&End) const
231 {
232 unsigned int Length = strlen(Tag);
233 unsigned int J = AlphaHash(Tag);
234
235 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
236 J = (J+1)%(sizeof(AlphaIndexes)/sizeof(AlphaIndexes[0])))
237 {
238 unsigned int I = AlphaIndexes[J];
239 if (I == 0)
240 return false;
241 I--;
242
243 const char *St;
244 St = Section + Indexes[I];
245 if (strncasecmp(Tag,St,Length) != 0)
246 continue;
247
248 // Make sure the colon is in the right place
249 const char *C = St + Length;
250 for (; isspace(*C) != 0; C++);
251 if (*C != ':')
252 continue;
253
254 // Strip off the gunk from the start end
255 Start = C;
256 End = Section + Indexes[I+1];
257 if (Start >= End)
258 return _error->Error("Internal parsing error");
259
260 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
261 for (; isspace(End[-1]) != 0 && End > Start; End--);
262
263 return true;
264 }
265
266 Start = End = 0;
267 return false;
268 }
269 /*}}}*/
270 // TagSection::FindS - Find a string /*{{{*/
271 // ---------------------------------------------------------------------
272 /* */
273 string pkgTagSection::FindS(const char *Tag) const
274 {
275 const char *Start;
276 const char *End;
277 if (Find(Tag,Start,End) == false)
278 return string();
279 return string(Start,End);
280 }
281 /*}}}*/
282 // TagSection::FindI - Find an integer /*{{{*/
283 // ---------------------------------------------------------------------
284 /* */
285 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
286 {
287 const char *Start;
288 const char *Stop;
289 if (Find(Tag,Start,Stop) == false)
290 return Default;
291
292 // Copy it into a temp buffer so we can use strtol
293 char S[300];
294 if ((unsigned)(Stop - Start) >= sizeof(S))
295 return Default;
296 strncpy(S,Start,Stop-Start);
297 S[Stop - Start] = 0;
298
299 char *End;
300 signed long Result = strtol(S,&End,10);
301 if (S == End)
302 return Default;
303 return Result;
304 }
305 /*}}}*/
306 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
307 // ---------------------------------------------------------------------
308 /* The bits marked in Flag are masked on/off in Flags */
309 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
310 unsigned long Flag) const
311 {
312 const char *Start;
313 const char *Stop;
314 if (Find(Tag,Start,Stop) == false)
315 return true;
316
317 switch (StringToBool(string(Start,Stop)))
318 {
319 case 0:
320 Flags &= ~Flag;
321 return true;
322
323 case 1:
324 Flags |= Flag;
325 return true;
326
327 default:
328 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
329 return true;
330 }
331 return true;
332 }
333 /*}}}*/
334
335 // TFRewrite - Rewrite a control record /*{{{*/
336 // ---------------------------------------------------------------------
/* This writes the control record to the given output stream, rewriting it
   as necessary. The override map item specifies the rewriting rules to
   follow. This also takes the time to sort the field list. */
340
341 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
342 array. */
// Output order for the fields of a binary package record; the list is
// terminated by a null pointer.
static const char *iTFRewritePackageOrder[] =
{
   "Package",
   "Essential",
   "Status",
   "Priority",
   "Section",
   "Installed-Size",
   "Maintainer",
   "Architecture",
   "Source",
   "Version",
   "Revision",         // Obsolete
   "Config-Version",   // Obsolete
   "Replaces",
   "Provides",
   "Depends",
   "Pre-Depends",
   "Recommends",
   "Suggests",
   "Conflicts",
   "Breaks",
   "Conffiles",
   "Filename",
   "Size",
   "MD5Sum",
   "SHA1",
   "SHA256",
   "MSDOS-Filename",   // Obsolete
   "Description",
   0
};

// Output order for the fields of a source package record; the list is
// terminated by a null pointer.
static const char *iTFRewriteSourceOrder[] =
{
   "Package",
   "Source",
   "Binary",
   "Version",
   "Priority",
   "Section",
   "Maintainer",
   "Build-Depends",
   "Build-Depends-Indep",
   "Build-Conflicts",
   "Build-Conflicts-Indep",
   "Architecture",
   "Standards-Version",
   "Format",
   "Directory",
   "Files",
   0
};

/* The arrays are exported through plain pointers rather than directly:
   gcc records the array length as the symbol size, which leads to dynamic
   relinking errors when the lists change. A pointer keeps the symbol size
   constant. */
const char **TFRewritePackageOrder = iTFRewritePackageOrder;
const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
396
397 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
398 TFRewriteData *Rewrite)
399 {
400 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
401 for (unsigned I = 0; I != 256; I++)
402 Visited[I] = 0;
403
404 // Set new tag up as necessary.
405 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
406 {
407 if (Rewrite[J].NewTag == 0)
408 Rewrite[J].NewTag = Rewrite[J].Tag;
409 }
410
411 // Write all all of the tags, in order.
412 for (unsigned int I = 0; Order[I] != 0; I++)
413 {
414 bool Rewritten = false;
415
416 // See if this is a field that needs to be rewritten
417 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
418 {
419 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
420 {
421 Visited[J] |= 2;
422 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
423 {
424 if (isspace(Rewrite[J].Rewrite[0]))
425 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
426 else
427 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
428 }
429
430 Rewritten = true;
431 break;
432 }
433 }
434
435 // See if it is in the fragment
436 unsigned Pos;
437 if (Tags.Find(Order[I],Pos) == false)
438 continue;
439 Visited[Pos] |= 1;
440
441 if (Rewritten == true)
442 continue;
443
444 /* Write out this element, taking a moment to rewrite the tag
445 in case of changes of case. */
446 const char *Start;
447 const char *Stop;
448 Tags.Get(Start,Stop,Pos);
449
450 if (fputs(Order[I],Output) < 0)
451 return _error->Errno("fputs","IO Error to output");
452 Start += strlen(Order[I]);
453 if (fwrite(Start,Stop - Start,1,Output) != 1)
454 return _error->Errno("fwrite","IO Error to output");
455 if (Stop[-1] != '\n')
456 fprintf(Output,"\n");
457 }
458
459 // Now write all the old tags that were missed.
460 for (unsigned int I = 0; I != Tags.Count(); I++)
461 {
462 if ((Visited[I] & 1) == 1)
463 continue;
464
465 const char *Start;
466 const char *Stop;
467 Tags.Get(Start,Stop,I);
468 const char *End = Start;
469 for (; End < Stop && *End != ':'; End++);
470
471 // See if this is a field that needs to be rewritten
472 bool Rewritten = false;
473 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
474 {
475 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
476 {
477 Visited[J] |= 2;
478 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
479 {
480 if (isspace(Rewrite[J].Rewrite[0]))
481 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
482 else
483 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
484 }
485
486 Rewritten = true;
487 break;
488 }
489 }
490
491 if (Rewritten == true)
492 continue;
493
494 // Write out this element
495 if (fwrite(Start,Stop - Start,1,Output) != 1)
496 return _error->Errno("fwrite","IO Error to output");
497 if (Stop[-1] != '\n')
498 fprintf(Output,"\n");
499 }
500
501 // Now write all the rewrites that were missed
502 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
503 {
504 if ((Visited[J] & 2) == 2)
505 continue;
506
507 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
508 {
509 if (isspace(Rewrite[J].Rewrite[0]))
510 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
511 else
512 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
513 }
514 }
515
516 return true;
517 }
518 /*}}}*/