]> git.saurik.com Git - apt-legacy.git/blob - apt-pkg/tagfile.cc
Break the ABI and fix the package limit.
[apt-legacy.git] / apt-pkg / tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13 // Include Files /*{{{*/
14 #include <apt-pkg/tagfile.h>
15 #include <apt-pkg/error.h>
16 #include <apt-pkg/strutl.h>
17
18 #include <apti18n.h>
19
20 #include <string>
21 #include <stdio.h>
22 #include <ctype.h>
23 /*}}}*/
24
25 using std::string;
26
27 // TagFile::pkgTagFile - Constructor /*{{{*/
28 // ---------------------------------------------------------------------
29 /* */
30 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long Size) :
31 Fd(*pFd)
32 {
33 if (Fd.IsOpen() == false || Fd.Size() == 0)
34 {
35 _error->Discard();
36 Map = NULL;
37 Buffer = 0;
38 Start = End = Buffer = 0;
39 Done = true;
40 iOffset = 0;
41 return;
42 }
43
44 Map = new MMap(*pFd, MMap::ReadOnly);
45 Buffer = reinterpret_cast<char *>(Map->Data());
46 Start = End = Buffer;
47 Done = false;
48 iOffset = 0;
49 Fill();
50 }
51 /*}}}*/
52 // TagFile::~pkgTagFile - Destructor /*{{{*/
53 // ---------------------------------------------------------------------
54 /* */
55 pkgTagFile::~pkgTagFile()
56 {
57 delete Map;
58 }
59 /*}}}*/
60 // TagFile::Step - Advance to the next section /*{{{*/
61 // ---------------------------------------------------------------------
62 /* The Section Scanner runs directly over the data left between Start
63 * and End; no buffer is refilled or grown here. A failed scan with data
64 * still remaining is reported as a parse error.
65 */
66 bool pkgTagFile::Step(pkgTagSection &Tag)
67 {
68 if (Tag.Scan(Start,End - Start) == false)
69 {
70 if (Start == End)
71 return false;
72 else
73 return _error->Error(_("Unable to parse package file %s (1)"),
74 Fd.Name().c_str());
75 }
76 Start += Tag.size();
77 iOffset += Tag.size();
78
79 Tag.Trim();
80 return true;
81 }
82 /*}}}*/
83 // TagFile::Fill - Top up the buffer /*{{{*/
84 // ---------------------------------------------------------------------
85 /* This points End at the end of the mapped data and Start at the current
86 offset within it; it fails when the offset is at or past the end */
87 bool pkgTagFile::Fill()
88 {
89 unsigned int Size(Map->Size());
90 End = Buffer + Size;
91 if (iOffset >= Size)
92 return false;
93 Start = Buffer + iOffset;
94 return true;
95 }
96 /*}}}*/
97 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
98 // ---------------------------------------------------------------------
99 /* This jumps to a pre-recorded file location and reads the record
100 that is there */
101 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset)
102 {
103 // We are within a buffer space of the next hit..
104 if (Offset >= iOffset && iOffset + (End - Start) > Offset)
105 {
106 unsigned long Dist = Offset - iOffset;
107 Start += Dist;
108 iOffset += Dist;
109 return Step(Tag);
110 }
111
112 // Reposition and reload..
113 iOffset = Offset;
114 Done = false;
115 End = Start = Buffer;
116
117 if (Fill() == false)
118 return false;
119
120 if (Tag.Scan(Start,End - Start) == false)
121 return _error->Error(_("Unable to parse package file %s (2)"),Fd.Name().c_str());
122
123 return true;
124 }
125 /*}}}*/
126 // TagSection::Scan - Scan for the end of the header information /*{{{*/
127 // ---------------------------------------------------------------------
128 /* This looks for the first double new line in the data stream. It also
129 indexes the tags in the section. This very simple hash function for the
130 last 8 letters gives very good performance on the debian package files */
/* Hash a tag name into the range 0..255. Hashing stops at End, at a ':'
   or at a NUL byte, whichever comes first. Masking each character with
   0xDF clears bit 0x20, so ASCII letters hash the same in either case. */
inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
{
   unsigned long Hash = 0;
   while (Text != End)
   {
      const char Ch = *Text;
      if (Ch == ':' || Ch == 0)
         break;
      Hash = (Hash << 1) ^ (static_cast<unsigned long>(Ch) & 0xDF);
      ++Text;
   }
   return Hash & 0xFF;
}
138
139 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
140 {
141 const char *End = Start + MaxLength;
142 Stop = Section = Start;
143 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
144
145 if (Stop == 0)
146 return false;
147
148 TagCount = 0;
149 while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
150 {
151 TrimRecord(true,End);
152
153 // Start a new index and add it to the hash
154 if (isspace(Stop[0]) == 0)
155 {
156 Indexes[TagCount++] = Stop - Section;
157 unsigned long hash(AlphaHash(Stop, End));
158 while (AlphaIndexes[hash] != 0)
159 hash = (hash + 1) % (sizeof(AlphaIndexes) / sizeof(AlphaIndexes[0]));
160 AlphaIndexes[hash] = TagCount;
161 }
162
163 Stop = (const char *)memchr(Stop,'\n',End - Stop);
164
165 if (Stop == 0) {
166 Stop = End;
167 goto end;
168 }
169
170 for (; Stop+1 < End && Stop[1] == '\r'; Stop++);
171
172 // Double newline marks the end of the record
173 if (Stop+1 == End || Stop[1] == '\n')
174 end: {
175 Indexes[TagCount] = Stop - Section;
176 TrimRecord(false,End);
177 return true;
178 }
179
180 Stop++;
181 }
182
183 return false;
184 }
185 /*}}}*/
186 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
187 // ---------------------------------------------------------------------
188 /* There should be exactly 2 newline at the end of the record, no more. */
189 void pkgTagSection::TrimRecord(bool BeforeRecord, const char*& End)
190 {
191 if (BeforeRecord == true)
192 return;
193 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
194 }
195 /*}}}*/
196 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
197 // ---------------------------------------------------------------------
198 /* There should be exactly 1 newline at the end of the buffer, no more. */
199 void pkgTagSection::Trim()
200 {
201 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
202 }
203 /*}}}*/
204 // TagSection::Find - Locate a tag /*{{{*/
205 // ---------------------------------------------------------------------
206 /* This searches the section for a tag that matches the given string. */
207 bool pkgTagSection::Find(const char *Tag,unsigned &Pos) const
208 {
209 unsigned int Length = strlen(Tag);
210 unsigned int J = AlphaHash(Tag);
211
212 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
213 J = (J+1)%(sizeof(AlphaIndexes)/sizeof(AlphaIndexes[0])))
214 {
215 unsigned int I = AlphaIndexes[J];
216 if (I == 0)
217 return false;
218 I--;
219
220 const char *St;
221 St = Section + Indexes[I];
222 if (strncasecmp(Tag,St,Length) != 0)
223 continue;
224
225 // Make sure the colon is in the right place
226 const char *C = St + Length;
227 for (; isspace(*C) != 0; C++);
228 if (*C != ':')
229 continue;
230 Pos = I;
231 return true;
232 }
233
234 Pos = 0;
235 return false;
236 }
237 /*}}}*/
238 // TagSection::Find - Locate a tag /*{{{*/
239 // ---------------------------------------------------------------------
240 /* This searches the section for a tag that matches the given string. */
241 bool pkgTagSection::Find(const char *Tag,const char *&Start,
242 const char *&End) const
243 {
244 unsigned int Length = strlen(Tag);
245 unsigned int J = AlphaHash(Tag);
246
247 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
248 J = (J+1)%(sizeof(AlphaIndexes)/sizeof(AlphaIndexes[0])))
249 {
250 unsigned int I = AlphaIndexes[J];
251 if (I == 0)
252 return false;
253 I--;
254
255 const char *St;
256 St = Section + Indexes[I];
257 if (strncasecmp(Tag,St,Length) != 0)
258 continue;
259
260 // Make sure the colon is in the right place
261 const char *C = St + Length;
262 for (; isspace(*C) != 0; C++);
263 if (*C != ':')
264 continue;
265
266 // Strip off the gunk from the start end
267 Start = C;
268 End = Section + Indexes[I+1];
269 if (Start >= End)
270 return _error->Error("Internal parsing error");
271
272 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
273 for (; isspace(End[-1]) != 0 && End > Start; End--);
274
275 return true;
276 }
277
278 Start = End = 0;
279 return false;
280 }
281 /*}}}*/
282 // TagSection::FindS - Find a string /*{{{*/
283 // ---------------------------------------------------------------------
284 /* */
285 string pkgTagSection::FindS(const char *Tag) const
286 {
287 const char *Start;
288 const char *End;
289 if (Find(Tag,Start,End) == false)
290 return string();
291 return string(Start,End);
292 }
293 /*}}}*/
294 // TagSection::FindI - Find an integer /*{{{*/
295 // ---------------------------------------------------------------------
296 /* */
297 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
298 {
299 const char *Start;
300 const char *Stop;
301 if (Find(Tag,Start,Stop) == false)
302 return Default;
303
304 // Copy it into a temp buffer so we can use strtol
305 char S[300];
306 if ((unsigned)(Stop - Start) >= sizeof(S))
307 return Default;
308 strncpy(S,Start,Stop-Start);
309 S[Stop - Start] = 0;
310
311 char *End;
312 signed long Result = strtol(S,&End,10);
313 if (S == End)
314 return Default;
315 return Result;
316 }
317 /*}}}*/
318 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
319 // ---------------------------------------------------------------------
320 /* The bits marked in Flag are masked on/off in Flags */
321 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
322 unsigned long Flag) const
323 {
324 const char *Start;
325 const char *Stop;
326 if (Find(Tag,Start,Stop) == false)
327 return true;
328
329 switch (StringToBool(string(Start,Stop)))
330 {
331 case 0:
332 Flags &= ~Flag;
333 return true;
334
335 case 1:
336 Flags |= Flag;
337 return true;
338
339 default:
340 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
341 return true;
342 }
343 return true;
344 }
345 /*}}}*/
346 // TFRewrite - Rewrite a control record /*{{{*/
347 // ---------------------------------------------------------------------
348 /* This writes the control record to stdout rewriting it as necessary. The
349 override map item specifies the rewriting rules to follow. This also
350 takes the time to sort the field list. */
351
352 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
353 array. */
// Field names for binary package records, ending with a null entry
static const char *iTFRewritePackageOrder[] =
{
   "Package",
   "Essential",
   "Status",
   "Priority",
   "Section",
   "Installed-Size",
   "Maintainer",
   "Architecture",
   "Source",
   "Version",
   "Revision",         // Obsolete
   "Config-Version",   // Obsolete
   "Replaces",
   "Provides",
   "Depends",
   "Pre-Depends",
   "Recommends",
   "Suggests",
   "Conflicts",
   "Breaks",
   "Conffiles",
   "Filename",
   "Size",
   "MD5Sum",
   "SHA1",
   "SHA256",
   "MSDOS-Filename",   // Obsolete
   "Description",
   0
};
// Field names for source package records, ending with a null entry
static const char *iTFRewriteSourceOrder[] =
{
   "Package",
   "Source",
   "Binary",
   "Version",
   "Priority",
   "Section",
   "Maintainer",
   "Build-Depends",
   "Build-Depends-Indep",
   "Build-Conflicts",
   "Build-Conflicts-Indep",
   "Architecture",
   "Standards-Version",
   "Format",
   "Directory",
   "Files",
   0
};
401
402 /* Two levels of initialization are used because gcc will set the symbol
403 size of an array to the length of the array, causing dynamic relinking
404 errors. Doing this makes the symbol size constant */
405 const char **TFRewritePackageOrder = iTFRewritePackageOrder;
406 const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
407
408 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
409 TFRewriteData *Rewrite)
410 {
411 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
412 for (unsigned I = 0; I != 256; I++)
413 Visited[I] = 0;
414
415 // Set new tag up as necessary.
416 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
417 {
418 if (Rewrite[J].NewTag == 0)
419 Rewrite[J].NewTag = Rewrite[J].Tag;
420 }
421
422 // Write all all of the tags, in order.
423 for (unsigned int I = 0; Order[I] != 0; I++)
424 {
425 bool Rewritten = false;
426
427 // See if this is a field that needs to be rewritten
428 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
429 {
430 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
431 {
432 Visited[J] |= 2;
433 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
434 {
435 if (isspace(Rewrite[J].Rewrite[0]))
436 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
437 else
438 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
439 }
440
441 Rewritten = true;
442 break;
443 }
444 }
445
446 // See if it is in the fragment
447 unsigned Pos;
448 if (Tags.Find(Order[I],Pos) == false)
449 continue;
450 Visited[Pos] |= 1;
451
452 if (Rewritten == true)
453 continue;
454
455 /* Write out this element, taking a moment to rewrite the tag
456 in case of changes of case. */
457 const char *Start;
458 const char *Stop;
459 Tags.Get(Start,Stop,Pos);
460
461 if (fputs(Order[I],Output) < 0)
462 return _error->Errno("fputs","IO Error to output");
463 Start += strlen(Order[I]);
464 if (fwrite(Start,Stop - Start,1,Output) != 1)
465 return _error->Errno("fwrite","IO Error to output");
466 if (Stop[-1] != '\n')
467 fprintf(Output,"\n");
468 }
469
470 // Now write all the old tags that were missed.
471 for (unsigned int I = 0; I != Tags.Count(); I++)
472 {
473 if ((Visited[I] & 1) == 1)
474 continue;
475
476 const char *Start;
477 const char *Stop;
478 Tags.Get(Start,Stop,I);
479 const char *End = Start;
480 for (; End < Stop && *End != ':'; End++);
481
482 // See if this is a field that needs to be rewritten
483 bool Rewritten = false;
484 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
485 {
486 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
487 {
488 Visited[J] |= 2;
489 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
490 {
491 if (isspace(Rewrite[J].Rewrite[0]))
492 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
493 else
494 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
495 }
496
497 Rewritten = true;
498 break;
499 }
500 }
501
502 if (Rewritten == true)
503 continue;
504
505 // Write out this element
506 if (fwrite(Start,Stop - Start,1,Output) != 1)
507 return _error->Errno("fwrite","IO Error to output");
508 if (Stop[-1] != '\n')
509 fprintf(Output,"\n");
510 }
511
512 // Now write all the rewrites that were missed
513 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
514 {
515 if ((Visited[J] & 2) == 2)
516 continue;
517
518 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
519 {
520 if (isspace(Rewrite[J].Rewrite[0]))
521 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
522 else
523 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
524 }
525 }
526
527 return true;
528 }
529 /*}}}*/