]> git.saurik.com Git - apt.git/blob - apt-pkg/tagfile.cc
Fixed string parsing, Bug #100046
[apt.git] / apt-pkg / tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: tagfile.cc,v 1.30 2001/05/14 05:56:26 jgg Exp $
4 /* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13 // Include Files /*{{{*/
14 #ifdef __GNUG__
15 #pragma implementation "apt-pkg/tagfile.h"
16 #endif
17
18 #include <apt-pkg/tagfile.h>
19 #include <apt-pkg/error.h>
20 #include <apt-pkg/strutl.h>
21
22 #include <apti18n.h>
23
24 #include <string>
25 #include <stdio.h>
26 #include <ctype.h>
27 /*}}}*/
28
29 using std::string;
30
31 // TagFile::pkgTagFile - Constructor /*{{{*/
32 // ---------------------------------------------------------------------
33 /* */
34 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long Size) : Fd(*pFd), Size(Size)
35 {
36 if (Fd.IsOpen() == false)
37 {
38 Buffer = 0;
39 Start = End = Buffer = 0;
40 Done = true;
41 iOffset = 0;
42 return;
43 }
44
45 Buffer = new char[Size];
46 Start = End = Buffer;
47 Done = false;
48 iOffset = 0;
49 Fill();
50 }
51 /*}}}*/
52 // TagFile::~pkgTagFile - Destructor /*{{{*/
53 // ---------------------------------------------------------------------
54 /* */
55 pkgTagFile::~pkgTagFile()
56 {
57 delete [] Buffer;
58 }
59 /*}}}*/
60 // TagFile::Step - Advance to the next section /*{{{*/
61 // ---------------------------------------------------------------------
62 /* If the Section Scanner fails we refill the buffer and try again. */
63 bool pkgTagFile::Step(pkgTagSection &Tag)
64 {
65 if (Tag.Scan(Start,End - Start) == false)
66 {
67 if (Fill() == false)
68 return false;
69
70 if (Tag.Scan(Start,End - Start) == false)
71 return _error->Error(_("Unable to parse package file %s (1)"),Fd.Name().c_str());
72 }
73 Start += Tag.size();
74 iOffset += Tag.size();
75
76 Tag.Trim();
77
78 return true;
79 }
80 /*}}}*/
81 // TagFile::Fill - Top up the buffer /*{{{*/
82 // ---------------------------------------------------------------------
83 /* This takes the bit at the end of the buffer and puts it at the start
84 then fills the rest from the file */
85 bool pkgTagFile::Fill()
86 {
87 unsigned long EndSize = End - Start;
88
89 memmove(Buffer,Start,EndSize);
90 Start = Buffer;
91 End = Buffer + EndSize;
92
93 if (Done == true)
94 {
95 if (EndSize <= 3)
96 return false;
97 if (Size - (End - Buffer) < 4)
98 return true;
99
100 // Append a double new line if one does not exist
101 unsigned int LineCount = 0;
102 for (const char *E = End - 1; E - End < 6 && (*E == '\n' || *E == '\r'); E--)
103 if (*E == '\n')
104 LineCount++;
105 for (; LineCount < 2; LineCount++)
106 *End++ = '\n';
107
108 return true;
109 }
110
111 // See if only a bit of the file is left
112 unsigned long Actual;
113 if (Fd.Read(End,Size - (End - Buffer),&Actual) == false)
114 return false;
115 if (Actual != Size - (End - Buffer))
116 Done = true;
117 End += Actual;
118 /*
119 if (Left < Size - (End - Buffer))
120 {
121 if (Fd.Read(End,Left) == false)
122 return false;
123
124 End += Left;
125 Left = 0;
126 }
127 else
128 {
129 if (Fd.Read(End,Size - (End - Buffer)) == false)
130 return false;
131
132 Left -= Size - (End - Buffer);
133 End = Buffer + Size;
134 }*/
135
136 return true;
137 }
138 /*}}}*/
139 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
140 // ---------------------------------------------------------------------
141 /* This jumps to a pre-recorded file location and reads the record
142 that is there */
143 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset)
144 {
145 // We are within a buffer space of the next hit..
146 if (Offset >= iOffset && iOffset + (End - Start) > Offset)
147 {
148 unsigned long Dist = Offset - iOffset;
149 Start += Dist;
150 iOffset += Dist;
151 return Step(Tag);
152 }
153
154 // Reposition and reload..
155 iOffset = Offset;
156 Done = false;
157 if (Fd.Seek(Offset) == false)
158 return false;
159 End = Start = Buffer;
160
161 if (Fill() == false)
162 return false;
163
164 if (Tag.Scan(Start,End - Start) == true)
165 return true;
166
167 // This appends a double new line (for the real eof handling)
168 if (Fill() == false)
169 return false;
170
171 if (Tag.Scan(Start,End - Start) == false)
172 return _error->Error(_("Unable to parse package file %s (2)"),Fd.Name().c_str());
173
174 return true;
175 }
176 /*}}}*/
177 // TagSection::Scan - Scan for the end of the header information /*{{{*/
178 // ---------------------------------------------------------------------
179 /* This looks for the first double new line in the data stream. It also
180 indexes the tags in the section. The index uses a very simple hash of the
181 tag name, which gives very good performance on the debian package files */
/* Hash a tag name into a bucket number in the range 0..255. Hashing stops
   at End, at the first ':' or at a NUL byte, whichever comes first.
   Bit 0x20 of every character is cleared before it is mixed in, so names
   that differ only in ASCII letter case get the same bucket. That keeps
   the hash in step with the case-insensitive comparison used by Find. */
inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
{
   unsigned long Res = 0;
   for (; Text != End && *Text != ':' && *Text != 0; Text++)
      Res = ((unsigned long)(unsigned char)(*Text) & 0xDF) ^ (Res << 2);
   return Res & 0xFF;
}
189
190 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
191 {
192 const char *End = Start + MaxLength;
193 Stop = Section = Start;
194 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
195
196 if (Stop == 0)
197 return false;
198
199 TagCount = 0;
200 while (TagCount < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
201 {
202 // Start a new index and add it to the hash
203 if (isspace(Stop[0]) == 0)
204 {
205 Indexes[TagCount++] = Stop - Section;
206 AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
207 }
208
209 Stop = (const char *)memchr(Stop,'\n',End - Stop);
210
211 if (Stop == 0)
212 return false;
213
214 for (; Stop[1] == '\r' && Stop+1 < End; Stop++);
215
216 // Double newline marks the end of the record
217 if (Stop+1 < End && Stop[1] == '\n')
218 {
219 Indexes[TagCount] = Stop - Section;
220 for (; (Stop[0] == '\n' || Stop[0] == '\r') && Stop < End; Stop++);
221 return true;
222 }
223
224 Stop++;
225 }
226
227 return false;
228 }
229 /*}}}*/
230 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
231 // ---------------------------------------------------------------------
232 /* There should be exactly 1 newline at the end of the buffer, no more. */
233 void pkgTagSection::Trim()
234 {
235 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
236 }
237 /*}}}*/
238 // TagSection::Find - Locate a tag /*{{{*/
239 // ---------------------------------------------------------------------
240 /* This searches the section for a tag that matches the given string. */
241 bool pkgTagSection::Find(const char *Tag,unsigned &Pos) const
242 {
243 unsigned int Length = strlen(Tag);
244 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
245 if (I == 0)
246 return false;
247 I--;
248
249 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
250 I = (I+1)%TagCount)
251 {
252 const char *St;
253 St = Section + Indexes[I];
254 if (strncasecmp(Tag,St,Length) != 0)
255 continue;
256
257 // Make sure the colon is in the right place
258 const char *C = St + Length;
259 for (; isspace(*C) != 0; C++);
260 if (*C != ':')
261 continue;
262 Pos = I;
263 return true;
264 }
265
266 Pos = 0;
267 return false;
268 }
269 /*}}}*/
270 // TagSection::Find - Locate a tag /*{{{*/
271 // ---------------------------------------------------------------------
272 /* This searches the section for a tag that matches the given string. */
273 bool pkgTagSection::Find(const char *Tag,const char *&Start,
274 const char *&End) const
275 {
276 unsigned int Length = strlen(Tag);
277 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
278 if (I == 0)
279 return false;
280 I--;
281
282 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
283 I = (I+1)%TagCount)
284 {
285 const char *St;
286 St = Section + Indexes[I];
287 if (strncasecmp(Tag,St,Length) != 0)
288 continue;
289
290 // Make sure the colon is in the right place
291 const char *C = St + Length;
292 for (; isspace(*C) != 0; C++);
293 if (*C != ':')
294 continue;
295
296 // Strip off the gunk from the start end
297 Start = C;
298 End = Section + Indexes[I+1];
299 if (Start >= End)
300 return _error->Error("Internal parsing error");
301
302 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
303 for (; isspace(End[-1]) != 0 && End > Start; End--);
304
305 return true;
306 }
307
308 Start = End = 0;
309 return false;
310 }
311 /*}}}*/
312 // TagSection::FindS - Find a string /*{{{*/
313 // ---------------------------------------------------------------------
314 /* */
315 string pkgTagSection::FindS(const char *Tag) const
316 {
317 const char *Start;
318 const char *End;
319 if (Find(Tag,Start,End) == false)
320 return string();
321 return string(Start,End);
322 }
323 /*}}}*/
324 // TagSection::FindI - Find an integer /*{{{*/
325 // ---------------------------------------------------------------------
326 /* */
327 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
328 {
329 const char *Start;
330 const char *Stop;
331 if (Find(Tag,Start,Stop) == false)
332 return Default;
333
334 // Copy it into a temp buffer so we can use strtol
335 char S[300];
336 if ((unsigned)(Stop - Start) >= sizeof(S))
337 return Default;
338 strncpy(S,Start,Stop-Start);
339 S[Stop - Start] = 0;
340
341 char *End;
342 signed long Result = strtol(S,&End,10);
343 if (S == End)
344 return Default;
345 return Result;
346 }
347 /*}}}*/
348 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
349 // ---------------------------------------------------------------------
350 /* The bits marked in Flag are masked on/off in Flags */
351 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
352 unsigned long Flag) const
353 {
354 const char *Start;
355 const char *Stop;
356 if (Find(Tag,Start,Stop) == false)
357 return true;
358
359 switch (StringToBool(string(Start,Stop)))
360 {
361 case 0:
362 Flags &= ~Flag;
363 return true;
364
365 case 1:
366 Flags |= Flag;
367 return true;
368
369 default:
370 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
371 return true;
372 }
373 return true;
374 }
375 /*}}}*/
376
377 // TFRewrite - Rewrite a control record /*{{{*/
378 // ---------------------------------------------------------------------
379 /* This writes the control record to the output stream, rewriting it as
380 necessary. The override map item specifies the rewriting rules to follow.
381 This also takes the time to sort the field list. */
382
/* Output order for the fields of a binary package record, terminated by a
   null entry. The order is taken from the fieldinfos array in the dpkg
   source, lib/parse.c. */
static const char *iTFRewritePackageOrder[] = {
   "Package", "Essential", "Status", "Priority", "Section",
   "Installed-Size", "Maintainer", "Architecture", "Source", "Version",
   "Revision",         // Obsolete
   "Config-Version",   // Obsolete
   "Replaces", "Provides", "Depends", "Pre-Depends", "Recommends",
   "Suggests", "Conflicts", "Conffiles", "Filename", "Size",
   "MD5Sum", "SHA1Sum",
   "MSDOS-Filename",   // Obsolete
   "Description",
   0
};
/* Output order for the fields of a source package record, terminated by a
   null entry. */
static const char *iTFRewriteSourceOrder[] = {
   "Package", "Source", "Binary", "Version", "Priority", "Section",
   "Maintainer",
   "Build-Depends", "Build-Depends-Indep",
   "Build-Conflicts", "Build-Conflicts-Indep",
   "Architecture", "Standards-Version", "Format", "Directory", "Files",
   0
};
430
431 /* Two levels of initialization are used because gcc will set the symbol
432 size of an array to the length of the array, causing dynamic relinking
433 errors. Doing this makes the symbol size constant */
434 const char **TFRewritePackageOrder = iTFRewritePackageOrder;
435 const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
436
437 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
438 TFRewriteData *Rewrite)
439 {
440 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
441 for (unsigned I = 0; I != 256; I++)
442 Visited[I] = 0;
443
444 // Set new tag up as necessary.
445 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
446 {
447 if (Rewrite[J].NewTag == 0)
448 Rewrite[J].NewTag = Rewrite[J].Tag;
449 }
450
451 // Write all all of the tags, in order.
452 for (unsigned int I = 0; Order[I] != 0; I++)
453 {
454 bool Rewritten = false;
455
456 // See if this is a field that needs to be rewritten
457 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
458 {
459 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
460 {
461 Visited[J] |= 2;
462 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
463 {
464 if (isspace(Rewrite[J].Rewrite[0]))
465 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
466 else
467 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
468 }
469
470 Rewritten = true;
471 break;
472 }
473 }
474
475 // See if it is in the fragment
476 unsigned Pos;
477 if (Tags.Find(Order[I],Pos) == false)
478 continue;
479 Visited[Pos] |= 1;
480
481 if (Rewritten == true)
482 continue;
483
484 /* Write out this element, taking a moment to rewrite the tag
485 in case of changes of case. */
486 const char *Start;
487 const char *Stop;
488 Tags.Get(Start,Stop,Pos);
489
490 if (fputs(Order[I],Output) < 0)
491 return _error->Errno("fputs","IO Error to output");
492 Start += strlen(Order[I]);
493 if (fwrite(Start,Stop - Start,1,Output) != 1)
494 return _error->Errno("fwrite","IO Error to output");
495 if (Stop[-1] != '\n')
496 fprintf(Output,"\n");
497 }
498
499 // Now write all the old tags that were missed.
500 for (unsigned int I = 0; I != Tags.Count(); I++)
501 {
502 if ((Visited[I] & 1) == 1)
503 continue;
504
505 const char *Start;
506 const char *Stop;
507 Tags.Get(Start,Stop,I);
508 const char *End = Start;
509 for (; End < Stop && *End != ':'; End++);
510
511 // See if this is a field that needs to be rewritten
512 bool Rewritten = false;
513 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
514 {
515 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
516 {
517 Visited[J] |= 2;
518 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
519 {
520 if (isspace(Rewrite[J].Rewrite[0]))
521 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
522 else
523 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
524 }
525
526 Rewritten = true;
527 break;
528 }
529 }
530
531 if (Rewritten == true)
532 continue;
533
534 // Write out this element
535 if (fwrite(Start,Stop - Start,1,Output) != 1)
536 return _error->Errno("fwrite","IO Error to output");
537 if (Stop[-1] != '\n')
538 fprintf(Output,"\n");
539 }
540
541 // Now write all the rewrites that were missed
542 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
543 {
544 if ((Visited[J] & 2) == 2)
545 continue;
546
547 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
548 {
549 if (isspace(Rewrite[J].Rewrite[0]))
550 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
551 else
552 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
553 }
554 }
555
556 return true;
557 }
558 /*}}}*/