apt-pkg/tagfile.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
   4 /* ######################################################################
   5
   6    Fast scanner for RFC-822 type header information
   7
   8    This uses a rotating buffer to load the package information into.
   9    The scanner runs over it and isolates and indexes a single section.
  10
  11    ##################################################################### */
  12                                                                         /*}}}*/
  13 // Include Files                                                        /*{{{*/
  14 #include<config.h>
  15
  16 #include <apt-pkg/tagfile.h>
  17 #include <apt-pkg/error.h>
  18 #include <apt-pkg/strutl.h>
  19 #include <apt-pkg/fileutl.h>
  20
  21 #include <string>
  22 #include <stdio.h>
  23 #include <ctype.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26
  27 #include <apti18n.h>
  28                                                                         /*}}}*/
  29
  30 using std::string;
  31
  32 class pkgTagFilePrivate
  33 {
  34 public:
  35    pkgTagFilePrivate(FileFd *pFd, unsigned long long Size) : Fd(*pFd), Buffer(NULL),
  36                                                              Start(NULL), End(NULL),
  37                                                              Done(false), iOffset(0),
  38                                                              Size(Size)
  39    {
  40    }
  41    FileFd &Fd;
  42    char *Buffer;
  43    char *Start;
  44    char *End;
  45    bool Done;
  46    unsigned long long iOffset;
  47    unsigned long long Size;
  48 };
  49
  50 // TagFile::pkgTagFile - Constructor                                    /*{{{*/
  51 // ---------------------------------------------------------------------
  52 /* */
  53 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long long Size)
  54 {
  55    /* The size is increased by 4 because if we start with the Size of the
  56       filename we need to try to read 1 char more to see an EOF faster, 1
  57       char the end-pointer can be on and maybe 2 newlines need to be added
  58       to the end of the file -> 4 extra chars */
  59    Size += 4;
  60    d = new pkgTagFilePrivate(pFd, Size);
  61
  62    if (d->Fd.IsOpen() == false)
  63       d->Start = d->End = d->Buffer = 0;
  64    else
  65       d->Buffer = (char*)malloc(sizeof(char) * Size);
  66
  67    if (d->Buffer == NULL)
  68       d->Done = true;
  69    else
  70       d->Done = false;
  71
  72    d->Start = d->End = d->Buffer;
  73    d->iOffset = 0;
  74    if (d->Done == false)
  75       Fill();
  76 }
  77                                                                         /*}}}*/
  78 // TagFile::~pkgTagFile - Destructor                                    /*{{{*/
  79 // ---------------------------------------------------------------------
  80 /* */
  81 pkgTagFile::~pkgTagFile()
  82 {
  83    free(d->Buffer);
  84    delete d;
  85 }
  86                                                                         /*}}}*/
  87 // TagFile::Offset - Return the current offset in the buffer            /*{{{*/
  88 APT_PURE unsigned long pkgTagFile::Offset()
  89 {
  90    return d->iOffset;
  91 }
  92                                                                         /*}}}*/
  93 // TagFile::Resize - Resize the internal buffer                         /*{{{*/
  94 // ---------------------------------------------------------------------
  95 /* Resize the internal buffer (double it in size). Fail if the maximum
  96  * size is reached.
  97  */
  98 bool pkgTagFile::Resize()
  99 {
 100    // fail is the buffer grows too big
 101    if(d->Size > 1024*1024+1)
 102       return false;
 103
 104    return Resize(d->Size * 2);
 105 }
 106 bool pkgTagFile::Resize(unsigned long long const newSize)
 107 {
 108    unsigned long long const EndSize = d->End - d->Start;
 109
 110    // get new buffer and use it
 111    char* newBuffer = (char*)realloc(d->Buffer, sizeof(char) * newSize);
 112    if (newBuffer == NULL)
 113       return false;
 114    d->Buffer = newBuffer;
 115    d->Size = newSize;
 116
 117    // update the start/end pointers to the new buffer
 118    d->Start = d->Buffer;
 119    d->End = d->Start + EndSize;
 120    return true;
 121 }
 122                                                                         /*}}}*/
 123 // TagFile::Step - Advance to the next section                          /*{{{*/
 124 // ---------------------------------------------------------------------
 125 /* If the Section Scanner fails we refill the buffer and try again.
 126  * If that fails too, double the buffer size and try again until a
 127  * maximum buffer is reached.
 128  */
 129 bool pkgTagFile::Step(pkgTagSection &Tag)
 130 {
 131    while (Tag.Scan(d->Start,d->End - d->Start) == false)
 132    {
 133       if (Fill() == false)
 134          return false;
 135
 136       if(Tag.Scan(d->Start,d->End - d->Start))
 137          break;
 138
 139       if (Resize() == false)
 140          return _error->Error(_("Unable to parse package file %s (1)"),
 141                               d->Fd.Name().c_str());
 142    }
 143    d->Start += Tag.size();
 144    d->iOffset += Tag.size();
 145
 146    Tag.Trim();
 147    return true;
 148 }
 149                                                                         /*}}}*/
 150 // TagFile::Fill - Top up the buffer                                    /*{{{*/
 151 // ---------------------------------------------------------------------
 152 /* This takes the bit at the end of the buffer and puts it at the start
 153    then fills the rest from the file */
 154 bool pkgTagFile::Fill()
 155 {
 156    unsigned long long EndSize = d->End - d->Start;
 157    unsigned long long Actual = 0;
 158
 159    memmove(d->Buffer,d->Start,EndSize);
 160    d->Start = d->Buffer;
 161    d->End = d->Buffer + EndSize;
 162
 163    if (d->Done == false)
 164    {
 165       // See if only a bit of the file is left
 166       unsigned long long const dataSize = d->Size - ((d->End - d->Buffer) + 1);
 167       if (d->Fd.Read(d->End, dataSize, &Actual) == false)
 168          return false;
 169       if (Actual != dataSize)
 170          d->Done = true;
 171       d->End += Actual;
 172    }
 173
 174    if (d->Done == true)
 175    {
 176       if (EndSize <= 3 && Actual == 0)
 177          return false;
 178       if (d->Size - (d->End - d->Buffer) < 4)
 179          return true;
 180
 181       // Append a double new line if one does not exist
 182       unsigned int LineCount = 0;
 183       for (const char *E = d->End - 1; E - d->End < 6 && (*E == '\n' || *E == '\r'); E--)
 184          if (*E == '\n')
 185             LineCount++;
 186       if (LineCount < 2)
 187       {
 188          if ((unsigned)(d->End - d->Buffer) >= d->Size)
 189             Resize(d->Size + 3);
 190          for (; LineCount < 2; LineCount++)
 191             *d->End++ = '\n';
 192       }
 193
 194       return true;
 195    }
 196
 197    return true;
 198 }
 199                                                                         /*}}}*/
 200 // TagFile::Jump - Jump to a pre-recorded location in the file          /*{{{*/
 201 // ---------------------------------------------------------------------
 202 /* This jumps to a pre-recorded file location and reads the record
 203    that is there */
 204 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long long Offset)
 205 {
 206    // We are within a buffer space of the next hit..
 207    if (Offset >= d->iOffset && d->iOffset + (d->End - d->Start) > Offset)
 208    {
 209       unsigned long long Dist = Offset - d->iOffset;
 210       d->Start += Dist;
 211       d->iOffset += Dist;
 212       // if we have seen the end, don't ask for more
 213       if (d->Done == true)
 214          return Tag.Scan(d->Start, d->End - d->Start);
 215       else
 216          return Step(Tag);
 217    }
 218
 219    // Reposition and reload..
 220    d->iOffset = Offset;
 221    d->Done = false;
 222    if (d->Fd.Seek(Offset) == false)
 223       return false;
 224    d->End = d->Start = d->Buffer;
 225
 226    if (Fill() == false)
 227       return false;
 228
 229    if (Tag.Scan(d->Start, d->End - d->Start) == true)
 230       return true;
 231
 232    // This appends a double new line (for the real eof handling)
 233    if (Fill() == false)
 234       return false;
 235
 236    if (Tag.Scan(d->Start, d->End - d->Start) == false)
 237       return _error->Error(_("Unable to parse package file %s (2)"),d->Fd.Name().c_str());
 238
 239    return true;
 240 }
 241                                                                         /*}}}*/
 242 // pkgTagSection::pkgTagSection - Constructor                           /*{{{*/
 243 // ---------------------------------------------------------------------
 244 /* */
 245 pkgTagSection::pkgTagSection()
 246    : Section(0), TagCount(0), d(NULL), Stop(0)
 247 {
 248    memset(&Indexes, 0, sizeof(Indexes));
 249    memset(&AlphaIndexes, 0, sizeof(AlphaIndexes));
 250 }
 251                                                                         /*}}}*/
 252 // TagSection::Scan - Scan for the end of the header information        /*{{{*/
 253 // ---------------------------------------------------------------------
 254 /* This looks for the first double new line in the data stream.
 255    It also indexes the tags in the section. */
 256 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
 257 {
 258    const char *End = Start + MaxLength;
 259    Stop = Section = Start;
 260    memset(AlphaIndexes,0,sizeof(AlphaIndexes));
 261
 262    if (Stop == 0)
 263       return false;
 264
 265    TagCount = 0;
 266    while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
 267    {
 268       TrimRecord(true,End);
 269
 270       // this can happen when TrimRecord trims away the entire Record
 271       // (e.g. because it just contains comments)
 272       if(Stop == End)
 273          return true;
 274
 275       // Start a new index and add it to the hash
 276       if (isspace(Stop[0]) == 0)
 277       {
 278          Indexes[TagCount++] = Stop - Section;
 279          AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
 280       }
 281
 282       Stop = (const char *)memchr(Stop,'\n',End - Stop);
 283
 284       if (Stop == 0)
 285          return false;
 286
 287       for (; Stop+1 < End && Stop[1] == '\r'; Stop++)
 288          /* nothing */
 289          ;
 290
 291       // Double newline marks the end of the record
 292       if (Stop+1 < End && Stop[1] == '\n')
 293       {
 294          Indexes[TagCount] = Stop - Section;
 295          TrimRecord(false,End);
 296          return true;
 297       }
 298
 299       Stop++;
 300    }
 301
 302    return false;
 303 }
 304                                                                         /*}}}*/
 305 // TagSection::TrimRecord - Trim off any garbage before/after a record  /*{{{*/
 306 // ---------------------------------------------------------------------
 307 /* There should be exactly 2 newline at the end of the record, no more. */
 308 void pkgTagSection::TrimRecord(bool BeforeRecord, const char*& End)
 309 {
 310    if (BeforeRecord == true)
 311       return;
 312    for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
 313 }
 314                                                                         /*}}}*/
 315 // TagSection::Trim - Trim off any trailing garbage                     /*{{{*/
 316 // ---------------------------------------------------------------------
 317 /* There should be exactly 1 newline at the end of the buffer, no more. */
 318 void pkgTagSection::Trim()
 319 {
 320    for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
 321 }
 322                                                                         /*}}}*/
 323 // TagSection::Exists - return True if a tag exists                     /*{{{*/
 324 bool pkgTagSection::Exists(const char* const Tag)
 325 {
 326    unsigned int tmp;
 327    return Find(Tag, tmp);
 328 }
 329                                                                         /*}}}*/
 330 // TagSection::Find - Locate a tag                                      /*{{{*/
 331 // ---------------------------------------------------------------------
 332 /* This searches the section for a tag that matches the given string. */
 333 bool pkgTagSection::Find(const char *Tag,unsigned int &Pos) const
 334 {
 335    unsigned int Length = strlen(Tag);
 336    unsigned int I = AlphaIndexes[AlphaHash(Tag)];
 337    if (I == 0)
 338       return false;
 339    I--;
 340
 341    for (unsigned int Counter = 0; Counter != TagCount; Counter++,
 342         I = (I+1)%TagCount)
 343    {
 344       const char *St;
 345       St = Section + Indexes[I];
 346       if (strncasecmp(Tag,St,Length) != 0)
 347          continue;
 348
 349       // Make sure the colon is in the right place
 350       const char *C = St + Length;
 351       for (; isspace(*C) != 0; C++);
 352       if (*C != ':')
 353          continue;
 354       Pos = I;
 355       return true;
 356    }
 357
 358    Pos = 0;
 359    return false;
 360 }
 361                                                                         /*}}}*/
 362 // TagSection::Find - Locate a tag                                      /*{{{*/
 363 // ---------------------------------------------------------------------
 364 /* This searches the section for a tag that matches the given string. */
 365 bool pkgTagSection::Find(const char *Tag,const char *&Start,
 366                          const char *&End) const
 367 {
 368    unsigned int Length = strlen(Tag);
 369    unsigned int I = AlphaIndexes[AlphaHash(Tag)];
 370    if (I == 0)
 371       return false;
 372    I--;
 373
 374    for (unsigned int Counter = 0; Counter != TagCount; Counter++,
 375         I = (I+1)%TagCount)
 376    {
 377       const char *St;
 378       St = Section + Indexes[I];
 379       if (strncasecmp(Tag,St,Length) != 0)
 380          continue;
 381
 382       // Make sure the colon is in the right place
 383       const char *C = St + Length;
 384       for (; isspace(*C) != 0; C++);
 385       if (*C != ':')
 386          continue;
 387
 388       // Strip off the gunk from the start end
 389       Start = C;
 390       End = Section + Indexes[I+1];
 391       if (Start >= End)
 392          return _error->Error("Internal parsing error");
 393
 394       for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
 395       for (; isspace(End[-1]) != 0 && End > Start; End--);
 396
 397       return true;
 398    }
 399
 400    Start = End = 0;
 401    return false;
 402 }
 403                                                                         /*}}}*/
 404 // TagSection::FindS - Find a string                                    /*{{{*/
 405 // ---------------------------------------------------------------------
 406 /* */
 407 string pkgTagSection::FindS(const char *Tag) const
 408 {
 409    const char *Start;
 410    const char *End;
 411    if (Find(Tag,Start,End) == false)
 412       return string();
 413    return string(Start,End);
 414 }
 415                                                                         /*}}}*/
 416 // TagSection::FindI - Find an integer                                  /*{{{*/
 417 // ---------------------------------------------------------------------
 418 /* */
 419 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
 420 {
 421    const char *Start;
 422    const char *Stop;
 423    if (Find(Tag,Start,Stop) == false)
 424       return Default;
 425
 426    // Copy it into a temp buffer so we can use strtol
 427    char S[300];
 428    if ((unsigned)(Stop - Start) >= sizeof(S))
 429       return Default;
 430    strncpy(S,Start,Stop-Start);
 431    S[Stop - Start] = 0;
 432
 433    char *End;
 434    signed long Result = strtol(S,&End,10);
 435    if (S == End)
 436       return Default;
 437    return Result;
 438 }
 439                                                                         /*}}}*/
 440 // TagSection::FindULL - Find an unsigned long long integer             /*{{{*/
 441 // ---------------------------------------------------------------------
 442 /* */
 443 unsigned long long pkgTagSection::FindULL(const char *Tag, unsigned long long const &Default) const
 444 {
 445    const char *Start;
 446    const char *Stop;
 447    if (Find(Tag,Start,Stop) == false)
 448       return Default;
 449
 450    // Copy it into a temp buffer so we can use strtoull
 451    char S[100];
 452    if ((unsigned)(Stop - Start) >= sizeof(S))
 453       return Default;
 454    strncpy(S,Start,Stop-Start);
 455    S[Stop - Start] = 0;
 456
 457    char *End;
 458    unsigned long long Result = strtoull(S,&End,10);
 459    if (S == End)
 460       return Default;
 461    return Result;
 462 }
 463                                                                         /*}}}*/
 464 // TagSection::FindFlag - Locate a yes/no type flag                     /*{{{*/
 465 // ---------------------------------------------------------------------
 466 /* The bits marked in Flag are masked on/off in Flags */
 467 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
 468                              unsigned long Flag) const
 469 {
 470    const char *Start;
 471    const char *Stop;
 472    if (Find(Tag,Start,Stop) == false)
 473       return true;
 474    return FindFlag(Flags, Flag, Start, Stop);
 475 }
 476 bool pkgTagSection::FindFlag(unsigned long &Flags, unsigned long Flag,
 477                                         char const* Start, char const* Stop)
 478 {
 479    switch (StringToBool(string(Start, Stop)))
 480    {
 481       case 0:
 482       Flags &= ~Flag;
 483       return true;
 484
 485       case 1:
 486       Flags |= Flag;
 487       return true;
 488
 489       default:
 490       _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
 491       return true;
 492    }
 493    return true;
 494 }
 495                                                                         /*}}}*/
 496 // TFRewrite - Rewrite a control record                                 /*{{{*/
 497 // ---------------------------------------------------------------------
 498 /* This writes the control record to the given output stream, rewriting it
 499    as necessary. The override map item specifies the rewriting rules to
 500    follow. This also takes the time to sort the field list. */
 501
 502 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
 503    array. */
 504 static const char *iTFRewritePackageOrder[] = {
 505                           "Package",
 506                           "Essential",
 507                           "Status",
 508                           "Priority",
 509                           "Section",
 510                           "Installed-Size",
 511                           "Maintainer",
 512                           "Original-Maintainer",
 513                           "Architecture",
 514                           "Source",
 515                           "Version",
 516                            "Revision",         // Obsolete
 517                            "Config-Version",   // Obsolete
 518                           "Replaces",
 519                           "Provides",
 520                           "Depends",
 521                           "Pre-Depends",
 522                           "Recommends",
 523                           "Suggests",
 524                           "Conflicts",
 525                           "Breaks",
 526                           "Conffiles",
 527                           "Filename",
 528                           "Size",
 529                           "MD5Sum",
 530                           "SHA1",
 531                           "SHA256",
 532                           "SHA512",
 533                            "MSDOS-Filename",   // Obsolete
 534                           "Description",
 535                           0};
 536 static const char *iTFRewriteSourceOrder[] = {"Package",
 537                                       "Source",
 538                                       "Binary",
 539                                       "Version",
 540                                       "Priority",
 541                                       "Section",
 542                                       "Maintainer",
 543                                       "Original-Maintainer",
 544                                       "Build-Depends",
 545                                       "Build-Depends-Indep",
 546                                       "Build-Conflicts",
 547                                       "Build-Conflicts-Indep",
 548                                       "Architecture",
 549                                       "Standards-Version",
 550                                       "Format",
 551                                       "Directory",
 552                                       "Files",
 553                                       0};
 554
 555 /* Two levels of initialization are used because gcc will set the symbol
 556    size of an array to the length of the array, causing dynamic relinking
 557    errors. Doing this makes the symbol size constant */
 558 const char **TFRewritePackageOrder = iTFRewritePackageOrder;
 559 const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
 560
 561 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
 562                TFRewriteData *Rewrite)
 563 {
 564    unsigned char Visited[256];   // Bit 1 is Order, Bit 2 is Rewrite
 565    for (unsigned I = 0; I != 256; I++)
 566       Visited[I] = 0;
 567
 568    // Set new tag up as necessary.
 569    for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
 570    {
 571       if (Rewrite[J].NewTag == 0)
 572          Rewrite[J].NewTag = Rewrite[J].Tag;
 573    }
 574
 575    // Write all all of the tags, in order.
 576    if (Order != NULL)
 577    {
 578       for (unsigned int I = 0; Order[I] != 0; I++)
 579       {
 580          bool Rewritten = false;
 581
 582          // See if this is a field that needs to be rewritten
 583          for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
 584          {
 585             if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
 586             {
 587                Visited[J] |= 2;
 588                if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
 589                {
 590                   if (isspace(Rewrite[J].Rewrite[0]))
 591                      fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 592                   else
 593                      fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 594                }
 595                Rewritten = true;
 596                break;
 597             }
 598          }
 599
 600          // See if it is in the fragment
 601          unsigned Pos;
 602          if (Tags.Find(Order[I],Pos) == false)
 603             continue;
 604          Visited[Pos] |= 1;
 605
 606          if (Rewritten == true)
 607             continue;
 608
 609          /* Write out this element, taking a moment to rewrite the tag
 610             in case of changes of case. */
 611          const char *Start;
 612          const char *Stop;
 613          Tags.Get(Start,Stop,Pos);
 614
 615          if (fputs(Order[I],Output) < 0)
 616             return _error->Errno("fputs","IO Error to output");
 617          Start += strlen(Order[I]);
 618          if (fwrite(Start,Stop - Start,1,Output) != 1)
 619             return _error->Errno("fwrite","IO Error to output");
 620          if (Stop[-1] != '\n')
 621             fprintf(Output,"\n");
 622       }
 623    }
 624
 625    // Now write all the old tags that were missed.
 626    for (unsigned int I = 0; I != Tags.Count(); I++)
 627    {
 628       if ((Visited[I] & 1) == 1)
 629          continue;
 630
 631       const char *Start;
 632       const char *Stop;
 633       Tags.Get(Start,Stop,I);
 634       const char *End = Start;
 635       for (; End < Stop && *End != ':'; End++);
 636
 637       // See if this is a field that needs to be rewritten
 638       bool Rewritten = false;
 639       for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
 640       {
 641          if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
 642          {
 643             Visited[J] |= 2;
 644             if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
 645             {
 646                if (isspace(Rewrite[J].Rewrite[0]))
 647                   fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 648                else
 649                   fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 650             }
 651
 652             Rewritten = true;
 653             break;
 654          }
 655       }
 656
 657       if (Rewritten == true)
 658          continue;
 659
 660       // Write out this element
 661       if (fwrite(Start,Stop - Start,1,Output) != 1)
 662          return _error->Errno("fwrite","IO Error to output");
 663       if (Stop[-1] != '\n')
 664          fprintf(Output,"\n");
 665    }
 666
 667    // Now write all the rewrites that were missed
 668    for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
 669    {
 670       if ((Visited[J] & 2) == 2)
 671          continue;
 672
 673       if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
 674       {
 675          if (isspace(Rewrite[J].Rewrite[0]))
 676             fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 677          else
 678             fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 679       }
 680    }
 681
 682    return true;
 683 }
 684                                                                         /*}}}*/