Merge branch 'debian/sid' into debian/experimental

[apt.git] / apt-pkg / tagfile.cc
diff --git a/apt-pkg/tagfile.cc b/apt-pkg/tagfile.cc

index 1c79ee74ffaecc559f2a2daad8a4f63d30bdcce2..26c895417644058af8e34bcc6b73166979b52a5d 100644 (file)
--- a/apt-pkg/tagfile.cc
+++ b/apt-pkg/tagfile.cc
@@ -21,6 +21,8 @@
  #include <string>
  #include <stdio.h>
  #include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
  
  #include <apti18n.h>
                                                                         /*}}}*/
@@ -45,26 +47,59 @@ public:
     unsigned long long Size;
  };
  
+static unsigned long AlphaHash(const char *Text, size_t Length)                /*{{{*/
+{
+   /* This very simple hash function for the last 8 letters gives
+      very good performance on the Debian package files */
+   if (Length > 8)
+   {
+    Text += (Length - 8);
+    Length = 8;
+   }
+   unsigned long Res = 0;
+   for (size_t i = 0; i < Length; ++i)
+      Res = ((unsigned long)(Text[i]) & 0xDF) ^ (Res << 1);
+   return Res & 0xFF;
+}
+                                                                       /*}}}*/
+
  // TagFile::pkgTagFile - Constructor                                   /*{{{*/
  // ---------------------------------------------------------------------
  /* */
  pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long long Size)
+   : d(NULL)
+{
+   Init(pFd, Size);
+}
+
+void pkgTagFile::Init(FileFd *pFd,unsigned long long Size)
  {
+   /* The size is increased by 4 because if we start with the size of the
+      file we need to try to read 1 char more to see an EOF faster, 1
+      char the end-pointer can be on and maybe 2 newlines need to be added
+      to the end of the file -> 4 extra chars */
+   Size += 4;
+   if(d != NULL)
+   {
+      free(d->Buffer);
+      delete d;
+   }
     d = new pkgTagFilePrivate(pFd, Size);
  
     if (d->Fd.IsOpen() == false)
-   {
        d->Start = d->End = d->Buffer = 0;
+   else
+      d->Buffer = (char*)malloc(sizeof(char) * Size);
+
+   if (d->Buffer == NULL)
        d->Done = true;
-      d->iOffset = 0;
-      return;
-   }
-   
-   d->Buffer = new char[Size];
+   else
+      d->Done = false;
+
     d->Start = d->End = d->Buffer;
-   d->Done = false;
     d->iOffset = 0;
-   Fill();
+   if (d->Done == false)
+      Fill();
  }
                                                                         /*}}}*/
  // TagFile::~pkgTagFile - Destructor                                   /*{{{*/
@@ -72,12 +107,12 @@ pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long long Size)
  /* */
  pkgTagFile::~pkgTagFile()
  {
-   delete [] d->Buffer;
+   free(d->Buffer);
     delete d;
  }
                                                                         /*}}}*/
-// TagFile::Offset - Return the current offset in the buffer           /*{{{*/
-unsigned long pkgTagFile::Offset()
+// TagFile::Offset - Return the current offset in the buffer           /*{{{*/
+APT_PURE unsigned long pkgTagFile::Offset()
  {
     return d->iOffset;
  }
@@ -89,19 +124,22 @@ unsigned long pkgTagFile::Offset()
   */
  bool pkgTagFile::Resize()
  {
-   char *tmp;
-   unsigned long long EndSize = d->End - d->Start;
-
     // fail if the buffer grows too big
     if(d->Size > 1024*1024+1)
        return false;
  
+   return Resize(d->Size * 2);
+}
+bool pkgTagFile::Resize(unsigned long long const newSize)
+{
+   unsigned long long const EndSize = d->End - d->Start;
+
     // get new buffer and use it
-   tmp = new char[2*d->Size];
-   memcpy(tmp, d->Buffer, d->Size);
-   d->Size = d->Size*2;
-   delete [] d->Buffer;
-   d->Buffer = tmp;
+   char* newBuffer = (char*)realloc(d->Buffer, sizeof(char) * newSize);
+   if (newBuffer == NULL)
+      return false;
+   d->Buffer = newBuffer;
+   d->Size = newSize;
  
     // update the start/end pointers to the new buffer
     d->Start = d->Buffer;
@@ -117,18 +155,23 @@ bool pkgTagFile::Resize()
   */
  bool pkgTagFile::Step(pkgTagSection &Tag)
  {
-   while (Tag.Scan(d->Start,d->End - d->Start) == false)
+   if(Tag.Scan(d->Start,d->End - d->Start) == false)
     {
-      if (Fill() == false)
-        return false;
-      
-      if(Tag.Scan(d->Start,d->End - d->Start))
-        break;
+      do
+      {
+        if (Fill() == false)
+           return false;
+
+        if(Tag.Scan(d->Start,d->End - d->Start, false))
+           break;
+
+        if (Resize() == false)
+           return _error->Error(_("Unable to parse package file %s (1)"),
+                 d->Fd.Name().c_str());
  
-      if (Resize() == false)
-        return _error->Error(_("Unable to parse package file %s (1)"),
-                              d->Fd.Name().c_str());
+      } while (Tag.Scan(d->Start,d->End - d->Start, false) == false);
     }
+
     d->Start += Tag.size();
     d->iOffset += Tag.size();
  
@@ -152,9 +195,10 @@ bool pkgTagFile::Fill()
     if (d->Done == false)
     {
        // See if only a bit of the file is left
-      if (d->Fd.Read(d->End, d->Size - (d->End - d->Buffer),&Actual) == false)
+      unsigned long long const dataSize = d->Size - ((d->End - d->Buffer) + 1);
+      if (d->Fd.Read(d->End, dataSize, &Actual) == false)
          return false;
-      if (Actual != d->Size - (d->End - d->Buffer))
+      if (Actual != dataSize)
          d->Done = true;
        d->End += Actual;
     }
@@ -171,8 +215,13 @@ bool pkgTagFile::Fill()
        for (const char *E = d->End - 1; E - d->End < 6 && (*E == '\n' || *E == '\r'); E--)
          if (*E == '\n')
             LineCount++;
-      for (; LineCount < 2; LineCount++)
-        *d->End++ = '\n';
+      if (LineCount < 2)
+      {
+        if ((unsigned)(d->End - d->Buffer) >= d->Size)
+           Resize(d->Size + 3);
+        for (; LineCount < 2; LineCount++)
+           *d->End++ = '\n';
+      }
        
        return true;
     }
@@ -192,7 +241,11 @@ bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long long Offset)
        unsigned long long Dist = Offset - d->iOffset;
        d->Start += Dist;
        d->iOffset += Dist;
-      return Step(Tag);
+      // if we have seen the end, don't ask for more
+      if (d->Done == true)
+        return Tag.Scan(d->Start, d->End - d->Start);
+      else
+        return Step(Tag);
     }
  
     // Reposition and reload..
@@ -212,48 +265,120 @@ bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long long Offset)
     if (Fill() == false)
        return false;
     
-   if (Tag.Scan(d->Start, d->End - d->Start) == false)
+   if (Tag.Scan(d->Start, d->End - d->Start, false) == false)
        return _error->Error(_("Unable to parse package file %s (2)"),d->Fd.Name().c_str());
     
     return true;
  }
                                                                         /*}}}*/
-// TagSection::Scan - Scan for the end of the header information       /*{{{*/
+// pkgTagSection::pkgTagSection - Constructor                          /*{{{*/
  // ---------------------------------------------------------------------
-/* This looks for the first double new line in the data stream.
-   It also indexes the tags in the section. */
-bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
+/* */
+pkgTagSection::pkgTagSection()
+   : Section(0), d(NULL), Stop(0)
  {
+   memset(&LookupTable, 0, sizeof(LookupTable));
+}
+                                                                       /*}}}*/
+// TagSection::Scan - Scan for the end of the header information       /*{{{*/
+bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength, bool const Restart)
+{
+   Section = Start;
     const char *End = Start + MaxLength;
-   Stop = Section = Start;
-   memset(AlphaIndexes,0,sizeof(AlphaIndexes));
+
+   if (Restart == false && Tags.empty() == false)
+   {
+      Stop = Section + Tags.back().StartTag;
+      if (End <= Stop)
+        return false;
+      Stop = (const char *)memchr(Stop,'\n',End - Stop);
+      if (Stop == NULL)
+        return false;
+      ++Stop;
+   }
+   else
+   {
+      Stop = Section;
+      if (Tags.empty() == false)
+      {
+        memset(&LookupTable, 0, sizeof(LookupTable));
+        Tags.clear();
+      }
+      Tags.reserve(0x100);
+   }
+   size_t TagCount = Tags.size();
  
     if (Stop == 0)
        return false;
  
-   TagCount = 0;
-   while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
+   TagData lastTagData(0);
+   lastTagData.EndTag = 0;
+   unsigned long lastTagHash = 0;
+   while (Stop < End)
     {
-       TrimRecord(true,End);
+      TrimRecord(true,End);
+
+      // this can happen when TrimRecord trims away the entire Record
+      // (e.g. because it just contains comments)
+      if(Stop == End)
+         return true;
  
        // Start a new index and add it to the hash
        if (isspace(Stop[0]) == 0)
        {
-        Indexes[TagCount++] = Stop - Section;
-        AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
+        // store the last found tag
+        if (lastTagData.EndTag != 0)
+        {
+           if (LookupTable[lastTagHash] != 0)
+              lastTagData.NextInBucket = LookupTable[lastTagHash];
+           LookupTable[lastTagHash] = TagCount;
+           Tags.push_back(lastTagData);
+        }
+
+        ++TagCount;
+        lastTagData = TagData(Stop - Section);
+        // find the colon separating tag and value
+        char const * Colon = (char const *) memchr(Stop, ':', End - Stop);
+        if (Colon == NULL)
+           return false;
+        // find the end of the tag (which might or might not be the colon)
+        char const * EndTag = Colon;
+        --EndTag;
+        for (; EndTag > Stop && isspace(*EndTag) != 0; --EndTag)
+           ;
+        ++EndTag;
+        lastTagData.EndTag = EndTag - Section;
+        lastTagHash = AlphaHash(Stop, EndTag - Stop);
+        // find the beginning of the value
+        Stop = Colon + 1;
+        for (; isspace(*Stop) != 0; ++Stop);
+        if (Stop >= End)
+           return false;
+        lastTagData.StartValue = Stop - Section;
        }
  
        Stop = (const char *)memchr(Stop,'\n',End - Stop);
-      
+
        if (Stop == 0)
          return false;
  
-      for (; Stop+1 < End && Stop[1] == '\r'; Stop++);
+      for (; Stop+1 < End && Stop[1] == '\r'; Stop++)
+         /* nothing */
+         ;
  
        // Double newline marks the end of the record
        if (Stop+1 < End && Stop[1] == '\n')
        {
-        Indexes[TagCount] = Stop - Section;
+        if (lastTagData.EndTag != 0)
+        {
+           if (LookupTable[lastTagHash] != 0)
+              lastTagData.NextInBucket = LookupTable[lastTagHash];
+           LookupTable[lastTagHash] = TagCount;
+           Tags.push_back(lastTagData);
+        }
+
+        TagData const td(Stop - Section);
+        Tags.push_back(td);
          TrimRecord(false,End);
          return true;
        }
@@ -282,8 +407,8 @@ void pkgTagSection::Trim()
     for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
  }
                                                                         /*}}}*/
-// TagSection::Exists - return True if a tag exists                    /*{{{*/
-bool pkgTagSection::Exists(const char* const Tag)
+// TagSection::Exists - return True if a tag exists                    /*{{{*/
+bool pkgTagSection::Exists(const char* const Tag) const
  {
     unsigned int tmp;
     return Find(Tag, tmp);
@@ -294,73 +419,43 @@ bool pkgTagSection::Exists(const char* const Tag)
  /* This searches the section for a tag that matches the given string. */
  bool pkgTagSection::Find(const char *Tag,unsigned int &Pos) const
  {
-   unsigned int Length = strlen(Tag);
-   unsigned int I = AlphaIndexes[AlphaHash(Tag)];
-   if (I == 0)
+   size_t const Length = strlen(Tag);
+   unsigned int Bucket = LookupTable[AlphaHash(Tag, Length)];
+   if (Bucket == 0)
        return false;
-   I--;
-   
-   for (unsigned int Counter = 0; Counter != TagCount; Counter++, 
-       I = (I+1)%TagCount)
+
+   for (; Bucket != 0; Bucket = Tags[Bucket - 1].NextInBucket)
     {
-      const char *St;
-      St = Section + Indexes[I];
-      if (strncasecmp(Tag,St,Length) != 0)
+      if ((Tags[Bucket - 1].EndTag - Tags[Bucket - 1].StartTag) != Length)
          continue;
  
-      // Make sure the colon is in the right place
-      const char *C = St + Length;
-      for (; isspace(*C) != 0; C++);
-      if (*C != ':')
+      char const * const St = Section + Tags[Bucket - 1].StartTag;
+      if (strncasecmp(Tag,St,Length) != 0)
          continue;
-      Pos = I;
+
+      Pos = Bucket - 1;
        return true;
     }
  
     Pos = 0;
     return false;
  }
-                                                                       /*}}}*/
-// TagSection::Find - Locate a tag                                     /*{{{*/
-// ---------------------------------------------------------------------
-/* This searches the section for a tag that matches the given string. */
  bool pkgTagSection::Find(const char *Tag,const char *&Start,
                          const char *&End) const
  {
-   unsigned int Length = strlen(Tag);
-   unsigned int I = AlphaIndexes[AlphaHash(Tag)];
-   if (I == 0)
+   unsigned int Pos;
+   if (Find(Tag, Pos) == false)
        return false;
-   I--;
-   
-   for (unsigned int Counter = 0; Counter != TagCount; Counter++, 
-       I = (I+1)%TagCount)
-   {
-      const char *St;
-      St = Section + Indexes[I];
-      if (strncasecmp(Tag,St,Length) != 0)
-        continue;
-      
-      // Make sure the colon is in the right place
-      const char *C = St + Length;
-      for (; isspace(*C) != 0; C++);
-      if (*C != ':')
-        continue;
  
-      // Strip off the gunk from the start end
-      Start = C;
-      End = Section + Indexes[I+1];
-      if (Start >= End)
-        return _error->Error("Internal parsing error");
-      
-      for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
-      for (; isspace(End[-1]) != 0 && End > Start; End--);
-      
-      return true;
-   }
-   
-   Start = End = 0;
-   return false;
+   Start = Section + Tags[Pos].StartValue;
+   // Strip off the gunk from the end
+   End = Section + Tags[Pos + 1].StartTag;
+   if (unlikely(Start > End))
+      return _error->Error("Internal parsing error");
+
+   for (; isspace(End[-1]) != 0 && End > Start; --End);
+
+   return true;
  }
                                                                         /*}}}*/
  // TagSection::FindS - Find a string                                   /*{{{*/
@@ -423,6 +518,17 @@ unsigned long long pkgTagSection::FindULL(const char *Tag, unsigned long long co
     return Result;
  }
                                                                         /*}}}*/
+// TagSection::FindB - Find boolean value                              /*{{{*/
+// ---------------------------------------------------------------------
+/* */
+bool pkgTagSection::FindB(const char *Tag, bool const &Default) const
+{
+   const char *Start, *Stop;
+   if (Find(Tag, Start, Stop) == false)
+      return Default;
+   return StringToBool(string(Start, Stop));
+}
+                                                                       /*}}}*/
  // TagSection::FindFlag - Locate a yes/no type flag                    /*{{{*/
  // ---------------------------------------------------------------------
  /* The bits marked in Flag are masked on/off in Flags */
@@ -435,7 +541,7 @@ bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
        return true;
     return FindFlag(Flags, Flag, Start, Stop);
  }
-bool const pkgTagSection::FindFlag(unsigned long &Flags, unsigned long Flag,
+bool pkgTagSection::FindFlag(unsigned long &Flags, unsigned long Flag,
                                         char const* Start, char const* Stop)
  {
     switch (StringToBool(string(Start, Stop)))
@@ -455,6 +561,13 @@ bool const pkgTagSection::FindFlag(unsigned long &Flags, unsigned long Flag,
     return true;
  }
                                                                         /*}}}*/
+APT_PURE unsigned int pkgTagSection::Count() const {                   /*{{{*/
+   if (Tags.empty() == true)
+      return 0;
+   // the last element is just marking the end and isn't a real one
+   return Tags.size() - 1;
+}
+                                                                       /*}}}*/
  // TFRewrite - Rewrite a control record                                        /*{{{*/
  // ---------------------------------------------------------------------
  /* This writes the control record to stdout rewriting it as necessary. The
@@ -488,7 +601,7 @@ static const char *iTFRewritePackageOrder[] = {
                            "Conffiles",
                            "Filename",
                            "Size",
-                          "MD5Sum",
+                          "MD5sum",
                            "SHA1",
                            "SHA256",
                            "SHA512",
@@ -535,52 +648,54 @@ bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
     }
     
     // Write all of the tags, in order.
-   for (unsigned int I = 0; Order[I] != 0; I++)
+   if (Order != NULL)
     {
-      bool Rewritten = false;
-      
-      // See if this is a field that needs to be rewritten
-      for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
+      for (unsigned int I = 0; Order[I] != 0; I++)
        {
-        if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
-        {
-           Visited[J] |= 2;
-           if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
-           {
-              if (isspace(Rewrite[J].Rewrite[0]))
-                 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
-              else
-                 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
-           }
-           
-           Rewritten = true;
-           break;
-        }
-      }      
+         bool Rewritten = false;
+         
+         // See if this is a field that needs to be rewritten
+         for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
+         {
+            if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
+            {
+               Visited[J] |= 2;
+               if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
+               {
+                  if (isspace(Rewrite[J].Rewrite[0]))
+                     fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
+                  else
+                     fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
+               }
+               Rewritten = true;
+               break;
+            }
+         }
             
-      // See if it is in the fragment
-      unsigned Pos;
-      if (Tags.Find(Order[I],Pos) == false)
-        continue;
-      Visited[Pos] |= 1;
-
-      if (Rewritten == true)
-        continue;
+         // See if it is in the fragment
+         unsigned Pos;
+         if (Tags.Find(Order[I],Pos) == false)
+            continue;
+         Visited[Pos] |= 1;
+
+         if (Rewritten == true)
+            continue;
        
-      /* Write out this element, taking a moment to rewrite the tag
-         in case of changes of case. */
-      const char *Start;
-      const char *Stop;
-      Tags.Get(Start,Stop,Pos);
+         /* Write out this element, taking a moment to rewrite the tag
+            in case of changes of case. */
+         const char *Start;
+         const char *Stop;
+         Tags.Get(Start,Stop,Pos);
        
-      if (fputs(Order[I],Output) < 0)
-        return _error->Errno("fputs","IO Error to output");
-      Start += strlen(Order[I]);
-      if (fwrite(Start,Stop - Start,1,Output) != 1)
-        return _error->Errno("fwrite","IO Error to output");
-      if (Stop[-1] != '\n')
-        fprintf(Output,"\n");
-   }   
+         if (fputs(Order[I],Output) < 0)
+            return _error->Errno("fputs","IO Error to output");
+         Start += strlen(Order[I]);
+         if (fwrite(Start,Stop - Start,1,Output) != 1)
+            return _error->Errno("fwrite","IO Error to output");
+         if (Stop[-1] != '\n')
+            fprintf(Output,"\n");
+      }
+   }
  
     // Now write all the old tags that were missed.
     for (unsigned int I = 0; I != Tags.Count(); I++)