unsigned long long Size;
};
+static unsigned long AlphaHash(const char *Text, size_t Length) /*{{{*/
+{
+ /* This very simple hash function for the last 8 letters gives
+ very good performance on the debian package files */
+ if (Length > 8)
+ {
+ Text += (Length - 8);
+ Length = 8;
+ }
+ unsigned long Res = 0;
+ for (size_t i = 0; i < Length; ++i)
+ Res = ((unsigned long)(Text[i]) & 0xDF) ^ (Res << 1);
+ return Res & 0xFF;
+}
+ /*}}}*/
+
// TagFile::pkgTagFile - Constructor /*{{{*/
// ---------------------------------------------------------------------
/* */
*/
bool pkgTagFile::Step(pkgTagSection &Tag)
{
- while (Tag.Scan(d->Start,d->End - d->Start) == false)
+ if(Tag.Scan(d->Start,d->End - d->Start) == false)
{
- if (Fill() == false)
- return false;
-
- if(Tag.Scan(d->Start,d->End - d->Start))
- break;
+ do
+ {
+ if (Fill() == false)
+ return false;
+
+ if(Tag.Scan(d->Start,d->End - d->Start, false))
+ break;
+
+ if (Resize() == false)
+ return _error->Error(_("Unable to parse package file %s (1)"),
+ d->Fd.Name().c_str());
- if (Resize() == false)
- return _error->Error(_("Unable to parse package file %s (1)"),
- d->Fd.Name().c_str());
+ } while (Tag.Scan(d->Start,d->End - d->Start, false) == false);
}
+
d->Start += Tag.size();
d->iOffset += Tag.size();
if (Fill() == false)
return false;
- if (Tag.Scan(d->Start, d->End - d->Start) == false)
+ if (Tag.Scan(d->Start, d->End - d->Start, false) == false)
return _error->Error(_("Unable to parse package file %s (2)"),d->Fd.Name().c_str());
return true;
// ---------------------------------------------------------------------
/* */
pkgTagSection::pkgTagSection()
- : Section(0), TagCount(0), d(NULL), Stop(0)
+ : Section(0), d(NULL), Stop(0)
{
- memset(&Indexes, 0, sizeof(Indexes));
- memset(&AlphaIndexes, 0, sizeof(AlphaIndexes));
+ memset(&LookupTable, 0, sizeof(LookupTable));
}
/*}}}*/
// TagSection::Scan - Scan for the end of the header information /*{{{*/
-// ---------------------------------------------------------------------
-/* This looks for the first double new line in the data stream.
- It also indexes the tags in the section. */
-bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
+bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength, bool const Restart)
{
+ Section = Start;
const char *End = Start + MaxLength;
- Stop = Section = Start;
- memset(AlphaIndexes,0,sizeof(AlphaIndexes));
+
+ if (Restart == false && Tags.empty() == false)
+ {
+ Stop = Section + Tags.back().StartTag;
+ if (End <= Stop)
+ return false;
+ Stop = (const char *)memchr(Stop,'\n',End - Stop);
+ if (Stop == NULL)
+ return false;
+ ++Stop;
+ }
+ else
+ {
+ Stop = Section;
+ if (Tags.empty() == false)
+ {
+ memset(&LookupTable, 0, sizeof(LookupTable));
+ Tags.clear();
+ }
+ Tags.reserve(0x100);
+ }
+ size_t TagCount = Tags.size();
if (Stop == 0)
return false;
- TagCount = 0;
- while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
+ TagData lastTagData(0);
+ lastTagData.EndTag = 0;
+ unsigned long lastTagHash = 0;
+ while (Stop < End)
{
TrimRecord(true,End);
// Start a new index and add it to the hash
if (isspace(Stop[0]) == 0)
{
- Indexes[TagCount++] = Stop - Section;
- AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
+ // store the last found tag
+ if (lastTagData.EndTag != 0)
+ {
+ if (LookupTable[lastTagHash] != 0)
+ lastTagData.NextInBucket = LookupTable[lastTagHash];
+ LookupTable[lastTagHash] = TagCount;
+ Tags.push_back(lastTagData);
+ }
+
+ ++TagCount;
+ lastTagData = TagData(Stop - Section);
+ // find the colon separating tag and value
+ char const * Colon = (char const *) memchr(Stop, ':', End - Stop);
+ if (Colon == NULL)
+ return false;
+ // find the end of the tag (which might or might not be the colon)
+ char const * EndTag = Colon;
+ --EndTag;
+ for (; EndTag > Stop && isspace(*EndTag) != 0; --EndTag)
+ ;
+ ++EndTag;
+ lastTagData.EndTag = EndTag - Section;
+ lastTagHash = AlphaHash(Stop, EndTag - Stop);
+ // find the beginning of the value
+ Stop = Colon + 1;
+ for (; isspace(*Stop) != 0; ++Stop);
+ if (Stop >= End)
+ return false;
+ lastTagData.StartValue = Stop - Section;
}
Stop = (const char *)memchr(Stop,'\n',End - Stop);
-
+
if (Stop == 0)
return false;
// Double newline marks the end of the record
if (Stop+1 < End && Stop[1] == '\n')
{
- Indexes[TagCount] = Stop - Section;
+ if (lastTagData.EndTag != 0)
+ {
+ if (LookupTable[lastTagHash] != 0)
+ lastTagData.NextInBucket = LookupTable[lastTagHash];
+ LookupTable[lastTagHash] = TagCount;
+ Tags.push_back(lastTagData);
+ }
+
+ TagData const td(Stop - Section);
+ Tags.push_back(td);
TrimRecord(false,End);
return true;
}
for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
}
/*}}}*/
-// TagSection::Exists - return True if a tag exists /*{{{*/
-bool pkgTagSection::Exists(const char* const Tag)
+// TagSection::Exists - return True if a tag exists /*{{{*/
+bool pkgTagSection::Exists(const char* const Tag) const
{
unsigned int tmp;
return Find(Tag, tmp);
/* This searches the section for a tag that matches the given string. */
bool pkgTagSection::Find(const char *Tag,unsigned int &Pos) const
{
- unsigned int Length = strlen(Tag);
- unsigned int I = AlphaIndexes[AlphaHash(Tag)];
- if (I == 0)
+ size_t const Length = strlen(Tag);
+ unsigned int Bucket = LookupTable[AlphaHash(Tag, Length)];
+ if (Bucket == 0)
return false;
- I--;
-
- for (unsigned int Counter = 0; Counter != TagCount; Counter++,
- I = (I+1)%TagCount)
+
+ for (; Bucket != 0; Bucket = Tags[Bucket - 1].NextInBucket)
{
- const char *St;
- St = Section + Indexes[I];
- if (strncasecmp(Tag,St,Length) != 0)
+ if ((Tags[Bucket - 1].EndTag - Tags[Bucket - 1].StartTag) != Length)
continue;
- // Make sure the colon is in the right place
- const char *C = St + Length;
- for (; isspace(*C) != 0; C++);
- if (*C != ':')
+ char const * const St = Section + Tags[Bucket - 1].StartTag;
+ if (strncasecmp(Tag,St,Length) != 0)
continue;
- Pos = I;
+
+ Pos = Bucket - 1;
return true;
}
Pos = 0;
return false;
}
- /*}}}*/
-// TagSection::Find - Locate a tag /*{{{*/
-// ---------------------------------------------------------------------
-/* This searches the section for a tag that matches the given string. */
bool pkgTagSection::Find(const char *Tag,const char *&Start,
const char *&End) const
{
- unsigned int Length = strlen(Tag);
- unsigned int I = AlphaIndexes[AlphaHash(Tag)];
- if (I == 0)
+ unsigned int Pos;
+ if (Find(Tag, Pos) == false)
return false;
- I--;
-
- for (unsigned int Counter = 0; Counter != TagCount; Counter++,
- I = (I+1)%TagCount)
- {
- const char *St;
- St = Section + Indexes[I];
- if (strncasecmp(Tag,St,Length) != 0)
- continue;
-
- // Make sure the colon is in the right place
- const char *C = St + Length;
- for (; isspace(*C) != 0; C++);
- if (*C != ':')
- continue;
- // Strip off the gunk from the start end
- Start = C;
- End = Section + Indexes[I+1];
- if (Start >= End)
- return _error->Error("Internal parsing error");
-
- for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
- for (; isspace(End[-1]) != 0 && End > Start; End--);
-
- return true;
- }
-
- Start = End = 0;
- return false;
+ Start = Section + Tags[Pos].StartValue;
+ // Strip off the gunk from the end
+ End = Section + Tags[Pos + 1].StartTag;
+ if (unlikely(Start > End))
+ return _error->Error("Internal parsing error");
+
+ for (; isspace(End[-1]) != 0 && End > Start; --End);
+
+ return true;
}
/*}}}*/
// TagSection::FindS - Find a string /*{{{*/
return true;
}
/*}}}*/
+APT_PURE unsigned int pkgTagSection::Count() const { /*{{{*/
+ if (Tags.empty() == true)
+ return 0;
+ // the last element is just marking the end and isn't a real one
+ return Tags.size() - 1;
+}
+ /*}}}*/
// TFRewrite - Rewrite a control record /*{{{*/
// ---------------------------------------------------------------------
/* This writes the control record to stdout rewriting it as necessary. The
#include <stdio.h>
#include <string>
+#include <vector>
+#include <list>
#ifndef APT_8_CLEANER_HEADERS
#include <apt-pkg/fileutl.h>
class pkgTagSection
{
const char *Section;
- // We have a limit of 256 tags per section.
- unsigned int Indexes[256];
- unsigned int AlphaIndexes[0x100];
- unsigned int TagCount;
+ struct TagData {
+ unsigned int StartTag;
+ unsigned int EndTag;
+ unsigned int StartValue;
+ unsigned int NextInBucket;
+
+ TagData(unsigned int const StartTag) : StartTag(StartTag), NextInBucket(0) {}
+ };
+ std::vector<TagData> Tags;
+ unsigned int LookupTable[0x100];
+
// dpointer placeholder (for later in case we need it)
void *d;
- /* This very simple hash function for the last 8 letters gives
- very good performance on the debian package files */
- inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
- {
- unsigned long Res = 0;
- for (; Text != End && *Text != ':' && *Text != 0; Text++)
- Res = ((unsigned long)(*Text) & 0xDF) ^ (Res << 1);
- return Res & 0xFF;
- }
-
protected:
const char *Stop;
unsigned long Flag) const;
bool static FindFlag(unsigned long &Flags, unsigned long Flag,
const char* Start, const char* Stop);
- bool Scan(const char *Start,unsigned long MaxLength);
+
+ /** \brief searches the boundaries of the current section
+ *
+ * While parameter Start marks the beginning of the section, this method
+ * will search for the first double newline in the data stream which marks
+ * the end of the section. It also does a first pass over the content of
+ * the section parsing it as encountered for processing later on by Find
+ *
+ * @param Start is the beginning of the section
+ * @param MaxLength is the size of valid data in the stream pointed to by Start
+ * @param Restart if enabled internal state will be cleared, otherwise it is
+ * assumed that now more data is available in the stream and the parsing will
+ * start were it encountered insufficent data the last time.
+ *
+ * @return \b true if section end was found, \b false otherwise.
+ * Beware that internal state will be inconsistent if \b false is returned!
+ */
+ APT_MUSTCHECK bool Scan(const char *Start, unsigned long MaxLength, bool const Restart = true);
inline unsigned long size() const {return Stop - Section;};
void Trim();
virtual void TrimRecord(bool BeforeRecord, const char* &End);
-
- inline unsigned int Count() const {return TagCount;};
- bool Exists(const char* const Tag);
-
+
+ /** \brief amount of Tags in the current section
+ *
+ * Note: if a Tag is mentioned repeatly it will be counted multiple
+ * times, but only the last occurance is available via Find methods.
+ */
+ unsigned int Count() const;
+ bool Exists(const char* const Tag) const;
+
inline void Get(const char *&Start,const char *&Stop,unsigned int I) const
- {Start = Section + Indexes[I]; Stop = Section + Indexes[I+1];}
-
+ {Start = Section + Tags[I].StartTag; Stop = Section + Tags[I+1].StartTag;}
+
inline void GetSection(const char *&Start,const char *&Stop) const
{
Start = Section;
if (strcmp(I.Name, "control") == 0)
{
delete [] Control;
- Control = new char[I.Size+1];
- Control[I.Size] = 0;
+ Control = new char[I.Size+3];
+ Control[I.Size] = '\n';
+ Control[I.Size + 1] = '\n';
+ Control[I.Size + 2] = '\0';
Which = IsControl;
- ControlLen = I.Size;
+ ControlLen = I.Size + 3;
// make it call the Process method below. this is so evil
Fd = -2;
}
bool DebFile::ParseInfo()
{
if (Control == NULL) return false;
-
+
pkgTagSection Section;
- Section.Scan(Control, ControlLen);
+ if (Section.Scan(Control, ControlLen) == false)
+ return false;
Package = Section.FindS("Package");
Version = GetInstalledVer(Package);
mkdir aptarchive/pool/invalid
printf "meep" > aptarchive/pool/invalid/invalid_1.0.dsc
testequal "
+E: Could not find a record in the DSC 'aptarchive/pool/invalid/invalid_1.0.dsc'" aptftparchive sources aptarchive/pool/invalid
+rm -f aptarchive/pool/invalid/invalid_1.0.dsc
+
+printf "meep: yes" > aptarchive/pool/invalid/invalid_1.0.dsc
+testequal "
E: Could not find a Source entry in the DSC 'aptarchive/pool/invalid/invalid_1.0.dsc'" aptftparchive sources aptarchive/pool/invalid
rm -f aptarchive/pool/invalid/invalid_1.0.dsc
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <sstream>
#include <gtest/gtest.h>
// There is only one section in this tag file
EXPECT_FALSE(tfile.Step(section));
}
+
+TEST(TagFileTest,MultipleSections)
+{
+ FileFd fd;
+ createTemporaryFile("bigsection", fd, NULL, "Package: pkgA\n"
+ "Version: 1\n"
+ "Size: 100\n"
+ "Description: aaa\n"
+ " aaa\n"
+ "\n"
+ "Package: pkgB\n"
+ "Version: 1\n"
+ "Flag: no\n"
+ "Description: bbb\n"
+ "\n"
+ "Package: pkgC\n"
+ "Version: 2\n"
+ "Flag: yes\n"
+ "Description:\n"
+ " ccc\n"
+ );
+
+ pkgTagFile tfile(&fd);
+ pkgTagSection section;
+ EXPECT_FALSE(section.Exists("Version"));
+
+ EXPECT_TRUE(tfile.Step(section));
+ EXPECT_EQ(4, section.Count());
+ EXPECT_TRUE(section.Exists("Version"));
+ EXPECT_TRUE(section.Exists("Package"));
+ EXPECT_TRUE(section.Exists("Size"));
+ EXPECT_FALSE(section.Exists("Flag"));
+ EXPECT_TRUE(section.Exists("Description"));
+ EXPECT_EQ("pkgA", section.FindS("Package"));
+ EXPECT_EQ("1", section.FindS("Version"));
+ EXPECT_EQ(1, section.FindULL("Version"));
+ EXPECT_EQ(100, section.FindULL("Size"));
+ unsigned long Flags = 1;
+ EXPECT_TRUE(section.FindFlag("Flag", Flags, 1));
+ EXPECT_EQ(1, Flags);
+ Flags = 0;
+ EXPECT_TRUE(section.FindFlag("Flag", Flags, 1));
+ EXPECT_EQ(0, Flags);
+ EXPECT_EQ("aaa\n aaa", section.FindS("Description"));
+
+
+ EXPECT_TRUE(tfile.Step(section));
+ EXPECT_EQ(4, section.Count());
+ EXPECT_TRUE(section.Exists("Version"));
+ EXPECT_TRUE(section.Exists("Package"));
+ EXPECT_FALSE(section.Exists("Size"));
+ EXPECT_TRUE(section.Exists("Flag"));
+ EXPECT_TRUE(section.Exists("Description"));
+ EXPECT_EQ("pkgB", section.FindS("Package"));
+ EXPECT_EQ("1", section.FindS("Version"));
+ EXPECT_EQ(1, section.FindULL("Version"));
+ EXPECT_EQ(0, section.FindULL("Size"));
+ Flags = 1;
+ EXPECT_TRUE(section.FindFlag("Flag", Flags, 1));
+ EXPECT_EQ(0, Flags);
+ Flags = 0;
+ EXPECT_TRUE(section.FindFlag("Flag", Flags, 1));
+ EXPECT_EQ(0, Flags);
+ EXPECT_EQ("bbb", section.FindS("Description"));
+
+ EXPECT_TRUE(tfile.Step(section));
+ EXPECT_EQ(4, section.Count());
+ EXPECT_TRUE(section.Exists("Version"));
+ EXPECT_TRUE(section.Exists("Package"));
+ EXPECT_FALSE(section.Exists("Size"));
+ EXPECT_TRUE(section.Exists("Flag"));
+ EXPECT_TRUE(section.Exists("Description"));
+ EXPECT_EQ("pkgC", section.FindS("Package"));
+ EXPECT_EQ("2", section.FindS("Version"));
+ EXPECT_EQ(2, section.FindULL("Version"));
+ Flags = 0;
+ EXPECT_TRUE(section.FindFlag("Flag", Flags, 1));
+ EXPECT_EQ(1, Flags);
+ Flags = 1;
+ EXPECT_TRUE(section.FindFlag("Flag", Flags, 1));
+ EXPECT_EQ(1, Flags);
+ EXPECT_EQ("ccc", section.FindS("Description"));
+
+ // There is no section left in this tag file
+ EXPECT_FALSE(tfile.Step(section));
+}
+
+TEST(TagFileTest,BigSection)
+{
+ size_t const count = 500;
+ std::stringstream content;
+ for (size_t i = 0; i < count; ++i)
+ content << "Field-" << i << ": " << (2000 + i) << std::endl;
+
+ FileFd fd;
+ createTemporaryFile("bigsection", fd, NULL, content.str().c_str());
+
+ pkgTagFile tfile(&fd);
+ pkgTagSection section;
+ EXPECT_TRUE(tfile.Step(section));
+
+ EXPECT_EQ(count, section.Count());
+ for (size_t i = 0; i < count; ++i)
+ {
+ std::stringstream name;
+ name << "Field-" << i;
+ EXPECT_TRUE(section.Exists(name.str().c_str())) << name.str() << " does not exist";
+ EXPECT_EQ((2000 + i), section.FindULL(name.str().c_str()));
+ }
+
+ // There is only one section in this tag file
+ EXPECT_FALSE(tfile.Step(section));
+}
+
+TEST(TagFileTest, PickedUpFromPreviousCall)
+{
+ size_t const count = 500;
+ std::stringstream contentstream;
+ for (size_t i = 0; i < count; ++i)
+ contentstream << "Field-" << i << ": " << (2000 + i) << std::endl;
+ contentstream << std::endl << std::endl;
+ std::string content = contentstream.str();
+
+ pkgTagSection section;
+ EXPECT_FALSE(section.Scan(content.c_str(), content.size()/2));
+ EXPECT_NE(0, section.Count());
+ EXPECT_NE(count, section.Count());
+ EXPECT_TRUE(section.Scan(content.c_str(), content.size(), false));
+ EXPECT_EQ(count, section.Count());
+
+ for (size_t i = 0; i < count; ++i)
+ {
+ std::stringstream name;
+ name << "Field-" << i;
+ EXPECT_TRUE(section.Exists(name.str().c_str())) << name.str() << " does not exist";
+ EXPECT_EQ((2000 + i), section.FindULL(name.str().c_str()));
+ }
+}
+
+TEST(TagFileTest, SpacesEverywhere)
+{
+ std::string content =
+ "Package: pkgA\n"
+ "Package: pkgB\n"
+ "NoSpaces:yes\n"
+ "TagSpaces\t :yes\n"
+ "ValueSpaces: \tyes\n"
+ "BothSpaces \t:\t yes\n"
+ "TrailingSpaces: yes\t \n"
+ "Naming Space: yes\n"
+ "Naming Spaces: yes\n"
+ "Package : pkgC \n"
+ "Multi-Colon::yes:\n"
+ "\n\n";
+
+ pkgTagSection section;
+ EXPECT_TRUE(section.Scan(content.c_str(), content.size()));
+ EXPECT_TRUE(section.Exists("Package"));
+ EXPECT_TRUE(section.Exists("NoSpaces"));
+ EXPECT_TRUE(section.Exists("TagSpaces"));
+ EXPECT_TRUE(section.Exists("ValueSpaces"));
+ EXPECT_TRUE(section.Exists("BothSpaces"));
+ EXPECT_TRUE(section.Exists("TrailingSpaces"));
+ EXPECT_TRUE(section.Exists("Naming Space"));
+ EXPECT_TRUE(section.Exists("Naming Spaces"));
+ EXPECT_TRUE(section.Exists("Multi-Colon"));
+ EXPECT_EQ("pkgC", section.FindS("Package"));
+ EXPECT_EQ("yes", section.FindS("NoSpaces"));
+ EXPECT_EQ("yes", section.FindS("TagSpaces"));
+ EXPECT_EQ("yes", section.FindS("ValueSpaces"));
+ EXPECT_EQ("yes", section.FindS("BothSpaces"));
+ EXPECT_EQ("yes", section.FindS("TrailingSpaces"));
+ EXPECT_EQ("yes", section.FindS("Naming Space"));
+ EXPECT_EQ("yes", section.FindS("Naming Spaces"));
+ EXPECT_EQ(":yes:", section.FindS("Multi-Colon"));
+ // overridden values are still present, but not really accessible
+ EXPECT_EQ(11, section.Count());
+}