// -*- mode: cpp; mode: fold -*-
// Description /*{{{*/
-// $Id: tagfile.cc,v 1.8 1998/07/16 06:08:39 jgg Exp $
+// $Id: tagfile.cc,v 1.18 1998/12/08 05:24:41 jgg Exp $
/* ######################################################################
Fast scanner for RFC-822 type header information
- This uses a rotating 64K buffer to load the package information into.
+ This uses a rotating buffer to load the package information into.
The scanner runs over it and isolates and indexes a single section.
##################################################################### */
// TagFile::pkgTagFile - Constructor /*{{{*/
// ---------------------------------------------------------------------
/* */
-pkgTagFile::pkgTagFile(File &Fd) : Fd(Fd)
+pkgTagFile::pkgTagFile(FileFd &Fd,unsigned long Size) : Fd(Fd), Size(Size)
{
- Buffer = new char[64*1024];
- Start = End = Buffer + 64*1024;
+ Buffer = new char[Size];
+ Start = End = Buffer;
Left = Fd.Size();
iOffset = 0;
Fill();
return false;
if (Tag.Scan(Start,End - Start) == false)
- return _error->Error("Unable to parse package file");
+ return _error->Error("Unable to parse package file %s",Fd.Name().c_str());
}
Start += Tag.size();
iOffset += Tag.size();
then fills the rest from the file */
bool pkgTagFile::Fill()
{
- unsigned long Size = End - Start;
+ // Move the unread bytes [Start,End) to the front of Buffer and then top
+ // the buffer up from Fd. Returns false when the file is exhausted and at
+ // most 3 bytes remain buffered, or when a read fails; true otherwise.
+ unsigned long EndSize = End - Start;
+
+ memmove(Buffer,Start,EndSize);
+ Start = Buffer;
+ End = Buffer + EndSize;
 if (Left == 0)
 {
- if (Size <= 1)
+ if (EndSize <= 3)
 return false;
+ // Fewer than 4 free bytes after End: leave the data as it is
+ if (Size - (End - Buffer) < 4)
+ return true;
+
+ // Append a double new line if one does not exist
+ // Only the last 5 buffered bytes are inspected, and never anything
+ // before Buffer. The distance is measured as End - E, which is positive
+ // while walking backwards (E - End is always negative and never limits
+ // the walk).
+ unsigned int LineCount = 0;
+ for (const char *E = End - 1; End - E < 6 &&
+ (unsigned long)(End - E) <= EndSize &&
+ (*E == '\n' || *E == '\r'); E--)
+ if (*E == '\n')
+ LineCount++;
+ for (; LineCount < 2; LineCount++)
+ *End++ = '\n';
+
 return true;
 }
- memmove(Buffer,Start,Size);
- Start = Buffer;
-
- // See if only a bit of the file is left or if
- if (Left < End - Buffer - Size)
+ // See if only a bit of the file is left
+ if (Left < Size - (End - Buffer))
 {
- if (Fd.Read(Buffer + Size,Left) == false)
+ // The remainder of the file fits in the free space after End
+ if (Fd.Read(End,Left) == false)
 return false;
- End = Buffer + Size + Left;
+
+ End += Left;
 Left = 0;
 }
 else
 {
- if (Fd.Read(Buffer + Size, End - Buffer - Size) == false)
+ // Read exactly as many bytes as are free so the buffer ends up full
+ if (Fd.Read(End,Size - (End - Buffer)) == false)
 return false;
- Left -= End - Buffer - Size;
+
+ Left -= Size - (End - Buffer);
+ End = Buffer + Size;
 }
 return true;
}
/*}}}*/
+// TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
+// ---------------------------------------------------------------------
+/* This jumps to a pre-recorded file location and reads the record
+ that is there */
+bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset)
+{
+   // Reposition the file before touching any bookkeeping so that a failed
+   // seek leaves iOffset, Left and the buffer pointers as they were
+   if (Fd.Seek(Offset) == false)
+      return false;
+
+   // Drop whatever is buffered and restart the accounting at Offset
+   // NOTE(review): Offset is assumed not to exceed Fd.Size(); a larger value
+   // would wrap the unsigned subtraction below - confirm against callers
+   iOffset = Offset;
+   Left = Fd.Size() - Offset;
+   Start = End = Buffer;
+
+   if (Fill() == false)
+      return false;
+
+   // Index the record that now sits at the front of the buffer; the error
+   // names the file, as the other parse failure message in this file does
+   if (Tag.Scan(Start,End - Start) == false)
+      return _error->Error("Unable to parse package file %s",Fd.Name().c_str());
+   return true;
+}
+ /*}}}*/
// TagSection::Scan - Scan for the end of the header information /*{{{*/
// ---------------------------------------------------------------------
/* This looks for the first double new line in the data stream. It also
- indexes the tags in the section. */
+ indexes the tags in the section. This very simple hash function for the
+ first 3 letters gives very good performance on the debian package files */
bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
{
 const char *End = Start + MaxLength;
 Stop = Section = Start;
+ // Forget the hash slots left over from the previously scanned section
+ memset(AlphaIndexes,0,sizeof(AlphaIndexes));
+
+ if (Stop == 0)
+ return false;
 TagCount = 0;
- Indexes[TagCount++] = Stop - Section;
- Stop++;
- for (; Stop < End; Stop++)
+ // One slot of Indexes is kept free for the end-of-section entry stored
+ // below, and Stop is never dereferenced at or beyond End
+ while (TagCount + 1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
 {
- if (Stop[-1] != '\n')
- continue;
- if (Stop[0] == '\n')
+ // A line that does not begin with whitespace starts a new tag
+ if (isspace(Stop[0]) == 0)
+ {
+ Indexes[TagCount++] = Stop - Section;
+ // Hash on characters 0, 1 and 3 of the tag name; the slot holds the
+ // 1-based tag number so that 0 can mean "no such tag"
+ // NOTE(review): Stop[1] and Stop[3] are read without comparing
+ // against End - confirm the caller's buffer has slack after End
+ unsigned char A = tolower(Stop[0]) - 'a';
+ unsigned char B = tolower(Stop[1]) - 'a';
+ unsigned char C = tolower(Stop[3]) - 'a';
+ AlphaIndexes[((A + C/3)%26) + 26*((B + C/2)%26)] = TagCount;
+ }
+
+ Stop = (const char *)memchr(Stop,'\n',End - Stop);
+
+ if (Stop == 0)
+ return false;
+ // Step over carriage returns that follow the new line; the bound is
+ // tested before the byte is read
+ for (; Stop + 1 < End && Stop[1] == '\r'; Stop++);
+
+ // A second new line terminates the section
+ if (Stop + 1 < End && Stop[1] == '\n')
 {
- // Extra one at the end to simplify find
 Indexes[TagCount] = Stop - Section;
- for (; Stop[0] == '\n' && Stop < End; Stop++);
+ for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
 return true;
- break;
 }
- if (isspace(Stop[0]) == 0)
- Indexes[TagCount++] = Stop - Section;
-
- // Just in case.
- if (TagCount > sizeof(Indexes)/sizeof(Indexes[0]))
- TagCount = sizeof(Indexes)/sizeof(Indexes[0]);
- }
+ Stop++;
+ }
+
 return false;
}
/*}}}*/
const char *&End)
{
unsigned int Length = strlen(Tag);
- for (unsigned int I = 0; I != TagCount; I++)
+ unsigned char A = tolower(Tag[0]) - 'a';
+ unsigned char B = tolower(Tag[1]) - 'a';
+ unsigned char C = tolower(Tag[3]) - 'a';
+ unsigned int I = AlphaIndexes[((A + C/3)%26) + 26*((B + C/2)%26)];
+ if (I == 0)
+ return false;
+ I--;
+
+ for (unsigned int Counter = 0; Counter != TagCount; Counter++,
+ I = (I+1)%TagCount)
{
- if (strncasecmp(Tag,Section + Indexes[I],Length) != 0)
+ const char *St;
+ St = Section + Indexes[I];
+ if (strncasecmp(Tag,St,Length) != 0)
continue;
// Make sure the colon is in the right place
- const char *C = Section + Length + Indexes[I];
+ const char *C = St + Length;
for (; isspace(*C) != 0; C++);
if (*C != ':')
continue;
End = Section + Indexes[I+1];
for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
for (; isspace(End[-1]) != 0 && End > Start; End--);
+
return true;
}
+
Start = End = 0;
return false;
}
/*}}}*/
+// TagSection::FindS - Find a string /*{{{*/
+// ---------------------------------------------------------------------
+/* */
+string pkgTagSection::FindS(const char *Tag)
+{
+   // Value stays empty when the tag is not present in the section
+   string Value;
+   const char *First;
+   const char *Last;
+   if (Find(Tag,First,Last) == true)
+      Value.assign(First,Last);
+   return Value;
+}
+ /*}}}*/
+// TagSection::FindI - Find an integer /*{{{*/
+// ---------------------------------------------------------------------
+/* */
+unsigned int pkgTagSection::FindI(const char *Tag)
+{
+   // Returns 0 when the tag is not present in the section
+   const char *Start;
+   const char *End;
+   if (Find(Tag,Start,End) == false)
+      return 0;
+
+   // The range is copied into a string so that the parser sees a
+   // terminated buffer. strtoul is used instead of atoi because atoi has
+   // undefined behaviour when the number does not fit in an int, while the
+   // result here is unsigned
+   return strtoul(string(Start,End).c_str(),0,10);
+}
+ /*}}}*/
+