X-Git-Url: https://git.saurik.com/apt.git/blobdiff_plain/578bfd0aed2ec993f4ad85fa6a7094a852261422..f8477782df203e1998a8704e71a1a3cc699e9e3a:/apt-pkg/tagfile.cc?ds=sidebyside diff --git a/apt-pkg/tagfile.cc b/apt-pkg/tagfile.cc index 106b0febe..dc1ba3f9e 100644 --- a/apt-pkg/tagfile.cc +++ b/apt-pkg/tagfile.cc @@ -1,32 +1,62 @@ // -*- mode: cpp; mode: fold -*- // Description /*{{{*/ -// $Id: tagfile.cc,v 1.1 1998/07/02 02:58:13 jgg Exp $ +// $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $ /* ###################################################################### Fast scanner for RFC-822 type header information - This uses a rotating 64K buffer to load the package information into. + This uses a rotating buffer to load the package information into. The scanner runs over it and isolates and indexes a single section. ##################################################################### */ /*}}}*/ // Include Files /*{{{*/ -#include -#include +#ifdef __GNUG__ +#pragma implementation "apt-pkg/tagfile.h" +#endif +#include +#include +#include + +#include + #include #include +#include /*}}}*/ +using std::string; + // TagFile::pkgTagFile - Constructor /*{{{*/ // --------------------------------------------------------------------- /* */ -pkgTagFile::pkgTagFile(File &Fd) : Fd(Fd) +pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long Size) : + Fd(*pFd), + Size(Size) +{ + if (Fd.IsOpen() == false || Fd.Size() == 0) + { + Buffer = 0; + Start = End = Buffer = 0; + iOffset = 0; + Map = NULL; + return; + } + + Map = new MMap (Fd, MMap::Public | MMap::ReadOnly); + Buffer = (char *) Map->Data (); + Start = Buffer; + End = Buffer + Map->Size (); + iOffset = 0; +} + /*}}}*/ +// TagFile::~pkgTagFile - Destructor /*{{{*/ +// --------------------------------------------------------------------- +/* */ +pkgTagFile::~pkgTagFile() { - Buffer = new char[64*1024]; - Start = End = Buffer + 64*1024; - Left = Fd.Size(); - Fill(); + delete Map; } /*}}}*/ // TagFile::Step - Advance to the next section /*{{{*/ @@ -34,102 +64,167 @@ pkgTagFile::pkgTagFile(File &Fd) : Fd(Fd) /* If the Section Scanner fails we refill the buffer and try again. */ bool pkgTagFile::Step(pkgTagSection &Tag) { + if (Start == End) + return false; + if (Tag.Scan(Start,End - Start) == false) { - if (Fill() == false) - return false; - - if (Tag.Scan(Start,End - Start) == false) - return _error->Error("Unable to parse package file"); - } - Start += Tag.Length(); + return _error->Error(_("Unable to parse package file %s (1)"), + Fd.Name().c_str()); + } + Start += Tag.size(); + iOffset += Tag.size(); + + Tag.Trim(); return true; } /*}}}*/ -// TagFile::Fill - Top up the buffer /*{{{*/ +// TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/ // --------------------------------------------------------------------- -/* This takes the bit at the end of the buffer and puts it at the start - then fills the rest from the file */ -bool pkgTagFile::Fill() +/* This jumps to a pre-recorded file location and reads the record + that is there */ +bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset) { - unsigned long Size = End - Start; - - if (Left == 0) + // We are within a buffer space of the next hit.. + if (Offset >= iOffset && iOffset + (End - Start) > Offset) { - if (Size <= 1) - return false; - return true; + unsigned long Dist = Offset - iOffset; + Start += Dist; + iOffset += Dist; + return Step(Tag); } + + // Reposition and reload.. + iOffset = Offset; + Start = Buffer + iOffset; - memmove(Buffer,Start,Size); - Start = Buffer; + if (Tag.Scan(Start,End - Start) == false) + return _error->Error(_("Unable to parse package file %s (2)"),Fd.Name().c_str()); - // See if only a bit of the file is left or if - if (Left < End - Buffer - Size) - { - if (Fd.Read(Buffer + Size,Left) == false) - return false; - End = Buffer + Size + Left; - Left = 0; - } - else - { - if (Fd.Read(Buffer + Size, End - Buffer - Size) == false) - return false; - Left -= End - Buffer - Size; - } return true; } /*}}}*/ // TagSection::Scan - Scan for the end of the header information /*{{{*/ // --------------------------------------------------------------------- /* This looks for the first double new line in the data stream. It also - indexes the tags in the section. */ + indexes the tags in the section. This very simple hash function for the + first 3 letters gives very good performance on the debian package files */ +inline static unsigned long AlphaHash(const char *Text, const char *End = 0) +{ + unsigned long Res = 0; + for (; Text != End && *Text != ':' && *Text != 0; Text++) + Res = (unsigned long)(*Text) ^ (Res << 2); + return Res & 0xFF; +} + bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength) { const char *End = Start + MaxLength; Stop = Section = Start; + memset(AlphaIndexes,0,sizeof(AlphaIndexes)); + + if (Stop == 0 || MaxLength == 0) + return false; TagCount = 0; - Indexes[TagCount++] = Stop - Section; - Stop++; - for (; Stop < End; Stop++) + while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End) { - if (Stop[-1] != '\n') - continue; - if (Stop[0] == '\n') + // Start a new index and add it to the hash + if (isspace(Stop[0]) == 0) + { + Indexes[TagCount++] = Stop - Section; + AlphaIndexes[AlphaHash(Stop,End)] = TagCount; + } + + Stop = (const char *)memchr(Stop,'\n',End - Stop); + + if (Stop == 0) + return false; + + for (; Stop+1 < End && Stop[1] == '\r'; Stop++); + + // Double newline marks the end of the record + if (Stop+1 < End && Stop[1] == '\n') { - // Extra one at the end to simplify find Indexes[TagCount] = Stop - Section; - for (; Stop[0] == '\n' && Stop < End; Stop++); + for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++); return true; - break; } - if (isspace(Stop[0]) == 0) - Indexes[TagCount++] = Stop - Section; - - // Just in case. - if (TagCount > sizeof(Indexes)/sizeof(Indexes[0])) - TagCount = sizeof(Indexes)/sizeof(Indexes[0]); - } + Stop++; + } + + if ((Stop+1 >= End) && (End[-1] == '\n' || End[-1] == '\r')) + { + Indexes[TagCount] = (End - 1) - Section; + return true; + } + return false; } /*}}}*/ +// TagSection::Trim - Trim off any trailing garbage /*{{{*/ +// --------------------------------------------------------------------- +/* There should be exactly 1 newline at the end of the buffer, no more. */ +void pkgTagSection::Trim() +{ + for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--); +} + /*}}}*/ // TagSection::Find - Locate a tag /*{{{*/ // --------------------------------------------------------------------- /* This searches the section for a tag that matches the given string. */ -bool pkgTagSection::Find(const char *Tag,const char *&Start, - const char *&End) +bool pkgTagSection::Find(const char *Tag,unsigned &Pos) const { unsigned int Length = strlen(Tag); - for (unsigned int I = 0; I != TagCount; I++) + unsigned int I = AlphaIndexes[AlphaHash(Tag)]; + if (I == 0) + return false; + I--; + + for (unsigned int Counter = 0; Counter != TagCount; Counter++, + I = (I+1)%TagCount) { - if (strncasecmp(Tag,Section + Indexes[I],Length) != 0) + const char *St; + St = Section + Indexes[I]; + if (strncasecmp(Tag,St,Length) != 0) continue; // Make sure the colon is in the right place - const char *C = Section + Length + Indexes[I]; + const char *C = St + Length; + for (; isspace(*C) != 0; C++); + if (*C != ':') + continue; + Pos = I; + return true; + } + + Pos = 0; + return false; +} + /*}}}*/ +// TagSection::Find - Locate a tag /*{{{*/ +// --------------------------------------------------------------------- +/* This searches the section for a tag that matches the given string. */ +bool pkgTagSection::Find(const char *Tag,const char *&Start, + const char *&End) const +{ + unsigned int Length = strlen(Tag); + unsigned int I = AlphaIndexes[AlphaHash(Tag)]; + if (I == 0) + return false; + I--; + + for (unsigned int Counter = 0; Counter != TagCount; Counter++, + I = (I+1)%TagCount) + { + const char *St; + St = Section + Indexes[I]; + if (strncasecmp(Tag,St,Length) != 0) + continue; + + // Make sure the colon is in the right place + const char *C = St + Length; for (; isspace(*C) != 0; C++); if (*C != ':') continue; @@ -137,59 +232,263 @@ bool pkgTagSection::Find(const char *Tag,const char *&Start, // Strip off the gunk from the start end Start = C; End = Section + Indexes[I+1]; + if (Start >= End) + return _error->Error("Internal parsing error"); + for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++); for (; isspace(End[-1]) != 0 && End > Start; End--); + return true; } + Start = End = 0; return false; } /*}}}*/ +// TagSection::FindS - Find a string /*{{{*/ +// --------------------------------------------------------------------- +/* */ +string pkgTagSection::FindS(const char *Tag) const +{ + const char *Start; + const char *End; + if (Find(Tag,Start,End) == false) + return string(); + return string(Start,End); +} + /*}}}*/ +// TagSection::FindI - Find an integer /*{{{*/ +// --------------------------------------------------------------------- +/* */ +signed int pkgTagSection::FindI(const char *Tag,signed long Default) const +{ + const char *Start; + const char *Stop; + if (Find(Tag,Start,Stop) == false) + return Default; + + // Copy it into a temp buffer so we can use strtol + char S[300]; + if ((unsigned)(Stop - Start) >= sizeof(S)) + return Default; + strncpy(S,Start,Stop-Start); + S[Stop - Start] = 0; + + char *End; + signed long Result = strtol(S,&End,10); + if (S == End) + return Default; + return Result; +} + /*}}}*/ +// TagSection::FindFlag - Locate a yes/no type flag /*{{{*/ +// --------------------------------------------------------------------- +/* The bits marked in Flag are masked on/off in Flags */ +bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags, + unsigned long Flag) const +{ + const char *Start; + const char *Stop; + if (Find(Tag,Start,Stop) == false) + return true; + + switch (StringToBool(string(Start,Stop))) + { + case 0: + Flags &= ~Flag; + return true; -#include + case 1: + Flags |= Flag; + return true; -int main(int argc,char *argv[]) + default: + _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str()); + return true; + } + return true; +} + /*}}}*/ + +// TFRewrite - Rewrite a control record /*{{{*/ +// --------------------------------------------------------------------- +/* This writes the control record to stdout rewriting it as necessary. The + override map item specificies the rewriting rules to follow. This also + takes the time to sort the feild list. */ + +/* The order of this list is taken from dpkg source lib/parse.c the fieldinfos + array. */ +static const char *iTFRewritePackageOrder[] = { + "Package", + "Essential", + "Status", + "Priority", + "Section", + "Installed-Size", + "Maintainer", + "Architecture", + "Source", + "Version", + "Revision", // Obsolete + "Config-Version", // Obsolete + "Replaces", + "Provides", + "Depends", + "Pre-Depends", + "Recommends", + "Suggests", + "Conflicts", + "Conffiles", + "Filename", + "Size", + "MD5Sum", + "SHA1Sum", + "MSDOS-Filename", // Obsolete + "Description", + 0}; +static const char *iTFRewriteSourceOrder[] = {"Package", + "Source", + "Binary", + "Version", + "Priority", + "Section", + "Maintainer", + "Build-Depends", + "Build-Depends-Indep", + "Build-Conflicts", + "Build-Conflicts-Indep", + "Architecture", + "Standards-Version", + "Format", + "Directory", + "Files", + 0}; + +/* Two levels of initialization are used because gcc will set the symbol + size of an array to the length of the array, causing dynamic relinking + errors. Doing this makes the symbol size constant */ +const char **TFRewritePackageOrder = iTFRewritePackageOrder; +const char **TFRewriteSourceOrder = iTFRewriteSourceOrder; + +bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[], + TFRewriteData *Rewrite) { + unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite + for (unsigned I = 0; I != 256; I++) + Visited[I] = 0; + + // Set new tag up as necessary. + for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++) { - File F(argv[1],File::ReadOnly); - pkgTagFile Test(F); - File CacheF("./cache",File::WriteEmpty); - DynamicMMap Map(CacheF,MMap::Public); - pkgCacheGenerator Gen(Map); - Gen.SelectFile("tet"); + if (Rewrite[J].NewTag == 0) + Rewrite[J].NewTag = Rewrite[J].Tag; } - -#if 0 - pkgTagSection I; - while (Test.Step(I) == true) + + // Write all all of the tags, in order. + for (unsigned int I = 0; Order[I] != 0; I++) { - const char *Start; - const char *End; - if (I.Find("Package",Start,End) == false) + bool Rewritten = false; + + // See if this is a field that needs to be rewritten + for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++) { - cout << "Failed" << endl; + if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0) + { + Visited[J] |= 2; + if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0) + { + if (isspace(Rewrite[J].Rewrite[0])) + fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite); + else + fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite); + } + + Rewritten = true; + break; + } + } + + // See if it is in the fragment + unsigned Pos; + if (Tags.Find(Order[I],Pos) == false) + continue; + Visited[Pos] |= 1; + + if (Rewritten == true) continue; - } - cout << "Package: " << string(Start,End - Start) << endl; + /* Write out this element, taking a moment to rewrite the tag + in case of changes of case. */ + const char *Start; + const char *Stop; + Tags.Get(Start,Stop,Pos); -/* for (const char *I = Start; I < End; I++) + if (fputs(Order[I],Output) < 0) + return _error->Errno("fputs","IO Error to output"); + Start += strlen(Order[I]); + if (fwrite(Start,Stop - Start,1,Output) != 1) + return _error->Errno("fwrite","IO Error to output"); + if (Stop[-1] != '\n') + fprintf(Output,"\n"); + } + + // Now write all the old tags that were missed. + for (unsigned int I = 0; I != Tags.Count(); I++) + { + if ((Visited[I] & 1) == 1) + continue; + + const char *Start; + const char *Stop; + Tags.Get(Start,Stop,I); + const char *End = Start; + for (; End < Stop && *End != ':'; End++); + + // See if this is a field that needs to be rewritten + bool Rewritten = false; + for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++) { - const char *Begin = I; - bool Number = true; - while (isspace(*I) == 0 && ispunct(*I) == 0 && I < End) + if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0) { - if (isalpha(*I) != 0) - Number = false; - I++; + Visited[J] |= 2; + if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0) + { + if (isspace(Rewrite[J].Rewrite[0])) + fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite); + else + fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite); + } + + Rewritten = true; + break; } - if (Number == false) - cout << string(Begin,I-Begin) << endl; - while ((isspace(*I) != 0 || ispunct(*I) != 0) && I < End) - I++; - I--; - } */ + } + + if (Rewritten == true) + continue; + + // Write out this element + if (fwrite(Start,Stop - Start,1,Output) != 1) + return _error->Errno("fwrite","IO Error to output"); + if (Stop[-1] != '\n') + fprintf(Output,"\n"); } -#endif - _error->DumpErrors(); + + // Now write all the rewrites that were missed + for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++) + { + if ((Visited[J] & 2) == 2) + continue; + + if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0) + { + if (isspace(Rewrite[J].Rewrite[0])) + fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite); + else + fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite); + } + } + + return true; } + /*}}}*/