From 94449d7cd3eed7637c1ed78863c01ff207faa31e Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Tue, 4 May 2010 12:30:13 +0200 Subject: [PATCH] * doc/cache.sgml: - drop the file in favor of inplace documentation with doxygen --- apt-pkg/pkgcache.h | 469 +++++++++++++++++++++----- debian/changelog | 8 +- doc/cache.sgml | 824 --------------------------------------------- 3 files changed, 390 insertions(+), 911 deletions(-) delete mode 100644 doc/cache.sgml diff --git a/apt-pkg/pkgcache.h b/apt-pkg/pkgcache.h index 577eebad9..a2e63ff03 100644 --- a/apt-pkg/pkgcache.h +++ b/apt-pkg/pkgcache.h @@ -1,20 +1,75 @@ // -*- mode: cpp; mode: fold -*- // Description /*{{{*/ -// $Id: pkgcache.h,v 1.25 2001/07/01 22:28:24 jgg Exp $ -/* ###################################################################### - - Cache - Structure definitions for the cache file - - Please see doc/apt-pkg/cache.sgml for a more detailed description of - this format. Also be sure to keep that file up-to-date!! - +/**\file pkgcache.h + \brief pkgCache - Structure definitions for the cache file + + The goal of the cache file is two fold: + Firstly to speed loading and processing of the package file array and + secondly to reduce memory consumption of the package file array. + + The implementation is aimed at an environment with many primary package + files, for instance someone that has a Package file for their CD-ROM, a + Package file for the latest version of the distribution on the CD-ROM and a + package file for the development version. Always present is the information + contained in the status file which might be considered a separate package + file. + + Please understand, this is designed as a Cache file it is not meant to be + used on any system other than the one it was created for. It is not meant to + be authoritative either, i.e. if a system crash or software failure occurs it + must be perfectly acceptable for the cache file to be in an inconsistent + state. 
Furthermore at any time the cache file may be erased without losing + any information. + + Also the structures and storage layout is optimized for use by the APT + and may not be suitable for all purposes. However it should be possible + to extend it with associate cache files that contain other information. + + To keep memory use down the cache file only contains often used fields and + fields that are inexpensive to store, the Package file has a full list of + fields. Also the client may assume that all items are perfectly valid and + need not perform checks against their correctness. Removal of information + from the cache is possible, but blanks will be left in the file, and + unused strings will also be present. The recommended implementation is to + simply rebuild the cache each time any of the data files change. It is + possible to add a new package file to the cache without any negative side + effects. + + Note on Pointer access Clients should always use the CacheIterators classes for access to the - cache. They provide a simple STL-like method for traversing the links - of the datastructure. - - See pkgcachegen.h for information about generating cache structures. - - ##################################################################### */ + cache and the data in it. They also provide a simple STL-like method for + traversing the links of the datastructure. + + Every item in every structure is stored as the index to that structure. + What this means is that once the file is mmapped every data access has to + go through a fix up stage to get a real memory pointer. This is done + by taking the index, multiplying it by the type size and then adding + it to the start address of the memory block. This sounds complex, but + in C it is a single array dereference. 
Because all items are aligned to + their size and indexes are stored as multiples of the size of the structure + the format is immediately portable to all possible architectures - BUT the + generated files are -NOT-. + + This scheme allows code like this to be written: + + void *Map = mmap(...); + Package *PkgList = (Package *)Map; + Header *Head = (Header *)Map; + char *Strings = (char *)Map; + cout << (Strings + PkgList[Head->HashTable[0]]->Name) << endl; + + Notice the lack of casting or multiplication. The net result is to return + the name of the first package in the first hash bucket, without error + checks. + + The generator uses allocation pools to group similarly sized structures in + large blocks to eliminate any alignment overhead. The generator also + assures that no structures overlap and all indexes are unique. Although + at first glance it may seem like there is the potential for two structures + to exist at the same point the generator never allows this to happen. + (See the discussion of free space pools) + + See \ref pkgcachegen.h for more information about generating cache structures. */ /*}}}*/ #ifndef PKGLIB_PKGCACHE_H #define PKGLIB_PKGCACHE_H @@ -66,12 +121,20 @@ class pkgCache /*{{{*/ { enum DepType {Depends=1,PreDepends=2,Suggests=3,Recommends=4, Conflicts=5,Replaces=6,Obsoletes=7,DpkgBreaks=8,Enhances=9}; + /** \brief available compare operators + + The lower 4 bits are used to indicate what operator is being specified and + the upper 4 bits are flags. OR indicates that the next package is + or'd with the current package. */ enum DepCompareOp {Or=0x10,NoOp=0,LessEq=0x1,GreaterEq=0x2,Less=0x3, Greater=0x4,Equals=0x5,NotEquals=0x6}; }; struct State { + /** \brief priority of a package version + + Zero is used for unparsable or absent Priority fields. 
*/ enum VerPriority {Important=1,Required=2,Standard=3,Optional=4,Extra=5}; enum PkgSelectedState {Unknown=0,Install=1,Hold=2,DeInstall=3,Purge=4}; enum PkgInstState {Ok=0,ReInstReq=1,HoldInst=2,HoldReInstReq=3}; @@ -120,7 +183,7 @@ class pkgCache /*{{{*/ inline unsigned long Hash(const string &S) const {return sHash(S);}; inline unsigned long Hash(const char *S) const {return sHash(S);}; - // Usefull transformation things + // Useful transformation things const char *Priority(unsigned char Priority); // Accessors @@ -157,13 +220,29 @@ private: // Header structure /*{{{*/ struct pkgCache::Header { - // Signature information + /** \brief Signature information + + This must contain the hex value 0x98FE76DC which is designed to + verify that the system loading the image has the same byte order + and byte size as the system saving the image */ unsigned long Signature; + /** These contain the version of the cache file */ short MajorVersion; short MinorVersion; + /** \brief indicates if the cache should be erased + + Dirty is true if the cache file was opened for reading, the client + expects to have written things to it and have not fully synced it. + The file should be erased and rebuilt if it is true. */ bool Dirty; - - // Size of structure values + + /** \brief Size of structure values + + All *Sz variables contain the sizeof() of that particular structure. + They are used as an extra consistency check on the structure of the file. + + If any of the size values do not exactly match what the client expects + then the client should refuse to load the file. */ unsigned short HeaderSz; unsigned short PackageSz; unsigned short PackageFileSz; @@ -173,8 +252,12 @@ struct pkgCache::Header unsigned short ProvidesSz; unsigned short VerFileSz; unsigned short DescFileSz; - - // Structure counts + + /** \brief Structure counts + + These indicate the number of each structure contained in the cache. + PackageCount is especially useful for generating user state structures. 
+ See Package::Id for more info. */ unsigned long GroupCount; unsigned long PackageCount; unsigned long VersionCount; @@ -184,22 +267,48 @@ struct pkgCache::Header unsigned long VerFileCount; unsigned long DescFileCount; unsigned long ProvidesCount; - - // Offsets - map_ptrloc FileList; // struct PackageFile - map_ptrloc StringList; // struct StringItem - map_ptrloc VerSysName; // StringTable - map_ptrloc Architecture; // StringTable + + /** \brief index of the first PackageFile structure + + The PackageFile structures are singly linked lists that represent + all package files that have been merged into the cache. */ + map_ptrloc FileList; + /** \brief index of the first StringItem structure + + The cache contains a list of all the unique strings (StringItems). + The parser reads this list into memory so it can match strings + against it.*/ + map_ptrloc StringList; + /** \brief String representing the version system used */ + map_ptrloc VerSysName; + /** \brief Architecture(s) the cache was built against */ + map_ptrloc Architecture; + /** \brief The maximum size of a raw entry from the original Package file */ unsigned long MaxVerFileSize; + /** \brief The maximum size of a raw entry from the original Translation file */ unsigned long MaxDescFileSize; - /* Allocation pools, there should be one of these for each structure - excluding the header */ + /** \brief The Pool structures manage the allocation pools that the generator uses + + Start indicates the first byte of the pool, Count is the number of objects + remaining in the pool and ItemSize is the structure size (alignment factor) + of the pool. An ItemSize of 0 indicates the pool is empty. There should be + the same number of pools as there are structure types. The generator + stores this information so future additions can make use of any unused pool + blocks. 
*/ DynamicMMap::Pool Pools[9]; - // Rapid package and group name lookup - // Notice: Increase only both table sizes as the - // hashmethod assume the size of the Pkg one + /** \brief hash tables providing rapid group/package name lookup + + Each group/package name is inserted into the hash table using pkgCache::Hash(const string &). + By iterating over each entry in the hash table it is possible to iterate over + the entire list of packages. Hash Collisions are handled with a singly linked + list of packages based at the hash item. The linked list contains only + packages that match the hashing function. + In the PkgHashTable it is possible that multiple packages have the same name - + these packages are stored as a sequence in the list. + + Beware: The Hashmethod assumes that the hash table sizes are equal */ map_ptrloc PkgHashTable[2*1048]; map_ptrloc GrpHashTable[2*1048]; @@ -207,140 +316,332 @@ struct pkgCache::Header Header(); }; /*}}}*/ -struct pkgCache::Group { /*{{{*/ - map_ptrloc Name; // Stringtable +// Group structure /*{{{*/ +/** \brief groups architecture depending packages together - // Linked List - map_ptrloc FirstPackage;// Package - map_ptrloc LastPackage; // Package - map_ptrloc Next; // Group + One or more packages with the same name form a group, so we have + a simple way to access a package built for different architectures. + Group exists in a singly linked list of group records starting at + the hash index of the name in the pkgCache::Header::GrpHashTable */ +struct pkgCache::Group +{ + /** \brief Name of the group */ + map_ptrloc Name; // StringItem + + // Linked List + /** Link to the first package which belongs to the group */ + map_ptrloc FirstPackage; // Package + /** Link to the last package which belongs to the group */ + map_ptrloc LastPackage; // Package + /** Link to the next Group */ + map_ptrloc Next; // Group }; /*}}}*/ -struct pkgCache::Package /*{{{*/ +// Package structure /*{{{*/ +/** \brief contains information for a single 
unique package + + There can be any number of versions of a given package. + Package exists in a singly linked list of package records starting at + the hash index of the name in the pkgCache::Header::PkgHashTable + + A package can be created for every architecture so package names are + not unique, but it is guaranteed that packages with the same name + are sequentially ordered in the list. Packages with the same name can be + accessed with the Group. +*/ +struct pkgCache::Package { - // Pointers - map_ptrloc Name; // Stringtable - map_ptrloc Arch; // StringTable (StringItem) + /** \brief Name of the package */ + map_ptrloc Name; // StringItem + /** \brief Architecture of the package */ + map_ptrloc Arch; // StringItem + /** \brief Base of a singly linked list of versions + + Each structure represents a unique version of the package. + The version structures contain links into PackageFile and the + original text file as well as detailed information about the size + and dependencies of the specific package. In this way multiple + versions of a package can be cleanly handled by the system. + Furthermore, this linked list is guaranteed to be sorted + from Highest version to lowest version with no duplicate entries. */ map_ptrloc VersionList; // Version + /** \brief index to the installed version */ map_ptrloc CurrentVer; // Version - map_ptrloc Section; // StringTable (StringItem) + /** \brief indicates the deduced section + + Should be the index to the string "Unknown" or to the section + of the last parsed item. 
*/ + map_ptrloc Section; // StringItem + /** \brief index of the group this package belongs to */ map_ptrloc Group; // Group the Package belongs to - - // Linked list + + // Linked list + /** \brief Link to the next package in the same bucket */ map_ptrloc NextPackage; // Package + /** \brief List of all dependencies on this package */ map_ptrloc RevDepends; // Dependency + /** \brief List of all "packages" this package provide */ map_ptrloc ProvidesList; // Provides // Install/Remove/Purge etc + /** \brief state that the user wishes the package to be in */ unsigned char SelectedState; // What + /** \brief installation state of the package + + This should be "ok" but in case the installation failed + it will be different. + */ unsigned char InstState; // Flags + /** \brief indicates if the package is installed */ unsigned char CurrentState; // State - + + /** \brief unique sequel ID + + ID is a unique value from 0 to Header->PackageCount assigned by the generator. + This allows clients to create an array of size PackageCount and use it to store + state information for the package map. For instance the status file emitter uses + this to track which packages have been emitted already. */ unsigned int ID; + /** \brief some useful indicators of the package's state */ unsigned long Flags; }; /*}}}*/ -struct pkgCache::PackageFile /*{{{*/ +// Package File structure /*{{{*/ +/** \brief stores information about the files used to generate the cache + + Package files are referenced by Version structures to be able to know + after the generation still from which Packages file includes this Version + as we need this information later on e.g. for pinning. 
*/ +struct pkgCache::PackageFile { - // Names - map_ptrloc FileName; // Stringtable - map_ptrloc Archive; // Stringtable - map_ptrloc Codename; // Stringtable - map_ptrloc Component; // Stringtable - map_ptrloc Version; // Stringtable - map_ptrloc Origin; // Stringtable - map_ptrloc Label; // Stringtable - map_ptrloc Architecture; // Stringtable - map_ptrloc Site; // Stringtable - map_ptrloc IndexType; // Stringtable - unsigned long Size; + /** \brief physical disk file that this PackageFile represents */ + map_ptrloc FileName; // StringItem + /** \brief the release information + + Please see the files document for a description of what the + release information means. */ + map_ptrloc Archive; // StringItem + map_ptrloc Codename; // StringItem + map_ptrloc Component; // StringItem + map_ptrloc Version; // StringItem + map_ptrloc Origin; // StringItem + map_ptrloc Label; // StringItem + map_ptrloc Architecture; // StringItem + /** \brief The site the index file was fetched from */ + map_ptrloc Site; // StringItem + /** \brief indicates what sort of index file this is + + @TODO enumerate at least the possible indexes */ + map_ptrloc IndexType; // StringItem + /** \brief Size of the file + + Used together with the modification time as a + simple check to ensure that the Packages + file has not been altered since Cache generation. */ + unsigned long Size; + /** \brief Modification time for the file */ + time_t mtime; + + /* @TODO document PackageFile::Flags */ unsigned long Flags; - + // Linked list + /** \brief Link to the next PackageFile in the Cache */ map_ptrloc NextFile; // PackageFile + /** \brief unique sequel ID */ unsigned int ID; - time_t mtime; // Modification time for the file }; /*}}}*/ -struct pkgCache::VerFile /*{{{*/ +// VerFile structure /*{{{*/ +/** \brief associates a version with a PackageFile + + This allows a full description of all Versions in all files + (and hence all sources) under consideration. 
*/ +struct pkgCache::VerFile { + /** \brief index of the package file that this version was found in */ map_ptrloc File; // PackageFile + /** \brief next step in the linked list */ map_ptrloc NextFile; // PkgVerFile + /** \brief position in the package file */ map_ptrloc Offset; // File offset + /* @TODO document pkgCache::VerFile::Size */ unsigned long Size; }; /*}}}*/ -struct pkgCache::DescFile /*{{{*/ +// DescFile structure /*{{{*/ +/** \brief associates a description with a Translation file */ +struct pkgCache::DescFile { + /** \brief index of the file that this description was found in */ map_ptrloc File; // PackageFile + /** \brief next step in the linked list */ map_ptrloc NextFile; // PkgVerFile + /** \brief position in the file */ map_ptrloc Offset; // File offset + /* @TODO document pkgCache::DescFile::Size */ unsigned long Size; }; /*}}}*/ -struct pkgCache::Version /*{{{*/ +// Version structure /*{{{*/ +/** \brief information for a single version of a package + + The version list is always sorted from highest version to lowest + version by the generator. Equal version numbers are either merged + or handled as separate versions based on the Hash value. */ +struct pkgCache::Version { - map_ptrloc VerStr; // Stringtable - map_ptrloc Section; // StringTable (StringItem) + /** \brief complete version string */ + map_ptrloc VerStr; // StringItem + /** \brief section this version is filled in */ + map_ptrloc Section; // StringItem + /** \brief stores the MultiArch capabilities of this version + + None is the default and doesn't trigger special behaviour, + Foreign means that this version can fulfill dependencies even + if it is built for another architecture as the requester. + Same indicates that builds for different architectures can + be co-installed on the system and All is the marker for a + version with the Architecture: all. 
*/ enum {None, All, Foreign, Same, Allowed} MultiArch; - // Lists + /** \brief references all the PackageFile's that this version came from + + FileList can be used to determine what distribution(s) the Version + applies to. If FileList is 0 then this is a blank version. + The structure should also have a 0 in all other fields excluding + pkgCache::Version::VerStr and Possibly pkgCache::Version::NextVer. */ map_ptrloc FileList; // VerFile + /** \brief next (lower or equal) version in the linked list */ map_ptrloc NextVer; // Version + /** \brief next description in the linked list */ map_ptrloc DescriptionList; // Description + /** \brief base of the dependency list */ map_ptrloc DependsList; // Dependency + /** \brief links to the owning package + + This allows reverse dependencies to determine the package */ map_ptrloc ParentPkg; // Package + /** \brief list of pkgCache::Provides */ map_ptrloc ProvidesList; // Provides - + + /** \brief archive size for this version + + For Debian this is the size of the .deb file. */ map_ptrloc Size; // These are the .deb size + /** \brief uncompressed size for this version */ map_ptrloc InstalledSize; + /** \brief characteristic value representing this version + + No two packages in existence should have the same VerStr + and Hash with different contents. */ unsigned short Hash; + /** \brief unique sequel ID */ unsigned int ID; + /** \brief parsed priority value */ unsigned char Priority; }; /*}}}*/ -struct pkgCache::Description /*{{{*/ +// Description structure /*{{{*/ +/** \brief datamember of a linked list of available description for a version */ +struct pkgCache::Description { - // Language Code store the description translation language code. If - // the value has a 0 lenght then this is readed using the Package - // file else the Translation-CODE are used. 
- map_ptrloc language_code; // StringTable - map_ptrloc md5sum; // StringTable + /** \brief Language code of this description (translation) + + If the value has a 0 length then this is read using the Package + file else the Translation-CODE file is used. */ + map_ptrloc language_code; // StringItem + /** \brief MD5sum of the original description + + Used to map Translations of a description to a version + and to check that the Translation is up-to-date. */ + map_ptrloc md5sum; // StringItem - // Linked list + /* @TODO document pkgCache::Description::FileList */ map_ptrloc FileList; // DescFile + /** \brief next translation for this description */ map_ptrloc NextDesc; // Description + /** \brief the text is a description of this package */ map_ptrloc ParentPkg; // Package + /** \brief unique sequel ID */ unsigned int ID; }; /*}}}*/ -struct pkgCache::Dependency /*{{{*/ +// Dependency structure /*{{{*/ +/** \brief information for a single dependency record + + The records are split up like this to ease processing by the client. + The base of the linked list is pkgCache::Version::DependsList. + All forms of dependencies are recorded here including Depends, + Recommends, Suggests, Enhances, Conflicts, Replaces and Breaks. */ +struct pkgCache::Dependency { - map_ptrloc Version; // Stringtable + /** \brief string of the version the dependency is applied against */ + map_ptrloc Version; // StringItem + /** \brief index of the package this depends applies to + + The generator will - if the package does not already exist - + create a blank (no version records) package. 
*/ map_ptrloc Package; // Package + /** \brief next dependency of this version */ map_ptrloc NextDepends; // Dependency + /** \brief next reverse dependency of this package */ map_ptrloc NextRevDepends; // Dependency + /** \brief version of the package which has the reverse depends */ map_ptrloc ParentVer; // Version - - // Specific types of depends - map_ptrloc ID; + + /** \brief unique sequel ID */ + map_ptrloc ID; + /** \brief Dependency type - Depends, Recommends, Conflicts, etc */ unsigned char Type; + /** \brief comparison operator specified on the depends line + + If the high bit is set then it is a logical OR with the previous record. */ unsigned char CompareOp; }; /*}}}*/ -struct pkgCache::Provides /*{{{*/ +// Provides structure /*{{{*/ +/** \brief handles virtual packages + + When a Provides: line is encountered a new provides record is added + associating the package with a virtual package name. + The provides structures are linked off the package structures. + This simplifies the analysis of dependencies and other aspects A provides + refers to a specific version of a specific package, not all versions need to + provide that provides.*/ +struct pkgCache::Provides { - map_ptrloc ParentPkg; // Pacakge + /** \brief index of the package providing this */ + map_ptrloc ParentPkg; // Package + /** \brief index of the version this provide line applies to */ map_ptrloc Version; // Version - map_ptrloc ProvideVersion; // Stringtable + /** \brief version in the provides line (if any) + + This version allows dependencies to depend on specific versions of a + Provides, as well as allowing Provides to override existing packages. + This is experimental. 
Note that Debian doesn't allow versioned provides */ + map_ptrloc ProvideVersion; // StringItem + /** \brief next provides (based of package) */ map_ptrloc NextProvides; // Provides + /** \brief next provides (based of version) */ map_ptrloc NextPkgProv; // Provides }; /*}}}*/ -struct pkgCache::StringItem /*{{{*/ +// StringItem structure /*{{{*/ +/** \brief used for generating single instances of strings + + Some things like Section Name are useful to have as unique tags. + It is part of a linked list based at pkgCache::Header::StringList + + All strings are simply inlined any place in the file that is natural + for the writer. The client should make no assumptions about the positioning + of strings. All StringItems should be null-terminated. */ +struct pkgCache::StringItem { - map_ptrloc String; // Stringtable + /** \brief string this refers to */ + map_ptrloc String; // StringItem + /** \brief Next link in the chain */ map_ptrloc NextItem; // StringItem }; /*}}}*/ diff --git a/debian/changelog b/debian/changelog index 2f262a3a9..76fc2550b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -38,6 +38,11 @@ apt (0.7.26~exp4) experimental; urgency=low - be less strict and accept [option=value] as well * apt-pkg/contrib/configuration.cc: - error out if #clear directive has no argument + * doc/files.sgml: + - sync documentation with status quo, regarding files/directories in + use, extended_states and uri schemes. + * doc/cache.sgml: + - drop the file in favor of inplace documentation with doxygen [ Jari Aalto ] * cmdline/apt-get.cc: @@ -114,9 +119,6 @@ apt (0.7.26~exp3) experimental; urgency=low Thanks to Chris Leick and Georg Koppen! (Closes: #574962) * apt-pkg/contrib/strutl.cc: - convert all toupper calls to tolower_ascii for a little speedup - * doc/files.sgml: - - sync documentation with status quo, regarding files/directories in - use, extended_states and uri schemes. 
[ Jean-Baptiste Lallement ] * apt-pkg/contrib/strutl.cc: diff --git a/doc/cache.sgml b/doc/cache.sgml deleted file mode 100644 index aea5a45c3..000000000 --- a/doc/cache.sgml +++ /dev/null @@ -1,824 +0,0 @@ - - - -APT Cache File Format - -Jason Gunthorpe jgg@debian.org -$Id: cache.sgml,v 1.11 2003/02/12 15:05:44 doogie Exp $ - - -This document describes the complete implementation and format of the APT -Cache file. The APT Cache file is a way for APT to parse and store a -large number of package files for display in the UI. It's primary design -goal is to make display of a single package in the tree very fast by -pre-linking important things like dependencies and provides. - -The specification doubles as documentation for one of the in-memory -structures used by the package library and the APT GUI. - - - - -Copyright © Jason Gunthorpe, 1997-1998. -

-APT and this document are free software; you can redistribute them and/or -modify them under the terms of the GNU General Public License as published -by the Free Software Foundation; either version 2 of the License, or (at your -option) any later version. - -

-For more details, on Debian GNU/Linux systems, see the file -/usr/share/common-licenses/GPL for the full license. - - - - -Introduction - - -Purpose - -

-This document describes the implementation of an architecture -dependent binary cache file. The goal of this cache file is two fold, -firstly to speed loading and processing of the package file array and -secondly to reduce memory consumption of the package file array. - -

-The implementation is aimed at an environment with many primary package -files, for instance someone that has a Package file for their CD-ROM, a -Package file for the latest version of the distribution on the CD-ROM and a -package file for the development version. Always present is the information -contained in the status file which might be considered a separate package -file. - -

-Please understand, this is designed as a -CACHE FILE- it is not meant to be -used on any system other than the one it was created for. It is not meant to -be authoritative either, i.e. if a system crash or software failure occurs it -must be perfectly acceptable for the cache file to be in an inconsistent -state. Furthermore at any time the cache file may be erased without losing -any information. - -

-Also the structures and storage layout is optimized for use by the APT -GUI and may not be suitable for all purposes. However it should be possible -to extend it with associate cache files that contain other information. - -

-To keep memory use down the cache file only contains often used fields and -fields that are inexpensive to store, the Package file has a full list of -fields. Also the client may assume that all items are perfectly valid and -need not perform checks against their correctness. Removal of information -from the cache is possible, but blanks will be left in the file, and -unused strings will also be present. The recommended implementation is to -simply rebuild the cache each time any of the data files change. It is -possible to add a new package file to the cache without any negative side -effects. - -Note on Pointer access -

-Every item in every structure is stored as the index to that structure. -What this means is that once the files is mmaped every data access has to -go through a fixup stage to get a real memory pointer. This is done -by taking the index, multiplying it by the type size and then adding -it to the start address of the memory block. This sounds complex, but -in C it is a single array dereference. Because all items are aligned to -their size and indexes are stored as multiples of the size of the structure -the format is immediately portable to all possible architectures - BUT the -generated files are -NOT-. - -

-This scheme allows code like this to be written: - - void *Map = mmap(...); - Package *PkgList = (Package *)Map; - Header *Head = (Header *)Map; - char *Strings = (char *)Map; - cout << (Strings + PkgList[Head->HashTable[0]]->Name) << endl; - -

-Notice the lack of casting or multiplication. The net result is to return -the name of the first package in the first hash bucket, without error -checks. - -

-The generator uses allocation pools to group similarly sized structures in -large blocks to eliminate any alignment overhead. The generator also -assures that no structures overlap and all indexes are unique. Although -at first glance it may seem like there is the potential for two structures -to exist at the same point the generator never allows this to happen. -(See the discussion of free space pools) - - -Structures - - -Header -

-This is the first item in the file. - - struct Header - { - // Signature information - unsigned long Signature; - short MajorVersion; - short MinorVersion; - bool Dirty; - - // Size of structure values - unsigned short HeaderSz; - unsigned short PackageSz; - unsigned short PackageFileSz; - unsigned short VersionSz; - unsigned short DependencySz; - unsigned short ProvidesSz; - unsigned short VerFileSz; - - // Structure counts - unsigned long PackageCount; - unsigned long VersionCount; - unsigned long DependsCount; - unsigned long PackageFileCount; - - // Offsets - unsigned long FileList; // PackageFile - unsigned long StringList; // StringItem - unsigned long VerSysName; // StringTable - unsigned long Architecture; // StringTable - unsigned long MaxVerFileSize; - - // Allocation pools - struct - { - unsigned long ItemSize; - unsigned long Start; - unsigned long Count; - } Pools[7]; - - // Package name lookup - unsigned long HashTable[2*1024]; // Package - }; - - -Signature -This must contain the hex value 0x98FE76DC which is designed to verify -that the system loading the image has the same byte order and byte size as -the system saving the image - -MajorVersion -MinorVersion -These contain the version of the cache file, currently 0.2. - -Dirty -Dirty is true if the cache file was opened for reading, the client expects -to have written things to it and have not fully synced it. The file should -be erased and rebuilt if it is true. - -HeaderSz -PackageSz -PackageFileSz -VersionSz -DependencySz -VerFileSz -ProvidesSz -*Sz contains the sizeof() that particular structure. It is used as an -extra consistency check on the structure of the file. - -If any of the size values do not exactly match what the client expects then -the client should refuse the load the file. - -PackageCount -VersionCount -DependsCount -PackageFileCount -These indicate the number of each structure contained in the cache. -PackageCount is especially useful for generating user state structures. 
-See Package::ID for more info. - -VerSysName -String representing the version system used for this cache - -Architecture -Architecture the cache was built against. - -MaxVerFileSize -The maximum size of a raw entry from the original Package file -(i.e. VerFile::Size) is stored here. - -FileList -This contains the index of the first PackageFile structure. The PackageFile -structures are singly linked lists that represent all package files that -have been merged into the cache. - -StringList -This contains a list of all the unique strings (string item type strings) in -the cache. The parser reads this list into memory so it can match strings -against it. - -Pools -The Pool structures manage the allocation pools that the generator uses. -Start indicates the first byte of the pool, Count is the number of objects -remaining in the pool and ItemSize is the structure size (alignment factor) -of the pool. An ItemSize of 0 indicates the pool is empty. There should be -the same number of pools as there are structure types. The generator -stores this information so future additions can make use of any unused pool -blocks. - -HashTable -HashTable is a hash table that provides indexing for all of the packages. -Each package name is inserted into the hash table using the following hash -function: - - unsigned long Hash(string Str) - { - unsigned long Hash = 0; - for (const char *I = Str.begin(); I != Str.end(); I++) - Hash += *I * ((Str.end() - I + 1)); - return Hash % _count(Head.HashTable); - } - -

-By iterating over each entry in the hash table it is possible to iterate over -the entire list of packages. Hash Collisions are handled with a singly linked -list of packages based at the hash item. The linked list contains only -packages that match the hashing function. - - - - - -Package -

-This contains information for a single unique package. There can be any -number of versions of a given package. Package exists in a singly -linked list of package records starting at the hash index of the name in -the Header->HashTable. - - struct Pacakge - { - // Pointers - unsigned long Name; // Stringtable - unsigned long VersionList; // Version - unsigned long CurrentVer; // Version - unsigned long Section; // StringTable (StringItem) - - // Linked lists - unsigned long NextPackage; // Package - unsigned long RevDepends; // Dependency - unsigned long ProvidesList; // Provides - - // Install/Remove/Purge etc - unsigned char SelectedState; // What - unsigned char InstState; // Flags - unsigned char CurrentState; // State - - // Unique ID for this pkg - unsigned short ID; - unsigned long Flags; - }; - - - -Name -Name of the package. - -VersionList -Base of a singly linked list of version structures. Each structure -represents a unique version of the package. The version structures -contain links into PackageFile and the original text file as well as -detailed information about the size and dependencies of the specific -package. In this way multiple versions of a package can be cleanly handled -by the system. Furthermore, this linked list is guaranteed to be sorted -from Highest version to lowest version with no duplicate entries. - -CurrentVer -CurrentVer is an index to the installed version, either can be -0. - -Section -This indicates the deduced section. It should be "Unknown" or the section -of the last parsed item. - -NextPackage -Next link in this hash item. This linked list is based at Header.HashTable -and contains only packages with the same hash value. - -RevDepends -Reverse Depends is a linked list of all dependencies linked to this package. - -ProvidesList -This is a linked list of all provides for this package name. - -SelectedState -InstState -CurrentState -These correspond to the 3 items in the Status field found in the status -file. 
See the section on defines for the possible values. -

-SelectedState is the state that the user wishes the package to be -in. -

-InstState is the installation state of the package. This normally -should be OK, but if the installation had an accident it may be otherwise. -

-CurrentState indicates if the package is installed, partially installed or -not installed. - -ID -ID is a value from 0 to Header->PackageCount. It is a unique value assigned -by the generator. This allows clients to create an array of size PackageCount -and use it to store state information for the package map. For instance the -status file emitter uses this to track which packages have been emitted -already. - -Flags -Flags are some useful indicators of the package's state. - - - - - - -PackageFile -

-This contains information for a single package file. Package files are -referenced by Version structures. This is a singly linked list based from -Header.FileList - - struct PackageFile - { - // Names - unsigned long FileName; // Stringtable - unsigned long Archive; // Stringtable - unsigned long Component; // Stringtable - unsigned long Version; // Stringtable - unsigned long Origin; // Stringtable - unsigned long Label; // Stringtable - unsigned long Architecture; // Stringtable - unsigned long Site; // Stringtable - unsigned long IndexType; // Stringtable - unsigned long Size; - - // Linked list - unsigned long NextFile; // PackageFile - unsigned short ID; - unsigned long Flags; - time_t mtime; // Modification time - }; - - - -FileName -Refers to the physical disk file that this PackageFile represents. - -Archive -Component -Version -Origin -Label -Architecture -NotAutomatic -This is the release information. Please see the files document for a -description of what the release information means. - -Site -The site the index file was fetched from. - -IndexType -A string indicating what sort of index file this is. - -Size -Size is provided as a simple check to ensure that the package file has not -been altered. - -ID -See Package::ID. - -Flags -Provides some flags for the PackageFile, see the section on defines. - -mtime -Modification time for the file at time of cache generation. - - - - - - -Version -

-This contains the information for a single version of a package. This is a -singly linked list based from Package.VersionList. - -

-The version list is always sorted from highest version to lowest version by -the generator. Also there may not be any duplicate entries in the list (same -VerStr). - - - struct Version - { - unsigned long VerStr; // Stringtable - unsigned long Section; // StringTable (StringItem) - unsigned long Arch; // StringTable - - // Lists - unsigned long FileList; // VerFile - unsigned long NextVer; // Version - unsigned long DependsList; // Dependency - unsigned long ParentPkg; // Package - unsigned long ProvidesList; // Provides - - unsigned long Size; - unsigned long InstalledSize; - unsigned long Hash; - unsigned short ID; - unsigned char Priority; - }; - - - -VerStr -This is the complete version string. - -FileList -References the all the PackageFile's that this version came out of. FileList -can be used to determine what distribution(s) the Version applies to. If -FileList is 0 then this is a blank version. The structure should also have -a 0 in all other fields excluding VerStr and Possibly NextVer. - -Section -This string indicates which section it is part of. The string should be -contained in the StringItem list. - -Arch -Architecture the package was compiled for. - -NextVer -Next step in the linked list. - -DependsList -This is the base of the dependency list. - -ParentPkg -This links the version to the owning package, allowing reverse dependencies -to determine the package. - -ProvidesList -Head of the linked list of Provides::NextPkgProv, forward provides. - -Size -InstalledSize -The archive size for this version. For Debian this is the size of the .deb -file. Installed size is the uncompressed size for this version - -Hash -This is a characteristic value representing this package. No two packages -in existence should have the same VerStr and Hash with different contents. - -ID -See Package::ID. - -Priority -This is the parsed priority value of the package. - - - - - -Dependency -

-Dependency contains the information for a single dependency record. The records -are split up like this to ease processing by the client. The base of the -linked list is Version.DependsList. All forms of dependencies are recorded -here including Conflicts, Breaks, Suggests and Recommends. - -

-Multiple depends on the same package must be grouped together in -the Dependency lists. Clients should assume this is always true. - - - struct Dependency - { - unsigned long Version; // Stringtable - unsigned long Package; // Package - unsigned long NextDepends; // Dependency - unsigned long NextRevDepends; // Reverse dependency linking - unsigned long ParentVer; // Upwards parent version link - - // Specific types of depends - unsigned char Type; - unsigned char CompareOp; - unsigned short ID; - }; - - -Version -The string form of the version that the dependency is applied against. - -Package -The index of the package file this depends applies to. If the package file -does not already exist when the dependency is inserted a blank one (no -version records) should be created. - -NextDepends -Linked list based off a Version structure of all the dependencies in that -version. - -NextRevDepends -Reverse dependency linking, based off a Package structure. This linked list -is a list of all packages that have a depends line for a given package. - -ParentVer -Parent version linking, allows the reverse dependency list to link -back to the version and package that the dependency is for. - -Type -Describes whether it is depends, predepends, recommends, suggests, etc. - -CompareOp -Describes the comparison operator specified on the depends line. If the high -bit is set then it is a logical or with the previous record. - -ID -See Package::ID. - - - - - - -Provides -

-Provides handles virtual packages. When a Provides: line is encountered -a new provides record is added associating the package with a virtual -package name. The provides structures are linked off the package structures. -This simplifies the analysis of dependencies and other aspects. A provides -refers to a specific version of a specific package; not all versions need to -provide that provides. - -

-There is a linked list of provided package names started from each -version that provides packages. This is the forwards provides mechanism. - - struct Provides - { - unsigned long ParentPkg; // Package - unsigned long Version; // Version - unsigned long ProvideVersion; // Stringtable - unsigned long NextProvides; // Provides - unsigned long NextPkgProv; // Provides - }; - - -ParentPkg -The index of the package that head of this linked list is in. ParentPkg->Name -is the name of the provides. - -Version -The index of the version this provide line applies to. - -ProvideVersion -Each provides can specify a version in the provides line. This version allows -dependencies to depend on specific versions of a Provides, as well as allowing -Provides to override existing packages. This is experimental. - -NextProvides -Next link in the singly linked list of provides (based off package) - -NextPkgProv -Next link in the singly linked list of provides for 'Version'. - - - - - - -VerFile -

-VerFile associates a version with a PackageFile, this allows a full -description of all Versions in all files (and hence all sources) under -consideration. - - - struct pkgCache::VerFile - { - unsigned long File; // PackageFile - unsigned long NextFile; // PkgVerFile - unsigned long Offset; - unsigned short Size; - } - - -File -The index of the package file that this version was found in. - -NextFile -The next step in the linked list. - -Offset -Size -These describe the exact position in the package file for the section from -this version. - - - - - -StringItem -

-StringItem is used for generating single instances of strings. Some things -like Section Name are useful to have as unique tags. It is part of -a linked list based at Header::StringList. - - struct StringItem - { - unsigned long String; // Stringtable - unsigned long NextItem; // StringItem - }; - - -String -The string this refers to. - -NextItem -Next link in the chain. - - - - -StringTable -

-All strings are simply inlined any place in the file that is natural for the -writer. The client should make no assumptions about the positioning of -strings. All stringtable values are byte offsets from the start of the -file at which a null-terminated string begins. - - - -Defines -

-Several structures use variables to indicate things. Here is a list of all -of them. - -Definitions for Dependency::Type -

- -#define pkgDEP_Depends 1 -#define pkgDEP_PreDepends 2 -#define pkgDEP_Suggests 3 -#define pkgDEP_Recommends 4 -#define pkgDEP_Conflicts 5 -#define pkgDEP_Replaces 6 -#define pkgDEP_Breaks 8 - - - -Definitions for Dependency::CompareOp -

- -#define pkgOP_OR 0x10 -#define pkgOP_LESSEQ 0x1 -#define pkgOP_GREATEREQ 0x2 -#define pkgOP_LESS 0x3 -#define pkgOP_GREATER 0x4 -#define pkgOP_EQUALS 0x5 - -The lower 4 bits are used to indicate what operator is being specified and -the upper 4 bits are flags. pkgOP_OR indicates that the next package is -or'd with the current package. - - -Definitions for Package::SelectedState -

- -#define pkgSTATE_Unkown 0 -#define pkgSTATE_Install 1 -#define pkgSTATE_Hold 2 -#define pkgSTATE_DeInstall 3 -#define pkgSTATE_Purge 4 - - - -Definitions for Package::InstState -

- -#define pkgSTATE_Ok 0 -#define pkgSTATE_ReInstReq 1 -#define pkgSTATE_Hold 2 -#define pkgSTATE_HoldReInstReq 3 - - - -Definitions for Package::CurrentState -

- -#define pkgSTATE_NotInstalled 0 -#define pkgSTATE_UnPacked 1 -#define pkgSTATE_HalfConfigured 2 -#define pkgSTATE_UnInstalled 3 -#define pkgSTATE_HalfInstalled 4 -#define pkgSTATE_ConfigFiles 5 -#define pkgSTATE_Installed 6 -#define pkgSTATE_TriggersAwaited 7 -#define pkgSTATE_TriggersPending 8 - - - -Definitions for Package::Flags -

- -#define pkgFLAG_Auto (1 << 0) -#define pkgFLAG_New (1 << 1) -#define pkgFLAG_Obsolete (1 << 2) -#define pkgFLAG_Essential (1 << 3) -#define pkgFLAG_ImmediateConf (1 << 4) - - - -Definitions for Version::Priority -

-Zero is used for unparsable or absent Priority fields. - -#define pkgPRIO_Important 1 -#define pkgPRIO_Required 2 -#define pkgPRIO_Standard 3 -#define pkgPRIO_Optional 4 -#define pkgPRIO_Extra 5 - - - -Definitions for PackageFile::Flags -

- -#define pkgFLAG_NotSource (1 << 0) -#define pkgFLAG_NotAutomatic (1 << 1) - - - - - -Notes on the Generator - - -

-The pkgCache::MergePackageFile function is currently the only generator of -the cache file. It implements a conversion from the normal textual package -file into the cache file. - -

-The generator assumes any package declaration with a -Status: line is a 'Status of the package' type of package declaration. -A Package with a Target-Version field should also really have a status field. -The processing of a Target-Version field can create a place-holder Version -structure that is empty to refer to the specified version (See Version -for info on what an empty Version looks like). The Target-Version syntax -allows the specification of a specific version and a target distribution. - -

-Different section names on different versions are supported, but I -do not expect to use them. To simplify the GUI it will merely use the section -in the Package structure. This should be okay as I hope sections do not change -much. - -

-The generator goes through a number of post processing steps after producing -a disk file. It sorts all of the version lists to be in descending order -and then generates the reverse dependency lists for all of the packages. -ID numbers and count values are also generated in the post processing step. - -

-It is possible to extend many of the structures in the cache with extra data. -This is done by using the ID member. ID will be a unique number from 0 to -Header->??Count. For example - -struct MyPkgData; -MyPkgData *Data = new MyPkgData[Header->PackageCount]; -Data[Package->ID].Item = 0; - -This provides a one way reference between package structures and user data. -Getting a two way reference would require a member inside the MyPkgData structure. - -

-The generator's use of free space pools tends to make the package file quite -large, and quite full of blank space. This could be fixed with sparse files. - - - -Future Directions - - -

-A good direction to take the cache file is into a cache directory that -contains many associated caches that cache other important bits of -information. (/var/cache/apt, FHS2) - -

-Caching of the info/*.list is an excellent place to start, by generating all -the list files into a tree structure and reverse linking them to the package -structures in the main cache file major speed gains in dpkg might be achieved. - - - - -- 2.45.2