From: Michael Vogt Date: Wed, 18 Jun 2014 08:13:01 +0000 (+0200) Subject: Merge remote-tracking branch 'mvo/feature/hash-stats' into debian/experimental X-Git-Tag: 1.1.exp1~14 X-Git-Url: https://git.saurik.com/apt.git/commitdiff_plain/da029b0aaebdc64a3a9f6b7012213539421c934b?ds=inline;hp=-c Merge remote-tracking branch 'mvo/feature/hash-stats' into debian/experimental Conflicts: apt-pkg/acquire-item.cc apt-pkg/acquire-item.h apt-pkg/deb/debmetaindex.h apt-pkg/pkgcache.cc test/integration/test-apt-ftparchive-src-cachedb --- da029b0aaebdc64a3a9f6b7012213539421c934b diff --combined apt-pkg/pkgcache.cc index 2b6153634,c1a3c0c55..4fbdc93d5 --- a/apt-pkg/pkgcache.cc +++ b/apt-pkg/pkgcache.cc @@@ -55,11 -55,7 +55,7 @@@ pkgCache::Header::Header( /* Whenever the structures change the major version should be bumped, whenever the generator changes the minor version should be bumped. */ MajorVersion = 9; - #if (APT_PKG_MAJOR >= 4 && APT_PKG_MINOR >= 13) - MinorVersion = 1; + MinorVersion = 2; - #else - MinorVersion = 1; - #endif Dirty = false; HeaderSz = sizeof(pkgCache::Header); @@@ -168,23 -164,15 +164,23 @@@ bool pkgCache::ReMap(bool const &Errorc if (Map.Size() < HeaderP->CacheFileSize) return _error->Error(_("The package cache file is corrupted, it is too small")); + if (HeaderP->VerSysName == 0 || HeaderP->Architecture == 0 || HeaderP->Architectures == 0) + return _error->Error(_("The package cache file is corrupted")); + // Locate our VS.. - if (HeaderP->VerSysName == 0 || - (VS = pkgVersioningSystem::GetVS(StrP + HeaderP->VerSysName)) == 0) + if ((VS = pkgVersioningSystem::GetVS(StrP + HeaderP->VerSysName)) == 0) return _error->Error(_("This APT does not support the versioning system '%s'"),StrP + HeaderP->VerSysName); - // Chcek the arhcitecture - if (HeaderP->Architecture == 0 || - _config->Find("APT::Architecture") != StrP + HeaderP->Architecture) - return _error->Error(_("The package cache was built for a different architecture")); + // Check the architecture + std::vector archs = APT::Configuration::getArchitectures(); + std::vector::const_iterator a = archs.begin(); + std::string list = *a; + for (++a; a != archs.end(); ++a) + list.append(",").append(*a); + if (_config->Find("APT::Architecture") != StrP + HeaderP->Architecture || + list != StrP + HeaderP->Architectures) + return _error->Error(_("The package cache was built for different architectures: %s vs %s"), StrP + HeaderP->Architectures, list.c_str()); + return true; } /*}}}*/ @@@ -218,7 -206,7 +214,7 @@@ pkgCache::PkgIterator pkgCache::SingleA { // Look at the hash bucket Package *Pkg = PkgP + HeaderP->PkgHashTable[Hash(Name)]; - for (; Pkg != PkgP; Pkg = PkgP + Pkg->NextPackage) + for (; Pkg != PkgP; Pkg = PkgP + Pkg->Next) { if (unlikely(Pkg->Name == 0)) continue; @@@ -374,7 -362,7 +370,7 @@@ pkgCache::PkgIterator pkgCache::GrpIter (= different packages with same calculated hash), so we need to check the name also */ for (pkgCache::Package *Pkg = PackageList(); Pkg != Owner->PkgP; - Pkg = Owner->PkgP + Pkg->NextPackage) { + Pkg = Owner->PkgP + Pkg->Next) { if (S->Name == Pkg->Name && stringcasecmp(Arch, Owner->StrP + Pkg->Arch) == 0) return PkgIterator(*Owner, Pkg); @@@ -423,7 -411,7 +419,7 @@@ pkgCache::PkgIterator pkgCache::GrpIter if (S->LastPackage == LastPkg.Index()) return PkgIterator(*Owner, 0); - return PkgIterator(*Owner, Owner->PkgP + LastPkg->NextPackage); + return PkgIterator(*Owner, Owner->PkgP + LastPkg->Next); } /*}}}*/ // GrpIterator::operator ++ - Postfix incr /*{{{*/ @@@ -450,7 -438,7 +446,7 @@@ void pkgCache::PkgIterator::operator ++ { // Follow the current links if (S != Owner->PkgP) - S = Owner->PkgP + S->NextPackage; + S = Owner->PkgP + S->Next; // Follow the hash table while (S == Owner->PkgP && (HashIndex+1) < (signed)_count(Owner->HeaderP->PkgHashTable)) @@@ -830,7 -818,7 +826,7 @@@ int pkgCache::VerIterator::CompareVer(c // VerIterator::Downloadable - Checks if the version is downloadable /*{{{*/ // --------------------------------------------------------------------- /* */ -bool pkgCache::VerIterator::Downloadable() const +APT_PURE bool pkgCache::VerIterator::Downloadable() const { VerFileIterator Files = FileList(); for (; Files.end() == false; ++Files) @@@ -843,7 -831,7 +839,7 @@@ // --------------------------------------------------------------------- /* This checks to see if any of the versions files are not NotAutomatic. True if this version is selectable for automatic installation. */ -bool pkgCache::VerIterator::Automatic() const +APT_PURE bool pkgCache::VerIterator::Automatic() const { VerFileIterator Files = FileList(); for (; Files.end() == false; ++Files) diff --combined apt-pkg/pkgcache.h index 22dc6218c,43d379ddf..55f0187f9 --- a/apt-pkg/pkgcache.h +++ b/apt-pkg/pkgcache.h @@@ -138,7 -138,7 +138,7 @@@ class pkgCache /*{{{* /** \brief priority of a package version Zero is used for unparsable or absent Priority fields. */ - enum VerPriority {Important=1,Required=2,Standard=3,Optional=4,Extra=5}; + enum VerPriority {Required=1,Important=2,Standard=3,Optional=4,Extra=5}; enum PkgSelectedState {Unknown=0,Install=1,Hold=2,DeInstall=3,Purge=4}; enum PkgInstState {Ok=0,ReInstReq=1,HoldInst=2,HoldReInstReq=3}; enum PkgCurrentState {NotInstalled=0,UnPacked=1,HalfConfigured=2, @@@ -286,10 -286,8 +286,10 @@@ struct pkgCache::Heade map_ptrloc StringList; /** \brief String representing the version system used */ map_ptrloc VerSysName; - /** \brief Architecture(s) the cache was built against */ + /** \brief native architecture the cache was built against */ map_ptrloc Architecture; + /** \brief all architectures the cache was built against */ + map_ptrloc Architectures; /** \brief The maximum size of a raw entry from the original Package file */ unsigned long MaxVerFileSize; /** \brief The maximum size of a raw entry from the original Translation file */ @@@ -316,8 -314,8 +316,8 @@@ these packages are stored as a sequence in the list. Beware: The Hashmethod assumes that the hash table sizes are equal */ - map_ptrloc PkgHashTable[2*1048]; - map_ptrloc GrpHashTable[2*1048]; + map_ptrloc PkgHashTable[64*1048]; + map_ptrloc GrpHashTable[64*1048]; /** \brief Size of the complete cache file */ unsigned long CacheFileSize; @@@ -390,7 -388,7 +390,7 @@@ struct pkgCache::Packag // Linked list /** \brief Link to the next package in the same bucket */ - map_ptrloc NextPackage; // Package + map_ptrloc Next; // Package /** \brief List of all dependencies on this package */ map_ptrloc RevDepends; // Dependency /** \brief List of all "packages" this package provide */ diff --combined apt-pkg/pkgcachegen.cc index ac1cea0eb,367115609..9615b4c22 --- a/apt-pkg/pkgcachegen.cc +++ b/apt-pkg/pkgcachegen.cc @@@ -74,31 -74,13 +74,31 @@@ pkgCacheGenerator::pkgCacheGenerator(Dy // Starting header *Cache.HeaderP = pkgCache::Header(); map_ptrloc const idxVerSysName = WriteStringInMap(_system->VS->Label); + if (unlikely(idxVerSysName == 0)) + return; Cache.HeaderP->VerSysName = idxVerSysName; // this pointer is set in ReMap, but we need it now for WriteUniqString Cache.StringItemP = (pkgCache::StringItem *)Map.Data(); map_ptrloc const idxArchitecture = WriteUniqString(_config->Find("APT::Architecture")); - Cache.HeaderP->Architecture = idxArchitecture; - if (unlikely(idxVerSysName == 0 || idxArchitecture == 0)) + if (unlikely(idxArchitecture == 0)) return; + Cache.HeaderP->Architecture = idxArchitecture; + + std::vector archs = APT::Configuration::getArchitectures(); + if (archs.size() > 1) + { + std::vector::const_iterator a = archs.begin(); + std::string list = *a; + for (++a; a != archs.end(); ++a) + list.append(",").append(*a); + map_ptrloc const idxArchitectures = WriteStringInMap(list); + if (unlikely(idxArchitectures == 0)) + return; + Cache.HeaderP->Architectures = idxArchitectures; + } + else + Cache.HeaderP->Architectures = idxArchitecture; + Cache.ReMap(); } else @@@ -320,9 -302,10 +320,9 @@@ bool pkgCacheGenerator::MergeListPackag // Find the right version to write the description MD5SumValue CurMd5 = List.Description_md5(); - if (CurMd5.Value().empty() == true || List.Description().empty() == true) + if (CurMd5.Value().empty() == true && List.Description("").empty() == true) return true; - std::string CurLang = List.DescriptionLanguage(); - + std::vector availDesc = List.AvailableDescriptionLanguages(); for (Ver = Pkg.VersionList(); Ver.end() == false; ++Ver) { pkgCache::DescIterator VerDesc = Ver.DescriptionList(); @@@ -331,16 -314,31 +331,16 @@@ if (VerDesc.end() == true || MD5SumValue(VerDesc.md5()) != CurMd5) continue; - // don't add a new description if we have one for the given - // md5 && language - if (IsDuplicateDescription(VerDesc, CurMd5, CurLang) == true) - continue; - - pkgCache::DescIterator Desc; - Dynamic DynDesc(Desc); - - map_ptrloc const descindex = NewDescription(Desc, CurLang, CurMd5, VerDesc->md5sum); - if (unlikely(descindex == 0 && _error->PendingError())) - return _error->Error(_("Error occurred while processing %s (%s%d)"), - Pkg.Name(), "NewDescription", 1); - - Desc->ParentPkg = Pkg.Index(); - - // we add at the end, so that the start is constant as we need - // that to be able to efficiently share these lists - VerDesc = Ver.DescriptionList(); // old value might be invalid after ReMap - for (;VerDesc.end() == false && VerDesc->NextDesc != 0; ++VerDesc); - map_ptrloc * const LastNextDesc = (VerDesc.end() == true) ? &Ver->DescriptionList : &VerDesc->NextDesc; - *LastNextDesc = descindex; + map_ptrloc md5idx = VerDesc->md5sum; + for (std::vector::const_iterator CurLang = availDesc.begin(); CurLang != availDesc.end(); ++CurLang) + { + // don't add a new description if we have one for the given + // md5 && language + if (IsDuplicateDescription(VerDesc, CurMd5, *CurLang) == true) + continue; - if (NewFileDesc(Desc,List) == false) - return _error->Error(_("Error occurred while processing %s (%s%d)"), - Pkg.Name(), "NewFileDesc", 1); + AddNewDescription(List, Ver, *CurLang, CurMd5, md5idx); + } // we can stop here as all "same" versions will share the description break; @@@ -488,10 -486,11 +488,10 @@@ bool pkgCacheGenerator::MergeListVersio return true; } - /* Record the Description (it is not translated) */ + /* Record the Description(s) based on their master md5sum */ MD5SumValue CurMd5 = List.Description_md5(); - if (CurMd5.Value().empty() == true || List.Description().empty() == true) + if (CurMd5.Value().empty() == true && List.Description("").empty() == true) return true; - std::string CurLang = List.DescriptionLanguage(); /* Before we add a new description we first search in the group for a version with a description of the same MD5 - if so we reuse this @@@ -502,44 -501,28 +502,44 @@@ for (pkgCache::VerIterator V = P.VersionList(); V.end() == false; ++V) { - if (IsDuplicateDescription(V.DescriptionList(), CurMd5, "") == false) + if (V->DescriptionList == 0 || MD5SumValue(V.DescriptionList().md5()) != CurMd5) continue; Ver->DescriptionList = V->DescriptionList; - return true; } } - // We haven't found reusable descriptions, so add the first description - pkgCache::DescIterator Desc = Ver.DescriptionList(); + // We haven't found reusable descriptions, so add the first description(s) + map_ptrloc md5idx = Ver->DescriptionList == 0 ? 0 : Ver.DescriptionList()->md5sum; + std::vector availDesc = List.AvailableDescriptionLanguages(); + for (std::vector::const_iterator CurLang = availDesc.begin(); CurLang != availDesc.end(); ++CurLang) + if (AddNewDescription(List, Ver, *CurLang, CurMd5, md5idx) == false) + return false; + return true; +} + /*}}}*/ +bool pkgCacheGenerator::AddNewDescription(ListParser &List, pkgCache::VerIterator &Ver, std::string const &lang, MD5SumValue const &CurMd5, map_ptrloc &md5idx) /*{{{*/ +{ + pkgCache::DescIterator Desc; Dynamic DynDesc(Desc); - map_ptrloc const descindex = NewDescription(Desc, CurLang, CurMd5, 0); + map_ptrloc const descindex = NewDescription(Desc, lang, CurMd5, md5idx); if (unlikely(descindex == 0 && _error->PendingError())) return _error->Error(_("Error occurred while processing %s (%s%d)"), - Pkg.Name(), "NewDescription", 2); + Ver.ParentPkg().Name(), "NewDescription", 1); + + md5idx = Desc->md5sum; + Desc->ParentPkg = Ver.ParentPkg().Index(); - Desc->ParentPkg = Pkg.Index(); - Ver->DescriptionList = descindex; + // we add at the end, so that the start is constant as we need + // that to be able to efficiently share these lists + pkgCache::DescIterator VerDesc = Ver.DescriptionList(); // old value might be invalid after ReMap + for (;VerDesc.end() == false && VerDesc->NextDesc != 0; ++VerDesc); + map_ptrloc * const LastNextDesc = (VerDesc.end() == true) ? &Ver->DescriptionList : &VerDesc->NextDesc; + *LastNextDesc = descindex; if (NewFileDesc(Desc,List) == false) return _error->Error(_("Error occurred while processing %s (%s%d)"), - Pkg.Name(), "NewFileDesc", 2); + Ver.ParentPkg().Name(), "NewFileDesc", 1); return true; } @@@ -656,16 -639,16 +656,16 @@@ bool pkgCacheGenerator::NewPackage(pkgC unsigned long const Hash = Cache.Hash(Name); map_ptrloc *insertAt = &Cache.HeaderP->PkgHashTable[Hash]; while (*insertAt != 0 && strcasecmp(Name.c_str(), Cache.StrP + (Cache.PkgP + *insertAt)->Name) > 0) - insertAt = &(Cache.PkgP + *insertAt)->NextPackage; - Pkg->NextPackage = *insertAt; + insertAt = &(Cache.PkgP + *insertAt)->Next; + Pkg->Next = *insertAt; *insertAt = Package; } else // Group the Packages together { // this package is the new last package pkgCache::PkgIterator LastPkg(Cache, Cache.PkgP + Grp->LastPackage); - Pkg->NextPackage = LastPkg->NextPackage; - LastPkg->NextPackage = Package; + Pkg->Next = LastPkg->Next; + LastPkg->Next = Package; } Grp->LastPackage = Package; diff --combined cmdline/apt-cache.cc index 1414617eb,35e9cc3a8..2ed1bf5d4 --- a/cmdline/apt-cache.cc +++ b/cmdline/apt-cache.cc @@@ -264,6 -264,44 +264,44 @@@ static bool DumpPackage(CommandLine &Cm return true; } /*}}}*/ + // ShowHashTableStats - Show stats about a hashtable /*{{{*/ + // --------------------------------------------------------------------- + /* */ + template + static void ShowHashTableStats(std::string Type, + T *StartP, + map_ptrloc *Hashtable, + unsigned long Size) + { + // hashtable stats for the HashTable + long NumBuckets = Size; + long UsedBuckets = 0; + long UnusedBuckets = 0; + long LongestBucket = 0; + long ShortestBucket = NumBuckets; + for (unsigned int i=0; i < NumBuckets; ++i) + { + T *P = StartP + Hashtable[i]; + if(P == 0 || P == StartP) + { + UnusedBuckets++; + continue; + } + long ThisBucketSize = 0; + for (; P != StartP; P = StartP + P->Next) + ThisBucketSize++; + LongestBucket = std::max(ThisBucketSize, LongestBucket); + ShortestBucket = std::min(ThisBucketSize, ShortestBucket); + UsedBuckets += ThisBucketSize; + } + cout << "Total buckets " << Type << ": " << SizeToStr(NumBuckets) << std::endl; + cout << " Unused: " << SizeToStr(UnusedBuckets) << std::endl; + cout << " Used: " << UsedBuckets << std::endl; + cout << " Average entries: " << UsedBuckets/(double)NumBuckets << std::endl; + cout << " Longest: " << LongestBucket << std::endl; + cout << " Shortest: " << ShortestBucket << std::endl; + } + /*}}}*/ // Stats - Dump some nice statistics /*{{{*/ // --------------------------------------------------------------------- /* */ @@@ -373,7 -411,13 +411,13 @@@ static bool Stats(CommandLine & Cache->Head().VerFileCount*Cache->Head().VerFileSz + Cache->Head().ProvidesCount*Cache->Head().ProvidesSz; cout << _("Total space accounted for: ") << SizeToStr(Total) << endl; - + + // hashtable stats + int HashTableSize = sizeof(Cache->HeaderP->PkgHashTable)/sizeof(map_ptrloc); + ShowHashTableStats("PkgHashTable", Cache->PkgP, Cache->HeaderP->PkgHashTable, HashTableSize); + HashTableSize = sizeof(Cache->HeaderP->GrpHashTable)/sizeof(map_ptrloc); + ShowHashTableStats("GrpHashTable", Cache->GrpP, Cache->HeaderP->GrpHashTable, HashTableSize); + return true; } /*}}}*/ @@@ -507,7 -551,7 +551,7 @@@ static bool DumpAvail(CommandLine & break; } - FileFd PkgF(File.FileName(),FileFd::ReadOnly); + FileFd PkgF(File.FileName(),FileFd::ReadOnly, FileFd::Extension); if (_error->PendingError() == true) break;