From 22f07fc5e77bcedbc774a4b60d305da847fab287 Mon Sep 17 00:00:00 2001
From: David Kalnischkies <kalnischkies@gmail.com>
Date: Wed, 12 Oct 2011 15:47:56 +0200
Subject: [PATCH] a version can have only a single md5 for descriptions, so we
 can optimize the merging with this knowledge a bit and by correctly sharing
 the lists we only need to have a single description list for possibly many
 different versions. This also means that description translations are shared
 between different sources

---
 apt-pkg/pkgcachegen.cc | 54 +++++++++++++++++++++++-------------------
 debian/changelog       |  3 ++-
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/apt-pkg/pkgcachegen.cc b/apt-pkg/pkgcachegen.cc
index 3545517fe..3b2c08e34 100644
--- a/apt-pkg/pkgcachegen.cc
+++ b/apt-pkg/pkgcachegen.cc
@@ -279,33 +279,36 @@ bool pkgCacheGenerator::MergeListPackage(ListParser &List, pkgCache::PkgIterator
    for (Ver = Pkg.VersionList(); Ver.end() == false; ++Ver)
    {
       pkgCache::DescIterator Desc = Ver.DescriptionList();
-      Dynamic<pkgCache::DescIterator> DynDesc(Desc);
-      map_ptrloc *LastDesc = &Ver->DescriptionList;
+
+      // a version can only have one md5 describing it
+      if (MD5SumValue(Desc.md5()) != CurMd5)
+	 continue;
 
       // don't add a new description if we have one for the given
       // md5 && language
       if (IsDuplicateDescription(Desc, CurMd5, CurLang) == true)
 	 continue;
 
-      for (Desc = Ver.DescriptionList();
-	   Desc.end() == false;
-	    LastDesc = &Desc->NextDesc, ++Desc)
-      {
-	 if (MD5SumValue(Desc.md5()) != CurMd5)
-	    continue;
-
-	 // Add new description
-	 void const * const oldMap = Map.Data();
-	 map_ptrloc const descindex = NewDescription(Desc, CurLang, CurMd5, *LastDesc);
-	 if (oldMap != Map.Data())
-	    LastDesc += (map_ptrloc*) Map.Data() - (map_ptrloc*) oldMap;
-	 *LastDesc = descindex;
-	 Desc->ParentPkg = Pkg.Index();
-
-	 if ((*LastDesc == 0 && _error->PendingError()) || NewFileDesc(Desc,List) == false)
-	    return _error->Error(_("Error occurred while processing %s (NewFileDesc1)"), Pkg.Name());
-	 break;
-       }
+      Dynamic<pkgCache::DescIterator> DynDesc(Desc);
+      // we add at the end, so that the start is constant as we need
+      // that to be able to efficiently share these lists
+      map_ptrloc *LastDesc = &Ver->DescriptionList;
+      for (;Desc.end() == false && Desc->NextDesc != 0; ++Desc);
+      if (Desc.end() == false)
+	 LastDesc = &Desc->NextDesc;
+
+      void const * const oldMap = Map.Data();
+      map_ptrloc const descindex = NewDescription(Desc, CurLang, CurMd5, *LastDesc);
+      if (oldMap != Map.Data())
+	 LastDesc += (map_ptrloc*) Map.Data() - (map_ptrloc*) oldMap;
+      *LastDesc = descindex;
+      Desc->ParentPkg = Pkg.Index();
+
+      if ((*LastDesc == 0 && _error->PendingError()) || NewFileDesc(Desc,List) == false)
+	 return _error->Error(_("Error occurred while processing %s (NewFileDesc1)"), Pkg.Name());
+
+      // we can stop here as all "same" versions will share the description
+      break;
    }
 
    return true;
@@ -421,7 +424,7 @@ bool pkgCacheGenerator::MergeListVersion(ListParser &List, pkgCache::PkgIterator
    map_ptrloc *LastDesc = &Ver->DescriptionList;
 
    oldMap = Map.Data();
-   map_ptrloc const descindex = NewDescription(Desc, List.DescriptionLanguage(), List.Description_md5(), *LastDesc);
+   map_ptrloc const descindex = NewDescription(Desc, CurLang, CurMd5, *LastDesc);
    if (oldMap != Map.Data())
        LastDesc += (map_ptrloc*) Map.Data() - (map_ptrloc*) oldMap;
    *LastDesc = descindex;
@@ -1426,8 +1429,11 @@ bool pkgCacheGenerator::MakeOnlyStatusCache(OpProgress *Progress,DynamicMMap **O
 bool IsDuplicateDescription(pkgCache::DescIterator Desc,
 			    MD5SumValue const &CurMd5, std::string const &CurLang)
 {
-   for ( ; Desc.end() == false; ++Desc)
-      if (MD5SumValue(Desc.md5()) == CurMd5 && Desc.LanguageCode() == CurLang)
+   // One linked list holds a single md5, so checking the head suffices; an empty list has no md5 to read
+   if (Desc.end() == true || MD5SumValue(Desc.md5()) != CurMd5)
+      return false;
+   for (; Desc.end() == false; ++Desc)
+      if (Desc.LanguageCode() == CurLang)
 	 return true;
    return false;
 }
diff --git a/debian/changelog b/debian/changelog
index b7f199609..62f32d8de 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -25,6 +25,7 @@ apt (0.8.16~exp7) UNRELEASEDexperimental; urgency=low
   * apt-pkg/pkgcachegen.cc:
     - refactor MergeList by creating -Group, -Package and -Version specialist
     - share description list between "same" versions (LP: #868977)
+      This also means that descriptions are shared across archives now.
 
   [ Michael Vogt ]
   * apt-pkg/contrib/configuration.cc:
@@ -39,7 +40,7 @@ apt (0.8.16~exp7) UNRELEASEDexperimental; urgency=low
   * ftparchive/cachedb.cc:
     - fix buffersize in bytes2hex
 
- -- David Kalnischkies <kalnischkies@gmail.com>  Tue, 11 Oct 2011 21:07:38 +0200
+ -- David Kalnischkies <kalnischkies@gmail.com>  Wed, 12 Oct 2011 15:47:43 +0200
 
 apt (0.8.16~exp6) experimental; urgency=low
 
-- 
2.47.2