rewrite ReadMessages()

author David Kalnischkies <david@kalnischkies.de>

Fri, 24 Oct 2014 21:55:15 +0000 (23:55 +0200)

committer David Kalnischkies <david@kalnischkies.de>

Sun, 26 Oct 2014 13:13:34 +0000 (14:13 +0100)
author David Kalnischkies <david@kalnischkies.de>
Fri, 24 Oct 2014 21:55:15 +0000 (23:55 +0200)
committer David Kalnischkies <david@kalnischkies.de>
Sun, 26 Oct 2014 13:13:34 +0000 (14:13 +0100)
diff --git a/apt-pkg/contrib/strutl.cc b/apt-pkg/contrib/strutl.cc

index aad358a55d74d72271c21d9a28af3e9f96e60329..ebf9c9ea6e4187eba9cd2ee1608e930768e97fca 100644 (file)
--- a/apt-pkg/contrib/strutl.cc
+++ b/apt-pkg/contrib/strutl.cc
@@ -779,86 +779,94 @@ string TimeRFC1123(time_t Date)
  
     In particular: this reads blocks from the input until it believes
     that it's run out of input text.  Each block is terminated by a
-   double newline ('\n' followed by '\n').  As noted below, there is a
-   bug in this code: it assumes that all the blocks have been read if
-   it doesn't see additional text in the buffer after the last one is
-   parsed, which will cause it to lose blocks if the last block
-   coincides with the end of the buffer.
+   double newline ('\n' followed by '\n').
   */
  bool ReadMessages(int Fd, vector<string> &List)
  {
     char Buffer[64000];
-   char *End = Buffer;
     // Represents any left-over from the previous iteration of the
     // parse loop.  (i.e., if a message is split across the end
     // of the buffer, it goes here)
     string PartialMessage;
-   
-   while (1)
-   {
-      int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
+
+   do {
+      int const Res = read(Fd, Buffer, sizeof(Buffer));
        if (Res < 0 && errno == EINTR)
          continue;
-      
-      // Process is dead, this is kind of bad..
+
+      // process we read from has died
        if (Res == 0)
          return false;
-      
+
        // No data
-      if (Res < 0 && errno == EAGAIN)
+      if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
          return true;
        if (Res < 0)
          return false;
-                             
-      End += Res;
-      
-      // Look for the end of the message
-      for (char *I = Buffer; I + 1 < End; I++)
+
+      // extract the message(s) from the buffer
+      char const *Start = Buffer;
+      char const * const End = Buffer + Res;
+
+      char const * NL = (char const *) memchr(Start, '\n', End - Start);
+      if (NL == NULL)
        {
-        if (I[1] != '\n' ||
-              (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
-           continue;
-        
-        // Pull the message out
-        string Message(Buffer,I-Buffer);
-        PartialMessage += Message;
-
-        // Fix up the buffer
-        for (; I < End && (*I == '\n' || *I == '\r'); ++I);
-        End -= I-Buffer;        
-        memmove(Buffer,I,End-Buffer);
-        I = Buffer;
-        
-        List.push_back(PartialMessage);
-        PartialMessage.clear();
+        // end of buffer: store what we have so far and read new data in
+        PartialMessage.append(Start, End - Start);
+        Start = End;
        }
-      if (End != Buffer)
-       {
-         // If there's text left in the buffer, store it
-         // in PartialMessage and throw the rest of the buffer
-         // away.  This allows us to handle messages that
-         // are longer than the static buffer size.
-         PartialMessage += string(Buffer, End);
-         End = Buffer;
-       }
        else
-       {
-         // BUG ALERT: if a message block happens to end at a
-         // multiple of 64000 characters, this will cause it to
-         // terminate early, leading to a badly formed block and
-         // probably crashing the method.  However, this is the only
-         // way we have to find the end of the message block.  I have
-         // an idea of how to fix this, but it will require changes
-         // to the protocol (essentially to mark the beginning and
-         // end of the block).
-         //
-         //  -- dburrows 2008-04-02
-         return true;
-       }
+        ++NL;
+
+      if (PartialMessage.empty() == false && Start < End)
+      {
+        // if we start with a new line, see if the partial message we have ended with one
+        // so that we properly detect records ending between two read() runs
+        // cases are: \n|\n  ,  \r\n|\r\n  and  \r\n\r|\n
+        // the case \r|\n\r\n is handled by the usual double-newline handling
+        if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r'))
+        {
+           if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r"))
+           {
+              PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
+              List.push_back(PartialMessage);
+              PartialMessage.clear();
+              while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL;
+              Start = NL;
+           }
+        }
+      }
+
+      while (Start < End) {
+        char const * NL2 = (char const *) memchr(NL, '\n', End - NL);
+        if (NL2 == NULL)
+        {
+           // end of buffer: store what we have so far and read new data in
+           PartialMessage.append(Start, End - Start);
+           break;
+        }
+        ++NL2;
+
+        // did we find a double newline?
+        if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r'))
+        {
+           PartialMessage.append(Start, NL2 - Start);
+           PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
+           List.push_back(PartialMessage);
+           PartialMessage.clear();
+           while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2;
+           Start = NL2;
+        }
+        NL = NL2;
+      }
+
+      // we have read at least one complete message and nothing left
+      if (PartialMessage.empty() == true)
+        return true;
  
        if (WaitFd(Fd) == false)
          return false;
-   }   
+   } while (true);
  }
                                                                         /*}}}*/
  // MonthConv - Converts a month string into a number                   /*{{{*/
diff --git a/test/libapt/strutil_test.cc b/test/libapt/strutil_test.cc

index 494159c8779c96831dc5b485d6bd921b37a22cd1..9bc3c76fdd12d86b1279b358762ed1081a968bc3 100644 (file)
--- a/test/libapt/strutil_test.cc
+++ b/test/libapt/strutil_test.cc
@@ -1,10 +1,13 @@
  #include <config.h>
  #include <apt-pkg/strutl.h>
+#include <apt-pkg/fileutl.h>
  #include <string>
  #include <vector>
  
  #include <gtest/gtest.h>
  
+#include "file-helpers.h"
+
  TEST(StrUtilTest,DeEscapeString)
  {
     // nothing special
@@ -143,3 +146,70 @@ TEST(StrUtilTest,Base64Encode)
     EXPECT_EQ("Lg==", Base64Encode("."));
     EXPECT_EQ("", Base64Encode(""));
  }
+void ReadMessagesTestWithNewLine(char const * const nl, char const * const ab)
+{
+   SCOPED_TRACE(SubstVar(SubstVar(nl, "\n", "n"), "\r", "r") + " # " + ab);
+   FileFd fd;
+   std::string pkgA = "Package: pkgA\n"
+      "Version: 1\n"
+      "Size: 100\n"
+      "Description: aaa\n"
+      " aaa";
+   std::string pkgB = "Package: pkgB\n"
+      "Version: 1\n"
+      "Flag: no\n"
+      "Description: bbb";
+   std::string pkgC = "Package: pkgC\n"
+      "Version: 2\n"
+      "Flag: yes\n"
+      "Description:\n"
+      " ccc";
+
+   createTemporaryFile("readmessage", fd, NULL, (pkgA + nl + pkgB + nl + pkgC + nl).c_str());
+   std::vector<std::string> list;
+   EXPECT_TRUE(ReadMessages(fd.Fd(), list));
+   EXPECT_EQ(3, list.size());
+   EXPECT_EQ(pkgA, list[0]);
+   EXPECT_EQ(pkgB, list[1]);
+   EXPECT_EQ(pkgC, list[2]);
+
+   size_t const msgsize = 63990;
+   createTemporaryFile("readmessage", fd, NULL, NULL);
+   for (size_t j = 0; j < msgsize; ++j)
+      fd.Write(ab, strlen(ab));
+   for (size_t i = 0; i < 21; ++i)
+   {
+      std::string msg;
+      strprintf(msg, "msgsize=%zu  i=%zu", msgsize, i);
+      SCOPED_TRACE(msg);
+      fd.Seek((msgsize + (i - 1)) * strlen(ab));
+      fd.Write(ab, strlen(ab));
+      fd.Write(nl, strlen(nl));
+      fd.Seek(0);
+      list.clear();
+      EXPECT_TRUE(ReadMessages(fd.Fd(), list));
+      EXPECT_EQ(1, list.size());
+      EXPECT_EQ((msgsize + i) * strlen(ab), list[0].length());
+      EXPECT_EQ(std::string::npos, list[0].find_first_not_of(ab));
+   }
+
+   list.clear();
+   fd.Write(pkgA.c_str(), pkgA.length());
+   fd.Write(nl, strlen(nl));
+   fd.Seek(0);
+   EXPECT_TRUE(ReadMessages(fd.Fd(), list));
+   EXPECT_EQ(2, list.size());
+   EXPECT_EQ((msgsize + 20) * strlen(ab), list[0].length());
+   EXPECT_EQ(std::string::npos, list[0].find_first_not_of(ab));
+   EXPECT_EQ(pkgA, list[1]);
+
+
+   fd.Close();
+}
+TEST(StrUtilTest,ReadMessages)
+{
+   ReadMessagesTestWithNewLine("\n\n", "a");
+   ReadMessagesTestWithNewLine("\r\n\r\n", "a");
+   ReadMessagesTestWithNewLine("\n\n", "ab");
+   ReadMessagesTestWithNewLine("\r\n\r\n", "ab");
+}
author	David Kalnischkies <david@kalnischkies.de>
	Fri, 24 Oct 2014 21:55:15 +0000 (23:55 +0200)
committer	David Kalnischkies <david@kalnischkies.de>
	Sun, 26 Oct 2014 13:13:34 +0000 (14:13 +0100)
apt-pkg/contrib/strutl.cc		patch \| blob \| blame \| history
test/libapt/strutil_test.cc		patch \| blob \| blame \| history