]> git.saurik.com Git - wxWidgets.git/commitdiff
Fixes for parsing invalid HTML without tag ends.
authorVadim Zeitlin <vadim@wxwidgets.org>
Thu, 13 Jan 2011 14:49:55 +0000 (14:49 +0000)
committerVadim Zeitlin <vadim@wxwidgets.org>
Thu, 13 Jan 2011 14:49:55 +0000 (14:49 +0000)
The code in wxHtmlParser supposed in many places that a '<' character must be
always followed by a '>' one and could create (and sometimes dereference)
invalid iterators if this wasn't the case resulting in asserts from MSVC debug
CRT and possibly crashes.

Fix this by ensuring that only valid iterators are used and add a trivial unit
test for wxHtmlParser which checks that it can parse invalid HTML without
crashing.

Closes #12869.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@66678 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775

13 files changed:
src/html/htmlpars.cpp
src/html/htmltag.cpp
tests/Makefile.in
tests/html/htmlparser.cpp [new file with mode: 0644]
tests/makefile.bcc
tests/makefile.gcc
tests/makefile.vc
tests/makefile.wat
tests/test.bkl
tests/test_test_gui.dsp
tests/test_vc7_test_gui.vcproj
tests/test_vc8_test_gui.vcproj
tests/test_vc9_test_gui.vcproj

index 38c0680c232a5eb5507c41f2d7118c9862500dba..ed99a443b69d5f3b89858becad6c98ec097890f5 100644 (file)
@@ -227,7 +227,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
             else
             {
                 while (i < end_pos && *i != wxT('>')) ++i;
-                textBeginning = i+1;
+                textBeginning = i < end_pos ? i+1 : i;
             }
         }
         else ++i;
@@ -958,7 +958,7 @@ wxHtmlParser::SkipCommentTag(wxString::const_iterator& start,
     wxString::const_iterator p = start;
 
     // comments begin with "<!--" in HTML 4.0
-    if ( p > end - 3 || *++p != '!' || *++p != '-' || *++p != '-' )
+    if ( end - start < 4 || *++p != '!' || *++p != '-' || *++p != '-' )
     {
         // not a comment at all
         return false;
index b2af5eff7f1d738cce91a734d9909bc892e6c2f1..fdc71fe1710063e3212f899891ceaddb31a95973 100644 (file)
@@ -94,12 +94,10 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
         if ( wxHtmlParser::SkipCommentTag(pos, end) )
             continue;
 
-        size_t tg = Cache().size();
-        Cache().push_back(wxHtmlCacheItem());
-
+        // Remember the starting tag position.
         wxString::const_iterator stpos = pos++;
-        Cache()[tg].Key = stpos;
 
+        // And look for the ending one.
         int i;
         for ( i = 0;
               pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
@@ -110,12 +108,26 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
         }
         tagBuffer[i] = wxT('\0');
 
-        Cache()[tg].Name = new wxChar[i+1];
-        memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
-
         while (pos < end && *pos != wxT('>'))
             ++pos;
 
+        if ( pos == end )
+        {
+            // We didn't find a closing bracket, this is not a valid tag after
+            // all. Notice that we need to roll back pos to avoid creating an
+            // invalid iterator when "++pos" is done in the loop statement.
+            --pos;
+
+            continue;
+        }
+
+        // We have a valid tag, add it to the cache.
+        size_t tg = Cache().size();
+        Cache().push_back(wxHtmlCacheItem());
+        Cache()[tg].Key = stpos;
+        Cache()[tg].Name = new wxChar[i+1];
+        memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
+
         if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
         {
             Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
@@ -223,7 +235,12 @@ void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
                                bool *hasEnding)
 {
     if (Cache().empty())
+    {
+        *end1 =
+        *end2 = inputEnd;
+        *hasEnding = true;
         return;
+    }
 
     if (Cache()[m_CachePos].Key != at)
     {
index 02f665d2f1e8e226f67c6780d5a0c833ca402f19..995b5c84b34b49b49cf77b963f627e0b1007f9a9 100644 (file)
@@ -202,6 +202,7 @@ TEST_GUI_OBJECTS =  \
        test_gui_fonttest.o \
        test_gui_image.o \
        test_gui_rawbmp.o \
+       test_gui_htmlparser.o \
        test_gui_htmlwindow.o \
        test_gui_accelentry.o \
        test_gui_menu.o \
@@ -831,6 +832,9 @@ test_gui_image.o: $(srcdir)/image/image.cpp $(TEST_GUI_ODEP)
 test_gui_rawbmp.o: $(srcdir)/image/rawbmp.cpp $(TEST_GUI_ODEP)
        $(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/image/rawbmp.cpp
 
+test_gui_htmlparser.o: $(srcdir)/html/htmlparser.cpp $(TEST_GUI_ODEP)
+       $(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/html/htmlparser.cpp
+
 test_gui_htmlwindow.o: $(srcdir)/html/htmlwindow.cpp $(TEST_GUI_ODEP)
        $(CXXC) -c -o $@ $(TEST_GUI_CXXFLAGS) $(srcdir)/html/htmlwindow.cpp
 
diff --git a/tests/html/htmlparser.cpp b/tests/html/htmlparser.cpp
new file mode 100644 (file)
index 0000000..14a1842
--- /dev/null
@@ -0,0 +1,75 @@
+///////////////////////////////////////////////////////////////////////////////
+// Name:        tests/html/htmlparser.cpp
+// Purpose:     wxHtmlParser tests
+// Author:      Vadim Zeitlin
+// Created:     2011-01-13
+// RCS-ID:      $Id$
+// Copyright:   (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
+///////////////////////////////////////////////////////////////////////////////
+
+// ----------------------------------------------------------------------------
+// headers
+// ----------------------------------------------------------------------------
+
+#include "testprec.h"
+
+#if wxUSE_HTML
+
+#ifdef __BORLANDC__
+    #pragma hdrstop
+#endif
+
+#ifndef WX_PRECOMP
+#endif // WX_PRECOMP
+
+#include "wx/html/htmlpars.h"
+
+// ----------------------------------------------------------------------------
+// test class
+// ----------------------------------------------------------------------------
+
+class HtmlParserTestCase : public CppUnit::TestCase
+{
+public:
+    HtmlParserTestCase() { }
+
+private:
+    CPPUNIT_TEST_SUITE( HtmlParserTestCase );
+        CPPUNIT_TEST( Invalid );
+    CPPUNIT_TEST_SUITE_END();
+
+    void Invalid();
+
+    wxDECLARE_NO_COPY_CLASS(HtmlParserTestCase);
+};
+
+// register in the unnamed registry so that these tests are run by default
+CPPUNIT_TEST_SUITE_REGISTRATION( HtmlParserTestCase );
+
+// also include in it's own registry so that these tests can be run alone
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( HtmlParserTestCase, "HtmlParserTestCase" );
+
+// ----------------------------------------------------------------------------
+// tests themselves
+// ----------------------------------------------------------------------------
+
+// Test that parsing invalid HTML simply fails but doesn't crash for example.
+void HtmlParserTestCase::Invalid()
+{
+    class NullParser : public wxHtmlParser
+    {
+    public:
+        virtual wxObject *GetProduct() { return NULL; }
+
+    protected:
+        virtual void AddText(const wxString& WXUNUSED(txt)) { }
+    };
+
+    NullParser p;
+    p.Parse("<");
+    p.Parse("<foo");
+    p.Parse("<!--");
+    p.Parse("<!---");
+}
+
+#endif //wxUSE_HTML
index 2a366f22d8e285279fac22bd8c1bbe77ec1985b7..dea20435379edd73a77fbbb6e7dbbe0ed6a06505 100644 (file)
@@ -187,6 +187,7 @@ TEST_GUI_OBJECTS =  \
        $(OBJS)\test_gui_fonttest.obj \\r
        $(OBJS)\test_gui_image.obj \\r
        $(OBJS)\test_gui_rawbmp.obj \\r
+       $(OBJS)\test_gui_htmlparser.obj \\r
        $(OBJS)\test_gui_htmlwindow.obj \\r
        $(OBJS)\test_gui_accelentry.obj \\r
        $(OBJS)\test_gui_menu.obj \\r
@@ -877,6 +878,9 @@ $(OBJS)\test_gui_image.obj: .\image\image.cpp
 $(OBJS)\test_gui_rawbmp.obj: .\image\rawbmp.cpp\r
        $(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\image\rawbmp.cpp\r
 \r
+$(OBJS)\test_gui_htmlparser.obj: .\html\htmlparser.cpp\r
+       $(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\html\htmlparser.cpp\r
+\r
 $(OBJS)\test_gui_htmlwindow.obj: .\html\htmlwindow.cpp\r
        $(CXX) -q -c -P -o$@ $(TEST_GUI_CXXFLAGS) .\html\htmlwindow.cpp\r
 \r
index ee9e554aa6d070372b76be322c6cdb87f8fc6a2b..9c4aa0e01b9ed2dc65537ea803e6e0685b4af1b0 100644 (file)
@@ -180,6 +180,7 @@ TEST_GUI_OBJECTS =  \
        $(OBJS)\test_gui_fonttest.o \\r
        $(OBJS)\test_gui_image.o \\r
        $(OBJS)\test_gui_rawbmp.o \\r
+       $(OBJS)\test_gui_htmlparser.o \\r
        $(OBJS)\test_gui_htmlwindow.o \\r
        $(OBJS)\test_gui_accelentry.o \\r
        $(OBJS)\test_gui_menu.o \\r
@@ -858,6 +859,9 @@ $(OBJS)\test_gui_image.o: ./image/image.cpp
 $(OBJS)\test_gui_rawbmp.o: ./image/rawbmp.cpp\r
        $(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $<\r
 \r
+$(OBJS)\test_gui_htmlparser.o: ./html/htmlparser.cpp\r
+       $(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $<\r
+\r
 $(OBJS)\test_gui_htmlwindow.o: ./html/htmlwindow.cpp\r
        $(CXX) -c -o $@ $(TEST_GUI_CXXFLAGS) $(CPPDEPS) $<\r
 \r
index 85db68d10c57acf45065b468617de0fdcb86e669..88280b900aaf6276a3e59d09dc45818352452540 100644 (file)
@@ -182,6 +182,7 @@ TEST_GUI_OBJECTS =  \
        $(OBJS)\test_gui_fonttest.obj \\r
        $(OBJS)\test_gui_image.obj \\r
        $(OBJS)\test_gui_rawbmp.obj \\r
+       $(OBJS)\test_gui_htmlparser.obj \\r
        $(OBJS)\test_gui_htmlwindow.obj \\r
        $(OBJS)\test_gui_accelentry.obj \\r
        $(OBJS)\test_gui_menu.obj \\r
@@ -1003,6 +1004,9 @@ $(OBJS)\test_gui_image.obj: .\image\image.cpp
 $(OBJS)\test_gui_rawbmp.obj: .\image\rawbmp.cpp\r
        $(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\image\rawbmp.cpp\r
 \r
+$(OBJS)\test_gui_htmlparser.obj: .\html\htmlparser.cpp\r
+       $(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\html\htmlparser.cpp\r
+\r
 $(OBJS)\test_gui_htmlwindow.obj: .\html\htmlwindow.cpp\r
        $(CXX) /c /nologo /TP /Fo$@ $(TEST_GUI_CXXFLAGS) .\html\htmlwindow.cpp\r
 \r
index a9f8122117b46e02414118bb5b89fac15da54c8f..90dff37c9ae8baaf893c7a582428debe9a859c9b 100644 (file)
@@ -422,6 +422,7 @@ TEST_GUI_OBJECTS =  &
        $(OBJS)\test_gui_fonttest.obj &\r
        $(OBJS)\test_gui_image.obj &\r
        $(OBJS)\test_gui_rawbmp.obj &\r
+       $(OBJS)\test_gui_htmlparser.obj &\r
        $(OBJS)\test_gui_htmlwindow.obj &\r
        $(OBJS)\test_gui_accelentry.obj &\r
        $(OBJS)\test_gui_menu.obj &\r
@@ -916,6 +917,9 @@ $(OBJS)\test_gui_image.obj :  .AUTODEPEND .\image\image.cpp
 $(OBJS)\test_gui_rawbmp.obj :  .AUTODEPEND .\image\rawbmp.cpp\r
        $(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $<\r
 \r
+$(OBJS)\test_gui_htmlparser.obj :  .AUTODEPEND .\html\htmlparser.cpp\r
+       $(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $<\r
+\r
 $(OBJS)\test_gui_htmlwindow.obj :  .AUTODEPEND .\html\htmlwindow.cpp\r
        $(CXX) -bt=nt -zq -fo=$^@ $(TEST_GUI_CXXFLAGS) $<\r
 \r
index 42aaee6d142932b7427181293beed6276527f2ed..9afe6d54d8ea30063bfa765cdba5101a8695a019 100644 (file)
             font/fonttest.cpp
             image/image.cpp
             image/rawbmp.cpp
+            html/htmlparser.cpp
             html/htmlwindow.cpp
             menu/accelentry.cpp
             menu/menu.cpp
index d130cb8647e2dbecd2393106a31cf3e87416403d..2ed004defb4f5c1526248519248055debad0b5b7 100644 (file)
@@ -345,6 +345,10 @@ SOURCE=.\controls\htmllboxtest.cpp
 # End Source File\r
 # Begin Source File\r
 \r
+SOURCE=.\html\htmlparser.cpp\r
+# End Source File\r
+# Begin Source File\r
+\r
 SOURCE=.\html\htmlwindow.cpp\r
 # End Source File\r
 # Begin Source File\r
index 0172d3a9622063ccb0761741884d0582b6a9d112..377614d36f42cd673dac00b1247156a25ec8bc53 100644 (file)
                        <File\r
                                RelativePath=".\controls\htmllboxtest.cpp">\r
                        </File>\r
+                       <File\r
+                               RelativePath=".\html\htmlparser.cpp">\r
+                       </File>\r
                        <File\r
                                RelativePath=".\html\htmlwindow.cpp">\r
                        </File>\r
index f300941b72526ac001cd6a1d35d668a294e6b281..278380561204128e704198a6ddd83e44c6989d67 100644 (file)
                                RelativePath=".\controls\htmllboxtest.cpp"\r
                                >\r
                        </File>\r
+                       <File\r
+                               RelativePath=".\html\htmlparser.cpp"\r
+                               >\r
+                       </File>\r
                        <File\r
                                RelativePath=".\html\htmlwindow.cpp"\r
                                >\r
index 5063d1dca8fc62f03c427314ecdf4ec677464cf1..958ea5d401b41911a334b9837853f35b7e1ee50a 100644 (file)
                                RelativePath=".\controls\htmllboxtest.cpp"\r
                                >\r
                        </File>\r
+                       <File\r
+                               RelativePath=".\html\htmlparser.cpp"\r
+                               >\r
+                       </File>\r
                        <File\r
                                RelativePath=".\html\htmlwindow.cpp"\r
                                >\r