fixed parsing of CDATA elements (<script> and <style>) (Bill Nalen)

author Václav Slavík <vslavik@fastmail.fm>

Sun, 23 Feb 2003 18:59:13 +0000 (18:59 +0000)

committer Václav Slavík <vslavik@fastmail.fm>

Sun, 23 Feb 2003 18:59:13 +0000 (18:59 +0000)
author Václav Slavík <vslavik@fastmail.fm>
Sun, 23 Feb 2003 18:59:13 +0000 (18:59 +0000)
committer Václav Slavík <vslavik@fastmail.fm>
Sun, 23 Feb 2003 18:59:13 +0000 (18:59 +0000)
diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp

index 9ed716fe817cb391a17355eccf01a519e5e61da6..ac2fc44880df4a532a87fa28afeef1d61a89da77 100644 (file)
--- a/src/html/htmlpars.cpp
+++ b/src/html/htmlpars.cpp
@@ -127,6 +127,8 @@ void wxHtmlParser::CreateDOMTree()
      m_CurTextPiece = 0;
  }
  
      m_CurTextPiece = 0;
  }
  
+extern bool wxIsCDATAElement(const wxChar *tag);
+
  void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
                                      int begin_pos, int end_pos,
                                      wxHtmlTagsCache *cache)
  void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
                                      int begin_pos, int end_pos,
                                      wxHtmlTagsCache *cache)
@@ -137,6 +139,15 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
      int i = begin_pos;
      int textBeginning = begin_pos;
  
      int i = begin_pos;
      int textBeginning = begin_pos;
  
+    // If the tag contains CDATA text, we include the text between the
+    // beginning and ending tags verbatim. Setting i=end_pos skips to the very
+    // end of this function, where the text piece is added, bypassing any child
+    // tag parsing (a CDATA element can't have child elements by definition):
+    if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str()))
+    {
+        i = end_pos;
+    }
+
      while (i < end_pos)
      {
          c = m_Source.GetChar(i);
      while (i < end_pos)
      {
          c = m_Source.GetChar(i);
@@ -209,6 +220,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
                  }
                  else
                      i = chd->GetBeginPos();
                  }
                  else
                      i = chd->GetBeginPos();
+                
                  textBeginning = i;
              }
  
                  textBeginning = i;
              }
  
diff --git a/src/html/htmltag.cpp b/src/html/htmltag.cpp

index 47ac16a2e424894e2761630c74690dd673d567a8..e8d244a2ee01f78eecaa5c29ecebd5a4d5f326c8 100644 (file)
--- a/src/html/htmltag.cpp
+++ b/src/html/htmltag.cpp
@@ -57,6 +57,12 @@ IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)
  
  #define CACHE_INCREMENT  64
  
  
  #define CACHE_INCREMENT  64
  
+// Returns true if 'tag' names an element whose content is CDATA, i.e. one of
+// the literals "SCRIPT" or "STYLE" as compared by wxStrcmp(). The literals
+// are upper-case, so callers are expected to pass an upper-cased tag name.
+//
+// NOTE: deliberately NOT declared 'inline'. htmlpars.cpp refers to this
+// function through a plain 'extern' declaration, so this translation unit
+// must emit an out-of-line definition with external linkage; an inline
+// definition here would not be guaranteed to produce one.
+bool wxIsCDATAElement(const wxChar *tag)
+{
+    return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
+           (wxStrcmp(tag, _T("STYLE")) == 0);
+}
+
  wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
  {
      const wxChar *src = source.c_str();
  wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
  {
      const wxChar *src = source.c_str();
@@ -108,6 +114,47 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
              else
              {
                  m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
              else
              {
                  m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
+
+                if (wxIsCDATAElement(tagBuffer))
+                {
+                    // find next matching tag
+                    int tag_len = wxStrlen(tagBuffer);
+                    while (pos < lng)
+                    {
+                        // find the ending tag
+                        while (pos + 1 < lng &&
+                               (src[pos] != '<' || src[pos+1] != '/'))
+                            ++pos;
+                        if (src[pos] == '<')
+                            ++pos;
+                        
+                        // see if it matches
+                        int match_pos = 0;
+                        while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
+                            if (wxToupper(src[pos]) == tagBuffer[match_pos]) {
+                                ++match_pos;
+                            }  
+                            else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
+                                src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
+                                // need to skip over these
+                            }
+                            else {
+                                match_pos = 0;
+                            }
+                            ++pos;
+                        }
+
+                        // found a match
+                        if (match_pos == tag_len) {
+                            pos = pos - tag_len - 3;
+                            stpos = pos;
+                            break;
+                        }
+                        else {
+                            ++pos;
+                        }
+                    }
+                }
              }
          }
  
              }
          }
author	Václav Slavík <vslavik@fastmail.fm>
	Sun, 23 Feb 2003 18:59:13 +0000 (18:59 +0000)
committer	Václav Slavík <vslavik@fastmail.fm>
	Sun, 23 Feb 2003 18:59:13 +0000 (18:59 +0000)
src/html/htmlpars.cpp		patch \| blob \| blame \| history
src/html/htmltag.cpp		patch \| blob \| blame \| history