git.saurik.com Git - wxWidgets.git/blobdiff - src/html/htmltag.cpp
use FromUTF8() when loading XML; data coming from Expat is already validated UTF-8
[wxWidgets.git] / src / html / htmltag.cpp
index c14b39fe2d6f0b317ad6f462e89810293a5aad2a..ceb353297c68e79c14b255fbeaa8b3ad29ffb7b3 100644 (file)
@@ -1,36 +1,31 @@
 /////////////////////////////////////////////////////////////////////////////
-// Name:        htmltag.cpp
+// Name:        src/html/htmltag.cpp
 // Purpose:     wxHtmlTag class (represents single tag)
 // Author:      Vaclav Slavik
 // RCS-ID:      $Id$
 // Copyright:   (c) 1999 Vaclav Slavik
-// Licence:     wxWindows Licence
+// Licence:     wxWindows licence
 /////////////////////////////////////////////////////////////////////////////
 
+#include "wx/wxprec.h"
 
-#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
-#pragma implementation "htmltag.h"
+#ifdef __BORLANDC__
+    #pragma hdrstop
 #endif
 
-#include "wx/wxprec.h"
-
-#include "wx/defs.h"
 #if wxUSE_HTML
 
-#ifdef __BORLANDC__
-#pragma hdrstop
-#endif
+#include "wx/html/htmltag.h"
 
-#ifndef WXPRECOMP
+#ifndef WX_PRECOMP
+    #include "wx/colour.h"
+    #include "wx/wxcrtvararg.h"
 #endif
 
-#include "wx/html/htmltag.h"
 #include "wx/html/htmlpars.h"
-#include "wx/colour.h"
 #include <stdio.h> // for vsscanf
 #include <stdarg.h>
 
-
 //-----------------------------------------------------------------------------
 // wxHtmlTagsCache
 //-----------------------------------------------------------------------------
@@ -66,18 +61,25 @@ bool wxIsCDATAElement(const wxChar *tag)
 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
 {
     const wxChar *src = source.c_str();
-    int lng = source.Length();
+    int lng = source.length();
     wxChar tagBuffer[256];
 
     m_Cache = NULL;
     m_CacheSize = 0;
     m_CachePos = 0;
 
-    int pos = 0;
-    while (pos < lng)
+    for ( int pos = 0; pos < lng; pos++ )
     {
         if (src[pos] == wxT('<'))   // tag found:
         {
+            // don't cache comment tags
+            wxString::const_iterator iter = source.begin() + pos;
+            if ( wxHtmlParser::SkipCommentTag(iter, source.end()) )
+            {
+                pos = iter - source.begin();
+                continue;
+            }
+
             if (m_CacheSize % CACHE_INCREMENT == 0)
                 m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
             int tg = m_CacheSize++;
@@ -90,7 +92,7 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
                   src[pos] != wxT('>') && !wxIsspace(src[pos]);
                   i++, pos++ )
             {
-                tagBuffer[i] = wxToupper(src[pos]);
+                tagBuffer[i] = (wxChar)wxToupper(src[pos]);
             }
             tagBuffer[i] = _T('\0');
 
@@ -117,6 +119,11 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
 
                 if (wxIsCDATAElement(tagBuffer))
                 {
+                    // store the orig pos in case we are missing the closing
+                    // tag (see below)
+                    wxInt32 old_pos = pos;
+                    bool foundCloseTag = false;
+
                     // find next matching tag
                     int tag_len = wxStrlen(tagBuffer);
                     while (pos < lng)
@@ -127,7 +134,7 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
                             ++pos;
                         if (src[pos] == '<')
                             ++pos;
-                        
+
                         // see if it matches
                         int match_pos = 0;
                         while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
@@ -135,7 +142,7 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
                             // Unicode build
                             if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
                                 ++match_pos;
-                            }  
+                            }
                             else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
                                 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
                                 // need to skip over these
@@ -147,20 +154,28 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
                         }
 
                         // found a match
-                        if (match_pos == tag_len) {
+                        if (match_pos == tag_len)
+                        {
                             pos = pos - tag_len - 3;
-                            stpos = pos;
+                            foundCloseTag = true;
                             break;
                         }
-                        else {
+                        else // keep looking for the closing tag
+                        {
                             ++pos;
                         }
                     }
+                    if (!foundCloseTag)
+                    {
+                        // we didn't find the closing tag; this means the markup
+                        // is incorrect and the best thing we can do is to
+                        // ignore the unclosed tag and continue parsing as if
+                        // it didn't exist:
+                        pos = old_pos;
+                    }
                 }
             }
         }
-
-        pos++;
     }
 
     // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
@@ -179,6 +194,16 @@ void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
         int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
         do
         {
+            if ( m_CachePos < 0 || m_CachePos == m_CacheSize )
+            {
+                // something is very wrong with HTML, give up by returning an
+                // impossibly large value which is going to be ignored by the
+                // caller
+                *end1 =
+                *end2 = INT_MAX;
+                return;
+            }
+
             m_CachePos += delta;
         }
         while (m_Cache[m_CachePos].Key != at);
@@ -340,12 +365,12 @@ wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
         }
 
         #undef IS_WHITE
-   }
-   m_Begin = i;
+    }
+    m_Begin = i;
 
-   cache->QueryTag(pos, &m_End1, &m_End2);
-   if (m_End1 > end_pos) m_End1 = end_pos;
-   if (m_End2 > end_pos) m_End2 = end_pos;
+    cache->QueryTag(pos, &m_End1, &m_End2);
+    if (m_End1 > end_pos) m_End1 = end_pos;
+    if (m_End2 > end_pos) m_End2 = end_pos;
 }
 
 wxHtmlTag::~wxHtmlTag()
@@ -391,25 +416,16 @@ int wxHtmlTag::ScanParam(const wxString& par,
 
 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
 {
+    wxCHECK_MSG( clr, false, _T("invalid colour argument") );
+
     wxString str = GetParam(par);
 
-    if (str.IsEmpty()) return false;
-    if (str.GetChar(0) == wxT('#'))
+    // handle colours defined in HTML 4.0 first:
+    if (str.length() > 1 && str[0] != _T('#'))
     {
-        unsigned long tmp;
-        if (ScanParam(par, wxT("#%lX"), &tmp) != 1)
-            return false;
-        *clr = wxColour((unsigned char)((tmp & 0xFF0000) >> 16),
-                        (unsigned char)((tmp & 0x00FF00) >> 8),
-                        (unsigned char)(tmp & 0x0000FF));
-        return true;
-    }
-    else
-    {
-        // Handle colours defined in HTML 4.0:
-        #define HTML_COLOUR(name,r,g,b)                 \
+        #define HTML_COLOUR(name, r, g, b)              \
             if (str.IsSameAs(wxT(name), false))         \
-                { *clr = wxColour(r,g,b); return true; }
+                { clr->Set(r, g, b); return true; }
         HTML_COLOUR("black",   0x00,0x00,0x00)
         HTML_COLOUR("silver",  0xC0,0xC0,0xC0)
         HTML_COLOUR("gray",    0x80,0x80,0x80)
@@ -429,6 +445,13 @@ bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
         #undef HTML_COLOUR
     }
 
+    // then try to parse #rrggbb representations or set from other well-known
+    // names (note that this doesn't strictly conform to the HTML spec, but it
+    // does no real harm; it *must*, however, be done after the standard
+    // colours are handled above):
+    if (clr->Set(str))
+        return true;
+
     return false;
 }
 
@@ -443,7 +466,7 @@ bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
 
 wxString wxHtmlTag::GetAllParams() const
 {
-    // VS: this function is for backward compatiblity only,
+    // VS: this function is for backward compatibility only,
     //     never used by wxHTML
     wxString s;
     size_t cnt = m_ParamNames.GetCount();