+
+//-----------------------------------------------------------------------------
+// wxHtmlParser::ExtractCharsetInformation
+//-----------------------------------------------------------------------------
+
+// Bare-bones wxHtmlParser subclass: it produces no output object and discards
+// all text passed to AddText(), so the only work done while parsing is
+// whatever the tag handlers registered with it do.
+class wxMetaTagParser : public wxHtmlParser
+{
+public:
+    wxMetaTagParser()
+    {
+    }
+
+    // There is never any product to hand back.
+    wxObject* GetProduct()
+    {
+        return NULL;
+    }
+
+protected:
+    // Text content is deliberately ignored.
+    virtual void AddText(const wxString& WXUNUSED(txt))
+    {
+    }
+
+    DECLARE_NO_COPY_CLASS(wxMetaTagParser)
+};
+
+// Tag handler registered for the META and BODY tags; it keeps a pointer to a
+// caller-provided string which HandleTag() fills in.
+class wxMetaTagHandler : public wxHtmlTagHandler
+{
+public:
+    // retval: the string written to by HandleTag(); only the pointer is
+    // stored, so the string must stay alive while the handler is in use.
+    wxMetaTagHandler(wxString *retval)
+        : wxHtmlTagHandler(),
+          m_retval(retval)
+    {
+    }
+
+    // Comma-separated list of the tag names this handler is interested in.
+    wxString GetSupportedTags()
+    {
+        return wxT("META,BODY");
+    }
+
+    bool HandleTag(const wxHtmlTag& tag);
+
+private:
+    // destination string supplied to the constructor
+    wxString *m_retval;
+
+    DECLARE_NO_COPY_CLASS(wxMetaTagHandler)
+};
+
+// Processes one META or BODY tag.
+//
+// A BODY tag stops the parser immediately. For a META tag whose HTTP-EQUIV
+// parameter equals "Content-Type" (compared case-insensitively) and which has
+// a CONTENT parameter of the form "text/html; charset=XXX", the lower-cased
+// text following "charset=" is stored in the string passed to the constructor
+// and the parser is stopped as well.
+//
+// White space before and after the ';' separator is optional, so that
+// "text/html;charset=XXX" is recognized too and not only the form with
+// exactly one space after the semicolon.
+//
+// Always returns false.
+bool wxMetaTagHandler::HandleTag(const wxHtmlTag& tag)
+{
+    if (tag.GetName() == _T("BODY"))
+    {
+        m_Parser->StopParsing();
+        return false;
+    }
+
+    if (!tag.HasParam(_T("HTTP-EQUIV")) ||
+        !tag.GetParam(_T("HTTP-EQUIV")).IsSameAs(_T("Content-Type"), false) ||
+        !tag.HasParam(_T("CONTENT")))
+    {
+        // not a Content-Type declaration, nothing to extract
+        return false;
+    }
+
+    // the whole value, including the charset name, is lower-cased
+    const wxString content = tag.GetParam(_T("CONTENT")).Lower();
+
+    // separate strings are used for each step so that StartsWith() never
+    // writes its "rest" output into the string it is reading from
+    wxString afterType;
+    if (!content.StartsWith(_T("text/html"), &afterType))
+        return false;
+
+    // skip optional white space before the parameter separator
+    afterType.Trim(false);
+
+    wxString afterSeparator;
+    if (!afterType.StartsWith(_T(";"), &afterSeparator))
+        return false;
+
+    // skip optional white space after the parameter separator
+    afterSeparator.Trim(false);
+
+    wxString charset;
+    if (afterSeparator.StartsWith(_T("charset="), &charset))
+    {
+        *m_retval = charset;
+        m_Parser->StopParsing();
+    }
+
+    return false;
+}
+
+
+// Returns the charset found by running a wxMetaTagParser with a
+// wxMetaTagHandler over "markup", or an empty string if the handler did not
+// store anything.
+/*static*/
+wxString wxHtmlParser::ExtractCharsetInformation(const wxString& markup)
+{
+    // declared before the parser so that it outlives the handler, which
+    // keeps a pointer to it
+    wxString charset;
+
+    // An automatic object replaces the previous new/delete pair: there is no
+    // allocation result to check and the parser is destroyed on every exit
+    // path, including an exception propagating out of Parse().
+    wxMetaTagParser parser;
+
+    // NOTE(review): as before, the handler is not deleted here; presumably
+    // the parser takes ownership of it -- confirm against AddTagHandler().
+    parser.AddTagHandler(new wxMetaTagHandler(&charset));
+    parser.Parse(markup);
+
+    return charset;
+}
+
+// Skips the HTML comment starting at "start", if there is one.
+//
+// start: must point to a '<' character; updated as described below.
+// end:   end of the text being scanned, never dereferenced.
+//
+// Returns false and leaves "start" unchanged if the text at "start" doesn't
+// begin with "<!--". Otherwise returns true and sets "start" to the '>'
+// terminating the comment or, if no terminator is found before "end", to the
+// last character of the "<!--" opener.
+/* static */
+bool
+wxHtmlParser::SkipCommentTag(wxString::const_iterator& start,
+                             wxString::const_iterator end)
+{
+    wxASSERT_MSG( *start == '<', _T("should be called on the tag start") );
+
+    wxString::const_iterator p = start;
+
+    // comments begin with "<!--" in HTML 4.0; anything shorter or not
+    // containing these characters is not a comment
+    //
+    // the end of the text is tested before each character is read so that a
+    // truncated opener such as "<!-" at the very end of the input is never
+    // read past "end" and no iterator before the beginning of a short string
+    // is ever formed
+    if ( ++p == end || *p != '!' )
+        return false;
+    if ( ++p == end || *p != '-' )
+        return false;
+    if ( ++p == end || *p != '-' )
+        return false;
+
+    // skip the start of the comment tag in any case, if we don't find the
+    // closing tag we should ignore broken markup
+    start = p;
+
+    // comments end with "--[ \t\r\n]*>", i.e. white space is allowed between
+    // comment delimiter and the closing tag character (section 3.2.4 of
+    // http://www.w3.org/TR/html401/)
+    int dashes = 0;
+    while ( ++p < end )
+    {
+        const wxChar c = *p;
+
+        if ( (c == wxT(' ') || c == wxT('\n') ||
+              c == wxT('\r') || c == wxT('\t')) && dashes >= 2 )
+        {
+            // ignore white space before potential tag end, keeping the
+            // count of dashes seen so far
+            continue;
+        }
+
+        if ( c == wxT('>') && dashes >= 2 )
+        {
+            // found end of comment
+            start = p;
+            break;
+        }
+
+        // count consecutive dashes, any other character resets the count
+        if ( c == wxT('-') )
+            dashes++;
+        else
+            dashes = 0;
+    }
+
+    return true;
+}
+
+#endif // wxUSE_HTML