1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmlpars.cpp
3 // Purpose: wxHtmlParser class (generic parser)
4 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
10 #include "wx/wxprec.h"
16 #if wxUSE_HTML && wxUSE_STREAMS
19 #include "wx/dynarray.h"
23 #include "wx/wxcrtvararg.h"
26 #include "wx/tokenzr.h"
27 #include "wx/wfstream.h"
29 #include "wx/fontmap.h"
30 #include "wx/html/htmldefs.h"
31 #include "wx/html/htmlpars.h"
32 #include "wx/vector.h"
35 #include "wx/msw/wince/missing.h" // for bsearch()
38 // DLL options compatibility check:
39 WX_CHECK_BUILD_OPTIONS("wxHTML")
// Trace mask passed to wxLogTrace() for HTML parser diagnostics (see
// wxHtmlEntitiesParser::Parse(), which logs unrecognized entities with it).
41 const wxChar
*wxTRACE_HTML_DEBUG
= _T("htmldebug");
43 //-----------------------------------------------------------------------------
44 // wxHtmlParser helpers
45 //-----------------------------------------------------------------------------
// Constructs a text piece from the two iterators delimiting it.
51 wxHtmlTextPiece(const wxString::const_iterator
& start
,
52 const wxString::const_iterator
& end
)
53 : m_start(start
), m_end(end
) {}
// Iterators marking where the piece starts and ends; DoParsing() builds the
// piece's text with wxString(m_start, m_end).
54 wxString::const_iterator m_start
, m_end
;
// Vector of wxHtmlTextPiece items; the parser keeps the pieces of the
// current source in m_TextPieces.
57 // NB: this is an empty class and not typedef because of forward declaration
58 class wxHtmlTextPieces
: public wxVector
<wxHtmlTextPiece
>
// Snapshot of parser state pushed by wxHtmlParser::SetSourceAndSaveState()
// and popped by wxHtmlParser::RestoreState().
62 class wxHtmlParserState
// Text pieces that were current when the state was saved.
67 wxHtmlTextPieces
*m_textPieces
;
// Source string that was current when the state was saved.
69 const wxString
*m_source
;
// Link to the previously saved state (the saved states form a singly linked
// list headed by wxHtmlParser::m_SavedStates).
70 wxHtmlParserState
*m_nextState
;
73 //-----------------------------------------------------------------------------
75 //-----------------------------------------------------------------------------
// wxWidgets RTTI: registers wxHtmlParser as an abstract class derived from
// wxObject.
77 IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser
,wxObject
)
// Constructor.
79 wxHtmlParser::wxHtmlParser()
// The parser owns its entities parser; the destructor deletes it.
84 m_entitiesParser
= new wxHtmlEntitiesParser
;
// Destructor: releases everything the parser still holds.
92 wxHtmlParser::~wxHtmlParser()
// Pop every saved state; RestoreState() returns false once none is left.
94 while (RestoreState()) {}
// NOTE(review): the WX_CLEAR_* macros presumably delete the stored pointers
// (the hash copies pushed by PushTagHandler() and the registered handlers)
// before emptying the containers -- confirm against the macro definitions.
97 WX_CLEAR_ARRAY(m_HandlersStack
);
98 WX_CLEAR_HASH_SET(wxHtmlTagHandlersSet
, m_HandlersSet
);
99 delete m_entitiesParser
;
// Parses the given source text. The object obtained from GetProduct() is
// kept in `result`.
103 wxObject
* wxHtmlParser::Parse(const wxString
& source
)
107 wxObject
*result
= GetProduct();
// Prepares the parser for a run over `source`.
112 void wxHtmlParser::InitParser(const wxString
& source
)
// Reset the stop flag (presumably the one StopParsing() raises -- confirm).
115 m_stopParsing
= false;
// NOTE(review): presumably the clean-up counterpart of InitParser() --
// confirm against the function body.
118 void wxHtmlParser::DoneParser()
// Replaces the text to be parsed with a heap-allocated copy of `src`.
123 void wxHtmlParser::SetSource(const wxString
& src
)
126 // NB: This is allocated on heap because wxHtmlTag uses iterators and
127 // making a copy of m_Source string in SetSourceAndSaveState() and
128 // RestoreState() would invalidate them (because wxString::m_impl's
129 // memory would change completely twice and iterators use pointers
130 // into it). So instead, we keep the string object intact and only
131 // store/restore pointer to it, for which we need it to be allocated
// on the heap.
134 m_Source
= new wxString(src
);
// Builds the tag tree and the list of text pieces for the whole of m_Source.
140 void wxHtmlParser::CreateDOMTree()
// Tags cache for the source; handed on to every wxHtmlTag created below.
142 wxHtmlTagsCache
cache(*m_Source
);
143 m_TextPieces
= new wxHtmlTextPieces
;
// Top-level call: no parent tag, and the range is the entire source.
144 CreateDOMSubTree(NULL
, m_Source
->begin(), m_Source
->end(), &cache
);
// Declared here, defined elsewhere: tells whether `tag` names an element
// whose content CreateDOMSubTree() treats as CDATA text.
148 extern bool wxIsCDATAElement(const wxString
& tag
);
// Scans the source between begin_pos and end_pos, creating a wxHtmlTag for
// each opening tag found and a wxHtmlTextPiece for each run of text between
// tags.
//   cur       - parent for the tags created here, or NULL at the top level
//   begin_pos - start of the range to scan
//   end_pos   - end of the range to scan
//   cache     - tags cache passed on to each wxHtmlTag constructor
150 void wxHtmlParser::CreateDOMSubTree(wxHtmlTag
*cur
,
151 const wxString::const_iterator
& begin_pos
,
152 const wxString::const_iterator
& end_pos
,
153 wxHtmlTagsCache
*cache
)
// Guard for an empty or inverted range.
155 if (end_pos
<= begin_pos
)
// i is the scan position; textBeginning marks where the pending run of text
// starts.
159 wxString::const_iterator i
= begin_pos
;
160 wxString::const_iterator textBeginning
= begin_pos
;
162 // If the tag contains CDATA text, we include the text between beginning
163 // and ending tag verbatim. Setting i=end_pos will skip to the very
164 // end of this function where text piece is added, bypassing any child
165 // tags parsing (CDATA element can't have child elements by definition):
166 if (cur
!= NULL
&& wxIsCDATAElement(cur
->GetName()))
177 // add text to m_TextPieces:
178 if (i
> textBeginning
)
179 m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, i
));
181 // if it is a comment, skip it:
182 if ( SkipCommentTag(i
, m_Source
->end()) )
184 textBeginning
= i
= i
+ 1; // skip closing '>' too
187 // add another tag to the tree:
// (only when at least one more character follows and it is not '/', i.e.
// this is not an ending tag)
188 else if (i
< end_pos
-1 && *(i
+1) != wxT('/'))
// Two construction variants: with `cur` as the parent, or with no parent.
192 chd
= new wxHtmlTag(cur
, m_Source
,
193 i
, end_pos
, cache
, m_entitiesParser
);
196 chd
= new wxHtmlTag(NULL
, m_Source
,
197 i
, end_pos
, cache
, m_entitiesParser
);
200 // if this is the first tag to be created make the root
201 // m_Tags point to it:
206 // if there is already a root tag add this tag as
// Link the new tag after the last sibling of the root tag.
208 chd
->m_Prev
= m_Tags
->GetLastSibling();
209 chd
->m_Prev
->m_Next
= chd
;
// A tag with an ending: recurse into the range between GetBeginIter() and
// GetEndIter1(), then resume scanning at GetEndIter2().
213 if (chd
->HasEnding())
215 CreateDOMSubTree(chd
,
216 chd
->GetBeginIter(), chd
->GetEndIter1(),
218 i
= chd
->GetEndIter2();
// Otherwise resume scanning at the tag's begin iterator.
221 i
= chd
->GetBeginIter();
226 // ... or skip ending tag:
// Advance to the next '>' (or to end_pos if there is none).
229 while (i
< end_pos
&& *i
!= wxT('>')) ++i
;
236 // add remaining text to m_TextPieces:
237 if (end_pos
> textBeginning
)
238 m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, end_pos
));
// Tears down the tag tree and resets the tree pointers.
241 void wxHtmlParser::DestroyDOMTree()
// Remember the next sibling of t1 (presumably because t1 is released right
// after -- confirm against the full loop body).
247 t2
= t1
->GetNextSibling();
// No tree and no current tag any more.
251 m_Tags
= m_CurTag
= NULL
;
// Overload without arguments: runs DoParsing() over the entire source
// string.
257 void wxHtmlParser::DoParsing()
261 DoParsing(m_Source
->begin(), m_Source
->end());
// Walks the source between begin_pos_ and end_pos, passing text pieces to
// AddText() and stepping through the tags that lie in the range.
//   begin_pos_ - start of the range to parse
//   end_pos    - end of the range to parse
264 void wxHtmlParser::DoParsing(const wxString::const_iterator
& begin_pos_
,
265 const wxString::const_iterator
& end_pos
)
// Working copy of the start position; it is advanced as parsing proceeds.
267 wxString::const_iterator
begin_pos(begin_pos_
);
// Guard for an empty or inverted range.
269 if (end_pos
<= begin_pos
)
272 wxHtmlTextPieces
& pieces
= *m_TextPieces
;
273 size_t piecesCnt
= pieces
.size();
275 while (begin_pos
< end_pos
)
// Skip tags that begin before the current position.
277 while (m_CurTag
&& m_CurTag
->GetBeginIter() < begin_pos
)
278 m_CurTag
= m_CurTag
->GetNextTag();
// Same test for text pieces: loop while the current piece starts before the
// current position.
279 while (m_CurTextPiece
< piecesCnt
&&
280 pieces
[m_CurTextPiece
].m_start
< begin_pos
)
// Text is handled first when the current piece starts before the current
// tag.
283 if (m_CurTextPiece
< piecesCnt
&&
285 pieces
[m_CurTextPiece
].m_start
< m_CurTag
->GetBeginIter()))
// Emit the piece with its entities decoded, then continue after it.
288 AddText(GetEntitiesParser()->Parse(
289 wxString(pieces
[m_CurTextPiece
].m_start
,
290 pieces
[m_CurTextPiece
].m_end
)));
291 begin_pos
= pieces
[m_CurTextPiece
].m_end
;
// Tag case: continue at GetEndIter2() when the tag has an ending, otherwise
// at its begin iterator.
296 if (m_CurTag
->HasEnding())
297 begin_pos
= m_CurTag
->GetEndIter2();
299 begin_pos
= m_CurTag
->GetBeginIter();
// Keep the tag in `t` and step m_CurTag on to the next tag.
300 wxHtmlTag
*t
= m_CurTag
;
301 m_CurTag
= m_CurTag
->GetNextTag();
// Dispatches `tag` to the handler registered for its name, if any.
310 void wxHtmlParser::AddTag(const wxHtmlTag
& tag
)
// Look the handler up by tag name.
314 wxHtmlTagHandlersHash::const_iterator h
= m_HandlersHash
.find(tag
.GetName());
315 if (h
!= m_HandlersHash
.end())
// `inner` receives the handler's return value.
317 inner
= h
->second
->HandleTag(tag
);
// Parse the tag's content, i.e. the range from GetBeginIter() to
// GetEndIter1(). NOTE(review): presumably only done when `inner` is false --
// confirm against the surrounding condition.
324 DoParsing(tag
.GetBeginIter(), tag
.GetEndIter1());
// Registers `handler` for every tag name it reports as supported.
328 void wxHtmlParser::AddTagHandler(wxHtmlTagHandler
*handler
)
// The supported-tags string is split on commas and spaces.
330 wxString
s(handler
->GetSupportedTags());
331 wxStringTokenizer
tokenizer(s
, wxT(", "));
// Map each tag name to the handler (replacing any earlier mapping for that
// name).
333 while (tokenizer
.HasMoreTokens())
334 m_HandlersHash
[tokenizer
.GetNextToken()] = handler
;
// Record the handler in the set that the destructor clears.
336 m_HandlersSet
.insert(handler
);
// Give the handler a back-pointer to this parser.
338 handler
->SetParser(this);
// Temporarily routes the given tags to `handler`; PopTagHandler() undoes it.
//   handler - handler to install
//   tags    - tag names, separated by commas and/or spaces
341 void wxHtmlParser::PushTagHandler(wxHtmlTagHandler
*handler
, const wxString
& tags
)
343 wxStringTokenizer
tokenizer(tags
, wxT(", "));
// Save a heap-allocated copy of the current handler map on the stack.
346 m_HandlersStack
.push_back(new wxHtmlTagHandlersHash(m_HandlersHash
));
// Override the mapping for each listed tag.
348 while (tokenizer
.HasMoreTokens())
350 key
= tokenizer
.GetNextToken();
351 m_HandlersHash
[key
] = handler
;
// Restores the handler map that was in effect before the matching
// PushTagHandler() call.
355 void wxHtmlParser::PopTagHandler()
// Popping from an empty stack is a programming error; bail out with a
// diagnostic.
357 wxCHECK_RET( !m_HandlersStack
.empty(),
358 "attempt to remove HTML tag handler from empty stack" );
// Take the most recently saved map off the stack and make it current.
360 wxHtmlTagHandlersHash
*prev
= m_HandlersStack
.back();
361 m_HandlersStack
.pop_back();
362 m_HandlersHash
= *prev
;
// Saves the current parsing state in a new wxHtmlParserState so that the
// parser can work on `src`; RestoreState() brings the saved state back.
366 void wxHtmlParser::SetSourceAndSaveState(const wxString
& src
)
368 wxHtmlParserState
*s
= new wxHtmlParserState
;
// Copy the members that describe the current position in the current source.
370 s
->m_curTag
= m_CurTag
;
372 s
->m_textPieces
= m_TextPieces
;
373 s
->m_curTextPiece
= m_CurTextPiece
;
// Only the pointer to the source string is stored, not a copy of the string.
374 s
->m_source
= m_Source
;
// Chain the new state in front of the previously saved ones.
376 s
->m_nextState
= m_SavedStates
;
// Restores the most recently saved parsing state.
// Returns false when there is no saved state to restore.
388 bool wxHtmlParser::RestoreState()
390 if (!m_SavedStates
) return false;
// Unlink the head of the saved-states list.
395 wxHtmlParserState
*s
= m_SavedStates
;
396 m_SavedStates
= s
->m_nextState
;
// Copy the saved members back into the parser.
398 m_CurTag
= s
->m_curTag
;
400 m_TextPieces
= s
->m_textPieces
;
401 m_CurTextPiece
= s
->m_curTextPiece
;
402 m_Source
= s
->m_source
;
408 wxString
wxHtmlParser::GetInnerSource(const wxHtmlTag
& tag
)
410 return wxString(tag
.GetBeginIter(), tag
.GetEndIter1());
413 //-----------------------------------------------------------------------------
415 //-----------------------------------------------------------------------------
// wxWidgets RTTI: registers wxHtmlTagHandler as an abstract class derived
// from wxObject.
417 IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler
,wxObject
)
// Runs the owning parser over `source` and then puts the parser back into
// the state it had before the call.
419 void wxHtmlTagHandler::ParseInnerSource(const wxString
& source
)
421 // It is safe to temporarily change the source being parsed,
422 // provided we restore the state back after parsing
// The three calls must stay in this order: save + switch, parse, restore.
423 m_Parser
->SetSourceAndSaveState(source
);
424 m_Parser
->DoParsing();
425 m_Parser
->RestoreState();
429 //-----------------------------------------------------------------------------
430 // wxHtmlEntitiesParser
431 //-----------------------------------------------------------------------------
// wxWidgets RTTI: registers wxHtmlEntitiesParser as a dynamically creatable
// class derived from wxObject.
433 IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser
,wxObject
)
// Constructor. In non-Unicode builds with wchar_t support the parser starts
// with no converter and the system encoding.
435 wxHtmlEntitiesParser::wxHtmlEntitiesParser()
436 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
437 : m_conv(NULL
), m_encoding(wxFONTENCODING_SYSTEM
)
// Destructor. The section guarded below applies only to non-Unicode builds
// with wchar_t support, the same configuration in which m_conv exists.
442 wxHtmlEntitiesParser::~wxHtmlEntitiesParser()
444 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
// Selects the encoding used when entity codes are converted to characters.
// Only has an effect in non-Unicode builds with wchar_t support.
449 void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding
)
451 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
// Check whether the requested encoding is already the current one.
452 if (encoding
== m_encoding
)
457 m_encoding
= encoding
;
// The system encoding is tested for separately from the converter creation
// below.
458 if (m_encoding
== wxFONTENCODING_SYSTEM
)
// Converter created from the encoding's name.
461 m_conv
= new wxCSConv(wxFontMapper::GetEncodingName(m_encoding
));
// Returns `input` with HTML entity references replaced by the characters
// they stand for; references that are not recognized are copied through
// unchanged and reported with wxLogTrace().
467 wxString
wxHtmlEntitiesParser::Parse(const wxString
& input
) const
// c is the scan position; last marks the start of the text not yet copied
// to the output.
471 const wxString::const_iterator
end(input
.end());
472 wxString::const_iterator
c(input
.begin());
473 wxString::const_iterator
last(c
);
475 for ( ; c
< end
; ++c
)
// Reserve the output buffer once, while the output is still empty.
479 if ( output
.empty() )
480 output
.reserve(input
.length());
// Copy the plain text accumulated since `last`.
483 output
.append(last
, c
);
// ent_s marks the first character of the entity name.
488 const wxString::const_iterator ent_s
= c
;
// Scan the entity name: letters, digits, '_' and '#' are accepted.
491 for ( ; c
!= end
; ++c
)
494 if ( !((ch
>= wxT('a') && ch
<= wxT('z')) ||
495 (ch
>= wxT('A') && ch
<= wxT('Z')) ||
496 (ch
>= wxT('0') && ch
<= wxT('9')) ||
497 ch
== wxT('_') || ch
== wxT('#')) )
501 entity
.append(ent_s
, c
);
// If the name is not terminated by ';', step back so that the terminating
// character is not consumed as part of the entity.
502 if (c
== end
|| *c
!= wxT(';')) --c
;
504 entity_char
= GetEntityChar(entity
);
506 output
<< entity_char
;
// Unknown entity: copy it through verbatim, starting one character before
// ent_s.
509 output
.append(ent_s
-1, c
+1);
510 wxLogTrace(wxTRACE_HTML_DEBUG
,
511 "Unrecognized HTML entity: '%s'",
516 if ( last
== input
.begin() ) // common case: no entity
// Copy whatever text follows the last entity.
519 output
.append(last
, end
);
// Converts a numeric character code to a wxChar.
524 wxChar
wxHtmlEntitiesParser::GetCharForCode(unsigned code
) const
// The code is placed in a wide-character buffer for conversion.
529 wbuf
[0] = (wchar_t)code
;
// Use the converter set up by SetEncoding() when there is one, otherwise the
// current locale's converter.
531 wxMBConv
*conv
= m_conv
? m_conv
: &wxConvLocal
;
// (size_t)-1 is the failure value tested for here.
532 if (conv
->WC2MB(buf
, wbuf
, 2) == (size_t)-1)
// Codes below 256 are returned as the character itself, anything else as
// '?'.
536 return (code
< 256) ? (wxChar
)code
: '?';
// One row of the entity table used by GetEntityChar(): an entity name and
// (via the `code` member that GetEntityChar() reads) its character code.
541 struct wxHtmlEntityInfo
// Entity name, stored in wxString's internal character type.
543 const wxStringCharType
*name
;
// Comparison callback given to bsearch() in GetEntityChar().
//   key  - the entity name being looked up
//   item - pointer to a wxHtmlEntityInfo table entry
// Returns the result of comparing `key` with the entry's name.
547 extern "C" int LINKAGEMODE
wxHtmlEntityCompare(const void *key
, const void *item
)
// UTF-8 builds compare char strings; other builds compare wxChar strings.
549 #if wxUSE_UNICODE_UTF8
550 return strcmp((char*)key
, ((wxHtmlEntityInfo
*)item
)->name
);
552 return wxStrcmp((wxChar
*)key
, ((wxHtmlEntityInfo
*)item
)->name
);
// Translates an entity name (the text between '&' and ';') into a character.
// Numeric references start with '#'; named entities are looked up in the
// table below. The resulting code is converted with GetCharForCode().
556 wxChar
wxHtmlEntitiesParser::GetEntityChar(const wxString
& entity
) const
// NOTE(review): no guard for an empty `entity` is visible before it is
// indexed with [0]; Parse() appears able to pass an empty name (a '&' that
// is not followed by a name character) -- confirm and guard if needed.
560 if (entity
[0] == wxT('#'))
562 // NB: parsed value is a number, so it's OK to use wx_str(), internal
563 // representation is the same for numbers
564 const wxStringCharType
*ent_s
= entity
.wx_str();
565 const wxStringCharType
*format
;
// An 'x' or 'X' after the '#' is tested for here; `format` holds the scan
// format used below.
567 if (ent_s
[1] == wxS('x') || ent_s
[1] == wxS('X'))
// Exactly one value must be scanned for the reference to be accepted.
576 if (wxSscanf(ent_s
, format
, &code
) != 1)
581 // store the literals in wx's internal representation (either char*
582 // in UTF-8 or wchar_t*) for best performance:
583 #define ENTITY(name, code) { wxS(name), code }
// The table is searched with bsearch() and wxHtmlEntityCompare(), so the
// entries must stay sorted by name in the order that comparison defines
// (upper-case letters sort before lower-case ones).
585 static wxHtmlEntityInfo substitutions
[] = {
586 ENTITY("AElig", 198),
587 ENTITY("Aacute", 193),
588 ENTITY("Acirc", 194),
589 ENTITY("Agrave", 192),
590 ENTITY("Alpha", 913),
591 ENTITY("Aring", 197),
592 ENTITY("Atilde", 195),
595 ENTITY("Ccedil", 199),
597 ENTITY("Dagger", 8225),
598 ENTITY("Delta", 916),
600 ENTITY("Eacute", 201),
601 ENTITY("Ecirc", 202),
602 ENTITY("Egrave", 200),
603 ENTITY("Epsilon", 917),
606 ENTITY("Gamma", 915),
607 ENTITY("Iacute", 205),
608 ENTITY("Icirc", 206),
609 ENTITY("Igrave", 204),
612 ENTITY("Kappa", 922),
613 ENTITY("Lambda", 923),
615 ENTITY("Ntilde", 209),
617 ENTITY("OElig", 338),
618 ENTITY("Oacute", 211),
619 ENTITY("Ocirc", 212),
620 ENTITY("Ograve", 210),
621 ENTITY("Omega", 937),
622 ENTITY("Omicron", 927),
623 ENTITY("Oslash", 216),
624 ENTITY("Otilde", 213),
628 ENTITY("Prime", 8243),
631 ENTITY("Scaron", 352),
632 ENTITY("Sigma", 931),
633 ENTITY("THORN", 222),
635 ENTITY("Theta", 920),
636 ENTITY("Uacute", 218),
637 ENTITY("Ucirc", 219),
638 ENTITY("Ugrave", 217),
639 ENTITY("Upsilon", 933),
642 ENTITY("Yacute", 221),
645 ENTITY("aacute", 225),
646 ENTITY("acirc", 226),
647 ENTITY("acute", 180),
648 ENTITY("aelig", 230),
649 ENTITY("agrave", 224),
650 ENTITY("alefsym", 8501),
651 ENTITY("alpha", 945),
655 ENTITY("aring", 229),
656 ENTITY("asymp", 8776),
657 ENTITY("atilde", 227),
659 ENTITY("bdquo", 8222),
661 ENTITY("brvbar", 166),
662 ENTITY("bull", 8226),
664 ENTITY("ccedil", 231),
665 ENTITY("cedil", 184),
669 ENTITY("clubs", 9827),
670 ENTITY("cong", 8773),
672 ENTITY("crarr", 8629),
674 ENTITY("curren", 164),
675 ENTITY("dArr", 8659),
676 ENTITY("dagger", 8224),
677 ENTITY("darr", 8595),
679 ENTITY("delta", 948),
680 ENTITY("diams", 9830),
681 ENTITY("divide", 247),
682 ENTITY("eacute", 233),
683 ENTITY("ecirc", 234),
684 ENTITY("egrave", 232),
685 ENTITY("empty", 8709),
686 ENTITY("emsp", 8195),
687 ENTITY("ensp", 8194),
688 ENTITY("epsilon", 949),
689 ENTITY("equiv", 8801),
693 ENTITY("euro", 8364),
694 ENTITY("exist", 8707),
696 ENTITY("forall", 8704),
697 ENTITY("frac12", 189),
698 ENTITY("frac14", 188),
699 ENTITY("frac34", 190),
700 ENTITY("frasl", 8260),
701 ENTITY("gamma", 947),
704 ENTITY("hArr", 8660),
705 ENTITY("harr", 8596),
706 ENTITY("hearts", 9829),
707 ENTITY("hellip", 8230),
708 ENTITY("iacute", 237),
709 ENTITY("icirc", 238),
710 ENTITY("iexcl", 161),
711 ENTITY("igrave", 236),
712 ENTITY("image", 8465),
713 ENTITY("infin", 8734),
716 ENTITY("iquest", 191),
717 ENTITY("isin", 8712),
719 ENTITY("kappa", 954),
720 ENTITY("lArr", 8656),
721 ENTITY("lambda", 955),
722 ENTITY("lang", 9001),
723 ENTITY("laquo", 171),
724 ENTITY("larr", 8592),
725 ENTITY("lceil", 8968),
726 ENTITY("ldquo", 8220),
728 ENTITY("lfloor", 8970),
729 ENTITY("lowast", 8727),
732 ENTITY("lsaquo", 8249),
733 ENTITY("lsquo", 8216),
736 ENTITY("mdash", 8212),
737 ENTITY("micro", 181),
738 ENTITY("middot", 183),
739 ENTITY("minus", 8722),
741 ENTITY("nabla", 8711),
743 ENTITY("ndash", 8211),
747 ENTITY("notin", 8713),
748 ENTITY("nsub", 8836),
749 ENTITY("ntilde", 241),
751 ENTITY("oacute", 243),
752 ENTITY("ocirc", 244),
753 ENTITY("oelig", 339),
754 ENTITY("ograve", 242),
755 ENTITY("oline", 8254),
756 ENTITY("omega", 969),
757 ENTITY("omicron", 959),
758 ENTITY("oplus", 8853),
762 ENTITY("oslash", 248),
763 ENTITY("otilde", 245),
764 ENTITY("otimes", 8855),
767 ENTITY("part", 8706),
768 ENTITY("permil", 8240),
769 ENTITY("perp", 8869),
773 ENTITY("plusmn", 177),
774 ENTITY("pound", 163),
775 ENTITY("prime", 8242),
776 ENTITY("prod", 8719),
777 ENTITY("prop", 8733),
780 ENTITY("rArr", 8658),
781 ENTITY("radic", 8730),
782 ENTITY("rang", 9002),
783 ENTITY("raquo", 187),
784 ENTITY("rarr", 8594),
785 ENTITY("rceil", 8969),
786 ENTITY("rdquo", 8221),
787 ENTITY("real", 8476),
789 ENTITY("rfloor", 8971),
792 ENTITY("rsaquo", 8250),
793 ENTITY("rsquo", 8217),
794 ENTITY("sbquo", 8218),
795 ENTITY("scaron", 353),
796 ENTITY("sdot", 8901),
799 ENTITY("sigma", 963),
800 ENTITY("sigmaf", 962),
802 ENTITY("spades", 9824),
804 ENTITY("sube", 8838),
810 ENTITY("supe", 8839),
811 ENTITY("szlig", 223),
813 ENTITY("there4", 8756),
814 ENTITY("theta", 952),
815 ENTITY("thetasym", 977),
816 ENTITY("thinsp", 8201),
817 ENTITY("thorn", 254),
818 ENTITY("tilde", 732),
819 ENTITY("times", 215),
820 ENTITY("trade", 8482),
821 ENTITY("uArr", 8657),
822 ENTITY("uacute", 250),
823 ENTITY("uarr", 8593),
824 ENTITY("ucirc", 251),
825 ENTITY("ugrave", 249),
827 ENTITY("upsih", 978),
828 ENTITY("upsilon", 965),
830 ENTITY("weierp", 8472),
832 ENTITY("yacute", 253),
837 ENTITY("zwnj", 8204),
// Number of table entries, counted once on first use by scanning for the
// first entry whose code is 0.
840 static size_t substitutions_cnt
= 0;
842 if (substitutions_cnt
== 0)
843 while (substitutions
[substitutions_cnt
].code
!= 0)
846 wxHtmlEntityInfo
*info
= NULL
;
848 // bsearch crashes under WinCE for some reason
// Linear scan variant of the lookup.
850 for (i
= 0; i
< substitutions_cnt
; i
++)
852 if (entity
== substitutions
[i
].name
)
854 info
= & substitutions
[i
];
// Binary search variant of the lookup; relies on the table ordering noted
// above.
859 info
= (wxHtmlEntityInfo
*) bsearch(entity
.wx_str(), substitutions
,
861 sizeof(wxHtmlEntityInfo
),
862 wxHtmlEntityCompare
);
871 return GetCharForCode(code
);
874 wxFSFile
*wxHtmlParser::OpenURL(wxHtmlURLType
WXUNUSED(type
),
875 const wxString
& url
) const
877 return m_FS
? m_FS
->OpenFile(url
) : NULL
;
882 //-----------------------------------------------------------------------------
883 // wxHtmlParser::ExtractCharsetInformation
884 //-----------------------------------------------------------------------------
// Minimal parser used by wxHtmlParser::ExtractCharsetInformation(): it
// produces no object and ignores all text, so only tag handlers do any work.
886 class wxMetaTagParser
: public wxHtmlParser
889 wxMetaTagParser() { }
// There is no product.
891 wxObject
* GetProduct() { return NULL
; }
// Text between tags is discarded.
894 virtual void AddText(const wxString
& WXUNUSED(txt
)) {}
896 DECLARE_NO_COPY_CLASS(wxMetaTagParser
)
// Tag handler for META and BODY tags; it writes the charset it finds to the
// string supplied at construction.
899 class wxMetaTagHandler
: public wxHtmlTagHandler
// `retval` is where HandleTag() stores the charset it finds.
902 wxMetaTagHandler(wxString
*retval
) : wxHtmlTagHandler(), m_retval(retval
) {}
903 wxString
GetSupportedTags() { return wxT("META,BODY"); }
904 bool HandleTag(const wxHtmlTag
& tag
);
909 DECLARE_NO_COPY_CLASS(wxMetaTagHandler
)
// Handles one META or BODY tag while searching for the document's charset.
912 bool wxMetaTagHandler::HandleTag(const wxHtmlTag
& tag
)
// Reaching BODY ends the search.
914 if (tag
.GetName() == _T("BODY"))
916 m_Parser
->StopParsing();
// Look for a META tag with HTTP-EQUIV="Content-Type" (compared without
// regard to case) that also has a CONTENT parameter.
920 if (tag
.HasParam(_T("HTTP-EQUIV")) &&
921 tag
.GetParam(_T("HTTP-EQUIV")).IsSameAs(_T("Content-Type"), false) &&
922 tag
.HasParam(_T("CONTENT")))
// The whole CONTENT value is lower-cased, so the charset stored below is
// lower-case too.
924 wxString content
= tag
.GetParam(_T("CONTENT")).Lower();
// NOTE(review): the match is on the exact 19-character prefix, so spellings
// such as "text/html;charset=" (no space) are not recognized.
925 if (content
.Left(19) == _T("text/html; charset="))
// Everything after the prefix is taken as the charset; the search is then
// finished.
927 *m_retval
= content
.Mid(19);
928 m_Parser
->StopParsing();
// Extracts the charset named in the markup's META tag by running a
// wxMetaTagParser with a wxMetaTagHandler over `markup`.
936 wxString
wxHtmlParser::ExtractCharsetInformation(const wxString
& markup
)
939 wxMetaTagParser
*parser
= new wxMetaTagParser();
// The handler writes what it finds into `charset`.
942 parser
->AddTagHandler(new wxMetaTagHandler(&charset
));
943 parser
->Parse(markup
);
// Checks whether an HTML comment starts at `start`, which must point at a
// '<' character.
//   start - in/out position of the tag start
//   end   - end of the text that may be examined
951 wxHtmlParser::SkipCommentTag(wxString::const_iterator
& start
,
952 wxString::const_iterator end
)
954 wxASSERT_MSG( *start
== '<', _T("should be called on the tag start") );
// Work on a copy of the position.
956 wxString::const_iterator p
= start
;
958 // comments begin with "<!--" in HTML 4.0
// NOTE(review): when p == end - 3 the check passes, yet the third *++p then
// dereferences `end` for text ending in "<!-"; `end - 3` is also formed for
// strings shorter than three characters -- confirm the bound.
959 if ( p
> end
- 3 || *++p
!= '!' || *++p
!= '-' || *++p
!= '-' )
961 // not a comment at all
965 // skip the start of the comment tag in any case, if we don't find the
966 // closing tag we should ignore broken markup
969 // comments end with "--[ \t\r\n]*>", i.e. white space is allowed between
970 // comment delimiter and the closing tag character (section 3.2.4 of
971 // http://www.w3.org/TR/html401/)
// `dashes` holds the number of '-' characters seen so far; two or more are
// required before white space or '>' is treated as part of the comment end.
977 if ( (c
== wxT(' ') || c
== wxT('\n') ||
978 c
== wxT('\r') || c
== wxT('\t')) && dashes
>= 2 )
980 // ignore white space before potential tag end
984 if ( c
== wxT('>') && dashes
>= 2 )
986 // found end of comment
1000 #endif // wxUSE_HTML