2 Copyright (C) 1997 Martin Jones (mjones@kde.org)
3 (C) 1997 Torben Weis (weis@kde.org)
4 (C) 1998 Waldo Bastian (bastian@kde.org)
5 (C) 1999 Lars Knoll (knoll@kde.org)
6 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Library General Public
10 License as published by the Free Software Foundation; either
11 version 2 of the License, or (at your option) any later version.
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Library General Public License for more details.
18 You should have received a copy of the GNU Library General Public License
19 along with this library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA.
27 #include "QualifiedName.h"
28 #include <wtf/Forward.h>
29 #include <wtf/OwnPtr.h>
30 #include <wtf/RefPtr.h>
31 #include "HTMLParserErrorCodes.h"
// Forward declarations: this header only refers to these types through
// raw pointers or smart-pointer template arguments.
class DocumentFragment;
class HTMLFormElement;
class HTMLHeadElement;
class HTMLParserQuirks;
50 * The parser for HTML. It receives a stream of tokens from the HTMLTokenizer, and
51 * builds up the Document structure from it.
53 class HTMLParser
: Noncopyable
{
55 HTMLParser(HTMLDocument
*, bool reportErrors
);
56 HTMLParser(DocumentFragment
*);
57 virtual ~HTMLParser();
60 * parses one token delivered by the tokenizer
62 PassRefPtr
<Node
> parseToken(Token
*);
64 // Parses a doctype token.
65 void parseDoctypeToken(DoctypeToken
*);
68 * tokenizer says it's not going to be sending us any more tokens
77 bool skipMode() const { return !m_skipModeTag
.isNull(); }
78 bool isHandlingResidualStyleAcrossBlocks() const { return m_handlingResidualStyleAcrossBlocks
; }
81 void setCurrent(Node
*);
83 void setSkipMode(const QualifiedName
& qName
) { m_skipModeTag
= qName
.localName(); }
85 PassRefPtr
<Node
> getNode(Token
*);
86 bool bodyCreateErrorCheck(Token
*, RefPtr
<Node
>&);
87 bool canvasCreateErrorCheck(Token
*, RefPtr
<Node
>&);
88 bool commentCreateErrorCheck(Token
*, RefPtr
<Node
>&);
89 bool ddCreateErrorCheck(Token
*, RefPtr
<Node
>&);
90 bool dtCreateErrorCheck(Token
*, RefPtr
<Node
>&);
91 bool formCreateErrorCheck(Token
*, RefPtr
<Node
>&);
92 bool framesetCreateErrorCheck(Token
*, RefPtr
<Node
>&);
93 bool headCreateErrorCheck(Token
*, RefPtr
<Node
>&);
94 bool iframeCreateErrorCheck(Token
*, RefPtr
<Node
>&);
95 bool isindexCreateErrorCheck(Token
*, RefPtr
<Node
>&);
96 bool mapCreateErrorCheck(Token
*, RefPtr
<Node
>&);
97 bool nestedCreateErrorCheck(Token
*, RefPtr
<Node
>&);
98 bool nestedPCloserCreateErrorCheck(Token
*, RefPtr
<Node
>&);
99 bool nestedStyleCreateErrorCheck(Token
*, RefPtr
<Node
>&);
100 bool noembedCreateErrorCheck(Token
*, RefPtr
<Node
>&);
101 bool noframesCreateErrorCheck(Token
*, RefPtr
<Node
>&);
102 bool nolayerCreateErrorCheck(Token
*, RefPtr
<Node
>&);
103 bool noscriptCreateErrorCheck(Token
*, RefPtr
<Node
>&);
104 bool pCloserCreateErrorCheck(Token
*, RefPtr
<Node
>&);
105 bool pCloserStrictCreateErrorCheck(Token
*, RefPtr
<Node
>&);
106 bool selectCreateErrorCheck(Token
*, RefPtr
<Node
>&);
107 bool tableCellCreateErrorCheck(Token
*, RefPtr
<Node
>&);
108 bool tableSectionCreateErrorCheck(Token
*, RefPtr
<Node
>&);
109 bool textCreateErrorCheck(Token
*, RefPtr
<Node
>&);
111 void processCloseTag(Token
*);
113 bool insertNode(Node
*, bool flat
= false);
114 bool handleError(Node
*, bool flat
, const AtomicString
& localName
, int tagPriority
);
116 void pushBlock(const AtomicString
& tagName
, int level
);
117 void popBlock(const AtomicString
& tagName
, bool reportErrors
= false);
118 void popBlock(const QualifiedName
& qName
, bool reportErrors
= false) { return popBlock(qName
.localName(), reportErrors
); } // Convenience function for readability.
120 void moveOneBlockToStack(HTMLStackElem
*& head
);
121 inline HTMLStackElem
* popOneBlockCommon();
122 void popInlineBlocks();
128 static bool isResidualStyleTag(const AtomicString
& tagName
);
129 static bool isAffectedByResidualStyle(const AtomicString
& tagName
);
130 void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem
*);
131 void reopenResidualStyleTags(HTMLStackElem
*, Node
* malformedTableParent
);
133 bool allowNestedRedundantTag(const AtomicString
& tagName
);
135 static bool isHeaderTag(const AtomicString
& tagName
);
136 void popNestedHeaderTag();
138 bool isInline(Node
*) const;
140 void startBody(); // inserts the isindex element
141 PassRefPtr
<Node
> handleIsindex(Token
*);
143 PassRefPtr
<Node
> parseTelephoneNumbers(Node
*inputNode
);
144 PassRefPtr
<Text
> parseNextPhoneNumber(Text
*inputText
);
// Called by hasPElementInScope() when the cached state is Unknown;
// presumably recomputes m_hasPElementInScope (defined out of line).
void checkIfHasPElementInScope();
147 bool hasPElementInScope()
149 if (m_hasPElementInScope
== Unknown
)
150 checkIfHasPElementInScope();
151 return m_hasPElementInScope
== InScope
;
154 void reportError(HTMLParserErrorCode errorCode
, const AtomicString
* tagName1
= 0, const AtomicString
* tagName2
= 0, bool closeTags
= false)
155 { if (!m_reportErrors
) return; reportErrorToConsole(errorCode
, tagName1
, tagName2
, closeTags
); }
157 void reportErrorToConsole(HTMLParserErrorCode
, const AtomicString
* tagName1
, const AtomicString
* tagName2
, bool closeTags
);
161 // The currently active element (the one new elements will be added to). Can be a document fragment, a document or an element.
163 // We can't ref a document, but we don't want to constantly check if a node is a document just to decide whether to deref.
166 HTMLStackElem
* blockStack
;
168 // The number of tags with priority minBlockLevelTagPriority or higher
169 // currently in m_blockStack. The parser enforces a cap on this value by
170 // adding such new elements as siblings instead of children once it is reached.
171 size_t m_blocksInStack
;
// Cached answer for hasPElementInScope(); Unknown triggers a recheck there.
enum ElementInScopeState {
    NotInScope,
    InScope,
    Unknown
};
ElementInScopeState m_hasPElementInScope;
176 RefPtr
<HTMLFormElement
> m_currentFormElement
; // currently active form
177 RefPtr
<HTMLMapElement
> m_currentMapElement
; // current map
178 RefPtr
<HTMLHeadElement
> m_head
; // head element; needed for HTML which defines <base> after </head>
179 RefPtr
<Node
> m_isindexElement
; // a possible <isindex> element in the head
185 AtomicString m_skipModeTag
; // tells the parser to discard all tags until it reaches the one specified
// Parser state. NOTE(review): m_isParsingFragment is presumably set by the
// DocumentFragment constructor -- confirm in the implementation.
bool m_isParsingFragment;
// Exposed through isHandlingResidualStyleAcrossBlocks().
bool m_handlingResidualStyleAcrossBlocks;
// Integer counter, not a flag; its exact meaning is defined by the implementation.
int inStrayTableContent;
192 OwnPtr
<HTMLParserQuirks
> m_parserQuirks
;
197 #endif // HTMLParser_h