git.saurik.com Git - iphone-api.git/blob - WebCore/HTMLParser.h
Add support for new WinterBoard Settings features.
[iphone-api.git] / WebCore / HTMLParser.h
1 /*
2 Copyright (C) 1997 Martin Jones (mjones@kde.org)
3 (C) 1997 Torben Weis (weis@kde.org)
4 (C) 1998 Waldo Bastian (bastian@kde.org)
5 (C) 1999 Lars Knoll (knoll@kde.org)
6 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
7
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Library General Public
10 License as published by the Free Software Foundation; either
11 version 2 of the License, or (at your option) any later version.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Library General Public License for more details.
17
18 You should have received a copy of the GNU Library General Public License
19 along with this library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA.
22 */
23
24 #ifndef HTMLParser_h
25 #define HTMLParser_h
26
27 #include "QualifiedName.h"
28 #include <wtf/Forward.h>
29 #include <wtf/OwnPtr.h>
30 #include <wtf/RefPtr.h>
31 #include "HTMLParserErrorCodes.h"
32 #include "Text.h"
33
34 namespace WebCore {
35
36 class DoctypeToken;
37 class Document;
38 class DocumentFragment;
39 class HTMLDocument;
40 class HTMLFormElement;
41 class HTMLHeadElement;
42 class HTMLMapElement;
43 class HTMLParserQuirks;
44 class Node;
45
46 struct HTMLStackElem;
47 struct Token;
48
49 /**
50 * The parser for HTML. It receives a stream of tokens from the HTMLTokenizer, and
51 * builds up the Document structure from it.
52 */
// Public surface: construction, token intake (parseToken / parseDoctypeToken),
// end-of-input notification (finished) and reset. Everything below "private:" is
// tree-building state and helpers whose definitions are not part of this header.
53 class HTMLParser : Noncopyable {
54 public:
// Parser for a whole HTML document.
// NOTE(review): reportErrors is presumably stored in m_reportErrors, which gates reportError() below - confirm in the .cpp.
55 HTMLParser(HTMLDocument*, bool reportErrors);
// Parser that targets a DocumentFragment.
// NOTE(review): presumably the case flagged by m_isParsingFragment - confirm in the .cpp.
56 HTMLParser(DocumentFragment*);
57 virtual ~HTMLParser();
58
59 /**
60 * Parses one token delivered by the tokenizer.
61 */
62 PassRefPtr<Node> parseToken(Token*);
63
64 // Parses a doctype token.
65 void parseDoctypeToken(DoctypeToken*);
66
67 /**
68 * Called when the tokenizer is not going to be sending us any more tokens.
69 */
70 void finished();
71
72 /**
73 * Resets the parser.
74 */
75 void reset();
76
// True while a skip-mode tag is recorded, i.e. m_skipModeTag is non-null.
77 bool skipMode() const { return !m_skipModeTag.isNull(); }
// Read-only accessor for m_handlingResidualStyleAcrossBlocks.
78 bool isHandlingResidualStyleAcrossBlocks() const { return m_handlingResidualStyleAcrossBlocks; }
79
80 private:
81 void setCurrent(Node*);
82 void derefCurrent();
// Enters skip mode by storing the local name of qName in m_skipModeTag.
83 void setSkipMode(const QualifiedName& qName) { m_skipModeTag = qName.localName(); }
84
85 PassRefPtr<Node> getNode(Token*);
// The *CreateErrorCheck helpers below all share one signature: (Token*, RefPtr<Node>&) returning bool.
// NOTE(review): they look like per-tag hooks run while a node is created for a token; the
// meaning of the bool result is defined in the .cpp, not here - confirm before relying on it.
86 bool bodyCreateErrorCheck(Token*, RefPtr<Node>&);
87 bool canvasCreateErrorCheck(Token*, RefPtr<Node>&);
88 bool commentCreateErrorCheck(Token*, RefPtr<Node>&);
89 bool ddCreateErrorCheck(Token*, RefPtr<Node>&);
90 bool dtCreateErrorCheck(Token*, RefPtr<Node>&);
91 bool formCreateErrorCheck(Token*, RefPtr<Node>&);
92 bool framesetCreateErrorCheck(Token*, RefPtr<Node>&);
93 bool headCreateErrorCheck(Token*, RefPtr<Node>&);
94 bool iframeCreateErrorCheck(Token*, RefPtr<Node>&);
95 bool isindexCreateErrorCheck(Token*, RefPtr<Node>&);
96 bool mapCreateErrorCheck(Token*, RefPtr<Node>&);
97 bool nestedCreateErrorCheck(Token*, RefPtr<Node>&);
98 bool nestedPCloserCreateErrorCheck(Token*, RefPtr<Node>&);
99 bool nestedStyleCreateErrorCheck(Token*, RefPtr<Node>&);
100 bool noembedCreateErrorCheck(Token*, RefPtr<Node>&);
101 bool noframesCreateErrorCheck(Token*, RefPtr<Node>&);
102 bool nolayerCreateErrorCheck(Token*, RefPtr<Node>&);
103 bool noscriptCreateErrorCheck(Token*, RefPtr<Node>&);
104 bool pCloserCreateErrorCheck(Token*, RefPtr<Node>&);
105 bool pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&);
106 bool selectCreateErrorCheck(Token*, RefPtr<Node>&);
107 bool tableCellCreateErrorCheck(Token*, RefPtr<Node>&);
108 bool tableSectionCreateErrorCheck(Token*, RefPtr<Node>&);
109 bool textCreateErrorCheck(Token*, RefPtr<Node>&);
110
111 void processCloseTag(Token*);
112
113 bool insertNode(Node*, bool flat = false);
114 bool handleError(Node*, bool flat, const AtomicString& localName, int tagPriority);
115
// Block-stack maintenance. The QualifiedName overload of popBlock only forwards
// qName.localName() and reportErrors to the AtomicString overload.
116 void pushBlock(const AtomicString& tagName, int level);
117 void popBlock(const AtomicString& tagName, bool reportErrors = false);
118 void popBlock(const QualifiedName& qName, bool reportErrors = false) { return popBlock(qName.localName(), reportErrors); } // Convenience function for readability.
119 void popOneBlock();
120 void moveOneBlockToStack(HTMLStackElem*& head);
121 inline HTMLStackElem* popOneBlockCommon();
122 void popInlineBlocks();
123
124 void freeBlock();
125
126 void createHead();
127
128 static bool isResidualStyleTag(const AtomicString& tagName);
129 static bool isAffectedByResidualStyle(const AtomicString& tagName);
130 void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem*);
131 void reopenResidualStyleTags(HTMLStackElem*, Node* malformedTableParent);
132
133 bool allowNestedRedundantTag(const AtomicString& tagName);
134
135 static bool isHeaderTag(const AtomicString& tagName);
136 void popNestedHeaderTag();
137
138 bool isInline(Node*) const;
139
140 void startBody(); // inserts the isindex element
141 PassRefPtr<Node> handleIsindex(Token*);
142
// NOTE(review): telephone-number helpers; their behavior is defined in the .cpp and is not documented in this header.
143 PassRefPtr<Node> parseTelephoneNumbers(Node *inputNode);
144 PassRefPtr<Text> parseNextPhoneNumber(Text *inputText);
145
146 void checkIfHasPElementInScope();
// Lazily resolved query: when the cached state is Unknown, checkIfHasPElementInScope()
// is called first (presumably it updates m_hasPElementInScope - confirm in the .cpp);
// the result is true only when the cached state is InScope.
147 bool hasPElementInScope()
148 {
149 if (m_hasPElementInScope == Unknown)
150 checkIfHasPElementInScope();
151 return m_hasPElementInScope == InScope;
152 }
153
// Does nothing unless m_reportErrors is set; otherwise forwards every argument
// unchanged to reportErrorToConsole().
154 void reportError(HTMLParserErrorCode errorCode, const AtomicString* tagName1 = 0, const AtomicString* tagName2 = 0, bool closeTags = false)
155 { if (!m_reportErrors) return; reportErrorToConsole(errorCode, tagName1, tagName2, closeTags); }
156
157 void reportErrorToConsole(HTMLParserErrorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags);
158
159 Document* document;
160
161 // The currently active element (the one new elements will be added to). Can be a document fragment, a document or an element.
162 Node* current;
163 // We can't ref a document, but we don't want to constantly check if a node is a document just to decide whether to deref.
164 bool didRefCurrent;
165
166 HTMLStackElem* blockStack;
167
168 // The number of tags with priority minBlockLevelTagPriority or higher
169 // currently in blockStack. The parser enforces a cap on this value by
170 // adding such new elements as siblings instead of children once it is reached.
171 size_t m_blocksInStack;
172
// Cached answer for hasPElementInScope(). Unknown means checkIfHasPElementInScope()
// is run before the value is used.
173 enum ElementInScopeState { NotInScope, InScope, Unknown };
174 ElementInScopeState m_hasPElementInScope;
175
176 RefPtr<HTMLFormElement> m_currentFormElement; // currently active form
177 RefPtr<HTMLMapElement> m_currentMapElement; // current map
178 RefPtr<HTMLHeadElement> m_head; // head element; needed for HTML which defines <base> after </head>
179 RefPtr<Node> m_isindexElement; // a possible <isindex> element in the head
180
181 bool inBody;
182 bool haveContent;
183 bool haveFrameSet;
184
185 AtomicString m_skipModeTag; // tells the parser to discard all tags until it reaches the one specified
186
187 bool m_isParsingFragment;
188 bool m_reportErrors; // checked by reportError() before anything is forwarded to reportErrorToConsole()
189 bool m_handlingResidualStyleAcrossBlocks; // exposed through isHandlingResidualStyleAcrossBlocks()
190 int inStrayTableContent;
191
192 OwnPtr<HTMLParserQuirks> m_parserQuirks;
193 };
194
195 }
196
197 #endif // HTMLParser_h