git.saurik.com Git - wxWidgets.git/blame - tests/benchmarks/htmlparser/htmltag.cpp

Commit	Line	Data
c21faa0a VS	1	/////////////////////////////////////////////////////////////////////////////
	2	// Name: src/html/htmltag.cpp
	3	// Purpose: wx28HtmlTag class (represents single tag)
	4	// Author: Vaclav Slavik
c21faa0a VS	5	// Copyright: (c) 1999 Vaclav Slavik
	6	// Licence: wxWindows licence
	7	/////////////////////////////////////////////////////////////////////////////
	8
	9	#include "wx/wxprec.h"
	10
	11	#ifdef __BORLANDC__
	12	#pragma hdrstop
	13	#endif
	14
	15	#include "htmltag.h"
	16
c21faa0a VS	17	#include "htmlpars.h"
	18	#include <stdio.h> // for vsscanf
	19	#include <stdarg.h>
	20
	21
	22	//-----------------------------------------------------------------------------
	23	// wx28HtmlTagsCache
	24	//-----------------------------------------------------------------------------
	25
	26	struct wx28HtmlCacheItem
	27	{
	28	// this is "pos" value passed to wx28HtmlTag's constructor.
	29	// it is position of '<' character of the tag
	30	int Key;
	31
	32	// end positions for the tag:
	33	// end1 is '<' of ending tag,
	34	// end2 is '>' or both are
	35	// -1 if there is no ending tag for this one...
	36	// or -2 if this is ending tag </...>
	37	int End1, End2;
	38
	39	// name of this tag
	40	wxChar *Name;
	41	};
	42
	43
	44	IMPLEMENT_CLASS(wx28HtmlTagsCache,wxObject)
	45
	46	#define CACHE_INCREMENT 64
	47
	48	bool wxIsCDATAElement(const wxChar *tag)
	49	{
9a83f860 VZ	50	return (wxStrcmp(tag, wxT("SCRIPT")) == 0) \|\|
9a83f860 VZ	51	(wxStrcmp(tag, wxT("STYLE")) == 0);
c21faa0a VS	52	}
	53
	54	wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString& source)
	55	{
	56	const wxChar *src = source.c_str();
	57	int lng = source.length();
	58	wxChar tagBuffer[256];
	59
	60	m_Cache = NULL;
	61	m_CacheSize = 0;
	62	m_CachePos = 0;
	63
	64	int pos = 0;
	65	while (pos < lng)
	66	{
	67	if (src[pos] == wxT('<')) // tag found:
	68	{
	69	if (m_CacheSize % CACHE_INCREMENT == 0)
	70	m_Cache = (wx28HtmlCacheItem) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) sizeof(wx28HtmlCacheItem));
	71	int tg = m_CacheSize++;
	72	int stpos = pos++;
	73	m_Cache[tg].Key = stpos;
	74
	75	int i;
	76	for ( i = 0;
	77	pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
	78	src[pos] != wxT('>') && !wxIsspace(src[pos]);
	79	i++, pos++ )
	80	{
	81	tagBuffer[i] = (wxChar)wxToupper(src[pos]);
	82	}
9a83f860	83	tagBuffer[i] = wxT('\0');
c21faa0a VS	84
	85	m_Cache[tg].Name = new wxChar[i+1];
	86	memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
	87
	88	while (pos < lng && src[pos] != wxT('>')) pos++;
	89
	90	if (src[stpos+1] == wxT('/')) // ending tag:
	91	{
	92	m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
	93	// find matching begin tag:
	94	for (i = tg; i >= 0; i--)
	95	if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
	96	{
	97	m_Cache[i].End1 = stpos;
	98	m_Cache[i].End2 = pos + 1;
	99	break;
	100	}
	101	}
	102	else
	103	{
	104	m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
	105
	106	if (wxIsCDATAElement(tagBuffer))
	107	{
	108	// store the orig pos in case we are missing the closing
	109	// tag (see below)
	110	wxInt32 old_pos = pos;
	111	bool foundCloseTag = false;
	112
	113	// find next matching tag
	114	int tag_len = wxStrlen(tagBuffer);
	115	while (pos < lng)
	116	{
	117	// find the ending tag
	118	while (pos + 1 < lng &&
	119	(src[pos] != '<' \|\| src[pos+1] != '/'))
	120	++pos;
	121	if (src[pos] == '<')
	122	++pos;
	123
	124	// see if it matches
	125	int match_pos = 0;
	126	while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
	127	// cast to wxChar needed to suppress warning in
	128	// Unicode build
	129	if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
	130	++match_pos;
	131	}
	132	else if (src[pos] == wxT(' ') \|\| src[pos] == wxT('\n') \|\|
	133	src[pos] == wxT('\r') \|\| src[pos] == wxT('\t')) {
	134	// need to skip over these
	135	}
	136	else {
	137	match_pos = 0;
	138	}
	139	++pos;
	140	}
	141
	142	// found a match
	143	if (match_pos == tag_len)
	144	{
	145	pos = pos - tag_len - 3;
	146	foundCloseTag = true;
	147	break;
148	}
149	else // keep looking for the closing tag
150	{
151	++pos;
152	}
153	}
154	if (!foundCloseTag)
155	{
156	// we didn't find closing tag; this means the markup
157	// is incorrect and the best thing we can do is to
158	// ignore the unclosed tag and continue parsing as if
159	// it didn't exist:
160	pos = old_pos;
161	}
162	}
163	}
164	}
165
166	pos++;
167	}
168
169	// ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
170	for (int i = 0; i < m_CacheSize; i++)
171	{
172	delete[] m_Cache[i].Name;
173	m_Cache[i].Name = NULL;
174	}
175	}
176
177	void wx28HtmlTagsCache::QueryTag(int at, int* end1, int* end2)
178	{
179	if (m_Cache == NULL) return;
180	if (m_Cache[m_CachePos].Key != at)
181	{
182	int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
183	do
184	{
185	if ( m_CachePos < 0 \|\| m_CachePos == m_CacheSize )
186	{
187	// something is very wrong with HTML, give up by returning an
188	// impossibly large value which is going to be ignored by the
189	// caller
190	*end1 =
191	*end2 = INT_MAX;
192	return;
193	}
194
195	m_CachePos += delta;
196	}
197	while (m_Cache[m_CachePos].Key != at);
198	}
199	*end1 = m_Cache[m_CachePos].End1;
200	*end2 = m_Cache[m_CachePos].End2;
201	}
202
203
204
205
206	//-----------------------------------------------------------------------------
207	// wx28HtmlTag
208	//-----------------------------------------------------------------------------
209
210	IMPLEMENT_CLASS(wx28HtmlTag,wxObject)
211
212	wx28HtmlTag::wx28HtmlTag(wx28HtmlTag *parent,
213	const wxString& source, int pos, int end_pos,
214	wx28HtmlTagsCache *cache,
215	wx28HtmlEntitiesParser *entParser) : wxObject()
216	{
217	/* Setup DOM relations */
218
219	m_Next = NULL;
220	m_FirstChild = m_LastChild = NULL;
221	m_Parent = parent;
222	if (parent)
223	{
224	m_Prev = m_Parent->m_LastChild;
225	if (m_Prev == NULL)
226	m_Parent->m_FirstChild = this;
227	else
228	m_Prev->m_Next = this;
229	m_Parent->m_LastChild = this;
230	}
231	else
232	m_Prev = NULL;
233
234	/* Find parameters and their values: */
235
236	int i;
237	wxChar c;
238
239	// fill-in name, params and begin pos:
240	i = pos+1;
241
242	// find tag's name and convert it to uppercase:
243	while ((i < end_pos) &&
244	((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
245	c != wxT('\n') && c != wxT('\t') &&
246	c != wxT('>')))
247	{
248	if ((c >= wxT('a')) && (c <= wxT('z')))
249	c -= (wxT('a') - wxT('A'));
250	m_Name << c;
251	}
252
253	// if the tag has parameters, read them and "normalize" them,
254	// i.e. convert to uppercase, replace whitespaces by spaces and
255	// remove whitespaces around '=':
256	if (source[i-1] != wxT('>'))
257	{
258	#define IS_WHITE(c) (c == wxT(' ') \|\| c == wxT('\r') \|\| \
259	c == wxT('\n') \|\| c == wxT('\t'))
260	wxString pname, pvalue;
261	wxChar quote;
262	enum
263	{
264	ST_BEFORE_NAME = 1,
265	ST_NAME,
266	ST_BEFORE_EQ,
267	ST_BEFORE_VALUE,
268	ST_VALUE
269	} state;
270
271	quote = 0;
272	state = ST_BEFORE_NAME;
273	while (i < end_pos)
274	{
275	c = source[i++];
276
277	if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
278	{
279	if (state == ST_BEFORE_EQ \|\| state == ST_NAME)
280	{
281	m_ParamNames.Add(pname);
282	m_ParamValues.Add(wxEmptyString);
283	}
284	else if (state == ST_VALUE && quote == 0)
285	{
286	m_ParamNames.Add(pname);
287	if (entParser)
288	m_ParamValues.Add(entParser->Parse(pvalue));
289	else
290	m_ParamValues.Add(pvalue);
291	}
292	break;
293	}
294	switch (state)
295	{
296	case ST_BEFORE_NAME:
297	if (!IS_WHITE(c))
298	{
299	pname = c;
300	state = ST_NAME;
301	}
302	break;
303	case ST_NAME:
304	if (IS_WHITE(c))
305	state = ST_BEFORE_EQ;
306	else if (c == wxT('='))
307	state = ST_BEFORE_VALUE;
308	else
309	pname << c;
310	break;
311	case ST_BEFORE_EQ:
312	if (c == wxT('='))
313	state = ST_BEFORE_VALUE;
314	else if (!IS_WHITE(c))
315	{
316	m_ParamNames.Add(pname);
317	m_ParamValues.Add(wxEmptyString);
318	pname = c;
319	state = ST_NAME;
320	}
321	break;
322	case ST_BEFORE_VALUE:
323	if (!IS_WHITE(c))
324	{
325	if (c == wxT('"') \|\| c == wxT('\''))
326	quote = c, pvalue = wxEmptyString;
327	else
328	quote = 0, pvalue = c;
329	state = ST_VALUE;
330	}
331	break;
332	case ST_VALUE:
333	if ((quote != 0 && c == quote) \|\|
334	(quote == 0 && IS_WHITE(c)))
335	{
336	m_ParamNames.Add(pname);
337	if (quote == 0)
338	{
339	// VS: backward compatibility, no real reason,
340	// but wxHTML code relies on this... :(
341	pvalue.MakeUpper();
342	}
343	if (entParser)
344	m_ParamValues.Add(entParser->Parse(pvalue));
345	else
346	m_ParamValues.Add(pvalue);
347	state = ST_BEFORE_NAME;
348	}
349	else
350	pvalue << c;
351	break;
352	}
353	}
354
355	#undef IS_WHITE
356	}
357	m_Begin = i;
358
359	cache->QueryTag(pos, &m_End1, &m_End2);
360	if (m_End1 > end_pos) m_End1 = end_pos;
361	if (m_End2 > end_pos) m_End2 = end_pos;
362	}
363
364	wx28HtmlTag::~wx28HtmlTag()
365	{
366	wx28HtmlTag t1, t2;
367	t1 = m_FirstChild;
368	while (t1)
369	{
370	t2 = t1->GetNextSibling();
371	delete t1;
372	t1 = t2;
373	}
374	}
375
376	bool wx28HtmlTag::HasParam(const wxString& par) const
377	{
378	return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
379	}
380
381	wxString wx28HtmlTag::GetParam(const wxString& par, bool with_commas) const
382	{
383	int index = m_ParamNames.Index(par, false);
384	if (index == wxNOT_FOUND)
385	return wxEmptyString;
386	if (with_commas)
387	{
388	// VS: backward compatibility, seems to be never used by wxHTML...
389	wxString s;
390	s << wxT('"') << m_ParamValues[index] << wxT('"');
391	return s;
392	}
393	else
394	return m_ParamValues[index];
395	}
396
397	int wx28HtmlTag::ScanParam(const wxString& par,
398	const wxChar *format,
399	void *param) const
400	{
401	wxString parval = GetParam(par);
402	return wxSscanf(parval, format, param);
403	}
404
c21faa0a VS	405	bool wx28HtmlTag::GetParamAsInt(const wxString& par, int *clr) const
	406	{
	407	if ( !HasParam(par) )
	408	return false;
	409
	410	long i;
	411	if ( !GetParam(par).ToLong(&i) )
	412	return false;
	413
	414	*clr = (int)i;
	415	return true;
	416	}
	417
	418	wxString wx28HtmlTag::GetAllParams() const
	419	{
	420	// VS: this function is for backward compatibility only,
	421	// never used by wxHTML
	422	wxString s;
	423	size_t cnt = m_ParamNames.GetCount();
	424	for (size_t i = 0; i < cnt; i++)
	425	{
	426	s << m_ParamNames[i];
	427	s << wxT('=');
	428	if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
	429	s << wxT('\'') << m_ParamValues[i] << wxT('\'');
	430	else
	431	s << wxT('"') << m_ParamValues[i] << wxT('"');
	432	}
	433	return s;
	434	}
	435
	436	wx28HtmlTag *wx28HtmlTag::GetFirstSibling() const
	437	{
	438	if (m_Parent)
	439	return m_Parent->m_FirstChild;
	440	else
	441	{
	442	wx28HtmlTag cur = (wx28HtmlTag)this;
	443	while (cur->m_Prev)
	444	cur = cur->m_Prev;
	445	return cur;
	446	}
	447	}
	448
	449	wx28HtmlTag *wx28HtmlTag::GetLastSibling() const
	450	{
	451	if (m_Parent)
	452	return m_Parent->m_LastChild;
	453	else
	454	{
	455	wx28HtmlTag cur = (wx28HtmlTag)this;
	456	while (cur->m_Next)
	457	cur = cur->m_Next;
	458	return cur;
	459	}
	460	}
	461
	462	wx28HtmlTag *wx28HtmlTag::GetNextTag() const
	463	{
	464	if (m_FirstChild) return m_FirstChild;
	465	if (m_Next) return m_Next;
	466	wx28HtmlTag *cur = m_Parent;
	467	if (!cur) return NULL;
	468	while (cur->m_Parent && !cur->m_Next)
469	cur = cur->m_Parent;
470	return cur->m_Next;
471	}