fixed bug in parsing incomplete HTML when the very last tag was not closed with >
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows Licence
8 /////////////////////////////////////////////////////////////////////////////
9
10
11 #ifdef __GNUG__
12 #pragma implementation
13 #endif
14
15 #include "wx/wxprec.h"
16
17 #include "wx/defs.h"
18 #if wxUSE_HTML
19
// Borland C++ only: everything above this point goes into the precompiled
// header. (The compiler's predefined macro is __BORLANDC__.)
#ifdef __BORLANDC__
#pragma hdrstop
#endif
23
24 #ifndef WXPRECOMP
25 #include "wx/wx.h"
26 #endif
27
28 #include "wx/html/htmltag.h"
29 #include <stdio.h> // for vsscanf
30 #include <stdarg.h>
31
32
33
34
35 //-----------------------------------------------------------------------------
36 // wxHtmlTagsCache
37 //-----------------------------------------------------------------------------
38
39 IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)
40
41 #define CACHE_INCREMENT 64
42
43 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
44 {
45 const wxChar *src = source.c_str();
46 int i, tg, pos, stpos;
47 int lng = source.Length();
48 wxChar dummy[256];
49
50 m_Cache = NULL;
51 m_CacheSize = 0;
52 m_CachePos = 0;
53
54 pos = 0;
55 while (pos < lng) {
56 if (src[pos] == wxT('<')) { // tag found:
57 if (m_CacheSize % CACHE_INCREMENT == 0)
58 m_Cache = (sCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(sCacheItem));
59 tg = m_CacheSize++;
60 m_Cache[tg].Key = stpos = pos++;
61 dummy[0] = 0; i = 0;
62 while (pos < lng &&
63 src[pos] != wxT('>') &&
64 src[pos] != wxT(' ') && src[pos] != wxT('\r') &&
65 src[pos] != wxT('\n') && src[pos] != wxT('\t')) {
66 dummy[i] = src[pos++];
67 if ((dummy[i] >= wxT('a')) && (dummy[i] <= wxT('z'))) dummy[i] -= (wxT('a') - wxT('A'));
68 i++;
69 }
70 dummy[i] = 0;
71 m_Cache[tg].Name = new wxChar[i+1];
72 memcpy(m_Cache[tg].Name, dummy, (i+1)*sizeof(wxChar));
73
74 while (pos < lng && src[pos] != wxT('>')) pos++;
75
76 if (src[stpos+1] == wxT('/')) { // ending tag:
77 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
78 // find matching begin tag:
79 for (i = tg; i >= 0; i--)
80 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, dummy+1) == 0)) {
81 m_Cache[i].End1 = stpos;
82 m_Cache[i].End2 = pos + 1;
83 break;
84 }
85 }
86 else {
87 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
88 }
89 }
90
91 pos++;
92 }
93
94 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
95 for (i = 0; i < m_CacheSize; i++) {
96 delete[] m_Cache[i].Name;
97 m_Cache[i].Name = NULL;
98 }
99 }
100
101
102
103 void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
104 {
105 if (m_Cache == NULL) return;
106 if (m_Cache[m_CachePos].Key != at) {
107 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
108 do {m_CachePos += delta;} while (m_Cache[m_CachePos].Key != at);
109 }
110 *end1 = m_Cache[m_CachePos].End1;
111 *end2 = m_Cache[m_CachePos].End2;
112 }
113
114
115
116
117 //-----------------------------------------------------------------------------
118 // wxHtmlTag
119 //-----------------------------------------------------------------------------
120
121 IMPLEMENT_CLASS(wxHtmlTag,wxObject)
122
123 wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos, wxHtmlTagsCache* cache) : wxObject()
124 {
125 int i;
126 char c;
127
128 // fill-in name, params and begin pos:
129 m_Name = m_Params = wxEmptyString;
130 i = pos+1;
131 if (source[i] == '/') {m_Ending = TRUE; i++;}
132 else m_Ending = FALSE;
133
134 while ((i < end_pos) &&
135 ((c = source[i++]) != ' ' && c != '\r' && c != '\n' && c != '\t' &&
136 c != '>')) {
137 if ((c >= 'a') && (c <= 'z')) c -= ('a' - 'A');
138 m_Name += c;
139 }
140
141 if (source[i-1] != '>')
142 while ((i < end_pos) && ((c = source[i++]) != '>')) {
143 if ((c >= 'a') && (c <= 'z')) c -= ('a' - 'A');
144 if (c == '\r' || c == '\n' || c == '\t') c = ' '; // make future parsing a bit simpler
145 m_Params += c;
146 if (c == '"') {
147 while ((i < end_pos) && ((c = source[i++]) != '"')) m_Params += c;
148 m_Params += c;
149 }
150 else if (c == '\'') {
151 while ((i < end_pos) && ((c = source[i++]) != '\'')) m_Params += c;
152 m_Params += c;
153 }
154 }
155 m_Begin = i;
156
157 cache -> QueryTag(pos, &m_End1, &m_End2);
158 if (m_End1 > end_pos) m_End1 = end_pos;
159 if (m_End2 > end_pos) m_End2 = end_pos;
160 }
161
162
163
164 bool wxHtmlTag::HasParam(const wxString& par) const
165 {
166 const wxChar *st = m_Params, *p = par;
167 const wxChar *st2, *p2;
168
169 if (*st == 0) return FALSE;
170 if (*p == 0) return FALSE;
171 for (st2 = st, p2 = p; ; st2++) {
172 if (*p2 == 0) return TRUE;
173 if (*st2 == 0) return FALSE;
174 if (*p2 != *st2) p2 = p;
175 if (*p2 == *st2) p2++;
176 if (*st2 == ' ') p2 = p;
177 else if (*st2 == '=') {
178 p2 = p;
179 while (*st2 != ' ') {
180 if (*st2 == '"') {
181 st2++;
182 while (*st2 != '"') st2++;
183 }
184 st2++;
185 if (*st2 == 0) return FALSE;
186 }
187 }
188 }
189 }
190
191
192
193 wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
194 {
195 const wxChar *st = m_Params, *p = par;
196 const wxChar *st2, *p2;
197 bool comma;
198 char comma_char;
199
200 if (*st == 0) return "";
201 if (*p == 0) return "";
202 for (st2 = st, p2 = p; ; st2++) {
203 if (*p2 == 0) { // found
204 wxString fnd = "";
205 st2++; // '=' character
206 comma = FALSE;
207 comma_char = '\0';
208 if (!with_commas && (*(st2) == '"')) {
209 st2++;
210 comma = TRUE;
211 comma_char = '"';
212 }
213 else if (!with_commas && (*(st2) == '\'')) {
214 st2++;
215 comma = TRUE;
216 comma_char = '\'';
217 }
218 while (*st2 != 0) {
219 if (comma && *st2 == comma_char) comma = FALSE;
220 else if ((*st2 == ' ') && (!comma)) break;
221 fnd += (*(st2++));
222 }
223 if (!with_commas && (*(st2-1) == comma_char)) fnd.RemoveLast();
224 return fnd;
225 }
226 if (*st2 == 0) return "";
227 if (*p2 != *st2) p2 = p;
228 if (*p2 == *st2) p2++;
229 if (*st2 == ' ') p2 = p;
230 else if (*st2 == '=') {
231 p2 = p;
232 while (*st2 != ' ') {
233 if (*st2 == '"') {
234 st2++;
235 while (*st2 != '"') st2++;
236 }
237 else if (*st2 == '\'') {
238 st2++;
239 while (*st2 != '\'') st2++;
240 }
241 st2++;
242 }
243 }
244 }
245 }
246
247
248
249 int wxHtmlTag::ScanParam(const wxString& par, wxChar *format, void *param) const
250 {
251 wxString parval = GetParam(par);
252 return wxSscanf((const wxChar*)parval, format, param);
253 }
254
255 #endif