]> git.saurik.com Git - wxWidgets.git/blob - src/common/markupparser.cpp
fixing overrelease and out-of-bounds write, fixes #13725
[wxWidgets.git] / src / common / markupparser.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/markupparser.cpp
3 // Purpose: Implementation of wxMarkupParser.
4 // Author: Vadim Zeitlin
5 // Created: 2011-02-16
6 // RCS-ID: $Id$
7 // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
10
11 // ============================================================================
12 // declarations
13 // ============================================================================
14
15 // ----------------------------------------------------------------------------
16 // headers
17 // ----------------------------------------------------------------------------
18
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
21
22 #ifdef __BORLANDC__
23 #pragma hdrstop
24 #endif
25
26 #if wxUSE_MARKUP
27
28 #ifndef WX_PRECOMP
29 #include "wx/log.h"
30 #endif
31
32 #include "wx/private/markupparser.h"
33
34 #include "wx/stack.h"
35
36 namespace
37 {
38
39 // ----------------------------------------------------------------------------
40 // constants
41 // ----------------------------------------------------------------------------
42
// Description of a single predefined XML 1.0 entity.
struct XMLEntity
{
    const char *name;   // entity name without the surrounding '&' and ';'
    int len;            // always equal to strlen(name)
    char value;         // the character the entity stands for
};

// Table of all the predefined XML 1.0 entities.
const XMLEntity xmlEntities[] =
{
    { "lt",   2, '<'  },
    { "gt",   2, '>'  },
    { "amp",  3, '&'  },
    { "apos", 4, '\'' },
    { "quot", 4, '"'  },
};
57
58 // ----------------------------------------------------------------------------
59 // helper functions
60 // ----------------------------------------------------------------------------
61
62 wxString
63 ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end)
64 {
65 wxString str;
66 for ( ; it != end; ++it )
67 {
68 if ( *it == ch )
69 return str;
70
71 str += *it;
72 }
73
74 // Return empty string to indicate that we didn't find ch at all.
75 return wxString();
76 }
77
78 } // anonymous namespace
79
80 // ============================================================================
81 // wxMarkupParser implementation
82 // ============================================================================
83
84 wxString
85 wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs)
86 {
87 if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() )
88 {
89 return wxString::Format("tag \"%s\" can't have attributes",
90 tagAndAttrs.name);
91 }
92
93 // TODO: Parse more attributes described at
94 // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
95 // and at least ignore them gracefully instead of giving errors (but
96 // quite a few of them could be supported as well, notable font_desc).
97
98 wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs;
99
100 while ( !attrs.empty() )
101 {
102 wxString rest;
103 const wxString attr = attrs.BeforeFirst(' ', &rest);
104 attrs = rest;
105
106 // The "original" versions are used for error messages only.
107 wxString valueOrig;
108 const wxString nameOrig = attr.BeforeFirst('=', &valueOrig);
109
110 const wxString name = nameOrig.Lower();
111 wxString value = valueOrig.Lower();
112
113 // All attributes values must be quoted.
114 if ( value.length() < 2 ||
115 (value[0] != value.Last()) ||
116 (value[0] != '"' && value[0] != '\'') )
117 {
118 return wxString::Format("bad quoting for value of \"%s\"",
119 nameOrig);
120 }
121
122 value.assign(value, 1, value.length() - 2);
123
124 if ( name == "foreground" || name == "fgcolor" || name == "color" )
125 {
126 spanAttrs.m_fgCol = value;
127 }
128 else if ( name == "background" || name == "bgcolor" )
129 {
130 spanAttrs.m_bgCol = value;
131 }
132 else if ( name == "font_family" || name == "face" )
133 {
134 spanAttrs.m_fontFace = value;
135 }
136 else if ( name == "font_weight" || name == "weight" )
137 {
138 unsigned long weight;
139
140 if ( value == "ultralight" || value == "light" || value == "normal" )
141 spanAttrs.m_isBold = wxMarkupSpanAttributes::No;
142 else if ( value == "bold" || value == "ultrabold" || value == "heavy" )
143 spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes;
144 else if ( value.ToULong(&weight) )
145 spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes
146 : wxMarkupSpanAttributes::No;
147 else
148 return wxString::Format("invalid font weight \"%s\"", valueOrig);
149 }
150 else if ( name == "font_style" || name == "style" )
151 {
152 if ( value == "normal" )
153 spanAttrs.m_isItalic = wxMarkupSpanAttributes::No;
154 else if ( value == "oblique" || value == "italic" )
155 spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes;
156 else
157 return wxString::Format("invalid font style \"%s\"", valueOrig);
158 }
159 else if ( name == "size" )
160 {
161 unsigned long size;
162 if ( value.ToULong(&size) )
163 {
164 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts;
165 spanAttrs.m_fontSize = size;
166 }
167 else if ( value == "smaller" || value == "larger" )
168 {
169 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative;
170 spanAttrs.m_fontSize = value == "smaller" ? -1 : +1;
171 }
172 else // Must be a CSS-like size specification
173 {
174 int cssSize = 1;
175 if ( value.StartsWith("xx-", &rest) )
176 cssSize = 3;
177 else if ( value.StartsWith("x-", &rest) )
178 cssSize = 2;
179 else if ( value == "medium" )
180 cssSize = 0;
181 else
182 rest = value;
183
184 if ( cssSize != 0 )
185 {
186 if ( rest == "small" )
187 cssSize = -cssSize;
188 else if ( rest != "large" )
189 return wxString::Format("invalid font size \"%s\"",
190 valueOrig);
191 }
192
193 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic;
194 spanAttrs.m_fontSize = cssSize;
195 }
196 }
197 }
198
199 return wxString();
200 }
201
202 bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start)
203 {
204 if ( tagAndAttrs.name.CmpNoCase("span") == 0 )
205 {
206 if ( start )
207 m_output.OnSpanStart(tagAndAttrs.attrs);
208 else
209 m_output.OnSpanEnd(tagAndAttrs.attrs);
210
211 return true;
212 }
213 else // non-span tag
214 {
215 static const struct TagHandler
216 {
217 const char *name;
218 void (wxMarkupParserOutput::*startFunc)();
219 void (wxMarkupParserOutput::*endFunc)();
220 } tagHandlers[] =
221 {
222 { "b", &wxMarkupParserOutput::OnBoldStart,
223 &wxMarkupParserOutput::OnBoldEnd },
224 { "i", &wxMarkupParserOutput::OnItalicStart,
225 &wxMarkupParserOutput::OnItalicEnd },
226 { "u", &wxMarkupParserOutput::OnUnderlinedStart,
227 &wxMarkupParserOutput::OnUnderlinedEnd },
228 { "s", &wxMarkupParserOutput::OnStrikethroughStart,
229 &wxMarkupParserOutput::OnStrikethroughEnd },
230 { "big", &wxMarkupParserOutput::OnBigStart,
231 &wxMarkupParserOutput::OnBigEnd },
232 { "small", &wxMarkupParserOutput::OnSmallStart,
233 &wxMarkupParserOutput::OnSmallEnd },
234 { "tt", &wxMarkupParserOutput::OnTeletypeStart,
235 &wxMarkupParserOutput::OnTeletypeEnd },
236 };
237
238 for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ )
239 {
240 const TagHandler& h = tagHandlers[n];
241
242 if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 )
243 {
244 if ( start )
245 (m_output.*(h.startFunc))();
246 else
247 (m_output.*(h.endFunc))();
248
249 return true;
250 }
251 }
252 }
253
254 // Unknown tag name.
255 return false;
256 }
257
258 bool wxMarkupParser::Parse(const wxString& text)
259 {
260 // The stack containing the names and corresponding attributes (which are
261 // actually only used for <span> tags) of all of the currently opened tag
262 // or none if we're not inside any tag.
263 wxStack<TagAndAttrs> tags;
264
265 // Current run of text.
266 wxString current;
267
268 const wxString::const_iterator end = text.end();
269 for ( wxString::const_iterator it = text.begin(); it != end; ++it )
270 {
271 switch ( (*it).GetValue() )
272 {
273 case '<':
274 {
275 // Flush the text preceding the tag, if any.
276 if ( !current.empty() )
277 {
278 m_output.OnText(current);
279 current.clear();
280 }
281
282 // This variable is used only in the debugging messages
283 // and doesn't need to be defined if they're not compiled
284 // at all (it actually would result in unused variable
285 // messages in this case).
286 #if wxUSE_LOG_DEBUG || !defined(HAVE_VARIADIC_MACROS)
287 // Remember the tag starting position for the error
288 // messages.
289 const size_t pos = it - text.begin();
290 #endif
291 bool start = true;
292 if ( ++it != end && *it == '/' )
293 {
294 start = false;
295 ++it;
296 }
297
298 const wxString tag = ExtractUntil('>', it, end);
299 if ( tag.empty() )
300 {
301 wxLogDebug("%s at %lu.",
302 it == end ? "Unclosed tag starting"
303 : "Empty tag",
304 pos);
305 return false;
306 }
307
308 if ( start )
309 {
310 wxString attrs;
311 const wxString name = tag.BeforeFirst(' ', &attrs);
312
313 TagAndAttrs tagAndAttrs(name);
314 const wxString err = ParseAttrs(attrs, tagAndAttrs);
315 if ( !err.empty() )
316 {
317 wxLogDebug("Bad attributes for \"%s\" "
318 "at %lu: %s.",
319 name, pos, err);
320 return false;
321 }
322
323 tags.push(tagAndAttrs);
324 }
325 else // end tag
326 {
327 if ( tags.empty() || tags.top().name != tag )
328 {
329 wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
330 tag, pos);
331 return false;
332 }
333 }
334
335 if ( !OutputTag(tags.top(), start) )
336 {
337 wxLogDebug("Unknown tag at %lu.", pos);
338 return false;
339 }
340
341 if ( !start )
342 tags.pop();
343 }
344 break;
345
346 case '>':
347 wxLogDebug("'>' should be escaped as \"&gt\"; at %lu.",
348 it - text.begin());
349 break;
350
351 case '&':
352 // Processing is somewhat complicated: we need to recognize at
353 // least the "&lt;" entity to allow escaping left square
354 // brackets in the markup and, in fact, we recognize all of the
355 // standard XML entities for consistency with Pango markup
356 // parsing.
357 //
358 // However we also allow '&' to appear unescaped, i.e. directly
359 // and not as "&amp;" when it is used to introduce the mnemonic
360 // for the label. In this case we simply leave it alone.
361 //
362 // Notice that this logic makes it impossible to have a label
363 // with "lt;" inside it and using "l" as mnemonic but hopefully
364 // this shouldn't be a problem in practice.
365 {
366 const size_t pos = it - text.begin() + 1;
367
368 unsigned n;
369 for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
370 {
371 const XMLEntity& xmlEnt = xmlEntities[n];
372 if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0
373 && text[pos + xmlEnt.len] == ';' )
374 {
375 // Escape the ampersands if needed to protect them
376 // from being interpreted as mnemonics indicators.
377 if ( xmlEnt.value == '&' )
378 current += "&&";
379 else
380 current += xmlEnt.value;
381
382 it += xmlEnt.len + 1; // +1 for '&' itself
383
384 break;
385 }
386 }
387
388 if ( n < WXSIZEOF(xmlEntities) )
389 break;
390 //else: fall through, '&' is not special
391 }
392
393 default:
394 current += *it;
395 }
396 }
397
398 if ( !tags.empty() )
399 {
400 wxLogDebug("Missing closing tag for \"%s\"", tags.top().name);
401 return false;
402 }
403
404 if ( !current.empty() )
405 m_output.OnText(current);
406
407 return true;
408 }
409
410 /* static */
411 wxString wxMarkupParser::Quote(const wxString& text)
412 {
413 wxString quoted;
414 quoted.reserve(text.length());
415
416 for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it )
417 {
418 unsigned n;
419 for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
420 {
421 const XMLEntity& xmlEnt = xmlEntities[n];
422 if ( *it == xmlEnt.value )
423 {
424 quoted << '&' << xmlEnt.name << ';';
425 break;
426 }
427 }
428
429 if ( n == WXSIZEOF(xmlEntities) )
430 quoted += *it;
431 }
432
433 return quoted;
434 }
435
436 /* static */
437 wxString wxMarkupParser::Strip(const wxString& text)
438 {
439 class StripOutput : public wxMarkupParserOutput
440 {
441 public:
442 StripOutput() { }
443
444 const wxString& GetText() const { return m_text; }
445
446 virtual void OnText(const wxString& text) { m_text += text; }
447
448 virtual void OnBoldStart() { }
449 virtual void OnBoldEnd() { }
450
451 virtual void OnItalicStart() { }
452 virtual void OnItalicEnd() { }
453
454 virtual void OnUnderlinedStart() { }
455 virtual void OnUnderlinedEnd() { }
456
457 virtual void OnStrikethroughStart() { }
458 virtual void OnStrikethroughEnd() { }
459
460 virtual void OnBigStart() { }
461 virtual void OnBigEnd() { }
462
463 virtual void OnSmallStart() { }
464 virtual void OnSmallEnd() { }
465
466 virtual void OnTeletypeStart() { }
467 virtual void OnTeletypeEnd() { }
468
469 virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) { }
470 virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) { }
471
472 private:
473 wxString m_text;
474 };
475
476 StripOutput output;
477 wxMarkupParser parser(output);
478 if ( !parser.Parse(text) )
479 return wxString();
480
481 return output.GetText();
482 }
483
484 #endif // wxUSE_MARKUP