Remove all lines containing cvs/svn "$Id$" keyword.
[wxWidgets.git] / src / common / markupparser.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/markupparser.cpp
3 // Purpose: Implementation of wxMarkupParser.
4 // Author: Vadim Zeitlin
5 // Created: 2011-02-16
6 // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
7 // Licence: wxWindows licence
8 ///////////////////////////////////////////////////////////////////////////////
9
10 // ============================================================================
11 // declarations
12 // ============================================================================
13
14 // ----------------------------------------------------------------------------
15 // headers
16 // ----------------------------------------------------------------------------
17
18 // for compilers that support precompilation, includes "wx.h".
19 #include "wx/wxprec.h"
20
21 #ifdef __BORLANDC__
22 #pragma hdrstop
23 #endif
24
25 #if wxUSE_MARKUP
26
27 #ifndef WX_PRECOMP
28 #include "wx/log.h"
29 #endif
30
31 #include "wx/private/markupparser.h"
32
33 #include "wx/stack.h"
34
35 namespace
36 {
37
38 // ----------------------------------------------------------------------------
39 // constants
40 // ----------------------------------------------------------------------------
41
// Description of a single predefined XML 1.0 entity.
struct XMLEntity
{
    // Entity name as it appears between '&' and ';'.
    const char *name;

    // Length of the name, must be equal to strlen(name).
    int len;

    // The character the entity stands for.
    char value;
};

// Table of all the predefined XML 1.0 entities.
const XMLEntity xmlEntities[] =
{
    { "lt",   2, '<'  },
    { "gt",   2, '>'  },
    { "amp",  3, '&'  },
    { "apos", 4, '\'' },
    { "quot", 4, '"'  },
};
56
57 // ----------------------------------------------------------------------------
58 // helper functions
59 // ----------------------------------------------------------------------------
60
61 wxString
62 ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end)
63 {
64 wxString str;
65 for ( ; it != end; ++it )
66 {
67 if ( *it == ch )
68 return str;
69
70 str += *it;
71 }
72
73 // Return empty string to indicate that we didn't find ch at all.
74 return wxString();
75 }
76
77 } // anonymous namespace
78
79 // ============================================================================
80 // wxMarkupParser implementation
81 // ============================================================================
82
83 wxString
84 wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs)
85 {
86 if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() )
87 {
88 return wxString::Format("tag \"%s\" can't have attributes",
89 tagAndAttrs.name);
90 }
91
92 // TODO: Parse more attributes described at
93 // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
94 // and at least ignore them gracefully instead of giving errors (but
95 // quite a few of them could be supported as well, notable font_desc).
96
97 wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs;
98
99 while ( !attrs.empty() )
100 {
101 wxString rest;
102 const wxString attr = attrs.BeforeFirst(' ', &rest);
103 attrs = rest;
104
105 // The "original" versions are used for error messages only.
106 wxString valueOrig;
107 const wxString nameOrig = attr.BeforeFirst('=', &valueOrig);
108
109 const wxString name = nameOrig.Lower();
110 wxString value = valueOrig.Lower();
111
112 // All attributes values must be quoted.
113 if ( value.length() < 2 ||
114 (value[0] != value.Last()) ||
115 (value[0] != '"' && value[0] != '\'') )
116 {
117 return wxString::Format("bad quoting for value of \"%s\"",
118 nameOrig);
119 }
120
121 value.assign(value, 1, value.length() - 2);
122
123 if ( name == "foreground" || name == "fgcolor" || name == "color" )
124 {
125 spanAttrs.m_fgCol = value;
126 }
127 else if ( name == "background" || name == "bgcolor" )
128 {
129 spanAttrs.m_bgCol = value;
130 }
131 else if ( name == "font_family" || name == "face" )
132 {
133 spanAttrs.m_fontFace = value;
134 }
135 else if ( name == "font_weight" || name == "weight" )
136 {
137 unsigned long weight;
138
139 if ( value == "ultralight" || value == "light" || value == "normal" )
140 spanAttrs.m_isBold = wxMarkupSpanAttributes::No;
141 else if ( value == "bold" || value == "ultrabold" || value == "heavy" )
142 spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes;
143 else if ( value.ToULong(&weight) )
144 spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes
145 : wxMarkupSpanAttributes::No;
146 else
147 return wxString::Format("invalid font weight \"%s\"", valueOrig);
148 }
149 else if ( name == "font_style" || name == "style" )
150 {
151 if ( value == "normal" )
152 spanAttrs.m_isItalic = wxMarkupSpanAttributes::No;
153 else if ( value == "oblique" || value == "italic" )
154 spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes;
155 else
156 return wxString::Format("invalid font style \"%s\"", valueOrig);
157 }
158 else if ( name == "size" )
159 {
160 unsigned long size;
161 if ( value.ToULong(&size) )
162 {
163 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts;
164 spanAttrs.m_fontSize = size;
165 }
166 else if ( value == "smaller" || value == "larger" )
167 {
168 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative;
169 spanAttrs.m_fontSize = value == "smaller" ? -1 : +1;
170 }
171 else // Must be a CSS-like size specification
172 {
173 int cssSize = 1;
174 if ( value.StartsWith("xx-", &rest) )
175 cssSize = 3;
176 else if ( value.StartsWith("x-", &rest) )
177 cssSize = 2;
178 else if ( value == "medium" )
179 cssSize = 0;
180 else
181 rest = value;
182
183 if ( cssSize != 0 )
184 {
185 if ( rest == "small" )
186 cssSize = -cssSize;
187 else if ( rest != "large" )
188 return wxString::Format("invalid font size \"%s\"",
189 valueOrig);
190 }
191
192 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic;
193 spanAttrs.m_fontSize = cssSize;
194 }
195 }
196 }
197
198 return wxString();
199 }
200
201 bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start)
202 {
203 if ( tagAndAttrs.name.CmpNoCase("span") == 0 )
204 {
205 if ( start )
206 m_output.OnSpanStart(tagAndAttrs.attrs);
207 else
208 m_output.OnSpanEnd(tagAndAttrs.attrs);
209
210 return true;
211 }
212 else // non-span tag
213 {
214 static const struct TagHandler
215 {
216 const char *name;
217 void (wxMarkupParserOutput::*startFunc)();
218 void (wxMarkupParserOutput::*endFunc)();
219 } tagHandlers[] =
220 {
221 { "b", &wxMarkupParserOutput::OnBoldStart,
222 &wxMarkupParserOutput::OnBoldEnd },
223 { "i", &wxMarkupParserOutput::OnItalicStart,
224 &wxMarkupParserOutput::OnItalicEnd },
225 { "u", &wxMarkupParserOutput::OnUnderlinedStart,
226 &wxMarkupParserOutput::OnUnderlinedEnd },
227 { "s", &wxMarkupParserOutput::OnStrikethroughStart,
228 &wxMarkupParserOutput::OnStrikethroughEnd },
229 { "big", &wxMarkupParserOutput::OnBigStart,
230 &wxMarkupParserOutput::OnBigEnd },
231 { "small", &wxMarkupParserOutput::OnSmallStart,
232 &wxMarkupParserOutput::OnSmallEnd },
233 { "tt", &wxMarkupParserOutput::OnTeletypeStart,
234 &wxMarkupParserOutput::OnTeletypeEnd },
235 };
236
237 for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ )
238 {
239 const TagHandler& h = tagHandlers[n];
240
241 if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 )
242 {
243 if ( start )
244 (m_output.*(h.startFunc))();
245 else
246 (m_output.*(h.endFunc))();
247
248 return true;
249 }
250 }
251 }
252
253 // Unknown tag name.
254 return false;
255 }
256
257 bool wxMarkupParser::Parse(const wxString& text)
258 {
259 // The stack containing the names and corresponding attributes (which are
260 // actually only used for <span> tags) of all of the currently opened tag
261 // or none if we're not inside any tag.
262 wxStack<TagAndAttrs> tags;
263
264 // Current run of text.
265 wxString current;
266
267 const wxString::const_iterator end = text.end();
268 for ( wxString::const_iterator it = text.begin(); it != end; ++it )
269 {
270 switch ( (*it).GetValue() )
271 {
272 case '<':
273 {
274 // Flush the text preceding the tag, if any.
275 if ( !current.empty() )
276 {
277 m_output.OnText(current);
278 current.clear();
279 }
280
281 // This variable is used only in the debugging messages
282 // and doesn't need to be defined if they're not compiled
283 // at all (it actually would result in unused variable
284 // messages in this case).
285 #if wxUSE_LOG_DEBUG || !defined(HAVE_VARIADIC_MACROS)
286 // Remember the tag starting position for the error
287 // messages.
288 const size_t pos = it - text.begin();
289 #endif
290 bool start = true;
291 if ( ++it != end && *it == '/' )
292 {
293 start = false;
294 ++it;
295 }
296
297 const wxString tag = ExtractUntil('>', it, end);
298 if ( tag.empty() )
299 {
300 wxLogDebug("%s at %lu.",
301 it == end ? "Unclosed tag starting"
302 : "Empty tag",
303 pos);
304 return false;
305 }
306
307 if ( start )
308 {
309 wxString attrs;
310 const wxString name = tag.BeforeFirst(' ', &attrs);
311
312 TagAndAttrs tagAndAttrs(name);
313 const wxString err = ParseAttrs(attrs, tagAndAttrs);
314 if ( !err.empty() )
315 {
316 wxLogDebug("Bad attributes for \"%s\" "
317 "at %lu: %s.",
318 name, pos, err);
319 return false;
320 }
321
322 tags.push(tagAndAttrs);
323 }
324 else // end tag
325 {
326 if ( tags.empty() || tags.top().name != tag )
327 {
328 wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
329 tag, pos);
330 return false;
331 }
332 }
333
334 if ( !OutputTag(tags.top(), start) )
335 {
336 wxLogDebug("Unknown tag at %lu.", pos);
337 return false;
338 }
339
340 if ( !start )
341 tags.pop();
342 }
343 break;
344
345 case '>':
346 wxLogDebug("'>' should be escaped as \"&gt\"; at %lu.",
347 it - text.begin());
348 break;
349
350 case '&':
351 // Processing is somewhat complicated: we need to recognize at
352 // least the "&lt;" entity to allow escaping left square
353 // brackets in the markup and, in fact, we recognize all of the
354 // standard XML entities for consistency with Pango markup
355 // parsing.
356 //
357 // However we also allow '&' to appear unescaped, i.e. directly
358 // and not as "&amp;" when it is used to introduce the mnemonic
359 // for the label. In this case we simply leave it alone.
360 //
361 // Notice that this logic makes it impossible to have a label
362 // with "lt;" inside it and using "l" as mnemonic but hopefully
363 // this shouldn't be a problem in practice.
364 {
365 const size_t pos = it - text.begin() + 1;
366
367 unsigned n;
368 for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
369 {
370 const XMLEntity& xmlEnt = xmlEntities[n];
371 if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0
372 && text[pos + xmlEnt.len] == ';' )
373 {
374 // Escape the ampersands if needed to protect them
375 // from being interpreted as mnemonics indicators.
376 if ( xmlEnt.value == '&' )
377 current += "&&";
378 else
379 current += xmlEnt.value;
380
381 it += xmlEnt.len + 1; // +1 for '&' itself
382
383 break;
384 }
385 }
386
387 if ( n < WXSIZEOF(xmlEntities) )
388 break;
389 //else: fall through, '&' is not special
390 }
391
392 default:
393 current += *it;
394 }
395 }
396
397 if ( !tags.empty() )
398 {
399 wxLogDebug("Missing closing tag for \"%s\"", tags.top().name);
400 return false;
401 }
402
403 if ( !current.empty() )
404 m_output.OnText(current);
405
406 return true;
407 }
408
409 /* static */
410 wxString wxMarkupParser::Quote(const wxString& text)
411 {
412 wxString quoted;
413 quoted.reserve(text.length());
414
415 for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it )
416 {
417 unsigned n;
418 for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
419 {
420 const XMLEntity& xmlEnt = xmlEntities[n];
421 if ( *it == xmlEnt.value )
422 {
423 quoted << '&' << xmlEnt.name << ';';
424 break;
425 }
426 }
427
428 if ( n == WXSIZEOF(xmlEntities) )
429 quoted += *it;
430 }
431
432 return quoted;
433 }
434
435 /* static */
436 wxString wxMarkupParser::Strip(const wxString& text)
437 {
438 class StripOutput : public wxMarkupParserOutput
439 {
440 public:
441 StripOutput() { }
442
443 const wxString& GetText() const { return m_text; }
444
445 virtual void OnText(const wxString& text) { m_text += text; }
446
447 virtual void OnBoldStart() { }
448 virtual void OnBoldEnd() { }
449
450 virtual void OnItalicStart() { }
451 virtual void OnItalicEnd() { }
452
453 virtual void OnUnderlinedStart() { }
454 virtual void OnUnderlinedEnd() { }
455
456 virtual void OnStrikethroughStart() { }
457 virtual void OnStrikethroughEnd() { }
458
459 virtual void OnBigStart() { }
460 virtual void OnBigEnd() { }
461
462 virtual void OnSmallStart() { }
463 virtual void OnSmallEnd() { }
464
465 virtual void OnTeletypeStart() { }
466 virtual void OnTeletypeEnd() { }
467
468 virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) { }
469 virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) { }
470
471 private:
472 wxString m_text;
473 };
474
475 StripOutput output;
476 wxMarkupParser parser(output);
477 if ( !parser.Parse(text) )
478 return wxString();
479
480 return output.GetText();
481 }
482
483 #endif // wxUSE_MARKUP