]> git.saurik.com Git - wxWidgets.git/blame - src/common/markupparser.cpp
supporting promised file urls for transfer, see #14281
[wxWidgets.git] / src / common / markupparser.cpp
CommitLineData
9bb9964e
VZ
1///////////////////////////////////////////////////////////////////////////////
2// Name: src/common/markupparser.cpp
3// Purpose: Implementation of wxMarkupParser.
4// Author: Vadim Zeitlin
5// Created: 2011-02-16
e2cb99c8 6// RCS-ID: $Id$
9bb9964e
VZ
7// Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
8// Licence: wxWindows licence
9///////////////////////////////////////////////////////////////////////////////
10
11// ============================================================================
12// declarations
13// ============================================================================
14
15// ----------------------------------------------------------------------------
16// headers
17// ----------------------------------------------------------------------------
18
19// for compilers that support precompilation, includes "wx.h".
20#include "wx/wxprec.h"
21
22#ifdef __BORLANDC__
23 #pragma hdrstop
24#endif
25
f5bdfc69 26#if wxUSE_MARKUP
9bb9964e 27
1a6e6d54
VZ
28#ifndef WX_PRECOMP
29 #include "wx/log.h"
30#endif
31
9bb9964e
VZ
32#include "wx/private/markupparser.h"
33
34#include "wx/stack.h"
35
36namespace
37{
38
39// ----------------------------------------------------------------------------
40// constants
41// ----------------------------------------------------------------------------
42
// Description of a single predefined XML 1.0 entity such as "&lt;".
struct XMLEntity
{
    const char *name;   // entity name without the leading '&' and trailing ';'
    int len;            // length of the name, always equal to strlen(name)
    char value;         // the single character the entity stands for
};

// Table of all the predefined XML 1.0 entities. It is used both for decoding
// the entities found in the markup and for escaping the special characters.
const XMLEntity xmlEntities[] =
{
    { "lt",   2, '<'  },
    { "gt",   2, '>'  },
    { "amp",  3, '&'  },
    { "apos", 4, '\'' },
    { "quot", 4, '"'  },
};
57
58// ----------------------------------------------------------------------------
59// helper functions
60// ----------------------------------------------------------------------------
61
62wxString
63ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end)
64{
65 wxString str;
66 for ( ; it != end; ++it )
67 {
68 if ( *it == ch )
69 return str;
70
71 str += *it;
72 }
73
74 // Return empty string to indicate that we didn't find ch at all.
75 return wxString();
76}
77
78} // anonymous namespace
79
80// ============================================================================
81// wxMarkupParser implementation
82// ============================================================================
83
84wxString
85wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs)
86{
87 if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() )
88 {
89 return wxString::Format("tag \"%s\" can't have attributes",
90 tagAndAttrs.name);
91 }
92
93 // TODO: Parse more attributes described at
94 // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
95 // and at least ignore them gracefully instead of giving errors (but
96 // quite a few of them could be supported as well, notable font_desc).
97
98 wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs;
99
100 while ( !attrs.empty() )
101 {
102 wxString rest;
103 const wxString attr = attrs.BeforeFirst(' ', &rest);
104 attrs = rest;
105
106 // The "original" versions are used for error messages only.
107 wxString valueOrig;
108 const wxString nameOrig = attr.BeforeFirst('=', &valueOrig);
109
110 const wxString name = nameOrig.Lower();
111 wxString value = valueOrig.Lower();
112
113 // All attributes values must be quoted.
114 if ( value.length() < 2 ||
115 (value[0] != value.Last()) ||
116 (value[0] != '"' && value[0] != '\'') )
117 {
118 return wxString::Format("bad quoting for value of \"%s\"",
119 nameOrig);
120 }
121
122 value.assign(value, 1, value.length() - 2);
123
124 if ( name == "foreground" || name == "fgcolor" || name == "color" )
125 {
126 spanAttrs.m_fgCol = value;
127 }
128 else if ( name == "background" || name == "bgcolor" )
129 {
130 spanAttrs.m_bgCol = value;
131 }
132 else if ( name == "font_family" || name == "face" )
133 {
134 spanAttrs.m_fontFace = value;
135 }
136 else if ( name == "font_weight" || name == "weight" )
137 {
138 unsigned long weight;
139
140 if ( value == "ultralight" || value == "light" || value == "normal" )
141 spanAttrs.m_isBold = wxMarkupSpanAttributes::No;
142 else if ( value == "bold" || value == "ultrabold" || value == "heavy" )
143 spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes;
144 else if ( value.ToULong(&weight) )
145 spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes
146 : wxMarkupSpanAttributes::No;
147 else
148 return wxString::Format("invalid font weight \"%s\"", valueOrig);
149 }
150 else if ( name == "font_style" || name == "style" )
151 {
152 if ( value == "normal" )
153 spanAttrs.m_isItalic = wxMarkupSpanAttributes::No;
154 else if ( value == "oblique" || value == "italic" )
155 spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes;
156 else
157 return wxString::Format("invalid font style \"%s\"", valueOrig);
158 }
159 else if ( name == "size" )
160 {
161 unsigned long size;
162 if ( value.ToULong(&size) )
163 {
164 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts;
165 spanAttrs.m_fontSize = size;
166 }
167 else if ( value == "smaller" || value == "larger" )
168 {
169 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative;
170 spanAttrs.m_fontSize = value == "smaller" ? -1 : +1;
171 }
172 else // Must be a CSS-like size specification
173 {
174 int cssSize = 1;
175 wxString rest;
176 if ( value.StartsWith("xx-", &rest) )
177 cssSize = 3;
178 else if ( value.StartsWith("x-", &rest) )
179 cssSize = 2;
180 else if ( value == "medium" )
181 cssSize = 0;
182 else
183 rest = value;
184
185 if ( cssSize != 0 )
186 {
187 if ( rest == "small" )
188 cssSize = -cssSize;
189 else if ( rest != "large" )
190 return wxString::Format("invalid font size \"%s\"",
191 valueOrig);
192 }
193
194 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic;
195 spanAttrs.m_fontSize = cssSize;
196 }
197 }
198 }
199
200 return wxString();
201}
202
203bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start)
204{
205 if ( tagAndAttrs.name.CmpNoCase("span") == 0 )
206 {
207 if ( start )
208 m_output.OnSpanStart(tagAndAttrs.attrs);
209 else
210 m_output.OnSpanEnd(tagAndAttrs.attrs);
211
212 return true;
213 }
214 else // non-span tag
215 {
216 static const struct TagHandler
217 {
218 const char *name;
219 void (wxMarkupParserOutput::*startFunc)();
220 void (wxMarkupParserOutput::*endFunc)();
221 } tagHandlers[] =
222 {
223 { "b", &wxMarkupParserOutput::OnBoldStart,
224 &wxMarkupParserOutput::OnBoldEnd },
225 { "i", &wxMarkupParserOutput::OnItalicStart,
226 &wxMarkupParserOutput::OnItalicEnd },
227 { "u", &wxMarkupParserOutput::OnUnderlinedStart,
228 &wxMarkupParserOutput::OnUnderlinedEnd },
229 { "s", &wxMarkupParserOutput::OnStrikethroughStart,
230 &wxMarkupParserOutput::OnStrikethroughEnd },
231 { "big", &wxMarkupParserOutput::OnBigStart,
232 &wxMarkupParserOutput::OnBigEnd },
233 { "small", &wxMarkupParserOutput::OnSmallStart,
234 &wxMarkupParserOutput::OnSmallEnd },
235 { "tt", &wxMarkupParserOutput::OnTeletypeStart,
236 &wxMarkupParserOutput::OnTeletypeEnd },
237 };
238
239 for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ )
240 {
241 const TagHandler& h = tagHandlers[n];
242
243 if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 )
244 {
245 if ( start )
246 (m_output.*(h.startFunc))();
247 else
248 (m_output.*(h.endFunc))();
249
250 return true;
251 }
252 }
253 }
254
255 // Unknown tag name.
256 return false;
257}
258
259bool wxMarkupParser::Parse(const wxString& text)
260{
261 // The stack containing the names and corresponding attributes (which are
262 // actually only used for <span> tags) of all of the currently opened tag
263 // or none if we're not inside any tag.
264 wxStack<TagAndAttrs> tags;
265
266 // Current run of text.
267 wxString current;
268
269 const wxString::const_iterator end = text.end();
270 for ( wxString::const_iterator it = text.begin(); it != end; ++it )
271 {
272 switch ( (*it).GetValue() )
273 {
274 case '<':
275 {
276 // Flush the text preceding the tag, if any.
277 if ( !current.empty() )
278 {
279 m_output.OnText(current);
280 current.clear();
281 }
c564ca3c
VZ
282
283 // This variable is used only in the debugging messages
284 // and doesn't need to be defined if they're not compiled
285 // at all (it actually would result in unused variable
286 // messages in this case).
e779f093 287#if wxUSE_LOG_DEBUG || !defined(HAVE_VARIADIC_MACROS)
9bb9964e
VZ
288 // Remember the tag starting position for the error
289 // messages.
290 const size_t pos = it - text.begin();
e2cb99c8 291#endif
9bb9964e
VZ
292 bool start = true;
293 if ( ++it != end && *it == '/' )
294 {
295 start = false;
296 ++it;
297 }
298
299 const wxString tag = ExtractUntil('>', it, end);
300 if ( tag.empty() )
301 {
302 wxLogDebug("%s at %lu.",
303 it == end ? "Unclosed tag starting"
304 : "Empty tag",
305 pos);
306 return false;
307 }
308
309 if ( start )
310 {
311 wxString attrs;
312 const wxString name = tag.BeforeFirst(' ', &attrs);
313
314 TagAndAttrs tagAndAttrs(name);
315 const wxString err = ParseAttrs(attrs, tagAndAttrs);
316 if ( !err.empty() )
317 {
318 wxLogDebug("Bad attributes for \"%s\" "
319 "at %lu: %s.",
320 name, pos, err);
321 return false;
322 }
323
324 tags.push(tagAndAttrs);
325 }
326 else // end tag
327 {
328 if ( tags.empty() || tags.top().name != tag )
329 {
330 wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
331 tag, pos);
332 return false;
333 }
334 }
335
336 if ( !OutputTag(tags.top(), start) )
337 {
338 wxLogDebug("Unknown tag at %lu.", pos);
339 return false;
340 }
341
342 if ( !start )
343 tags.pop();
344 }
345 break;
346
347 case '>':
348 wxLogDebug("'>' should be escaped as \"&gt\"; at %lu.",
349 it - text.begin());
350 break;
351
352 case '&':
353 // Processing is somewhat complicated: we need to recognize at
354 // least the "&lt;" entity to allow escaping left square
355 // brackets in the markup and, in fact, we recognize all of the
356 // standard XML entities for consistency with Pango markup
357 // parsing.
358 //
359 // However we also allow '&' to appear unescaped, i.e. directly
360 // and not as "&amp;" when it is used to introduce the mnemonic
361 // for the label. In this case we simply leave it alone.
362 //
363 // Notice that this logic makes it impossible to have a label
364 // with "lt;" inside it and using "l" as mnemonic but hopefully
365 // this shouldn't be a problem in practice.
366 {
367 const size_t pos = it - text.begin() + 1;
368
369 unsigned n;
370 for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
371 {
372 const XMLEntity& xmlEnt = xmlEntities[n];
373 if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0
374 && text[pos + xmlEnt.len] == ';' )
375 {
376 // Escape the ampersands if needed to protect them
377 // from being interpreted as mnemonics indicators.
378 if ( xmlEnt.value == '&' )
379 current += "&&";
380 else
381 current += xmlEnt.value;
382
383 it += xmlEnt.len + 1; // +1 for '&' itself
384
385 break;
386 }
387 }
388
389 if ( n < WXSIZEOF(xmlEntities) )
390 break;
391 //else: fall through, '&' is not special
392 }
393
394 default:
395 current += *it;
396 }
397 }
398
399 if ( !tags.empty() )
400 {
401 wxLogDebug("Missing closing tag for \"%s\"", tags.top().name);
402 return false;
403 }
404
405 if ( !current.empty() )
406 m_output.OnText(current);
407
408 return true;
409}
410
411/* static */
412wxString wxMarkupParser::Quote(const wxString& text)
413{
414 wxString quoted;
415 quoted.reserve(text.length());
416
417 for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it )
418 {
419 unsigned n;
420 for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
421 {
422 const XMLEntity& xmlEnt = xmlEntities[n];
423 if ( *it == xmlEnt.value )
424 {
425 quoted << '&' << xmlEnt.name << ';';
426 break;
427 }
428 }
429
430 if ( n == WXSIZEOF(xmlEntities) )
431 quoted += *it;
432 }
433
434 return quoted;
435}
5eb051a7
VZ
436
437/* static */
438wxString wxMarkupParser::Strip(const wxString& text)
439{
440 class StripOutput : public wxMarkupParserOutput
441 {
442 public:
443 StripOutput() { }
444
445 const wxString& GetText() const { return m_text; }
446
447 virtual void OnText(const wxString& text) { m_text += text; }
448
449 virtual void OnBoldStart() { }
450 virtual void OnBoldEnd() { }
451
452 virtual void OnItalicStart() { }
453 virtual void OnItalicEnd() { }
454
455 virtual void OnUnderlinedStart() { }
456 virtual void OnUnderlinedEnd() { }
457
458 virtual void OnStrikethroughStart() { }
459 virtual void OnStrikethroughEnd() { }
460
461 virtual void OnBigStart() { }
462 virtual void OnBigEnd() { }
463
464 virtual void OnSmallStart() { }
465 virtual void OnSmallEnd() { }
466
467 virtual void OnTeletypeStart() { }
468 virtual void OnTeletypeEnd() { }
469
470 virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) { }
471 virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) { }
472
473 private:
474 wxString m_text;
475 };
476
477 StripOutput output;
478 wxMarkupParser parser(output);
479 if ( !parser.Parse(text) )
480 return wxString();
481
482 return output.GetText();
483}
f5bdfc69
VZ
484
485#endif // wxUSE_MARKUP