]>
Commit | Line | Data |
---|---|---|
9bb9964e VZ |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Name: src/common/markupparser.cpp | |
3 | // Purpose: Implementation of wxMarkupParser. | |
4 | // Author: Vadim Zeitlin | |
5 | // Created: 2011-02-16 | |
e2cb99c8 | 6 | // RCS-ID: $Id$ |
9bb9964e VZ |
7 | // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org> |
8 | // Licence: wxWindows licence | |
9 | /////////////////////////////////////////////////////////////////////////////// | |
10 | ||
11 | // ============================================================================ | |
12 | // declarations | |
13 | // ============================================================================ | |
14 | ||
15 | // ---------------------------------------------------------------------------- | |
16 | // headers | |
17 | // ---------------------------------------------------------------------------- | |
18 | ||
19 | // for compilers that support precompilation, includes "wx.h". | |
20 | #include "wx/wxprec.h" | |
21 | ||
22 | #ifdef __BORLANDC__ | |
23 | #pragma hdrstop | |
24 | #endif | |
25 | ||
f5bdfc69 | 26 | #if wxUSE_MARKUP |
9bb9964e | 27 | |
1a6e6d54 VZ |
28 | #ifndef WX_PRECOMP |
29 | #include "wx/log.h" | |
30 | #endif | |
31 | ||
9bb9964e VZ |
32 | #include "wx/private/markupparser.h" |
33 | ||
34 | #include "wx/stack.h" | |
35 | ||
36 | namespace | |
37 | { | |
38 | ||
39 | // ---------------------------------------------------------------------------- | |
40 | // constants | |
41 | // ---------------------------------------------------------------------------- | |
42 | ||
// Description of a single predefined XML 1.0 entity: its name, without the
// leading '&' and the trailing ';', and the character it stands for.
struct XMLEntity
{
    const char *name;   // entity name, e.g. "amp" for "&amp;"
    int len;            // length of the name, must be equal to strlen(name)
    char value;         // the character this entity expands to
};

// Table of all the predefined XML 1.0 entities.
const XMLEntity xmlEntities[] =
{
    { "lt",   2, '<'  },
    { "gt",   2, '>'  },
    { "amp",  3, '&'  },
    { "apos", 4, '\'' },
    { "quot", 4, '"'  },
};
57 | ||
58 | // ---------------------------------------------------------------------------- | |
59 | // helper functions | |
60 | // ---------------------------------------------------------------------------- | |
61 | ||
62 | wxString | |
63 | ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end) | |
64 | { | |
65 | wxString str; | |
66 | for ( ; it != end; ++it ) | |
67 | { | |
68 | if ( *it == ch ) | |
69 | return str; | |
70 | ||
71 | str += *it; | |
72 | } | |
73 | ||
74 | // Return empty string to indicate that we didn't find ch at all. | |
75 | return wxString(); | |
76 | } | |
77 | ||
78 | } // anonymous namespace | |
79 | ||
80 | // ============================================================================ | |
81 | // wxMarkupParser implementation | |
82 | // ============================================================================ | |
83 | ||
84 | wxString | |
85 | wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs) | |
86 | { | |
87 | if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() ) | |
88 | { | |
89 | return wxString::Format("tag \"%s\" can't have attributes", | |
90 | tagAndAttrs.name); | |
91 | } | |
92 | ||
93 | // TODO: Parse more attributes described at | |
94 | // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html | |
95 | // and at least ignore them gracefully instead of giving errors (but | |
96 | // quite a few of them could be supported as well, notable font_desc). | |
97 | ||
98 | wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs; | |
99 | ||
100 | while ( !attrs.empty() ) | |
101 | { | |
102 | wxString rest; | |
103 | const wxString attr = attrs.BeforeFirst(' ', &rest); | |
104 | attrs = rest; | |
105 | ||
106 | // The "original" versions are used for error messages only. | |
107 | wxString valueOrig; | |
108 | const wxString nameOrig = attr.BeforeFirst('=', &valueOrig); | |
109 | ||
110 | const wxString name = nameOrig.Lower(); | |
111 | wxString value = valueOrig.Lower(); | |
112 | ||
113 | // All attributes values must be quoted. | |
114 | if ( value.length() < 2 || | |
115 | (value[0] != value.Last()) || | |
116 | (value[0] != '"' && value[0] != '\'') ) | |
117 | { | |
118 | return wxString::Format("bad quoting for value of \"%s\"", | |
119 | nameOrig); | |
120 | } | |
121 | ||
122 | value.assign(value, 1, value.length() - 2); | |
123 | ||
124 | if ( name == "foreground" || name == "fgcolor" || name == "color" ) | |
125 | { | |
126 | spanAttrs.m_fgCol = value; | |
127 | } | |
128 | else if ( name == "background" || name == "bgcolor" ) | |
129 | { | |
130 | spanAttrs.m_bgCol = value; | |
131 | } | |
132 | else if ( name == "font_family" || name == "face" ) | |
133 | { | |
134 | spanAttrs.m_fontFace = value; | |
135 | } | |
136 | else if ( name == "font_weight" || name == "weight" ) | |
137 | { | |
138 | unsigned long weight; | |
139 | ||
140 | if ( value == "ultralight" || value == "light" || value == "normal" ) | |
141 | spanAttrs.m_isBold = wxMarkupSpanAttributes::No; | |
142 | else if ( value == "bold" || value == "ultrabold" || value == "heavy" ) | |
143 | spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes; | |
144 | else if ( value.ToULong(&weight) ) | |
145 | spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes | |
146 | : wxMarkupSpanAttributes::No; | |
147 | else | |
148 | return wxString::Format("invalid font weight \"%s\"", valueOrig); | |
149 | } | |
150 | else if ( name == "font_style" || name == "style" ) | |
151 | { | |
152 | if ( value == "normal" ) | |
153 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::No; | |
154 | else if ( value == "oblique" || value == "italic" ) | |
155 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes; | |
156 | else | |
157 | return wxString::Format("invalid font style \"%s\"", valueOrig); | |
158 | } | |
159 | else if ( name == "size" ) | |
160 | { | |
161 | unsigned long size; | |
162 | if ( value.ToULong(&size) ) | |
163 | { | |
164 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts; | |
165 | spanAttrs.m_fontSize = size; | |
166 | } | |
167 | else if ( value == "smaller" || value == "larger" ) | |
168 | { | |
169 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative; | |
170 | spanAttrs.m_fontSize = value == "smaller" ? -1 : +1; | |
171 | } | |
172 | else // Must be a CSS-like size specification | |
173 | { | |
174 | int cssSize = 1; | |
175 | wxString rest; | |
176 | if ( value.StartsWith("xx-", &rest) ) | |
177 | cssSize = 3; | |
178 | else if ( value.StartsWith("x-", &rest) ) | |
179 | cssSize = 2; | |
180 | else if ( value == "medium" ) | |
181 | cssSize = 0; | |
182 | else | |
183 | rest = value; | |
184 | ||
185 | if ( cssSize != 0 ) | |
186 | { | |
187 | if ( rest == "small" ) | |
188 | cssSize = -cssSize; | |
189 | else if ( rest != "large" ) | |
190 | return wxString::Format("invalid font size \"%s\"", | |
191 | valueOrig); | |
192 | } | |
193 | ||
194 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic; | |
195 | spanAttrs.m_fontSize = cssSize; | |
196 | } | |
197 | } | |
198 | } | |
199 | ||
200 | return wxString(); | |
201 | } | |
202 | ||
203 | bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start) | |
204 | { | |
205 | if ( tagAndAttrs.name.CmpNoCase("span") == 0 ) | |
206 | { | |
207 | if ( start ) | |
208 | m_output.OnSpanStart(tagAndAttrs.attrs); | |
209 | else | |
210 | m_output.OnSpanEnd(tagAndAttrs.attrs); | |
211 | ||
212 | return true; | |
213 | } | |
214 | else // non-span tag | |
215 | { | |
216 | static const struct TagHandler | |
217 | { | |
218 | const char *name; | |
219 | void (wxMarkupParserOutput::*startFunc)(); | |
220 | void (wxMarkupParserOutput::*endFunc)(); | |
221 | } tagHandlers[] = | |
222 | { | |
223 | { "b", &wxMarkupParserOutput::OnBoldStart, | |
224 | &wxMarkupParserOutput::OnBoldEnd }, | |
225 | { "i", &wxMarkupParserOutput::OnItalicStart, | |
226 | &wxMarkupParserOutput::OnItalicEnd }, | |
227 | { "u", &wxMarkupParserOutput::OnUnderlinedStart, | |
228 | &wxMarkupParserOutput::OnUnderlinedEnd }, | |
229 | { "s", &wxMarkupParserOutput::OnStrikethroughStart, | |
230 | &wxMarkupParserOutput::OnStrikethroughEnd }, | |
231 | { "big", &wxMarkupParserOutput::OnBigStart, | |
232 | &wxMarkupParserOutput::OnBigEnd }, | |
233 | { "small", &wxMarkupParserOutput::OnSmallStart, | |
234 | &wxMarkupParserOutput::OnSmallEnd }, | |
235 | { "tt", &wxMarkupParserOutput::OnTeletypeStart, | |
236 | &wxMarkupParserOutput::OnTeletypeEnd }, | |
237 | }; | |
238 | ||
239 | for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ ) | |
240 | { | |
241 | const TagHandler& h = tagHandlers[n]; | |
242 | ||
243 | if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 ) | |
244 | { | |
245 | if ( start ) | |
246 | (m_output.*(h.startFunc))(); | |
247 | else | |
248 | (m_output.*(h.endFunc))(); | |
249 | ||
250 | return true; | |
251 | } | |
252 | } | |
253 | } | |
254 | ||
255 | // Unknown tag name. | |
256 | return false; | |
257 | } | |
258 | ||
259 | bool wxMarkupParser::Parse(const wxString& text) | |
260 | { | |
261 | // The stack containing the names and corresponding attributes (which are | |
262 | // actually only used for <span> tags) of all of the currently opened tag | |
263 | // or none if we're not inside any tag. | |
264 | wxStack<TagAndAttrs> tags; | |
265 | ||
266 | // Current run of text. | |
267 | wxString current; | |
268 | ||
269 | const wxString::const_iterator end = text.end(); | |
270 | for ( wxString::const_iterator it = text.begin(); it != end; ++it ) | |
271 | { | |
272 | switch ( (*it).GetValue() ) | |
273 | { | |
274 | case '<': | |
275 | { | |
276 | // Flush the text preceding the tag, if any. | |
277 | if ( !current.empty() ) | |
278 | { | |
279 | m_output.OnText(current); | |
280 | current.clear(); | |
281 | } | |
c564ca3c VZ |
282 | |
283 | // This variable is used only in the debugging messages | |
284 | // and doesn't need to be defined if they're not compiled | |
285 | // at all (it actually would result in unused variable | |
286 | // messages in this case). | |
e779f093 | 287 | #if wxUSE_LOG_DEBUG || !defined(HAVE_VARIADIC_MACROS) |
9bb9964e VZ |
288 | // Remember the tag starting position for the error |
289 | // messages. | |
290 | const size_t pos = it - text.begin(); | |
e2cb99c8 | 291 | #endif |
9bb9964e VZ |
292 | bool start = true; |
293 | if ( ++it != end && *it == '/' ) | |
294 | { | |
295 | start = false; | |
296 | ++it; | |
297 | } | |
298 | ||
299 | const wxString tag = ExtractUntil('>', it, end); | |
300 | if ( tag.empty() ) | |
301 | { | |
302 | wxLogDebug("%s at %lu.", | |
303 | it == end ? "Unclosed tag starting" | |
304 | : "Empty tag", | |
305 | pos); | |
306 | return false; | |
307 | } | |
308 | ||
309 | if ( start ) | |
310 | { | |
311 | wxString attrs; | |
312 | const wxString name = tag.BeforeFirst(' ', &attrs); | |
313 | ||
314 | TagAndAttrs tagAndAttrs(name); | |
315 | const wxString err = ParseAttrs(attrs, tagAndAttrs); | |
316 | if ( !err.empty() ) | |
317 | { | |
318 | wxLogDebug("Bad attributes for \"%s\" " | |
319 | "at %lu: %s.", | |
320 | name, pos, err); | |
321 | return false; | |
322 | } | |
323 | ||
324 | tags.push(tagAndAttrs); | |
325 | } | |
326 | else // end tag | |
327 | { | |
328 | if ( tags.empty() || tags.top().name != tag ) | |
329 | { | |
330 | wxLogDebug("Unmatched closing tag \"%s\" at %lu.", | |
331 | tag, pos); | |
332 | return false; | |
333 | } | |
334 | } | |
335 | ||
336 | if ( !OutputTag(tags.top(), start) ) | |
337 | { | |
338 | wxLogDebug("Unknown tag at %lu.", pos); | |
339 | return false; | |
340 | } | |
341 | ||
342 | if ( !start ) | |
343 | tags.pop(); | |
344 | } | |
345 | break; | |
346 | ||
347 | case '>': | |
348 | wxLogDebug("'>' should be escaped as \">\"; at %lu.", | |
349 | it - text.begin()); | |
350 | break; | |
351 | ||
352 | case '&': | |
353 | // Processing is somewhat complicated: we need to recognize at | |
354 | // least the "<" entity to allow escaping left square | |
355 | // brackets in the markup and, in fact, we recognize all of the | |
356 | // standard XML entities for consistency with Pango markup | |
357 | // parsing. | |
358 | // | |
359 | // However we also allow '&' to appear unescaped, i.e. directly | |
360 | // and not as "&" when it is used to introduce the mnemonic | |
361 | // for the label. In this case we simply leave it alone. | |
362 | // | |
363 | // Notice that this logic makes it impossible to have a label | |
364 | // with "lt;" inside it and using "l" as mnemonic but hopefully | |
365 | // this shouldn't be a problem in practice. | |
366 | { | |
367 | const size_t pos = it - text.begin() + 1; | |
368 | ||
369 | unsigned n; | |
370 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
371 | { | |
372 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
373 | if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0 | |
374 | && text[pos + xmlEnt.len] == ';' ) | |
375 | { | |
376 | // Escape the ampersands if needed to protect them | |
377 | // from being interpreted as mnemonics indicators. | |
378 | if ( xmlEnt.value == '&' ) | |
379 | current += "&&"; | |
380 | else | |
381 | current += xmlEnt.value; | |
382 | ||
383 | it += xmlEnt.len + 1; // +1 for '&' itself | |
384 | ||
385 | break; | |
386 | } | |
387 | } | |
388 | ||
389 | if ( n < WXSIZEOF(xmlEntities) ) | |
390 | break; | |
391 | //else: fall through, '&' is not special | |
392 | } | |
393 | ||
394 | default: | |
395 | current += *it; | |
396 | } | |
397 | } | |
398 | ||
399 | if ( !tags.empty() ) | |
400 | { | |
401 | wxLogDebug("Missing closing tag for \"%s\"", tags.top().name); | |
402 | return false; | |
403 | } | |
404 | ||
405 | if ( !current.empty() ) | |
406 | m_output.OnText(current); | |
407 | ||
408 | return true; | |
409 | } | |
410 | ||
411 | /* static */ | |
412 | wxString wxMarkupParser::Quote(const wxString& text) | |
413 | { | |
414 | wxString quoted; | |
415 | quoted.reserve(text.length()); | |
416 | ||
417 | for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it ) | |
418 | { | |
419 | unsigned n; | |
420 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
421 | { | |
422 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
423 | if ( *it == xmlEnt.value ) | |
424 | { | |
425 | quoted << '&' << xmlEnt.name << ';'; | |
426 | break; | |
427 | } | |
428 | } | |
429 | ||
430 | if ( n == WXSIZEOF(xmlEntities) ) | |
431 | quoted += *it; | |
432 | } | |
433 | ||
434 | return quoted; | |
435 | } | |
5eb051a7 VZ |
436 | |
437 | /* static */ | |
438 | wxString wxMarkupParser::Strip(const wxString& text) | |
439 | { | |
440 | class StripOutput : public wxMarkupParserOutput | |
441 | { | |
442 | public: | |
443 | StripOutput() { } | |
444 | ||
445 | const wxString& GetText() const { return m_text; } | |
446 | ||
447 | virtual void OnText(const wxString& text) { m_text += text; } | |
448 | ||
449 | virtual void OnBoldStart() { } | |
450 | virtual void OnBoldEnd() { } | |
451 | ||
452 | virtual void OnItalicStart() { } | |
453 | virtual void OnItalicEnd() { } | |
454 | ||
455 | virtual void OnUnderlinedStart() { } | |
456 | virtual void OnUnderlinedEnd() { } | |
457 | ||
458 | virtual void OnStrikethroughStart() { } | |
459 | virtual void OnStrikethroughEnd() { } | |
460 | ||
461 | virtual void OnBigStart() { } | |
462 | virtual void OnBigEnd() { } | |
463 | ||
464 | virtual void OnSmallStart() { } | |
465 | virtual void OnSmallEnd() { } | |
466 | ||
467 | virtual void OnTeletypeStart() { } | |
468 | virtual void OnTeletypeEnd() { } | |
469 | ||
470 | virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
471 | virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
472 | ||
473 | private: | |
474 | wxString m_text; | |
475 | }; | |
476 | ||
477 | StripOutput output; | |
478 | wxMarkupParser parser(output); | |
479 | if ( !parser.Parse(text) ) | |
480 | return wxString(); | |
481 | ||
482 | return output.GetText(); | |
483 | } | |
f5bdfc69 VZ |
484 | |
485 | #endif // wxUSE_MARKUP |