]>
Commit | Line | Data |
---|---|---|
9bb9964e VZ |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Name: src/common/markupparser.cpp | |
3 | // Purpose: Implementation of wxMarkupParser. | |
4 | // Author: Vadim Zeitlin | |
5 | // Created: 2011-02-16 | |
e2cb99c8 | 6 | // RCS-ID: $Id$ |
9bb9964e VZ |
7 | // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org> |
8 | // Licence: wxWindows licence | |
9 | /////////////////////////////////////////////////////////////////////////////// | |
10 | ||
11 | // ============================================================================ | |
12 | // declarations | |
13 | // ============================================================================ | |
14 | ||
15 | // ---------------------------------------------------------------------------- | |
16 | // headers | |
17 | // ---------------------------------------------------------------------------- | |
18 | ||
19 | // for compilers that support precompilation, includes "wx.h". | |
20 | #include "wx/wxprec.h" | |
21 | ||
22 | #ifdef __BORLANDC__ | |
23 | #pragma hdrstop | |
24 | #endif | |
25 | ||
f5bdfc69 | 26 | #if wxUSE_MARKUP |
9bb9964e | 27 | |
1a6e6d54 VZ |
28 | #ifndef WX_PRECOMP |
29 | #include "wx/log.h" | |
30 | #endif | |
31 | ||
9bb9964e VZ |
32 | #include "wx/private/markupparser.h" |
33 | ||
34 | #include "wx/stack.h" | |
35 | ||
36 | namespace | |
37 | { | |
38 | ||
39 | // ---------------------------------------------------------------------------- | |
40 | // constants | |
41 | // ---------------------------------------------------------------------------- | |
42 | ||
// Description of one of the predefined XML 1.0 entities: "&name;" in the
// markup stands for the single character value.
struct XMLEntity
{
    const char *name;   // entity name without the leading '&' and trailing ';'
    int len;            // == strlen(name), stored to avoid recomputing it
    char value;         // the character the entity expands to
};

// Table of all the predefined XML 1.0 entities.
const XMLEntity xmlEntities[] =
{
    { "lt",   2, '<'  },
    { "gt",   2, '>'  },
    { "amp",  3, '&'  },
    { "apos", 4, '\'' },
    { "quot", 4, '"'  },
};
57 | ||
58 | // ---------------------------------------------------------------------------- | |
59 | // helper functions | |
60 | // ---------------------------------------------------------------------------- | |
61 | ||
62 | wxString | |
63 | ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end) | |
64 | { | |
65 | wxString str; | |
66 | for ( ; it != end; ++it ) | |
67 | { | |
68 | if ( *it == ch ) | |
69 | return str; | |
70 | ||
71 | str += *it; | |
72 | } | |
73 | ||
74 | // Return empty string to indicate that we didn't find ch at all. | |
75 | return wxString(); | |
76 | } | |
77 | ||
78 | } // anonymous namespace | |
79 | ||
80 | // ============================================================================ | |
81 | // wxMarkupParser implementation | |
82 | // ============================================================================ | |
83 | ||
84 | wxString | |
85 | wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs) | |
86 | { | |
87 | if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() ) | |
88 | { | |
89 | return wxString::Format("tag \"%s\" can't have attributes", | |
90 | tagAndAttrs.name); | |
91 | } | |
92 | ||
93 | // TODO: Parse more attributes described at | |
94 | // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html | |
95 | // and at least ignore them gracefully instead of giving errors (but | |
96 | // quite a few of them could be supported as well, notable font_desc). | |
97 | ||
98 | wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs; | |
99 | ||
100 | while ( !attrs.empty() ) | |
101 | { | |
102 | wxString rest; | |
103 | const wxString attr = attrs.BeforeFirst(' ', &rest); | |
104 | attrs = rest; | |
105 | ||
106 | // The "original" versions are used for error messages only. | |
107 | wxString valueOrig; | |
108 | const wxString nameOrig = attr.BeforeFirst('=', &valueOrig); | |
109 | ||
110 | const wxString name = nameOrig.Lower(); | |
111 | wxString value = valueOrig.Lower(); | |
112 | ||
113 | // All attributes values must be quoted. | |
114 | if ( value.length() < 2 || | |
115 | (value[0] != value.Last()) || | |
116 | (value[0] != '"' && value[0] != '\'') ) | |
117 | { | |
118 | return wxString::Format("bad quoting for value of \"%s\"", | |
119 | nameOrig); | |
120 | } | |
121 | ||
122 | value.assign(value, 1, value.length() - 2); | |
123 | ||
124 | if ( name == "foreground" || name == "fgcolor" || name == "color" ) | |
125 | { | |
126 | spanAttrs.m_fgCol = value; | |
127 | } | |
128 | else if ( name == "background" || name == "bgcolor" ) | |
129 | { | |
130 | spanAttrs.m_bgCol = value; | |
131 | } | |
132 | else if ( name == "font_family" || name == "face" ) | |
133 | { | |
134 | spanAttrs.m_fontFace = value; | |
135 | } | |
136 | else if ( name == "font_weight" || name == "weight" ) | |
137 | { | |
138 | unsigned long weight; | |
139 | ||
140 | if ( value == "ultralight" || value == "light" || value == "normal" ) | |
141 | spanAttrs.m_isBold = wxMarkupSpanAttributes::No; | |
142 | else if ( value == "bold" || value == "ultrabold" || value == "heavy" ) | |
143 | spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes; | |
144 | else if ( value.ToULong(&weight) ) | |
145 | spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes | |
146 | : wxMarkupSpanAttributes::No; | |
147 | else | |
148 | return wxString::Format("invalid font weight \"%s\"", valueOrig); | |
149 | } | |
150 | else if ( name == "font_style" || name == "style" ) | |
151 | { | |
152 | if ( value == "normal" ) | |
153 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::No; | |
154 | else if ( value == "oblique" || value == "italic" ) | |
155 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes; | |
156 | else | |
157 | return wxString::Format("invalid font style \"%s\"", valueOrig); | |
158 | } | |
159 | else if ( name == "size" ) | |
160 | { | |
161 | unsigned long size; | |
162 | if ( value.ToULong(&size) ) | |
163 | { | |
164 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts; | |
165 | spanAttrs.m_fontSize = size; | |
166 | } | |
167 | else if ( value == "smaller" || value == "larger" ) | |
168 | { | |
169 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative; | |
170 | spanAttrs.m_fontSize = value == "smaller" ? -1 : +1; | |
171 | } | |
172 | else // Must be a CSS-like size specification | |
173 | { | |
174 | int cssSize = 1; | |
9bb9964e VZ |
175 | if ( value.StartsWith("xx-", &rest) ) |
176 | cssSize = 3; | |
177 | else if ( value.StartsWith("x-", &rest) ) | |
178 | cssSize = 2; | |
179 | else if ( value == "medium" ) | |
180 | cssSize = 0; | |
181 | else | |
182 | rest = value; | |
183 | ||
184 | if ( cssSize != 0 ) | |
185 | { | |
186 | if ( rest == "small" ) | |
187 | cssSize = -cssSize; | |
188 | else if ( rest != "large" ) | |
189 | return wxString::Format("invalid font size \"%s\"", | |
190 | valueOrig); | |
191 | } | |
192 | ||
193 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic; | |
194 | spanAttrs.m_fontSize = cssSize; | |
195 | } | |
196 | } | |
197 | } | |
198 | ||
199 | return wxString(); | |
200 | } | |
201 | ||
202 | bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start) | |
203 | { | |
204 | if ( tagAndAttrs.name.CmpNoCase("span") == 0 ) | |
205 | { | |
206 | if ( start ) | |
207 | m_output.OnSpanStart(tagAndAttrs.attrs); | |
208 | else | |
209 | m_output.OnSpanEnd(tagAndAttrs.attrs); | |
210 | ||
211 | return true; | |
212 | } | |
213 | else // non-span tag | |
214 | { | |
215 | static const struct TagHandler | |
216 | { | |
217 | const char *name; | |
218 | void (wxMarkupParserOutput::*startFunc)(); | |
219 | void (wxMarkupParserOutput::*endFunc)(); | |
220 | } tagHandlers[] = | |
221 | { | |
222 | { "b", &wxMarkupParserOutput::OnBoldStart, | |
223 | &wxMarkupParserOutput::OnBoldEnd }, | |
224 | { "i", &wxMarkupParserOutput::OnItalicStart, | |
225 | &wxMarkupParserOutput::OnItalicEnd }, | |
226 | { "u", &wxMarkupParserOutput::OnUnderlinedStart, | |
227 | &wxMarkupParserOutput::OnUnderlinedEnd }, | |
228 | { "s", &wxMarkupParserOutput::OnStrikethroughStart, | |
229 | &wxMarkupParserOutput::OnStrikethroughEnd }, | |
230 | { "big", &wxMarkupParserOutput::OnBigStart, | |
231 | &wxMarkupParserOutput::OnBigEnd }, | |
232 | { "small", &wxMarkupParserOutput::OnSmallStart, | |
233 | &wxMarkupParserOutput::OnSmallEnd }, | |
234 | { "tt", &wxMarkupParserOutput::OnTeletypeStart, | |
235 | &wxMarkupParserOutput::OnTeletypeEnd }, | |
236 | }; | |
237 | ||
238 | for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ ) | |
239 | { | |
240 | const TagHandler& h = tagHandlers[n]; | |
241 | ||
242 | if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 ) | |
243 | { | |
244 | if ( start ) | |
245 | (m_output.*(h.startFunc))(); | |
246 | else | |
247 | (m_output.*(h.endFunc))(); | |
248 | ||
249 | return true; | |
250 | } | |
251 | } | |
252 | } | |
253 | ||
254 | // Unknown tag name. | |
255 | return false; | |
256 | } | |
257 | ||
258 | bool wxMarkupParser::Parse(const wxString& text) | |
259 | { | |
260 | // The stack containing the names and corresponding attributes (which are | |
261 | // actually only used for <span> tags) of all of the currently opened tag | |
262 | // or none if we're not inside any tag. | |
263 | wxStack<TagAndAttrs> tags; | |
264 | ||
265 | // Current run of text. | |
266 | wxString current; | |
267 | ||
268 | const wxString::const_iterator end = text.end(); | |
269 | for ( wxString::const_iterator it = text.begin(); it != end; ++it ) | |
270 | { | |
271 | switch ( (*it).GetValue() ) | |
272 | { | |
273 | case '<': | |
274 | { | |
275 | // Flush the text preceding the tag, if any. | |
276 | if ( !current.empty() ) | |
277 | { | |
278 | m_output.OnText(current); | |
279 | current.clear(); | |
280 | } | |
c564ca3c VZ |
281 | |
282 | // This variable is used only in the debugging messages | |
283 | // and doesn't need to be defined if they're not compiled | |
284 | // at all (it actually would result in unused variable | |
285 | // messages in this case). | |
e779f093 | 286 | #if wxUSE_LOG_DEBUG || !defined(HAVE_VARIADIC_MACROS) |
9bb9964e VZ |
287 | // Remember the tag starting position for the error |
288 | // messages. | |
289 | const size_t pos = it - text.begin(); | |
e2cb99c8 | 290 | #endif |
9bb9964e VZ |
291 | bool start = true; |
292 | if ( ++it != end && *it == '/' ) | |
293 | { | |
294 | start = false; | |
295 | ++it; | |
296 | } | |
297 | ||
298 | const wxString tag = ExtractUntil('>', it, end); | |
299 | if ( tag.empty() ) | |
300 | { | |
301 | wxLogDebug("%s at %lu.", | |
302 | it == end ? "Unclosed tag starting" | |
303 | : "Empty tag", | |
304 | pos); | |
305 | return false; | |
306 | } | |
307 | ||
308 | if ( start ) | |
309 | { | |
310 | wxString attrs; | |
311 | const wxString name = tag.BeforeFirst(' ', &attrs); | |
312 | ||
313 | TagAndAttrs tagAndAttrs(name); | |
314 | const wxString err = ParseAttrs(attrs, tagAndAttrs); | |
315 | if ( !err.empty() ) | |
316 | { | |
317 | wxLogDebug("Bad attributes for \"%s\" " | |
318 | "at %lu: %s.", | |
319 | name, pos, err); | |
320 | return false; | |
321 | } | |
322 | ||
323 | tags.push(tagAndAttrs); | |
324 | } | |
325 | else // end tag | |
326 | { | |
327 | if ( tags.empty() || tags.top().name != tag ) | |
328 | { | |
329 | wxLogDebug("Unmatched closing tag \"%s\" at %lu.", | |
330 | tag, pos); | |
331 | return false; | |
332 | } | |
333 | } | |
334 | ||
335 | if ( !OutputTag(tags.top(), start) ) | |
336 | { | |
337 | wxLogDebug("Unknown tag at %lu.", pos); | |
338 | return false; | |
339 | } | |
340 | ||
341 | if ( !start ) | |
342 | tags.pop(); | |
343 | } | |
344 | break; | |
345 | ||
346 | case '>': | |
347 | wxLogDebug("'>' should be escaped as \">\"; at %lu.", | |
348 | it - text.begin()); | |
349 | break; | |
350 | ||
351 | case '&': | |
352 | // Processing is somewhat complicated: we need to recognize at | |
353 | // least the "<" entity to allow escaping left square | |
354 | // brackets in the markup and, in fact, we recognize all of the | |
355 | // standard XML entities for consistency with Pango markup | |
356 | // parsing. | |
357 | // | |
358 | // However we also allow '&' to appear unescaped, i.e. directly | |
359 | // and not as "&" when it is used to introduce the mnemonic | |
360 | // for the label. In this case we simply leave it alone. | |
361 | // | |
362 | // Notice that this logic makes it impossible to have a label | |
363 | // with "lt;" inside it and using "l" as mnemonic but hopefully | |
364 | // this shouldn't be a problem in practice. | |
365 | { | |
366 | const size_t pos = it - text.begin() + 1; | |
367 | ||
368 | unsigned n; | |
369 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
370 | { | |
371 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
372 | if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0 | |
373 | && text[pos + xmlEnt.len] == ';' ) | |
374 | { | |
375 | // Escape the ampersands if needed to protect them | |
376 | // from being interpreted as mnemonics indicators. | |
377 | if ( xmlEnt.value == '&' ) | |
378 | current += "&&"; | |
379 | else | |
380 | current += xmlEnt.value; | |
381 | ||
382 | it += xmlEnt.len + 1; // +1 for '&' itself | |
383 | ||
384 | break; | |
385 | } | |
386 | } | |
387 | ||
388 | if ( n < WXSIZEOF(xmlEntities) ) | |
389 | break; | |
390 | //else: fall through, '&' is not special | |
391 | } | |
392 | ||
393 | default: | |
394 | current += *it; | |
395 | } | |
396 | } | |
397 | ||
398 | if ( !tags.empty() ) | |
399 | { | |
400 | wxLogDebug("Missing closing tag for \"%s\"", tags.top().name); | |
401 | return false; | |
402 | } | |
403 | ||
404 | if ( !current.empty() ) | |
405 | m_output.OnText(current); | |
406 | ||
407 | return true; | |
408 | } | |
409 | ||
410 | /* static */ | |
411 | wxString wxMarkupParser::Quote(const wxString& text) | |
412 | { | |
413 | wxString quoted; | |
414 | quoted.reserve(text.length()); | |
415 | ||
416 | for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it ) | |
417 | { | |
418 | unsigned n; | |
419 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
420 | { | |
421 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
422 | if ( *it == xmlEnt.value ) | |
423 | { | |
424 | quoted << '&' << xmlEnt.name << ';'; | |
425 | break; | |
426 | } | |
427 | } | |
428 | ||
429 | if ( n == WXSIZEOF(xmlEntities) ) | |
430 | quoted += *it; | |
431 | } | |
432 | ||
433 | return quoted; | |
434 | } | |
5eb051a7 VZ |
435 | |
436 | /* static */ | |
437 | wxString wxMarkupParser::Strip(const wxString& text) | |
438 | { | |
439 | class StripOutput : public wxMarkupParserOutput | |
440 | { | |
441 | public: | |
442 | StripOutput() { } | |
443 | ||
444 | const wxString& GetText() const { return m_text; } | |
445 | ||
446 | virtual void OnText(const wxString& text) { m_text += text; } | |
447 | ||
448 | virtual void OnBoldStart() { } | |
449 | virtual void OnBoldEnd() { } | |
450 | ||
451 | virtual void OnItalicStart() { } | |
452 | virtual void OnItalicEnd() { } | |
453 | ||
454 | virtual void OnUnderlinedStart() { } | |
455 | virtual void OnUnderlinedEnd() { } | |
456 | ||
457 | virtual void OnStrikethroughStart() { } | |
458 | virtual void OnStrikethroughEnd() { } | |
459 | ||
460 | virtual void OnBigStart() { } | |
461 | virtual void OnBigEnd() { } | |
462 | ||
463 | virtual void OnSmallStart() { } | |
464 | virtual void OnSmallEnd() { } | |
465 | ||
466 | virtual void OnTeletypeStart() { } | |
467 | virtual void OnTeletypeEnd() { } | |
468 | ||
469 | virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
470 | virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
471 | ||
472 | private: | |
473 | wxString m_text; | |
474 | }; | |
475 | ||
476 | StripOutput output; | |
477 | wxMarkupParser parser(output); | |
478 | if ( !parser.Parse(text) ) | |
479 | return wxString(); | |
480 | ||
481 | return output.GetText(); | |
482 | } | |
f5bdfc69 VZ |
483 | |
484 | #endif // wxUSE_MARKUP |