]>
Commit | Line | Data |
---|---|---|
9bb9964e VZ |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Name: src/common/markupparser.cpp | |
3 | // Purpose: Implementation of wxMarkupParser. | |
4 | // Author: Vadim Zeitlin | |
5 | // Created: 2011-02-16 | |
9bb9964e VZ |
6 | // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org> |
7 | // Licence: wxWindows licence | |
8 | /////////////////////////////////////////////////////////////////////////////// | |
9 | ||
10 | // ============================================================================ | |
11 | // declarations | |
12 | // ============================================================================ | |
13 | ||
14 | // ---------------------------------------------------------------------------- | |
15 | // headers | |
16 | // ---------------------------------------------------------------------------- | |
17 | ||
18 | // for compilers that support precompilation, includes "wx.h". | |
19 | #include "wx/wxprec.h" | |
20 | ||
21 | #ifdef __BORLANDC__ | |
22 | #pragma hdrstop | |
23 | #endif | |
24 | ||
f5bdfc69 | 25 | #if wxUSE_MARKUP |
9bb9964e | 26 | |
1a6e6d54 VZ |
27 | #ifndef WX_PRECOMP |
28 | #include "wx/log.h" | |
29 | #endif | |
30 | ||
9bb9964e VZ |
31 | #include "wx/private/markupparser.h" |
32 | ||
33 | #include "wx/stack.h" | |
34 | ||
35 | namespace | |
36 | { | |
37 | ||
38 | // ---------------------------------------------------------------------------- | |
39 | // constants | |
40 | // ---------------------------------------------------------------------------- | |
41 | ||
// Description of a single predefined XML 1.0 entity.
struct XMLEntity
{
    // Entity name as it appears between '&' and ';' in the markup.
    const char *name;

    // Length of the name, always equal to strlen(name).
    int len;

    // The character this entity stands for.
    char value;
};

// Table of all the predefined XML 1.0 entities.
const XMLEntity xmlEntities[] =
{
    { "lt",   2, '<'  },
    { "gt",   2, '>'  },
    { "amp",  3, '&'  },
    { "apos", 4, '\'' },
    { "quot", 4, '"'  },
};
56 | ||
57 | // ---------------------------------------------------------------------------- | |
58 | // helper functions | |
59 | // ---------------------------------------------------------------------------- | |
60 | ||
61 | wxString | |
62 | ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end) | |
63 | { | |
64 | wxString str; | |
65 | for ( ; it != end; ++it ) | |
66 | { | |
67 | if ( *it == ch ) | |
68 | return str; | |
69 | ||
70 | str += *it; | |
71 | } | |
72 | ||
73 | // Return empty string to indicate that we didn't find ch at all. | |
74 | return wxString(); | |
75 | } | |
76 | ||
77 | } // anonymous namespace | |
78 | ||
79 | // ============================================================================ | |
80 | // wxMarkupParser implementation | |
81 | // ============================================================================ | |
82 | ||
83 | wxString | |
84 | wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs) | |
85 | { | |
86 | if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() ) | |
87 | { | |
88 | return wxString::Format("tag \"%s\" can't have attributes", | |
89 | tagAndAttrs.name); | |
90 | } | |
91 | ||
92 | // TODO: Parse more attributes described at | |
93 | // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html | |
94 | // and at least ignore them gracefully instead of giving errors (but | |
95 | // quite a few of them could be supported as well, notable font_desc). | |
96 | ||
97 | wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs; | |
98 | ||
99 | while ( !attrs.empty() ) | |
100 | { | |
101 | wxString rest; | |
102 | const wxString attr = attrs.BeforeFirst(' ', &rest); | |
103 | attrs = rest; | |
104 | ||
105 | // The "original" versions are used for error messages only. | |
106 | wxString valueOrig; | |
107 | const wxString nameOrig = attr.BeforeFirst('=', &valueOrig); | |
108 | ||
109 | const wxString name = nameOrig.Lower(); | |
110 | wxString value = valueOrig.Lower(); | |
111 | ||
112 | // All attributes values must be quoted. | |
113 | if ( value.length() < 2 || | |
114 | (value[0] != value.Last()) || | |
115 | (value[0] != '"' && value[0] != '\'') ) | |
116 | { | |
117 | return wxString::Format("bad quoting for value of \"%s\"", | |
118 | nameOrig); | |
119 | } | |
120 | ||
121 | value.assign(value, 1, value.length() - 2); | |
122 | ||
123 | if ( name == "foreground" || name == "fgcolor" || name == "color" ) | |
124 | { | |
125 | spanAttrs.m_fgCol = value; | |
126 | } | |
127 | else if ( name == "background" || name == "bgcolor" ) | |
128 | { | |
129 | spanAttrs.m_bgCol = value; | |
130 | } | |
131 | else if ( name == "font_family" || name == "face" ) | |
132 | { | |
133 | spanAttrs.m_fontFace = value; | |
134 | } | |
135 | else if ( name == "font_weight" || name == "weight" ) | |
136 | { | |
137 | unsigned long weight; | |
138 | ||
139 | if ( value == "ultralight" || value == "light" || value == "normal" ) | |
140 | spanAttrs.m_isBold = wxMarkupSpanAttributes::No; | |
141 | else if ( value == "bold" || value == "ultrabold" || value == "heavy" ) | |
142 | spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes; | |
143 | else if ( value.ToULong(&weight) ) | |
144 | spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes | |
145 | : wxMarkupSpanAttributes::No; | |
146 | else | |
147 | return wxString::Format("invalid font weight \"%s\"", valueOrig); | |
148 | } | |
149 | else if ( name == "font_style" || name == "style" ) | |
150 | { | |
151 | if ( value == "normal" ) | |
152 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::No; | |
153 | else if ( value == "oblique" || value == "italic" ) | |
154 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes; | |
155 | else | |
156 | return wxString::Format("invalid font style \"%s\"", valueOrig); | |
157 | } | |
158 | else if ( name == "size" ) | |
159 | { | |
160 | unsigned long size; | |
161 | if ( value.ToULong(&size) ) | |
162 | { | |
163 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts; | |
164 | spanAttrs.m_fontSize = size; | |
165 | } | |
166 | else if ( value == "smaller" || value == "larger" ) | |
167 | { | |
168 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative; | |
169 | spanAttrs.m_fontSize = value == "smaller" ? -1 : +1; | |
170 | } | |
171 | else // Must be a CSS-like size specification | |
172 | { | |
173 | int cssSize = 1; | |
9bb9964e VZ |
174 | if ( value.StartsWith("xx-", &rest) ) |
175 | cssSize = 3; | |
176 | else if ( value.StartsWith("x-", &rest) ) | |
177 | cssSize = 2; | |
178 | else if ( value == "medium" ) | |
179 | cssSize = 0; | |
180 | else | |
181 | rest = value; | |
182 | ||
183 | if ( cssSize != 0 ) | |
184 | { | |
185 | if ( rest == "small" ) | |
186 | cssSize = -cssSize; | |
187 | else if ( rest != "large" ) | |
188 | return wxString::Format("invalid font size \"%s\"", | |
189 | valueOrig); | |
190 | } | |
191 | ||
192 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic; | |
193 | spanAttrs.m_fontSize = cssSize; | |
194 | } | |
195 | } | |
196 | } | |
197 | ||
198 | return wxString(); | |
199 | } | |
200 | ||
201 | bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start) | |
202 | { | |
203 | if ( tagAndAttrs.name.CmpNoCase("span") == 0 ) | |
204 | { | |
205 | if ( start ) | |
206 | m_output.OnSpanStart(tagAndAttrs.attrs); | |
207 | else | |
208 | m_output.OnSpanEnd(tagAndAttrs.attrs); | |
209 | ||
210 | return true; | |
211 | } | |
212 | else // non-span tag | |
213 | { | |
214 | static const struct TagHandler | |
215 | { | |
216 | const char *name; | |
217 | void (wxMarkupParserOutput::*startFunc)(); | |
218 | void (wxMarkupParserOutput::*endFunc)(); | |
219 | } tagHandlers[] = | |
220 | { | |
221 | { "b", &wxMarkupParserOutput::OnBoldStart, | |
222 | &wxMarkupParserOutput::OnBoldEnd }, | |
223 | { "i", &wxMarkupParserOutput::OnItalicStart, | |
224 | &wxMarkupParserOutput::OnItalicEnd }, | |
225 | { "u", &wxMarkupParserOutput::OnUnderlinedStart, | |
226 | &wxMarkupParserOutput::OnUnderlinedEnd }, | |
227 | { "s", &wxMarkupParserOutput::OnStrikethroughStart, | |
228 | &wxMarkupParserOutput::OnStrikethroughEnd }, | |
229 | { "big", &wxMarkupParserOutput::OnBigStart, | |
230 | &wxMarkupParserOutput::OnBigEnd }, | |
231 | { "small", &wxMarkupParserOutput::OnSmallStart, | |
232 | &wxMarkupParserOutput::OnSmallEnd }, | |
233 | { "tt", &wxMarkupParserOutput::OnTeletypeStart, | |
234 | &wxMarkupParserOutput::OnTeletypeEnd }, | |
235 | }; | |
236 | ||
237 | for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ ) | |
238 | { | |
239 | const TagHandler& h = tagHandlers[n]; | |
240 | ||
241 | if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 ) | |
242 | { | |
243 | if ( start ) | |
244 | (m_output.*(h.startFunc))(); | |
245 | else | |
246 | (m_output.*(h.endFunc))(); | |
247 | ||
248 | return true; | |
249 | } | |
250 | } | |
251 | } | |
252 | ||
253 | // Unknown tag name. | |
254 | return false; | |
255 | } | |
256 | ||
257 | bool wxMarkupParser::Parse(const wxString& text) | |
258 | { | |
259 | // The stack containing the names and corresponding attributes (which are | |
260 | // actually only used for <span> tags) of all of the currently opened tag | |
261 | // or none if we're not inside any tag. | |
262 | wxStack<TagAndAttrs> tags; | |
263 | ||
264 | // Current run of text. | |
265 | wxString current; | |
266 | ||
267 | const wxString::const_iterator end = text.end(); | |
268 | for ( wxString::const_iterator it = text.begin(); it != end; ++it ) | |
269 | { | |
270 | switch ( (*it).GetValue() ) | |
271 | { | |
272 | case '<': | |
273 | { | |
274 | // Flush the text preceding the tag, if any. | |
275 | if ( !current.empty() ) | |
276 | { | |
277 | m_output.OnText(current); | |
278 | current.clear(); | |
279 | } | |
c564ca3c VZ |
280 | |
281 | // This variable is used only in the debugging messages | |
282 | // and doesn't need to be defined if they're not compiled | |
283 | // at all (it actually would result in unused variable | |
284 | // messages in this case). | |
e779f093 | 285 | #if wxUSE_LOG_DEBUG || !defined(HAVE_VARIADIC_MACROS) |
9bb9964e VZ |
286 | // Remember the tag starting position for the error |
287 | // messages. | |
288 | const size_t pos = it - text.begin(); | |
e2cb99c8 | 289 | #endif |
9bb9964e VZ |
290 | bool start = true; |
291 | if ( ++it != end && *it == '/' ) | |
292 | { | |
293 | start = false; | |
294 | ++it; | |
295 | } | |
296 | ||
297 | const wxString tag = ExtractUntil('>', it, end); | |
298 | if ( tag.empty() ) | |
299 | { | |
300 | wxLogDebug("%s at %lu.", | |
301 | it == end ? "Unclosed tag starting" | |
302 | : "Empty tag", | |
303 | pos); | |
304 | return false; | |
305 | } | |
306 | ||
307 | if ( start ) | |
308 | { | |
309 | wxString attrs; | |
310 | const wxString name = tag.BeforeFirst(' ', &attrs); | |
311 | ||
312 | TagAndAttrs tagAndAttrs(name); | |
313 | const wxString err = ParseAttrs(attrs, tagAndAttrs); | |
314 | if ( !err.empty() ) | |
315 | { | |
316 | wxLogDebug("Bad attributes for \"%s\" " | |
317 | "at %lu: %s.", | |
318 | name, pos, err); | |
319 | return false; | |
320 | } | |
321 | ||
322 | tags.push(tagAndAttrs); | |
323 | } | |
324 | else // end tag | |
325 | { | |
326 | if ( tags.empty() || tags.top().name != tag ) | |
327 | { | |
328 | wxLogDebug("Unmatched closing tag \"%s\" at %lu.", | |
329 | tag, pos); | |
330 | return false; | |
331 | } | |
332 | } | |
333 | ||
334 | if ( !OutputTag(tags.top(), start) ) | |
335 | { | |
336 | wxLogDebug("Unknown tag at %lu.", pos); | |
337 | return false; | |
338 | } | |
339 | ||
340 | if ( !start ) | |
341 | tags.pop(); | |
342 | } | |
343 | break; | |
344 | ||
345 | case '>': | |
346 | wxLogDebug("'>' should be escaped as \">\"; at %lu.", | |
347 | it - text.begin()); | |
348 | break; | |
349 | ||
350 | case '&': | |
351 | // Processing is somewhat complicated: we need to recognize at | |
352 | // least the "<" entity to allow escaping left square | |
353 | // brackets in the markup and, in fact, we recognize all of the | |
354 | // standard XML entities for consistency with Pango markup | |
355 | // parsing. | |
356 | // | |
357 | // However we also allow '&' to appear unescaped, i.e. directly | |
358 | // and not as "&" when it is used to introduce the mnemonic | |
359 | // for the label. In this case we simply leave it alone. | |
360 | // | |
361 | // Notice that this logic makes it impossible to have a label | |
362 | // with "lt;" inside it and using "l" as mnemonic but hopefully | |
363 | // this shouldn't be a problem in practice. | |
364 | { | |
365 | const size_t pos = it - text.begin() + 1; | |
366 | ||
367 | unsigned n; | |
368 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
369 | { | |
370 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
371 | if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0 | |
372 | && text[pos + xmlEnt.len] == ';' ) | |
373 | { | |
374 | // Escape the ampersands if needed to protect them | |
375 | // from being interpreted as mnemonics indicators. | |
376 | if ( xmlEnt.value == '&' ) | |
377 | current += "&&"; | |
378 | else | |
379 | current += xmlEnt.value; | |
380 | ||
381 | it += xmlEnt.len + 1; // +1 for '&' itself | |
382 | ||
383 | break; | |
384 | } | |
385 | } | |
386 | ||
387 | if ( n < WXSIZEOF(xmlEntities) ) | |
388 | break; | |
389 | //else: fall through, '&' is not special | |
390 | } | |
391 | ||
392 | default: | |
393 | current += *it; | |
394 | } | |
395 | } | |
396 | ||
397 | if ( !tags.empty() ) | |
398 | { | |
399 | wxLogDebug("Missing closing tag for \"%s\"", tags.top().name); | |
400 | return false; | |
401 | } | |
402 | ||
403 | if ( !current.empty() ) | |
404 | m_output.OnText(current); | |
405 | ||
406 | return true; | |
407 | } | |
408 | ||
409 | /* static */ | |
410 | wxString wxMarkupParser::Quote(const wxString& text) | |
411 | { | |
412 | wxString quoted; | |
413 | quoted.reserve(text.length()); | |
414 | ||
415 | for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it ) | |
416 | { | |
417 | unsigned n; | |
418 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
419 | { | |
420 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
421 | if ( *it == xmlEnt.value ) | |
422 | { | |
423 | quoted << '&' << xmlEnt.name << ';'; | |
424 | break; | |
425 | } | |
426 | } | |
427 | ||
428 | if ( n == WXSIZEOF(xmlEntities) ) | |
429 | quoted += *it; | |
430 | } | |
431 | ||
432 | return quoted; | |
433 | } | |
5eb051a7 VZ |
434 | |
435 | /* static */ | |
436 | wxString wxMarkupParser::Strip(const wxString& text) | |
437 | { | |
438 | class StripOutput : public wxMarkupParserOutput | |
439 | { | |
440 | public: | |
441 | StripOutput() { } | |
442 | ||
443 | const wxString& GetText() const { return m_text; } | |
444 | ||
445 | virtual void OnText(const wxString& text) { m_text += text; } | |
446 | ||
447 | virtual void OnBoldStart() { } | |
448 | virtual void OnBoldEnd() { } | |
449 | ||
450 | virtual void OnItalicStart() { } | |
451 | virtual void OnItalicEnd() { } | |
452 | ||
453 | virtual void OnUnderlinedStart() { } | |
454 | virtual void OnUnderlinedEnd() { } | |
455 | ||
456 | virtual void OnStrikethroughStart() { } | |
457 | virtual void OnStrikethroughEnd() { } | |
458 | ||
459 | virtual void OnBigStart() { } | |
460 | virtual void OnBigEnd() { } | |
461 | ||
462 | virtual void OnSmallStart() { } | |
463 | virtual void OnSmallEnd() { } | |
464 | ||
465 | virtual void OnTeletypeStart() { } | |
466 | virtual void OnTeletypeEnd() { } | |
467 | ||
468 | virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
469 | virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
470 | ||
471 | private: | |
472 | wxString m_text; | |
473 | }; | |
474 | ||
475 | StripOutput output; | |
476 | wxMarkupParser parser(output); | |
477 | if ( !parser.Parse(text) ) | |
478 | return wxString(); | |
479 | ||
480 | return output.GetText(); | |
481 | } | |
f5bdfc69 VZ |
482 | |
483 | #endif // wxUSE_MARKUP |