]>
Commit | Line | Data |
---|---|---|
9bb9964e VZ |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Name: src/common/markupparser.cpp | |
3 | // Purpose: Implementation of wxMarkupParser. | |
4 | // Author: Vadim Zeitlin | |
5 | // Created: 2011-02-16 | |
6 | // RCS-ID: $Id: $ | |
7 | // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org> | |
8 | // Licence: wxWindows licence | |
9 | /////////////////////////////////////////////////////////////////////////////// | |
10 | ||
11 | // ============================================================================ | |
12 | // declarations | |
13 | // ============================================================================ | |
14 | ||
15 | // ---------------------------------------------------------------------------- | |
16 | // headers | |
17 | // ---------------------------------------------------------------------------- | |
18 | ||
19 | // for compilers that support precompilation, includes "wx.h". | |
20 | #include "wx/wxprec.h" | |
21 | ||
22 | #ifdef __BORLANDC__ | |
23 | #pragma hdrstop | |
24 | #endif | |
25 | ||
f5bdfc69 | 26 | #if wxUSE_MARKUP |
9bb9964e VZ |
27 | |
28 | #include "wx/private/markupparser.h" | |
29 | ||
30 | #include "wx/stack.h" | |
31 | ||
32 | namespace | |
33 | { | |
34 | ||
35 | // ---------------------------------------------------------------------------- | |
36 | // constants | |
37 | // ---------------------------------------------------------------------------- | |
38 | ||
39 | // Array containing the predefined XML 1.0 entities. | |
40 | const struct XMLEntity | |
41 | { | |
42 | const char *name; | |
43 | int len; // == strlen(name) | |
44 | char value; | |
45 | } xmlEntities[] = | |
46 | { | |
47 | { "lt", 2, '<' }, | |
48 | { "gt", 2, '>' }, | |
49 | { "amp", 3, '&' }, | |
50 | { "apos", 4, '\''}, | |
51 | { "quot", 4, '"' }, | |
52 | }; | |
53 | ||
54 | // ---------------------------------------------------------------------------- | |
55 | // helper functions | |
56 | // ---------------------------------------------------------------------------- | |
57 | ||
58 | wxString | |
59 | ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end) | |
60 | { | |
61 | wxString str; | |
62 | for ( ; it != end; ++it ) | |
63 | { | |
64 | if ( *it == ch ) | |
65 | return str; | |
66 | ||
67 | str += *it; | |
68 | } | |
69 | ||
70 | // Return empty string to indicate that we didn't find ch at all. | |
71 | return wxString(); | |
72 | } | |
73 | ||
74 | } // anonymous namespace | |
75 | ||
76 | // ============================================================================ | |
77 | // wxMarkupParser implementation | |
78 | // ============================================================================ | |
79 | ||
80 | wxString | |
81 | wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs) | |
82 | { | |
83 | if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() ) | |
84 | { | |
85 | return wxString::Format("tag \"%s\" can't have attributes", | |
86 | tagAndAttrs.name); | |
87 | } | |
88 | ||
89 | // TODO: Parse more attributes described at | |
90 | // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html | |
91 | // and at least ignore them gracefully instead of giving errors (but | |
92 | // quite a few of them could be supported as well, notable font_desc). | |
93 | ||
94 | wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs; | |
95 | ||
96 | while ( !attrs.empty() ) | |
97 | { | |
98 | wxString rest; | |
99 | const wxString attr = attrs.BeforeFirst(' ', &rest); | |
100 | attrs = rest; | |
101 | ||
102 | // The "original" versions are used for error messages only. | |
103 | wxString valueOrig; | |
104 | const wxString nameOrig = attr.BeforeFirst('=', &valueOrig); | |
105 | ||
106 | const wxString name = nameOrig.Lower(); | |
107 | wxString value = valueOrig.Lower(); | |
108 | ||
109 | // All attributes values must be quoted. | |
110 | if ( value.length() < 2 || | |
111 | (value[0] != value.Last()) || | |
112 | (value[0] != '"' && value[0] != '\'') ) | |
113 | { | |
114 | return wxString::Format("bad quoting for value of \"%s\"", | |
115 | nameOrig); | |
116 | } | |
117 | ||
118 | value.assign(value, 1, value.length() - 2); | |
119 | ||
120 | if ( name == "foreground" || name == "fgcolor" || name == "color" ) | |
121 | { | |
122 | spanAttrs.m_fgCol = value; | |
123 | } | |
124 | else if ( name == "background" || name == "bgcolor" ) | |
125 | { | |
126 | spanAttrs.m_bgCol = value; | |
127 | } | |
128 | else if ( name == "font_family" || name == "face" ) | |
129 | { | |
130 | spanAttrs.m_fontFace = value; | |
131 | } | |
132 | else if ( name == "font_weight" || name == "weight" ) | |
133 | { | |
134 | unsigned long weight; | |
135 | ||
136 | if ( value == "ultralight" || value == "light" || value == "normal" ) | |
137 | spanAttrs.m_isBold = wxMarkupSpanAttributes::No; | |
138 | else if ( value == "bold" || value == "ultrabold" || value == "heavy" ) | |
139 | spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes; | |
140 | else if ( value.ToULong(&weight) ) | |
141 | spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes | |
142 | : wxMarkupSpanAttributes::No; | |
143 | else | |
144 | return wxString::Format("invalid font weight \"%s\"", valueOrig); | |
145 | } | |
146 | else if ( name == "font_style" || name == "style" ) | |
147 | { | |
148 | if ( value == "normal" ) | |
149 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::No; | |
150 | else if ( value == "oblique" || value == "italic" ) | |
151 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes; | |
152 | else | |
153 | return wxString::Format("invalid font style \"%s\"", valueOrig); | |
154 | } | |
155 | else if ( name == "size" ) | |
156 | { | |
157 | unsigned long size; | |
158 | if ( value.ToULong(&size) ) | |
159 | { | |
160 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts; | |
161 | spanAttrs.m_fontSize = size; | |
162 | } | |
163 | else if ( value == "smaller" || value == "larger" ) | |
164 | { | |
165 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative; | |
166 | spanAttrs.m_fontSize = value == "smaller" ? -1 : +1; | |
167 | } | |
168 | else // Must be a CSS-like size specification | |
169 | { | |
170 | int cssSize = 1; | |
171 | wxString rest; | |
172 | if ( value.StartsWith("xx-", &rest) ) | |
173 | cssSize = 3; | |
174 | else if ( value.StartsWith("x-", &rest) ) | |
175 | cssSize = 2; | |
176 | else if ( value == "medium" ) | |
177 | cssSize = 0; | |
178 | else | |
179 | rest = value; | |
180 | ||
181 | if ( cssSize != 0 ) | |
182 | { | |
183 | if ( rest == "small" ) | |
184 | cssSize = -cssSize; | |
185 | else if ( rest != "large" ) | |
186 | return wxString::Format("invalid font size \"%s\"", | |
187 | valueOrig); | |
188 | } | |
189 | ||
190 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic; | |
191 | spanAttrs.m_fontSize = cssSize; | |
192 | } | |
193 | } | |
194 | } | |
195 | ||
196 | return wxString(); | |
197 | } | |
198 | ||
199 | bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start) | |
200 | { | |
201 | if ( tagAndAttrs.name.CmpNoCase("span") == 0 ) | |
202 | { | |
203 | if ( start ) | |
204 | m_output.OnSpanStart(tagAndAttrs.attrs); | |
205 | else | |
206 | m_output.OnSpanEnd(tagAndAttrs.attrs); | |
207 | ||
208 | return true; | |
209 | } | |
210 | else // non-span tag | |
211 | { | |
212 | static const struct TagHandler | |
213 | { | |
214 | const char *name; | |
215 | void (wxMarkupParserOutput::*startFunc)(); | |
216 | void (wxMarkupParserOutput::*endFunc)(); | |
217 | } tagHandlers[] = | |
218 | { | |
219 | { "b", &wxMarkupParserOutput::OnBoldStart, | |
220 | &wxMarkupParserOutput::OnBoldEnd }, | |
221 | { "i", &wxMarkupParserOutput::OnItalicStart, | |
222 | &wxMarkupParserOutput::OnItalicEnd }, | |
223 | { "u", &wxMarkupParserOutput::OnUnderlinedStart, | |
224 | &wxMarkupParserOutput::OnUnderlinedEnd }, | |
225 | { "s", &wxMarkupParserOutput::OnStrikethroughStart, | |
226 | &wxMarkupParserOutput::OnStrikethroughEnd }, | |
227 | { "big", &wxMarkupParserOutput::OnBigStart, | |
228 | &wxMarkupParserOutput::OnBigEnd }, | |
229 | { "small", &wxMarkupParserOutput::OnSmallStart, | |
230 | &wxMarkupParserOutput::OnSmallEnd }, | |
231 | { "tt", &wxMarkupParserOutput::OnTeletypeStart, | |
232 | &wxMarkupParserOutput::OnTeletypeEnd }, | |
233 | }; | |
234 | ||
235 | for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ ) | |
236 | { | |
237 | const TagHandler& h = tagHandlers[n]; | |
238 | ||
239 | if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 ) | |
240 | { | |
241 | if ( start ) | |
242 | (m_output.*(h.startFunc))(); | |
243 | else | |
244 | (m_output.*(h.endFunc))(); | |
245 | ||
246 | return true; | |
247 | } | |
248 | } | |
249 | } | |
250 | ||
251 | // Unknown tag name. | |
252 | return false; | |
253 | } | |
254 | ||
255 | bool wxMarkupParser::Parse(const wxString& text) | |
256 | { | |
257 | // The stack containing the names and corresponding attributes (which are | |
258 | // actually only used for <span> tags) of all of the currently opened tag | |
259 | // or none if we're not inside any tag. | |
260 | wxStack<TagAndAttrs> tags; | |
261 | ||
262 | // Current run of text. | |
263 | wxString current; | |
264 | ||
265 | const wxString::const_iterator end = text.end(); | |
266 | for ( wxString::const_iterator it = text.begin(); it != end; ++it ) | |
267 | { | |
268 | switch ( (*it).GetValue() ) | |
269 | { | |
270 | case '<': | |
271 | { | |
272 | // Flush the text preceding the tag, if any. | |
273 | if ( !current.empty() ) | |
274 | { | |
275 | m_output.OnText(current); | |
276 | current.clear(); | |
277 | } | |
278 | ||
279 | // Remember the tag starting position for the error | |
280 | // messages. | |
281 | const size_t pos = it - text.begin(); | |
282 | ||
283 | bool start = true; | |
284 | if ( ++it != end && *it == '/' ) | |
285 | { | |
286 | start = false; | |
287 | ++it; | |
288 | } | |
289 | ||
290 | const wxString tag = ExtractUntil('>', it, end); | |
291 | if ( tag.empty() ) | |
292 | { | |
293 | wxLogDebug("%s at %lu.", | |
294 | it == end ? "Unclosed tag starting" | |
295 | : "Empty tag", | |
296 | pos); | |
297 | return false; | |
298 | } | |
299 | ||
300 | if ( start ) | |
301 | { | |
302 | wxString attrs; | |
303 | const wxString name = tag.BeforeFirst(' ', &attrs); | |
304 | ||
305 | TagAndAttrs tagAndAttrs(name); | |
306 | const wxString err = ParseAttrs(attrs, tagAndAttrs); | |
307 | if ( !err.empty() ) | |
308 | { | |
309 | wxLogDebug("Bad attributes for \"%s\" " | |
310 | "at %lu: %s.", | |
311 | name, pos, err); | |
312 | return false; | |
313 | } | |
314 | ||
315 | tags.push(tagAndAttrs); | |
316 | } | |
317 | else // end tag | |
318 | { | |
319 | if ( tags.empty() || tags.top().name != tag ) | |
320 | { | |
321 | wxLogDebug("Unmatched closing tag \"%s\" at %lu.", | |
322 | tag, pos); | |
323 | return false; | |
324 | } | |
325 | } | |
326 | ||
327 | if ( !OutputTag(tags.top(), start) ) | |
328 | { | |
329 | wxLogDebug("Unknown tag at %lu.", pos); | |
330 | return false; | |
331 | } | |
332 | ||
333 | if ( !start ) | |
334 | tags.pop(); | |
335 | } | |
336 | break; | |
337 | ||
338 | case '>': | |
339 | wxLogDebug("'>' should be escaped as \">\"; at %lu.", | |
340 | it - text.begin()); | |
341 | break; | |
342 | ||
343 | case '&': | |
344 | // Processing is somewhat complicated: we need to recognize at | |
345 | // least the "<" entity to allow escaping left square | |
346 | // brackets in the markup and, in fact, we recognize all of the | |
347 | // standard XML entities for consistency with Pango markup | |
348 | // parsing. | |
349 | // | |
350 | // However we also allow '&' to appear unescaped, i.e. directly | |
351 | // and not as "&" when it is used to introduce the mnemonic | |
352 | // for the label. In this case we simply leave it alone. | |
353 | // | |
354 | // Notice that this logic makes it impossible to have a label | |
355 | // with "lt;" inside it and using "l" as mnemonic but hopefully | |
356 | // this shouldn't be a problem in practice. | |
357 | { | |
358 | const size_t pos = it - text.begin() + 1; | |
359 | ||
360 | unsigned n; | |
361 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
362 | { | |
363 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
364 | if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0 | |
365 | && text[pos + xmlEnt.len] == ';' ) | |
366 | { | |
367 | // Escape the ampersands if needed to protect them | |
368 | // from being interpreted as mnemonics indicators. | |
369 | if ( xmlEnt.value == '&' ) | |
370 | current += "&&"; | |
371 | else | |
372 | current += xmlEnt.value; | |
373 | ||
374 | it += xmlEnt.len + 1; // +1 for '&' itself | |
375 | ||
376 | break; | |
377 | } | |
378 | } | |
379 | ||
380 | if ( n < WXSIZEOF(xmlEntities) ) | |
381 | break; | |
382 | //else: fall through, '&' is not special | |
383 | } | |
384 | ||
385 | default: | |
386 | current += *it; | |
387 | } | |
388 | } | |
389 | ||
390 | if ( !tags.empty() ) | |
391 | { | |
392 | wxLogDebug("Missing closing tag for \"%s\"", tags.top().name); | |
393 | return false; | |
394 | } | |
395 | ||
396 | if ( !current.empty() ) | |
397 | m_output.OnText(current); | |
398 | ||
399 | return true; | |
400 | } | |
401 | ||
402 | /* static */ | |
403 | wxString wxMarkupParser::Quote(const wxString& text) | |
404 | { | |
405 | wxString quoted; | |
406 | quoted.reserve(text.length()); | |
407 | ||
408 | for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it ) | |
409 | { | |
410 | unsigned n; | |
411 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
412 | { | |
413 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
414 | if ( *it == xmlEnt.value ) | |
415 | { | |
416 | quoted << '&' << xmlEnt.name << ';'; | |
417 | break; | |
418 | } | |
419 | } | |
420 | ||
421 | if ( n == WXSIZEOF(xmlEntities) ) | |
422 | quoted += *it; | |
423 | } | |
424 | ||
425 | return quoted; | |
426 | } | |
5eb051a7 VZ |
427 | |
428 | /* static */ | |
429 | wxString wxMarkupParser::Strip(const wxString& text) | |
430 | { | |
431 | class StripOutput : public wxMarkupParserOutput | |
432 | { | |
433 | public: | |
434 | StripOutput() { } | |
435 | ||
436 | const wxString& GetText() const { return m_text; } | |
437 | ||
438 | virtual void OnText(const wxString& text) { m_text += text; } | |
439 | ||
440 | virtual void OnBoldStart() { } | |
441 | virtual void OnBoldEnd() { } | |
442 | ||
443 | virtual void OnItalicStart() { } | |
444 | virtual void OnItalicEnd() { } | |
445 | ||
446 | virtual void OnUnderlinedStart() { } | |
447 | virtual void OnUnderlinedEnd() { } | |
448 | ||
449 | virtual void OnStrikethroughStart() { } | |
450 | virtual void OnStrikethroughEnd() { } | |
451 | ||
452 | virtual void OnBigStart() { } | |
453 | virtual void OnBigEnd() { } | |
454 | ||
455 | virtual void OnSmallStart() { } | |
456 | virtual void OnSmallEnd() { } | |
457 | ||
458 | virtual void OnTeletypeStart() { } | |
459 | virtual void OnTeletypeEnd() { } | |
460 | ||
461 | virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
462 | virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
463 | ||
464 | private: | |
465 | wxString m_text; | |
466 | }; | |
467 | ||
468 | StripOutput output; | |
469 | wxMarkupParser parser(output); | |
470 | if ( !parser.Parse(text) ) | |
471 | return wxString(); | |
472 | ||
473 | return output.GetText(); | |
474 | } | |
f5bdfc69 VZ |
475 | |
476 | #endif // wxUSE_MARKUP |