]>
Commit | Line | Data |
---|---|---|
9bb9964e VZ |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Name: src/common/markupparser.cpp | |
3 | // Purpose: Implementation of wxMarkupParser. | |
4 | // Author: Vadim Zeitlin | |
5 | // Created: 2011-02-16 | |
6 | // RCS-ID: $Id: $ | |
7 | // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org> | |
8 | // Licence: wxWindows licence | |
9 | /////////////////////////////////////////////////////////////////////////////// | |
10 | ||
11 | // ============================================================================ | |
12 | // declarations | |
13 | // ============================================================================ | |
14 | ||
15 | // ---------------------------------------------------------------------------- | |
16 | // headers | |
17 | // ---------------------------------------------------------------------------- | |
18 | ||
19 | // for compilers that support precompilation, includes "wx.h". | |
20 | #include "wx/wxprec.h" | |
21 | ||
22 | #ifdef __BORLANDC__ | |
23 | #pragma hdrstop | |
24 | #endif | |
25 | ||
f5bdfc69 | 26 | #if wxUSE_MARKUP |
9bb9964e | 27 | |
1a6e6d54 VZ |
28 | #ifndef WX_PRECOMP |
29 | #include "wx/log.h" | |
30 | #endif | |
31 | ||
9bb9964e VZ |
32 | #include "wx/private/markupparser.h" |
33 | ||
34 | #include "wx/stack.h" | |
35 | ||
36 | namespace | |
37 | { | |
38 | ||
39 | // ---------------------------------------------------------------------------- | |
40 | // constants | |
41 | // ---------------------------------------------------------------------------- | |
42 | ||
// Description of a single predefined XML 1.0 entity.
struct XMLEntity
{
    // Entity name as it appears between '&' and ';' in the markup.
    const char *name;

    // Length of the name, stored to avoid recomputing strlen(name).
    int len;

    // The character this entity stands for.
    char value;
};

// Table of all the predefined XML 1.0 entities, used both for decoding them
// when parsing the markup and for encoding special characters when quoting.
const XMLEntity xmlEntities[] =
{
    { "lt",   2, '<'  },
    { "gt",   2, '>'  },
    { "amp",  3, '&'  },
    { "apos", 4, '\'' },
    { "quot", 4, '"'  },
};
57 | ||
58 | // ---------------------------------------------------------------------------- | |
59 | // helper functions | |
60 | // ---------------------------------------------------------------------------- | |
61 | ||
62 | wxString | |
63 | ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end) | |
64 | { | |
65 | wxString str; | |
66 | for ( ; it != end; ++it ) | |
67 | { | |
68 | if ( *it == ch ) | |
69 | return str; | |
70 | ||
71 | str += *it; | |
72 | } | |
73 | ||
74 | // Return empty string to indicate that we didn't find ch at all. | |
75 | return wxString(); | |
76 | } | |
77 | ||
78 | } // anonymous namespace | |
79 | ||
80 | // ============================================================================ | |
81 | // wxMarkupParser implementation | |
82 | // ============================================================================ | |
83 | ||
84 | wxString | |
85 | wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs) | |
86 | { | |
87 | if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() ) | |
88 | { | |
89 | return wxString::Format("tag \"%s\" can't have attributes", | |
90 | tagAndAttrs.name); | |
91 | } | |
92 | ||
93 | // TODO: Parse more attributes described at | |
94 | // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html | |
95 | // and at least ignore them gracefully instead of giving errors (but | |
96 | // quite a few of them could be supported as well, notable font_desc). | |
97 | ||
98 | wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs; | |
99 | ||
100 | while ( !attrs.empty() ) | |
101 | { | |
102 | wxString rest; | |
103 | const wxString attr = attrs.BeforeFirst(' ', &rest); | |
104 | attrs = rest; | |
105 | ||
106 | // The "original" versions are used for error messages only. | |
107 | wxString valueOrig; | |
108 | const wxString nameOrig = attr.BeforeFirst('=', &valueOrig); | |
109 | ||
110 | const wxString name = nameOrig.Lower(); | |
111 | wxString value = valueOrig.Lower(); | |
112 | ||
113 | // All attributes values must be quoted. | |
114 | if ( value.length() < 2 || | |
115 | (value[0] != value.Last()) || | |
116 | (value[0] != '"' && value[0] != '\'') ) | |
117 | { | |
118 | return wxString::Format("bad quoting for value of \"%s\"", | |
119 | nameOrig); | |
120 | } | |
121 | ||
122 | value.assign(value, 1, value.length() - 2); | |
123 | ||
124 | if ( name == "foreground" || name == "fgcolor" || name == "color" ) | |
125 | { | |
126 | spanAttrs.m_fgCol = value; | |
127 | } | |
128 | else if ( name == "background" || name == "bgcolor" ) | |
129 | { | |
130 | spanAttrs.m_bgCol = value; | |
131 | } | |
132 | else if ( name == "font_family" || name == "face" ) | |
133 | { | |
134 | spanAttrs.m_fontFace = value; | |
135 | } | |
136 | else if ( name == "font_weight" || name == "weight" ) | |
137 | { | |
138 | unsigned long weight; | |
139 | ||
140 | if ( value == "ultralight" || value == "light" || value == "normal" ) | |
141 | spanAttrs.m_isBold = wxMarkupSpanAttributes::No; | |
142 | else if ( value == "bold" || value == "ultrabold" || value == "heavy" ) | |
143 | spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes; | |
144 | else if ( value.ToULong(&weight) ) | |
145 | spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes | |
146 | : wxMarkupSpanAttributes::No; | |
147 | else | |
148 | return wxString::Format("invalid font weight \"%s\"", valueOrig); | |
149 | } | |
150 | else if ( name == "font_style" || name == "style" ) | |
151 | { | |
152 | if ( value == "normal" ) | |
153 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::No; | |
154 | else if ( value == "oblique" || value == "italic" ) | |
155 | spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes; | |
156 | else | |
157 | return wxString::Format("invalid font style \"%s\"", valueOrig); | |
158 | } | |
159 | else if ( name == "size" ) | |
160 | { | |
161 | unsigned long size; | |
162 | if ( value.ToULong(&size) ) | |
163 | { | |
164 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts; | |
165 | spanAttrs.m_fontSize = size; | |
166 | } | |
167 | else if ( value == "smaller" || value == "larger" ) | |
168 | { | |
169 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative; | |
170 | spanAttrs.m_fontSize = value == "smaller" ? -1 : +1; | |
171 | } | |
172 | else // Must be a CSS-like size specification | |
173 | { | |
174 | int cssSize = 1; | |
175 | wxString rest; | |
176 | if ( value.StartsWith("xx-", &rest) ) | |
177 | cssSize = 3; | |
178 | else if ( value.StartsWith("x-", &rest) ) | |
179 | cssSize = 2; | |
180 | else if ( value == "medium" ) | |
181 | cssSize = 0; | |
182 | else | |
183 | rest = value; | |
184 | ||
185 | if ( cssSize != 0 ) | |
186 | { | |
187 | if ( rest == "small" ) | |
188 | cssSize = -cssSize; | |
189 | else if ( rest != "large" ) | |
190 | return wxString::Format("invalid font size \"%s\"", | |
191 | valueOrig); | |
192 | } | |
193 | ||
194 | spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic; | |
195 | spanAttrs.m_fontSize = cssSize; | |
196 | } | |
197 | } | |
198 | } | |
199 | ||
200 | return wxString(); | |
201 | } | |
202 | ||
203 | bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start) | |
204 | { | |
205 | if ( tagAndAttrs.name.CmpNoCase("span") == 0 ) | |
206 | { | |
207 | if ( start ) | |
208 | m_output.OnSpanStart(tagAndAttrs.attrs); | |
209 | else | |
210 | m_output.OnSpanEnd(tagAndAttrs.attrs); | |
211 | ||
212 | return true; | |
213 | } | |
214 | else // non-span tag | |
215 | { | |
216 | static const struct TagHandler | |
217 | { | |
218 | const char *name; | |
219 | void (wxMarkupParserOutput::*startFunc)(); | |
220 | void (wxMarkupParserOutput::*endFunc)(); | |
221 | } tagHandlers[] = | |
222 | { | |
223 | { "b", &wxMarkupParserOutput::OnBoldStart, | |
224 | &wxMarkupParserOutput::OnBoldEnd }, | |
225 | { "i", &wxMarkupParserOutput::OnItalicStart, | |
226 | &wxMarkupParserOutput::OnItalicEnd }, | |
227 | { "u", &wxMarkupParserOutput::OnUnderlinedStart, | |
228 | &wxMarkupParserOutput::OnUnderlinedEnd }, | |
229 | { "s", &wxMarkupParserOutput::OnStrikethroughStart, | |
230 | &wxMarkupParserOutput::OnStrikethroughEnd }, | |
231 | { "big", &wxMarkupParserOutput::OnBigStart, | |
232 | &wxMarkupParserOutput::OnBigEnd }, | |
233 | { "small", &wxMarkupParserOutput::OnSmallStart, | |
234 | &wxMarkupParserOutput::OnSmallEnd }, | |
235 | { "tt", &wxMarkupParserOutput::OnTeletypeStart, | |
236 | &wxMarkupParserOutput::OnTeletypeEnd }, | |
237 | }; | |
238 | ||
239 | for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ ) | |
240 | { | |
241 | const TagHandler& h = tagHandlers[n]; | |
242 | ||
243 | if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 ) | |
244 | { | |
245 | if ( start ) | |
246 | (m_output.*(h.startFunc))(); | |
247 | else | |
248 | (m_output.*(h.endFunc))(); | |
249 | ||
250 | return true; | |
251 | } | |
252 | } | |
253 | } | |
254 | ||
255 | // Unknown tag name. | |
256 | return false; | |
257 | } | |
258 | ||
259 | bool wxMarkupParser::Parse(const wxString& text) | |
260 | { | |
261 | // The stack containing the names and corresponding attributes (which are | |
262 | // actually only used for <span> tags) of all of the currently opened tag | |
263 | // or none if we're not inside any tag. | |
264 | wxStack<TagAndAttrs> tags; | |
265 | ||
266 | // Current run of text. | |
267 | wxString current; | |
268 | ||
269 | const wxString::const_iterator end = text.end(); | |
270 | for ( wxString::const_iterator it = text.begin(); it != end; ++it ) | |
271 | { | |
272 | switch ( (*it).GetValue() ) | |
273 | { | |
274 | case '<': | |
275 | { | |
276 | // Flush the text preceding the tag, if any. | |
277 | if ( !current.empty() ) | |
278 | { | |
279 | m_output.OnText(current); | |
280 | current.clear(); | |
281 | } | |
282 | ||
283 | // Remember the tag starting position for the error | |
284 | // messages. | |
285 | const size_t pos = it - text.begin(); | |
286 | ||
287 | bool start = true; | |
288 | if ( ++it != end && *it == '/' ) | |
289 | { | |
290 | start = false; | |
291 | ++it; | |
292 | } | |
293 | ||
294 | const wxString tag = ExtractUntil('>', it, end); | |
295 | if ( tag.empty() ) | |
296 | { | |
297 | wxLogDebug("%s at %lu.", | |
298 | it == end ? "Unclosed tag starting" | |
299 | : "Empty tag", | |
300 | pos); | |
301 | return false; | |
302 | } | |
303 | ||
304 | if ( start ) | |
305 | { | |
306 | wxString attrs; | |
307 | const wxString name = tag.BeforeFirst(' ', &attrs); | |
308 | ||
309 | TagAndAttrs tagAndAttrs(name); | |
310 | const wxString err = ParseAttrs(attrs, tagAndAttrs); | |
311 | if ( !err.empty() ) | |
312 | { | |
313 | wxLogDebug("Bad attributes for \"%s\" " | |
314 | "at %lu: %s.", | |
315 | name, pos, err); | |
316 | return false; | |
317 | } | |
318 | ||
319 | tags.push(tagAndAttrs); | |
320 | } | |
321 | else // end tag | |
322 | { | |
323 | if ( tags.empty() || tags.top().name != tag ) | |
324 | { | |
325 | wxLogDebug("Unmatched closing tag \"%s\" at %lu.", | |
326 | tag, pos); | |
327 | return false; | |
328 | } | |
329 | } | |
330 | ||
331 | if ( !OutputTag(tags.top(), start) ) | |
332 | { | |
333 | wxLogDebug("Unknown tag at %lu.", pos); | |
334 | return false; | |
335 | } | |
336 | ||
337 | if ( !start ) | |
338 | tags.pop(); | |
339 | } | |
340 | break; | |
341 | ||
342 | case '>': | |
343 | wxLogDebug("'>' should be escaped as \">\"; at %lu.", | |
344 | it - text.begin()); | |
345 | break; | |
346 | ||
347 | case '&': | |
348 | // Processing is somewhat complicated: we need to recognize at | |
349 | // least the "<" entity to allow escaping left square | |
350 | // brackets in the markup and, in fact, we recognize all of the | |
351 | // standard XML entities for consistency with Pango markup | |
352 | // parsing. | |
353 | // | |
354 | // However we also allow '&' to appear unescaped, i.e. directly | |
355 | // and not as "&" when it is used to introduce the mnemonic | |
356 | // for the label. In this case we simply leave it alone. | |
357 | // | |
358 | // Notice that this logic makes it impossible to have a label | |
359 | // with "lt;" inside it and using "l" as mnemonic but hopefully | |
360 | // this shouldn't be a problem in practice. | |
361 | { | |
362 | const size_t pos = it - text.begin() + 1; | |
363 | ||
364 | unsigned n; | |
365 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
366 | { | |
367 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
368 | if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0 | |
369 | && text[pos + xmlEnt.len] == ';' ) | |
370 | { | |
371 | // Escape the ampersands if needed to protect them | |
372 | // from being interpreted as mnemonics indicators. | |
373 | if ( xmlEnt.value == '&' ) | |
374 | current += "&&"; | |
375 | else | |
376 | current += xmlEnt.value; | |
377 | ||
378 | it += xmlEnt.len + 1; // +1 for '&' itself | |
379 | ||
380 | break; | |
381 | } | |
382 | } | |
383 | ||
384 | if ( n < WXSIZEOF(xmlEntities) ) | |
385 | break; | |
386 | //else: fall through, '&' is not special | |
387 | } | |
388 | ||
389 | default: | |
390 | current += *it; | |
391 | } | |
392 | } | |
393 | ||
394 | if ( !tags.empty() ) | |
395 | { | |
396 | wxLogDebug("Missing closing tag for \"%s\"", tags.top().name); | |
397 | return false; | |
398 | } | |
399 | ||
400 | if ( !current.empty() ) | |
401 | m_output.OnText(current); | |
402 | ||
403 | return true; | |
404 | } | |
405 | ||
406 | /* static */ | |
407 | wxString wxMarkupParser::Quote(const wxString& text) | |
408 | { | |
409 | wxString quoted; | |
410 | quoted.reserve(text.length()); | |
411 | ||
412 | for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it ) | |
413 | { | |
414 | unsigned n; | |
415 | for ( n = 0; n < WXSIZEOF(xmlEntities); n++ ) | |
416 | { | |
417 | const XMLEntity& xmlEnt = xmlEntities[n]; | |
418 | if ( *it == xmlEnt.value ) | |
419 | { | |
420 | quoted << '&' << xmlEnt.name << ';'; | |
421 | break; | |
422 | } | |
423 | } | |
424 | ||
425 | if ( n == WXSIZEOF(xmlEntities) ) | |
426 | quoted += *it; | |
427 | } | |
428 | ||
429 | return quoted; | |
430 | } | |
5eb051a7 VZ |
431 | |
432 | /* static */ | |
433 | wxString wxMarkupParser::Strip(const wxString& text) | |
434 | { | |
435 | class StripOutput : public wxMarkupParserOutput | |
436 | { | |
437 | public: | |
438 | StripOutput() { } | |
439 | ||
440 | const wxString& GetText() const { return m_text; } | |
441 | ||
442 | virtual void OnText(const wxString& text) { m_text += text; } | |
443 | ||
444 | virtual void OnBoldStart() { } | |
445 | virtual void OnBoldEnd() { } | |
446 | ||
447 | virtual void OnItalicStart() { } | |
448 | virtual void OnItalicEnd() { } | |
449 | ||
450 | virtual void OnUnderlinedStart() { } | |
451 | virtual void OnUnderlinedEnd() { } | |
452 | ||
453 | virtual void OnStrikethroughStart() { } | |
454 | virtual void OnStrikethroughEnd() { } | |
455 | ||
456 | virtual void OnBigStart() { } | |
457 | virtual void OnBigEnd() { } | |
458 | ||
459 | virtual void OnSmallStart() { } | |
460 | virtual void OnSmallEnd() { } | |
461 | ||
462 | virtual void OnTeletypeStart() { } | |
463 | virtual void OnTeletypeEnd() { } | |
464 | ||
465 | virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
466 | virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) { } | |
467 | ||
468 | private: | |
469 | wxString m_text; | |
470 | }; | |
471 | ||
472 | StripOutput output; | |
473 | wxMarkupParser parser(output); | |
474 | if ( !parser.Parse(text) ) | |
475 | return wxString(); | |
476 | ||
477 | return output.GetText(); | |
478 | } | |
f5bdfc69 VZ |
479 | |
480 | #endif // wxUSE_MARKUP |