]>
git.saurik.com Git - wxWidgets.git/blob - src/common/markupparser.cpp
5701ee64fa85e305a7285a0af25757dcb447f3e2
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/markupparser.cpp
3 // Purpose: Implementation of wxMarkupParser.
4 // Author: Vadim Zeitlin
7 // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
11 // ============================================================================
13 // ============================================================================
15 // ----------------------------------------------------------------------------
17 // ----------------------------------------------------------------------------
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
28 #include "wx/private/markupparser.h"
35 // ----------------------------------------------------------------------------
37 // ----------------------------------------------------------------------------
39 // Array containing the predefined XML 1.0 entities.
40 const struct XMLEntity
43 int len
; // == strlen(name)
54 // ----------------------------------------------------------------------------
56 // ----------------------------------------------------------------------------
59 ExtractUntil(char ch
, wxString::const_iterator
& it
, wxString::const_iterator end
)
62 for ( ; it
!= end
; ++it
)
70 // Return empty string to indicate that we didn't find ch at all.
74 } // anonymous namespace
76 // ============================================================================
77 // wxMarkupParser implementation
78 // ============================================================================
81 wxMarkupParser::ParseAttrs(wxString attrs
, TagAndAttrs
& tagAndAttrs
)
83 if ( tagAndAttrs
.name
.CmpNoCase("span") != 0 && !attrs
.empty() )
85 return wxString::Format("tag \"%s\" can't have attributes",
89 // TODO: Parse more attributes described at
90 // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
91 // and at least ignore them gracefully instead of giving errors (but
92 // quite a few of them could be supported as well, notably font_desc).
94 wxMarkupSpanAttributes
& spanAttrs
= tagAndAttrs
.attrs
;
96 while ( !attrs
.empty() )
99 const wxString attr
= attrs
.BeforeFirst(' ', &rest
);
102 // The "original" versions are used for error messages only.
104 const wxString nameOrig
= attr
.BeforeFirst('=', &valueOrig
);
106 const wxString name
= nameOrig
.Lower();
107 wxString value
= valueOrig
.Lower();
109 // All attribute values must be quoted.
110 if ( value
.length() < 2 ||
111 (value
[0] != value
.Last()) ||
112 (value
[0] != '"' && value
[0] != '\'') )
114 return wxString::Format("bad quoting for value of \"%s\"",
118 value
.assign(value
, 1, value
.length() - 2);
120 if ( name
== "foreground" || name
== "fgcolor" || name
== "color" )
122 spanAttrs
.m_fgCol
= value
;
124 else if ( name
== "background" || name
== "bgcolor" )
126 spanAttrs
.m_bgCol
= value
;
128 else if ( name
== "font_family" || name
== "face" )
130 spanAttrs
.m_fontFace
= value
;
132 else if ( name
== "font_weight" || name
== "weight" )
134 unsigned long weight
;
136 if ( value
== "ultralight" || value
== "light" || value
== "normal" )
137 spanAttrs
.m_isBold
= wxMarkupSpanAttributes::No
;
138 else if ( value
== "bold" || value
== "ultrabold" || value
== "heavy" )
139 spanAttrs
.m_isBold
= wxMarkupSpanAttributes::Yes
;
140 else if ( value
.ToULong(&weight
) )
141 spanAttrs
.m_isBold
= weight
>= 600 ? wxMarkupSpanAttributes::Yes
142 : wxMarkupSpanAttributes::No
;
144 return wxString::Format("invalid font weight \"%s\"", valueOrig
);
146 else if ( name
== "font_style" || name
== "style" )
148 if ( value
== "normal" )
149 spanAttrs
.m_isItalic
= wxMarkupSpanAttributes::No
;
150 else if ( value
== "oblique" || value
== "italic" )
151 spanAttrs
.m_isItalic
= wxMarkupSpanAttributes::Yes
;
153 return wxString::Format("invalid font style \"%s\"", valueOrig
);
155 else if ( name
== "size" )
158 if ( value
.ToULong(&size
) )
160 spanAttrs
.m_sizeKind
= wxMarkupSpanAttributes::Size_PointParts
;
161 spanAttrs
.m_fontSize
= size
;
163 else if ( value
== "smaller" || value
== "larger" )
165 spanAttrs
.m_sizeKind
= wxMarkupSpanAttributes::Size_Relative
;
166 spanAttrs
.m_fontSize
= value
== "smaller" ? -1 : +1;
168 else // Must be a CSS-like size specification
172 if ( value
.StartsWith("xx-", &rest
) )
174 else if ( value
.StartsWith("x-", &rest
) )
176 else if ( value
== "medium" )
183 if ( rest
== "small" )
185 else if ( rest
!= "large" )
186 return wxString::Format("invalid font size \"%s\"",
190 spanAttrs
.m_sizeKind
= wxMarkupSpanAttributes::Size_Symbolic
;
191 spanAttrs
.m_fontSize
= cssSize
;
199 bool wxMarkupParser::OutputTag(const TagAndAttrs
& tagAndAttrs
, bool start
)
201 if ( tagAndAttrs
.name
.CmpNoCase("span") == 0 )
204 m_output
.OnSpanStart(tagAndAttrs
.attrs
);
206 m_output
.OnSpanEnd(tagAndAttrs
.attrs
);
212 static const struct TagHandler
215 void (wxMarkupParserOutput::*startFunc
)();
216 void (wxMarkupParserOutput::*endFunc
)();
219 { "b", &wxMarkupParserOutput::OnBoldStart
,
220 &wxMarkupParserOutput::OnBoldEnd
},
221 { "i", &wxMarkupParserOutput::OnItalicStart
,
222 &wxMarkupParserOutput::OnItalicEnd
},
223 { "u", &wxMarkupParserOutput::OnUnderlinedStart
,
224 &wxMarkupParserOutput::OnUnderlinedEnd
},
225 { "s", &wxMarkupParserOutput::OnStrikethroughStart
,
226 &wxMarkupParserOutput::OnStrikethroughEnd
},
227 { "big", &wxMarkupParserOutput::OnBigStart
,
228 &wxMarkupParserOutput::OnBigEnd
},
229 { "small", &wxMarkupParserOutput::OnSmallStart
,
230 &wxMarkupParserOutput::OnSmallEnd
},
231 { "tt", &wxMarkupParserOutput::OnTeletypeStart
,
232 &wxMarkupParserOutput::OnTeletypeEnd
},
235 for ( unsigned n
= 0; n
< WXSIZEOF(tagHandlers
); n
++ )
237 const TagHandler
& h
= tagHandlers
[n
];
239 if ( tagAndAttrs
.name
.CmpNoCase(h
.name
) == 0 )
242 (m_output
.*(h
.startFunc
))();
244 (m_output
.*(h
.endFunc
))();
255 bool wxMarkupParser::Parse(const wxString
& text
)
257 // The stack containing the names and corresponding attributes (which are
258 // actually only used for <span> tags) of all of the currently opened tags
259 // or none if we're not inside any tag.
260 wxStack
<TagAndAttrs
> tags
;
262 // Current run of text.
265 const wxString::const_iterator end
= text
.end();
266 for ( wxString::const_iterator it
= text
.begin(); it
!= end
; ++it
)
268 switch ( (*it
).GetValue() )
272 // Flush the text preceding the tag, if any.
273 if ( !current
.empty() )
275 m_output
.OnText(current
);
279 // Remember the tag starting position for the error
281 const size_t pos
= it
- text
.begin();
284 if ( ++it
!= end
&& *it
== '/' )
290 const wxString tag
= ExtractUntil('>', it
, end
);
293 wxLogDebug("%s at %lu.",
294 it
== end
? "Unclosed tag starting"
303 const wxString name
= tag
.BeforeFirst(' ', &attrs
);
305 TagAndAttrs
tagAndAttrs(name
);
306 const wxString err
= ParseAttrs(attrs
, tagAndAttrs
);
309 wxLogDebug("Bad attributes for \"%s\" "
315 tags
.push(tagAndAttrs
);
319 if ( tags
.empty() || tags
.top().name
!= tag
)
321 wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
327 if ( !OutputTag(tags
.top(), start
) )
329 wxLogDebug("Unknown tag at %lu.", pos
);
339 wxLogDebug("'>' should be escaped as \">\"; at %lu.",
344 // Processing is somewhat complicated: we need to recognize at
345 // least the "&lt;" entity to allow escaping left angle
346 // brackets in the markup and, in fact, we recognize all of the
347 // standard XML entities for consistency with Pango markup
350 // However we also allow '&' to appear unescaped, i.e. directly
351 // and not as "&amp;" when it is used to introduce the mnemonic
352 // for the label. In this case we simply leave it alone.
354 // Notice that this logic makes it impossible to have a label
355 // with "lt;" inside it and using "l" as mnemonic but hopefully
356 // this shouldn't be a problem in practice.
358 const size_t pos
= it
- text
.begin() + 1;
361 for ( n
= 0; n
< WXSIZEOF(xmlEntities
); n
++ )
363 const XMLEntity
& xmlEnt
= xmlEntities
[n
];
364 if ( text
.compare(pos
, xmlEnt
.len
, xmlEnt
.name
) == 0
365 && text
[pos
+ xmlEnt
.len
] == ';' )
367 // Escape the ampersands if needed to protect them
368 // from being interpreted as mnemonics indicators.
369 if ( xmlEnt
.value
== '&' )
372 current
+= xmlEnt
.value
;
374 it
+= xmlEnt
.len
+ 1; // +1 for '&' itself
380 if ( n
< WXSIZEOF(xmlEntities
) )
382 //else: fall through, '&' is not special
392 wxLogDebug("Missing closing tag for \"%s\"", tags
.top().name
);
396 if ( !current
.empty() )
397 m_output
.OnText(current
);
403 wxString
wxMarkupParser::Quote(const wxString
& text
)
406 quoted
.reserve(text
.length());
408 for ( wxString::const_iterator it
= text
.begin(); it
!= text
.end(); ++it
)
411 for ( n
= 0; n
< WXSIZEOF(xmlEntities
); n
++ )
413 const XMLEntity
& xmlEnt
= xmlEntities
[n
];
414 if ( *it
== xmlEnt
.value
)
416 quoted
<< '&' << xmlEnt
.name
<< ';';
421 if ( n
== WXSIZEOF(xmlEntities
) )
429 wxString
wxMarkupParser::Strip(const wxString
& text
)
431 class StripOutput
: public wxMarkupParserOutput
436 const wxString
& GetText() const { return m_text
; }
438 virtual void OnText(const wxString
& text
) { m_text
+= text
; }
440 virtual void OnBoldStart() { }
441 virtual void OnBoldEnd() { }
443 virtual void OnItalicStart() { }
444 virtual void OnItalicEnd() { }
446 virtual void OnUnderlinedStart() { }
447 virtual void OnUnderlinedEnd() { }
449 virtual void OnStrikethroughStart() { }
450 virtual void OnStrikethroughEnd() { }
452 virtual void OnBigStart() { }
453 virtual void OnBigEnd() { }
455 virtual void OnSmallStart() { }
456 virtual void OnSmallEnd() { }
458 virtual void OnTeletypeStart() { }
459 virtual void OnTeletypeEnd() { }
461 virtual void OnSpanStart(const wxMarkupSpanAttributes
& WXUNUSED(a
)) { }
462 virtual void OnSpanEnd(const wxMarkupSpanAttributes
& WXUNUSED(a
)) { }
469 wxMarkupParser
parser(output
);
470 if ( !parser
.Parse(text
) )
473 return output
.GetText();
476 #endif // wxUSE_MARKUP