]>
git.saurik.com Git - wxWidgets.git/blob - src/common/markupparser.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/markupparser.cpp
3 // Purpose: Implementation of wxMarkupParser.
4 // Author: Vadim Zeitlin
7 // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
11 // ============================================================================
13 // ============================================================================
15 // ----------------------------------------------------------------------------
17 // ----------------------------------------------------------------------------
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
32 #include "wx/private/markupparser.h"
39 // ----------------------------------------------------------------------------
41 // ----------------------------------------------------------------------------
43 // Array containing the predefined XML 1.0 entities.
//
// Each element is an XMLEntity. Besides len, the code further down also reads
// the name and value members of the elements (their declarations are not
// visible in this excerpt).
44 const struct XMLEntity
47 int len
; // == strlen(name)
58 // ----------------------------------------------------------------------------
60 // ----------------------------------------------------------------------------
// Helper looking for the character ch in the range [it, end).
//
// The iterator it is taken by reference and is advanced by the loop below, so
// the caller's iterator moves along with the scan. As the comment at the end
// of the function says, an empty string is returned if ch is not found at
// all; presumably the text preceding ch is returned otherwise, but the lines
// building the result are not visible in this excerpt -- TODO confirm.
63 ExtractUntil(char ch
, wxString::const_iterator
& it
, wxString::const_iterator end
)
66 for ( ; it
!= end
; ++it
)
74 // Return empty string to indicate that we didn't find ch at all.
78 } // anonymous namespace
80 // ============================================================================
81 // wxMarkupParser implementation
82 // ============================================================================
// Parse the attributes string of a tag and store the result in
// tagAndAttrs.attrs.
//
// Only "span" tags (the name is compared case-insensitively) may have
// attributes, a non-empty attrs string for any other tag is an error. The
// attributes are examined one at a time: the part of attrs before the first
// space is taken as the attribute, it is split into name and value at '=' and
// both of them are lower-cased before being compared with the known names.
//
// Every error path visible here returns a formatted, human-readable message.
// Presumably an empty string is returned on success (the final return
// statement is not visible in this excerpt, but the caller stores the result
// in a variable called err) -- TODO confirm.
85 wxMarkupParser::ParseAttrs(wxString attrs
, TagAndAttrs
& tagAndAttrs
)
87 if ( tagAndAttrs
.name
.CmpNoCase("span") != 0 && !attrs
.empty() )
89 return wxString::Format("tag \"%s\" can't have attributes",
93 // TODO: Parse more attributes described at
94 // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
95 // and at least ignore them gracefully instead of giving errors (but
96 // quite a few of them could be supported as well, notably font_desc).
98 wxMarkupSpanAttributes
& spanAttrs
= tagAndAttrs
.attrs
;
100 while ( !attrs
.empty() )
103 const wxString attr
= attrs
.BeforeFirst(' ', &rest
);
106 // The "original" versions are used for error messages only.
108 const wxString nameOrig
= attr
.BeforeFirst('=', &valueOrig
);
110 const wxString name
= nameOrig
.Lower();
111 wxString value
= valueOrig
.Lower();
113 // All attribute values must be quoted: the value must be at least two
// characters long and start and end with the same quote character, which
// can be either a double or a single quote.
114 if ( value
.length() < 2 ||
115 (value
[0] != value
.Last()) ||
116 (value
[0] != '"' && value
[0] != '\'') )
118 return wxString::Format("bad quoting for value of \"%s\"",
// Strip the surrounding quotes, keeping only the text between them.
122 value
.assign(value
, 1, value
.length() - 2);
// Colours and the font face name are stored as (lower-cased) strings without
// any further validation here.
124 if ( name
== "foreground" || name
== "fgcolor" || name
== "color" )
126 spanAttrs
.m_fgCol
= value
;
128 else if ( name
== "background" || name
== "bgcolor" )
130 spanAttrs
.m_bgCol
= value
;
132 else if ( name
== "font_family" || name
== "face" )
134 spanAttrs
.m_fontFace
= value
;
136 else if ( name
== "font_weight" || name
== "weight" )
// The weight is reduced to a bold/not bold choice: symbolic names are mapped
// explicitly and numeric weights count as bold starting from 600.
138 unsigned long weight
;
140 if ( value
== "ultralight" || value
== "light" || value
== "normal" )
141 spanAttrs
.m_isBold
= wxMarkupSpanAttributes::No
;
142 else if ( value
== "bold" || value
== "ultrabold" || value
== "heavy" )
143 spanAttrs
.m_isBold
= wxMarkupSpanAttributes::Yes
;
144 else if ( value
.ToULong(&weight
) )
145 spanAttrs
.m_isBold
= weight
>= 600 ? wxMarkupSpanAttributes::Yes
146 : wxMarkupSpanAttributes::No
;
148 return wxString::Format("invalid font weight \"%s\"", valueOrig
);
150 else if ( name
== "font_style" || name
== "style" )
// Both "oblique" and "italic" are treated as italic.
152 if ( value
== "normal" )
153 spanAttrs
.m_isItalic
= wxMarkupSpanAttributes::No
;
154 else if ( value
== "oblique" || value
== "italic" )
155 spanAttrs
.m_isItalic
= wxMarkupSpanAttributes::Yes
;
157 return wxString::Format("invalid font style \"%s\"", valueOrig
);
159 else if ( name
== "size" )
// Three forms of size are recognized: a plain number (stored with the
// Size_PointParts kind), the relative "smaller"/"larger" keywords (stored as
// -1 or +1 with the Size_Relative kind) and CSS-like symbolic names.
162 if ( value
.ToULong(&size
) )
164 spanAttrs
.m_sizeKind
= wxMarkupSpanAttributes::Size_PointParts
;
165 spanAttrs
.m_fontSize
= size
;
167 else if ( value
== "smaller" || value
== "larger" )
169 spanAttrs
.m_sizeKind
= wxMarkupSpanAttributes::Size_Relative
;
170 spanAttrs
.m_fontSize
= value
== "smaller" ? -1 : +1;
172 else // Must be a CSS-like size specification
// The name is either "medium" or "small"/"large", optionally preceded by an
// "x-" or "xx-" prefix; anything else is an error. The numeric values
// assigned to cssSize are not visible in this excerpt.
176 if ( value
.StartsWith("xx-", &rest
) )
178 else if ( value
.StartsWith("x-", &rest
) )
180 else if ( value
== "medium" )
187 if ( rest
== "small" )
189 else if ( rest
!= "large" )
190 return wxString::Format("invalid font size \"%s\"",
194 spanAttrs
.m_sizeKind
= wxMarkupSpanAttributes::Size_Symbolic
;
195 spanAttrs
.m_fontSize
= cssSize
;
// Call the m_output method corresponding to the given tag.
//
// "span" tags (the name is compared case-insensitively) are forwarded to
// OnSpanStart() or OnSpanEnd() together with their attributes. All the other
// tags are looked up, also case-insensitively, in a static table associating
// the tag name with a pair of pointers to wxMarkupParserOutput member
// functions, one for the start and one for the end of the tag.
//
// Presumably the start parameter selects between the "start" and the "end"
// callback and the return value tells whether the tag was recognized (the
// caller logs "Unknown tag" when it is false), but the corresponding lines
// are not visible in this excerpt -- TODO confirm.
203 bool wxMarkupParser::OutputTag(const TagAndAttrs
& tagAndAttrs
, bool start
)
205 if ( tagAndAttrs
.name
.CmpNoCase("span") == 0 )
208 m_output
.OnSpanStart(tagAndAttrs
.attrs
);
210 m_output
.OnSpanEnd(tagAndAttrs
.attrs
);
// Table of the simple tags, i.e. those without attributes, and of the
// callbacks to invoke for them.
216 static const struct TagHandler
219 void (wxMarkupParserOutput::*startFunc
)();
220 void (wxMarkupParserOutput::*endFunc
)();
223 { "b", &wxMarkupParserOutput::OnBoldStart
,
224 &wxMarkupParserOutput::OnBoldEnd
},
225 { "i", &wxMarkupParserOutput::OnItalicStart
,
226 &wxMarkupParserOutput::OnItalicEnd
},
227 { "u", &wxMarkupParserOutput::OnUnderlinedStart
,
228 &wxMarkupParserOutput::OnUnderlinedEnd
},
229 { "s", &wxMarkupParserOutput::OnStrikethroughStart
,
230 &wxMarkupParserOutput::OnStrikethroughEnd
},
231 { "big", &wxMarkupParserOutput::OnBigStart
,
232 &wxMarkupParserOutput::OnBigEnd
},
233 { "small", &wxMarkupParserOutput::OnSmallStart
,
234 &wxMarkupParserOutput::OnSmallEnd
},
235 { "tt", &wxMarkupParserOutput::OnTeletypeStart
,
236 &wxMarkupParserOutput::OnTeletypeEnd
},
// Linear search is used as the table is tiny.
239 for ( unsigned n
= 0; n
< WXSIZEOF(tagHandlers
); n
++ )
241 const TagHandler
& h
= tagHandlers
[n
];
243 if ( tagAndAttrs
.name
.CmpNoCase(h
.name
) == 0 )
// Invoke the matching callback through the pointer to member function.
246 (m_output
.*(h
.startFunc
))();
248 (m_output
.*(h
.endFunc
))();
// Parse the given markup text, forwarding the runs of plain text and the
// tags found in it to m_output.
//
// The text is scanned character by character: plain text is accumulated in
// the current string and passed to OnText() before each tag and at the end,
// tags are extracted up to the closing '>' and dispatched using OutputTag()
// and the XML entities are replaced with the characters they stand for.
//
// All the problems detected here are reported using wxLogDebug().
// Presumably false is returned in this case and true on success, but the
// return statements are not visible in this excerpt -- TODO confirm.
259 bool wxMarkupParser::Parse(const wxString
& text
)
261 // The stack containing the names and corresponding attributes (which are
262 // actually only used for <span> tags) of all of the currently opened
263 // tags; it is empty if we're not inside any tag.
264 wxStack
<TagAndAttrs
> tags
;
266 // Current run of text.
269 const wxString::const_iterator end
= text
.end();
270 for ( wxString::const_iterator it
= text
.begin(); it
!= end
; ++it
)
272 switch ( (*it
).GetValue() )
276 // Flush the text preceding the tag, if any.
277 if ( !current
.empty() )
279 m_output
.OnText(current
);
283 // Remember the tag starting position for the error
285 const size_t pos
= it
- text
.begin();
// A '/' immediately following the opening bracket indicates a closing tag.
288 if ( ++it
!= end
&& *it
== '/' )
// Note that ExtractUntil() advances it while looking for the closing '>'.
294 const wxString tag
= ExtractUntil('>', it
, end
);
297 wxLogDebug("%s at %lu.",
298 it
== end
? "Unclosed tag starting"
// For the opening tags, anything following the first space is the attributes
// string which is parsed by ParseAttrs().
307 const wxString name
= tag
.BeforeFirst(' ', &attrs
);
309 TagAndAttrs
tagAndAttrs(name
);
310 const wxString err
= ParseAttrs(attrs
, tagAndAttrs
);
313 wxLogDebug("Bad attributes for \"%s\" "
319 tags
.push(tagAndAttrs
);
// A closing tag must match the tag at the top of the stack.
323 if ( tags
.empty() || tags
.top().name
!= tag
)
325 wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
331 if ( !OutputTag(tags
.top(), start
) )
333 wxLogDebug("Unknown tag at %lu.", pos
);
343 wxLogDebug("'>' should be escaped as \">\"; at %lu.",
348 // Processing is somewhat complicated: we need to recognize at
349 // least the "&lt;" entity to allow escaping left angle
350 // brackets in the markup and, in fact, we recognize all of the
351 // standard XML entities for consistency with Pango markup.
354 // However we also allow '&' to appear unescaped, i.e. directly
355 // and not as "&amp;" when it is used to introduce the mnemonic
356 // for the label. In this case we simply leave it alone.
358 // Notice that this logic makes it impossible to have a label
359 // with "lt;" inside it and using "l" as mnemonic but hopefully
360 // this shouldn't be a problem in practice.
// This is the position of the character following the '&'.
362 const size_t pos
= it
- text
.begin() + 1;
365 for ( n
= 0; n
< WXSIZEOF(xmlEntities
); n
++ )
367 const XMLEntity
& xmlEnt
= xmlEntities
[n
];
// An entity is recognized only if its name is immediately followed by ';'.
368 if ( text
.compare(pos
, xmlEnt
.len
, xmlEnt
.name
) == 0
369 && text
[pos
+ xmlEnt
.len
] == ';' )
371 // Escape the ampersands if needed to protect them
372 // from being interpreted as mnemonics indicators.
373 if ( xmlEnt
.value
== '&' )
376 current
+= xmlEnt
.value
;
378 it
+= xmlEnt
.len
+ 1; // +1 for '&' itself
// The loop index is less than the array size only if an entity was found.
384 if ( n
< WXSIZEOF(xmlEntities
) )
386 //else: fall through, '&' is not special
396 wxLogDebug("Missing closing tag for \"%s\"", tags
.top().name
);
// Flush the text remaining after the last tag, if any.
400 if ( !current
.empty() )
401 m_output
.OnText(current
);
// Return the string built from the given text by replacing each character
// equal to the value of one of the xmlEntities elements with the
// corresponding entity, i.e. '&' followed by the entity name and ';'.
//
// Presumably the characters not corresponding to any entity are copied to
// the result unchanged, but the line doing it is not visible in this excerpt
// -- TODO confirm.
407 wxString
wxMarkupParser::Quote(const wxString
& text
)
// The result is at least as long as the input.
410 quoted
.reserve(text
.length());
412 for ( wxString::const_iterator it
= text
.begin(); it
!= text
.end(); ++it
)
415 for ( n
= 0; n
< WXSIZEOF(xmlEntities
); n
++ )
417 const XMLEntity
& xmlEnt
= xmlEntities
[n
];
418 if ( *it
== xmlEnt
.value
)
420 quoted
<< '&' << xmlEnt
.name
<< ';';
// The loop index is equal to the array size if the character didn't match
// any of the entities.
425 if ( n
== WXSIZEOF(xmlEntities
) )
// Return the text accumulated while parsing the given markup with an output
// object which ignores all the tags, i.e. the text with the markup removed.
//
// What is returned if parsing fails is not visible in this excerpt.
433 wxString
wxMarkupParser::Strip(const wxString
& text
)
// Local wxMarkupParserOutput implementation which only remembers the text
// passed to OnText() and does nothing in all the tag-related callbacks.
435 class StripOutput
: public wxMarkupParserOutput
440 const wxString
& GetText() const { return m_text
; }
442 virtual void OnText(const wxString
& text
) { m_text
+= text
; }
444 virtual void OnBoldStart() { }
445 virtual void OnBoldEnd() { }
447 virtual void OnItalicStart() { }
448 virtual void OnItalicEnd() { }
450 virtual void OnUnderlinedStart() { }
451 virtual void OnUnderlinedEnd() { }
453 virtual void OnStrikethroughStart() { }
454 virtual void OnStrikethroughEnd() { }
456 virtual void OnBigStart() { }
457 virtual void OnBigEnd() { }
459 virtual void OnSmallStart() { }
460 virtual void OnSmallEnd() { }
462 virtual void OnTeletypeStart() { }
463 virtual void OnTeletypeEnd() { }
465 virtual void OnSpanStart(const wxMarkupSpanAttributes
& WXUNUSED(a
)) { }
466 virtual void OnSpanEnd(const wxMarkupSpanAttributes
& WXUNUSED(a
)) { }
473 wxMarkupParser
parser(output
);
474 if ( !parser
.Parse(text
) )
477 return output
.GetText();
480 #endif // wxUSE_MARKUP