]>
git.saurik.com Git - wxWidgets.git/blob - src/common/markupparser.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/markupparser.cpp
3 // Purpose: Implementation of wxMarkupParser.
4 // Author: Vadim Zeitlin
7 // Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
11 // ============================================================================
13 // ============================================================================
15 // ----------------------------------------------------------------------------
17 // ----------------------------------------------------------------------------
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
29 #include "wx/private/markupparser.h"
36 // ----------------------------------------------------------------------------
38 // ----------------------------------------------------------------------------
40 // Array containing the predefined XML 1.0 entities.
41 const struct XMLEntity
44 int len
; // == strlen(name)
55 // ----------------------------------------------------------------------------
57 // ----------------------------------------------------------------------------
60 ExtractUntil(char ch
, wxString::const_iterator
& it
, wxString::const_iterator end
)
63 for ( ; it
!= end
; ++it
)
71 // Return empty string to indicate that we didn't find ch at all.
75 } // anonymous namespace
77 // ============================================================================
78 // wxMarkupParser implementation
79 // ============================================================================
82 wxMarkupParser::ParseAttrs(wxString attrs
, TagAndAttrs
& tagAndAttrs
)
84 if ( tagAndAttrs
.name
.CmpNoCase("span") != 0 && !attrs
.empty() )
86 return wxString::Format("tag \"%s\" can't have attributes",
90 // TODO: Parse more attributes described at
91 // http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
92 // and at least ignore them gracefully instead of giving errors (but
93 // quite a few of them could be supported as well, notably font_desc).
95 wxMarkupSpanAttributes
& spanAttrs
= tagAndAttrs
.attrs
;
97 while ( !attrs
.empty() )
100 const wxString attr
= attrs
.BeforeFirst(' ', &rest
);
103 // The "original" versions are used for error messages only.
105 const wxString nameOrig
= attr
.BeforeFirst('=', &valueOrig
);
107 const wxString name
= nameOrig
.Lower();
108 wxString value
= valueOrig
.Lower();
110 // All attribute values must be quoted.
111 if ( value
.length() < 2 ||
112 (value
[0] != value
.Last()) ||
113 (value
[0] != '"' && value
[0] != '\'') )
115 return wxString::Format("bad quoting for value of \"%s\"",
119 value
.assign(value
, 1, value
.length() - 2);
121 if ( name
== "foreground" || name
== "fgcolor" || name
== "color" )
123 spanAttrs
.m_fgCol
= value
;
125 else if ( name
== "background" || name
== "bgcolor" )
127 spanAttrs
.m_bgCol
= value
;
129 else if ( name
== "font_family" || name
== "face" )
131 spanAttrs
.m_fontFace
= value
;
133 else if ( name
== "font_weight" || name
== "weight" )
135 unsigned long weight
;
137 if ( value
== "ultralight" || value
== "light" || value
== "normal" )
138 spanAttrs
.m_isBold
= wxMarkupSpanAttributes::No
;
139 else if ( value
== "bold" || value
== "ultrabold" || value
== "heavy" )
140 spanAttrs
.m_isBold
= wxMarkupSpanAttributes::Yes
;
141 else if ( value
.ToULong(&weight
) )
142 spanAttrs
.m_isBold
= weight
>= 600 ? wxMarkupSpanAttributes::Yes
143 : wxMarkupSpanAttributes::No
;
145 return wxString::Format("invalid font weight \"%s\"", valueOrig
);
147 else if ( name
== "font_style" || name
== "style" )
149 if ( value
== "normal" )
150 spanAttrs
.m_isItalic
= wxMarkupSpanAttributes::No
;
151 else if ( value
== "oblique" || value
== "italic" )
152 spanAttrs
.m_isItalic
= wxMarkupSpanAttributes::Yes
;
154 return wxString::Format("invalid font style \"%s\"", valueOrig
);
156 else if ( name
== "size" )
159 if ( value
.ToULong(&size
) )
161 spanAttrs
.m_sizeKind
= wxMarkupSpanAttributes::Size_PointParts
;
162 spanAttrs
.m_fontSize
= size
;
164 else if ( value
== "smaller" || value
== "larger" )
166 spanAttrs
.m_sizeKind
= wxMarkupSpanAttributes::Size_Relative
;
167 spanAttrs
.m_fontSize
= value
== "smaller" ? -1 : +1;
169 else // Must be a CSS-like size specification
173 if ( value
.StartsWith("xx-", &rest
) )
175 else if ( value
.StartsWith("x-", &rest
) )
177 else if ( value
== "medium" )
184 if ( rest
== "small" )
186 else if ( rest
!= "large" )
187 return wxString::Format("invalid font size \"%s\"",
191 spanAttrs
.m_sizeKind
= wxMarkupSpanAttributes::Size_Symbolic
;
192 spanAttrs
.m_fontSize
= cssSize
;
200 bool wxMarkupParser::OutputTag(const TagAndAttrs
& tagAndAttrs
, bool start
)
202 if ( tagAndAttrs
.name
.CmpNoCase("span") == 0 )
205 m_output
.OnSpanStart(tagAndAttrs
.attrs
);
207 m_output
.OnSpanEnd(tagAndAttrs
.attrs
);
213 static const struct TagHandler
216 void (wxMarkupParserOutput::*startFunc
)();
217 void (wxMarkupParserOutput::*endFunc
)();
220 { "b", &wxMarkupParserOutput::OnBoldStart
,
221 &wxMarkupParserOutput::OnBoldEnd
},
222 { "i", &wxMarkupParserOutput::OnItalicStart
,
223 &wxMarkupParserOutput::OnItalicEnd
},
224 { "u", &wxMarkupParserOutput::OnUnderlinedStart
,
225 &wxMarkupParserOutput::OnUnderlinedEnd
},
226 { "s", &wxMarkupParserOutput::OnStrikethroughStart
,
227 &wxMarkupParserOutput::OnStrikethroughEnd
},
228 { "big", &wxMarkupParserOutput::OnBigStart
,
229 &wxMarkupParserOutput::OnBigEnd
},
230 { "small", &wxMarkupParserOutput::OnSmallStart
,
231 &wxMarkupParserOutput::OnSmallEnd
},
232 { "tt", &wxMarkupParserOutput::OnTeletypeStart
,
233 &wxMarkupParserOutput::OnTeletypeEnd
},
236 for ( unsigned n
= 0; n
< WXSIZEOF(tagHandlers
); n
++ )
238 const TagHandler
& h
= tagHandlers
[n
];
240 if ( tagAndAttrs
.name
.CmpNoCase(h
.name
) == 0 )
243 (m_output
.*(h
.startFunc
))();
245 (m_output
.*(h
.endFunc
))();
256 bool wxMarkupParser::Parse(const wxString
& text
)
258 // The stack containing the names and corresponding attributes (which are
259 // actually only used for <span> tags) of all of the currently opened tags,
260 // or empty if we're not inside any tag.
261 wxStack
<TagAndAttrs
> tags
;
263 // Current run of text.
266 const wxString::const_iterator end
= text
.end();
267 for ( wxString::const_iterator it
= text
.begin(); it
!= end
; ++it
)
269 switch ( (*it
).GetValue() )
273 // Flush the text preceding the tag, if any.
274 if ( !current
.empty() )
276 m_output
.OnText(current
);
280 // Remember the tag starting position for the error
282 const size_t pos
= it
- text
.begin();
285 if ( ++it
!= end
&& *it
== '/' )
291 const wxString tag
= ExtractUntil('>', it
, end
);
294 wxLogDebug("%s at %lu.",
295 it
== end
? "Unclosed tag starting"
304 const wxString name
= tag
.BeforeFirst(' ', &attrs
);
306 TagAndAttrs
tagAndAttrs(name
);
307 const wxString err
= ParseAttrs(attrs
, tagAndAttrs
);
310 wxLogDebug("Bad attributes for \"%s\" "
316 tags
.push(tagAndAttrs
);
320 if ( tags
.empty() || tags
.top().name
!= tag
)
322 wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
328 if ( !OutputTag(tags
.top(), start
) )
330 wxLogDebug("Unknown tag at %lu.", pos
);
340 wxLogDebug("'>' should be escaped as \">\"; at %lu.",
345 // Processing is somewhat complicated: we need to recognize at
346 // least the "&lt;" entity to allow escaping left angle
347 // brackets in the markup and, in fact, we recognize all of the
348 // standard XML entities for consistency with Pango markup
351 // However we also allow '&' to appear unescaped, i.e. directly
352 // and not as "&amp;" when it is used to introduce the mnemonic
353 // for the label. In this case we simply leave it alone.
355 // Notice that this logic makes it impossible to have a label
356 // with "lt;" inside it and using "l" as mnemonic but hopefully
357 // this shouldn't be a problem in practice.
359 const size_t pos
= it
- text
.begin() + 1;
362 for ( n
= 0; n
< WXSIZEOF(xmlEntities
); n
++ )
364 const XMLEntity
& xmlEnt
= xmlEntities
[n
];
365 if ( text
.compare(pos
, xmlEnt
.len
, xmlEnt
.name
) == 0
366 && text
[pos
+ xmlEnt
.len
] == ';' )
368 // Escape the ampersands if needed to protect them
369 // from being interpreted as mnemonics indicators.
370 if ( xmlEnt
.value
== '&' )
373 current
+= xmlEnt
.value
;
375 it
+= xmlEnt
.len
+ 1; // +1 for '&' itself
381 if ( n
< WXSIZEOF(xmlEntities
) )
383 //else: fall through, '&' is not special
393 wxLogDebug("Missing closing tag for \"%s\"", tags
.top().name
);
397 if ( !current
.empty() )
398 m_output
.OnText(current
);
404 wxString
wxMarkupParser::Quote(const wxString
& text
)
407 quoted
.reserve(text
.length());
409 for ( wxString::const_iterator it
= text
.begin(); it
!= text
.end(); ++it
)
412 for ( n
= 0; n
< WXSIZEOF(xmlEntities
); n
++ )
414 const XMLEntity
& xmlEnt
= xmlEntities
[n
];
415 if ( *it
== xmlEnt
.value
)
417 quoted
<< '&' << xmlEnt
.name
<< ';';
422 if ( n
== WXSIZEOF(xmlEntities
) )
430 wxString
wxMarkupParser::Strip(const wxString
& text
)
432 class StripOutput
: public wxMarkupParserOutput
437 const wxString
& GetText() const { return m_text
; }
439 virtual void OnText(const wxString
& text
) { m_text
+= text
; }
441 virtual void OnBoldStart() { }
442 virtual void OnBoldEnd() { }
444 virtual void OnItalicStart() { }
445 virtual void OnItalicEnd() { }
447 virtual void OnUnderlinedStart() { }
448 virtual void OnUnderlinedEnd() { }
450 virtual void OnStrikethroughStart() { }
451 virtual void OnStrikethroughEnd() { }
453 virtual void OnBigStart() { }
454 virtual void OnBigEnd() { }
456 virtual void OnSmallStart() { }
457 virtual void OnSmallEnd() { }
459 virtual void OnTeletypeStart() { }
460 virtual void OnTeletypeEnd() { }
462 virtual void OnSpanStart(const wxMarkupSpanAttributes
& WXUNUSED(a
)) { }
463 virtual void OnSpanEnd(const wxMarkupSpanAttributes
& WXUNUSED(a
)) { }
470 wxMarkupParser
parser(output
);
471 if ( !parser
.Parse(text
) )
474 return output
.GetText();