src/common/markupparser.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/markupparser.cpp
   3 // Purpose:     Implementation of wxMarkupParser.
   4 // Author:      Vadim Zeitlin
   5 // Created:     2011-02-16
   6 // RCS-ID:      $Id: $
   7 // Copyright:   (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 // ============================================================================
  12 // declarations
  13 // ============================================================================
  14
  15 // ----------------------------------------------------------------------------
  16 // headers
  17 // ----------------------------------------------------------------------------
  18
  19 // for compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #ifndef WX_PRECOMP
  27 #endif // WX_PRECOMP
  28
  29 #include "wx/private/markupparser.h"
  30
  31 #include "wx/stack.h"
  32
  33 namespace
  34 {
  35
  36 // ----------------------------------------------------------------------------
  37 // constants
  38 // ----------------------------------------------------------------------------
  39
  // Description of a single predefined XML 1.0 entity: "&name;" appearing in
  // the markup stands for the character "value".
  struct XMLEntity
  {
      const char *name;   // entity name without the leading '&' and final ';'
      int len;            // == strlen(name)
      char value;         // the character the entity stands for
  };

  // Table of all the predefined XML 1.0 entities.
  const XMLEntity xmlEntities[] =
  {
      { "lt",   2, '<'  },
      { "gt",   2, '>'  },
      { "amp",  3, '&'  },
      { "apos", 4, '\'' },
      { "quot", 4, '"'  },
  };
  54
  55 // ----------------------------------------------------------------------------
  56 // helper functions
  57 // ----------------------------------------------------------------------------
  58
  59 wxString
  60 ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end)
  61 {
  62     wxString str;
  63     for ( ; it != end; ++it )
  64     {
  65         if ( *it == ch )
  66             return str;
  67
  68         str += *it;
  69     }
  70
  71     // Return empty string to indicate that we didn't find ch at all.
  72     return wxString();
  73 }
  74
  75 } // anonymous namespace
  76
  77 // ============================================================================
  78 // wxMarkupParser implementation
  79 // ============================================================================
  80
  81 wxString
  82 wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs)
  83 {
  84     if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() )
  85     {
  86         return wxString::Format("tag \"%s\" can't have attributes",
  87                                 tagAndAttrs.name);
  88     }
  89
  90     // TODO: Parse more attributes described at
  91     //       http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
  92     //       and at least ignore them gracefully instead of giving errors (but
  93     //       quite a few of them could be supported as well, notable font_desc).
  94
  95     wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs;
  96
  97     while ( !attrs.empty() )
  98     {
  99         wxString rest;
 100         const wxString attr = attrs.BeforeFirst(' ', &rest);
 101         attrs = rest;
 102
 103         // The "original" versions are used for error messages only.
 104         wxString valueOrig;
 105         const wxString nameOrig = attr.BeforeFirst('=', &valueOrig);
 106
 107         const wxString name = nameOrig.Lower();
 108         wxString value = valueOrig.Lower();
 109
 110         // All attributes values must be quoted.
 111         if ( value.length() < 2 ||
 112                 (value[0] != value.Last()) ||
 113                     (value[0] != '"' && value[0] != '\'') )
 114         {
 115             return wxString::Format("bad quoting for value of \"%s\"",
 116                                     nameOrig);
 117         }
 118
 119         value.assign(value, 1, value.length() - 2);
 120
 121         if ( name == "foreground" || name == "fgcolor" || name == "color" )
 122         {
 123             spanAttrs.m_fgCol = value;
 124         }
 125         else if ( name == "background" || name == "bgcolor" )
 126         {
 127             spanAttrs.m_bgCol = value;
 128         }
 129         else if ( name == "font_family" || name == "face" )
 130         {
 131             spanAttrs.m_fontFace = value;
 132         }
 133         else if ( name == "font_weight" || name == "weight" )
 134         {
 135             unsigned long weight;
 136
 137             if ( value == "ultralight" || value == "light" || value == "normal" )
 138                 spanAttrs.m_isBold = wxMarkupSpanAttributes::No;
 139             else if ( value == "bold" || value == "ultrabold" || value == "heavy" )
 140                 spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes;
 141             else if ( value.ToULong(&weight) )
 142                 spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes
 143                                                    : wxMarkupSpanAttributes::No;
 144             else
 145                 return wxString::Format("invalid font weight \"%s\"", valueOrig);
 146         }
 147         else if ( name == "font_style" || name == "style" )
 148         {
 149             if ( value == "normal" )
 150                 spanAttrs.m_isItalic = wxMarkupSpanAttributes::No;
 151             else if ( value == "oblique" || value == "italic" )
 152                 spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes;
 153             else
 154                 return wxString::Format("invalid font style \"%s\"", valueOrig);
 155         }
 156         else if ( name == "size" )
 157         {
 158             unsigned long size;
 159             if ( value.ToULong(&size) )
 160             {
 161                 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts;
 162                 spanAttrs.m_fontSize = size;
 163             }
 164             else if ( value == "smaller" || value == "larger" )
 165             {
 166                 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative;
 167                 spanAttrs.m_fontSize = value == "smaller" ? -1 : +1;
 168             }
 169             else // Must be a CSS-like size specification
 170             {
 171                 int cssSize = 1;
 172                 wxString rest;
 173                 if ( value.StartsWith("xx-", &rest) )
 174                     cssSize = 3;
 175                 else if ( value.StartsWith("x-", &rest) )
 176                     cssSize = 2;
 177                 else if ( value == "medium" )
 178                     cssSize = 0;
 179                 else
 180                     rest = value;
 181
 182                 if ( cssSize != 0 )
 183                 {
 184                     if ( rest == "small" )
 185                         cssSize = -cssSize;
 186                     else if ( rest != "large" )
 187                         return wxString::Format("invalid font size \"%s\"",
 188                                                 valueOrig);
 189                 }
 190
 191                 spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic;
 192                 spanAttrs.m_fontSize = cssSize;
 193             }
 194         }
 195     }
 196
 197     return wxString();
 198 }
 199
 200 bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start)
 201 {
 202     if ( tagAndAttrs.name.CmpNoCase("span") == 0 )
 203     {
 204         if ( start )
 205             m_output.OnSpanStart(tagAndAttrs.attrs);
 206         else
 207             m_output.OnSpanEnd(tagAndAttrs.attrs);
 208
 209         return true;
 210     }
 211     else // non-span tag
 212     {
 213         static const struct TagHandler
 214         {
 215             const char *name;
 216             void (wxMarkupParserOutput::*startFunc)();
 217             void (wxMarkupParserOutput::*endFunc)();
 218         } tagHandlers[] =
 219         {
 220             { "b", &wxMarkupParserOutput::OnBoldStart,
 221                    &wxMarkupParserOutput::OnBoldEnd },
 222             { "i", &wxMarkupParserOutput::OnItalicStart,
 223                    &wxMarkupParserOutput::OnItalicEnd },
 224             { "u", &wxMarkupParserOutput::OnUnderlinedStart,
 225                    &wxMarkupParserOutput::OnUnderlinedEnd },
 226             { "s", &wxMarkupParserOutput::OnStrikethroughStart,
 227                    &wxMarkupParserOutput::OnStrikethroughEnd },
 228             { "big", &wxMarkupParserOutput::OnBigStart,
 229                      &wxMarkupParserOutput::OnBigEnd },
 230             { "small", &wxMarkupParserOutput::OnSmallStart,
 231                        &wxMarkupParserOutput::OnSmallEnd },
 232             { "tt", &wxMarkupParserOutput::OnTeletypeStart,
 233                     &wxMarkupParserOutput::OnTeletypeEnd },
 234         };
 235
 236         for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ )
 237         {
 238             const TagHandler& h = tagHandlers[n];
 239
 240             if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 )
 241             {
 242                 if ( start )
 243                     (m_output.*(h.startFunc))();
 244                 else
 245                     (m_output.*(h.endFunc))();
 246
 247                 return true;
 248             }
 249         }
 250     }
 251
 252     // Unknown tag name.
 253     return false;
 254 }
 255
 256 bool wxMarkupParser::Parse(const wxString& text)
 257 {
 258     // The stack containing the names and corresponding attributes (which are
 259     // actually only used for <span> tags) of all of the currently opened tag
 260     // or none if we're not inside any tag.
 261     wxStack<TagAndAttrs> tags;
 262
 263     // Current run of text.
 264     wxString current;
 265
 266     const wxString::const_iterator end = text.end();
 267     for ( wxString::const_iterator it = text.begin(); it != end; ++it )
 268     {
 269         switch ( (*it).GetValue() )
 270         {
 271             case '<':
 272                 {
 273                     // Flush the text preceding the tag, if any.
 274                     if ( !current.empty() )
 275                     {
 276                         m_output.OnText(current);
 277                         current.clear();
 278                     }
 279
 280                     // Remember the tag starting position for the error
 281                     // messages.
 282                     const size_t pos = it - text.begin();
 283
 284                     bool start = true;
 285                     if ( ++it != end && *it == '/' )
 286                     {
 287                         start = false;
 288                         ++it;
 289                     }
 290
 291                     const wxString tag = ExtractUntil('>', it, end);
 292                     if ( tag.empty() )
 293                     {
 294                         wxLogDebug("%s at %lu.",
 295                                    it == end ? "Unclosed tag starting"
 296                                              : "Empty tag",
 297                                    pos);
 298                         return false;
 299                     }
 300
 301                     if ( start )
 302                     {
 303                         wxString attrs;
 304                         const wxString name = tag.BeforeFirst(' ', &attrs);
 305
 306                         TagAndAttrs tagAndAttrs(name);
 307                         const wxString err = ParseAttrs(attrs, tagAndAttrs);
 308                         if ( !err.empty() )
 309                         {
 310                             wxLogDebug("Bad attributes for \"%s\" "
 311                                        "at %lu: %s.",
 312                                        name, pos, err);
 313                             return false;
 314                         }
 315
 316                         tags.push(tagAndAttrs);
 317                     }
 318                     else // end tag
 319                     {
 320                         if ( tags.empty() || tags.top().name != tag )
 321                         {
 322                             wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
 323                                        tag, pos);
 324                             return false;
 325                         }
 326                     }
 327
 328                     if ( !OutputTag(tags.top(), start) )
 329                     {
 330                         wxLogDebug("Unknown tag at %lu.", pos);
 331                         return false;
 332                     }
 333
 334                     if ( !start )
 335                         tags.pop();
 336                 }
 337                 break;
 338
 339             case '>':
 340                 wxLogDebug("'>' should be escaped as \"&gt\"; at %lu.",
 341                            it - text.begin());
 342                 break;
 343
 344             case '&':
 345                 // Processing is somewhat complicated: we need to recognize at
 346                 // least the "&lt;" entity to allow escaping left square
 347                 // brackets in the markup and, in fact, we recognize all of the
 348                 // standard XML entities for consistency with Pango markup
 349                 // parsing.
 350                 //
 351                 // However we also allow '&' to appear unescaped, i.e. directly
 352                 // and not as "&amp;" when it is used to introduce the mnemonic
 353                 // for the label. In this case we simply leave it alone.
 354                 //
 355                 // Notice that this logic makes it impossible to have a label
 356                 // with "lt;" inside it and using "l" as mnemonic but hopefully
 357                 // this shouldn't be a problem in practice.
 358                 {
 359                     const size_t pos = it - text.begin() + 1;
 360
 361                     unsigned n;
 362                     for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
 363                     {
 364                         const XMLEntity& xmlEnt = xmlEntities[n];
 365                         if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0
 366                                 && text[pos + xmlEnt.len] == ';' )
 367                         {
 368                             // Escape the ampersands if needed to protect them
 369                             // from being interpreted as mnemonics indicators.
 370                             if ( xmlEnt.value == '&' )
 371                                 current += "&&";
 372                             else
 373                                 current += xmlEnt.value;
 374
 375                             it += xmlEnt.len + 1; // +1 for '&' itself
 376
 377                             break;
 378                         }
 379                     }
 380
 381                     if ( n < WXSIZEOF(xmlEntities) )
 382                         break;
 383                     //else: fall through, '&' is not special
 384                 }
 385
 386             default:
 387                 current += *it;
 388         }
 389     }
 390
 391     if ( !tags.empty() )
 392     {
 393         wxLogDebug("Missing closing tag for \"%s\"", tags.top().name);
 394         return false;
 395     }
 396
 397     if ( !current.empty() )
 398         m_output.OnText(current);
 399
 400     return true;
 401 }
 402
 403 /* static */
 404 wxString wxMarkupParser::Quote(const wxString& text)
 405 {
 406     wxString quoted;
 407     quoted.reserve(text.length());
 408
 409     for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it )
 410     {
 411         unsigned n;
 412         for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
 413         {
 414             const XMLEntity& xmlEnt = xmlEntities[n];
 415             if ( *it == xmlEnt.value )
 416             {
 417                 quoted << '&' << xmlEnt.name << ';';
 418                 break;
 419             }
 420         }
 421
 422         if ( n == WXSIZEOF(xmlEntities) )
 423             quoted += *it;
 424     }
 425
 426     return quoted;
 427 }