src/html/htmltag.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/html/htmltag.cpp
   3 // Purpose:     wxHtmlTag class (represents single tag)
   4 // Author:      Vaclav Slavik
   5 // RCS-ID:      $Id$
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #include "wx/wxprec.h"
  11
  12 #ifdef __BORLANDC__
  13     #pragma hdrstop
  14 #endif
  15
  16 #if wxUSE_HTML
  17
  18 #include "wx/html/htmltag.h"
  19
  20 #ifndef WX_PRECOMP
  21     #include "wx/colour.h"
  22     #include "wx/wxcrtvararg.h"
  23 #endif
  24
  25 #include "wx/html/htmlpars.h"
  26 #include "wx/html/styleparams.h"
  27
  28 #include "wx/vector.h"
  29
  30 #include <stdio.h> // for vsscanf
  31 #include <stdarg.h>
  32
  33 //-----------------------------------------------------------------------------
  34 // wxHtmlTagsCache
  35 //-----------------------------------------------------------------------------
  36
  37 struct wxHtmlCacheItem
  38 {
  39     // this is "pos" value passed to wxHtmlTag's constructor.
  40     // it is position of '<' character of the tag
  41     wxString::const_iterator Key;
  42
  43     // Tag type
  44     enum Type
  45     {
  46         Type_Normal, // normal tag with a matching ending tag
  47         Type_NoMatchingEndingTag, // there's no ending tag for this tag
  48         Type_EndingTag // this is ending tag </..>
  49     };
  50     Type type;
  51
  52     // end positions for the tag:
  53     // end1 is '<' of ending tag,
  54     // end2 is '>' or both are
  55     wxString::const_iterator End1, End2;
  56
  57     // name of this tag
  58     wxChar *Name;
  59 };
  60
  61 // NB: this is an empty class and not typedef because of forward declaration
  62 class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
  63 {
  64 };
  65
  66 bool wxIsCDATAElement(const wxChar *tag)
  67 {
  68     return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
  69            (wxStrcmp(tag, wxT("STYLE")) == 0);
  70 }
  71
  72 bool wxIsCDATAElement(const wxString& tag)
  73 {
  74     return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) ||
  75            (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0);
  76 }
  77
  78 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
  79 {
  80     m_Cache = new wxHtmlTagsCacheData;
  81     m_CachePos = 0;
  82
  83     wxChar tagBuffer[256];
  84
  85     const wxString::const_iterator end = source.end();
  86     for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
  87     {
  88         if (*pos != wxT('<'))
  89             continue;
  90
  91         // possible tag start found:
  92
  93         // don't cache comment tags
  94         if ( wxHtmlParser::SkipCommentTag(pos, end) )
  95             continue;
  96
  97         // Remember the starting tag position.
  98         wxString::const_iterator stpos = pos++;
  99
 100         // And look for the ending one.
 101         int i;
 102         for ( i = 0;
 103               pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
 104               *pos != wxT('>') && !wxIsspace(*pos);
 105               ++i, ++pos )
 106         {
 107             tagBuffer[i] = (wxChar)wxToupper(*pos);
 108         }
 109         tagBuffer[i] = wxT('\0');
 110
 111         while (pos < end && *pos != wxT('>'))
 112             ++pos;
 113
 114         if ( pos == end )
 115         {
 116             // We didn't find a closing bracket, this is not a valid tag after
 117             // all. Notice that we need to roll back pos to avoid creating an
 118             // invalid iterator when "++pos" is done in the loop statement.
 119             --pos;
 120
 121             continue;
 122         }
 123
 124         // We have a valid tag, add it to the cache.
 125         size_t tg = Cache().size();
 126         Cache().push_back(wxHtmlCacheItem());
 127         Cache()[tg].Key = stpos;
 128         Cache()[tg].Name = new wxChar[i+1];
 129         memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
 130
 131         if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
 132         {
 133             Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
 134             // find matching begin tag:
 135             for (i = tg; i >= 0; i--)
 136             {
 137                 if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
 138                 {
 139                     Cache()[i].type = wxHtmlCacheItem::Type_Normal;
 140                     Cache()[i].End1 = stpos;
 141                     Cache()[i].End2 = pos + 1;
 142                     break;
 143                 }
 144             }
 145         }
 146         else
 147         {
 148             Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;
 149
 150             if (wxIsCDATAElement(tagBuffer))
 151             {
 152                 // store the orig pos in case we are missing the closing
 153                 // tag (see below)
 154                 const wxString::const_iterator old_pos = pos;
 155                 bool foundCloseTag = false;
 156
 157                 // find next matching tag
 158                 int tag_len = wxStrlen(tagBuffer);
 159                 while (pos < end)
 160                 {
 161                     // find the ending tag
 162                     while (pos + 1 < end &&
 163                            (*pos != '<' || *(pos+1) != '/'))
 164                         ++pos;
 165                     if (*pos == '<')
 166                         ++pos;
 167
 168                     // see if it matches
 169                     int match_pos = 0;
 170                     while (pos < end && match_pos < tag_len )
 171                     {
 172                         wxChar c = *pos;
 173                         if ( c == '>' || c == '<' )
 174                             break;
 175
 176                         // cast to wxChar needed to suppress warning in
 177                         // Unicode build
 178                         if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
 179                         {
 180                             ++match_pos;
 181                         }
 182                         else if (c == wxT(' ') || c == wxT('\n') ||
 183                             c == wxT('\r') || c == wxT('\t'))
 184                         {
 185                             // need to skip over these
 186                         }
 187                         else
 188                         {
 189                             match_pos = 0;
 190                         }
 191                         ++pos;
 192                     }
 193
 194                     // found a match
 195                     if (match_pos == tag_len)
 196                     {
 197                         pos = pos - tag_len - 3;
 198                         foundCloseTag = true;
 199                         break;
 200                     }
 201                     else // keep looking for the closing tag
 202                     {
 203                         ++pos;
 204                     }
 205                 }
 206                 if (!foundCloseTag)
 207                 {
 208                     // we didn't find closing tag; this means the markup
 209                     // is incorrect and the best thing we can do is to
 210                     // ignore the unclosed tag and continue parsing as if
 211                     // it didn't exist:
 212                     pos = old_pos;
 213                 }
 214             }
 215         }
 216     }
 217
 218     // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
 219     for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
 220           i != Cache().end(); ++i )
 221     {
 222         wxDELETEA(i->Name);
 223     }
 224 }
 225
 226 wxHtmlTagsCache::~wxHtmlTagsCache()
 227 {
 228     delete m_Cache;
 229 }
 230
 231 void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
 232                                const wxString::const_iterator& inputEnd,
 233                                wxString::const_iterator *end1,
 234                                wxString::const_iterator *end2,
 235                                bool *hasEnding)
 236 {
 237     if (Cache().empty())
 238     {
 239         *end1 =
 240         *end2 = inputEnd;
 241         *hasEnding = true;
 242         return;
 243     }
 244
 245     if (Cache()[m_CachePos].Key != at)
 246     {
 247         int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
 248         do
 249         {
 250             m_CachePos += delta;
 251
 252             if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
 253             {
 254                 if ( m_CachePos < 0 )
 255                     m_CachePos = 0;
 256                 else
 257                     m_CachePos = Cache().size() - 1;
 258                 // something is very wrong with HTML, give up by returning an
 259                 // impossibly large value which is going to be ignored by the
 260                 // caller
 261                 *end1 =
 262                 *end2 = inputEnd;
 263                 *hasEnding = true;
 264                 return;
 265             }
 266         }
 267         while (Cache()[m_CachePos].Key != at);
 268     }
 269
 270     switch ( Cache()[m_CachePos].type )
 271     {
 272         case wxHtmlCacheItem::Type_Normal:
 273             *end1 = Cache()[m_CachePos].End1;
 274             *end2 = Cache()[m_CachePos].End2;
 275             *hasEnding = true;
 276             break;
 277
 278         case wxHtmlCacheItem::Type_EndingTag:
 279             wxFAIL_MSG("QueryTag called for ending tag - can't be");
 280             // but if it does happen, fall through, better than crashing
 281
 282         case wxHtmlCacheItem::Type_NoMatchingEndingTag:
 283             // If input HTML is invalid and there's no closing tag for this
 284             // one, pretend that it runs all the way to the end of input
 285             *end1 = inputEnd;
 286             *end2 = inputEnd;
 287             *hasEnding = false;
 288             break;
 289     }
 290 }
 291
 292
 293
 294
 295 //-----------------------------------------------------------------------------
 296 // wxHtmlTag
 297 //-----------------------------------------------------------------------------
 298
 299 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
 300                      const wxString *source,
 301                      const wxString::const_iterator& pos,
 302                      const wxString::const_iterator& end_pos,
 303                      wxHtmlTagsCache *cache,
 304                      wxHtmlEntitiesParser *entParser)
 305 {
 306     /* Setup DOM relations */
 307
 308     m_Next = NULL;
 309     m_FirstChild = m_LastChild = NULL;
 310     m_Parent = parent;
 311     if (parent)
 312     {
 313         m_Prev = m_Parent->m_LastChild;
 314         if (m_Prev == NULL)
 315             m_Parent->m_FirstChild = this;
 316         else
 317             m_Prev->m_Next = this;
 318         m_Parent->m_LastChild = this;
 319     }
 320     else
 321         m_Prev = NULL;
 322
 323     /* Find parameters and their values: */
 324
 325     wxChar c wxDUMMY_INITIALIZE(0);
 326
 327     // fill-in name, params and begin pos:
 328     wxString::const_iterator i(pos+1);
 329
 330     // find tag's name and convert it to uppercase:
 331     while ((i < end_pos) &&
 332            ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
 333              c != wxT('\n') && c != wxT('\t') &&
 334              c != wxT('>') && c != wxT('/')))
 335     {
 336         if ((c >= wxT('a')) && (c <= wxT('z')))
 337             c -= (wxT('a') - wxT('A'));
 338         m_Name << c;
 339     }
 340
 341     // if the tag has parameters, read them and "normalize" them,
 342     // i.e. convert to uppercase, replace whitespaces by spaces and
 343     // remove whitespaces around '=':
 344     if (*(i-1) != wxT('>'))
 345     {
 346         #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
 347                              c == wxT('\n') || c == wxT('\t'))
 348         wxString pname, pvalue;
 349         wxChar quote;
 350         enum
 351         {
 352             ST_BEFORE_NAME = 1,
 353             ST_NAME,
 354             ST_BEFORE_EQ,
 355             ST_BEFORE_VALUE,
 356             ST_VALUE
 357         } state;
 358
 359         quote = 0;
 360         state = ST_BEFORE_NAME;
 361         while (i < end_pos)
 362         {
 363             c = *(i++);
 364
 365             if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
 366             {
 367                 if (state == ST_BEFORE_EQ || state == ST_NAME)
 368                 {
 369                     m_ParamNames.Add(pname);
 370                     m_ParamValues.Add(wxGetEmptyString());
 371                 }
 372                 else if (state == ST_VALUE && quote == 0)
 373                 {
 374                     m_ParamNames.Add(pname);
 375                     if (entParser)
 376                         m_ParamValues.Add(entParser->Parse(pvalue));
 377                     else
 378                         m_ParamValues.Add(pvalue);
 379                 }
 380                 break;
 381             }
 382             switch (state)
 383             {
 384                 case ST_BEFORE_NAME:
 385                     if (!IS_WHITE(c))
 386                     {
 387                         pname = c;
 388                         state = ST_NAME;
 389                     }
 390                     break;
 391                 case ST_NAME:
 392                     if (IS_WHITE(c))
 393                         state = ST_BEFORE_EQ;
 394                     else if (c == wxT('='))
 395                         state = ST_BEFORE_VALUE;
 396                     else
 397                         pname << c;
 398                     break;
 399                 case ST_BEFORE_EQ:
 400                     if (c == wxT('='))
 401                         state = ST_BEFORE_VALUE;
 402                     else if (!IS_WHITE(c))
 403                     {
 404                         m_ParamNames.Add(pname);
 405                         m_ParamValues.Add(wxGetEmptyString());
 406                         pname = c;
 407                         state = ST_NAME;
 408                     }
 409                     break;
 410                 case ST_BEFORE_VALUE:
 411                     if (!IS_WHITE(c))
 412                     {
 413                         if (c == wxT('"') || c == wxT('\''))
 414                             quote = c, pvalue = wxGetEmptyString();
 415                         else
 416                             quote = 0, pvalue = c;
 417                         state = ST_VALUE;
 418                     }
 419                     break;
 420                 case ST_VALUE:
 421                     if ((quote != 0 && c == quote) ||
 422                         (quote == 0 && IS_WHITE(c)))
 423                     {
 424                         m_ParamNames.Add(pname);
 425                         if (quote == 0)
 426                         {
 427                             // VS: backward compatibility, no real reason,
 428                             //     but wxHTML code relies on this... :(
 429                             pvalue.MakeUpper();
 430                         }
 431                         if (entParser)
 432                             m_ParamValues.Add(entParser->Parse(pvalue));
 433                         else
 434                             m_ParamValues.Add(pvalue);
 435                         state = ST_BEFORE_NAME;
 436                     }
 437                     else
 438                         pvalue << c;
 439                     break;
 440             }
 441         }
 442
 443         #undef IS_WHITE
 444     }
 445     m_Begin = i;
 446     cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
 447     if (m_End1 > end_pos) m_End1 = end_pos;
 448     if (m_End2 > end_pos) m_End2 = end_pos;
 449
 450 #if WXWIN_COMPATIBILITY_2_8
 451     m_sourceStart = source->begin();
 452 #endif
 453
 454     // Try to parse any style parameters that can be handled simply by
 455     // converting them to the equivalent HTML 3 attributes: this is a far cry
 456     // from perfect but better than nothing.
 457     static const struct EquivAttr
 458     {
 459         const char *style;
 460         const char *attr;
 461     } equivAttrs[] =
 462     {
 463         { "text-align",         "ALIGN"         },
 464         { "width",              "WIDTH"         },
 465         { "vertical-align",     "VALIGN"        },
 466         { "background",         "BGCOLOR"       },
 467     };
 468
 469     wxHtmlStyleParams styleParams(*this);
 470     for ( unsigned n = 0; n < WXSIZEOF(equivAttrs); n++ )
 471     {
 472         const EquivAttr& ea = equivAttrs[n];
 473         if ( styleParams.HasParam(ea.style) && !HasParam(ea.attr) )
 474         {
 475             m_ParamNames.Add(ea.attr);
 476             m_ParamValues.Add(styleParams.GetParam(ea.style));
 477         }
 478     }
 479 }
 480
 481 wxHtmlTag::~wxHtmlTag()
 482 {
 483     wxHtmlTag *t1, *t2;
 484     t1 = m_FirstChild;
 485     while (t1)
 486     {
 487         t2 = t1->GetNextSibling();
 488         delete t1;
 489         t1 = t2;
 490     }
 491 }
 492
 493 bool wxHtmlTag::HasParam(const wxString& par) const
 494 {
 495     return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
 496 }
 497
 498 wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
 499 {
 500     int index = m_ParamNames.Index(par, false);
 501     if (index == wxNOT_FOUND)
 502         return wxGetEmptyString();
 503     if (with_quotes)
 504     {
 505         // VS: backward compatibility, seems to be never used by wxHTML...
 506         wxString s;
 507         s << wxT('"') << m_ParamValues[index] << wxT('"');
 508         return s;
 509     }
 510     else
 511         return m_ParamValues[index];
 512 }
 513
 514 int wxHtmlTag::ScanParam(const wxString& par,
 515                          const char *format,
 516                          void *param) const
 517 {
 518     wxString parval = GetParam(par);
 519     return wxSscanf(parval, format, param);
 520 }
 521
 522 int wxHtmlTag::ScanParam(const wxString& par,
 523                          const wchar_t *format,
 524                          void *param) const
 525 {
 526     wxString parval = GetParam(par);
 527     return wxSscanf(parval, format, param);
 528 }
 529
 530 /* static */
 531 bool wxHtmlTag::ParseAsColour(const wxString& str, wxColour *clr)
 532 {
 533     wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
 534
 535     // handle colours defined in HTML 4.0 first:
 536     if (str.length() > 1 && str[0] != wxT('#'))
 537     {
 538         #define HTML_COLOUR(name, r, g, b)              \
 539             if (str.IsSameAs(wxS(name), false))         \
 540                 { clr->Set(r, g, b); return true; }
 541         HTML_COLOUR("black",   0x00,0x00,0x00)
 542         HTML_COLOUR("silver",  0xC0,0xC0,0xC0)
 543         HTML_COLOUR("gray",    0x80,0x80,0x80)
 544         HTML_COLOUR("white",   0xFF,0xFF,0xFF)
 545         HTML_COLOUR("maroon",  0x80,0x00,0x00)
 546         HTML_COLOUR("red",     0xFF,0x00,0x00)
 547         HTML_COLOUR("purple",  0x80,0x00,0x80)
 548         HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
 549         HTML_COLOUR("green",   0x00,0x80,0x00)
 550         HTML_COLOUR("lime",    0x00,0xFF,0x00)
 551         HTML_COLOUR("olive",   0x80,0x80,0x00)
 552         HTML_COLOUR("yellow",  0xFF,0xFF,0x00)
 553         HTML_COLOUR("navy",    0x00,0x00,0x80)
 554         HTML_COLOUR("blue",    0x00,0x00,0xFF)
 555         HTML_COLOUR("teal",    0x00,0x80,0x80)
 556         HTML_COLOUR("aqua",    0x00,0xFF,0xFF)
 557         #undef HTML_COLOUR
 558     }
 559
 560     // then try to parse #rrggbb representations or set from other well
 561     // known names (note that this doesn't strictly conform to HTML spec,
 562     // but it doesn't do real harm -- but it *must* be done after the standard
 563     // colors are handled above):
 564     if (clr->Set(str))
 565         return true;
 566
 567     return false;
 568 }
 569
 570 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
 571 {
 572     const wxString str = GetParam(par);
 573     return !str.empty() && ParseAsColour(str, clr);
 574 }
 575
 576 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
 577 {
 578     if ( !HasParam(par) )
 579         return false;
 580
 581     long i;
 582     if ( !GetParam(par).ToLong(&i) )
 583         return false;
 584
 585     *clr = (int)i;
 586     return true;
 587 }
 588
 589 wxString wxHtmlTag::GetAllParams() const
 590 {
 591     // VS: this function is for backward compatibility only,
 592     //     never used by wxHTML
 593     wxString s;
 594     size_t cnt = m_ParamNames.GetCount();
 595     for (size_t i = 0; i < cnt; i++)
 596     {
 597         s << m_ParamNames[i];
 598         s << wxT('=');
 599         if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
 600             s << wxT('\'') << m_ParamValues[i] << wxT('\'');
 601         else
 602             s << wxT('"') << m_ParamValues[i] << wxT('"');
 603     }
 604     return s;
 605 }
 606
 607 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
 608 {
 609     if (m_Parent)
 610         return m_Parent->m_FirstChild;
 611     else
 612     {
 613         wxHtmlTag *cur = (wxHtmlTag*)this;
 614         while (cur->m_Prev)
 615             cur = cur->m_Prev;
 616         return cur;
 617     }
 618 }
 619
 620 wxHtmlTag *wxHtmlTag::GetLastSibling() const
 621 {
 622     if (m_Parent)
 623         return m_Parent->m_LastChild;
 624     else
 625     {
 626         wxHtmlTag *cur = (wxHtmlTag*)this;
 627         while (cur->m_Next)
 628             cur = cur->m_Next;
 629         return cur;
 630     }
 631 }
 632
 633 wxHtmlTag *wxHtmlTag::GetNextTag() const
 634 {
 635     if (m_FirstChild) return m_FirstChild;
 636     if (m_Next) return m_Next;
 637     wxHtmlTag *cur = m_Parent;
 638     if (!cur) return NULL;
 639     while (cur->m_Parent && !cur->m_Next)
 640         cur = cur->m_Parent;
 641     return cur->m_Next;
 642 }
 643
 644 #endif