src/html/htmltag.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/html/htmltag.cpp
   3 // Purpose:     wxHtmlTag class (represents single tag)
   4 // Author:      Vaclav Slavik
   5 // RCS-ID:      $Id$
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #include "wx/wxprec.h"
  11
  12 #ifdef __BORLANDC__
  13     #pragma hdrstop
  14 #endif
  15
  16 #if wxUSE_HTML
  17
  18 #include "wx/html/htmltag.h"
  19
  20 #ifndef WX_PRECOMP
  21     #include "wx/colour.h"
  22     #include "wx/wxcrtvararg.h"
  23 #endif
  24
  25 #include "wx/html/htmlpars.h"
  26 #include "wx/html/styleparams.h"
  27
  28 #include "wx/vector.h"
  29
  30 #include <stdio.h> // for vsscanf
  31 #include <stdarg.h>
  32
  33 //-----------------------------------------------------------------------------
  34 // wxHtmlTagsCache
  35 //-----------------------------------------------------------------------------
  36
// One entry of the tags cache: describes a single '<...>' construct found in
// the source by wxHtmlTagsCache's constructor.
struct wxHtmlCacheItem
{
    // this is the "pos" value passed to wxHtmlTag's constructor,
    // i.e. the position of the '<' character of the tag
    wxString::const_iterator Key;

    // Tag type
    enum Type
    {
        Type_Normal, // normal tag with a matching ending tag
        Type_NoMatchingEndingTag, // there's no ending tag for this tag
        Type_EndingTag // this is an ending tag </..>
    };
    Type type;

    // end positions for the tag; they are only assigned when a matching
    // ending tag is found (i.e. when type becomes Type_Normal):
    // End1 is the position of '<' of the ending tag,
    // End2 is the position just after the scan position reached for the
    //      ending tag (one past its '>' when the ending tag is terminated)
    wxString::const_iterator End1, End2;

    // upper-cased name of this tag (including the leading '/' for ending
    // tags); heap-allocated by wxHtmlTagsCache's constructor, which also
    // frees it again once the whole cache has been built
    wxChar *Name;
};
  60
  61 // NB: this is an empty class and not typedef because of forward declaration
  62 class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
  63 {
  64 };
  65
  66 bool wxIsCDATAElement(const wxChar *tag)
  67 {
  68     return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
  69            (wxStrcmp(tag, wxT("STYLE")) == 0);
  70 }
  71
  72 bool wxIsCDATAElement(const wxString& tag)
  73 {
  74     return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) ||
  75            (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0);
  76 }
  77
// Scans the whole of @a source once and records, for every '<...>' construct
// that is not skipped as a comment, where it starts and -- if a matching
// ending tag is found later in the text -- where that ending tag is.
//
// The iterators stored in the cache point into @a source.
wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
{
    m_Cache = new wxHtmlTagsCacheData;
    m_CachePos = 0;

    // scratch buffer for the upper-cased tag name; longer names are
    // truncated to fit
    wxChar tagBuffer[256];

    const wxString::const_iterator end = source.end();
    for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
    {
        if (*pos != wxT('<'))
            continue;

        // possible tag start found:

        // don't cache comment tags
        // NOTE(review): SkipCommentTag() presumably advances pos to the end
        // of the comment when it returns true -- confirm in htmlpars.cpp
        if ( wxHtmlParser::SkipCommentTag(pos, end) )
            continue;

        // tg is the index of the new cache entry for this tag
        size_t tg = Cache().size();
        Cache().push_back(wxHtmlCacheItem());

        // remember the position of '<' and step over it
        wxString::const_iterator stpos = pos++;
        Cache()[tg].Key = stpos;

        // copy the upper-cased tag name (everything up to '>' or whitespace)
        // into tagBuffer; for an ending tag this includes the leading '/'
        int i;
        for ( i = 0;
              pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
              *pos != wxT('>') && !wxIsspace(*pos);
              ++i, ++pos )
        {
            tagBuffer[i] = (wxChar)wxToupper(*pos);
        }
        tagBuffer[i] = wxT('\0');

        // keep a private copy of the name (with its terminating NUL) in the
        // cache entry, it is needed below to match ending tags
        Cache()[tg].Name = new wxChar[i+1];
        memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));

        // skip the rest of the tag (its parameters) up to the closing '>'
        while (pos < end && *pos != wxT('>'))
            ++pos;

        if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
        {
            Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
            // find matching begin tag: search backwards for the nearest
            // still unmatched tag with the same name (tagBuffer+1 skips the
            // leading '/')
            for (i = tg; i >= 0; i--)
            {
                if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
                {
                    Cache()[i].type = wxHtmlCacheItem::Type_Normal;
                    Cache()[i].End1 = stpos;
                    Cache()[i].End2 = pos + 1;
                    break;
                }
            }
        }
        else
        {
            // opening tag: considered unmatched until its ending tag is seen
            Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;

            if (wxIsCDATAElement(tagBuffer))
            {
                // the content of SCRIPT/STYLE is not scanned for tags:
                // jump directly to the corresponding closing tag instead

                // store the orig pos in case we are missing the closing
                // tag (see below)
                const wxString::const_iterator old_pos = pos;
                bool foundCloseTag = false;

                // find next matching tag
                int tag_len = wxStrlen(tagBuffer);
                while (pos < end)
                {
                    // find the ending tag, i.e. the next "</" sequence
                    while (pos + 1 < end &&
                           (*pos != '<' || *(pos+1) != '/'))
                        ++pos;
                    if (*pos == '<')
                        ++pos;

                    // see if it matches: compare the following characters
                    // case-insensitively against the tag name
                    int match_pos = 0;
                    while (pos < end && match_pos < tag_len )
                    {
                        wxChar c = *pos;
                        if ( c == '>' || c == '<' )
                            break;

                        // cast to wxChar needed to suppress warning in
                        // Unicode build
                        if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
                        {
                            ++match_pos;
                        }
                        else if (c == wxT(' ') || c == wxT('\n') ||
                            c == wxT('\r') || c == wxT('\t'))
                        {
                            // need to skip over these
                        }
                        else
                        {
                            // mismatch (this also covers the '/' following
                            // '<'): restart matching the name
                            match_pos = 0;
                        }
                        ++pos;
                    }

                    // found a match
                    if (match_pos == tag_len)
                    {
                        // rewind to just before the "</NAME" sequence so
                        // that the ++pos of the outer loop lands on its '<'
                        // and the closing tag gets cached normally (this
                        // assumes the name directly follows "</" without
                        // any skipped whitespace)
                        pos = pos - tag_len - 3;
                        foundCloseTag = true;
                        break;
                    }
                    else // keep looking for the closing tag
                    {
                        ++pos;
                    }
                }
                if (!foundCloseTag)
                {
                    // we didn't find closing tag; this means the markup
                    // is incorrect and the best thing we can do is to
                    // ignore the unclosed tag and continue parsing as if
                    // it didn't exist:
                    pos = old_pos;
                }
            }
        }
    }

    // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
    for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
          i != Cache().end(); ++i )
    {
        wxDELETEA(i->Name);
    }
}
 213
// Frees the cache data allocated by the constructor.
wxHtmlTagsCache::~wxHtmlTagsCache()
{
    delete m_Cache;
}
 218
 219 void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
 220                                const wxString::const_iterator& inputEnd,
 221                                wxString::const_iterator *end1,
 222                                wxString::const_iterator *end2,
 223                                bool *hasEnding)
 224 {
 225     if (Cache().empty())
 226         return;
 227
 228     if (Cache()[m_CachePos].Key != at)
 229     {
 230         int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
 231         do
 232         {
 233             m_CachePos += delta;
 234
 235             if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
 236             {
 237                 if ( m_CachePos < 0 )
 238                     m_CachePos = 0;
 239                 else
 240                     m_CachePos = Cache().size() - 1;
 241                 // something is very wrong with HTML, give up by returning an
 242                 // impossibly large value which is going to be ignored by the
 243                 // caller
 244                 *end1 =
 245                 *end2 = inputEnd;
 246                 *hasEnding = true;
 247                 return;
 248             }
 249         }
 250         while (Cache()[m_CachePos].Key != at);
 251     }
 252
 253     switch ( Cache()[m_CachePos].type )
 254     {
 255         case wxHtmlCacheItem::Type_Normal:
 256             *end1 = Cache()[m_CachePos].End1;
 257             *end2 = Cache()[m_CachePos].End2;
 258             *hasEnding = true;
 259             break;
 260
 261         case wxHtmlCacheItem::Type_EndingTag:
 262             wxFAIL_MSG("QueryTag called for ending tag - can't be");
 263             // but if it does happen, fall through, better than crashing
 264
 265         case wxHtmlCacheItem::Type_NoMatchingEndingTag:
 266             // If input HTML is invalid and there's no closing tag for this
 267             // one, pretend that it runs all the way to the end of input
 268             *end1 = inputEnd;
 269             *end2 = inputEnd;
 270             *hasEnding = false;
 271             break;
 272     }
 273 }
 274
 275
 276
 277
 278 //-----------------------------------------------------------------------------
 279 // wxHtmlTag
 280 //-----------------------------------------------------------------------------
 281
// Constructs the tag starting at @a pos.
//
// parent    parent tag or NULL; the new tag is appended to the parent's
//           list of children
// source    the text being parsed
// pos       position of the '<' character of this tag
// end_pos   parsing never goes beyond this position and the tag's end
//           positions are clamped to it
// cache     tags cache queried for the position of the ending tag,
//           must not be NULL
// entParser if not NULL, used to process the parameter values that go
//           through it below
wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
                     const wxString *source,
                     const wxString::const_iterator& pos,
                     const wxString::const_iterator& end_pos,
                     wxHtmlTagsCache *cache,
                     wxHtmlEntitiesParser *entParser)
{
    /* Setup DOM relations */

    m_Next = NULL;
    m_FirstChild = m_LastChild = NULL;
    m_Parent = parent;
    if (parent)
    {
        // append this tag at the end of the parent's list of children
        m_Prev = m_Parent->m_LastChild;
        if (m_Prev == NULL)
            m_Parent->m_FirstChild = this;
        else
            m_Prev->m_Next = this;
        m_Parent->m_LastChild = this;
    }
    else
        m_Prev = NULL;

    /* Find parameters and their values: */

    wxChar c wxDUMMY_INITIALIZE(0);

    // fill-in name, params and begin pos:
    wxString::const_iterator i(pos+1);

    // find tag's name and convert it to uppercase (only the letters a-z are
    // converted); the name ends at whitespace, '>' or '/' and the loop also
    // consumes this terminating character:
    while ((i < end_pos) &&
           ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
             c != wxT('\n') && c != wxT('\t') &&
             c != wxT('>') && c != wxT('/')))
    {
        if ((c >= wxT('a')) && (c <= wxT('z')))
            c -= (wxT('a') - wxT('A'));
        m_Name << c;
    }

    // if the tag has parameters (i.e. the last character consumed was not
    // the closing '>'), read them using a small state machine which accepts
    // "name", "name=value", "name='value'" and "name="value"" forms with
    // optional whitespace around '=':
    if (*(i-1) != wxT('>'))
    {
        #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
                             c == wxT('\n') || c == wxT('\t'))
        wxString pname, pvalue;
        wxChar quote;
        enum
        {
            ST_BEFORE_NAME = 1,
            ST_NAME,
            ST_BEFORE_EQ,
            ST_BEFORE_VALUE,
            ST_VALUE
        } state;

        // quote is the quote character the current value started with or 0
        // if the value is not quoted
        quote = 0;
        state = ST_BEFORE_NAME;
        while (i < end_pos)
        {
            c = *(i++);

            // '>' terminates the tag unless it occurs inside a quoted value
            if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
            {
                if (state == ST_BEFORE_EQ || state == ST_NAME)
                {
                    // pending parameter without a value
                    m_ParamNames.Add(pname);
                    m_ParamValues.Add(wxGetEmptyString());
                }
                else if (state == ST_VALUE && quote == 0)
                {
                    // pending unquoted value; notice that, unlike for the
                    // values terminated by whitespace below, its case is
                    // left unchanged here
                    m_ParamNames.Add(pname);
                    if (entParser)
                        m_ParamValues.Add(entParser->Parse(pvalue));
                    else
                        m_ParamValues.Add(pvalue);
                }
                break;
            }
            switch (state)
            {
                case ST_BEFORE_NAME:
                    // skip whitespace, any other character starts a name
                    if (!IS_WHITE(c))
                    {
                        pname = c;
                        state = ST_NAME;
                    }
                    break;
                case ST_NAME:
                    if (IS_WHITE(c))
                        state = ST_BEFORE_EQ;
                    else if (c == wxT('='))
                        state = ST_BEFORE_VALUE;
                    else
                        pname << c;
                    break;
                case ST_BEFORE_EQ:
                    if (c == wxT('='))
                        state = ST_BEFORE_VALUE;
                    else if (!IS_WHITE(c))
                    {
                        // another name starts: the previous parameter had
                        // no value
                        m_ParamNames.Add(pname);
                        m_ParamValues.Add(wxGetEmptyString());
                        pname = c;
                        state = ST_NAME;
                    }
                    break;
                case ST_BEFORE_VALUE:
                    if (!IS_WHITE(c))
                    {
                        // a quote starts a quoted (initially empty) value,
                        // anything else is the first character of an
                        // unquoted one
                        if (c == wxT('"') || c == wxT('\''))
                            quote = c, pvalue = wxGetEmptyString();
                        else
                            quote = 0, pvalue = c;
                        state = ST_VALUE;
                    }
                    break;
                case ST_VALUE:
                    // a quoted value ends at the matching quote, an
                    // unquoted one at the first whitespace
                    if ((quote != 0 && c == quote) ||
                        (quote == 0 && IS_WHITE(c)))
                    {
                        m_ParamNames.Add(pname);
                        if (quote == 0)
                        {
                            // VS: backward compatibility, no real reason,
                            //     but wxHTML code relies on this... :(
                            pvalue.MakeUpper();
                        }
                        if (entParser)
                            m_ParamValues.Add(entParser->Parse(pvalue));
                        else
                            m_ParamValues.Add(pvalue);
                        state = ST_BEFORE_NAME;
                    }
                    else
                        pvalue << c;
                    break;
            }
        }

        #undef IS_WHITE
    }

    // the tag's contents start at the current position; ask the cache where
    // the ending tag is and make sure the result doesn't exceed end_pos
    m_Begin = i;
    cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
    if (m_End1 > end_pos) m_End1 = end_pos;
    if (m_End2 > end_pos) m_End2 = end_pos;

#if WXWIN_COMPATIBILITY_2_8
    m_sourceStart = source->begin();
#endif

    // Try to parse any style parameters that can be handled simply by
    // converting them to the equivalent HTML 3 attributes: this is a far cry
    // from perfect but better than nothing.
    static const struct EquivAttr
    {
        const char *style;
        const char *attr;
    } equivAttrs[] =
    {
        { "text-align",         "ALIGN"         },
        { "width",              "WIDTH"         },
        { "vertical-align",     "VALIGN"        },
        { "background",         "BGCOLOR"       },
    };

    // an attribute specified explicitly in the tag is never overridden by
    // the value coming from the style
    wxHtmlStyleParams styleParams(*this);
    for ( unsigned n = 0; n < WXSIZEOF(equivAttrs); n++ )
    {
        const EquivAttr& ea = equivAttrs[n];
        if ( styleParams.HasParam(ea.style) && !HasParam(ea.attr) )
        {
            m_ParamNames.Add(ea.attr);
            m_ParamValues.Add(styleParams.GetParam(ea.style));
        }
    }
}
 463
 464 wxHtmlTag::~wxHtmlTag()
 465 {
 466     wxHtmlTag *t1, *t2;
 467     t1 = m_FirstChild;
 468     while (t1)
 469     {
 470         t2 = t1->GetNextSibling();
 471         delete t1;
 472         t1 = t2;
 473     }
 474 }
 475
 476 bool wxHtmlTag::HasParam(const wxString& par) const
 477 {
 478     return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
 479 }
 480
 481 wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
 482 {
 483     int index = m_ParamNames.Index(par, false);
 484     if (index == wxNOT_FOUND)
 485         return wxGetEmptyString();
 486     if (with_quotes)
 487     {
 488         // VS: backward compatibility, seems to be never used by wxHTML...
 489         wxString s;
 490         s << wxT('"') << m_ParamValues[index] << wxT('"');
 491         return s;
 492     }
 493     else
 494         return m_ParamValues[index];
 495 }
 496
 497 int wxHtmlTag::ScanParam(const wxString& par,
 498                          const char *format,
 499                          void *param) const
 500 {
 501     wxString parval = GetParam(par);
 502     return wxSscanf(parval, format, param);
 503 }
 504
 505 int wxHtmlTag::ScanParam(const wxString& par,
 506                          const wchar_t *format,
 507                          void *param) const
 508 {
 509     wxString parval = GetParam(par);
 510     return wxSscanf(parval, format, param);
 511 }
 512
 513 /* static */
 514 bool wxHtmlTag::ParseAsColour(const wxString& str, wxColour *clr)
 515 {
 516     wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
 517
 518     // handle colours defined in HTML 4.0 first:
 519     if (str.length() > 1 && str[0] != wxT('#'))
 520     {
 521         #define HTML_COLOUR(name, r, g, b)              \
 522             if (str.IsSameAs(wxS(name), false))         \
 523                 { clr->Set(r, g, b); return true; }
 524         HTML_COLOUR("black",   0x00,0x00,0x00)
 525         HTML_COLOUR("silver",  0xC0,0xC0,0xC0)
 526         HTML_COLOUR("gray",    0x80,0x80,0x80)
 527         HTML_COLOUR("white",   0xFF,0xFF,0xFF)
 528         HTML_COLOUR("maroon",  0x80,0x00,0x00)
 529         HTML_COLOUR("red",     0xFF,0x00,0x00)
 530         HTML_COLOUR("purple",  0x80,0x00,0x80)
 531         HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
 532         HTML_COLOUR("green",   0x00,0x80,0x00)
 533         HTML_COLOUR("lime",    0x00,0xFF,0x00)
 534         HTML_COLOUR("olive",   0x80,0x80,0x00)
 535         HTML_COLOUR("yellow",  0xFF,0xFF,0x00)
 536         HTML_COLOUR("navy",    0x00,0x00,0x80)
 537         HTML_COLOUR("blue",    0x00,0x00,0xFF)
 538         HTML_COLOUR("teal",    0x00,0x80,0x80)
 539         HTML_COLOUR("aqua",    0x00,0xFF,0xFF)
 540         #undef HTML_COLOUR
 541     }
 542
 543     // then try to parse #rrggbb representations or set from other well
 544     // known names (note that this doesn't strictly conform to HTML spec,
 545     // but it doesn't do real harm -- but it *must* be done after the standard
 546     // colors are handled above):
 547     if (clr->Set(str))
 548         return true;
 549
 550     return false;
 551 }
 552
 553 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
 554 {
 555     const wxString str = GetParam(par);
 556     return !str.empty() && ParseAsColour(str, clr);
 557 }
 558
 559 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
 560 {
 561     if ( !HasParam(par) )
 562         return false;
 563
 564     long i;
 565     if ( !GetParam(par).ToLong(&i) )
 566         return false;
 567
 568     *clr = (int)i;
 569     return true;
 570 }
 571
 572 wxString wxHtmlTag::GetAllParams() const
 573 {
 574     // VS: this function is for backward compatibility only,
 575     //     never used by wxHTML
 576     wxString s;
 577     size_t cnt = m_ParamNames.GetCount();
 578     for (size_t i = 0; i < cnt; i++)
 579     {
 580         s << m_ParamNames[i];
 581         s << wxT('=');
 582         if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
 583             s << wxT('\'') << m_ParamValues[i] << wxT('\'');
 584         else
 585             s << wxT('"') << m_ParamValues[i] << wxT('"');
 586     }
 587     return s;
 588 }
 589
 590 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
 591 {
 592     if (m_Parent)
 593         return m_Parent->m_FirstChild;
 594     else
 595     {
 596         wxHtmlTag *cur = (wxHtmlTag*)this;
 597         while (cur->m_Prev)
 598             cur = cur->m_Prev;
 599         return cur;
 600     }
 601 }
 602
 603 wxHtmlTag *wxHtmlTag::GetLastSibling() const
 604 {
 605     if (m_Parent)
 606         return m_Parent->m_LastChild;
 607     else
 608     {
 609         wxHtmlTag *cur = (wxHtmlTag*)this;
 610         while (cur->m_Next)
 611             cur = cur->m_Next;
 612         return cur;
 613     }
 614 }
 615
 616 wxHtmlTag *wxHtmlTag::GetNextTag() const
 617 {
 618     if (m_FirstChild) return m_FirstChild;
 619     if (m_Next) return m_Next;
 620     wxHtmlTag *cur = m_Parent;
 621     if (!cur) return NULL;
 622     while (cur->m_Parent && !cur->m_Next)
 623         cur = cur->m_Parent;
 624     return cur->m_Next;
 625 }
 626
 627 #endif