src/html/htmltag.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/html/htmltag.cpp
   3 // Purpose:     wxHtmlTag class (represents single tag)
   4 // Author:      Vaclav Slavik
   5 // RCS-ID:      $Id$
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #include "wx/wxprec.h"
  11
  12 #ifdef __BORLANDC__
  13     #pragma hdrstop
  14 #endif
  15
  16 #if wxUSE_HTML
  17
  18 #include "wx/html/htmltag.h"
  19
  20 #ifndef WX_PRECOMP
  21     #include "wx/colour.h"
  22     #include "wx/wxcrtvararg.h"
  23 #endif
  24
  25 #include "wx/html/htmlpars.h"
  26 #include "wx/vector.h"
  27
  28 #include <stdio.h> // for vsscanf
  29 #include <stdarg.h>
  30
  31 //-----------------------------------------------------------------------------
  32 // wxHtmlTagsCache
  33 //-----------------------------------------------------------------------------
  34
  35 struct wxHtmlCacheItem
  36 {
  37     // this is "pos" value passed to wxHtmlTag's constructor.
  38     // it is position of '<' character of the tag
  39     wxString::const_iterator Key;
  40
  41     // Tag type
  42     enum Type
  43     {
  44         Type_Normal, // normal tag with a matching ending tag
  45         Type_NoMatchingEndingTag, // there's no ending tag for this tag
  46         Type_EndingTag // this is ending tag </..>
  47     };
  48     Type type;
  49
  50     // end positions for the tag:
  51     // end1 is '<' of ending tag,
  52     // end2 is '>' or both are
  53     wxString::const_iterator End1, End2;
  54
  55     // name of this tag
  56     wxChar *Name;
  57 };
  58
  59 // NB: this is an empty class and not typedef because of forward declaration
  60 class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
  61 {
  62 };
  63
  64 bool wxIsCDATAElement(const wxChar *tag)
  65 {
  66     return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
  67            (wxStrcmp(tag, _T("STYLE")) == 0);
  68 }
  69
  70 bool wxIsCDATAElement(const wxString& tag)
  71 {
  72     return (wxStrcmp(tag.wx_str(), wxSTRING_TEXT("SCRIPT")) == 0) ||
  73            (wxStrcmp(tag.wx_str(), wxSTRING_TEXT("STYLE")) == 0);
  74 }
  75
  76 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
  77 {
  78     m_Cache = new wxHtmlTagsCacheData;
  79     m_CachePos = 0;
  80
  81     wxChar tagBuffer[256];
  82
  83     const wxString::const_iterator end = source.end();
  84     for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
  85     {
  86         if (*pos == wxT('<'))   // tag found:
  87         {
  88             // don't cache comment tags
  89             if ( wxHtmlParser::SkipCommentTag(pos, source.end()) )
  90                 continue;
  91
  92             size_t tg = Cache().size();
  93             Cache().push_back(wxHtmlCacheItem());
  94
  95             wxString::const_iterator stpos = pos++;
  96             Cache()[tg].Key = stpos;
  97
  98             int i;
  99             for ( i = 0;
 100                   pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
 101                   *pos != wxT('>') && !wxIsspace(*pos);
 102                   ++i, ++pos )
 103             {
 104                 tagBuffer[i] = (wxChar)wxToupper(*pos);
 105             }
 106             tagBuffer[i] = _T('\0');
 107
 108             Cache()[tg].Name = new wxChar[i+1];
 109             memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
 110
 111             while (pos < end && *pos != wxT('>'))
 112                 ++pos;
 113
 114             if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
 115             {
 116                 Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
 117                 // find matching begin tag:
 118                 for (i = tg; i >= 0; i--)
 119                 {
 120                     if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
 121                     {
 122                         Cache()[i].type = wxHtmlCacheItem::Type_Normal;
 123                         Cache()[i].End1 = stpos;
 124                         Cache()[i].End2 = pos + 1;
 125                         break;
 126                     }
 127                 }
 128             }
 129             else
 130             {
 131                 Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;
 132
 133                 if (wxIsCDATAElement(tagBuffer))
 134                 {
 135                     // store the orig pos in case we are missing the closing
 136                     // tag (see below)
 137                     const wxString::const_iterator old_pos = pos;
 138                     bool foundCloseTag = false;
 139
 140                     // find next matching tag
 141                     int tag_len = wxStrlen(tagBuffer);
 142                     while (pos < end)
 143                     {
 144                         // find the ending tag
 145                         while (pos + 1 < end &&
 146                                (*pos != '<' || *(pos+1) != '/'))
 147                             ++pos;
 148                         if (*pos == '<')
 149                             ++pos;
 150
 151                         // see if it matches
 152                         int match_pos = 0;
 153                         while (pos < end && match_pos < tag_len )
 154                         {
 155                             wxChar c = *pos;
 156                             if ( c == '>' || c == '<' )
 157                                 break;
 158
 159                             // cast to wxChar needed to suppress warning in
 160                             // Unicode build
 161                             if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
 162                             {
 163                                 ++match_pos;
 164                             }
 165                             else if (c == wxT(' ') || c == wxT('\n') ||
 166                                 c == wxT('\r') || c == wxT('\t'))
 167                             {
 168                                 // need to skip over these
 169                             }
 170                             else
 171                             {
 172                                 match_pos = 0;
 173                             }
 174                             ++pos;
 175                         }
 176
 177                         // found a match
 178                         if (match_pos == tag_len)
 179                         {
 180                             pos = pos - tag_len - 3;
 181                             foundCloseTag = true;
 182                             break;
 183                         }
 184                         else // keep looking for the closing tag
 185                         {
 186                             ++pos;
 187                         }
 188                     }
 189                     if (!foundCloseTag)
 190                     {
 191                         // we didn't find closing tag; this means the markup
 192                         // is incorrect and the best thing we can do is to
 193                         // ignore the unclosed tag and continue parsing as if
 194                         // it didn't exist:
 195                         pos = old_pos;
 196                     }
 197                 }
 198             }
 199         }
 200     }
 201
 202     // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
 203     for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
 204           i != Cache().end(); ++i )
 205     {
 206         delete[] i->Name;
 207         i->Name = NULL;
 208     }
 209 }
 210
 211 wxHtmlTagsCache::~wxHtmlTagsCache()
 212 {
 213     delete m_Cache;
 214 }
 215
 216 void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
 217                                const wxString::const_iterator& inputEnd,
 218                                wxString::const_iterator *end1,
 219                                wxString::const_iterator *end2,
 220                                bool *hasEnding)
 221 {
 222     if (Cache().empty())
 223         return;
 224
 225     if (Cache()[m_CachePos].Key != at)
 226     {
 227         int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
 228         do
 229         {
 230             m_CachePos += delta;
 231
 232             if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
 233             {
 234                 if ( m_CachePos < 0 )
 235                     m_CachePos = 0;
 236                 else
 237                     m_CachePos = Cache().size() - 1;
 238                 // something is very wrong with HTML, give up by returning an
 239                 // impossibly large value which is going to be ignored by the
 240                 // caller
 241                 *end1 =
 242                 *end2 = inputEnd;
 243                 *hasEnding = true;
 244                 return;
 245             }
 246         }
 247         while (Cache()[m_CachePos].Key != at);
 248     }
 249     *end1 = Cache()[m_CachePos].End1;
 250     *end2 = Cache()[m_CachePos].End2;
 251     *hasEnding = (Cache()[m_CachePos].type == wxHtmlCacheItem::Type_Normal);
 252 }
 253
 254
 255
 256
 257 //-----------------------------------------------------------------------------
 258 // wxHtmlTag
 259 //-----------------------------------------------------------------------------
 260
 261 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
 262                      const wxString *source,
 263                      const wxString::const_iterator& pos,
 264                      const wxString::const_iterator& end_pos,
 265                      wxHtmlTagsCache *cache,
 266                      wxHtmlEntitiesParser *entParser)
 267 {
 268     /* Setup DOM relations */
 269
 270     m_Next = NULL;
 271     m_FirstChild = m_LastChild = NULL;
 272     m_Parent = parent;
 273     if (parent)
 274     {
 275         m_Prev = m_Parent->m_LastChild;
 276         if (m_Prev == NULL)
 277             m_Parent->m_FirstChild = this;
 278         else
 279             m_Prev->m_Next = this;
 280         m_Parent->m_LastChild = this;
 281     }
 282     else
 283         m_Prev = NULL;
 284
 285     /* Find parameters and their values: */
 286
 287     wxChar c;
 288
 289     // fill-in name, params and begin pos:
 290     wxString::const_iterator i(pos+1);
 291
 292     // find tag's name and convert it to uppercase:
 293     while ((i < end_pos) &&
 294            ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
 295              c != wxT('\n') && c != wxT('\t') &&
 296              c != wxT('>')))
 297     {
 298         if ((c >= wxT('a')) && (c <= wxT('z')))
 299             c -= (wxT('a') - wxT('A'));
 300         m_Name << c;
 301     }
 302
 303     // if the tag has parameters, read them and "normalize" them,
 304     // i.e. convert to uppercase, replace whitespaces by spaces and
 305     // remove whitespaces around '=':
 306     if (*(i-1) != wxT('>'))
 307     {
 308         #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
 309                              c == wxT('\n') || c == wxT('\t'))
 310         wxString pname, pvalue;
 311         wxChar quote;
 312         enum
 313         {
 314             ST_BEFORE_NAME = 1,
 315             ST_NAME,
 316             ST_BEFORE_EQ,
 317             ST_BEFORE_VALUE,
 318             ST_VALUE
 319         } state;
 320
 321         quote = 0;
 322         state = ST_BEFORE_NAME;
 323         while (i < end_pos)
 324         {
 325             c = *(i++);
 326
 327             if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
 328             {
 329                 if (state == ST_BEFORE_EQ || state == ST_NAME)
 330                 {
 331                     m_ParamNames.Add(pname);
 332                     m_ParamValues.Add(wxGetEmptyString());
 333                 }
 334                 else if (state == ST_VALUE && quote == 0)
 335                 {
 336                     m_ParamNames.Add(pname);
 337                     if (entParser)
 338                         m_ParamValues.Add(entParser->Parse(pvalue));
 339                     else
 340                         m_ParamValues.Add(pvalue);
 341                 }
 342                 break;
 343             }
 344             switch (state)
 345             {
 346                 case ST_BEFORE_NAME:
 347                     if (!IS_WHITE(c))
 348                     {
 349                         pname = c;
 350                         state = ST_NAME;
 351                     }
 352                     break;
 353                 case ST_NAME:
 354                     if (IS_WHITE(c))
 355                         state = ST_BEFORE_EQ;
 356                     else if (c == wxT('='))
 357                         state = ST_BEFORE_VALUE;
 358                     else
 359                         pname << c;
 360                     break;
 361                 case ST_BEFORE_EQ:
 362                     if (c == wxT('='))
 363                         state = ST_BEFORE_VALUE;
 364                     else if (!IS_WHITE(c))
 365                     {
 366                         m_ParamNames.Add(pname);
 367                         m_ParamValues.Add(wxGetEmptyString());
 368                         pname = c;
 369                         state = ST_NAME;
 370                     }
 371                     break;
 372                 case ST_BEFORE_VALUE:
 373                     if (!IS_WHITE(c))
 374                     {
 375                         if (c == wxT('"') || c == wxT('\''))
 376                             quote = c, pvalue = wxGetEmptyString();
 377                         else
 378                             quote = 0, pvalue = c;
 379                         state = ST_VALUE;
 380                     }
 381                     break;
 382                 case ST_VALUE:
 383                     if ((quote != 0 && c == quote) ||
 384                         (quote == 0 && IS_WHITE(c)))
 385                     {
 386                         m_ParamNames.Add(pname);
 387                         if (quote == 0)
 388                         {
 389                             // VS: backward compatibility, no real reason,
 390                             //     but wxHTML code relies on this... :(
 391                             pvalue.MakeUpper();
 392                         }
 393                         if (entParser)
 394                             m_ParamValues.Add(entParser->Parse(pvalue));
 395                         else
 396                             m_ParamValues.Add(pvalue);
 397                         state = ST_BEFORE_NAME;
 398                     }
 399                     else
 400                         pvalue << c;
 401                     break;
 402             }
 403         }
 404
 405         #undef IS_WHITE
 406     }
 407     m_Begin = i;
 408     cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
 409     if (m_End1 > end_pos) m_End1 = end_pos;
 410     if (m_End2 > end_pos) m_End2 = end_pos;
 411
 412 #if WXWIN_COMPATIBILITY_2_8
 413     m_sourceStart = source->begin();
 414 #endif
 415 }
 416
 417 wxHtmlTag::~wxHtmlTag()
 418 {
 419     wxHtmlTag *t1, *t2;
 420     t1 = m_FirstChild;
 421     while (t1)
 422     {
 423         t2 = t1->GetNextSibling();
 424         delete t1;
 425         t1 = t2;
 426     }
 427 }
 428
 429 bool wxHtmlTag::HasParam(const wxString& par) const
 430 {
 431     return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
 432 }
 433
 434 wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
 435 {
 436     int index = m_ParamNames.Index(par, false);
 437     if (index == wxNOT_FOUND)
 438         return wxGetEmptyString();
 439     if (with_commas)
 440     {
 441         // VS: backward compatibility, seems to be never used by wxHTML...
 442         wxString s;
 443         s << wxT('"') << m_ParamValues[index] << wxT('"');
 444         return s;
 445     }
 446     else
 447         return m_ParamValues[index];
 448 }
 449
 450 int wxHtmlTag::ScanParam(const wxString& par,
 451                          const char *format,
 452                          void *param) const
 453 {
 454     wxString parval = GetParam(par);
 455     return wxSscanf(parval, format, param);
 456 }
 457
 458 int wxHtmlTag::ScanParam(const wxString& par,
 459                          const wchar_t *format,
 460                          void *param) const
 461 {
 462     wxString parval = GetParam(par);
 463     return wxSscanf(parval, format, param);
 464 }
 465
 466 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
 467 {
 468     wxCHECK_MSG( clr, false, _T("invalid colour argument") );
 469
 470     wxString str = GetParam(par);
 471
 472     // handle colours defined in HTML 4.0 first:
 473     if (str.length() > 1 && str[0] != _T('#'))
 474     {
 475         #define HTML_COLOUR(name, r, g, b)                        \
 476             if (str.IsSameAs(wxSTRING_TEXT(name), false))         \
 477                 { clr->Set(r, g, b); return true; }
 478         HTML_COLOUR("black",   0x00,0x00,0x00)
 479         HTML_COLOUR("silver",  0xC0,0xC0,0xC0)
 480         HTML_COLOUR("gray",    0x80,0x80,0x80)
 481         HTML_COLOUR("white",   0xFF,0xFF,0xFF)
 482         HTML_COLOUR("maroon",  0x80,0x00,0x00)
 483         HTML_COLOUR("red",     0xFF,0x00,0x00)
 484         HTML_COLOUR("purple",  0x80,0x00,0x80)
 485         HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
 486         HTML_COLOUR("green",   0x00,0x80,0x00)
 487         HTML_COLOUR("lime",    0x00,0xFF,0x00)
 488         HTML_COLOUR("olive",   0x80,0x80,0x00)
 489         HTML_COLOUR("yellow",  0xFF,0xFF,0x00)
 490         HTML_COLOUR("navy",    0x00,0x00,0x80)
 491         HTML_COLOUR("blue",    0x00,0x00,0xFF)
 492         HTML_COLOUR("teal",    0x00,0x80,0x80)
 493         HTML_COLOUR("aqua",    0x00,0xFF,0xFF)
 494         #undef HTML_COLOUR
 495     }
 496
 497     // then try to parse #rrggbb representations or set from other well
 498     // known names (note that this doesn't strictly conform to HTML spec,
 499     // but it doesn't do real harm -- but it *must* be done after the standard
 500     // colors are handled above):
 501     if (clr->Set(str))
 502         return true;
 503
 504     return false;
 505 }
 506
 507 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
 508 {
 509     if (!HasParam(par)) return false;
 510     long i;
 511     bool succ = GetParam(par).ToLong(&i);
 512     *clr = (int)i;
 513     return succ;
 514 }
 515
 516 wxString wxHtmlTag::GetAllParams() const
 517 {
 518     // VS: this function is for backward compatibility only,
 519     //     never used by wxHTML
 520     wxString s;
 521     size_t cnt = m_ParamNames.GetCount();
 522     for (size_t i = 0; i < cnt; i++)
 523     {
 524         s << m_ParamNames[i];
 525         s << wxT('=');
 526         if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
 527             s << wxT('\'') << m_ParamValues[i] << wxT('\'');
 528         else
 529             s << wxT('"') << m_ParamValues[i] << wxT('"');
 530     }
 531     return s;
 532 }
 533
 534 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
 535 {
 536     if (m_Parent)
 537         return m_Parent->m_FirstChild;
 538     else
 539     {
 540         wxHtmlTag *cur = (wxHtmlTag*)this;
 541         while (cur->m_Prev)
 542             cur = cur->m_Prev;
 543         return cur;
 544     }
 545 }
 546
 547 wxHtmlTag *wxHtmlTag::GetLastSibling() const
 548 {
 549     if (m_Parent)
 550         return m_Parent->m_LastChild;
 551     else
 552     {
 553         wxHtmlTag *cur = (wxHtmlTag*)this;
 554         while (cur->m_Next)
 555             cur = cur->m_Next;
 556         return cur;
 557     }
 558 }
 559
 560 wxHtmlTag *wxHtmlTag::GetNextTag() const
 561 {
 562     if (m_FirstChild) return m_FirstChild;
 563     if (m_Next) return m_Next;
 564     wxHtmlTag *cur = m_Parent;
 565     if (!cur) return NULL;
 566     while (cur->m_Parent && !cur->m_Next)
 567         cur = cur->m_Parent;
 568     return cur->m_Next;
 569 }
 570
 571 #endif