X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/211dfeddfca5c5992db510bdaa72226bf2013c05..f17fb297676dd3231b849ab7e55486473aa6ee21:/src/html/htmlpars.cpp diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp index c45c2dfa29..09f7c0ecd9 100644 --- a/src/html/htmlpars.cpp +++ b/src/html/htmlpars.cpp @@ -49,8 +49,9 @@ public: WX_DECLARE_OBJARRAY(wxHtmlTextPiece, wxHtmlTextPieces); WX_DEFINE_OBJARRAY(wxHtmlTextPieces); -struct wxHtmlParserState +class wxHtmlParserState { +public: wxHtmlTag *m_curTag; wxHtmlTag *m_tags; wxHtmlTextPieces *m_textPieces; @@ -79,8 +80,9 @@ wxHtmlParser::wxHtmlParser() wxHtmlParser::~wxHtmlParser() { - while (RestoreState()) - DestroyDOMTree(); + while (RestoreState()) {} + DestroyDOMTree(); + delete m_HandlersStack; m_HandlersHash.Clear(); m_HandlersList.DeleteContents(TRUE); @@ -135,7 +137,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, wxChar c; int i = begin_pos; int textBeginning = begin_pos; - + while (i < end_pos) { c = m_Source.GetChar(i); @@ -159,7 +161,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, while (i < end_pos) { c = m_Source.GetChar(i++); - if ((c == wxT(' ') || c == wxT('\n') || + if ((c == wxT(' ') || c == wxT('\n') || c == wxT('\r') || c == wxT('\t')) && dashes >= 2) {} else if (c == wxT('>') && dashes >= 2) { @@ -168,31 +170,31 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, } else if (c == wxT('-')) dashes++; - else + else dashes = 0; } } - + // add another tag to the tree: else if (i < end_pos-1 && m_Source.GetChar(i+1) != wxT('/')) { wxHtmlTag *chd; - if (cur) - chd = new wxHtmlTag(cur, m_Source, + if (cur) + chd = new wxHtmlTag(cur, m_Source, i, end_pos, cache, m_entitiesParser); - else + else { chd = new wxHtmlTag(NULL, m_Source, i, end_pos, cache, m_entitiesParser); - if (!m_Tags) + if (!m_Tags) { - // if this is the first tag to be created make the root + // if this is the first tag to be created make the root // m_Tags point to it: m_Tags = chd; } else { - // if there is already a root tag add this tag as + // if there is already a root tag add this tag as // the last sibling: chd->m_Prev = m_Tags->GetLastSibling(); chd->m_Prev->m_Next = chd; @@ -202,7 +204,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, if (chd->HasEnding()) { CreateDOMSubTree(chd, - chd->GetBeginPos(), chd->GetEndPos1(), + chd->GetBeginPos(), chd->GetEndPos1(), cache); i = chd->GetEndPos2(); } @@ -212,7 +214,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, } // ... or skip ending tag: - else + else { while (i < end_pos && m_Source.GetChar(i) != wxT('>')) i++; textBeginning = i+1; @@ -243,36 +245,37 @@ void wxHtmlParser::DestroyDOMTree() m_TextPieces = NULL; } -void wxHtmlParser::DoParsing() +void wxHtmlParser::DoParsing() { m_CurTag = m_Tags; m_CurTextPiece = 0; - DoParsing(0, m_Source.Length()); + DoParsing(0, m_Source.Length()); } void wxHtmlParser::DoParsing(int begin_pos, int end_pos) { if (end_pos <= begin_pos) return; - + wxHtmlTextPieces& pieces = *m_TextPieces; size_t piecesCnt = pieces.GetCount(); - + while (begin_pos < end_pos) { while (m_CurTag && m_CurTag->GetBeginPos() < begin_pos) m_CurTag = m_CurTag->GetNextTag(); - while (m_CurTextPiece < piecesCnt && + while (m_CurTextPiece < piecesCnt && pieces[m_CurTextPiece].m_pos < begin_pos) m_CurTextPiece++; - if (m_CurTextPiece < piecesCnt && - (!m_CurTag || + if (m_CurTextPiece < piecesCnt && + (!m_CurTag || pieces[m_CurTextPiece].m_pos < m_CurTag->GetBeginPos())) { // Add text: - AddText(m_Source.Mid(pieces[m_CurTextPiece].m_pos, - pieces[m_CurTextPiece].m_lng)); - begin_pos = pieces[m_CurTextPiece].m_pos + + AddText(GetEntitiesParser()->Parse( + m_Source.Mid(pieces[m_CurTextPiece].m_pos, + pieces[m_CurTextPiece].m_lng))); + begin_pos = pieces[m_CurTextPiece].m_pos + pieces[m_CurTextPiece].m_lng; m_CurTextPiece++; } @@ -283,7 +286,7 @@ void wxHtmlParser::DoParsing(int begin_pos, int end_pos) { if (m_CurTag->HasEnding()) begin_pos = m_CurTag->GetEndPos2(); - else + else begin_pos = m_CurTag->GetBeginPos(); } wxHtmlTag *t = m_CurTag; @@ -315,7 +318,7 @@ void wxHtmlParser::AddTagHandler(wxHtmlTagHandler *handler) wxStringTokenizer tokenizer(s, wxT(", ")); while (tokenizer.HasMoreTokens()) - m_HandlersHash.Put(tokenizer.NextToken(), handler); + m_HandlersHash.Put(tokenizer.GetNextToken(), handler); if (m_HandlersList.IndexOf(handler) == wxNOT_FOUND) m_HandlersList.Append(handler); @@ -338,7 +341,7 @@ void wxHtmlParser::PushTagHandler(wxHtmlTagHandler *handler, wxString tags) while (tokenizer.HasMoreTokens()) { - key = tokenizer.NextToken(); + key = tokenizer.GetNextToken(); m_HandlersHash.Delete(key); m_HandlersHash.Put(key, handler); } @@ -376,23 +379,25 @@ void wxHtmlParser::SetSourceAndSaveState(const wxString& src) m_TextPieces = NULL; m_CurTextPiece = 0; m_Source = wxEmptyString; - + SetSource(src); } bool wxHtmlParser::RestoreState() { if (!m_SavedStates) return FALSE; - + + DestroyDOMTree(); + wxHtmlParserState *s = m_SavedStates; m_SavedStates = s->m_nextState; - + m_CurTag = s->m_curTag; m_Tags = s->m_tags; m_TextPieces = s->m_textPieces; m_CurTextPiece = s->m_curTextPiece; m_Source = s->m_source; - + delete s; return TRUE; } @@ -433,6 +438,8 @@ void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding) m_encoding = encoding; if (m_encoding != wxFONTENCODING_SYSTEM) m_conv = new wxCSConv(wxFontMapper::GetEncodingName(m_encoding)); +#else + (void) encoding; #endif } @@ -441,6 +448,8 @@ wxString wxHtmlEntitiesParser::Parse(const wxString& input) const wxChar *c, *last; const wxChar *in_str = input.c_str(); wxString output; + + output.reserve(input.length()); for (c = in_str, last = in_str; *c != wxT('\0'); c++) { @@ -449,8 +458,11 @@ wxString wxHtmlEntitiesParser::Parse(const wxString& input) if (c - last > 0) output.append(last, c - last); if (++c == wxT('\0')) break; + wxString entity; const wxChar *ent_s = c; + wxChar entity_char; + for (; (*c >= wxT('a') && *c <= wxT('z')) || (*c >= wxT('A') && *c <= wxT('Z')) || (*c >= wxT('0') && *c <= wxT('9')) || @@ -458,7 +470,14 @@ wxString wxHtmlEntitiesParser::Parse(const wxString& input) entity.append(ent_s, c - ent_s); if (*c != wxT(';')) c--; last = c+1; - output << GetEntityChar(entity); + entity_char = GetEntityChar(entity); + if (entity_char) + output << entity_char; + else + { + output.append(ent_s-1, c-ent_s+2); + wxLogDebug(wxT("Unrecognized HTML entity: '%s'"), entity.c_str()); + } } } if (*last != wxT('\0')) @@ -487,7 +506,7 @@ wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code) wbuf[0] = (wchar_t)code; wbuf[1] = 0; wxMBConv *conv = m_conv ? m_conv : &wxConvLocal; - if (conv->WC2MB(buf, wbuf, 1) == (size_t)-1) + if (conv->WC2MB(buf, wbuf, 2) == (size_t)-1) return '?'; return buf[0]; #else @@ -788,7 +807,7 @@ wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity) } if (code == 0) - return wxT('?'); + return 0; else return GetCharForCode(code); }