X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/3c47c04740b7c456823e0fd31edbdf1e6bfff43b..65669e3152f9d6d3bb77c31d4211a356fe089d56:/src/html/htmlpars.cpp diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp index c51181a3fa..1274b14f6b 100644 --- a/src/html/htmlpars.cpp +++ b/src/html/htmlpars.cpp @@ -48,8 +48,10 @@ class wxHtmlTextPiece { public: wxHtmlTextPiece() {} - wxHtmlTextPiece(int pos, int lng) : m_pos(pos), m_lng(lng) {} - int m_pos, m_lng; + wxHtmlTextPiece(const wxString::const_iterator& start, + const wxString::const_iterator& end) + : m_start(start), m_end(end) {} + wxString::const_iterator m_start, m_end; }; // NB: this is an empty class and not typedef because of forward declaration @@ -64,7 +66,7 @@ public: wxHtmlTag *m_tags; wxHtmlTextPieces *m_textPieces; int m_curTextPiece; - wxString m_source; + const wxString *m_source; wxHtmlParserState *m_nextState; }; @@ -75,9 +77,10 @@ public: IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject) wxHtmlParser::wxHtmlParser() - : wxObject(), m_HandlersHash(wxKEY_STRING), - m_FS(NULL), m_HandlersStack(NULL) + : wxObject(), + m_FS(NULL) { + m_Source = NULL; m_entitiesParser = new wxHtmlEntitiesParser; m_Tags = NULL; m_CurTag = NULL; @@ -91,18 +94,10 @@ wxHtmlParser::~wxHtmlParser() while (RestoreState()) {} DestroyDOMTree(); - if (m_HandlersStack) - { - wxList& tmp = *m_HandlersStack; - wxList::iterator it, en; - for( it = tmp.begin(), en = tmp.end(); it != en; ++it ) - delete (wxHashTable*)*it; - tmp.clear(); - } - delete m_HandlersStack; - m_HandlersHash.Clear(); - WX_CLEAR_LIST(wxList, m_HandlersList); + WX_CLEAR_ARRAY(m_HandlersStack); + WX_CLEAR_HASH_SET(wxHtmlTagHandlersSet, m_HandlersSet); delete m_entitiesParser; + delete m_Source; } wxObject* wxHtmlParser::Parse(const wxString& source) @@ -128,7 +123,15 @@ void wxHtmlParser::DoneParser() void wxHtmlParser::SetSource(const wxString& src) { DestroyDOMTree(); - m_Source = src; + // NB: This is allocated on heap because wxHtmlTag uses iterators and + // making a copy of m_Source string in SetSourceAndSaveState() and + // RestoreState() would invalidate them (because wxString::m_impl's + // memory would change completely twice and iterators use pointers + // into it). So instead, we keep the string object intact and only + // store/restore pointer to it, for which we need it to be allocated + // on the heap. + delete m_Source; + m_Source = new wxString(src); CreateDOMTree(); m_CurTag = NULL; m_CurTextPiece = 0; @@ -136,54 +139,53 @@ void wxHtmlParser::SetSource(const wxString& src) void wxHtmlParser::CreateDOMTree() { - wxHtmlTagsCache cache(m_Source); + wxHtmlTagsCache cache(*m_Source); m_TextPieces = new wxHtmlTextPieces; - CreateDOMSubTree(NULL, 0, m_Source.length(), &cache); + CreateDOMSubTree(NULL, m_Source->begin(), m_Source->end(), &cache); m_CurTextPiece = 0; } -extern bool wxIsCDATAElement(const wxChar *tag); +extern bool wxIsCDATAElement(const wxString& tag); void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, - int begin_pos, int end_pos, + const wxString::const_iterator& begin_pos, + const wxString::const_iterator& end_pos, wxHtmlTagsCache *cache) { - if (end_pos <= begin_pos) return; + if (end_pos <= begin_pos) + return; wxChar c; - int i = begin_pos; - int textBeginning = begin_pos; + wxString::const_iterator i = begin_pos; + wxString::const_iterator textBeginning = begin_pos; // If the tag contains CDATA text, we include the text between beginning // and ending tag verbosely. Setting i=end_pos will skip to the very // end of this function where text piece is added, bypassing any child // tags parsing (CDATA element can't have child elements by definition): - if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str())) + if (cur != NULL && wxIsCDATAElement(cur->GetName())) { i = end_pos; } while (i < end_pos) { - c = m_Source.GetChar(i); + c = *i; if (c == wxT('<')) { // add text to m_TextPieces: - if (i - textBeginning > 0) - m_TextPieces->push_back( - wxHtmlTextPiece(textBeginning, i - textBeginning)); + if (i > textBeginning) + m_TextPieces->push_back(wxHtmlTextPiece(textBeginning, i)); // if it is a comment, skip it: - wxString::const_iterator iter = m_Source.begin() + i; - if ( SkipCommentTag(iter, m_Source.end()) ) + if ( SkipCommentTag(i, m_Source->end()) ) { - textBeginning = - i = iter - m_Source.begin() + 1; // skip closing '>' too + textBeginning = i = i + 1; // skip closing '>' too } // add another tag to the tree: - else if (i < end_pos-1 && m_Source.GetChar(i+1) != wxT('/')) + else if (i < end_pos-1 && *(i+1) != wxT('/')) { wxHtmlTag *chd; if (cur) @@ -211,12 +213,12 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, if (chd->HasEnding()) { CreateDOMSubTree(chd, - chd->GetBeginPos(), chd->GetEndPos1(), + chd->GetBeginIter(), chd->GetEndIter1(), cache); - i = chd->GetEndPos2(); + i = chd->GetEndIter2(); } else - i = chd->GetBeginPos(); + i = chd->GetBeginIter(); textBeginning = i; } @@ -224,17 +226,16 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, // ... or skip ending tag: else { - while (i < end_pos && m_Source.GetChar(i) != wxT('>')) i++; + while (i < end_pos && *i != wxT('>')) ++i; textBeginning = i+1; } } - else i++; + else ++i; } // add remaining text to m_TextPieces: - if (end_pos - textBeginning > 0) - m_TextPieces->push_back( - wxHtmlTextPiece(textBeginning, end_pos - textBeginning)); + if (end_pos > textBeginning) + m_TextPieces->push_back(wxHtmlTextPiece(textBeginning, end_pos)); } void wxHtmlParser::DestroyDOMTree() @@ -257,42 +258,45 @@ void wxHtmlParser::DoParsing() { m_CurTag = m_Tags; m_CurTextPiece = 0; - DoParsing(0, m_Source.length()); + DoParsing(m_Source->begin(), m_Source->end()); } -void wxHtmlParser::DoParsing(int begin_pos, int end_pos) +void wxHtmlParser::DoParsing(const wxString::const_iterator& begin_pos_, + const wxString::const_iterator& end_pos) { - if (end_pos <= begin_pos) return; + wxString::const_iterator begin_pos(begin_pos_); + + if (end_pos <= begin_pos) + return; wxHtmlTextPieces& pieces = *m_TextPieces; size_t piecesCnt = pieces.size(); while (begin_pos < end_pos) { - while (m_CurTag && m_CurTag->GetBeginPos() < begin_pos) + while (m_CurTag && m_CurTag->GetBeginIter() < begin_pos) m_CurTag = m_CurTag->GetNextTag(); while (m_CurTextPiece < piecesCnt && - pieces[m_CurTextPiece].m_pos < begin_pos) + pieces[m_CurTextPiece].m_start < begin_pos) m_CurTextPiece++; if (m_CurTextPiece < piecesCnt && (!m_CurTag || - pieces[m_CurTextPiece].m_pos < m_CurTag->GetBeginPos())) + pieces[m_CurTextPiece].m_start < m_CurTag->GetBeginIter())) { // Add text: AddText(GetEntitiesParser()->Parse( - m_Source.Mid(pieces[m_CurTextPiece].m_pos, - pieces[m_CurTextPiece].m_lng))); - begin_pos = pieces[m_CurTextPiece].m_pos + - pieces[m_CurTextPiece].m_lng; + wxString(pieces[m_CurTextPiece].m_start, + pieces[m_CurTextPiece].m_end))); + begin_pos = pieces[m_CurTextPiece].m_end; m_CurTextPiece++; } else if (m_CurTag) { if (m_CurTag->HasEnding()) - begin_pos = m_CurTag->GetEndPos2(); + begin_pos = m_CurTag->GetEndIter2(); else - begin_pos = m_CurTag->GetBeginPos(); + begin_pos = m_CurTag->GetBeginIter(); wxHtmlTag *t = m_CurTag; m_CurTag = m_CurTag->GetNextTag(); AddTag(*t); @@ -305,20 +309,19 @@ void wxHtmlParser::DoParsing(int begin_pos, int end_pos) void wxHtmlParser::AddTag(const wxHtmlTag& tag) { - wxHtmlTagHandler *h; bool inner = false; - h = (wxHtmlTagHandler*) m_HandlersHash.Get(tag.GetName()); - if (h) + wxHtmlTagHandlersHash::const_iterator h = m_HandlersHash.find(tag.GetName()); + if (h != m_HandlersHash.end()) { - inner = h->HandleTag(tag); + inner = h->second->HandleTag(tag); if (m_stopParsing) return; } if (!inner) { if (tag.HasEnding()) - DoParsing(tag.GetBeginPos(), tag.GetEndPos1()); + DoParsing(tag.GetBeginIter(), tag.GetEndIter1()); } } @@ -328,10 +331,9 @@ void wxHtmlParser::AddTagHandler(wxHtmlTagHandler *handler) wxStringTokenizer tokenizer(s, wxT(", ")); while (tokenizer.HasMoreTokens()) - m_HandlersHash.Put(tokenizer.GetNextToken(), handler); + m_HandlersHash[tokenizer.GetNextToken()] = handler; - if (m_HandlersList.IndexOf(handler) == wxNOT_FOUND) - m_HandlersList.Append(handler); + m_HandlersSet.insert(handler); handler->SetParser(this); } @@ -341,39 +343,24 @@ void wxHtmlParser::PushTagHandler(wxHtmlTagHandler *handler, const wxString& tag wxStringTokenizer tokenizer(tags, wxT(", ")); wxString key; - if (m_HandlersStack == NULL) - { - m_HandlersStack = new wxList; - } - - m_HandlersStack->Insert((wxObject*)new wxHashTable(m_HandlersHash)); + m_HandlersStack.push_back(new wxHtmlTagHandlersHash(m_HandlersHash)); while (tokenizer.HasMoreTokens()) { key = tokenizer.GetNextToken(); - m_HandlersHash.Delete(key); - m_HandlersHash.Put(key, handler); + m_HandlersHash[key] = handler; } } void wxHtmlParser::PopTagHandler() { - wxList::compatibility_iterator first; + wxCHECK_RET( !m_HandlersStack.empty(), + "attempt to remove HTML tag handler from empty stack" ); - if ( !m_HandlersStack || -#if wxUSE_STL - !(first = m_HandlersStack->GetFirst()) -#else // !wxUSE_STL - ((first = m_HandlersStack->GetFirst()) == NULL) -#endif // wxUSE_STL/!wxUSE_STL - ) - { - wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack.")); - return; - } - m_HandlersHash = *((wxHashTable*) first->GetData()); - delete (wxHashTable*) first->GetData(); - m_HandlersStack->Erase(first); + wxHtmlTagHandlersHash *prev = m_HandlersStack.back(); + m_HandlersStack.pop_back(); + m_HandlersHash = *prev; + delete prev; } void wxHtmlParser::SetSourceAndSaveState(const wxString& src) @@ -393,7 +380,7 @@ void wxHtmlParser::SetSourceAndSaveState(const wxString& src) m_Tags = NULL; m_TextPieces = NULL; m_CurTextPiece = 0; - m_Source = wxEmptyString; + m_Source = NULL; SetSource(src); } @@ -403,6 +390,7 @@ bool wxHtmlParser::RestoreState() if (!m_SavedStates) return false; DestroyDOMTree(); + delete m_Source; wxHtmlParserState *s = m_SavedStates; m_SavedStates = s->m_nextState; @@ -419,8 +407,7 @@ bool wxHtmlParser::RestoreState() wxString wxHtmlParser::GetInnerSource(const wxHtmlTag& tag) { - return GetSource()->Mid(tag.GetBeginPos(), - tag.GetEndPos1() - tag.GetBeginPos()); + return wxString(tag.GetBeginIter(), tag.GetEndIter1()); } //----------------------------------------------------------------------------- @@ -501,11 +488,16 @@ wxString wxHtmlEntitiesParser::Parse(const wxString& input) const const wxString::const_iterator ent_s = c; wxChar entity_char; - for (; c != end && - ((*c >= wxT('a') && *c <= wxT('z')) || - (*c >= wxT('A') && *c <= wxT('Z')) || - (*c >= wxT('0') && *c <= wxT('9')) || - *c == wxT('_') || *c == wxT('#')); ++c) {} + for ( ; c != end; ++c ) + { + wxChar ch = *c; + if ( !((ch >= wxT('a') && ch <= wxT('z')) || + (ch >= wxT('A') && ch <= wxT('Z')) || + (ch >= wxT('0') && ch <= wxT('9')) || + ch == wxT('_') || ch == wxT('#')) ) + break; + } + entity.append(ent_s, c); if (c == end || *c != wxT(';')) --c; last = c+1; @@ -565,6 +557,9 @@ wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity) const { unsigned code = 0; + if (entity.empty()) + return 0; // invalid entity reference + if (entity[0] == wxT('#')) { // NB: parsed value is a number, so it's OK to use wx_str(), internal @@ -572,13 +567,13 @@ wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity) const const wxStringCharType *ent_s = entity.wx_str(); const wxStringCharType *format; - if (ent_s[1] == wxSTRING_TEXT('x') || ent_s[1] == wxSTRING_TEXT('X')) + if (ent_s[1] == wxS('x') || ent_s[1] == wxS('X')) { - format = wxSTRING_TEXT("%x"); + format = wxS("%x"); ent_s++; } else - format = wxSTRING_TEXT("%u"); + format = wxS("%u"); ent_s++; if (wxSscanf(ent_s, format, &code) != 1) @@ -588,7 +583,7 @@ wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity) const { // store the literals in wx's internal representation (either char* // in UTF-8 or wchar_t*) for best performance: - #define ENTITY(name, code) { wxSTRING_TEXT(name), code } + #define ENTITY(name, code) { wxS(name), code } static wxHtmlEntityInfo substitutions[] = { ENTITY("AElig", 198),