#include "wx/fontmap.h"
#include "wx/html/htmldefs.h"
#include "wx/html/htmlpars.h"
-#include "wx/arrimpl.cpp"
+#include "wx/vector.h"
#ifdef __WXWINCE__
#include "wx/msw/wince/missing.h" // for bsearch()
// One run of text found between tags in the parsed source. This change
// replaces the integer offset/length pair with a pair of string iterators.
class wxHtmlTextPiece
{
public:
- wxHtmlTextPiece(int pos, int lng) : m_pos(pos), m_lng(lng) {}
- int m_pos, m_lng;
// NOTE(review): the default constructor is presumably needed by the
// wxVector<wxHtmlTextPiece> container that now stores these -- confirm.
+ wxHtmlTextPiece() {}
+ wxHtmlTextPiece(const wxString::const_iterator& start,
+ const wxString::const_iterator& end)
+ : m_start(start), m_end(end) {}
// Half-open range: m_start is the first character of the piece, m_end is
// one past the last (callers build the text with wxString(m_start, m_end)).
+ wxString::const_iterator m_start, m_end;
};
// Container of text pieces: the object-array macros are replaced by a class
// derived from wxVector<wxHtmlTextPiece>.
-WX_DECLARE_OBJARRAY(wxHtmlTextPiece, wxHtmlTextPieces);
-WX_DEFINE_OBJARRAY(wxHtmlTextPieces)
+// NB: this is an empty class rather than a typedef because the name is
+//     forward-declared as a class, which a typedef could not satisfy
+class wxHtmlTextPieces : public wxVector<wxHtmlTextPiece>
+{
+};
// One saved parser state. RestoreState() takes these off the m_SavedStates
// list by following m_nextState.
class wxHtmlParserState
{
wxHtmlTag *m_tags;
wxHtmlTextPieces *m_textPieces;
int m_curTextPiece;
// The source is now saved as a pointer instead of a string copy; the NB
// comment in SetSource() explains that copying the string would invalidate
// the iterators that point into it.
- wxString m_source;
+ const wxString *m_source;
// Link to the next saved state in the list.
wxHtmlParserState *m_nextState;
};
IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject)
wxHtmlParser::wxHtmlParser()
- : wxObject(), m_HandlersHash(wxKEY_STRING),
- m_FS(NULL), m_HandlersStack(NULL)
+ : wxObject(),
+ m_FS(NULL)
{
+ m_Source = NULL;
m_entitiesParser = new wxHtmlEntitiesParser;
m_Tags = NULL;
m_CurTag = NULL;
while (RestoreState()) {}
DestroyDOMTree();
- if (m_HandlersStack)
- {
- wxList& tmp = *m_HandlersStack;
- wxList::iterator it, en;
- for( it = tmp.begin(), en = tmp.end(); it != en; ++it )
- delete (wxHashTable*)*it;
- tmp.clear();
- }
- delete m_HandlersStack;
- m_HandlersHash.Clear();
- WX_CLEAR_LIST(wxList, m_HandlersList);
+ WX_CLEAR_ARRAY(m_HandlersStack);
+ WX_CLEAR_HASH_SET(wxHtmlTagHandlersSet, m_HandlersSet);
delete m_entitiesParser;
+ delete m_Source;
}
wxObject* wxHtmlParser::Parse(const wxString& source)
+// Replaces the parser's source text with a private heap copy of src and
+// rebuilds the tag tree / text pieces for it.
void wxHtmlParser::SetSource(const wxString& src)
{
DestroyDOMTree();
- m_Source = src;
+ // NB: This is allocated on heap because wxHtmlTag uses iterators and
+ // making a copy of m_Source string in SetSourceAndSaveState() and
+ // RestoreState() would invalidate them (because wxString::m_impl's
+ // memory would change completely twice and iterators use pointers
+ // into it). So instead, we keep the string object intact and only
+ // store/restore pointer to it, for which we need it to be allocated
+ // on the heap.
+ //
+ // Copy first, free second: src may refer to the very string we own
+ // (e.g. SetSource(*GetSource())), and deleting it before making the
+ // copy would read freed memory.
+ wxString *newSource = new wxString(src);
+ delete m_Source;
+ m_Source = newSource;
CreateDOMTree();
m_CurTag = NULL;
m_CurTextPiece = 0;
// Builds the tag tree and a fresh text-piece list covering the whole of
// *m_Source, then resets the current text piece index.
// NOTE(review): m_Source is dereferenced unconditionally; the only caller
// visible here, SetSource(), assigns it first -- confirm there are no others.
void wxHtmlParser::CreateDOMTree()
{
- wxHtmlTagsCache cache(m_Source);
+ wxHtmlTagsCache cache(*m_Source);
m_TextPieces = new wxHtmlTextPieces;
// The range to scan is now passed as iterators rather than integer offsets.
- CreateDOMSubTree(NULL, 0, m_Source.length(), &cache);
+ CreateDOMSubTree(NULL, m_Source->begin(), m_Source->end(), &cache);
m_CurTextPiece = 0;
}
-extern bool wxIsCDATAElement(const wxChar *tag);
+extern bool wxIsCDATAElement(const wxString& tag);
+// Scans the source range [begin_pos, end_pos): text found between tags is
+// appended to m_TextPieces, comments are skipped, and tags that have an
+// ending are descended into recursively. `cur` is the tag whose content is
+// being scanned (NULL for the top-level call from CreateDOMTree()).
void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
- int begin_pos, int end_pos,
+ const wxString::const_iterator& begin_pos,
+ const wxString::const_iterator& end_pos,
wxHtmlTagsCache *cache)
{
- if (end_pos <= begin_pos) return;
+ if (end_pos <= begin_pos)
+ return;
wxChar c;
- int i = begin_pos;
- int textBeginning = begin_pos;
+ wxString::const_iterator i = begin_pos;
+ wxString::const_iterator textBeginning = begin_pos;
// If the tag contains CDATA text, we include the text between beginning
// and ending tag verbatim. Setting i=end_pos will skip to the very
// end of this function where text piece is added, bypassing any child
// tags parsing (CDATA element can't have child elements by definition):
- if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str()))
+ if (cur != NULL && wxIsCDATAElement(cur->GetName()))
{
i = end_pos;
}
while (i < end_pos)
{
- c = m_Source.GetChar(i);
+ c = *i;
if (c == wxT('<'))
{
// add text to m_TextPieces:
- if (i - textBeginning > 0)
- m_TextPieces->Add(
- wxHtmlTextPiece(textBeginning, i - textBeginning));
+ if (i > textBeginning)
+ m_TextPieces->push_back(wxHtmlTextPiece(textBeginning, i));
// if it is a comment, skip it:
- wxString::const_iterator iter = m_Source.begin() + i;
- if ( SkipCommentTag(iter, m_Source.end()) )
+ if ( SkipCommentTag(i, m_Source->end()) )
{
- textBeginning =
- i = iter - m_Source.begin() + 1; // skip closing '>' too
+ textBeginning = i = i + 1; // skip closing '>' too
}
// add another tag to the tree:
- else if (i < end_pos-1 && m_Source.GetChar(i+1) != wxT('/'))
+ else if (i < end_pos-1 && *(i+1) != wxT('/'))
{
wxHtmlTag *chd;
if (cur)
if (chd->HasEnding())
{
CreateDOMSubTree(chd,
- chd->GetBeginPos(), chd->GetEndPos1(),
+ chd->GetBeginIter(), chd->GetEndIter1(),
cache);
- i = chd->GetEndPos2();
+ i = chd->GetEndIter2();
}
else
- i = chd->GetBeginPos();
+ i = chd->GetBeginIter();
textBeginning = i;
}
// ... or skip ending tag:
else
{
- while (i < end_pos && m_Source.GetChar(i) != wxT('>')) i++;
+ while (i < end_pos && *i != wxT('>')) ++i;
- textBeginning = i+1;
+ // The loop above may stop at end_pos without finding '>'. With
+ // integer offsets end_pos+1 was harmless, but advancing an
+ // iterator past end_pos (possibly past m_Source->end()) is
+ // undefined, so only skip the '>' when one was actually found.
+ textBeginning = i < end_pos ? i+1 : i;
}
}
- else i++;
+ else ++i;
}
// add remaining text to m_TextPieces:
- if (end_pos - textBeginning > 0)
- m_TextPieces->Add(
- wxHtmlTextPiece(textBeginning, end_pos - textBeginning));
+ if (end_pos > textBeginning)
+ m_TextPieces->push_back(wxHtmlTextPiece(textBeginning, end_pos));
}
void wxHtmlParser::DestroyDOMTree()
{
m_CurTag = m_Tags;
m_CurTextPiece = 0;
- DoParsing(0, m_Source.length());
+ DoParsing(m_Source->begin(), m_Source->end());
}
-void wxHtmlParser::DoParsing(int begin_pos, int end_pos)
+void wxHtmlParser::DoParsing(const wxString::const_iterator& begin_pos_,
+ const wxString::const_iterator& end_pos)
{
- if (end_pos <= begin_pos) return;
+ wxString::const_iterator begin_pos(begin_pos_);
+
+ if (end_pos <= begin_pos)
+ return;
wxHtmlTextPieces& pieces = *m_TextPieces;
- size_t piecesCnt = pieces.GetCount();
+ size_t piecesCnt = pieces.size();
while (begin_pos < end_pos)
{
- while (m_CurTag && m_CurTag->GetBeginPos() < begin_pos)
+ while (m_CurTag && m_CurTag->GetBeginIter() < begin_pos)
m_CurTag = m_CurTag->GetNextTag();
while (m_CurTextPiece < piecesCnt &&
- pieces[m_CurTextPiece].m_pos < begin_pos)
+ pieces[m_CurTextPiece].m_start < begin_pos)
m_CurTextPiece++;
if (m_CurTextPiece < piecesCnt &&
(!m_CurTag ||
- pieces[m_CurTextPiece].m_pos < m_CurTag->GetBeginPos()))
+ pieces[m_CurTextPiece].m_start < m_CurTag->GetBeginIter()))
{
// Add text:
AddText(GetEntitiesParser()->Parse(
- m_Source.Mid(pieces[m_CurTextPiece].m_pos,
- pieces[m_CurTextPiece].m_lng)));
- begin_pos = pieces[m_CurTextPiece].m_pos +
- pieces[m_CurTextPiece].m_lng;
+ wxString(pieces[m_CurTextPiece].m_start,
+ pieces[m_CurTextPiece].m_end)));
+ begin_pos = pieces[m_CurTextPiece].m_end;
m_CurTextPiece++;
}
else if (m_CurTag)
{
if (m_CurTag->HasEnding())
- begin_pos = m_CurTag->GetEndPos2();
+ begin_pos = m_CurTag->GetEndIter2();
else
- begin_pos = m_CurTag->GetBeginPos();
+ begin_pos = m_CurTag->GetBeginIter();
wxHtmlTag *t = m_CurTag;
m_CurTag = m_CurTag->GetNextTag();
AddTag(*t);
// Passes the tag to the handler registered under its name, if there is one.
// Unless the handler returns true, the tag's inner range is then parsed with
// DoParsing().
// NOTE(review): a true return from HandleTag() presumably means the handler
// already processed the tag's content itself -- confirm against the handler
// interface.
void wxHtmlParser::AddTag(const wxHtmlTag& tag)
{
- wxHtmlTagHandler *h;
bool inner = false;
// The lookup now goes through the hash map's find()/end() instead of
// wxHashTable::Get() returning a possibly-NULL pointer.
- h = (wxHtmlTagHandler*) m_HandlersHash.Get(tag.GetName());
- if (h)
+ wxHtmlTagHandlersHash::const_iterator h = m_HandlersHash.find(tag.GetName());
+ if (h != m_HandlersHash.end())
{
- inner = h->HandleTag(tag);
+ inner = h->second->HandleTag(tag);
// The stop flag is checked right after the handler has run; when it is
// set, the tag's content is not parsed.
if (m_stopParsing)
return;
}
// No handler, or the handler returned false: parse the range between the
// opening and the closing tag, provided the tag has an ending at all.
if (!inner)
{
if (tag.HasEnding())
- DoParsing(tag.GetBeginPos(), tag.GetEndPos1());
+ DoParsing(tag.GetBeginIter(), tag.GetEndIter1());
}
}
wxStringTokenizer tokenizer(s, wxT(", "));
while (tokenizer.HasMoreTokens())
- m_HandlersHash.Put(tokenizer.GetNextToken(), handler);
+ m_HandlersHash[tokenizer.GetNextToken()] = handler;
- if (m_HandlersList.IndexOf(handler) == wxNOT_FOUND)
- m_HandlersList.Append(handler);
+ m_HandlersSet.insert(handler);
handler->SetParser(this);
}
wxStringTokenizer tokenizer(tags, wxT(", "));
wxString key;
- if (m_HandlersStack == NULL)
- {
- m_HandlersStack = new wxList;
- }
-
- m_HandlersStack->Insert((wxObject*)new wxHashTable(m_HandlersHash));
+ m_HandlersStack.push_back(new wxHtmlTagHandlersHash(m_HandlersHash));
while (tokenizer.HasMoreTokens())
{
key = tokenizer.GetNextToken();
- m_HandlersHash.Delete(key);
- m_HandlersHash.Put(key, handler);
+ m_HandlersHash[key] = handler;
}
}
// Restores m_HandlersHash from the most recently saved copy on
// m_HandlersStack and frees that copy.
void wxHtmlParser::PopTagHandler()
{
- wxList::compatibility_iterator first;
// An empty stack is now reported through wxCHECK_RET (with an untranslated
// message) instead of the translated wxLogWarning() used before; in both
// versions the function returns without touching the handler table.
+ wxCHECK_RET( !m_HandlersStack.empty(),
+ "attempt to remove HTML tag handler from empty stack" );
- if ( !m_HandlersStack ||
-#if wxUSE_STL
- !(first = m_HandlersStack->GetFirst())
-#else // !wxUSE_STL
- ((first = m_HandlersStack->GetFirst()) == NULL)
-#endif // wxUSE_STL/!wxUSE_STL
- )
- {
- wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack."));
- return;
- }
- m_HandlersHash = *((wxHashTable*) first->GetData());
- delete (wxHashTable*) first->GetData();
- m_HandlersStack->Erase(first);
// The saved table is copied back into m_HandlersHash before it is deleted.
+ wxHtmlTagHandlersHash *prev = m_HandlersStack.back();
+ m_HandlersStack.pop_back();
+ m_HandlersHash = *prev;
+ delete prev;
}
void wxHtmlParser::SetSourceAndSaveState(const wxString& src)
m_Tags = NULL;
m_TextPieces = NULL;
m_CurTextPiece = 0;
- m_Source = wxEmptyString;
+ m_Source = NULL;
SetSource(src);
}
if (!m_SavedStates) return false;
DestroyDOMTree();
+ delete m_Source;
wxHtmlParserState *s = m_SavedStates;
m_SavedStates = s->m_nextState;
// Returns a copy of the source range [GetBeginIter(), GetEndIter1()) of the
// tag, i.e. the same range that AddTag() hands to DoParsing() as the tag's
// content. The new version builds the string straight from the tag's
// iterators instead of calling Mid() on the parser's source.
wxString wxHtmlParser::GetInnerSource(const wxHtmlTag& tag)
{
- return GetSource()->Mid(tag.GetBeginPos(),
- tag.GetEndPos1() - tag.GetBeginPos());
+ return wxString(tag.GetBeginIter(), tag.GetEndIter1());
}
//-----------------------------------------------------------------------------
#endif
}
+// Returns a copy of input with every "&name;" / "&#code;" sequence replaced
+// by the character GetEntityChar() reports for it. Sequences that are not
+// recognized are copied through unchanged, and input without any '&' is
+// returned as is. `last` always marks the first character that has not yet
+// been copied to the output.
-wxString wxHtmlEntitiesParser::Parse(const wxString& input)
+wxString wxHtmlEntitiesParser::Parse(const wxString& input) const
{
- const wxChar *c, *last;
- const wxChar *in_str = input.c_str();
wxString output;
- output.reserve(input.length());
+ const wxString::const_iterator end(input.end());
+ wxString::const_iterator c(input.begin());
+ wxString::const_iterator last(c);
- for (c = in_str, last = in_str; *c != wxT('\0'); c++)
+ for ( ; c < end; ++c )
{
if (*c == wxT('&'))
{
+ if ( output.empty() )
+ output.reserve(input.length());
+
if (c - last > 0)
- output.append(last, c - last);
- if ( *++c == wxT('\0') )
+ output.append(last, c);
+ // Everything before the '&' has now been copied. Record that
+ // before the possible early exit below, otherwise the tail append
+ // after the loop copies the same text a second time when the
+ // input ends in a bare '&' (e.g. "&lt;abc&" gave "<abcabc&").
+ last = c;
+ if ( ++c == end )
break;
wxString entity;
- const wxChar *ent_s = c;
+ const wxString::const_iterator ent_s = c;
wxChar entity_char;
- for (; (*c >= wxT('a') && *c <= wxT('z')) ||
- (*c >= wxT('A') && *c <= wxT('Z')) ||
- (*c >= wxT('0') && *c <= wxT('9')) ||
- *c == wxT('_') || *c == wxT('#'); c++) {}
- entity.append(ent_s, c - ent_s);
- if (*c != wxT(';')) c--;
+ for ( ; c != end; ++c )
+ {
+ wxChar ch = *c;
+ if ( !((ch >= wxT('a') && ch <= wxT('z')) ||
+ (ch >= wxT('A') && ch <= wxT('Z')) ||
+ (ch >= wxT('0') && ch <= wxT('9')) ||
+ ch == wxT('_') || ch == wxT('#')) )
+ break;
+ }
+
+ entity.append(ent_s, c);
+ if (c == end || *c != wxT(';')) --c;
last = c+1;
entity_char = GetEntityChar(entity);
if (entity_char)
output << entity_char;
else
{
- output.append(ent_s-1, c-ent_s+2);
+ output.append(ent_s-1, c+1);
wxLogTrace(wxTRACE_HTML_DEBUG,
- wxT("Unrecognized HTML entity: '%s'"),
- entity.c_str());
+ "Unrecognized HTML entity: '%s'",
+ entity);
}
}
}
- if (*last != wxT('\0'))
- output.append(last);
+ if ( last == input.begin() ) // common case: no entity
+ return input;
+ if ( last != end )
+ output.append(last, end);
return output;
}
#if !wxUSE_UNICODE
-wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
+wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code) const
{
#if wxUSE_WCHAR_T
char buf[2];
#endif
}
-wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
+wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity) const
{
unsigned code = 0;
if (entity[0] == wxT('#'))
{
- const wxChar *ent_s = entity.c_str();
- const wxChar *format;
+ // NB: parsed value is a number, so it's OK to use wx_str(), internal
+ // representation is the same for numbers
+ const wxStringCharType *ent_s = entity.wx_str();
+ const wxStringCharType *format;
- if (ent_s[1] == wxT('x') || ent_s[1] == wxT('X'))
+ if (ent_s[1] == wxS('x') || ent_s[1] == wxS('X'))
{
- format = wxT("%x");
+ format = wxS("%x");
ent_s++;
}
else
- format = wxT("%u");
+ format = wxS("%u");
ent_s++;
if (wxSscanf(ent_s, format, &code) != 1)
{
// store the literals in wx's internal representation (either char*
// in UTF-8 or wchar_t*) for best performance:
- #define ENTITY(name, code) { wxSTRING_TEXT(name), code }
+ #define ENTITY(name, code) { wxS(name), code }
static wxHtmlEntityInfo substitutions[] = {
ENTITY("AElig", 198),
wxString::const_iterator p = start;
// comments begin with "<!--" in HTML 4.0
- if ( end - p < 3 || *++p != '!' || *++p != '-' || *++p != '-' )
+ if ( p > end - 3 || *++p != '!' || *++p != '-' || *++p != '-' )
{
// not a comment at all
return false;