X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/891000d61bd6f34749efd472e0a1960aae75abd6..1c9919467b422edf561753db25605bc7aa293b55:/src/html/helpdata.cpp?ds=inline diff --git a/src/html/helpdata.cpp b/src/html/helpdata.cpp index 5e35fe729e..63b2988e61 100644 --- a/src/html/helpdata.cpp +++ b/src/html/helpdata.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Name: helpdata.cpp +// Name: src/html/helpdata.cpp // Purpose: wxHtmlHelpData // Notes: Based on htmlhelp.cpp, implementing a monolithic // HTML Help controller class, by Vaclav Slavik @@ -9,61 +9,106 @@ // Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// -#ifdef __GNUG__ -#pragma implementation -#endif - // For compilers that support precompilation, includes "wx.h". #include "wx/wxprec.h" #ifdef __BORLANDC__ -#pragma hdrstop + #pragma hdrstop #endif -#include "wx/defs.h" - #if wxUSE_HTML && wxUSE_STREAMS -#ifndef WXPRECOMP -#include "wx/wx.h" +#ifndef WX_PRECOMP + #include "wx/intl.h" + #include "wx/log.h" #endif +#include + #include "wx/html/helpdata.h" #include "wx/tokenzr.h" #include "wx/wfstream.h" #include "wx/busyinfo.h" #include "wx/encconv.h" #include "wx/fontmap.h" -#include "wx/log.h" #include "wx/html/htmlpars.h" #include "wx/html/htmldefs.h" +#include "wx/html/htmlfilt.h" +#include "wx/filename.h" #include "wx/arrimpl.cpp" WX_DEFINE_OBJARRAY(wxHtmlBookRecArray) +WX_DEFINE_OBJARRAY(wxHtmlHelpDataItems) //----------------------------------------------------------------------------- // static helper functions //----------------------------------------------------------------------------- // Reads one line, stores it into buf and returns pointer to new line or NULL. -static char* ReadLine(char *line, char *buf) +static const wxChar* ReadLine(const wxChar *line, wxChar *buf, size_t bufsize) { - char *writeptr = buf, *readptr = line; + wxChar *writeptr = buf; + wxChar *endptr = buf + bufsize - 1; + const wxChar *readptr = line; - while (*readptr != 0 && *readptr != '\r' && *readptr != '\n') *(writeptr++) = *(readptr++); + while (*readptr != 0 && *readptr != wxT('\r') && *readptr != wxT('\n') && + writeptr != endptr) + *(writeptr++) = *(readptr++); *writeptr = 0; - while (*readptr == '\r' || *readptr == '\n') readptr++; - if (*readptr == 0) return NULL; - else return readptr; + while (*readptr == wxT('\r') || *readptr == wxT('\n')) + readptr++; + if (*readptr == 0) + return NULL; + else + return readptr; } -static int LINKAGEMODE IndexCompareFunc(const void *a, const void *b) +static int +wxHtmlHelpIndexCompareFunc(wxHtmlHelpDataItem **a, wxHtmlHelpDataItem **b) { - return wxStrcmp(((wxHtmlContentsItem*)a)->m_Name, ((wxHtmlContentsItem*)b)->m_Name); -} + wxHtmlHelpDataItem *ia = *a; + wxHtmlHelpDataItem *ib = *b; + + if (ia == NULL) + return -1; + if (ib == NULL) + return 1; + + if (ia->parent == ib->parent) + { + return ia->name.CmpNoCase(ib->name); + } + else if (ia->level == ib->level) + { + return wxHtmlHelpIndexCompareFunc(&ia->parent, &ib->parent); + } + else + { + wxHtmlHelpDataItem *ia2 = ia; + wxHtmlHelpDataItem *ib2 = ib; + while (ia2->level > ib2->level) + { + ia2 = ia2->parent; + } + while (ib2->level > ia2->level) + { + ib2 = ib2->parent; + } + + wxASSERT(ia2); + wxASSERT(ib2); + int res = wxHtmlHelpIndexCompareFunc(&ia2, &ib2); + if (res != 0) + return res; + else if (ia->level > ib->level) + return 1; + else + return -1; + } +} //----------------------------------------------------------------------------- // HP_Parser @@ -71,9 +116,18 @@ static int LINKAGEMODE IndexCompareFunc(const void *a, const void *b) class HP_Parser : public wxHtmlParser { - public: - void AddText(const char* WXUNUSED(text)) { } - wxObject* GetProduct() { return NULL; } +public: + HP_Parser() + { + GetEntitiesParser()->SetEncoding(wxFONTENCODING_ISO8859_1); + } + + wxObject* GetProduct() { return NULL; } + +protected: + virtual void AddText(const wxString& WXUNUSED(txt)) {} + + wxDECLARE_NO_COPY_CLASS(HP_Parser); }; @@ -84,489 +138,348 @@ class HP_Parser : public wxHtmlParser class HP_TagHandler : public wxHtmlTagHandler { private: - wxString m_Name, m_Page; - int m_Level; - int m_ID; - int m_Index; - wxHtmlContentsItem *m_Items; - int m_ItemsCnt; - wxHtmlBookRecord *m_Book; - bool m_firstTime; // For checking if we're adding sections at level zero, so we 'delete' the first one + wxString m_name, m_page; + int m_level; + int m_id; + int m_index; + int m_count; + wxHtmlHelpDataItem *m_parentItem; + wxHtmlBookRecord *m_book; + + wxHtmlHelpDataItems *m_data; public: - HP_TagHandler(wxHtmlBookRecord *b) : wxHtmlTagHandler() - { m_Book = b; m_Items = NULL; m_ItemsCnt = 0; m_Name = m_Page = wxEmptyString; - m_Level = 0; m_ID = -1; m_firstTime = TRUE; } + HP_TagHandler(wxHtmlBookRecord *b) : wxHtmlTagHandler() + { + m_data = NULL; + m_book = b; + m_name = m_page = wxEmptyString; + m_level = 0; + m_id = wxID_ANY; + m_count = 0; + m_parentItem = NULL; + } wxString GetSupportedTags() { return wxT("UL,OBJECT,PARAM"); } bool HandleTag(const wxHtmlTag& tag); - void WriteOut(wxHtmlContentsItem*& array, int& size); - void ReadIn(wxHtmlContentsItem* array, int size); + + void Reset(wxHtmlHelpDataItems& data) + { + m_data = &data; + m_count = 0; + m_level = 0; + m_parentItem = NULL; + } + + wxDECLARE_NO_COPY_CLASS(HP_TagHandler); }; bool HP_TagHandler::HandleTag(const wxHtmlTag& tag) { - if (tag.GetName() == wxT("UL")) + if (tag.GetName() == wxT("UL")) { - m_Level++; + wxHtmlHelpDataItem *oldparent = m_parentItem; + m_level++; + m_parentItem = (m_count > 0) ? &(*m_data)[m_data->size()-1] : NULL; ParseInner(tag); - m_Level--; - return TRUE; + m_level--; + m_parentItem = oldparent; + return true; } - else if (tag.GetName() == wxT("OBJECT")) + else if (tag.GetName() == wxT("OBJECT")) { - m_Name = m_Page = wxEmptyString; + m_name = m_page = wxEmptyString; ParseInner(tag); - if (tag.GetParam("TYPE") == "text/sitemap") - - // if (!m_Page.IsEmpty()) +#if 0 + if (!page.IsEmpty()) /* Valid HHW's file may contain only two object tags: - + - + or - - - - - - - We're interested in the latter. !m_Page.IsEmpty() is valid + + + + + + + We're interested in the latter. !page.IsEmpty() is valid condition because text/site properties does not contain Local param */ +#endif + if (tag.GetParam(wxT("TYPE")) == wxT("text/sitemap")) { - // We're reading in items at level zero, which must mean we want to specify - // our own 'books', so chuck out the first (empty) one that AddBook already - // created - if (m_firstTime && (m_Level == 0) && (m_ItemsCnt > 0)) - { - m_ItemsCnt --; - } - else - { - if (m_ItemsCnt % wxHTML_REALLOC_STEP == 0) - m_Items = (wxHtmlContentsItem*) realloc(m_Items, (m_ItemsCnt + wxHTML_REALLOC_STEP) * sizeof(wxHtmlContentsItem)); - } - m_Items[m_ItemsCnt].m_Level = m_Level; - m_Items[m_ItemsCnt].m_ID = m_ID; - m_Items[m_ItemsCnt].m_Page = new wxChar[m_Page.Length() + 1]; - wxStrcpy(m_Items[m_ItemsCnt].m_Page, m_Page.c_str()); - m_Items[m_ItemsCnt].m_Name = new wxChar [m_Name.Length() + 1]; - wxStrcpy(m_Items[m_ItemsCnt].m_Name, m_Name.c_str()); - m_Items[m_ItemsCnt].m_Book = m_Book; - m_ItemsCnt++; - - m_firstTime = FALSE; + wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem(); + item->parent = m_parentItem; + item->level = m_level; + item->id = m_id; + item->page = m_page; + item->name = m_name; + + item->book = m_book; + m_data->Add(item); + m_count++; } - return TRUE; + return true; } - else + else { // "PARAM" - if (m_Name == wxEmptyString && tag.GetParam(wxT("NAME")) == wxT("Name")) - { - m_Name = tag.GetParam(wxT("VALUE")); - if (m_Name.Find(wxT('&')) != -1) - { -#define ESCSEQ(escape, subst) \ - { _T("&") _T(escape) _T(";"), _T("&") _T(escape) _T(" "), _T("&") _T(escape), _T(subst) } - static wxChar* substitutions[][4] = - { - ESCSEQ("quot", "\""), - ESCSEQ("#34", "\""), - ESCSEQ("lt", "<"), - ESCSEQ("#60", "<"), - ESCSEQ("gt", ">"), - ESCSEQ("#62", ">"), - - ESCSEQ("#94", "^"), /* ^ */ - - ESCSEQ("nbsp", " "), - ESCSEQ("#32", " "), - ESCSEQ("iexcl", "!"), - ESCSEQ("#33", "!"), - ESCSEQ("cent", "¢"/* ¢ */), - ESCSEQ("#162", "¢"/* ¢ */), - - ESCSEQ("trade", "(TM)"), - ESCSEQ("#153", "(TM)"), - - ESCSEQ("yen", "¥"), - ESCSEQ("#165", "¥"), - ESCSEQ("brkbar", "¦"), - ESCSEQ("#166", "¦"), - ESCSEQ("sect", "§"), - ESCSEQ("#167", "§"), - ESCSEQ("uml", "¨"), - ESCSEQ("#168", "¨"), - - ESCSEQ("copy", "©"), /* © */ - ESCSEQ("#169", "©"), - ESCSEQ("ordf", "ª"), - ESCSEQ("#170", "ª"), - ESCSEQ("laquo", "«"), /* « */ - ESCSEQ("#171", "«"), - ESCSEQ("not", "¬"), - ESCSEQ("#172", "¬"), - - ESCSEQ("reg", "®"), /* ® */ - ESCSEQ("#174", "®"), - - ESCSEQ("deg", "°"), /* ° */ - ESCSEQ("#176", "°"), - ESCSEQ("plusm", "±"), /* ± */ - ESCSEQ("#177", "±"), - - ESCSEQ("acute", "´"), - ESCSEQ("#180", "´"), - ESCSEQ("macron", "¯"), - ESCSEQ("#175", "¯"), - ESCSEQ("micro", "µ"), /* µ */ - ESCSEQ("#181", "µ"), - ESCSEQ("para", "¶"), /* ¶ */ - ESCSEQ("#182", "¶"), - - ESCSEQ("ordm", "º"), /* º */ - ESCSEQ("#186", "º"), - ESCSEQ("raquo", "»"), /* » */ - ESCSEQ("#187", "»"), - - ESCSEQ("iquest", "¿"), /* ¿ */ - ESCSEQ("#191", "¿"), - ESCSEQ("Agrave", "\300"/* À */), - ESCSEQ("#193", "\300"/* À */), - - ESCSEQ("Acirc", "\302"/* Â */), - ESCSEQ("Atilde", "\303"/* Ã */), - ESCSEQ("Auml", "\304"/* Ä */), - ESCSEQ("Aring", " "), - ESCSEQ("AElig", " "), - ESCSEQ("Ccedil", "\347"/* ç */), - ESCSEQ("Egrave", "\310"/* È */), - ESCSEQ("Eacute", "\311"/* É */), - ESCSEQ("Ecirc", "\312"/* Ê */), - ESCSEQ("Euml", "\313"/* Ë */), - ESCSEQ("Igrave", "\314"/* Ì */), - - ESCSEQ("Icirc", "\316"/* Î */), - ESCSEQ("Iuml", "\317"/* Ï */), - - ESCSEQ("Ntilde", "\321"/* Ñ */), - ESCSEQ("Ograve", "\322"/* Ò */), - - ESCSEQ("Ocirc", "\324"/* Ô */), - ESCSEQ("Otilde", "\325"/* Õ */), - ESCSEQ("Ouml", "\326"/* Ö */), - - ESCSEQ("Oslash", " "), - ESCSEQ("Ugrave", "\331"/* Ù */), - - ESCSEQ("Ucirc", " "), - ESCSEQ("Uuml", "\334"/* Ü */), - - ESCSEQ("szlig", "\247"/* § */), - ESCSEQ("agrave;","à"), - ESCSEQ("aacute", "\341"/* á */), - ESCSEQ("acirc", "\342"/* â */), - ESCSEQ("atilde", "\343"/* ã */), - ESCSEQ("auml", "\344"/* ä */), - ESCSEQ("aring", "a"), - ESCSEQ("aelig", "ae"), - ESCSEQ("ccedil", "\347"/* ç */), - ESCSEQ("egrave", "\350"/* è */), - ESCSEQ("eacute", "\351"/* é */), - ESCSEQ("ecirc", "\352"/* ê */), - ESCSEQ("euml", "\353"/* ë */), - ESCSEQ("igrave", "\354"/* ì */), - ESCSEQ("iacute", "\355"/* í */), - ESCSEQ("icirc", " "), - ESCSEQ("iuml", "\357"/* ï */), - ESCSEQ("eth", " "), - ESCSEQ("ntilde", "\361"/* ñ */), - ESCSEQ("ograve", "\362"/* ò */), - ESCSEQ("oacute", "\363"/* ó */), - ESCSEQ("ocirc", "\364"/* ô */), - ESCSEQ("otilde", "\365"/* õ */), - ESCSEQ("ouml", "\366"/* ö */), - ESCSEQ("divide", " "), - ESCSEQ("oslash", " "), - ESCSEQ("ugrave", "\371"/* ù */), - ESCSEQ("uacute", "\372"/* ú */), - ESCSEQ("ucirc", "\373"/* û */), - ESCSEQ("uuml", "\374"/* ü */), - - ESCSEQ("yuml", ""), - - /* this one should ALWAYS stay the last one!!! */ - ESCSEQ("amp", "&"), - ESCSEQ("#38", "&"), - - { NULL, NULL, NULL } - }; - - for (int i = 0; substitutions[i][0] != NULL; i++) - { - m_Name.Replace(substitutions[i][0], substitutions[i][3], TRUE); - m_Name.Replace(substitutions[i][1], substitutions[i][3], TRUE); - m_Name.Replace(substitutions[i][2], substitutions[i][3], TRUE); - } - } - } - if (tag.GetParam(wxT("NAME")) == wxT("Local")) m_Page = tag.GetParam(wxT("VALUE")); - if (tag.GetParam(wxT("NAME")) == wxT("ID")) tag.ScanParam(wxT("VALUE"), wxT("%i"), &m_ID); - return FALSE; + if (m_name.empty() && tag.GetParam(wxT("NAME")) == wxT("Name")) + m_name = tag.GetParam(wxT("VALUE")); + if (tag.GetParam(wxT("NAME")) == wxT("Local")) + m_page = tag.GetParam(wxT("VALUE")); + if (tag.GetParam(wxT("NAME")) == wxT("ID")) + tag.GetParamAsInt(wxT("VALUE"), &m_id); + return false; } } +//----------------------------------------------------------------------------- +// wxHtmlHelpData +//----------------------------------------------------------------------------- -void HP_TagHandler::WriteOut(wxHtmlContentsItem*& array, int& size) +wxString wxHtmlBookRecord::GetFullPath(const wxString &page) const { - array = m_Items; - size = m_ItemsCnt; - m_Items = NULL; - m_ItemsCnt = 0; + if (wxIsAbsolutePath(page)) + return page; + else + return m_BasePath + page; } -void HP_TagHandler::ReadIn(wxHtmlContentsItem* array, int size) +wxString wxHtmlHelpDataItem::GetIndentedName() const { - m_Items = array; - m_ItemsCnt = size; + wxString s; + for (int i = 1; i < level; i++) + s << wxT(" "); + s << name; + return s; } - - -//----------------------------------------------------------------------------- -// wxHtmlHelpData -//----------------------------------------------------------------------------- - IMPLEMENT_DYNAMIC_CLASS(wxHtmlHelpData, wxObject) wxHtmlHelpData::wxHtmlHelpData() { - m_TempPath = wxEmptyString; - - m_Contents = NULL; - m_ContentsCnt = 0; - m_Index = NULL; - m_IndexCnt = 0; } wxHtmlHelpData::~wxHtmlHelpData() { - int i; - - m_BookRecords.Empty(); - if (m_Contents) - { - for (i = 0; i < m_ContentsCnt; i++) - { - delete[] m_Contents[i].m_Page; - delete[] m_Contents[i].m_Name; - } - free(m_Contents); - } - if (m_Index) - { - for (i = 0; i < m_IndexCnt; i++) - { - delete[] m_Index[i].m_Page; - delete[] m_Index[i].m_Name; - } - free(m_Index); - } } -bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, const wxString& indexfile, const wxString& contentsfile) +bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, + const wxString& indexfile, + const wxString& contentsfile) { wxFSFile *f; - char *buf; - int sz; + wxHtmlFilterHTML filter; + wxString buf; wxString string; HP_Parser parser; HP_TagHandler *handler = new HP_TagHandler(book); parser.AddTagHandler(handler); - f = ( contentsfile.IsEmpty() ? (wxFSFile*) NULL : fsys.OpenFile(contentsfile) ); - if (f) + f = ( contentsfile.empty() ? NULL : fsys.OpenFile(contentsfile) ); + if (f) { - sz = f->GetStream()->GetSize(); - buf = new char[sz + 1]; - buf[sz] = 0; - f->GetStream()->Read(buf, sz); + buf.clear(); + buf = filter.ReadFile(*f); delete f; - handler->ReadIn(m_Contents, m_ContentsCnt); + handler->Reset(m_contents); parser.Parse(buf); - handler->WriteOut(m_Contents, m_ContentsCnt); - delete[] buf; } else + { wxLogError(_("Cannot open contents file: %s"), contentsfile.c_str()); + } - f = ( indexfile.IsEmpty() ? (wxFSFile*) NULL : fsys.OpenFile(indexfile) ); - if (f) + f = ( indexfile.empty() ? NULL : fsys.OpenFile(indexfile) ); + if (f) { - sz = f->GetStream()->GetSize(); - buf = new char[sz + 1]; - buf[sz] = 0; - f->GetStream()->Read(buf, sz); + buf.clear(); + buf = filter.ReadFile(*f); delete f; - handler->ReadIn(m_Index, m_IndexCnt); + handler->Reset(m_index); parser.Parse(buf); - handler->WriteOut(m_Index, m_IndexCnt); - delete[] buf; } - else if (!indexfile.IsEmpty()) + else if (!indexfile.empty()) + { wxLogError(_("Cannot open index file: %s"), indexfile.c_str()); - return TRUE; + } + return true; } +inline static void CacheWriteInt32(wxOutputStream *f, wxInt32 value) +{ + wxInt32 x = wxINT32_SWAP_ON_BE(value); + f->Write(&x, sizeof(x)); +} +inline static wxInt32 CacheReadInt32(wxInputStream *f) +{ + wxInt32 x; + f->Read(&x, sizeof(x)); + return wxINT32_SWAP_ON_BE(x); +} +inline static void CacheWriteString(wxOutputStream *f, const wxString& str) +{ + const wxWX2MBbuf mbstr = str.mb_str(wxConvUTF8); + size_t len = strlen((const char*)mbstr)+1; + CacheWriteInt32(f, len); + f->Write((const char*)mbstr, len); +} -#if wxUSE_UNICODE - -#define READ_STRING(f, s, lng) { char tmpc; for (int i = 0; i < lng; i++) { f->Read(&tmpc, 1); s[i] = (wxChar)tmpc;} } -#define WRITE_STRING(f, s, lng) { char tmpc; for (int i = 0; i < lng; i++) { tmpc = (char)s[i]; f->Write(&tmpc, 1);} } - -#else - -#define READ_STRING(f, s, lng) f->Read(s, lng * sizeof(char)); -#define WRITE_STRING(f, s, lng) f->Write(s, lng * sizeof(char)); +inline static wxString CacheReadString(wxInputStream *f) +{ + size_t len = (size_t)CacheReadInt32(f); + wxCharBuffer str(len-1); + f->Read(str.data(), len); + return wxString(str, wxConvUTF8); +} -#endif +#define CURRENT_CACHED_BOOK_VERSION 5 +// Additional flags to detect incompatibilities of the runtime environment: +#define CACHED_BOOK_FORMAT_FLAGS \ + (wxUSE_UNICODE << 0) -#define CURRENT_CACHED_BOOK_VERSION 1 bool wxHtmlHelpData::LoadCachedBook(wxHtmlBookRecord *book, wxInputStream *f) { - int i, st; - wxInt32 x; + int i, st, newsize; wxInt32 version; /* load header - version info : */ + version = CacheReadInt32(f); - f->Read(&x, sizeof(x)); - version = wxINT32_SWAP_ON_BE(x); - - if (version != CURRENT_CACHED_BOOK_VERSION) + if (version != CURRENT_CACHED_BOOK_VERSION) { - wxLogError(_("Incorrect version of HTML help book")); - return FALSE; - // NOTE: when adding new version, please ensure backward compatibility! + // NB: We can just silently return false here and don't worry about + // it anymore, because AddBookParam will load the MS project in + // absence of (properly versioned) .cached file and automatically + // create new .cached file immediately afterward. + return false; } - - /* load contents : */ - f->Read(&x, sizeof(x)); - st = m_ContentsCnt; - m_ContentsCnt += wxINT32_SWAP_ON_BE(x); - m_Contents = (wxHtmlContentsItem*) realloc(m_Contents, - (m_ContentsCnt / wxHTML_REALLOC_STEP + 1) * - wxHTML_REALLOC_STEP * sizeof(wxHtmlContentsItem)); - for (i = st; i < m_ContentsCnt; i++) - { - f->Read(&x, sizeof(x)); - m_Contents[i].m_Level = wxINT32_SWAP_ON_BE(x); - f->Read(&x, sizeof(x)); - m_Contents[i].m_ID = wxINT32_SWAP_ON_BE(x); - f->Read(&x, sizeof(x)); x = wxINT32_SWAP_ON_BE(x); - m_Contents[i].m_Name = new wxChar[x]; - READ_STRING(f, m_Contents[i].m_Name, x); - f->Read(&x, sizeof(x)); x = wxINT32_SWAP_ON_BE(x); - m_Contents[i].m_Page = new wxChar[x]; - READ_STRING(f, m_Contents[i].m_Page, x); - m_Contents[i].m_Book = book; + if (CacheReadInt32(f) != CACHED_BOOK_FORMAT_FLAGS) + return false; + + /* load contents : */ + st = m_contents.size(); + newsize = st + CacheReadInt32(f); + m_contents.Alloc(newsize); + for (i = st; i < newsize; i++) + { + wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem; + item->level = CacheReadInt32(f); + item->id = CacheReadInt32(f); + item->name = CacheReadString(f); + item->page = CacheReadString(f); + item->book = book; + m_contents.Add(item); } /* load index : */ - - f->Read(&x, sizeof(x)); - st = m_IndexCnt; - m_IndexCnt += wxINT32_SWAP_ON_BE(x); - m_Index = (wxHtmlContentsItem*) realloc(m_Index, (m_IndexCnt / wxHTML_REALLOC_STEP + 1) * - wxHTML_REALLOC_STEP * sizeof(wxHtmlContentsItem)); - for (i = st; i < m_IndexCnt; i++) - { - f->Read(&x, sizeof(x)); x = wxINT32_SWAP_ON_BE(x); - m_Index[i].m_Name = new wxChar[x]; - READ_STRING(f, m_Index[i].m_Name, x); - f->Read(&x, sizeof(x)); x = wxINT32_SWAP_ON_BE(x); - m_Index[i].m_Page = new wxChar[x]; - READ_STRING(f, m_Index[i].m_Page, x); - m_Index[i].m_Book = book; - } - return TRUE; + st = m_index.size(); + newsize = st + CacheReadInt32(f); + m_index.Alloc(newsize); + for (i = st; i < newsize; i++) + { + wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem; + item->name = CacheReadString(f); + item->page = CacheReadString(f); + item->level = CacheReadInt32(f); + item->book = book; + int parentShift = CacheReadInt32(f); + if (parentShift != 0) + item->parent = &m_index[m_index.size() - parentShift]; + m_index.Add(item); + } + return true; } bool wxHtmlHelpData::SaveCachedBook(wxHtmlBookRecord *book, wxOutputStream *f) { int i; - wxInt32 x; + wxInt32 cnt; /* save header - version info : */ - - x = wxINT32_SWAP_ON_BE(CURRENT_CACHED_BOOK_VERSION); - f->Write(&x, sizeof(x)); + CacheWriteInt32(f, CURRENT_CACHED_BOOK_VERSION); + CacheWriteInt32(f, CACHED_BOOK_FORMAT_FLAGS); /* save contents : */ + int len = m_contents.size(); + for (cnt = 0, i = 0; i < len; i++) + if (m_contents[i].book == book && m_contents[i].level > 0) + cnt++; + CacheWriteInt32(f, cnt); - x = 0; - for (i = 0; i < m_ContentsCnt; i++) if (m_Contents[i].m_Book == book && m_Contents[i].m_Level > 0) x++; - x = wxINT32_SWAP_ON_BE(x); - f->Write(&x, sizeof(x)); - for (i = 0; i < m_ContentsCnt; i++) + for (i = 0; i < len; i++) { - if (m_Contents[i].m_Book != book || m_Contents[i].m_Level == 0) continue; - x = wxINT32_SWAP_ON_BE(m_Contents[i].m_Level); - f->Write(&x, sizeof(x)); - x = wxINT32_SWAP_ON_BE(m_Contents[i].m_ID); - f->Write(&x, sizeof(x)); - x = wxINT32_SWAP_ON_BE(wxStrlen(m_Contents[i].m_Name) + 1); - f->Write(&x, sizeof(x)); - WRITE_STRING(f, m_Contents[i].m_Name, x); - x = wxINT32_SWAP_ON_BE(wxStrlen(m_Contents[i].m_Page) + 1); - f->Write(&x, sizeof(x)); - WRITE_STRING(f, m_Contents[i].m_Page, x); + if (m_contents[i].book != book || m_contents[i].level == 0) + continue; + CacheWriteInt32(f, m_contents[i].level); + CacheWriteInt32(f, m_contents[i].id); + CacheWriteString(f, m_contents[i].name); + CacheWriteString(f, m_contents[i].page); } /* save index : */ + len = m_index.size(); + for (cnt = 0, i = 0; i < len; i++) + if (m_index[i].book == book && m_index[i].level > 0) + cnt++; + CacheWriteInt32(f, cnt); - x = 0; - for (i = 0; i < m_IndexCnt; i++) if (m_Index[i].m_Book == book && m_Index[i].m_Level > 0) x++; - x = wxINT32_SWAP_ON_BE(x); - f->Write(&x, sizeof(x)); - for (i = 0; i < m_IndexCnt; i++) + for (i = 0; i < len; i++) { - if (m_Index[i].m_Book != book || m_Index[i].m_Level == 0) continue; - x = wxINT32_SWAP_ON_BE(wxStrlen(m_Index[i].m_Name) + 1); - f->Write(&x, sizeof(x)); - WRITE_STRING(f, m_Index[i].m_Name, x); - x = wxINT32_SWAP_ON_BE(wxStrlen(m_Index[i].m_Page) + 1); - f->Write(&x, sizeof(x)); - WRITE_STRING(f, m_Index[i].m_Page, x); + if (m_index[i].book != book || m_index[i].level == 0) + continue; + CacheWriteString(f, m_index[i].name); + CacheWriteString(f, m_index[i].page); + CacheWriteInt32(f, m_index[i].level); + // save distance to parent item, if any: + if (m_index[i].parent == NULL) + { + CacheWriteInt32(f, 0); + } + else + { + int cnt2 = 0; + wxHtmlHelpDataItem *parent = m_index[i].parent; + for (int j = i-1; j >= 0; j--) + { + if (m_index[j].book == book && m_index[j].level > 0) + cnt2++; + if (&m_index[j] == parent) + break; + } + wxASSERT(cnt2 > 0); + CacheWriteInt32(f, cnt2); + } } - return TRUE; + return true; } void wxHtmlHelpData::SetTempDir(const wxString& path) { - if (path == wxEmptyString) m_TempPath = path; - else + if (path.empty()) + m_tempPath = path; + else { - if (wxIsAbsolutePath(path)) m_TempPath = path; - else m_TempPath = wxGetCwd() + _T("/") + path; + if (wxIsAbsolutePath(path)) m_tempPath = path; + else m_tempPath = wxGetCwd() + wxT("/") + path; - if (m_TempPath[m_TempPath.Length() - 1] != _T('/')) - m_TempPath << _T('/'); + if (m_tempPath[m_tempPath.length() - 1] != wxT('/')) + m_tempPath << wxT('/'); } } @@ -591,247 +504,286 @@ bool wxHtmlHelpData::AddBookParam(const wxFSFile& bookfile, wxFileSystem fsys; wxFSFile *fi; wxHtmlBookRecord *bookr; - - int IndexOld = m_IndexCnt, - ContentsOld = m_ContentsCnt; - if (! path.IsEmpty()) - fsys.ChangePathTo(path, TRUE); + int IndexOld = m_index.size(), + ContentsOld = m_contents.size(); + + if (!path.empty()) + fsys.ChangePathTo(path, true); + + size_t booksCnt = m_bookRecords.GetCount(); + for (size_t i = 0; i < booksCnt; i++) + { + if ( m_bookRecords[i].GetBookFile() == bookfile.GetLocation() ) + return true; // book is (was) loaded + } - bookr = new wxHtmlBookRecord(fsys.GetPath(), title, deftopic); + bookr = new wxHtmlBookRecord(bookfile.GetLocation(), fsys.GetPath(), title, deftopic); - if (m_ContentsCnt % wxHTML_REALLOC_STEP == 0) - m_Contents = (wxHtmlContentsItem*) realloc(m_Contents, (m_ContentsCnt + wxHTML_REALLOC_STEP) * sizeof(wxHtmlContentsItem)); - m_Contents[m_ContentsCnt].m_Level = 0; - m_Contents[m_ContentsCnt].m_ID = 0; - m_Contents[m_ContentsCnt].m_Page = new wxChar[deftopic.Length() + 1]; - wxStrcpy(m_Contents[m_ContentsCnt].m_Page, deftopic.c_str()); - m_Contents[m_ContentsCnt].m_Name = new wxChar [title.Length() + 1]; - wxStrcpy(m_Contents[m_ContentsCnt].m_Name, title.c_str()); - m_Contents[m_ContentsCnt].m_Book = bookr; + wxHtmlHelpDataItem *bookitem = new wxHtmlHelpDataItem; + bookitem->level = 0; + bookitem->id = 0; + bookitem->page = deftopic; + bookitem->name = title; + bookitem->book = bookr; // store the contents index for later - int cont_start = m_ContentsCnt++; + int cont_start = m_contents.size(); + + m_contents.Add(bookitem); // Try to find cached binary versions: // 1. save file as book, but with .hhp.cached extension // 2. same as 1. but in temp path // 3. otherwise or if cache load failed, load it from MS. - + fi = fsys.OpenFile(bookfile.GetLocation() + wxT(".cached")); - - if (fi == NULL || - fi->GetModificationTime() < bookfile.GetModificationTime() || + + if (fi == NULL || +#if wxUSE_DATETIME + fi->GetModificationTime() < bookfile.GetModificationTime() || +#endif // wxUSE_DATETIME !LoadCachedBook(bookr, fi->GetStream())) { if (fi != NULL) delete fi; - fi = fsys.OpenFile(m_TempPath + wxFileNameFromPath(bookfile.GetLocation()) + wxT(".cached")); - if (m_TempPath == wxEmptyString || fi == NULL || - fi->GetModificationTime() < bookfile.GetModificationTime() || + fi = fsys.OpenFile(m_tempPath + wxFileNameFromPath(bookfile.GetLocation()) + wxT(".cached")); + if (m_tempPath.empty() || fi == NULL || +#if wxUSE_DATETIME + fi->GetModificationTime() < bookfile.GetModificationTime() || +#endif // wxUSE_DATETIME !LoadCachedBook(bookr, fi->GetStream())) { LoadMSProject(bookr, fsys, indexfile, contfile); - if (m_TempPath != wxEmptyString) + if (!m_tempPath.empty()) { - wxFileOutputStream *outs = new wxFileOutputStream(m_TempPath + + wxFileOutputStream *outs = new wxFileOutputStream(m_tempPath + SafeFileName(wxFileNameFromPath(bookfile.GetLocation())) + wxT(".cached")); SaveCachedBook(bookr, outs); delete outs; } } } - + if (fi != NULL) delete fi; // Now store the contents range - bookr->SetContentsRange(cont_start, m_ContentsCnt); - - // Convert encoding, if neccessary: + bookr->SetContentsRange(cont_start, m_contents.size()); + +#if wxUSE_WCHAR_T + // MS HTML Help files [written by MS HTML Help Workshop] are broken + // in that the data are iso-8859-1 (including HTML entities), but must + // be interpreted as being in language's windows charset. Correct the + // differences here and also convert to wxConvLocal in ANSI build if (encoding != wxFONTENCODING_SYSTEM) { - wxFontEncodingArray a = wxEncodingConverter::GetPlatformEquivalents(encoding); - if (a.GetCount() != 0 && a[0] != encoding) + #if wxUSE_UNICODE + #define CORRECT_STR(str, conv) \ + str = wxString((str).mb_str(wxConvISO8859_1), conv) + #else + #define CORRECT_STR(str, conv) \ + str = wxString((str).wc_str(conv), wxConvLocal) + #endif + wxCSConv conv(encoding); + size_t IndexCnt = m_index.size(); + size_t ContentsCnt = m_contents.size(); + size_t i; + for (i = IndexOld; i < IndexCnt; i++) { - int i; - wxEncodingConverter conv; - conv.Init(encoding, a[0]); - - for (i = IndexOld; i < m_IndexCnt; i++) - conv.Convert(m_Index[i].m_Name); - for (i = ContentsOld; i < m_ContentsCnt; i++) - conv.Convert(m_Contents[i].m_Name); + CORRECT_STR(m_index[i].name, conv); } + for (i = ContentsOld; i < ContentsCnt; i++) + { + CORRECT_STR(m_contents[i].name, conv); + } + #undef CORRECT_STR } +#else + wxUnusedVar(IndexOld); + wxUnusedVar(ContentsOld); + wxASSERT_MSG(encoding == wxFONTENCODING_SYSTEM, wxT("Help files need charset conversion, but wxUSE_WCHAR_T is 0")); +#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T - m_BookRecords.Add(bookr); - if (m_IndexCnt > 0) - qsort(m_Index, m_IndexCnt, sizeof(wxHtmlContentsItem), IndexCompareFunc); + m_bookRecords.Add(bookr); + if (!m_index.empty()) + { + m_index.Sort(wxHtmlHelpIndexCompareFunc); + } - return TRUE; + return true; } bool wxHtmlHelpData::AddBook(const wxString& book) { - if (book.Right(4).Lower() == wxT(".zip") || - book.Right(4).Lower() == wxT(".htb") /*html book*/) - + wxString extension(book.Right(4).Lower()); + if (extension == wxT(".zip") || +#if wxUSE_LIBMSPACK + extension == wxT(".chm") /*compressed html help book*/ || +#endif + extension == wxT(".htb") /*html book*/) { wxFileSystem fsys; wxString s; - bool rt = FALSE; + bool rt = false; - s = fsys.FindFirst(book + wxT("#zip:") + wxT("*.hhp"), wxFILE); - while (!s.IsEmpty()) +#if wxUSE_LIBMSPACK + if (extension == wxT(".chm")) + s = fsys.FindFirst(book + wxT("#chm:*.hhp"), wxFILE); + else +#endif + s = fsys.FindFirst(book + wxT("#zip:*.hhp"), wxFILE); + + while (!s.empty()) { - if (AddBook(s)) rt = TRUE; + if (AddBook(s)) rt = true; s = fsys.FindNext(); } - + return rt; } + wxFSFile *fi; + wxFileSystem fsys; - else - { - wxFSFile *fi; - wxFileSystem fsys; - wxInputStream *s; - wxString bookFull; + wxString title = _("noname"), + safetitle, + start = wxEmptyString, + contents = wxEmptyString, + index = wxEmptyString, + charset = wxEmptyString; - int sz; - char *buff, *lineptr; - char linebuf[300]; + fi = fsys.OpenFile(book); + if (fi == NULL) + { + wxLogError(_("Cannot open HTML help book: %s"), book.c_str()); + return false; + } + fsys.ChangePathTo(book); - wxString title = _("noname"), - safetitle, - start = wxEmptyString, - contents = wxEmptyString, - index = wxEmptyString, - charset = wxEmptyString; + const wxChar *lineptr; + wxChar linebuf[300]; + wxString tmp; + wxHtmlFilterPlainText filter; + tmp = filter.ReadFile(*fi); + lineptr = tmp.c_str(); - if (wxIsAbsolutePath(book)) bookFull = book; - else bookFull = wxGetCwd() + "/" + book; + do + { + lineptr = ReadLine(lineptr, linebuf, 300); + + for (wxChar *ch = linebuf; *ch != wxT('\0') && *ch != wxT('='); ch++) + *ch = (wxChar)wxTolower(*ch); + + if (wxStrstr(linebuf, wxT("title=")) == linebuf) + title = linebuf + wxStrlen(wxT("title=")); + if (wxStrstr(linebuf, wxT("default topic=")) == linebuf) + start = linebuf + wxStrlen(wxT("default topic=")); + if (wxStrstr(linebuf, wxT("index file=")) == linebuf) + index = linebuf + wxStrlen(wxT("index file=")); + if (wxStrstr(linebuf, wxT("contents file=")) == linebuf) + contents = linebuf + wxStrlen(wxT("contents file=")); + if (wxStrstr(linebuf, wxT("charset=")) == linebuf) + charset = linebuf + wxStrlen(wxT("charset=")); + } while (lineptr != NULL); + + wxFontEncoding enc = wxFONTENCODING_SYSTEM; +#if wxUSE_FONTMAP + if (charset != wxEmptyString) + enc = wxFontMapper::Get()->CharsetToEncoding(charset); +#endif - fi = fsys.OpenFile(bookFull); - if (fi == NULL) - { - wxLogError(_("Cannot open HTML help book: %s"), bookFull.c_str()); - return FALSE; - } - fsys.ChangePathTo(bookFull); - s = fi->GetStream(); - sz = s->GetSize(); - buff = new char[sz + 1]; - buff[sz] = 0; - s->Read(buff, sz); - lineptr = buff; - - do { - lineptr = ReadLine(lineptr, linebuf); - - if (strstr(linebuf, "Title=") == linebuf) - title = linebuf + strlen("Title="); - if (strstr(linebuf, "Default topic=") == linebuf) - start = linebuf + strlen("Default topic="); - if (strstr(linebuf, "Index file=") == linebuf) - index = linebuf + strlen("Index file="); - if (strstr(linebuf, "Contents file=") == linebuf) - contents = linebuf + strlen("Contents file="); - if (strstr(linebuf, "Charset=") == linebuf) - charset = linebuf + strlen("Charset="); - } while (lineptr != NULL); - delete[] buff; - - wxFontEncoding enc; - if (charset == wxEmptyString) enc = wxFONTENCODING_SYSTEM; - else enc = wxTheFontMapper->CharsetToEncoding(charset); - bool rtval = AddBookParam(*fi, enc, - title, contents, index, start, fsys.GetPath()); - delete fi; - return rtval; - } + bool rtval = AddBookParam(*fi, enc, + title, contents, index, start, fsys.GetPath()); + delete fi; + + return rtval; } wxString wxHtmlHelpData::FindPageByName(const wxString& x) { - int cnt; int i; - wxFileSystem fsys; - wxFSFile *f; - wxString url(wxEmptyString); - - /* 1. try to open given file: */ - cnt = m_BookRecords.GetCount(); - for (i = 0; i < cnt; i++) + bool has_non_ascii = false; + wxString::const_iterator it; + for (it = x.begin(); it != x.end(); ++it) { - f = fsys.OpenFile(m_BookRecords[i].GetBasePath() + x); - if (f) - { - url = m_BookRecords[i].GetBasePath() + x; - delete f; - return url; + wxUniChar ch = *it; + if (!ch.IsAscii()) + { + has_non_ascii = true; + break; } } + int cnt = m_bookRecords.GetCount(); - /* 2. try to find a book: */ - - for (i = 0; i < cnt; i++) + if (!has_non_ascii) { - if (m_BookRecords[i].GetTitle() == x) - { - url = m_BookRecords[i].GetBasePath() + m_BookRecords[i].GetStart(); + wxFileSystem fsys; + wxFSFile *f; + // 1. try to open given file: + for (i = 0; i < cnt; i++) + { + f = fsys.OpenFile(m_bookRecords[i].GetFullPath(x)); + if (f) + { + wxString url = m_bookRecords[i].GetFullPath(x); + delete f; return url; } + } } - /* 3. try to find in contents: */ - cnt = m_ContentsCnt; - for (i = 0; i < cnt; i++) + // 2. try to find a book: + for (i = 0; i < cnt; i++) { - if (wxStrcmp(m_Contents[i].m_Name, x) == 0) - { - url = m_Contents[i].m_Book->GetBasePath() + m_Contents[i].m_Page; - return url; - } + if (m_bookRecords[i].GetTitle() == x) + return m_bookRecords[i].GetFullPath(m_bookRecords[i].GetStart()); + } + + // 3. try to find in contents: + cnt = m_contents.size(); + for (i = 0; i < cnt; i++) + { + if (m_contents[i].name == x) + return m_contents[i].GetFullPath(); } - /* 4. try to find in index: */ + // 4. try to find in index: + cnt = m_index.size(); + for (i = 0; i < cnt; i++) + { + if (m_index[i].name == x) + return m_index[i].GetFullPath(); + } - cnt = m_IndexCnt; - for (i = 0; i < cnt; i++) + // 4b. if still not found, try case-insensitive comparison + for (i = 0; i < cnt; i++) { - if (wxStrcmp(m_Index[i].m_Name, x) == 0) - { - url = m_Index[i].m_Book->GetBasePath() + m_Index[i].m_Page; - return url; - } + if (m_index[i].name.CmpNoCase(x) == 0) + return m_index[i].GetFullPath(); } - return url; + return wxEmptyString; } wxString wxHtmlHelpData::FindPageById(int id) { - int i; - wxString url(wxEmptyString); - - for (i = 0; i < m_ContentsCnt; i++) + size_t cnt = m_contents.size(); + for (size_t i = 0; i < cnt; i++) { - if (m_Contents[i].m_ID == id) - { - url = m_Contents[i].m_Book->GetBasePath() + m_Contents[i].m_Page; - return url; + if (m_contents[i].id == id) + { + return m_contents[i].GetFullPath(); } } - return url; + return wxEmptyString; } + //---------------------------------------------------------------------------------- // wxHtmlSearchStatus functions //---------------------------------------------------------------------------------- @@ -843,14 +795,14 @@ wxHtmlSearchStatus::wxHtmlSearchStatus(wxHtmlHelpData* data, const wxString& key m_Data = data; m_Keyword = keyword; wxHtmlBookRecord* bookr = NULL; - if (book != wxEmptyString) + if (book != wxEmptyString) { // we have to search in a specific book. Find it first - int i, cnt = data->m_BookRecords.GetCount(); + int i, cnt = data->m_bookRecords.GetCount(); for (i = 0; i < cnt; i++) - if (data->m_BookRecords[i].GetTitle() == book) - { - bookr = &(data->m_BookRecords[i]); + if (data->m_bookRecords[i].GetTitle() == book) + { + bookr = &(data->m_bookRecords[i]); m_CurIndex = bookr->GetContentsStart(); m_MaxIndex = bookr->GetContentsEnd(); break; @@ -858,58 +810,58 @@ wxHtmlSearchStatus::wxHtmlSearchStatus(wxHtmlHelpData* data, const wxString& key // check; we won't crash if the book doesn't exist, but it's Bad Anyway. wxASSERT(bookr); } - if (! bookr) + if (! bookr) { // no book specified; search all books m_CurIndex = 0; - m_MaxIndex = m_Data->m_ContentsCnt; + m_MaxIndex = m_Data->m_contents.size(); } m_Engine.LookFor(keyword, case_sensitive, whole_words_only); m_Active = (m_CurIndex < m_MaxIndex); - m_LastPage = NULL; } bool wxHtmlSearchStatus::Search() { wxFSFile *file; int i = m_CurIndex; // shortcut - bool found = FALSE; - wxChar *thepage; + bool found = false; + wxString thepage; - if (!m_Active) + if (!m_Active) { // sanity check. Illegal use, but we'll try to prevent a crash anyway wxASSERT(m_Active); - return FALSE; + return false; } m_Name = wxEmptyString; - m_ContentsItem = NULL; - thepage = m_Data->m_Contents[i].m_Page; + m_CurItem = NULL; + thepage = m_Data->m_contents[i].page; m_Active = (++m_CurIndex < m_MaxIndex); // check if it is same page with different anchor: - if (m_LastPage != NULL) + if (!m_LastPage.empty()) { - wxChar *p1, *p2; - for (p1 = thepage, p2 = m_LastPage; - *p1 != 0 && *p1 != _T('#') && *p1 == *p2; p1++, p2++) {} + const wxChar *p1, *p2; + for (p1 = thepage.c_str(), p2 = m_LastPage.c_str(); + *p1 != 0 && *p1 != wxT('#') && *p1 == *p2; p1++, p2++) {} m_LastPage = thepage; - if (*p1 == 0 || *p1 == _T('#')) - return FALSE; + if (*p1 == 0 || *p1 == wxT('#')) + return false; } else m_LastPage = thepage; - + wxFileSystem fsys; - file = fsys.OpenFile(m_Data->m_Contents[i].m_Book->GetBasePath() + thepage); - if (file) + file = fsys.OpenFile(m_Data->m_contents[i].book->GetFullPath(thepage)); + if (file) { - if (m_Engine.Scan(file->GetStream())) { - m_Name = m_Data->m_Contents[i].m_Name; - m_ContentsItem = m_Data->m_Contents + i; - found = TRUE; + if (m_Engine.Scan(*file)) + { + m_Name = m_Data->m_contents[i].name; + m_CurItem = &m_Data->m_contents[i]; + found = true; } delete file; } @@ -924,73 +876,113 @@ bool wxHtmlSearchStatus::Search() //-------------------------------------------------------------------------------- -// wxSearchEngine +// wxHtmlSearchEngine //-------------------------------------------------------------------------------- -void wxSearchEngine::LookFor(const wxString& keyword, bool case_sensitive, bool whole_words_only) +void wxHtmlSearchEngine::LookFor(const wxString& keyword, bool case_sensitive, bool whole_words_only) { m_CaseSensitive = case_sensitive; m_WholeWords = whole_words_only; - if (m_Keyword) delete[] m_Keyword; - m_Keyword = new wxChar[keyword.Length() + 1]; - wxStrcpy(m_Keyword, keyword.c_str()); - + m_Keyword = keyword; + if (!m_CaseSensitive) - { - for (int i = wxStrlen(m_Keyword) - 1; i >= 0; i--) - { - if ((m_Keyword[i] >= wxT('A')) && (m_Keyword[i] <= wxT('Z'))) - m_Keyword[i] += wxT('a') - wxT('A'); - } - } + m_Keyword.LowerCase(); } +static inline bool WHITESPACE(wxChar c) +{ + return c == wxT(' ') || c == wxT('\n') || c == wxT('\r') || c == wxT('\t'); +} + +// replace continuous spaces by one single space +static inline wxString CompressSpaces(const wxString & str) +{ + wxString buf; + buf.reserve( str.size() ); + + bool space_counted = false; + for( const wxChar * pstr = str.c_str(); *pstr; ++pstr ) + { + wxChar ch = *pstr; + if( WHITESPACE( ch ) ) + { + if( space_counted ) + { + continue; + } + ch = wxT(' '); + space_counted = true; + } + else + { + space_counted = false; + } + buf += ch; + } -#define WHITESPACE(c) (c == ' ' || c == '\n' || c == '\r' || c == '\t') + return buf; +} -bool wxSearchEngine::Scan(wxInputStream *stream) +bool wxHtmlSearchEngine::Scan(const wxFSFile& file) { - wxASSERT_MSG(m_Keyword != NULL, wxT("wxSearchEngine::LookFor must be called before scanning!")); + wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!")); - int i, j; - int lng = stream ->GetSize(); - int wrd = wxStrlen(m_Keyword); - bool found = FALSE; - char *buf = new char[lng + 1]; - stream->Read(buf, lng); - buf[lng] = 0; + wxHtmlFilterHTML filter; + wxString bufStr = filter.ReadFile(file); if (!m_CaseSensitive) - for (i = 0; i < lng; i++) - if ((buf[i] >= 'A') && (buf[i] <= 'Z')) buf[i] += 'a' - 'A'; + bufStr.LowerCase(); - if (m_WholeWords) - { - for (i = 0; i < lng - wrd; i++) - { - if (WHITESPACE(buf[i])) continue; - j = 0; - while ((j < wrd) && (buf[i + j] == m_Keyword[j])) j++; - if (j == wrd && WHITESPACE(buf[i + j])) { found = TRUE; break; } - } - } - - else - { - for (i = 0; i < lng - wrd; i++) - { - j = 0; - while ((j < wrd) && (buf[i + j] == m_Keyword[j])) j++; - if (j == wrd) { found = TRUE; break; } + { // remove html tags + wxString bufStrCopy; + bufStrCopy.reserve( bufStr.size() ); + bool insideTag = false; + for (const wxChar * pBufStr = bufStr.c_str(); *pBufStr; ++pBufStr) + { + wxChar c = *pBufStr; + if (insideTag) + { + if (c == wxT('>')) + { + insideTag = false; + // replace the tag by an empty space + c = wxT(' '); + } + else + continue; + } + else if (c == wxT('<')) + { + wxChar nextCh = *(pBufStr + 1); + if (nextCh == wxT('/') || !WHITESPACE(nextCh)) + { + insideTag = true; + continue; + } + } + bufStrCopy += c; } + bufStr.swap( bufStrCopy ); } - delete[] buf; - return found; -} + wxString keyword = m_Keyword; + if (m_WholeWords) + { + // insert ' ' at the beginning and at the end + keyword.insert( 0, wxT(" ") ); + keyword.append( wxT(" ") ); + bufStr.insert( 0, wxT(" ") ); + bufStr.append( wxT(" ") ); + } + // remove continuous spaces + keyword = CompressSpaces( keyword ); + bufStr = CompressSpaces( bufStr ); + // finally do the search + return bufStr.find( keyword ) != wxString::npos; +} #endif