Fix handling of spaces after <img> tag in wxHTML.

[wxWidgets.git] / src / html / helpdata.cpp
diff --git a/src/html/helpdata.cpp b/src/html/helpdata.cpp

index 32aad951960d880789281b7a25b1e4f3c919bea5..edebdcc7fbf15a75ab21deb53c0b812442006dd9 100644 (file)
--- a/src/html/helpdata.cpp
+++ b/src/html/helpdata.cpp
@@ -1,5 +1,5 @@
  /////////////////////////////////////////////////////////////////////////////
-// Name:        helpdata.cpp
+// Name:        src/html/helpdata.cpp
  // Purpose:     wxHtmlHelpData
  // Notes:       Based on htmlhelp.cpp, implementing a monolithic
  //              HTML Help controller class,  by Vaclav Slavik
@@ -9,22 +9,16 @@
  // Licence:     wxWindows licence
  /////////////////////////////////////////////////////////////////////////////
  
-#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
-#pragma implementation "helpdata.h"
-#endif
-
  // For compilers that support precompilation, includes "wx.h".
  #include "wx/wxprec.h"
  
  #ifdef __BORLANDC__
-#pragma hdrstop
+    #pragma hdrstop
  #endif
  
-#include "wx/defs.h"
-
  #if wxUSE_HTML && wxUSE_STREAMS
  
-#ifndef WXPRECOMP
+#ifndef WX_PRECOMP
      #include "wx/intl.h"
      #include "wx/log.h"
  #endif
@@ -37,7 +31,6 @@
  #include "wx/busyinfo.h"
  #include "wx/encconv.h"
  #include "wx/fontmap.h"
-#include "wx/log.h"
  #include "wx/html/htmlpars.h"
  #include "wx/html/htmldefs.h"
  #include "wx/html/htmlfilt.h"
@@ -58,15 +51,15 @@ static const wxChar* ReadLine(const wxChar *line, wxChar *buf, size_t bufsize)
      wxChar *endptr = buf + bufsize - 1;
      const wxChar *readptr = line;
  
-    while (*readptr != 0 && *readptr != _T('\r') && *readptr != _T('\n') &&
-           writeptr != endptr) 
+    while (*readptr != 0 && *readptr != wxT('\r') && *readptr != wxT('\n') &&
+           writeptr != endptr)
          *(writeptr++) = *(readptr++);
      *writeptr = 0;
-    while (*readptr == _T('\r') || *readptr == _T('\n'))
+    while (*readptr == wxT('\r') || *readptr == wxT('\n'))
          readptr++;
      if (*readptr == 0)
          return NULL;
-    else 
+    else
          return readptr;
  }
  
@@ -78,6 +71,11 @@ wxHtmlHelpIndexCompareFunc(wxHtmlHelpDataItem **a, wxHtmlHelpDataItem **b)
      wxHtmlHelpDataItem *ia = *a;
      wxHtmlHelpDataItem *ib = *b;
  
+    if (ia == NULL)
+        return -1;
+    if (ib == NULL)
+        return 1;
+
      if (ia->parent == ib->parent)
      {
          return ia->name.CmpNoCase(ib->name);
@@ -90,7 +88,7 @@ wxHtmlHelpIndexCompareFunc(wxHtmlHelpDataItem **a, wxHtmlHelpDataItem **b)
      {
          wxHtmlHelpDataItem *ia2 = ia;
          wxHtmlHelpDataItem *ib2 = ib;
-        
+
          while (ia2->level > ib2->level)
          {
              ia2 = ia2->parent;
@@ -99,7 +97,7 @@ wxHtmlHelpIndexCompareFunc(wxHtmlHelpDataItem **a, wxHtmlHelpDataItem **b)
          {
              ib2 = ib2->parent;
          }
-        
+
          wxASSERT(ia2);
          wxASSERT(ib2);
          int res = wxHtmlHelpIndexCompareFunc(&ia2, &ib2);
@@ -127,9 +125,9 @@ public:
      wxObject* GetProduct() { return NULL; }
  
  protected:
-    virtual void AddText(const wxChar* WXUNUSED(txt)) {}
+    virtual void AddText(const wxString& WXUNUSED(txt)) {}
  
-    DECLARE_NO_COPY_CLASS(HP_Parser)
+    wxDECLARE_NO_COPY_CLASS(HP_Parser);
  };
  
  
@@ -147,7 +145,7 @@ class HP_TagHandler : public wxHtmlTagHandler
          int m_count;
          wxHtmlHelpDataItem *m_parentItem;
          wxHtmlBookRecord *m_book;
-    
+
          wxHtmlHelpDataItems *m_data;
  
      public:
@@ -157,7 +155,7 @@ class HP_TagHandler : public wxHtmlTagHandler
              m_book = b;
              m_name = m_page = wxEmptyString;
              m_level = 0;
-            m_id = -1;
+            m_id = wxID_ANY;
              m_count = 0;
              m_parentItem = NULL;
          }
@@ -172,7 +170,7 @@ class HP_TagHandler : public wxHtmlTagHandler
              m_parentItem = NULL;
          }
  
-    DECLARE_NO_COPY_CLASS(HP_TagHandler)
+    wxDECLARE_NO_COPY_CLASS(HP_TagHandler);
  };
  
  
@@ -220,7 +218,7 @@ bool HP_TagHandler::HandleTag(const wxHtmlTag& tag)
              item->id = m_id;
              item->page = m_page;
              item->name = m_name;
-            
+
              item->book = m_book;
              m_data->Add(item);
              m_count++;
@@ -257,7 +255,7 @@ wxString wxHtmlHelpDataItem::GetIndentedName() const
  {
      wxString s;
      for (int i = 1; i < level; i++)
-        s << _T("   ");
+        s << wxT("   ");
      s << name;
      return s;
  }
@@ -267,17 +265,10 @@ IMPLEMENT_DYNAMIC_CLASS(wxHtmlHelpData, wxObject)
  
  wxHtmlHelpData::wxHtmlHelpData()
  {
-#if WXWIN_COMPATIBILITY_2_4
-    m_cacheContents = NULL;
-    m_cacheIndex = NULL;
-#endif
  }
  
  wxHtmlHelpData::~wxHtmlHelpData()
  {
-#if WXWIN_COMPATIBILITY_2_4
-    CleanCompatibilityData();
-#endif
  }
  
  bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys,
@@ -293,7 +284,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys,
      HP_TagHandler *handler = new HP_TagHandler(book);
      parser.AddTagHandler(handler);
  
-    f = ( contentsfile.IsEmpty() ? (wxFSFile*) NULL : fsys.OpenFile(contentsfile) );
+    f = ( contentsfile.empty() ? NULL : fsys.OpenFile(contentsfile) );
      if (f)
      {
          buf.clear();
@@ -307,7 +298,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys,
          wxLogError(_("Cannot open contents file: %s"), contentsfile.c_str());
      }
  
-    f = ( indexfile.IsEmpty() ? (wxFSFile*) NULL : fsys.OpenFile(indexfile) );
+    f = ( indexfile.empty() ? NULL : fsys.OpenFile(indexfile) );
      if (f)
      {
          buf.clear();
@@ -316,11 +307,11 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys,
          handler->Reset(m_index);
          parser.Parse(buf);
      }
-    else if (!indexfile.IsEmpty())
+    else if (!indexfile.empty())
      {
          wxLogError(_("Cannot open index file: %s"), indexfile.c_str());
      }
-    return TRUE;
+    return true;
  }
  
  inline static void CacheWriteInt32(wxOutputStream *f, wxInt32 value)
@@ -337,7 +328,7 @@ inline static wxInt32 CacheReadInt32(wxInputStream *f)
  }
  
  inline static void CacheWriteString(wxOutputStream *f, const wxString& str)
-{    
+{
      const wxWX2MBbuf mbstr = str.mb_str(wxConvUTF8);
      size_t len = strlen((const char*)mbstr)+1;
      CacheWriteInt32(f, len);
@@ -366,12 +357,12 @@ bool wxHtmlHelpData::LoadCachedBook(wxHtmlBookRecord *book, wxInputStream *f)
  
      /* load header - version info : */
      version = CacheReadInt32(f);
-    
+
      if (version != CURRENT_CACHED_BOOK_VERSION)
      {
-        // NB: We can just silently return FALSE here and don't worry about
+        // NB: We can just silently return false here and not worry about
          //     it anymore, because AddBookParam will load the MS project in
-        //     absence of (properly versioned) .cached file and automatically 
+        //     absence of (properly versioned) .cached file and automatically
          //     create new .cached file immediately afterward.
          return false;
      }
@@ -425,14 +416,14 @@ bool wxHtmlHelpData::SaveCachedBook(wxHtmlBookRecord *book, wxOutputStream *f)
  
      /* save contents : */
      int len = m_contents.size();
-    for (cnt = 0, i = 0; i < len; i++) 
+    for (cnt = 0, i = 0; i < len; i++)
          if (m_contents[i].book == book && m_contents[i].level > 0)
              cnt++;
      CacheWriteInt32(f, cnt);
  
      for (i = 0; i < len; i++)
      {
-        if (m_contents[i].book != book || m_contents[i].level == 0) 
+        if (m_contents[i].book != book || m_contents[i].level == 0)
              continue;
          CacheWriteInt32(f, m_contents[i].level);
          CacheWriteInt32(f, m_contents[i].id);
@@ -442,14 +433,14 @@ bool wxHtmlHelpData::SaveCachedBook(wxHtmlBookRecord *book, wxOutputStream *f)
  
      /* save index : */
      len = m_index.size();
-    for (cnt = 0, i = 0; i < len; i++) 
-        if (m_index[i].book == book && m_index[i].level > 0) 
+    for (cnt = 0, i = 0; i < len; i++)
+        if (m_index[i].book == book && m_index[i].level > 0)
              cnt++;
      CacheWriteInt32(f, cnt);
  
      for (i = 0; i < len; i++)
      {
-        if (m_index[i].book != book || m_index[i].level == 0) 
+        if (m_index[i].book != book || m_index[i].level == 0)
              continue;
          CacheWriteString(f, m_index[i].name);
          CacheWriteString(f, m_index[i].page);
@@ -461,17 +452,17 @@ bool wxHtmlHelpData::SaveCachedBook(wxHtmlBookRecord *book, wxOutputStream *f)
          }
          else
          {
-            int cnt = 0;
+            int cnt2 = 0;
              wxHtmlHelpDataItem *parent = m_index[i].parent;
              for (int j = i-1; j >= 0; j--)
              {
                  if (m_index[j].book == book && m_index[j].level > 0)
-                    cnt++;
+                    cnt2++;
                  if (&m_index[j] == parent)
                      break;
              }
-            wxASSERT(cnt > 0);
-            CacheWriteInt32(f, cnt);
+            wxASSERT(cnt2 > 0);
+            CacheWriteInt32(f, cnt2);
          }
      }
      return true;
@@ -484,11 +475,10 @@ void wxHtmlHelpData::SetTempDir(const wxString& path)
          m_tempPath = path;
      else
      {
-        if (wxIsAbsolutePath(path)) m_tempPath = path;
-        else m_tempPath = wxGetCwd() + _T("/") + path;
+        wxFileName fn(path);
+        fn.MakeAbsolute();
  
-        if (m_tempPath[m_tempPath.Length() - 1] != _T('/'))
-            m_tempPath << _T('/');
+        m_tempPath = fn.GetPath(wxPATH_GET_VOLUME | wxPATH_GET_SEPARATOR);
      }
  }
  
@@ -517,8 +507,8 @@ bool wxHtmlHelpData::AddBookParam(const wxFSFile& bookfile,
      int IndexOld = m_index.size(),
          ContentsOld = m_contents.size();
  
-    if (!path.IsEmpty())
-        fsys.ChangePathTo(path, TRUE);
+    if (!path.empty())
+        fsys.ChangePathTo(path, true);
  
      size_t booksCnt = m_bookRecords.GetCount();
      for (size_t i = 0; i < booksCnt; i++)
@@ -528,7 +518,7 @@ bool wxHtmlHelpData::AddBookParam(const wxFSFile& bookfile,
      }
  
      bookr = new wxHtmlBookRecord(bookfile.GetLocation(), fsys.GetPath(), title, deftopic);
-    
+
      wxHtmlHelpDataItem *bookitem = new wxHtmlHelpDataItem;
      bookitem->level = 0;
      bookitem->id = 0;
@@ -578,7 +568,6 @@ bool wxHtmlHelpData::AddBookParam(const wxFSFile& bookfile,
      // Now store the contents range
      bookr->SetContentsRange(cont_start, m_contents.size());
  
-#if wxUSE_WCHAR_T
      // MS HTML Help files [written by MS HTML Help Workshop] are broken
      // in that the data are iso-8859-1 (including HTML entities), but must
      // be interpreted as being in language's windows charset. Correct the
@@ -606,11 +595,6 @@ bool wxHtmlHelpData::AddBookParam(const wxFSFile& bookfile,
          }
          #undef CORRECT_STR
      }
-#else
-    wxUnusedVar(IndexOld);
-    wxUnusedVar(ContentsOld);
-    wxASSERT_MSG(encoding == wxFONTENCODING_SYSTEM, wxT("Help files need charset conversion, but wxUSE_WCHAR_T is 0"));
-#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
  
      m_bookRecords.Add(bookr);
      if (!m_index.empty())
@@ -633,7 +617,7 @@ bool wxHtmlHelpData::AddBook(const wxString& book)
      {
          wxFileSystem fsys;
          wxString s;
-        bool rt = FALSE;
+        bool rt = false;
  
  #if wxUSE_LIBMSPACK
          if (extension == wxT(".chm"))
@@ -642,9 +626,9 @@ bool wxHtmlHelpData::AddBook(const wxString& book)
  #endif
              s = fsys.FindFirst(book + wxT("#zip:*.hhp"), wxFILE);
  
-        while (!s.IsEmpty())
+        while (!s.empty())
          {
-            if (AddBook(s)) rt = TRUE;
+            if (AddBook(s)) rt = true;
              s = fsys.FindNext();
          }
  
@@ -665,7 +649,7 @@ bool wxHtmlHelpData::AddBook(const wxString& book)
      if (fi == NULL)
      {
          wxLogError(_("Cannot open HTML help book: %s"), book.c_str());
-        return FALSE;
+        return false;
      }
      fsys.ChangePathTo(book);
  
@@ -676,52 +660,63 @@ bool wxHtmlHelpData::AddBook(const wxString& book)
      tmp = filter.ReadFile(*fi);
      lineptr = tmp.c_str();
  
-    do 
+    do
      {
          lineptr = ReadLine(lineptr, linebuf, 300);
-        
+
          for (wxChar *ch = linebuf; *ch != wxT('\0') && *ch != wxT('='); ch++)
-           *ch = tolower(*ch);
-
-        if (wxStrstr(linebuf, _T("title=")) == linebuf)
-            title = linebuf + wxStrlen(_T("title="));
-        if (wxStrstr(linebuf, _T("default topic=")) == linebuf)
-            start = linebuf + wxStrlen(_T("default topic="));
-        if (wxStrstr(linebuf, _T("index file=")) == linebuf)
-            index = linebuf + wxStrlen(_T("index file="));
-        if (wxStrstr(linebuf, _T("contents file=")) == linebuf)
-            contents = linebuf + wxStrlen(_T("contents file="));
-        if (wxStrstr(linebuf, _T("charset=")) == linebuf)
-            charset = linebuf + wxStrlen(_T("charset="));
+           *ch = (wxChar)wxTolower(*ch);
+
+        if (wxStrstr(linebuf, wxT("title=")) == linebuf)
+            title = linebuf + wxStrlen(wxT("title="));
+        if (wxStrstr(linebuf, wxT("default topic=")) == linebuf)
+            start = linebuf + wxStrlen(wxT("default topic="));
+        if (wxStrstr(linebuf, wxT("index file=")) == linebuf)
+            index = linebuf + wxStrlen(wxT("index file="));
+        if (wxStrstr(linebuf, wxT("contents file=")) == linebuf)
+            contents = linebuf + wxStrlen(wxT("contents file="));
+        if (wxStrstr(linebuf, wxT("charset=")) == linebuf)
+            charset = linebuf + wxStrlen(wxT("charset="));
      } while (lineptr != NULL);
-        
-    wxFontEncoding enc;
-    if (charset == wxEmptyString) enc = wxFONTENCODING_SYSTEM;
-    else enc = wxFontMapper::Get()->CharsetToEncoding(charset);
+
+    wxFontEncoding enc = wxFONTENCODING_SYSTEM;
+#if wxUSE_FONTMAP
+    if (charset != wxEmptyString)
+        enc = wxFontMapper::Get()->CharsetToEncoding(charset);
+#endif
  
      bool rtval = AddBookParam(*fi, enc,
                                title, contents, index, start, fsys.GetPath());
      delete fi;
  
-#if WXWIN_COMPATIBILITY_2_4
-    CleanCompatibilityData();
-#endif
-
      return rtval;
  }
  
  wxString wxHtmlHelpData::FindPageByName(const wxString& x)
  {
-    int cnt;
      int i;
-    wxFileSystem fsys;
-    wxFSFile *f;
  
-    /* 1. try to open given file: */
+    bool has_non_ascii = false;
+    wxString::const_iterator it;
+    for (it = x.begin(); it != x.end(); ++it)
+    {
+        wxUniChar ch = *it;
+        if (!ch.IsAscii())
+        {
+            has_non_ascii = true;
+            break;
+        }
+    }
  
-    cnt = m_bookRecords.GetCount();
-    for (i = 0; i < cnt; i++)
+    int cnt = m_bookRecords.GetCount();
+
+    if (!has_non_ascii)
      {
+      wxFileSystem fsys;
+      wxFSFile *f;
+      // 1. try to open given file:
+      for (i = 0; i < cnt; i++)
+      {
          f = fsys.OpenFile(m_bookRecords[i].GetFullPath(x));
          if (f)
          {
@@ -729,19 +724,18 @@ wxString wxHtmlHelpData::FindPageByName(const wxString& x)
              delete f;
              return url;
          }
+      }
      }
  
  
-    /* 2. try to find a book: */
-
+    // 2. try to find a book:
      for (i = 0; i < cnt; i++)
      {
          if (m_bookRecords[i].GetTitle() == x)
              return m_bookRecords[i].GetFullPath(m_bookRecords[i].GetStart());
      }
  
-    /* 3. try to find in contents: */
-
+    // 3. try to find in contents:
      cnt = m_contents.size();
      for (i = 0; i < cnt; i++)
      {
@@ -750,8 +744,7 @@ wxString wxHtmlHelpData::FindPageByName(const wxString& x)
      }
  
  
-    /* 4. try to find in index: */
-
+    // 4. try to find in index:
      cnt = m_index.size();
      for (i = 0; i < cnt; i++)
      {
@@ -759,6 +752,13 @@ wxString wxHtmlHelpData::FindPageByName(const wxString& x)
              return m_index[i].GetFullPath();
      }
  
+    // 4b. if still not found, try case-insensitive comparison
+    for (i = 0; i < cnt; i++)
+    {
+        if (m_index[i].name.CmpNoCase(x) == 0)
+            return m_index[i].GetFullPath();
+    }
+
      return wxEmptyString;
  }
  
@@ -776,90 +776,6 @@ wxString wxHtmlHelpData::FindPageById(int id)
      return wxEmptyString;
  }
  
-#if WXWIN_COMPATIBILITY_2_4
-wxHtmlContentsItem::wxHtmlContentsItem()
-    : m_Level(0), m_ID(-1), m_Name(NULL), m_Page(NULL), m_Book(NULL),
-      m_autofree(false)
-{
-}
-
-wxHtmlContentsItem::wxHtmlContentsItem(const wxHtmlHelpDataItem& d)
-{
-    m_autofree = true;
-    m_Level = d.level;
-    m_ID = d.id;
-    m_Name = wxStrdup(d.name.c_str());
-    m_Page = wxStrdup(d.page.c_str());
-    m_Book = d.book;
-}
-
-wxHtmlContentsItem& wxHtmlContentsItem::operator=(const wxHtmlContentsItem& d)
-{
-    if (m_autofree)
-    {
-        free(m_Name);
-        free(m_Page);
-    }
-    m_autofree = true;
-    m_Level = d.m_Level;
-    m_ID = d.m_ID;
-    m_Name = d.m_Name ? wxStrdup(d.m_Name) : NULL;
-    m_Page = d.m_Page ? wxStrdup(d.m_Page) : NULL;
-    m_Book = d.m_Book;
-    return *this;
-}
-
-wxHtmlContentsItem::~wxHtmlContentsItem()
-{
-    if (m_autofree)
-    {
-        free(m_Name);
-        free(m_Page);
-    }
-}
-
-wxHtmlContentsItem* wxHtmlHelpData::GetContents()
-{
-    if (!m_cacheContents && !m_contents.empty())
-    {
-        size_t len = m_contents.size();
-        m_cacheContents = new wxHtmlContentsItem[len];
-        for (size_t i = 0; i < len; i++)
-            m_cacheContents[i] = m_contents[i];
-    }
-    return m_cacheContents;
-}
-
-int wxHtmlHelpData::GetContentsCnt()
-{
-    return m_contents.size();
-}
-
-wxHtmlContentsItem* wxHtmlHelpData::GetIndex()
-{
-    if (!m_cacheContents && !m_index.empty())
-    {
-        size_t len = m_index.size();
-        m_cacheContents = new wxHtmlContentsItem[len];
-        for (size_t i = 0; i < len; i++)
-            m_cacheContents[i] = m_index[i];
-    }
-    return m_cacheContents;
-}
-
-int wxHtmlHelpData::GetIndexCnt()
-{
-    return m_index.size();
-}
-
-void wxHtmlHelpData::CleanCompatibilityData()
-{
-    delete[] m_cacheContents;
-    m_cacheContents = NULL;
-    delete[] m_cacheIndex;
-    m_cacheIndex = NULL;
-}
-#endif // WXWIN_COMPATIBILITY_2_4
  
  //----------------------------------------------------------------------------------
  // wxHtmlSearchStatus functions
@@ -897,15 +813,6 @@ wxHtmlSearchStatus::wxHtmlSearchStatus(wxHtmlHelpData* data, const wxString& key
      m_Active = (m_CurIndex < m_MaxIndex);
  }
  
-#if WXWIN_COMPATIBILITY_2_4
-wxHtmlContentsItem* wxHtmlSearchStatus::GetContentsItem()
-{
-    static wxHtmlContentsItem it;
-    it = wxHtmlContentsItem(*m_CurItem);
-    return &it;
-}
-#endif
-
  bool wxHtmlSearchStatus::Search()
  {
      wxFSFile *file;
@@ -930,11 +837,11 @@ bool wxHtmlSearchStatus::Search()
      {
          const wxChar *p1, *p2;
          for (p1 = thepage.c_str(), p2 = m_LastPage.c_str();
-             *p1 != 0 && *p1 != _T('#') && *p1 == *p2; p1++, p2++) {}
+             *p1 != 0 && *p1 != wxT('#') && *p1 == *p2; p1++, p2++) {}
  
          m_LastPage = thepage;
  
-        if (*p1 == 0 || *p1 == _T('#'))
+        if (*p1 == 0 || *p1 == wxT('#'))
              return false;
      }
      else m_LastPage = thepage;
@@ -971,57 +878,104 @@ void wxHtmlSearchEngine::LookFor(const wxString& keyword, bool case_sensitive, b
      m_WholeWords = whole_words_only;
      m_Keyword = keyword;
  
-    if (m_CaseSensitive)
+    if (!m_CaseSensitive)
          m_Keyword.LowerCase();
  }
  
  
  static inline bool WHITESPACE(wxChar c)
  {
-    return c == _T(' ') || c == _T('\n') || c == _T('\r') || c == _T('\t');
+    return c == wxT(' ') || c == wxT('\n') || c == wxT('\r') || c == wxT('\t');
+}
+
+// collapse each run of consecutive whitespace characters into one single space
+static inline wxString CompressSpaces(const wxString & str)
+{
+    wxString buf;
+    buf.reserve( str.size() );
+
+    bool space_counted = false;
+    for( const wxChar * pstr = str.c_str(); *pstr; ++pstr )
+    {
+        wxChar ch = *pstr;
+        if( WHITESPACE( ch ) )
+        {
+            if( space_counted )
+            {
+                continue;
+            }
+            ch = wxT(' ');
+            space_counted = true;
+        }
+        else
+        {
+            space_counted = false;
+        }
+        buf += ch;
+    }
+
+    return buf;
  }
  
  bool wxHtmlSearchEngine::Scan(const wxFSFile& file)
  {
      wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!"));
  
-    int i, j;
-    int wrd = m_Keyword.Length();
-    bool found = FALSE;
      wxHtmlFilterHTML filter;
-    wxString tmp = filter.ReadFile(file);
-    int lng = tmp.length();
-    const wxChar *buf = tmp.c_str();
+    wxString bufStr = filter.ReadFile(file);
  
      if (!m_CaseSensitive)
-        tmp.LowerCase();
+        bufStr.LowerCase();
  
-    const wxChar *kwd = m_Keyword.c_str();
-    
-    if (m_WholeWords)
-    {
-        for (i = 0; i < lng - wrd; i++)
+    {   // remove html tags
+        wxString bufStrCopy;
+        bufStrCopy.reserve( bufStr.size() );
+        bool insideTag = false;
+        for (const wxChar * pBufStr = bufStr.c_str(); *pBufStr; ++pBufStr)
          {
-            if (WHITESPACE(buf[i])) continue;
-            j = 0;
-            while ((j < wrd) && (buf[i + j] == kwd[j])) j++;
-            if (j == wrd && WHITESPACE(buf[i + j])) { found = true; break; }
+            wxChar c = *pBufStr;
+            if (insideTag)
+            {
+                if (c == wxT('>'))
+                {
+                    insideTag = false;
+                    // replace the tag with a single space
+                    c = wxT(' ');
+                }
+                else
+                    continue;
+            }
+            else if (c == wxT('<'))
+            {
+                wxChar nextCh = *(pBufStr + 1);
+                if (nextCh == wxT('/') || !WHITESPACE(nextCh))
+                {
+                    insideTag = true;
+                    continue;
+                }
+            }
+            bufStrCopy += c;
          }
+        bufStr.swap( bufStrCopy );
      }
  
-    else
+    wxString keyword = m_Keyword;
+
+    if (m_WholeWords)
      {
-        for (i = 0; i < lng - wrd; i++)
-        {
-            j = 0;
-            while ((j < wrd) && (buf[i + j] == kwd[j])) j++;
-            if (j == wrd) { found = true; break; }
-        }
+        // pad keyword and text with ' ' at both ends so that only whole words match
+        keyword.insert( 0, wxT(" ") );
+        keyword.append( wxT(" ") );
+        bufStr.insert( 0, wxT(" ") );
+        bufStr.append( wxT(" ") );
      }
  
-    return found;
-}
-
+    // collapse runs of consecutive whitespace into single spaces
+    keyword = CompressSpaces( keyword );
+    bufStr = CompressSpaces( bufStr );
  
+    // finally do the search
+    return bufStr.find( keyword ) != wxString::npos;
+}
  
  #endif