strip HTML tags when searching for keywords
[wxWidgets.git] / src / html / helpdata.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/helpdata.cpp
3 // Purpose: wxHtmlHelpData
4 // Notes: Based on htmlhelp.cpp, implementing a monolithic
5 // HTML Help controller class, by Vaclav Slavik
6 // Author: Harm van der Heijden and Vaclav Slavik
7 // RCS-ID: $Id$
8 // Copyright: (c) Harm van der Heijden and Vaclav Slavik
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
11
12 // For compilers that support precompilation, includes "wx.h".
13 #include "wx/wxprec.h"
14
15 #ifdef __BORLANDC__
16 #pragma hdrstop
17 #endif
18
19 #if wxUSE_HTML && wxUSE_STREAMS
20
21 #ifndef WXPRECOMP
22 #include "wx/intl.h"
23 #include "wx/log.h"
24 #endif
25
26 #include <ctype.h>
27
28 #include "wx/html/helpdata.h"
29 #include "wx/tokenzr.h"
30 #include "wx/wfstream.h"
31 #include "wx/busyinfo.h"
32 #include "wx/encconv.h"
33 #include "wx/fontmap.h"
34 #include "wx/html/htmlpars.h"
35 #include "wx/html/htmldefs.h"
36 #include "wx/html/htmlfilt.h"
37 #include "wx/filename.h"
38
39 #include "wx/arrimpl.cpp"
40 WX_DEFINE_OBJARRAY(wxHtmlBookRecArray)
41 WX_DEFINE_OBJARRAY(wxHtmlHelpDataItems)
42
43 //-----------------------------------------------------------------------------
44 // static helper functions
45 //-----------------------------------------------------------------------------
46
47 // Reads one line, stores it into buf and returns pointer to new line or NULL.
48 static const wxChar* ReadLine(const wxChar *line, wxChar *buf, size_t bufsize)
49 {
50 wxChar *writeptr = buf;
51 wxChar *endptr = buf + bufsize - 1;
52 const wxChar *readptr = line;
53
54 while (*readptr != 0 && *readptr != _T('\r') && *readptr != _T('\n') &&
55 writeptr != endptr)
56 *(writeptr++) = *(readptr++);
57 *writeptr = 0;
58 while (*readptr == _T('\r') || *readptr == _T('\n'))
59 readptr++;
60 if (*readptr == 0)
61 return NULL;
62 else
63 return readptr;
64 }
65
66
67
68 static int
69 wxHtmlHelpIndexCompareFunc(wxHtmlHelpDataItem **a, wxHtmlHelpDataItem **b)
70 {
71 wxHtmlHelpDataItem *ia = *a;
72 wxHtmlHelpDataItem *ib = *b;
73
74 if (ia == NULL)
75 return -1;
76 if (ib == NULL)
77 return 1;
78
79 if (ia->parent == ib->parent)
80 {
81 return ia->name.CmpNoCase(ib->name);
82 }
83 else if (ia->level == ib->level)
84 {
85 return wxHtmlHelpIndexCompareFunc(&ia->parent, &ib->parent);
86 }
87 else
88 {
89 wxHtmlHelpDataItem *ia2 = ia;
90 wxHtmlHelpDataItem *ib2 = ib;
91
92 while (ia2->level > ib2->level)
93 {
94 ia2 = ia2->parent;
95 }
96 while (ib2->level > ia2->level)
97 {
98 ib2 = ib2->parent;
99 }
100
101 wxASSERT(ia2);
102 wxASSERT(ib2);
103 int res = wxHtmlHelpIndexCompareFunc(&ia2, &ib2);
104 if (res != 0)
105 return res;
106 else if (ia->level > ib->level)
107 return 1;
108 else
109 return -1;
110 }
111 }
112
113 //-----------------------------------------------------------------------------
114 // HP_Parser
115 //-----------------------------------------------------------------------------
116
117 class HP_Parser : public wxHtmlParser
118 {
119 public:
120 HP_Parser()
121 {
122 GetEntitiesParser()->SetEncoding(wxFONTENCODING_ISO8859_1);
123 }
124
125 wxObject* GetProduct() { return NULL; }
126
127 protected:
128 virtual void AddText(const wxChar* WXUNUSED(txt)) {}
129
130 DECLARE_NO_COPY_CLASS(HP_Parser)
131 };
132
133
134 //-----------------------------------------------------------------------------
135 // HP_TagHandler
136 //-----------------------------------------------------------------------------
137
138 class HP_TagHandler : public wxHtmlTagHandler
139 {
140 private:
141 wxString m_name, m_page;
142 int m_level;
143 int m_id;
144 int m_index;
145 int m_count;
146 wxHtmlHelpDataItem *m_parentItem;
147 wxHtmlBookRecord *m_book;
148
149 wxHtmlHelpDataItems *m_data;
150
151 public:
152 HP_TagHandler(wxHtmlBookRecord *b) : wxHtmlTagHandler()
153 {
154 m_data = NULL;
155 m_book = b;
156 m_name = m_page = wxEmptyString;
157 m_level = 0;
158 m_id = wxID_ANY;
159 m_count = 0;
160 m_parentItem = NULL;
161 }
162 wxString GetSupportedTags() { return wxT("UL,OBJECT,PARAM"); }
163 bool HandleTag(const wxHtmlTag& tag);
164
165 void Reset(wxHtmlHelpDataItems& data)
166 {
167 m_data = &data;
168 m_count = 0;
169 m_level = 0;
170 m_parentItem = NULL;
171 }
172
173 DECLARE_NO_COPY_CLASS(HP_TagHandler)
174 };
175
176
177 bool HP_TagHandler::HandleTag(const wxHtmlTag& tag)
178 {
179 if (tag.GetName() == wxT("UL"))
180 {
181 wxHtmlHelpDataItem *oldparent = m_parentItem;
182 m_level++;
183 m_parentItem = (m_count > 0) ? &(*m_data)[m_data->size()-1] : NULL;
184 ParseInner(tag);
185 m_level--;
186 m_parentItem = oldparent;
187 return true;
188 }
189 else if (tag.GetName() == wxT("OBJECT"))
190 {
191 m_name = m_page = wxEmptyString;
192 ParseInner(tag);
193
194 #if 0
195 if (!page.IsEmpty())
196 /* Valid HHW's file may contain only two object tags:
197
198 <OBJECT type="text/site properties">
199 <param name="ImageType" value="Folder">
200 </OBJECT>
201
202 or
203
204 <OBJECT type="text/sitemap">
205 <param name="Name" value="main page">
206 <param name="Local" value="another.htm">
207 </OBJECT>
208
209 We're interested in the latter. !page.IsEmpty() is valid
210 condition because text/site properties does not contain Local param
211 */
212 #endif
213 if (tag.GetParam(wxT("TYPE")) == wxT("text/sitemap"))
214 {
215 wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem();
216 item->parent = m_parentItem;
217 item->level = m_level;
218 item->id = m_id;
219 item->page = m_page;
220 item->name = m_name;
221
222 item->book = m_book;
223 m_data->Add(item);
224 m_count++;
225 }
226
227 return true;
228 }
229 else
230 { // "PARAM"
231 if (m_name.empty() && tag.GetParam(wxT("NAME")) == wxT("Name"))
232 m_name = tag.GetParam(wxT("VALUE"));
233 if (tag.GetParam(wxT("NAME")) == wxT("Local"))
234 m_page = tag.GetParam(wxT("VALUE"));
235 if (tag.GetParam(wxT("NAME")) == wxT("ID"))
236 tag.GetParamAsInt(wxT("VALUE"), &m_id);
237 return false;
238 }
239 }
240
241
242 //-----------------------------------------------------------------------------
243 // wxHtmlHelpData
244 //-----------------------------------------------------------------------------
245
246 wxString wxHtmlBookRecord::GetFullPath(const wxString &page) const
247 {
248 if (wxIsAbsolutePath(page))
249 return page;
250 else
251 return m_BasePath + page;
252 }
253
254 wxString wxHtmlHelpDataItem::GetIndentedName() const
255 {
256 wxString s;
257 for (int i = 1; i < level; i++)
258 s << _T(" ");
259 s << name;
260 return s;
261 }
262
263
264 IMPLEMENT_DYNAMIC_CLASS(wxHtmlHelpData, wxObject)
265
266 wxHtmlHelpData::wxHtmlHelpData()
267 {
268 #if WXWIN_COMPATIBILITY_2_4
269 m_cacheContents = NULL;
270 m_cacheIndex = NULL;
271 #endif
272 }
273
274 wxHtmlHelpData::~wxHtmlHelpData()
275 {
276 #if WXWIN_COMPATIBILITY_2_4
277 CleanCompatibilityData();
278 #endif
279 }
280
281 bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys,
282 const wxString& indexfile,
283 const wxString& contentsfile)
284 {
285 wxFSFile *f;
286 wxHtmlFilterHTML filter;
287 wxString buf;
288 wxString string;
289
290 HP_Parser parser;
291 HP_TagHandler *handler = new HP_TagHandler(book);
292 parser.AddTagHandler(handler);
293
294 f = ( contentsfile.empty() ? (wxFSFile*) NULL : fsys.OpenFile(contentsfile) );
295 if (f)
296 {
297 buf.clear();
298 buf = filter.ReadFile(*f);
299 delete f;
300 handler->Reset(m_contents);
301 parser.Parse(buf);
302 }
303 else
304 {
305 wxLogError(_("Cannot open contents file: %s"), contentsfile.c_str());
306 }
307
308 f = ( indexfile.empty() ? (wxFSFile*) NULL : fsys.OpenFile(indexfile) );
309 if (f)
310 {
311 buf.clear();
312 buf = filter.ReadFile(*f);
313 delete f;
314 handler->Reset(m_index);
315 parser.Parse(buf);
316 }
317 else if (!indexfile.empty())
318 {
319 wxLogError(_("Cannot open index file: %s"), indexfile.c_str());
320 }
321 return true;
322 }
323
324 inline static void CacheWriteInt32(wxOutputStream *f, wxInt32 value)
325 {
326 wxInt32 x = wxINT32_SWAP_ON_BE(value);
327 f->Write(&x, sizeof(x));
328 }
329
330 inline static wxInt32 CacheReadInt32(wxInputStream *f)
331 {
332 wxInt32 x;
333 f->Read(&x, sizeof(x));
334 return wxINT32_SWAP_ON_BE(x);
335 }
336
337 inline static void CacheWriteString(wxOutputStream *f, const wxString& str)
338 {
339 const wxWX2MBbuf mbstr = str.mb_str(wxConvUTF8);
340 size_t len = strlen((const char*)mbstr)+1;
341 CacheWriteInt32(f, len);
342 f->Write((const char*)mbstr, len);
343 }
344
345 inline static wxString CacheReadString(wxInputStream *f)
346 {
347 size_t len = (size_t)CacheReadInt32(f);
348 wxCharBuffer str(len-1);
349 f->Read(str.data(), len);
350 return wxString(str, wxConvUTF8);
351 }
352
353 #define CURRENT_CACHED_BOOK_VERSION 5
354
355 // Additional flags to detect incompatibilities of the runtime environment:
356 #define CACHED_BOOK_FORMAT_FLAGS \
357 (wxUSE_UNICODE << 0)
358
359
360 bool wxHtmlHelpData::LoadCachedBook(wxHtmlBookRecord *book, wxInputStream *f)
361 {
362 int i, st, newsize;
363 wxInt32 version;
364
365 /* load header - version info : */
366 version = CacheReadInt32(f);
367
368 if (version != CURRENT_CACHED_BOOK_VERSION)
369 {
370 // NB: We can just silently return false here and don't worry about
371 // it anymore, because AddBookParam will load the MS project in
372 // absence of (properly versioned) .cached file and automatically
373 // create new .cached file immediately afterward.
374 return false;
375 }
376
377 if (CacheReadInt32(f) != CACHED_BOOK_FORMAT_FLAGS)
378 return false;
379
380 /* load contents : */
381 st = m_contents.size();
382 newsize = st + CacheReadInt32(f);
383 m_contents.Alloc(newsize);
384 for (i = st; i < newsize; i++)
385 {
386 wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem;
387 item->level = CacheReadInt32(f);
388 item->id = CacheReadInt32(f);
389 item->name = CacheReadString(f);
390 item->page = CacheReadString(f);
391 item->book = book;
392 m_contents.Add(item);
393 }
394
395 /* load index : */
396 st = m_index.size();
397 newsize = st + CacheReadInt32(f);
398 m_index.Alloc(newsize);
399 for (i = st; i < newsize; i++)
400 {
401 wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem;
402 item->name = CacheReadString(f);
403 item->page = CacheReadString(f);
404 item->level = CacheReadInt32(f);
405 item->book = book;
406 int parentShift = CacheReadInt32(f);
407 if (parentShift != 0)
408 item->parent = &m_index[m_index.size() - parentShift];
409 m_index.Add(item);
410 }
411 return true;
412 }
413
414
415 bool wxHtmlHelpData::SaveCachedBook(wxHtmlBookRecord *book, wxOutputStream *f)
416 {
417 int i;
418 wxInt32 cnt;
419
420 /* save header - version info : */
421 CacheWriteInt32(f, CURRENT_CACHED_BOOK_VERSION);
422 CacheWriteInt32(f, CACHED_BOOK_FORMAT_FLAGS);
423
424 /* save contents : */
425 int len = m_contents.size();
426 for (cnt = 0, i = 0; i < len; i++)
427 if (m_contents[i].book == book && m_contents[i].level > 0)
428 cnt++;
429 CacheWriteInt32(f, cnt);
430
431 for (i = 0; i < len; i++)
432 {
433 if (m_contents[i].book != book || m_contents[i].level == 0)
434 continue;
435 CacheWriteInt32(f, m_contents[i].level);
436 CacheWriteInt32(f, m_contents[i].id);
437 CacheWriteString(f, m_contents[i].name);
438 CacheWriteString(f, m_contents[i].page);
439 }
440
441 /* save index : */
442 len = m_index.size();
443 for (cnt = 0, i = 0; i < len; i++)
444 if (m_index[i].book == book && m_index[i].level > 0)
445 cnt++;
446 CacheWriteInt32(f, cnt);
447
448 for (i = 0; i < len; i++)
449 {
450 if (m_index[i].book != book || m_index[i].level == 0)
451 continue;
452 CacheWriteString(f, m_index[i].name);
453 CacheWriteString(f, m_index[i].page);
454 CacheWriteInt32(f, m_index[i].level);
455 // save distance to parent item, if any:
456 if (m_index[i].parent == NULL)
457 {
458 CacheWriteInt32(f, 0);
459 }
460 else
461 {
462 int cnt2 = 0;
463 wxHtmlHelpDataItem *parent = m_index[i].parent;
464 for (int j = i-1; j >= 0; j--)
465 {
466 if (m_index[j].book == book && m_index[j].level > 0)
467 cnt2++;
468 if (&m_index[j] == parent)
469 break;
470 }
471 wxASSERT(cnt2 > 0);
472 CacheWriteInt32(f, cnt2);
473 }
474 }
475 return true;
476 }
477
478
479 void wxHtmlHelpData::SetTempDir(const wxString& path)
480 {
481 if (path.empty())
482 m_tempPath = path;
483 else
484 {
485 if (wxIsAbsolutePath(path)) m_tempPath = path;
486 else m_tempPath = wxGetCwd() + _T("/") + path;
487
488 if (m_tempPath[m_tempPath.length() - 1] != _T('/'))
489 m_tempPath << _T('/');
490 }
491 }
492
493
494
495 static wxString SafeFileName(const wxString& s)
496 {
497 wxString res(s);
498 res.Replace(wxT("#"), wxT("_"));
499 res.Replace(wxT(":"), wxT("_"));
500 res.Replace(wxT("\\"), wxT("_"));
501 res.Replace(wxT("/"), wxT("_"));
502 return res;
503 }
504
505 bool wxHtmlHelpData::AddBookParam(const wxFSFile& bookfile,
506 wxFontEncoding encoding,
507 const wxString& title, const wxString& contfile,
508 const wxString& indexfile, const wxString& deftopic,
509 const wxString& path)
510 {
511 wxFileSystem fsys;
512 wxFSFile *fi;
513 wxHtmlBookRecord *bookr;
514
515 int IndexOld = m_index.size(),
516 ContentsOld = m_contents.size();
517
518 if (!path.empty())
519 fsys.ChangePathTo(path, true);
520
521 size_t booksCnt = m_bookRecords.GetCount();
522 for (size_t i = 0; i < booksCnt; i++)
523 {
524 if ( m_bookRecords[i].GetBookFile() == bookfile.GetLocation() )
525 return true; // book is (was) loaded
526 }
527
528 bookr = new wxHtmlBookRecord(bookfile.GetLocation(), fsys.GetPath(), title, deftopic);
529
530 wxHtmlHelpDataItem *bookitem = new wxHtmlHelpDataItem;
531 bookitem->level = 0;
532 bookitem->id = 0;
533 bookitem->page = deftopic;
534 bookitem->name = title;
535 bookitem->book = bookr;
536
537 // store the contents index for later
538 int cont_start = m_contents.size();
539
540 m_contents.Add(bookitem);
541
542 // Try to find cached binary versions:
543 // 1. save file as book, but with .hhp.cached extension
544 // 2. same as 1. but in temp path
545 // 3. otherwise or if cache load failed, load it from MS.
546
547 fi = fsys.OpenFile(bookfile.GetLocation() + wxT(".cached"));
548
549 if (fi == NULL ||
550 #if wxUSE_DATETIME
551 fi->GetModificationTime() < bookfile.GetModificationTime() ||
552 #endif // wxUSE_DATETIME
553 !LoadCachedBook(bookr, fi->GetStream()))
554 {
555 if (fi != NULL) delete fi;
556 fi = fsys.OpenFile(m_tempPath + wxFileNameFromPath(bookfile.GetLocation()) + wxT(".cached"));
557 if (m_tempPath.empty() || fi == NULL ||
558 #if wxUSE_DATETIME
559 fi->GetModificationTime() < bookfile.GetModificationTime() ||
560 #endif // wxUSE_DATETIME
561 !LoadCachedBook(bookr, fi->GetStream()))
562 {
563 LoadMSProject(bookr, fsys, indexfile, contfile);
564 if (!m_tempPath.empty())
565 {
566 wxFileOutputStream *outs = new wxFileOutputStream(m_tempPath +
567 SafeFileName(wxFileNameFromPath(bookfile.GetLocation())) + wxT(".cached"));
568 SaveCachedBook(bookr, outs);
569 delete outs;
570 }
571 }
572 }
573
574 if (fi != NULL) delete fi;
575
576 // Now store the contents range
577 bookr->SetContentsRange(cont_start, m_contents.size());
578
579 #if wxUSE_WCHAR_T
580 // MS HTML Help files [written by MS HTML Help Workshop] are broken
581 // in that the data are iso-8859-1 (including HTML entities), but must
582 // be interpreted as being in language's windows charset. Correct the
583 // differences here and also convert to wxConvLocal in ANSI build
584 if (encoding != wxFONTENCODING_SYSTEM)
585 {
586 #if wxUSE_UNICODE
587 #define CORRECT_STR(str, conv) \
588 str = wxString((str).mb_str(wxConvISO8859_1), conv)
589 #else
590 #define CORRECT_STR(str, conv) \
591 str = wxString((str).wc_str(conv), wxConvLocal)
592 #endif
593 wxCSConv conv(encoding);
594 size_t IndexCnt = m_index.size();
595 size_t ContentsCnt = m_contents.size();
596 size_t i;
597 for (i = IndexOld; i < IndexCnt; i++)
598 {
599 CORRECT_STR(m_index[i].name, conv);
600 }
601 for (i = ContentsOld; i < ContentsCnt; i++)
602 {
603 CORRECT_STR(m_contents[i].name, conv);
604 }
605 #undef CORRECT_STR
606 }
607 #else
608 wxUnusedVar(IndexOld);
609 wxUnusedVar(ContentsOld);
610 wxASSERT_MSG(encoding == wxFONTENCODING_SYSTEM, wxT("Help files need charset conversion, but wxUSE_WCHAR_T is 0"));
611 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
612
613 m_bookRecords.Add(bookr);
614 if (!m_index.empty())
615 {
616 m_index.Sort(wxHtmlHelpIndexCompareFunc);
617 }
618
619 return true;
620 }
621
622
623 bool wxHtmlHelpData::AddBook(const wxString& book)
624 {
625 wxString extension(book.Right(4).Lower());
626 if (extension == wxT(".zip") ||
627 #if wxUSE_LIBMSPACK
628 extension == wxT(".chm") /*compressed html help book*/ ||
629 #endif
630 extension == wxT(".htb") /*html book*/)
631 {
632 wxFileSystem fsys;
633 wxString s;
634 bool rt = false;
635
636 #if wxUSE_LIBMSPACK
637 if (extension == wxT(".chm"))
638 s = fsys.FindFirst(book + wxT("#chm:*.hhp"), wxFILE);
639 else
640 #endif
641 s = fsys.FindFirst(book + wxT("#zip:*.hhp"), wxFILE);
642
643 while (!s.empty())
644 {
645 if (AddBook(s)) rt = true;
646 s = fsys.FindNext();
647 }
648
649 return rt;
650 }
651
652 wxFSFile *fi;
653 wxFileSystem fsys;
654
655 wxString title = _("noname"),
656 safetitle,
657 start = wxEmptyString,
658 contents = wxEmptyString,
659 index = wxEmptyString,
660 charset = wxEmptyString;
661
662 fi = fsys.OpenFile(book);
663 if (fi == NULL)
664 {
665 wxLogError(_("Cannot open HTML help book: %s"), book.c_str());
666 return false;
667 }
668 fsys.ChangePathTo(book);
669
670 const wxChar *lineptr;
671 wxChar linebuf[300];
672 wxString tmp;
673 wxHtmlFilterPlainText filter;
674 tmp = filter.ReadFile(*fi);
675 lineptr = tmp.c_str();
676
677 do
678 {
679 lineptr = ReadLine(lineptr, linebuf, 300);
680
681 for (wxChar *ch = linebuf; *ch != wxT('\0') && *ch != wxT('='); ch++)
682 *ch = (wxChar)wxTolower(*ch);
683
684 if (wxStrstr(linebuf, _T("title=")) == linebuf)
685 title = linebuf + wxStrlen(_T("title="));
686 if (wxStrstr(linebuf, _T("default topic=")) == linebuf)
687 start = linebuf + wxStrlen(_T("default topic="));
688 if (wxStrstr(linebuf, _T("index file=")) == linebuf)
689 index = linebuf + wxStrlen(_T("index file="));
690 if (wxStrstr(linebuf, _T("contents file=")) == linebuf)
691 contents = linebuf + wxStrlen(_T("contents file="));
692 if (wxStrstr(linebuf, _T("charset=")) == linebuf)
693 charset = linebuf + wxStrlen(_T("charset="));
694 } while (lineptr != NULL);
695
696 wxFontEncoding enc = wxFONTENCODING_SYSTEM;
697 #if wxUSE_FONTMAP
698 if (charset != wxEmptyString)
699 enc = wxFontMapper::Get()->CharsetToEncoding(charset);
700 #endif
701
702 bool rtval = AddBookParam(*fi, enc,
703 title, contents, index, start, fsys.GetPath());
704 delete fi;
705
706 #if WXWIN_COMPATIBILITY_2_4
707 CleanCompatibilityData();
708 #endif
709
710 return rtval;
711 }
712
713 wxString wxHtmlHelpData::FindPageByName(const wxString& x)
714 {
715 int cnt;
716 int i;
717 wxFileSystem fsys;
718 wxFSFile *f;
719
720 /* 1. try to open given file: */
721
722 cnt = m_bookRecords.GetCount();
723 for (i = 0; i < cnt; i++)
724 {
725 f = fsys.OpenFile(m_bookRecords[i].GetFullPath(x));
726 if (f)
727 {
728 wxString url = m_bookRecords[i].GetFullPath(x);
729 delete f;
730 return url;
731 }
732 }
733
734
735 /* 2. try to find a book: */
736
737 for (i = 0; i < cnt; i++)
738 {
739 if (m_bookRecords[i].GetTitle() == x)
740 return m_bookRecords[i].GetFullPath(m_bookRecords[i].GetStart());
741 }
742
743 /* 3. try to find in contents: */
744
745 cnt = m_contents.size();
746 for (i = 0; i < cnt; i++)
747 {
748 if (m_contents[i].name == x)
749 return m_contents[i].GetFullPath();
750 }
751
752
753 /* 4. try to find in index: */
754
755 cnt = m_index.size();
756 for (i = 0; i < cnt; i++)
757 {
758 if (m_index[i].name == x)
759 return m_index[i].GetFullPath();
760 }
761
762 return wxEmptyString;
763 }
764
765 wxString wxHtmlHelpData::FindPageById(int id)
766 {
767 size_t cnt = m_contents.size();
768 for (size_t i = 0; i < cnt; i++)
769 {
770 if (m_contents[i].id == id)
771 {
772 return m_contents[i].GetFullPath();
773 }
774 }
775
776 return wxEmptyString;
777 }
778
779 #if WXWIN_COMPATIBILITY_2_4
780 wxHtmlContentsItem::wxHtmlContentsItem()
781 : m_Level(0), m_ID(wxID_ANY), m_Name(NULL), m_Page(NULL), m_Book(NULL),
782 m_autofree(false)
783 {
784 }
785
786 wxHtmlContentsItem::wxHtmlContentsItem(const wxHtmlHelpDataItem& d)
787 {
788 m_autofree = true;
789 m_Level = d.level;
790 m_ID = d.id;
791 m_Name = wxStrdup(d.name.c_str());
792 m_Page = wxStrdup(d.page.c_str());
793 m_Book = d.book;
794 }
795
796 wxHtmlContentsItem& wxHtmlContentsItem::operator=(const wxHtmlContentsItem& d)
797 {
798 if (m_autofree)
799 {
800 free(m_Name);
801 free(m_Page);
802 }
803 m_autofree = true;
804 m_Level = d.m_Level;
805 m_ID = d.m_ID;
806 m_Name = d.m_Name ? wxStrdup(d.m_Name) : NULL;
807 m_Page = d.m_Page ? wxStrdup(d.m_Page) : NULL;
808 m_Book = d.m_Book;
809 return *this;
810 }
811
812 wxHtmlContentsItem::~wxHtmlContentsItem()
813 {
814 if (m_autofree)
815 {
816 free(m_Name);
817 free(m_Page);
818 }
819 }
820
821 wxHtmlContentsItem* wxHtmlHelpData::GetContents()
822 {
823 if (!m_cacheContents && !m_contents.empty())
824 {
825 size_t len = m_contents.size();
826 m_cacheContents = new wxHtmlContentsItem[len];
827 for (size_t i = 0; i < len; i++)
828 m_cacheContents[i] = m_contents[i];
829 }
830 return m_cacheContents;
831 }
832
833 int wxHtmlHelpData::GetContentsCnt()
834 {
835 return m_contents.size();
836 }
837
838 wxHtmlContentsItem* wxHtmlHelpData::GetIndex()
839 {
840 if (!m_cacheContents && !m_index.empty())
841 {
842 size_t len = m_index.size();
843 m_cacheContents = new wxHtmlContentsItem[len];
844 for (size_t i = 0; i < len; i++)
845 m_cacheContents[i] = m_index[i];
846 }
847 return m_cacheContents;
848 }
849
850 int wxHtmlHelpData::GetIndexCnt()
851 {
852 return m_index.size();
853 }
854
855 void wxHtmlHelpData::CleanCompatibilityData()
856 {
857 delete[] m_cacheContents;
858 m_cacheContents = NULL;
859 delete[] m_cacheIndex;
860 m_cacheIndex = NULL;
861 }
862 #endif // WXWIN_COMPATIBILITY_2_4
863
864 //----------------------------------------------------------------------------------
865 // wxHtmlSearchStatus functions
866 //----------------------------------------------------------------------------------
867
868 wxHtmlSearchStatus::wxHtmlSearchStatus(wxHtmlHelpData* data, const wxString& keyword,
869 bool case_sensitive, bool whole_words_only,
870 const wxString& book)
871 {
872 m_Data = data;
873 m_Keyword = keyword;
874 wxHtmlBookRecord* bookr = NULL;
875 if (book != wxEmptyString)
876 {
877 // we have to search in a specific book. Find it first
878 int i, cnt = data->m_bookRecords.GetCount();
879 for (i = 0; i < cnt; i++)
880 if (data->m_bookRecords[i].GetTitle() == book)
881 {
882 bookr = &(data->m_bookRecords[i]);
883 m_CurIndex = bookr->GetContentsStart();
884 m_MaxIndex = bookr->GetContentsEnd();
885 break;
886 }
887 // check; we won't crash if the book doesn't exist, but it's Bad Anyway.
888 wxASSERT(bookr);
889 }
890 if (! bookr)
891 {
892 // no book specified; search all books
893 m_CurIndex = 0;
894 m_MaxIndex = m_Data->m_contents.size();
895 }
896 m_Engine.LookFor(keyword, case_sensitive, whole_words_only);
897 m_Active = (m_CurIndex < m_MaxIndex);
898 }
899
900 #if WXWIN_COMPATIBILITY_2_4
901 wxHtmlContentsItem* wxHtmlSearchStatus::GetContentsItem()
902 {
903 static wxHtmlContentsItem it;
904 it = wxHtmlContentsItem(*m_CurItem);
905 return &it;
906 }
907 #endif
908
909 bool wxHtmlSearchStatus::Search()
910 {
911 wxFSFile *file;
912 int i = m_CurIndex; // shortcut
913 bool found = false;
914 wxString thepage;
915
916 if (!m_Active)
917 {
918 // sanity check. Illegal use, but we'll try to prevent a crash anyway
919 wxASSERT(m_Active);
920 return false;
921 }
922
923 m_Name = wxEmptyString;
924 m_CurItem = NULL;
925 thepage = m_Data->m_contents[i].page;
926
927 m_Active = (++m_CurIndex < m_MaxIndex);
928 // check if it is same page with different anchor:
929 if (!m_LastPage.empty())
930 {
931 const wxChar *p1, *p2;
932 for (p1 = thepage.c_str(), p2 = m_LastPage.c_str();
933 *p1 != 0 && *p1 != _T('#') && *p1 == *p2; p1++, p2++) {}
934
935 m_LastPage = thepage;
936
937 if (*p1 == 0 || *p1 == _T('#'))
938 return false;
939 }
940 else m_LastPage = thepage;
941
942 wxFileSystem fsys;
943 file = fsys.OpenFile(m_Data->m_contents[i].book->GetFullPath(thepage));
944 if (file)
945 {
946 if (m_Engine.Scan(*file))
947 {
948 m_Name = m_Data->m_contents[i].name;
949 m_CurItem = &m_Data->m_contents[i];
950 found = true;
951 }
952 delete file;
953 }
954 return found;
955 }
956
957
958
959
960
961
962
963
964 //--------------------------------------------------------------------------------
965 // wxHtmlSearchEngine
966 //--------------------------------------------------------------------------------
967
968 void wxHtmlSearchEngine::LookFor(const wxString& keyword, bool case_sensitive, bool whole_words_only)
969 {
970 m_CaseSensitive = case_sensitive;
971 m_WholeWords = whole_words_only;
972 m_Keyword = keyword;
973
974 if (!m_CaseSensitive)
975 m_Keyword.LowerCase();
976 }
977
978
979 static inline bool WHITESPACE(wxChar c)
980 {
981 return c == _T(' ') || c == _T('\n') || c == _T('\r') || c == _T('\t');
982 }
983
984 // replace continuous spaces by one single space
985 static inline wxString CompressSpaces(const wxString & str)
986 {
987 wxString buf;
988 buf.reserve( str.size() );
989
990 bool space_counted = false;
991 for( const wxChar * pstr = str.c_str(); *pstr; ++pstr )
992 {
993 wxChar ch = *pstr;
994 if( WHITESPACE( ch ) )
995 {
996 if( space_counted )
997 {
998 continue;
999 }
1000 ch = _T(' ');
1001 space_counted = true;
1002 }
1003 else
1004 {
1005 space_counted = false;
1006 }
1007 buf += ch;
1008 }
1009
1010 return buf;
1011 }
1012
1013 bool wxHtmlSearchEngine::Scan(const wxFSFile& file)
1014 {
1015 wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!"));
1016
1017 wxHtmlFilterHTML filter;
1018 wxString bufStr = filter.ReadFile(file);
1019
1020 if (!m_CaseSensitive)
1021 bufStr.LowerCase();
1022
1023 { // remove html tags
1024 wxString bufStrCopy;
1025 bufStrCopy.reserve( bufStr.size() );
1026 bool insideTag = false;
1027 for (const wxChar * pBufStr = bufStr.c_str(); *pBufStr; ++pBufStr)
1028 {
1029 wxChar c = *pBufStr;
1030 if (insideTag)
1031 {
1032 if (c == _T('>'))
1033 {
1034 insideTag = false;
1035 // replace the tag by an empty space
1036 c = _T(' ');
1037 }
1038 else
1039 continue;
1040 }
1041 else if (c == _T('<'))
1042 {
1043 wxChar nextCh = *(pBufStr + 1);
1044 if (nextCh == _T('/') || !WHITESPACE(nextCh))
1045 {
1046 insideTag = true;
1047 continue;
1048 }
1049 }
1050 bufStrCopy += c;
1051 }
1052 bufStr.swap( bufStrCopy );
1053 }
1054
1055 wxString keyword = m_Keyword;
1056
1057 if (m_WholeWords)
1058 {
1059 // insert ' ' at the beginning and at the end
1060 keyword.insert( 0, _T(" ") );
1061 keyword.append( _T(" ") );
1062 bufStr.insert( 0, _T(" ") );
1063 bufStr.append( _T(" ") );
1064 }
1065
1066 // remove continuous spaces
1067 keyword = CompressSpaces( keyword );
1068 bufStr = CompressSpaces( bufStr );
1069
1070 // finally do the search
1071 return bufStr.find( keyword ) != wxString::npos;
1072 }
1073
1074 #endif