Cope with the common case of utf-8 being specified in the .hhp file, and convert...
[wxWidgets.git] / src / html / helpdata.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/helpdata.cpp
3 // Purpose: wxHtmlHelpData
4 // Notes: Based on htmlhelp.cpp, implementing a monolithic
5 // HTML Help controller class, by Vaclav Slavik
6 // Author: Harm van der Heijden and Vaclav Slavik
7 // RCS-ID: $Id$
8 // Copyright: (c) Harm van der Heijden and Vaclav Slavik
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
11
12 // For compilers that support precompilation, includes "wx.h".
13 #include "wx/wxprec.h"
14
15 #ifdef __BORLANDC__
16 #pragma hdrstop
17 #endif
18
19 #if wxUSE_HTML && wxUSE_STREAMS
20
21 #ifndef WX_PRECOMP
22 #include "wx/intl.h"
23 #include "wx/log.h"
24 #endif
25
26 #include <ctype.h>
27
28 #include "wx/html/helpdata.h"
29 #include "wx/tokenzr.h"
30 #include "wx/wfstream.h"
31 #include "wx/busyinfo.h"
32 #include "wx/encconv.h"
33 #include "wx/fontmap.h"
34 #include "wx/html/htmlpars.h"
35 #include "wx/html/htmldefs.h"
36 #include "wx/html/htmlfilt.h"
37 #include "wx/filename.h"
38
39 #include "wx/arrimpl.cpp"
40 WX_DEFINE_OBJARRAY(wxHtmlBookRecArray)
41 WX_DEFINE_OBJARRAY(wxHtmlHelpDataItems)
42
43 //-----------------------------------------------------------------------------
44 // static helper functions
45 //-----------------------------------------------------------------------------
46
47 // Reads one line, stores it into buf and returns pointer to new line or NULL.
48 static const wxChar* ReadLine(const wxChar *line, wxChar *buf, size_t bufsize)
49 {
50 wxChar *writeptr = buf;
51 wxChar *endptr = buf + bufsize - 1;
52 const wxChar *readptr = line;
53
54 while (*readptr != 0 && *readptr != wxT('\r') && *readptr != wxT('\n') &&
55 writeptr != endptr)
56 *(writeptr++) = *(readptr++);
57 *writeptr = 0;
58 while (*readptr == wxT('\r') || *readptr == wxT('\n'))
59 readptr++;
60 if (*readptr == 0)
61 return NULL;
62 else
63 return readptr;
64 }
65
66
67
68 static int
69 wxHtmlHelpIndexCompareFunc(wxHtmlHelpDataItem **a, wxHtmlHelpDataItem **b)
70 {
71 wxHtmlHelpDataItem *ia = *a;
72 wxHtmlHelpDataItem *ib = *b;
73
74 if (ia == NULL)
75 return -1;
76 if (ib == NULL)
77 return 1;
78
79 if (ia->parent == ib->parent)
80 {
81 return ia->name.CmpNoCase(ib->name);
82 }
83 else if (ia->level == ib->level)
84 {
85 return wxHtmlHelpIndexCompareFunc(&ia->parent, &ib->parent);
86 }
87 else
88 {
89 wxHtmlHelpDataItem *ia2 = ia;
90 wxHtmlHelpDataItem *ib2 = ib;
91
92 while (ia2->level > ib2->level)
93 {
94 ia2 = ia2->parent;
95 }
96 while (ib2->level > ia2->level)
97 {
98 ib2 = ib2->parent;
99 }
100
101 wxASSERT(ia2);
102 wxASSERT(ib2);
103 int res = wxHtmlHelpIndexCompareFunc(&ia2, &ib2);
104 if (res != 0)
105 return res;
106 else if (ia->level > ib->level)
107 return 1;
108 else
109 return -1;
110 }
111 }
112
113 //-----------------------------------------------------------------------------
114 // HP_Parser
115 //-----------------------------------------------------------------------------
116
117 class HP_Parser : public wxHtmlParser
118 {
119 public:
120 HP_Parser()
121 {
122 GetEntitiesParser()->SetEncoding(wxFONTENCODING_ISO8859_1);
123 }
124
125 wxObject* GetProduct() { return NULL; }
126
127 protected:
128 virtual void AddText(const wxString& WXUNUSED(txt)) {}
129
130 wxDECLARE_NO_COPY_CLASS(HP_Parser);
131 };
132
133
134 //-----------------------------------------------------------------------------
135 // HP_TagHandler
136 //-----------------------------------------------------------------------------
137
138 class HP_TagHandler : public wxHtmlTagHandler
139 {
140 private:
141 wxString m_name, m_page;
142 int m_level;
143 int m_id;
144 int m_index;
145 int m_count;
146 wxHtmlHelpDataItem *m_parentItem;
147 wxHtmlBookRecord *m_book;
148
149 wxHtmlHelpDataItems *m_data;
150
151 public:
152 HP_TagHandler(wxHtmlBookRecord *b) : wxHtmlTagHandler()
153 {
154 m_data = NULL;
155 m_book = b;
156 m_name = m_page = wxEmptyString;
157 m_level = 0;
158 m_id = wxID_ANY;
159 m_count = 0;
160 m_parentItem = NULL;
161 }
162 wxString GetSupportedTags() { return wxT("UL,OBJECT,PARAM"); }
163 bool HandleTag(const wxHtmlTag& tag);
164
165 void Reset(wxHtmlHelpDataItems& data)
166 {
167 m_data = &data;
168 m_count = 0;
169 m_level = 0;
170 m_parentItem = NULL;
171 }
172
173 wxDECLARE_NO_COPY_CLASS(HP_TagHandler);
174 };
175
176
177 bool HP_TagHandler::HandleTag(const wxHtmlTag& tag)
178 {
179 if (tag.GetName() == wxT("UL"))
180 {
181 wxHtmlHelpDataItem *oldparent = m_parentItem;
182 m_level++;
183 m_parentItem = (m_count > 0) ? &(*m_data)[m_data->size()-1] : NULL;
184 ParseInner(tag);
185 m_level--;
186 m_parentItem = oldparent;
187 return true;
188 }
189 else if (tag.GetName() == wxT("OBJECT"))
190 {
191 m_name = m_page = wxEmptyString;
192 ParseInner(tag);
193
194 #if 0
195 if (!page.IsEmpty())
196 /* Valid HHW's file may contain only two object tags:
197
198 <OBJECT type="text/site properties">
199 <param name="ImageType" value="Folder">
200 </OBJECT>
201
202 or
203
204 <OBJECT type="text/sitemap">
205 <param name="Name" value="main page">
206 <param name="Local" value="another.htm">
207 </OBJECT>
208
209 We're interested in the latter. !page.IsEmpty() is valid
210 condition because text/site properties does not contain Local param
211 */
212 #endif
213 if (tag.GetParam(wxT("TYPE")) == wxT("text/sitemap"))
214 {
215 wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem();
216 item->parent = m_parentItem;
217 item->level = m_level;
218 item->id = m_id;
219 item->page = m_page;
220 item->name = m_name;
221
222 item->book = m_book;
223 m_data->Add(item);
224 m_count++;
225 }
226
227 return true;
228 }
229 else
230 { // "PARAM"
231 if (m_name.empty() && tag.GetParam(wxT("NAME")) == wxT("Name"))
232 m_name = tag.GetParam(wxT("VALUE"));
233 if (tag.GetParam(wxT("NAME")) == wxT("Local"))
234 m_page = tag.GetParam(wxT("VALUE"));
235 if (tag.GetParam(wxT("NAME")) == wxT("ID"))
236 tag.GetParamAsInt(wxT("VALUE"), &m_id);
237 return false;
238 }
239 }
240
241
242 //-----------------------------------------------------------------------------
243 // wxHtmlHelpData
244 //-----------------------------------------------------------------------------
245
246 wxString wxHtmlBookRecord::GetFullPath(const wxString &page) const
247 {
248 if (wxIsAbsolutePath(page))
249 return page;
250 else
251 return m_BasePath + page;
252 }
253
254 wxString wxHtmlHelpDataItem::GetIndentedName() const
255 {
256 wxString s;
257 for (int i = 1; i < level; i++)
258 s << wxT(" ");
259 s << name;
260 return s;
261 }
262
263
264 IMPLEMENT_DYNAMIC_CLASS(wxHtmlHelpData, wxObject)
265
266 wxHtmlHelpData::wxHtmlHelpData()
267 {
268 }
269
270 wxHtmlHelpData::~wxHtmlHelpData()
271 {
272 }
273
274 bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys,
275 const wxString& indexfile,
276 const wxString& contentsfile)
277 {
278 wxFSFile *f;
279 wxHtmlFilterHTML filter;
280 wxString buf;
281 wxString string;
282
283 HP_Parser parser;
284 HP_TagHandler *handler = new HP_TagHandler(book);
285 parser.AddTagHandler(handler);
286
287 f = ( contentsfile.empty() ? NULL : fsys.OpenFile(contentsfile) );
288 if (f)
289 {
290 buf.clear();
291 buf = filter.ReadFile(*f);
292 delete f;
293 handler->Reset(m_contents);
294 parser.Parse(buf);
295 }
296 else
297 {
298 wxLogError(_("Cannot open contents file: %s"), contentsfile.c_str());
299 }
300
301 f = ( indexfile.empty() ? NULL : fsys.OpenFile(indexfile) );
302 if (f)
303 {
304 buf.clear();
305 buf = filter.ReadFile(*f);
306 delete f;
307 handler->Reset(m_index);
308 parser.Parse(buf);
309 }
310 else if (!indexfile.empty())
311 {
312 wxLogError(_("Cannot open index file: %s"), indexfile.c_str());
313 }
314 return true;
315 }
316
317 inline static void CacheWriteInt32(wxOutputStream *f, wxInt32 value)
318 {
319 wxInt32 x = wxINT32_SWAP_ON_BE(value);
320 f->Write(&x, sizeof(x));
321 }
322
323 inline static wxInt32 CacheReadInt32(wxInputStream *f)
324 {
325 wxInt32 x;
326 f->Read(&x, sizeof(x));
327 return wxINT32_SWAP_ON_BE(x);
328 }
329
330 inline static void CacheWriteString(wxOutputStream *f, const wxString& str)
331 {
332 const wxWX2MBbuf mbstr = str.mb_str(wxConvUTF8);
333 size_t len = strlen((const char*)mbstr)+1;
334 CacheWriteInt32(f, len);
335 f->Write((const char*)mbstr, len);
336 }
337
338 inline static wxString CacheReadString(wxInputStream *f)
339 {
340 size_t len = (size_t)CacheReadInt32(f);
341 wxCharBuffer str(len-1);
342 f->Read(str.data(), len);
343 return wxString(str, wxConvUTF8);
344 }
345
346 #define CURRENT_CACHED_BOOK_VERSION 5
347
348 // Additional flags to detect incompatibilities of the runtime environment:
349 #define CACHED_BOOK_FORMAT_FLAGS \
350 (wxUSE_UNICODE << 0)
351
352
353 bool wxHtmlHelpData::LoadCachedBook(wxHtmlBookRecord *book, wxInputStream *f)
354 {
355 int i, st, newsize;
356 wxInt32 version;
357
358 /* load header - version info : */
359 version = CacheReadInt32(f);
360
361 if (version != CURRENT_CACHED_BOOK_VERSION)
362 {
363 // NB: We can just silently return false here and don't worry about
364 // it anymore, because AddBookParam will load the MS project in
365 // absence of (properly versioned) .cached file and automatically
366 // create new .cached file immediately afterward.
367 return false;
368 }
369
370 if (CacheReadInt32(f) != CACHED_BOOK_FORMAT_FLAGS)
371 return false;
372
373 /* load contents : */
374 st = m_contents.size();
375 newsize = st + CacheReadInt32(f);
376 m_contents.Alloc(newsize);
377 for (i = st; i < newsize; i++)
378 {
379 wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem;
380 item->level = CacheReadInt32(f);
381 item->id = CacheReadInt32(f);
382 item->name = CacheReadString(f);
383 item->page = CacheReadString(f);
384 item->book = book;
385 m_contents.Add(item);
386 }
387
388 /* load index : */
389 st = m_index.size();
390 newsize = st + CacheReadInt32(f);
391 m_index.Alloc(newsize);
392 for (i = st; i < newsize; i++)
393 {
394 wxHtmlHelpDataItem *item = new wxHtmlHelpDataItem;
395 item->name = CacheReadString(f);
396 item->page = CacheReadString(f);
397 item->level = CacheReadInt32(f);
398 item->book = book;
399 int parentShift = CacheReadInt32(f);
400 if (parentShift != 0)
401 item->parent = &m_index[m_index.size() - parentShift];
402 m_index.Add(item);
403 }
404 return true;
405 }
406
407
408 bool wxHtmlHelpData::SaveCachedBook(wxHtmlBookRecord *book, wxOutputStream *f)
409 {
410 int i;
411 wxInt32 cnt;
412
413 /* save header - version info : */
414 CacheWriteInt32(f, CURRENT_CACHED_BOOK_VERSION);
415 CacheWriteInt32(f, CACHED_BOOK_FORMAT_FLAGS);
416
417 /* save contents : */
418 int len = m_contents.size();
419 for (cnt = 0, i = 0; i < len; i++)
420 if (m_contents[i].book == book && m_contents[i].level > 0)
421 cnt++;
422 CacheWriteInt32(f, cnt);
423
424 for (i = 0; i < len; i++)
425 {
426 if (m_contents[i].book != book || m_contents[i].level == 0)
427 continue;
428 CacheWriteInt32(f, m_contents[i].level);
429 CacheWriteInt32(f, m_contents[i].id);
430 CacheWriteString(f, m_contents[i].name);
431 CacheWriteString(f, m_contents[i].page);
432 }
433
434 /* save index : */
435 len = m_index.size();
436 for (cnt = 0, i = 0; i < len; i++)
437 if (m_index[i].book == book && m_index[i].level > 0)
438 cnt++;
439 CacheWriteInt32(f, cnt);
440
441 for (i = 0; i < len; i++)
442 {
443 if (m_index[i].book != book || m_index[i].level == 0)
444 continue;
445 CacheWriteString(f, m_index[i].name);
446 CacheWriteString(f, m_index[i].page);
447 CacheWriteInt32(f, m_index[i].level);
448 // save distance to parent item, if any:
449 if (m_index[i].parent == NULL)
450 {
451 CacheWriteInt32(f, 0);
452 }
453 else
454 {
455 int cnt2 = 0;
456 wxHtmlHelpDataItem *parent = m_index[i].parent;
457 for (int j = i-1; j >= 0; j--)
458 {
459 if (m_index[j].book == book && m_index[j].level > 0)
460 cnt2++;
461 if (&m_index[j] == parent)
462 break;
463 }
464 wxASSERT(cnt2 > 0);
465 CacheWriteInt32(f, cnt2);
466 }
467 }
468 return true;
469 }
470
471
472 void wxHtmlHelpData::SetTempDir(const wxString& path)
473 {
474 if (path.empty())
475 m_tempPath = path;
476 else
477 {
478 wxFileName fn(path);
479 fn.MakeAbsolute();
480
481 m_tempPath = fn.GetPath(wxPATH_GET_VOLUME | wxPATH_GET_SEPARATOR);
482 }
483 }
484
485
486
487 static wxString SafeFileName(const wxString& s)
488 {
489 wxString res(s);
490 res.Replace(wxT("#"), wxT("_"));
491 res.Replace(wxT(":"), wxT("_"));
492 res.Replace(wxT("\\"), wxT("_"));
493 res.Replace(wxT("/"), wxT("_"));
494 return res;
495 }
496
497 bool wxHtmlHelpData::AddBookParam(const wxFSFile& bookfile,
498 wxFontEncoding encoding,
499 const wxString& title, const wxString& contfile,
500 const wxString& indexfile, const wxString& deftopic,
501 const wxString& path)
502 {
503 wxFileSystem fsys;
504 wxFSFile *fi;
505 wxHtmlBookRecord *bookr;
506
507 int IndexOld = m_index.size(),
508 ContentsOld = m_contents.size();
509
510 if (!path.empty())
511 fsys.ChangePathTo(path, true);
512
513 size_t booksCnt = m_bookRecords.GetCount();
514 for (size_t i = 0; i < booksCnt; i++)
515 {
516 if ( m_bookRecords[i].GetBookFile() == bookfile.GetLocation() )
517 return true; // book is (was) loaded
518 }
519
520 bookr = new wxHtmlBookRecord(bookfile.GetLocation(), fsys.GetPath(), title, deftopic);
521
522 wxHtmlHelpDataItem *bookitem = new wxHtmlHelpDataItem;
523 bookitem->level = 0;
524 bookitem->id = 0;
525 bookitem->page = deftopic;
526 bookitem->name = title;
527 bookitem->book = bookr;
528
529 // store the contents index for later
530 int cont_start = m_contents.size();
531
532 m_contents.Add(bookitem);
533
534 // Try to find cached binary versions:
535 // 1. save file as book, but with .hhp.cached extension
536 // 2. same as 1. but in temp path
537 // 3. otherwise or if cache load failed, load it from MS.
538
539 fi = fsys.OpenFile(bookfile.GetLocation() + wxT(".cached"));
540
541 if (fi == NULL ||
542 #if wxUSE_DATETIME
543 fi->GetModificationTime() < bookfile.GetModificationTime() ||
544 #endif // wxUSE_DATETIME
545 !LoadCachedBook(bookr, fi->GetStream()))
546 {
547 if (fi != NULL) delete fi;
548 fi = fsys.OpenFile(m_tempPath + wxFileNameFromPath(bookfile.GetLocation()) + wxT(".cached"));
549 if (m_tempPath.empty() || fi == NULL ||
550 #if wxUSE_DATETIME
551 fi->GetModificationTime() < bookfile.GetModificationTime() ||
552 #endif // wxUSE_DATETIME
553 !LoadCachedBook(bookr, fi->GetStream()))
554 {
555 LoadMSProject(bookr, fsys, indexfile, contfile);
556 if (!m_tempPath.empty())
557 {
558 wxFileOutputStream *outs = new wxFileOutputStream(m_tempPath +
559 SafeFileName(wxFileNameFromPath(bookfile.GetLocation())) + wxT(".cached"));
560 SaveCachedBook(bookr, outs);
561 delete outs;
562 }
563 }
564 }
565
566 if (fi != NULL) delete fi;
567
568 // Now store the contents range
569 bookr->SetContentsRange(cont_start, m_contents.size());
570
571 // MS HTML Help files [written by MS HTML Help Workshop] are broken
572 // in that the data are iso-8859-1 (including HTML entities), but must
573 // be interpreted as being in language's windows charset. Correct the
574 // differences here and also convert to wxConvLocal in ANSI build
575 if (encoding != wxFONTENCODING_SYSTEM)
576 {
577 #if wxUSE_UNICODE
578 #define CORRECT_STR(str, conv) \
579 str = wxString((str).mb_str(wxConvISO8859_1), conv)
580 #else
581 #define CORRECT_STR(str, conv) \
582 str = wxString((str).wc_str(conv), wxConvLocal)
583 #endif
584 wxCSConv conv(encoding);
585 size_t IndexCnt = m_index.size();
586 size_t ContentsCnt = m_contents.size();
587 size_t i;
588 for (i = IndexOld; i < IndexCnt; i++)
589 {
590 CORRECT_STR(m_index[i].name, conv);
591 }
592 for (i = ContentsOld; i < ContentsCnt; i++)
593 {
594 CORRECT_STR(m_contents[i].name, conv);
595 }
596 #undef CORRECT_STR
597 }
598
599 m_bookRecords.Add(bookr);
600 if (!m_index.empty())
601 {
602 m_index.Sort(wxHtmlHelpIndexCompareFunc);
603 }
604
605 return true;
606 }
607
608
609 bool wxHtmlHelpData::AddBook(const wxString& book)
610 {
611 wxString extension(book.Right(4).Lower());
612 if (extension == wxT(".zip") ||
613 #if wxUSE_LIBMSPACK
614 extension == wxT(".chm") /*compressed html help book*/ ||
615 #endif
616 extension == wxT(".htb") /*html book*/)
617 {
618 wxFileSystem fsys;
619 wxString s;
620 bool rt = false;
621
622 #if wxUSE_LIBMSPACK
623 if (extension == wxT(".chm"))
624 s = fsys.FindFirst(book + wxT("#chm:*.hhp"), wxFILE);
625 else
626 #endif
627 s = fsys.FindFirst(book + wxT("#zip:*.hhp"), wxFILE);
628
629 while (!s.empty())
630 {
631 if (AddBook(s)) rt = true;
632 s = fsys.FindNext();
633 }
634
635 return rt;
636 }
637
638 wxFSFile *fi;
639 wxFileSystem fsys;
640
641 wxString title = _("noname"),
642 safetitle,
643 start = wxEmptyString,
644 contents = wxEmptyString,
645 index = wxEmptyString,
646 charset = wxEmptyString;
647
648 fi = fsys.OpenFile(book);
649 if (fi == NULL)
650 {
651 wxLogError(_("Cannot open HTML help book: %s"), book.c_str());
652 return false;
653 }
654 fsys.ChangePathTo(book);
655
656 const wxChar *lineptr;
657 wxChar linebuf[300];
658 wxString tmp;
659 wxHtmlFilterPlainText filter;
660 tmp = filter.ReadFile(*fi);
661 lineptr = tmp.c_str();
662
663 do
664 {
665 lineptr = ReadLine(lineptr, linebuf, 300);
666
667 for (wxChar *ch = linebuf; *ch != wxT('\0') && *ch != wxT('='); ch++)
668 *ch = (wxChar)wxTolower(*ch);
669
670 if (wxStrstr(linebuf, wxT("title=")) == linebuf)
671 title = linebuf + wxStrlen(wxT("title="));
672 if (wxStrstr(linebuf, wxT("default topic=")) == linebuf)
673 start = linebuf + wxStrlen(wxT("default topic="));
674 if (wxStrstr(linebuf, wxT("index file=")) == linebuf)
675 index = linebuf + wxStrlen(wxT("index file="));
676 if (wxStrstr(linebuf, wxT("contents file=")) == linebuf)
677 contents = linebuf + wxStrlen(wxT("contents file="));
678 if (wxStrstr(linebuf, wxT("charset=")) == linebuf)
679 charset = linebuf + wxStrlen(wxT("charset="));
680 } while (lineptr != NULL);
681
682 wxFontEncoding enc = wxFONTENCODING_SYSTEM;
683 #if wxUSE_FONTMAP
684 if (charset != wxEmptyString)
685 enc = wxFontMapper::Get()->CharsetToEncoding(charset);
686 #endif
687
688 // No conversion was done on the title yet; at least
689 // test for a common case.
690 if (charset == wxT("utf-8") && !title.IsEmpty())
691 {
692 char *buf = new char[title.Length()+1];
693 size_t i;
694 for (i = 0; i < title.Length(); i++)
695 buf[i] = (char) title[i];
696 buf[i] = 0;
697 title = wxString::FromUTF8(buf);
698 delete[] buf;
699 }
700
701 bool rtval = AddBookParam(*fi, enc,
702 title, contents, index, start, fsys.GetPath());
703 delete fi;
704
705 return rtval;
706 }
707
708 wxString wxHtmlHelpData::FindPageByName(const wxString& x)
709 {
710 int i;
711
712 bool has_non_ascii = false;
713 wxString::const_iterator it;
714 for (it = x.begin(); it != x.end(); ++it)
715 {
716 wxUniChar ch = *it;
717 if (!ch.IsAscii())
718 {
719 has_non_ascii = true;
720 break;
721 }
722 }
723
724 int cnt = m_bookRecords.GetCount();
725
726 if (!has_non_ascii)
727 {
728 wxFileSystem fsys;
729 wxFSFile *f;
730 // 1. try to open given file:
731 for (i = 0; i < cnt; i++)
732 {
733 f = fsys.OpenFile(m_bookRecords[i].GetFullPath(x));
734 if (f)
735 {
736 wxString url = m_bookRecords[i].GetFullPath(x);
737 delete f;
738 return url;
739 }
740 }
741 }
742
743
744 // 2. try to find a book:
745 for (i = 0; i < cnt; i++)
746 {
747 if (m_bookRecords[i].GetTitle() == x)
748 return m_bookRecords[i].GetFullPath(m_bookRecords[i].GetStart());
749 }
750
751 // 3. try to find in contents:
752 cnt = m_contents.size();
753 for (i = 0; i < cnt; i++)
754 {
755 if (m_contents[i].name == x)
756 return m_contents[i].GetFullPath();
757 }
758
759
760 // 4. try to find in index:
761 cnt = m_index.size();
762 for (i = 0; i < cnt; i++)
763 {
764 if (m_index[i].name == x)
765 return m_index[i].GetFullPath();
766 }
767
768 // 4b. if still not found, try case-insensitive comparison
769 for (i = 0; i < cnt; i++)
770 {
771 if (m_index[i].name.CmpNoCase(x) == 0)
772 return m_index[i].GetFullPath();
773 }
774
775 return wxEmptyString;
776 }
777
778 wxString wxHtmlHelpData::FindPageById(int id)
779 {
780 size_t cnt = m_contents.size();
781 for (size_t i = 0; i < cnt; i++)
782 {
783 if (m_contents[i].id == id)
784 {
785 return m_contents[i].GetFullPath();
786 }
787 }
788
789 return wxEmptyString;
790 }
791
792
793 //----------------------------------------------------------------------------------
794 // wxHtmlSearchStatus functions
795 //----------------------------------------------------------------------------------
796
797 wxHtmlSearchStatus::wxHtmlSearchStatus(wxHtmlHelpData* data, const wxString& keyword,
798 bool case_sensitive, bool whole_words_only,
799 const wxString& book)
800 {
801 m_Data = data;
802 m_Keyword = keyword;
803 wxHtmlBookRecord* bookr = NULL;
804 if (book != wxEmptyString)
805 {
806 // we have to search in a specific book. Find it first
807 int i, cnt = data->m_bookRecords.GetCount();
808 for (i = 0; i < cnt; i++)
809 if (data->m_bookRecords[i].GetTitle() == book)
810 {
811 bookr = &(data->m_bookRecords[i]);
812 m_CurIndex = bookr->GetContentsStart();
813 m_MaxIndex = bookr->GetContentsEnd();
814 break;
815 }
816 // check; we won't crash if the book doesn't exist, but it's Bad Anyway.
817 wxASSERT(bookr);
818 }
819 if (! bookr)
820 {
821 // no book specified; search all books
822 m_CurIndex = 0;
823 m_MaxIndex = m_Data->m_contents.size();
824 }
825 m_Engine.LookFor(keyword, case_sensitive, whole_words_only);
826 m_Active = (m_CurIndex < m_MaxIndex);
827 }
828
829 bool wxHtmlSearchStatus::Search()
830 {
831 wxFSFile *file;
832 int i = m_CurIndex; // shortcut
833 bool found = false;
834 wxString thepage;
835
836 if (!m_Active)
837 {
838 // sanity check. Illegal use, but we'll try to prevent a crash anyway
839 wxASSERT(m_Active);
840 return false;
841 }
842
843 m_Name = wxEmptyString;
844 m_CurItem = NULL;
845 thepage = m_Data->m_contents[i].page;
846
847 m_Active = (++m_CurIndex < m_MaxIndex);
848 // check if it is same page with different anchor:
849 if (!m_LastPage.empty())
850 {
851 const wxChar *p1, *p2;
852 for (p1 = thepage.c_str(), p2 = m_LastPage.c_str();
853 *p1 != 0 && *p1 != wxT('#') && *p1 == *p2; p1++, p2++) {}
854
855 m_LastPage = thepage;
856
857 if (*p1 == 0 || *p1 == wxT('#'))
858 return false;
859 }
860 else m_LastPage = thepage;
861
862 wxFileSystem fsys;
863 file = fsys.OpenFile(m_Data->m_contents[i].book->GetFullPath(thepage));
864 if (file)
865 {
866 if (m_Engine.Scan(*file))
867 {
868 m_Name = m_Data->m_contents[i].name;
869 m_CurItem = &m_Data->m_contents[i];
870 found = true;
871 }
872 delete file;
873 }
874 return found;
875 }
876
877
878
879
880
881
882
883
884 //--------------------------------------------------------------------------------
885 // wxHtmlSearchEngine
886 //--------------------------------------------------------------------------------
887
888 void wxHtmlSearchEngine::LookFor(const wxString& keyword, bool case_sensitive, bool whole_words_only)
889 {
890 m_CaseSensitive = case_sensitive;
891 m_WholeWords = whole_words_only;
892 m_Keyword = keyword;
893
894 if (!m_CaseSensitive)
895 m_Keyword.LowerCase();
896 }
897
898
899 static inline bool WHITESPACE(wxChar c)
900 {
901 return c == wxT(' ') || c == wxT('\n') || c == wxT('\r') || c == wxT('\t');
902 }
903
904 // replace continuous spaces by one single space
905 static inline wxString CompressSpaces(const wxString & str)
906 {
907 wxString buf;
908 buf.reserve( str.size() );
909
910 bool space_counted = false;
911 for( const wxChar * pstr = str.c_str(); *pstr; ++pstr )
912 {
913 wxChar ch = *pstr;
914 if( WHITESPACE( ch ) )
915 {
916 if( space_counted )
917 {
918 continue;
919 }
920 ch = wxT(' ');
921 space_counted = true;
922 }
923 else
924 {
925 space_counted = false;
926 }
927 buf += ch;
928 }
929
930 return buf;
931 }
932
933 bool wxHtmlSearchEngine::Scan(const wxFSFile& file)
934 {
935 wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!"));
936
937 wxHtmlFilterHTML filter;
938 wxString bufStr = filter.ReadFile(file);
939
940 if (!m_CaseSensitive)
941 bufStr.LowerCase();
942
943 { // remove html tags
944 wxString bufStrCopy;
945 bufStrCopy.reserve( bufStr.size() );
946 bool insideTag = false;
947 for (const wxChar * pBufStr = bufStr.c_str(); *pBufStr; ++pBufStr)
948 {
949 wxChar c = *pBufStr;
950 if (insideTag)
951 {
952 if (c == wxT('>'))
953 {
954 insideTag = false;
955 // replace the tag by an empty space
956 c = wxT(' ');
957 }
958 else
959 continue;
960 }
961 else if (c == wxT('<'))
962 {
963 wxChar nextCh = *(pBufStr + 1);
964 if (nextCh == wxT('/') || !WHITESPACE(nextCh))
965 {
966 insideTag = true;
967 continue;
968 }
969 }
970 bufStrCopy += c;
971 }
972 bufStr.swap( bufStrCopy );
973 }
974
975 wxString keyword = m_Keyword;
976
977 if (m_WholeWords)
978 {
979 // insert ' ' at the beginning and at the end
980 keyword.insert( 0, wxT(" ") );
981 keyword.append( wxT(" ") );
982 bufStr.insert( 0, wxT(" ") );
983 bufStr.append( wxT(" ") );
984 }
985
986 // remove continuous spaces
987 keyword = CompressSpaces( keyword );
988 bufStr = CompressSpaces( bufStr );
989
990 // finally do the search
991 return bufStr.find( keyword ) != wxString::npos;
992 }
993
994 #endif