[wxWidgets.git] / src / html / htmlfilt.cpp

/////////////////////////////////////////////////////////////////////////////
// Name:        htmlfilt.cpp
// Purpose:     wxHtmlFilter - input filter for translating into HTML format
// Author:      Vaclav Slavik
// RCS-ID:      $Id$
// Copyright:   (c) 1999 Vaclav Slavik
// Licence:     wxWindows licence
/////////////////////////////////////////////////////////////////////////////


#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
#pragma implementation "htmlfilt.h"
#endif

#include "wx/wxprec.h"

#include "wx/defs.h"
#if wxUSE_HTML && wxUSE_STREAMS

#ifdef __BORLANDC__
#pragma hdrstop
#endif

#ifndef WXPRECOMP
    #include "wx/log.h"
    #include "wx/intl.h"
#endif

#include "wx/buffer.h"
#include "wx/strconv.h"
#include "wx/html/htmlfilt.h"
#include "wx/html/htmlwin.h"

// Utility function: read everything that is left in a wxInputStream and
// decode it into a wxString.
//
//  str  - receives the decoded text. It is assigned when the stream reports
//         its size and appended to when it does not (this asymmetry is kept
//         from the previous implementation; the callers in this file always
//         pass an empty string).
//  s    - stream to read from; must not be NULL.
//  conv - converter used to turn the raw bytes into wxString characters.
//
// The raw bytes are always collected first and converted in one step:
// converting fixed-size chunks independently would cut multibyte sequences
// (e.g. UTF-8) that happen to straddle a chunk boundary.
static void ReadString(wxString& str, wxInputStream* s, wxMBConv& conv)
{
    const size_t streamSize = s->GetSize();
    const bool sizeKnown = (streamSize != ~(size_t)0);

    wxMemoryBuffer raw;

    if (sizeKnown)
    {
        s->Read(raw.GetAppendBuf(streamSize), streamSize);

        // The stream may deliver fewer bytes than it announced: only keep
        // what was really read so that no uninitialised memory is decoded.
        raw.UngetAppendBuf(s->LastRead());
    }
    else
    {
        const size_t chunkSize = 4096;
        size_t lastRead;

        // A short read does not necessarily mean end of stream, so keep
        // going until a read yields nothing at all.
        do
        {
            s->Read(raw.GetAppendBuf(chunkSize), chunkSize);
            lastRead = s->LastRead();
            raw.UngetAppendBuf(lastRead);
        }
        while (lastRead > 0);
    }

    // wxString's converting constructor expects a NUL-terminated string
    raw.AppendByte(0);
    const wxString text((const char*)raw.GetData(), conv);

    if (sizeKnown)
        str = text;
    else
        str.Append(text);
}

/*

Implementations of the default filters follow:

*/

// wxWidgets RTTI information for wxHtmlFilter (declared abstract, base class
// wxObject)
IMPLEMENT_ABSTRACT_CLASS(wxHtmlFilter, wxObject)

//--------------------------------------------------------------------------------
// wxHtmlFilterPlainText
//          filter for text/plain or unknown
//--------------------------------------------------------------------------------

IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterPlainText, wxHtmlFilter)

// Accepts every file: the argument is deliberately ignored and the answer is
// always TRUE.
bool wxHtmlFilterPlainText::CanRead(const wxFSFile& WXUNUSED(file)) const
{
    return TRUE;
}


// Reads the file as ISO-8859-1 text and wraps it into a minimal HTML page:
// '&', '<' and '>' are replaced by the corresponding entities and the result
// is placed inside <PRE>...</PRE>. Returns an empty string if the file has no
// stream.
wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const
{
    wxInputStream* const input = file.GetStream();
    if (!input)
        return wxEmptyString;

    wxString text;
    ReadString(text, input, wxConvISO8859_1);

    // '&' has to be escaped first, otherwise the ampersands introduced by the
    // two other replacements would get escaped a second time
    text.Replace(wxT("&"), wxT("&amp;"), TRUE);
    text.Replace(wxT("<"), wxT("&lt;"), TRUE);
    text.Replace(wxT(">"), wxT("&gt;"), TRUE);

    wxString page(wxT("<HTML><BODY><PRE>\n"));
    page += text;
    page += wxT("\n</PRE></BODY></HTML>");
    return page;
}


//--------------------------------------------------------------------------------
// wxHtmlFilterImage
//          filter for image/*
//--------------------------------------------------------------------------------

// Filter for files whose MIME type starts with "image/": instead of reading
// the file it produces a tiny HTML page that references it through an <IMG>
// tag. The class is private to this file.
class wxHtmlFilterImage : public wxHtmlFilter
{
    DECLARE_DYNAMIC_CLASS(wxHtmlFilterImage)

    public:
        // TRUE if the MIME type of the file begins with "image/"
        virtual bool CanRead(const wxFSFile& file) const;
        // returns the HTML page embedding the image found at file's location
        virtual wxString ReadFile(const wxFSFile& file) const;
};

IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterImage, wxHtmlFilter)


// Accepts the file when its MIME type begins with "image/".
bool wxHtmlFilterImage::CanRead(const wxFSFile& file) const
{
    // 6 == length of "image/"
    const wxString typePrefix = file.GetMimeType().Left(6);
    return typePrefix == wxT("image/");
}


// Builds an HTML page whose body consists of a single <IMG> tag pointing at
// the location of the file. The file contents are not read here and the
// location is inserted as is.
wxString wxHtmlFilterImage::ReadFile(const wxFSFile& file) const
{
    wxString page(wxT("<HTML><BODY><IMG SRC=\""));
    page += file.GetLocation();
    page += wxT("\"></BODY></HTML>");
    return page;
}


//--------------------------------------------------------------------------------
// wxHtmlFilterHTML
//          filter for text/html
//--------------------------------------------------------------------------------


IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterHTML, wxHtmlFilter)

// Accepts the file when its MIME type begins with "text/html".
bool wxHtmlFilterHTML::CanRead(const wxFSFile& file) const
{
    // An exact comparison with "text/html" would be too strict: the type may
    // carry additional parameters, e.g. "text/html; charset=...", so only
    // require the string to start with it.
    const wxString mimeType = file.GetMimeType();
    const int where = mimeType.Find(wxT("text/html"));
    return where == 0;
}


// Reads an HTML document from the file's stream and returns it as a wxString.
//
// If the file has no stream, an error is logged and an empty string is
// returned.
//
// NB: In Unicode mode the input is converted to wchar_t here, using the
//     charset given either by the MIME type ("; charset=...") or by the
//     document's <meta> tags. In ANSI mode the conversion is left to
//     wxHtmlParser; for this reason a <meta> tag carrying the MIME type is
//     put in front of the document whenever the MIME type is not empty.
wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const
{
    wxInputStream* const input = file.GetStream();
    if (!input)
    {
        wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str());
        return wxEmptyString;
    }

    wxString html;

#if wxUSE_UNICODE
    const int charsetPos = file.GetMimeType().Find(_T("; charset="));
    if (charsetPos == wxNOT_FOUND)
    {
        // No charset in the MIME type: read the bytes as ISO-8859-1 first so
        // that the document itself can be searched for charset information.
        wxString latin1;
        ReadString(latin1, input, wxConvISO8859_1);

        const wxString metaCharset = wxHtmlParser::ExtractCharsetInformation(latin1);
        if (metaCharset.empty())
        {
            html = latin1;
        }
        else
        {
            // go back to the original bytes and decode them with the charset
            // announced by the document
            wxCSConv metaConv(metaCharset);
            html = wxString(latin1.mb_str(wxConvISO8859_1), metaConv);
        }
    }
    else
    {
        // the charset name starts right after "; charset=" (10 characters)
        const wxString headerCharset = file.GetMimeType().Mid(charsetPos + 10);
        wxCSConv headerConv(headerCharset);
        ReadString(html, input, headerConv);
    }
#else // !wxUSE_UNICODE
    ReadString(html, input, wxConvLibc);

    // add meta tag if we obtained this through http:
    const wxString mimeType = file.GetMimeType();
    if (!mimeType.empty())
    {
        wxString metaTag;
        metaTag.Printf(_T("<meta http-equiv=\"Content-Type\" content=\"%s\">"), mimeType.c_str());
        return metaTag + html;
    }
#endif

    return html;
}


///// Module:

// Module that adds the HTML and image filters defined in this file to
// wxHtmlWindow when it is initialized. The plain text filter is not added
// here.
class wxHtmlFilterModule : public wxModule
{
    DECLARE_DYNAMIC_CLASS(wxHtmlFilterModule)

    public:
        // Adds one wxHtmlFilterHTML and one wxHtmlFilterImage instance to
        // wxHtmlWindow and reports success.
        // NOTE(review): the filters are never deleted in this file;
        // presumably wxHtmlWindow takes ownership of them - confirm.
        virtual bool OnInit()
        {
            wxHtmlWindow::AddFilter(new wxHtmlFilterHTML);
            wxHtmlWindow::AddFilter(new wxHtmlFilterImage);
            return TRUE;
        }
        // nothing to clean up here
        virtual void OnExit() {}
};

IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterModule, wxModule)

#endif
Commit	Line	Data
5526e819	1	/////////////////////////////////////////////////////////////////////////////
69941f05	2	// Name: htmlfilt.cpp
5526e819 VS	3	// Purpose: wxHtmlFilter - input filter for translating into HTML format
5526e819 VS	4	// Author: Vaclav Slavik
69941f05	5	// RCS-ID: $Id$
5526e819	6	// Copyright: (c) 1999 Vaclav Slavik
65571936	7	// Licence: wxWindows licence
5526e819 VS	8	/////////////////////////////////////////////////////////////////////////////
	9
	10
14f355c2	11	#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
1aedb1dd	12	#pragma implementation "htmlfilt.h"
5526e819 VS	13	#endif
5526e819 VS	14
4dcaf11a	15	#include "wx/wxprec.h"
5526e819	16
314260fb	17	#include "wx/defs.h"
f6bcfd97	18	#if wxUSE_HTML && wxUSE_STREAMS
5526e819	19
2b5f62a0	20	#ifdef __BORLANDC__
5526e819 VS	21	#pragma hdrstop
	22	#endif
	23
	24	#ifndef WXPRECOMP
04dbb646 VZ	25	#include "wx/log.h"
04dbb646 VZ	26	#include "wx/intl.h"
5526e819 VS	27	#endif
5526e819 VS	28
2b5f62a0	29	#include "wx/strconv.h"
69941f05	30	#include "wx/html/htmlfilt.h"
4dcaf11a	31	#include "wx/html/htmlwin.h"
5526e819	32
eb37e1d2	33	// utility function: read a wxString from a wxInputStream
2b5f62a0	34	static void ReadString(wxString& str, wxInputStream* s, wxMBConv& conv)
eb37e1d2 MB	35	{
	36	size_t streamSize = s->GetSize();
	37
2b5f62a0	38	if (streamSize == ~(size_t)0)
eb37e1d2 MB	39	{
	40	const size_t bufSize = 4095;
	41	char buffer[bufSize+1];
	42	size_t lastRead;
	43
	44	do
	45	{
	46	s->Read(buffer, bufSize);
	47	lastRead = s->LastRead();
	48	buffer[lastRead] = 0;
2b5f62a0	49	str.Append(wxString(buffer, conv));
eb37e1d2	50	}
2b5f62a0	51	while (lastRead == bufSize);
eb37e1d2 MB	52	}
	53	else
	54	{
	55	char* src = new char[streamSize+1];
	56	s->Read(src, streamSize);
	57	src[streamSize] = 0;
2b5f62a0 VZ	58	str = wxString(src, conv);
2b5f62a0 VZ	59	delete[] src;
eb37e1d2 MB	60	}
eb37e1d2 MB	61	}
5526e819 VS	62
	63	/*
	64
	65	There is code for several default filters:
	66
	67	*/
	68
	69	IMPLEMENT_ABSTRACT_CLASS(wxHtmlFilter, wxObject)
	70
	71	//--------------------------------------------------------------------------------
	72	// wxHtmlFilterPlainText
	73	// filter for text/plain or uknown
	74	//--------------------------------------------------------------------------------
	75
	76	IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterPlainText, wxHtmlFilter)
	77
a4c97004	78	bool wxHtmlFilterPlainText::CanRead(const wxFSFile& WXUNUSED(file)) const
5526e819 VS	79	{
	80	return TRUE;
	81	}
	82
	83
	84
420ec58a	85	wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const
5526e819 VS	86	{
5526e819 VS	87	wxInputStream *s = file.GetStream();
5526e819 VS	88	wxString doc, doc2;
	89
	90	if (s == NULL) return wxEmptyString;
2b5f62a0	91	ReadString(doc, s, wxConvISO8859_1);
5526e819	92
eb37e1d2	93	doc.Replace(wxT("&"), wxT("&amp;"), TRUE);
66a77a74 OK	94	doc.Replace(wxT("<"), wxT("&lt;"), TRUE);
66a77a74 OK	95	doc.Replace(wxT(">"), wxT("&gt;"), TRUE);
2b5f62a0	96	doc2 = wxT("<HTML><BODY><PRE>\n") + doc + wxT("\n</PRE></BODY></HTML>");
5526e819 VS	97	return doc2;
	98	}
	99
	100
	101
	102
	103
	104	//--------------------------------------------------------------------------------
	105	// wxHtmlFilterImage
	106	// filter for image/*
	107	//--------------------------------------------------------------------------------
	108
	109	class wxHtmlFilterImage : public wxHtmlFilter
	110	{
	111	DECLARE_DYNAMIC_CLASS(wxHtmlFilterImage)
	112
	113	public:
420ec58a VS	114	virtual bool CanRead(const wxFSFile& file) const;
420ec58a VS	115	virtual wxString ReadFile(const wxFSFile& file) const;
5526e819 VS	116	};
	117
	118	IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterImage, wxHtmlFilter)
	119
	120
	121
420ec58a	122	bool wxHtmlFilterImage::CanRead(const wxFSFile& file) const
5526e819	123	{
0413cec5	124	return (file.GetMimeType().Left(6) == wxT("image/"));
5526e819 VS	125	}
	126
	127
	128
420ec58a	129	wxString wxHtmlFilterImage::ReadFile(const wxFSFile& file) const
5526e819	130	{
2b5f62a0 VZ	131	wxString res = wxT("<HTML><BODY><IMG SRC=\"") + file.GetLocation() + wxT("\"></BODY></HTML>");
2b5f62a0 VZ	132	return res;
5526e819 VS	133	}
	134
	135
	136
	137
	138	//--------------------------------------------------------------------------------
2b5f62a0 VZ	139	// wxHtmlFilterHTML
2b5f62a0 VZ	140	// filter for text/html
5526e819 VS	141	//--------------------------------------------------------------------------------
5526e819 VS	142
5526e819 VS	143
	144	IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterHTML, wxHtmlFilter)
	145
420ec58a	146	bool wxHtmlFilterHTML::CanRead(const wxFSFile& file) const
5526e819	147	{
f61815af GL	148	// return (file.GetMimeType() == "text/html");
	149	// This is true in most case but some page can return:
	150	// "text/html; char-encoding=...."
	151	// So we use Find instead
77611ad4	152	return (file.GetMimeType().Find(wxT("text/html")) == 0);
5526e819 VS	153	}
	154
	155
	156
420ec58a	157	wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const
5526e819 VS	158	{
5526e819 VS	159	wxInputStream *s = file.GetStream();
5526e819 VS	160	wxString doc;
5526e819 VS	161
04dbb646	162	if (s == NULL)
f3c82859	163	{
f6bcfd97	164	wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str());
f3c82859 VS	165	return wxEmptyString;
f3c82859 VS	166	}
5526e819	167
2b5f62a0 VZ	168	// NB: We convert input file to wchar_t here in Unicode mode, based on
	169	// either Content-Type header or <meta> tags. In ANSI mode, we don't
	170	// do it as it is done by wxHtmlParser (for this reason, we add <meta>
	171	// tag if we used Content-Type header).
	172	#if wxUSE_UNICODE
	173	int charsetPos;
	174	if ((charsetPos = file.GetMimeType().Find(_T("; charset="))) != wxNOT_FOUND)
	175	{
	176	wxString charset = file.GetMimeType().Mid(charsetPos + 10);
	177	wxCSConv conv(charset);
	178	ReadString(doc, s, conv);
	179	}
	180	else
	181	{
	182	wxString tmpdoc;
	183	ReadString(tmpdoc, s, wxConvISO8859_1);
	184	wxString charset = wxHtmlParser::ExtractCharsetInformation(tmpdoc);
	185	if (charset.empty())
	186	doc = tmpdoc;
	187	else
	188	{
	189	wxCSConv conv(charset);
	190	doc = wxString(tmpdoc.mb_str(wxConvISO8859_1), conv);
	191	}
	192	}
	193	#else // !wxUSE_UNICODE
	194	ReadString(doc, s, wxConvLibc);
04dbb646	195	// add meta tag if we obtained this through http:
2b5f62a0	196	if (!file.GetMimeType().empty())
981e62aa	197	{
2b5f62a0 VZ	198	wxString hdr;
	199	wxString mime = file.GetMimeType();
	200	hdr.Printf(_T("<meta http-equiv=\"Content-Type\" content=\"%s\">"), mime.c_str());
	201	return hdr+doc;
981e62aa	202	}
2b5f62a0	203	#endif
981e62aa	204
3ca6a5f0	205	return doc;
5526e819 VS	206	}
	207
	208
	209
	210
	211	///// Module:
	212
	213	class wxHtmlFilterModule : public wxModule
	214	{
	215	DECLARE_DYNAMIC_CLASS(wxHtmlFilterModule)
	216
	217	public:
	218	virtual bool OnInit()
	219	{
	220	wxHtmlWindow::AddFilter(new wxHtmlFilterHTML);
	221	wxHtmlWindow::AddFilter(new wxHtmlFilterImage);
	222	return TRUE;
	223	}
	224	virtual void OnExit() {}
	225	};
	226
	227	IMPLEMENT_DYNAMIC_CLASS(wxHtmlFilterModule, wxModule)
	228
8dd71e2b	229	#endif