]> git.saurik.com Git - wxWidgets.git/blob - src/html/winpars.cpp
use fallback encoding in wxConvAuto when input is not in UTF-8
[wxWidgets.git] / src / html / winpars.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/winpars.cpp
3 // Purpose: wxHtmlParser class (generic parser)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #if wxUSE_HTML && wxUSE_STREAMS
17
18 #ifndef WX_PRECOMP
19 #include "wx/intl.h"
20 #include "wx/dc.h"
21 #include "wx/log.h"
22 #include "wx/settings.h"
23 #endif
24
25 #include "wx/html/htmldefs.h"
26 #include "wx/html/winpars.h"
27 #include "wx/html/htmlwin.h"
28 #include "wx/fontmap.h"
29 #include "wx/uri.h"
30
31
32 //-----------------------------------------------------------------------------
33 // wxHtmlWinParser
34 //-----------------------------------------------------------------------------
35
36 IMPLEMENT_ABSTRACT_CLASS(wxHtmlWinParser, wxHtmlParser)
37
38 wxList wxHtmlWinParser::m_Modules;
39
40 wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindowInterface *wndIface)
41 {
42 m_tmpStrBuf = NULL;
43 m_tmpStrBufSize = 0;
44 m_windowInterface = wndIface;
45 m_Container = NULL;
46 m_DC = NULL;
47 m_CharHeight = m_CharWidth = 0;
48 m_UseLink = false;
49 #if !wxUSE_UNICODE
50 m_nbsp = 0;
51 m_EncConv = NULL;
52 m_InputEnc = wxFONTENCODING_ISO8859_1;
53 m_OutputEnc = wxFONTENCODING_DEFAULT;
54 #endif
55 m_lastWordCell = NULL;
56
57 {
58 int i, j, k, l, m;
59 for (i = 0; i < 2; i++)
60 for (j = 0; j < 2; j++)
61 for (k = 0; k < 2; k++)
62 for (l = 0; l < 2; l++)
63 for (m = 0; m < 7; m++)
64 {
65 m_FontsTable[i][j][k][l][m] = NULL;
66 m_FontsFacesTable[i][j][k][l][m] = wxEmptyString;
67 #if !wxUSE_UNICODE
68 m_FontsEncTable[i][j][k][l][m] = wxFONTENCODING_DEFAULT;
69 #endif
70 }
71
72 SetFonts(wxEmptyString, wxEmptyString, NULL);
73 }
74
75 // fill in wxHtmlParser's tables:
76 wxList::compatibility_iterator node = m_Modules.GetFirst();
77 while (node)
78 {
79 wxHtmlTagsModule *mod = (wxHtmlTagsModule*) node->GetData();
80 mod->FillHandlersTable(this);
81 node = node->GetNext();
82 }
83 }
84
85 wxHtmlWinParser::~wxHtmlWinParser()
86 {
87 int i, j, k, l, m;
88
89 for (i = 0; i < 2; i++)
90 for (j = 0; j < 2; j++)
91 for (k = 0; k < 2; k++)
92 for (l = 0; l < 2; l++)
93 for (m = 0; m < 7; m++)
94 {
95 if (m_FontsTable[i][j][k][l][m] != NULL)
96 delete m_FontsTable[i][j][k][l][m];
97 }
98 #if !wxUSE_UNICODE
99 delete m_EncConv;
100 #endif
101 delete[] m_tmpStrBuf;
102 }
103
104 void wxHtmlWinParser::AddModule(wxHtmlTagsModule *module)
105 {
106 m_Modules.Append(module);
107 }
108
109 void wxHtmlWinParser::RemoveModule(wxHtmlTagsModule *module)
110 {
111 m_Modules.DeleteObject(module);
112 }
113
// Fill sizes[0..6] with the sizes of the seven HTML fonts (<font size=1..7>)
// derived from the given base size; the base size itself is used for HTML
// size 3, i.e. sizes[2].
static void wxBuildFontSizes(int *sizes, int size)
{
    // using a fixed factor (1.2, from CSS2) is a bad idea as explained at
    // http://www.w3.org/TR/CSS21/fonts.html#font-size-props but this is by far
    // simplest thing to do so still do it like this for now
    //
    // the first entry is an exception to the 1.2 rule, the result would be
    // too small otherwise
    static const double factors[7] =
    {
        0.75, 0.83, 1.0, 1.2, 1.44, 1.73, 2.0
    };

    for ( int n = 0; n < 7; n++ )
        sizes[n] = int(size * factors[n]);
}
128
129 static int wxGetDefaultHTMLFontSize()
130 {
131 // base the default font size on the size of the default system font but
132 // also ensure that we have a font of reasonable size, otherwise small HTML
133 // fonts are unreadable
134 int size = wxNORMAL_FONT->GetPointSize();
135 if ( size < 10 )
136 size = 10;
137 return size;
138 }
139
140 void wxHtmlWinParser::SetFonts(const wxString& normal_face,
141 const wxString& fixed_face,
142 const int *sizes)
143 {
144 static int default_sizes[7] = { 0 };
145 if ( !sizes )
146 {
147 if ( !default_sizes[0] )
148 wxBuildFontSizes(default_sizes, wxGetDefaultHTMLFontSize());
149
150 sizes = default_sizes;
151 }
152
153 int i, j, k, l, m;
154
155 for (i = 0; i < 7; i++)
156 m_FontsSizes[i] = sizes[i];
157
158 m_FontFaceFixed = fixed_face;
159 m_FontFaceNormal = normal_face;
160
161 #if !wxUSE_UNICODE
162 SetInputEncoding(m_InputEnc);
163 #endif
164
165 for (i = 0; i < 2; i++)
166 for (j = 0; j < 2; j++)
167 for (k = 0; k < 2; k++)
168 for (l = 0; l < 2; l++)
169 for (m = 0; m < 7; m++) {
170 if (m_FontsTable[i][j][k][l][m] != NULL)
171 {
172 delete m_FontsTable[i][j][k][l][m];
173 m_FontsTable[i][j][k][l][m] = NULL;
174 }
175 }
176 }
177
178 void wxHtmlWinParser::SetStandardFonts(int size,
179 const wxString& normal_face,
180 const wxString& fixed_face)
181 {
182 if (size == -1)
183 size = wxGetDefaultHTMLFontSize();
184
185 int f_sizes[7];
186 wxBuildFontSizes(f_sizes, size);
187
188 wxString normal = normal_face;
189 if ( normal.empty() )
190 normal = wxNORMAL_FONT->GetFaceName();
191
192 SetFonts(normal, fixed_face, f_sizes);
193 }
194
195 void wxHtmlWinParser::InitParser(const wxString& source)
196 {
197 wxHtmlParser::InitParser(source);
198 wxASSERT_MSG(m_DC != NULL, wxT("no DC assigned to wxHtmlWinParser!!"));
199
200 m_FontBold = m_FontItalic = m_FontUnderlined = m_FontFixed = FALSE;
201 m_FontSize = 3; //default one
202 CreateCurrentFont(); // we're selecting default font into
203
204 // we're not using GetCharWidth/Height() because of
205 // differences under X and win
206 wxCoord w,h;
207 m_DC->GetTextExtent( wxT("H"), &w, &h);
208 m_CharWidth = w;
209 m_CharHeight = h;
210
211 m_UseLink = false;
212 m_Link = wxHtmlLinkInfo( wxEmptyString );
213 m_LinkColor.Set(0, 0, 0xFF);
214 m_ActualColor.Set(0, 0, 0);
215 m_Align = wxHTML_ALIGN_LEFT;
216 m_ScriptMode = wxHTML_SCRIPT_NORMAL;
217 m_ScriptBaseline = 0;
218 m_tmpLastWasSpace = false;
219 m_lastWordCell = NULL;
220
221 // open the toplevel container that contains everything else and that
222 // is never closed (this makes parser's life easier):
223 OpenContainer();
224
225 // then open the first container into which page's content will go:
226 OpenContainer();
227
228 #if !wxUSE_UNICODE
229 wxString charset = ExtractCharsetInformation(source);
230 if (!charset.empty())
231 {
232 wxFontEncoding enc = wxFontMapper::Get()->CharsetToEncoding(charset);
233 if (enc != wxFONTENCODING_SYSTEM)
234 SetInputEncoding(enc);
235 }
236 #endif
237
238 m_Container->InsertCell(new wxHtmlColourCell(m_ActualColor));
239 wxColour windowColour = wxSystemSettings::GetColour(wxSYS_COLOUR_WINDOW) ;
240
241 m_Container->InsertCell
242 (
243 new wxHtmlColourCell
244 (
245 m_windowInterface
246 ? m_windowInterface->GetHTMLBackgroundColour()
247 : windowColour,
248 wxHTML_CLR_BACKGROUND
249 )
250 );
251
252 m_Container->InsertCell(new wxHtmlFontCell(CreateCurrentFont()));
253 }
254
255 void wxHtmlWinParser::DoneParser()
256 {
257 m_Container = NULL;
258 #if !wxUSE_UNICODE
259 SetInputEncoding(wxFONTENCODING_ISO8859_1); // for next call
260 #endif
261 wxHtmlParser::DoneParser();
262 }
263
#if WXWIN_COMPATIBILITY_2_6
// 2.6 compatibility accessor: returns the window of the associated window
// interface as wxHtmlWindow, or NULL if there is no interface or its window
// is not a wxHtmlWindow.
wxHtmlWindow *wxHtmlWinParser::GetWindow()
{
    return m_windowInterface
            ? wxDynamicCast(m_windowInterface->GetHTMLWindow(), wxHtmlWindow)
            : NULL;
}
#endif
272
273 wxObject* wxHtmlWinParser::GetProduct()
274 {
275 wxHtmlContainerCell *top;
276
277 CloseContainer();
278 OpenContainer();
279
280 top = m_Container;
281 while (top->GetParent()) top = top->GetParent();
282 top->RemoveExtraSpacing(true, true);
283
284 return top;
285 }
286
287 wxFSFile *wxHtmlWinParser::OpenURL(wxHtmlURLType type,
288 const wxString& url) const
289 {
290 if ( !m_windowInterface )
291 return wxHtmlParser::OpenURL(type, url);
292
293 wxString myurl(url);
294 wxHtmlOpeningStatus status;
295 for (;;)
296 {
297 wxString myfullurl(myurl);
298
299 // consider url as absolute path first
300 wxURI current(myurl);
301 myfullurl = current.BuildUnescapedURI();
302
303 // if not absolute then ...
304 if( current.IsReference() )
305 {
306 wxString basepath = GetFS()->GetPath();
307 wxURI base(basepath);
308
309 // ... try to apply base path if valid ...
310 if( !base.IsReference() )
311 {
312 wxURI path(myfullurl);
313 path.Resolve( base );
314 myfullurl = path.BuildUnescapedURI();
315 }
316 else
317 {
318 // ... or force such addition if not included already
319 if( !current.GetPath().Contains(base.GetPath()) )
320 {
321 basepath += myurl;
322 wxURI connected( basepath );
323 myfullurl = connected.BuildUnescapedURI();
324 }
325 }
326 }
327
328 wxString redirect;
329 status = m_windowInterface->OnHTMLOpeningURL(type, myfullurl, &redirect);
330 if ( status != wxHTML_REDIRECT )
331 break;
332
333 myurl = redirect;
334 }
335
336 if ( status == wxHTML_BLOCK )
337 return NULL;
338
339 int flags = wxFS_READ;
340 if (type == wxHTML_URL_IMAGE)
341 flags |= wxFS_SEEKABLE;
342
343 return GetFS()->OpenFile(myurl, flags);
344 }
345
346 void wxHtmlWinParser::AddText(const wxString& txt)
347 {
348 register wxChar d;
349 int templen = 0;
350
351 size_t lng = txt.length();
352 if (lng+1 > m_tmpStrBufSize)
353 {
354 delete[] m_tmpStrBuf;
355 m_tmpStrBuf = new wxChar[lng+1];
356 m_tmpStrBufSize = lng+1;
357 }
358 wxChar *temp = m_tmpStrBuf;
359
360 wxString::const_iterator i = txt.begin();
361 wxString::const_iterator end = txt.end();
362
363 if (m_tmpLastWasSpace)
364 {
365 while ( (i < end) &&
366 (*i == wxT('\n') || *i == wxT('\r') || *i == wxT(' ') ||
367 *i == wxT('\t')) )
368 {
369 ++i;
370 }
371 }
372
373 while (i < end)
374 {
375 size_t x = 0;
376 d = temp[templen++] = *i;
377 if ((d == wxT('\n')) || (d == wxT('\r')) || (d == wxT(' ')) || (d == wxT('\t')))
378 {
379 ++i, ++x;
380 while ( (i < end) &&
381 (*i == wxT('\n') || *i == wxT('\r') ||
382 *i == wxT(' ') || *i == wxT('\t')) )
383 {
384 ++i;
385 ++x;
386 }
387 }
388 else
389 ++i;
390
391 if (x)
392 {
393 temp[templen-1] = wxT(' ');
394 DoAddText(temp, templen);
395 m_tmpLastWasSpace = true;
396 }
397 }
398
399 if (templen && (templen > 1 || temp[0] != wxT(' ')))
400 {
401 DoAddText(temp, templen);
402 m_tmpLastWasSpace = false;
403 }
404 }
405
406 void wxHtmlWinParser::DoAddText(wxChar *temp, int& templen)
407 {
408 #define NBSP_UNICODE_VALUE 160
409 #if !wxUSE_UNICODE
410 if ( m_nbsp == 0 )
411 m_nbsp = GetEntitiesParser()->GetCharForCode(NBSP_UNICODE_VALUE);
412 #define CUR_NBSP_VALUE m_nbsp
413 #else
414 #define CUR_NBSP_VALUE NBSP_UNICODE_VALUE
415 #endif
416
417 temp[templen] = 0;
418 templen = 0;
419 #if !wxUSE_UNICODE
420 if (m_EncConv)
421 m_EncConv->Convert(temp);
422 #endif
423 size_t len = wxStrlen(temp);
424 for (size_t j = 0; j < len; j++)
425 {
426 if (temp[j] == CUR_NBSP_VALUE)
427 temp[j] = wxT(' ');
428 }
429
430 wxHtmlCell *c = new wxHtmlWordCell(temp, *(GetDC()));
431
432 ApplyStateToCell(c);
433
434 m_Container->InsertCell(c);
435 ((wxHtmlWordCell*)c)->SetPreviousWord(m_lastWordCell);
436 m_lastWordCell = (wxHtmlWordCell*)c;
437 }
438
439
440
441 wxHtmlContainerCell* wxHtmlWinParser::OpenContainer()
442 {
443 m_Container = new wxHtmlContainerCell(m_Container);
444 m_Container->SetAlignHor(m_Align);
445 m_tmpLastWasSpace = true;
446 /* to avoid space being first character in paragraph */
447 return m_Container;
448 }
449
450
451
452 wxHtmlContainerCell* wxHtmlWinParser::SetContainer(wxHtmlContainerCell *c)
453 {
454 m_tmpLastWasSpace = true;
455 /* to avoid space being first character in paragraph */
456 return m_Container = c;
457 }
458
459
460
461 wxHtmlContainerCell* wxHtmlWinParser::CloseContainer()
462 {
463 m_Container = m_Container->GetParent();
464 return m_Container;
465 }
466
467
468 void wxHtmlWinParser::SetFontSize(int s)
469 {
470 if (s < 1) s = 1;
471 else if (s > 7) s = 7;
472 m_FontSize = s;
473 }
474
475
476
477 wxFont* wxHtmlWinParser::CreateCurrentFont()
478 {
479 int fb = GetFontBold(),
480 fi = GetFontItalic(),
481 fu = GetFontUnderlined(),
482 ff = GetFontFixed(),
483 fs = GetFontSize() - 1 /*remap from <1;7> to <0;6>*/ ;
484
485 wxString face = ff ? m_FontFaceFixed : m_FontFaceNormal;
486 wxString *faceptr = &(m_FontsFacesTable[fb][fi][fu][ff][fs]);
487 wxFont **fontptr = &(m_FontsTable[fb][fi][fu][ff][fs]);
488 #if !wxUSE_UNICODE
489 wxFontEncoding *encptr = &(m_FontsEncTable[fb][fi][fu][ff][fs]);
490 #endif
491
492 if (*fontptr != NULL && (*faceptr != face
493 #if !wxUSE_UNICODE
494 || *encptr != m_OutputEnc
495 #endif
496 ))
497 {
498 delete *fontptr;
499 *fontptr = NULL;
500 }
501
502 if (*fontptr == NULL)
503 {
504 *faceptr = face;
505 *fontptr = new wxFont(
506 (int) (m_FontsSizes[fs] * m_PixelScale),
507 ff ? wxMODERN : wxSWISS,
508 fi ? wxITALIC : wxNORMAL,
509 fb ? wxBOLD : wxNORMAL,
510 fu ? true : false, face
511 #if wxUSE_UNICODE
512 );
513 #else
514 , m_OutputEnc);
515 *encptr = m_OutputEnc;
516 #endif
517 }
518 m_DC->SetFont(**fontptr);
519 return (*fontptr);
520 }
521
522
523
524 void wxHtmlWinParser::SetLink(const wxHtmlLinkInfo& link)
525 {
526 m_Link = link;
527 m_UseLink = (link.GetHref() != wxEmptyString);
528 }
529
530 void wxHtmlWinParser::SetFontFace(const wxString& face)
531 {
532 if (GetFontFixed()) m_FontFaceFixed = face;
533 else m_FontFaceNormal = face;
534
535 #if !wxUSE_UNICODE
536 if (m_InputEnc != wxFONTENCODING_DEFAULT)
537 SetInputEncoding(m_InputEnc);
538 #endif
539 }
540
541 void wxHtmlWinParser::ApplyStateToCell(wxHtmlCell *cell)
542 {
543 // set the link:
544 if (m_UseLink)
545 cell->SetLink(GetLink());
546
547 // apply current script mode settings:
548 cell->SetScriptMode(GetScriptMode(), GetScriptBaseline());
549 }
550
551
552 #if !wxUSE_UNICODE
553 void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
554 {
555 // the character used for non-breakable space may change:
556 m_nbsp = 0;
557
558 m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
559 if (m_EncConv)
560 {
561 delete m_EncConv;
562 m_EncConv = NULL;
563 }
564
565 if (enc == wxFONTENCODING_DEFAULT) return;
566
567 wxFontEncoding altfix, altnorm;
568 bool availfix, availnorm;
569
570 // exact match?
571 availnorm = wxFontMapper::Get()->IsEncodingAvailable(enc, m_FontFaceNormal);
572 availfix = wxFontMapper::Get()->IsEncodingAvailable(enc, m_FontFaceFixed);
573 if (availnorm && availfix)
574 m_OutputEnc = enc;
575
576 // alternatives?
577 else if (wxFontMapper::Get()->GetAltForEncoding(enc, &altnorm, m_FontFaceNormal, false) &&
578 wxFontMapper::Get()->GetAltForEncoding(enc, &altfix, m_FontFaceFixed, false) &&
579 altnorm == altfix)
580 m_OutputEnc = altnorm;
581
582 // at least normal face?
583 else if (availnorm)
584 m_OutputEnc = enc;
585 else if (wxFontMapper::Get()->GetAltForEncoding(enc, &altnorm, m_FontFaceNormal, false))
586 m_OutputEnc = altnorm;
587
588 else
589 {
590 #ifndef __WXMAC__
591 // okay, let's convert to ISO_8859-1, available always
592 m_OutputEnc = wxFONTENCODING_DEFAULT;
593 #else
594 m_OutputEnc = wxLocale::GetSystemEncoding() ;
595 #endif
596 }
597
598 m_InputEnc = enc;
599 if (m_OutputEnc == wxFONTENCODING_DEFAULT)
600 GetEntitiesParser()->SetEncoding(wxFONTENCODING_SYSTEM);
601 else
602 GetEntitiesParser()->SetEncoding(m_OutputEnc);
603
604 if (m_InputEnc == m_OutputEnc) return;
605
606 m_EncConv = new wxEncodingConverter();
607 if (!m_EncConv->Init(m_InputEnc,
608 (m_OutputEnc == wxFONTENCODING_DEFAULT) ?
609 wxFONTENCODING_ISO8859_1 : m_OutputEnc,
610 wxCONVERT_SUBSTITUTE))
611 { // total failure :-(
612 wxLogError(_("Failed to display HTML document in %s encoding"),
613 wxFontMapper::GetEncodingName(enc).c_str());
614 m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
615 delete m_EncConv;
616 m_EncConv = NULL;
617 }
618 }
619 #endif
620
621
622
623
624 //-----------------------------------------------------------------------------
625 // wxHtmlWinTagHandler
626 //-----------------------------------------------------------------------------
627
628 IMPLEMENT_ABSTRACT_CLASS(wxHtmlWinTagHandler, wxHtmlTagHandler)
629
630 //-----------------------------------------------------------------------------
631 // wxHtmlTagsModule
632 //-----------------------------------------------------------------------------
633
634 // NB: This is *NOT* winpars.cpp's initialization and shutdown code!!
635 // This module is an ancestor for tag handlers modules defined
636 // in m_*.cpp files with TAGS_MODULE_BEGIN...TAGS_MODULE_END construct.
637 //
638 // Do not add any winpars.cpp shutdown or initialization code to it,
639 // create a new module instead!
640
641 IMPLEMENT_DYNAMIC_CLASS(wxHtmlTagsModule, wxModule)
642
643 bool wxHtmlTagsModule::OnInit()
644 {
645 wxHtmlWinParser::AddModule(this);
646 return true;
647 }
648
649 void wxHtmlTagsModule::OnExit()
650 {
651 wxHtmlWinParser::RemoveModule(this);
652 }
653
654 #endif