preserve TAB characters when copying HTML <pre> content to clipboard
[wxWidgets.git] / src / html / winpars.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/winpars.cpp
3 // Purpose: wxHtmlParser class (generic parser)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #if wxUSE_HTML && wxUSE_STREAMS
17
18 #ifndef WX_PRECOMP
19 #include "wx/intl.h"
20 #include "wx/dc.h"
21 #include "wx/log.h"
22 #include "wx/settings.h"
23 #endif
24
25 #include "wx/html/htmldefs.h"
26 #include "wx/html/winpars.h"
27 #include "wx/html/htmlwin.h"
28 #include "wx/fontmap.h"
29 #include "wx/uri.h"
30
31
32 //-----------------------------------------------------------------------------
33 // wxHtmlWinParser
34 //-----------------------------------------------------------------------------
35
// wxWidgets run-time class information for wxHtmlWinParser, registered as an
// abstract class whose base is wxHtmlParser.
36 IMPLEMENT_ABSTRACT_CLASS(wxHtmlWinParser, wxHtmlParser)
37
// List of tag handler modules shared by all parser instances: AddModule()
// appends to it, RemoveModule() removes from it and the constructor walks it
// to let every module fill in the handlers table of the new parser.
38 wxList wxHtmlWinParser::m_Modules;
39
40 wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindowInterface *wndIface)
41 {
42 m_tmpStrBuf = NULL;
43 m_tmpStrBufSize = 0;
44 m_windowInterface = wndIface;
45 m_Container = NULL;
46 m_DC = NULL;
47 m_CharHeight = m_CharWidth = 0;
48 m_UseLink = false;
49 #if !wxUSE_UNICODE
50 m_nbsp = 0;
51 m_EncConv = NULL;
52 m_InputEnc = wxFONTENCODING_ISO8859_1;
53 m_OutputEnc = wxFONTENCODING_DEFAULT;
54 #endif
55 m_whitespaceMode = Whitespace_Normal;
56 m_lastWordCell = NULL;
57 m_posColumn = 0;
58
59 {
60 int i, j, k, l, m;
61 for (i = 0; i < 2; i++)
62 for (j = 0; j < 2; j++)
63 for (k = 0; k < 2; k++)
64 for (l = 0; l < 2; l++)
65 for (m = 0; m < 7; m++)
66 {
67 m_FontsTable[i][j][k][l][m] = NULL;
68 m_FontsFacesTable[i][j][k][l][m] = wxEmptyString;
69 #if !wxUSE_UNICODE
70 m_FontsEncTable[i][j][k][l][m] = wxFONTENCODING_DEFAULT;
71 #endif
72 }
73
74 SetFonts(wxEmptyString, wxEmptyString, NULL);
75 }
76
77 // fill in wxHtmlParser's tables:
78 wxList::compatibility_iterator node = m_Modules.GetFirst();
79 while (node)
80 {
81 wxHtmlTagsModule *mod = (wxHtmlTagsModule*) node->GetData();
82 mod->FillHandlersTable(this);
83 node = node->GetNext();
84 }
85 }
86
87 wxHtmlWinParser::~wxHtmlWinParser()
88 {
89 int i, j, k, l, m;
90
91 for (i = 0; i < 2; i++)
92 for (j = 0; j < 2; j++)
93 for (k = 0; k < 2; k++)
94 for (l = 0; l < 2; l++)
95 for (m = 0; m < 7; m++)
96 {
97 if (m_FontsTable[i][j][k][l][m] != NULL)
98 delete m_FontsTable[i][j][k][l][m];
99 }
100 #if !wxUSE_UNICODE
101 delete m_EncConv;
102 #endif
103 delete[] m_tmpStrBuf;
104 }
105
106 void wxHtmlWinParser::AddModule(wxHtmlTagsModule *module)
107 {
108 m_Modules.Append(module);
109 }
110
111 void wxHtmlWinParser::RemoveModule(wxHtmlTagsModule *module)
112 {
113 m_Modules.DeleteObject(module);
114 }
115
// Compute the seven HTML font sizes (1..7) from the given base size and store
// them in sizes[0..6]; the base size itself is used for HTML size 3. Results
// are truncated towards zero, not rounded.
static void wxBuildFontSizes(int *sizes, int size)
{
    // using a fixed factor (1.2, from CSS2) is a bad idea as explained at
    // http://www.w3.org/TR/CSS21/fonts.html#font-size-props but this is by far
    // simplest thing to do so still do it like this for now
    //
    // the first entry is an exception to the 1.2 rule as the result would be
    // too small otherwise
    static const double scaleFactors[7] =
    {
        0.75, 0.83, 1.0, 1.2, 1.44, 1.73, 2.0
    };

    for ( int n = 0; n < 7; ++n )
        sizes[n] = int(size * scaleFactors[n]);
}
130
131 static int wxGetDefaultHTMLFontSize()
132 {
133 // base the default font size on the size of the default system font but
134 // also ensure that we have a font of reasonable size, otherwise small HTML
135 // fonts are unreadable
136 int size = wxNORMAL_FONT->GetPointSize();
137 if ( size < 10 )
138 size = 10;
139 return size;
140 }
141
142 void wxHtmlWinParser::SetFonts(const wxString& normal_face,
143 const wxString& fixed_face,
144 const int *sizes)
145 {
146 static int default_sizes[7] = { 0 };
147 if ( !sizes )
148 {
149 if ( !default_sizes[0] )
150 wxBuildFontSizes(default_sizes, wxGetDefaultHTMLFontSize());
151
152 sizes = default_sizes;
153 }
154
155 int i, j, k, l, m;
156
157 for (i = 0; i < 7; i++)
158 m_FontsSizes[i] = sizes[i];
159
160 m_FontFaceFixed = fixed_face;
161 m_FontFaceNormal = normal_face;
162
163 #if !wxUSE_UNICODE
164 SetInputEncoding(m_InputEnc);
165 #endif
166
167 for (i = 0; i < 2; i++)
168 for (j = 0; j < 2; j++)
169 for (k = 0; k < 2; k++)
170 for (l = 0; l < 2; l++)
171 for (m = 0; m < 7; m++) {
172 if (m_FontsTable[i][j][k][l][m] != NULL)
173 {
174 delete m_FontsTable[i][j][k][l][m];
175 m_FontsTable[i][j][k][l][m] = NULL;
176 }
177 }
178 }
179
180 void wxHtmlWinParser::SetStandardFonts(int size,
181 const wxString& normal_face,
182 const wxString& fixed_face)
183 {
184 if (size == -1)
185 size = wxGetDefaultHTMLFontSize();
186
187 int f_sizes[7];
188 wxBuildFontSizes(f_sizes, size);
189
190 wxString normal = normal_face;
191 if ( normal.empty() )
192 normal = wxNORMAL_FONT->GetFaceName();
193
194 SetFonts(normal, fixed_face, f_sizes);
195 }
196
197 void wxHtmlWinParser::InitParser(const wxString& source)
198 {
199 wxHtmlParser::InitParser(source);
200 wxASSERT_MSG(m_DC != NULL, wxT("no DC assigned to wxHtmlWinParser!!"));
201
202 m_FontBold = m_FontItalic = m_FontUnderlined = m_FontFixed = FALSE;
203 m_FontSize = 3; //default one
204 CreateCurrentFont(); // we're selecting default font into
205
206 // we're not using GetCharWidth/Height() because of
207 // differences under X and win
208 wxCoord w,h;
209 m_DC->GetTextExtent( wxT("H"), &w, &h);
210 m_CharWidth = w;
211 m_CharHeight = h;
212
213 m_UseLink = false;
214 m_Link = wxHtmlLinkInfo( wxEmptyString );
215 m_LinkColor.Set(0, 0, 0xFF);
216 m_ActualColor.Set(0, 0, 0);
217 m_Align = wxHTML_ALIGN_LEFT;
218 m_ScriptMode = wxHTML_SCRIPT_NORMAL;
219 m_ScriptBaseline = 0;
220 m_tmpLastWasSpace = false;
221 m_lastWordCell = NULL;
222
223 // open the toplevel container that contains everything else and that
224 // is never closed (this makes parser's life easier):
225 OpenContainer();
226
227 // then open the first container into which page's content will go:
228 OpenContainer();
229
230 #if !wxUSE_UNICODE
231 wxString charset = ExtractCharsetInformation(source);
232 if (!charset.empty())
233 {
234 wxFontEncoding enc = wxFontMapper::Get()->CharsetToEncoding(charset);
235 if (enc != wxFONTENCODING_SYSTEM)
236 SetInputEncoding(enc);
237 }
238 #endif
239
240 m_Container->InsertCell(new wxHtmlColourCell(m_ActualColor));
241 wxColour windowColour = wxSystemSettings::GetColour(wxSYS_COLOUR_WINDOW) ;
242
243 m_Container->InsertCell
244 (
245 new wxHtmlColourCell
246 (
247 m_windowInterface
248 ? m_windowInterface->GetHTMLBackgroundColour()
249 : windowColour,
250 wxHTML_CLR_BACKGROUND
251 )
252 );
253
254 m_Container->InsertCell(new wxHtmlFontCell(CreateCurrentFont()));
255 }
256
257 void wxHtmlWinParser::DoneParser()
258 {
259 m_Container = NULL;
260 #if !wxUSE_UNICODE
261 SetInputEncoding(wxFONTENCODING_ISO8859_1); // for next call
262 #endif
263 wxHtmlParser::DoneParser();
264 }
265
#if WXWIN_COMPATIBILITY_2_6
// Compatibility accessor: return the window behind the window interface as a
// wxHtmlWindow, or NULL if there is no interface (wxDynamicCast is used, so
// the result depends on the actual type of that window).
wxHtmlWindow *wxHtmlWinParser::GetWindow()
{
    if ( m_windowInterface )
        return wxDynamicCast(m_windowInterface->GetHTMLWindow(), wxHtmlWindow);

    return NULL;
}
#endif
274
275 wxObject* wxHtmlWinParser::GetProduct()
276 {
277 wxHtmlContainerCell *top;
278
279 CloseContainer();
280 OpenContainer();
281
282 top = m_Container;
283 while (top->GetParent()) top = top->GetParent();
284 top->RemoveExtraSpacing(true, true);
285
286 return top;
287 }
288
289 wxFSFile *wxHtmlWinParser::OpenURL(wxHtmlURLType type,
290 const wxString& url) const
291 {
292 if ( !m_windowInterface )
293 return wxHtmlParser::OpenURL(type, url);
294
295 wxString myurl(url);
296 wxHtmlOpeningStatus status;
297 for (;;)
298 {
299 wxString myfullurl(myurl);
300
301 // consider url as absolute path first
302 wxURI current(myurl);
303 myfullurl = current.BuildUnescapedURI();
304
305 // if not absolute then ...
306 if( current.IsReference() )
307 {
308 wxString basepath = GetFS()->GetPath();
309 wxURI base(basepath);
310
311 // ... try to apply base path if valid ...
312 if( !base.IsReference() )
313 {
314 wxURI path(myfullurl);
315 path.Resolve( base );
316 myfullurl = path.BuildUnescapedURI();
317 }
318 else
319 {
320 // ... or force such addition if not included already
321 if( !current.GetPath().Contains(base.GetPath()) )
322 {
323 basepath += myurl;
324 wxURI connected( basepath );
325 myfullurl = connected.BuildUnescapedURI();
326 }
327 }
328 }
329
330 wxString redirect;
331 status = m_windowInterface->OnHTMLOpeningURL(type, myfullurl, &redirect);
332 if ( status != wxHTML_REDIRECT )
333 break;
334
335 myurl = redirect;
336 }
337
338 if ( status == wxHTML_BLOCK )
339 return NULL;
340
341 int flags = wxFS_READ;
342 if (type == wxHTML_URL_IMAGE)
343 flags |= wxFS_SEEKABLE;
344
345 return GetFS()->OpenFile(myurl, flags);
346 }
347
348 void wxHtmlWinParser::AddText(const wxString& txt)
349 {
350 #define NBSP_UNICODE_VALUE (wxChar(160))
351 #if !wxUSE_UNICODE
352 if ( m_nbsp == 0 )
353 m_nbsp = GetEntitiesParser()->GetCharForCode(NBSP_UNICODE_VALUE);
354 #define CUR_NBSP_VALUE m_nbsp
355 #else
356 #define CUR_NBSP_VALUE NBSP_UNICODE_VALUE
357 #endif
358
359 if ( m_whitespaceMode == Whitespace_Normal )
360 {
361 int templen = 0;
362
363 size_t lng = txt.length();
364 if (lng+1 > m_tmpStrBufSize)
365 {
366 delete[] m_tmpStrBuf;
367 m_tmpStrBuf = new wxChar[lng+1];
368 m_tmpStrBufSize = lng+1;
369 }
370 wxChar *temp = m_tmpStrBuf;
371
372 wxString::const_iterator i = txt.begin();
373 const wxString::const_iterator end = txt.end();
374
375 if (m_tmpLastWasSpace)
376 {
377 while ( (i < end) &&
378 (*i == wxT('\n') || *i == wxT('\r') || *i == wxT(' ') ||
379 *i == wxT('\t')) )
380 {
381 ++i;
382 }
383 }
384
385 while (i < end)
386 {
387 size_t x = 0;
388 wxChar d = *i;
389 if ((d == wxT('\n')) || (d == wxT('\r')) || (d == wxT(' ')) || (d == wxT('\t')))
390 {
391 ++i, ++x;
392 while ( (i < end) &&
393 (*i == wxT('\n') || *i == wxT('\r') ||
394 *i == wxT(' ') || *i == wxT('\t')) )
395 {
396 ++i;
397 ++x;
398 }
399 }
400 else
401 {
402 ++i;
403 }
404
405 if (d == CUR_NBSP_VALUE)
406 d = ' ';
407
408 temp[templen++] = d;
409
410 if (x)
411 {
412 temp[templen-1] = wxT(' ');
413 FlushWordBuf(temp, templen);
414 m_tmpLastWasSpace = true;
415 }
416 }
417
418 if (templen && (templen > 1 || temp[0] != wxT(' ')))
419 {
420 FlushWordBuf(temp, templen);
421 m_tmpLastWasSpace = false;
422 }
423 }
424 else // m_whitespaceMode == Whitespace_Pre
425 {
426 if ( txt.find(CUR_NBSP_VALUE) != wxString::npos )
427 {
428 // we need to substitute spaces for &nbsp; here just like we
429 // did in the Whitespace_Normal branch above
430 wxString txt2(txt);
431 txt2.Replace(CUR_NBSP_VALUE, ' ');
432 AddPreBlock(txt2);
433 }
434 else
435 {
436 AddPreBlock(txt);
437 }
438
439 // don't eat any whitespace in <pre> block
440 m_tmpLastWasSpace = false;
441 }
442 }
443
444 void wxHtmlWinParser::FlushWordBuf(wxChar *buf, int& len)
445 {
446 buf[len] = 0;
447
448 #if !wxUSE_UNICODE
449 if (m_EncConv)
450 m_EncConv->Convert(buf);
451 #endif
452
453 AddWord(wxString(buf, len));
454
455 len = 0;
456 }
457
458 void wxHtmlWinParser::AddWord(wxHtmlWordCell *word)
459 {
460 ApplyStateToCell(word);
461
462 m_Container->InsertCell(word);
463 word->SetPreviousWord(m_lastWordCell);
464 m_lastWordCell = word;
465 }
466
467 void wxHtmlWinParser::AddPreBlock(const wxString& text)
468 {
469 if ( text.find('\t') != wxString::npos )
470 {
471 wxString text2;
472 text2.reserve(text.length());
473
474 const wxString::const_iterator end = text.end();
475 wxString::const_iterator copyFrom = text.begin();
476 size_t posFrom = 0;
477 size_t pos = 0;
478 int posColumn = m_posColumn;
479 for ( wxString::const_iterator i = copyFrom; i != end; ++i, ++pos )
480 {
481 if ( *i == '\t' )
482 {
483 if ( copyFrom != i )
484 text2.append(copyFrom, i);
485
486 const unsigned SPACES_PER_TAB = 8;
487 const size_t expandTo = SPACES_PER_TAB - posColumn % SPACES_PER_TAB;
488 text2.append(expandTo, ' ');
489
490 posColumn += expandTo;
491 copyFrom = i + 1;
492 posFrom = pos + 1;
493 }
494 else
495 {
496 ++posColumn;
497 }
498 }
499 if ( copyFrom != text.end() )
500 text2.append(copyFrom, text.end());
501
502 AddWord(new wxHtmlWordWithTabsCell(text2, text, m_posColumn, *(GetDC())));
503
504 m_posColumn = posColumn;
505 }
506 else
507 {
508 // no special formatting needed
509 AddWord(text);
510 m_posColumn += text.length();
511 }
512 }
513
514
515 wxHtmlContainerCell* wxHtmlWinParser::OpenContainer()
516 {
517 m_Container = new wxHtmlContainerCell(m_Container);
518 m_Container->SetAlignHor(m_Align);
519 m_posColumn = 0;
520 m_tmpLastWasSpace = true;
521 /* to avoid space being first character in paragraph */
522 return m_Container;
523 }
524
525
526
527 wxHtmlContainerCell* wxHtmlWinParser::SetContainer(wxHtmlContainerCell *c)
528 {
529 m_tmpLastWasSpace = true;
530 /* to avoid space being first character in paragraph */
531 return m_Container = c;
532 }
533
534
535
536 wxHtmlContainerCell* wxHtmlWinParser::CloseContainer()
537 {
538 m_Container = m_Container->GetParent();
539 return m_Container;
540 }
541
542
543 void wxHtmlWinParser::SetFontSize(int s)
544 {
545 if (s < 1)
546 s = 1;
547 else if (s > 7)
548 s = 7;
549 m_FontSize = s;
550 }
551
552
553
554 wxFont* wxHtmlWinParser::CreateCurrentFont()
555 {
556 int fb = GetFontBold(),
557 fi = GetFontItalic(),
558 fu = GetFontUnderlined(),
559 ff = GetFontFixed(),
560 fs = GetFontSize() - 1 /*remap from <1;7> to <0;6>*/ ;
561
562 wxString face = ff ? m_FontFaceFixed : m_FontFaceNormal;
563 wxString *faceptr = &(m_FontsFacesTable[fb][fi][fu][ff][fs]);
564 wxFont **fontptr = &(m_FontsTable[fb][fi][fu][ff][fs]);
565 #if !wxUSE_UNICODE
566 wxFontEncoding *encptr = &(m_FontsEncTable[fb][fi][fu][ff][fs]);
567 #endif
568
569 if (*fontptr != NULL && (*faceptr != face
570 #if !wxUSE_UNICODE
571 || *encptr != m_OutputEnc
572 #endif
573 ))
574 {
575 delete *fontptr;
576 *fontptr = NULL;
577 }
578
579 if (*fontptr == NULL)
580 {
581 *faceptr = face;
582 *fontptr = new wxFont(
583 (int) (m_FontsSizes[fs] * m_PixelScale),
584 ff ? wxMODERN : wxSWISS,
585 fi ? wxITALIC : wxNORMAL,
586 fb ? wxBOLD : wxNORMAL,
587 fu ? true : false, face
588 #if wxUSE_UNICODE
589 );
590 #else
591 , m_OutputEnc);
592 *encptr = m_OutputEnc;
593 #endif
594 }
595 m_DC->SetFont(**fontptr);
596 return (*fontptr);
597 }
598
599
600
601 void wxHtmlWinParser::SetLink(const wxHtmlLinkInfo& link)
602 {
603 m_Link = link;
604 m_UseLink = (link.GetHref() != wxEmptyString);
605 }
606
607 void wxHtmlWinParser::SetFontFace(const wxString& face)
608 {
609 if (GetFontFixed())
610 m_FontFaceFixed = face;
611 else
612 m_FontFaceNormal = face;
613
614 #if !wxUSE_UNICODE
615 if (m_InputEnc != wxFONTENCODING_DEFAULT)
616 SetInputEncoding(m_InputEnc);
617 #endif
618 }
619
620 void wxHtmlWinParser::ApplyStateToCell(wxHtmlCell *cell)
621 {
622 // set the link:
623 if (m_UseLink)
624 cell->SetLink(GetLink());
625
626 // apply current script mode settings:
627 cell->SetScriptMode(GetScriptMode(), GetScriptBaseline());
628 }
629
630
631 #if !wxUSE_UNICODE
632 void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
633 {
634 // the character used for non-breakable space may change:
635 m_nbsp = 0;
636
637 m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
638 if (m_EncConv)
639 {
640 delete m_EncConv;
641 m_EncConv = NULL;
642 }
643
644 if (enc == wxFONTENCODING_DEFAULT)
645 return;
646
647 wxFontEncoding altfix, altnorm;
648 bool availfix, availnorm;
649
650 availnorm = wxFontMapper::Get()->IsEncodingAvailable(enc, m_FontFaceNormal);
651 availfix = wxFontMapper::Get()->IsEncodingAvailable(enc, m_FontFaceFixed);
652
653 if (availnorm && availfix)
654 {
655 // exact match?
656 m_OutputEnc = enc;
657 }
658
659 else if (wxFontMapper::Get()->GetAltForEncoding(enc, &altnorm, m_FontFaceNormal, false) &&
660 wxFontMapper::Get()->GetAltForEncoding(enc, &altfix, m_FontFaceFixed, false) &&
661 altnorm == altfix)
662 {
663 // alternatives?
664 m_OutputEnc = altnorm;
665 }
666 else if (availnorm)
667 {
668 // at least normal face?
669 m_OutputEnc = enc;
670 }
671 else if (wxFontMapper::Get()->GetAltForEncoding(enc, &altnorm, m_FontFaceNormal, false))
672 {
673 m_OutputEnc = altnorm;
674 }
675 else
676 {
677 #ifndef __WXMAC__
678 // okay, let's convert to ISO_8859-1, available always
679 m_OutputEnc = wxFONTENCODING_DEFAULT;
680 #else
681 m_OutputEnc = wxLocale::GetSystemEncoding() ;
682 #endif
683 }
684
685 m_InputEnc = enc;
686 if (m_OutputEnc == wxFONTENCODING_DEFAULT)
687 {
688 GetEntitiesParser()->SetEncoding(wxFONTENCODING_SYSTEM);
689 }
690 else
691 {
692 GetEntitiesParser()->SetEncoding(m_OutputEnc);
693 }
694
695 if (m_InputEnc == m_OutputEnc)
696 return;
697
698 m_EncConv = new wxEncodingConverter();
699 if (!m_EncConv->Init(m_InputEnc,
700 (m_OutputEnc == wxFONTENCODING_DEFAULT) ?
701 wxFONTENCODING_ISO8859_1 : m_OutputEnc,
702 wxCONVERT_SUBSTITUTE))
703 { // total failure :-(
704 wxLogError(_("Failed to display HTML document in %s encoding"),
705 wxFontMapper::GetEncodingName(enc).c_str());
706 m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
707 delete m_EncConv;
708 m_EncConv = NULL;
709 }
710 }
711 #endif
712
713
714
715
716 //-----------------------------------------------------------------------------
717 // wxHtmlWinTagHandler
718 //-----------------------------------------------------------------------------
719
// wxWidgets run-time class information for wxHtmlWinTagHandler, registered
// as an abstract class whose base is wxHtmlTagHandler.
720 IMPLEMENT_ABSTRACT_CLASS(wxHtmlWinTagHandler, wxHtmlTagHandler)
721
722 //-----------------------------------------------------------------------------
723 // wxHtmlTagsModule
724 //-----------------------------------------------------------------------------
725
726 // NB: This is *NOT* winpars.cpp's initialization and shutdown code!!
727 // This module is an ancestor for tag handlers modules defined
728 // in m_*.cpp files with TAGS_MODULE_BEGIN...TAGS_MODULE_END construct.
729 //
730 // Do not add any winpars.cpp shutdown or initialization code to it,
731 // create a new module instead!
732
// wxWidgets run-time class information for wxHtmlTagsModule, registered as a
// dynamically creatable class whose base is wxModule.
733 IMPLEMENT_DYNAMIC_CLASS(wxHtmlTagsModule, wxModule)
734
735 bool wxHtmlTagsModule::OnInit()
736 {
737 wxHtmlWinParser::AddModule(this);
738 return true;
739 }
740
741 void wxHtmlTagsModule::OnExit()
742 {
743 wxHtmlWinParser::RemoveModule(this);
744 }
745
746 #endif