]> git.saurik.com Git - wxWidgets.git/blob - src/richtext/richtexthtml.cpp
Hack to support iso8859 and other wrongly formatted
[wxWidgets.git] / src / richtext / richtexthtml.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/richtext/richtexthtml.cpp
3 // Purpose: HTML I/O for wxRichTextCtrl
4 // Author: Julian Smart
5 // Modified by:
6 // Created: 2005-09-30
7 // RCS-ID: $Id$
8 // Copyright: (c) Julian Smart
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
11
12 // For compilers that support precompilation, includes "wx.h".
13 #include "wx/wxprec.h"
14
15 #ifdef __BORLANDC__
16 #pragma hdrstop
17 #endif
18
19 #if wxUSE_RICHTEXT
20
21 #include "wx/richtext/richtexthtml.h"
22 #include "wx/richtext/richtextstyles.h"
23
24 #ifndef WX_PRECOMP
25 #endif
26
27 #include "wx/filename.h"
28 #include "wx/wfstream.h"
29 #include "wx/txtstrm.h"
30
31 #if wxUSE_FILESYSTEM
32 #include "wx/filesys.h"
33 #include "wx/fs_mem.h"
34 #endif
35
// Run-time class information for the handler, naming wxRichTextFileHandler as
// its base class (presumably the standard wxWidgets RTTI macro - confirm
// against wx/object.h).
36 IMPLEMENT_DYNAMIC_CLASS(wxRichTextHTMLHandler, wxRichTextFileHandler)
37
// Counter used by WriteImage() to number the "image%d.<ext>" names it generates;
// it starts at 1 and is incremented after each image written to memory or to a file.
38 int wxRichTextHTMLHandler::sm_fileCounter = 1;
39
40 wxRichTextHTMLHandler::wxRichTextHTMLHandler(const wxString& name, const wxString& ext, int type)
41 : wxRichTextFileHandler(name, ext, type), m_buffer(NULL), m_font(false), m_inTable(false)
42 {
43 m_fontSizeMapping.Add(8);
44 m_fontSizeMapping.Add(10);
45 m_fontSizeMapping.Add(13);
46 m_fontSizeMapping.Add(17);
47 m_fontSizeMapping.Add(22);
48 m_fontSizeMapping.Add(30);
49 m_fontSizeMapping.Add(100);
50 }
51
52 /// Can we handle this filename (if using files)? By default, checks the extension.
53 bool wxRichTextHTMLHandler::CanHandle(const wxString& filename) const
54 {
55 wxString path, file, ext;
56 wxSplitPath(filename, & path, & file, & ext);
57
58 return (ext.Lower() == wxT("html") || ext.Lower() == wxT("htm"));
59 }
60
61
62 #if wxUSE_STREAMS
63 bool wxRichTextHTMLHandler::DoLoadFile(wxRichTextBuffer *WXUNUSED(buffer), wxInputStream& WXUNUSED(stream))
64 {
65 return false;
66 }
67
68 /*
69 * We need to output only _changes_ in character formatting.
70 */
71
72 bool wxRichTextHTMLHandler::DoSaveFile(wxRichTextBuffer *buffer, wxOutputStream& stream)
73 {
74 m_buffer = buffer;
75
76 ClearTemporaryImageLocations();
77
78 buffer->Defragment();
79
80 wxTextOutputStream str(stream);
81
82 wxTextAttrEx currentParaStyle = buffer->GetAttributes();
83 wxTextAttrEx currentCharStyle = buffer->GetAttributes();
84
85 if ((GetFlags() & wxRICHTEXT_HANDLER_NO_HEADER_FOOTER) == 0)
86 str << wxT("<html><head></head><body>\n");
87
88 str << wxT("<table border=0 cellpadding=0 cellspacing=0><tr><td width=\"100%\">");
89
90 OutputFont(currentParaStyle, str);
91
92 m_font = false;
93 m_inTable = false;
94
95 m_indents.Clear();
96 m_listTypes.Clear();
97
98 wxRichTextObjectList::compatibility_iterator node = buffer->GetChildren().GetFirst();
99 while (node)
100 {
101 wxRichTextParagraph* para = wxDynamicCast(node->GetData(), wxRichTextParagraph);
102 wxASSERT (para != NULL);
103
104 if (para)
105 {
106 wxTextAttrEx paraStyle(para->GetCombinedAttributes());
107
108 BeginParagraphFormatting(currentParaStyle, paraStyle, str);
109
110 wxRichTextObjectList::compatibility_iterator node2 = para->GetChildren().GetFirst();
111 while (node2)
112 {
113 wxRichTextObject* obj = node2->GetData();
114 wxRichTextPlainText* textObj = wxDynamicCast(obj, wxRichTextPlainText);
115 if (textObj && !textObj->IsEmpty())
116 {
117 wxTextAttrEx charStyle(para->GetCombinedAttributes(obj->GetAttributes()));
118 BeginCharacterFormatting(currentCharStyle, charStyle, paraStyle, str);
119
120 wxString text = textObj->GetText();
121
122 if (charStyle.HasTextEffects() && (charStyle.GetTextEffects() & wxTEXT_ATTR_EFFECT_CAPITALS))
123 text.MakeUpper();
124
125 wxString toReplace = wxRichTextLineBreakChar;
126 text.Replace(toReplace, wxT("<br>"));
127
128 str << text;
129
130 EndCharacterFormatting(currentCharStyle, charStyle, paraStyle, str);
131 }
132
133 wxRichTextImage* image = wxDynamicCast(obj, wxRichTextImage);
134 if( image && !image->IsEmpty())
135 WriteImage( image, stream );
136
137 node2 = node2->GetNext();
138 }
139
140 EndParagraphFormatting(currentParaStyle, paraStyle, str);
141
142 str << wxT("\n");
143 }
144 node = node->GetNext();
145 }
146
147 CloseLists(-1, str);
148
149 str << wxT("</font>");
150
151 str << wxT("</td></tr></table><p>");
152
153 if ((GetFlags() & wxRICHTEXT_HANDLER_NO_HEADER_FOOTER) == 0)
154 str << wxT("</body></html>");
155
156 str << wxT("\n");
157
158 m_buffer = NULL;
159
160 return true;
161 }
162
163 void wxRichTextHTMLHandler::BeginCharacterFormatting(const wxTextAttrEx& currentStyle, const wxTextAttrEx& thisStyle, const wxTextAttrEx& WXUNUSED(paraStyle), wxTextOutputStream& str)
164 {
165 wxString style;
166
167 // Is there any change in the font properties of the item?
168 if (thisStyle.GetFont().GetFaceName() != currentStyle.GetFont().GetFaceName())
169 {
170 wxString faceName(thisStyle.GetFont().GetFaceName());
171 style += wxString::Format(wxT(" face=\"%s\""), faceName.c_str());
172 }
173 if (thisStyle.GetFont().GetPointSize() != currentStyle.GetFont().GetPointSize())
174 style += wxString::Format(wxT(" size=\"%ld\""), PtToSize(thisStyle.GetFont().GetPointSize()));
175 if (thisStyle.GetTextColour() != currentStyle.GetTextColour() )
176 {
177 wxString color(thisStyle.GetTextColour().GetAsString(wxC2S_HTML_SYNTAX));
178 style += wxString::Format(wxT(" color=\"%s\""), color.c_str());
179 }
180
181 if (style.size())
182 {
183 str << wxString::Format(wxT("<font %s >"), style.c_str());
184 m_font = true;
185 }
186
187 if (thisStyle.GetFont().GetWeight() == wxBOLD)
188 str << wxT("<b>");
189 if (thisStyle.GetFont().GetStyle() == wxITALIC)
190 str << wxT("<i>");
191 if (thisStyle.GetFont().GetUnderlined())
192 str << wxT("<u>");
193
194 if (thisStyle.HasURL())
195 str << wxT("<a href=\"") << thisStyle.GetURL() << wxT("\">");
196 }
197
198 void wxRichTextHTMLHandler::EndCharacterFormatting(const wxTextAttrEx& WXUNUSED(currentStyle), const wxTextAttrEx& thisStyle, const wxTextAttrEx& WXUNUSED(paraStyle), wxTextOutputStream& stream)
199 {
200 if (thisStyle.HasURL())
201 stream << wxT("</a>");
202
203 if (thisStyle.GetFont().GetUnderlined())
204 stream << wxT("</u>");
205 if (thisStyle.GetFont().GetStyle() == wxITALIC)
206 stream << wxT("</i>");
207 if (thisStyle.GetFont().GetWeight() == wxBOLD)
208 stream << wxT("</b>");
209
210 if (m_font)
211 {
212 m_font = false;
213 stream << wxT("</font>");
214 }
215 }
216
217 /// Begin paragraph formatting
218 void wxRichTextHTMLHandler::BeginParagraphFormatting(const wxTextAttrEx& WXUNUSED(currentStyle), const wxTextAttrEx& thisStyle, wxTextOutputStream& str)
219 {
220 if (thisStyle.HasPageBreak())
221 {
222 str << wxT("</tr></td></table>");
223 str << wxT("<div style=\"page-break-after:always\"></div>\n");
224 str << wxT("<table border=0 cellpadding=0 cellspacing=0><tr><td width=\"100%\">");
225 }
226
227 if (thisStyle.HasLeftIndent() && thisStyle.GetLeftIndent() != 0)
228 {
229 if (thisStyle.HasBulletStyle())
230 {
231 int indent = thisStyle.GetLeftIndent();
232
233 // Close levels high than this
234 CloseLists(indent, str);
235
236 if (m_indents.GetCount() > 0 && indent == m_indents.Last())
237 {
238 // Same level, no need to start a new list
239 }
240 else if (m_indents.GetCount() == 0 || indent > m_indents.Last())
241 {
242 m_indents.Add(indent);
243
244 wxString tag;
245 int listType = TypeOfList(thisStyle, tag);
246 m_listTypes.Add(listType);
247
248 wxString align = GetAlignment(thisStyle);
249 str << wxString::Format(wxT("<p align=\"%s\">"), align.c_str());
250
251 str << tag;
252 }
253
254 str << wxT("<li> ");
255 }
256 else
257 {
258 CloseLists(-1, str);
259
260 wxString align = GetAlignment(thisStyle);
261 str << wxString::Format(wxT("<p align=\"%s\">"), align.c_str());
262
263 // Use a table
264 int indentTenthsMM = thisStyle.GetLeftIndent() + thisStyle.GetLeftSubIndent();
265 // TODO: convert to pixels
266 int indentPixels = indentTenthsMM/4;
267 str << wxString::Format(wxT("<table border=0 cellpadding=0 cellspacing=0><tr><td width=\"%d\"></td><td>"), indentPixels);
268
269 OutputFont(thisStyle, str);
270
271 if (thisStyle.GetLeftSubIndent() < 0)
272 {
273 str << SymbolicIndent( - thisStyle.GetLeftSubIndent());
274 }
275
276 m_inTable = true;
277 }
278 }
279 else
280 {
281 CloseLists(-1, str);
282
283 wxString align = GetAlignment(thisStyle);
284 str << wxString::Format(wxT("<p align=\"%s\">"), align.c_str());
285 }
286 }
287
288 /// End paragraph formatting
289 void wxRichTextHTMLHandler::EndParagraphFormatting(const wxTextAttrEx& WXUNUSED(currentStyle), const wxTextAttrEx& thisStyle, wxTextOutputStream& stream)
290 {
291 if (m_inTable)
292 {
293 if (thisStyle.HasFont())
294 stream << wxT("</font>");
295
296 stream << wxT("</td></tr></table>\n");
297 m_inTable = false;
298 }
299 }
300
301 /// Closes lists to level (-1 means close all)
302 void wxRichTextHTMLHandler::CloseLists(int level, wxTextOutputStream& str)
303 {
304 // Close levels high than this
305 int i = m_indents.GetCount()-1;
306 while (i >= 0)
307 {
308 int l = m_indents[i];
309 if (l > level)
310 {
311 if (m_listTypes[i] == 0)
312 str << wxT("</ol>");
313 else
314 str << wxT("</ul>");
315 m_indents.RemoveAt(i);
316 m_listTypes.RemoveAt(i);
317 }
318 else
319 break;
320 i --;
321 }
322 }
323
324 /// Output font tag
325 void wxRichTextHTMLHandler::OutputFont(const wxTextAttrEx& style, wxTextOutputStream& stream)
326 {
327 if (style.HasFont())
328 {
329 stream << wxString::Format(wxT("<font face=\"%s\" size=\"%ld\" color=\"%s\" >"),
330 style.GetFont().GetFaceName().c_str(), PtToSize(style.GetFont().GetPointSize()),
331 style.GetTextColour().GetAsString(wxC2S_HTML_SYNTAX).c_str());
332 }
333 }
334
335 int wxRichTextHTMLHandler::TypeOfList( const wxTextAttrEx& thisStyle, wxString& tag )
336 {
337 // We can use number attribute of li tag but not all the browsers support it.
338 // also wxHtmlWindow doesn't support type attribute.
339
340 bool m_is_ul = false;
341 if (thisStyle.GetBulletStyle() == (wxTEXT_ATTR_BULLET_STYLE_ARABIC|wxTEXT_ATTR_BULLET_STYLE_PERIOD))
342 tag = wxT("<ol type=\"1\">");
343 else if (thisStyle.GetBulletStyle() == wxTEXT_ATTR_BULLET_STYLE_LETTERS_UPPER)
344 tag = wxT("<ol type=\"A\">");
345 else if (thisStyle.GetBulletStyle() == wxTEXT_ATTR_BULLET_STYLE_LETTERS_LOWER)
346 tag = wxT("<ol type=\"a\">");
347 else if (thisStyle.GetBulletStyle() == wxTEXT_ATTR_BULLET_STYLE_ROMAN_UPPER)
348 tag = wxT("<ol type=\"I\">");
349 else if (thisStyle.GetBulletStyle() == wxTEXT_ATTR_BULLET_STYLE_ROMAN_LOWER)
350 tag = wxT("<ol type=\"i\">");
351 else
352 {
353 tag = wxT("<ul>");
354 m_is_ul = true;
355 }
356
357 if (m_is_ul)
358 return 1;
359 else
360 return 0;
361 }
362
363 wxString wxRichTextHTMLHandler::GetAlignment( const wxTextAttrEx& thisStyle )
364 {
365 switch( thisStyle.GetAlignment() )
366 {
367 case wxTEXT_ALIGNMENT_LEFT:
368 return wxT("left");
369 case wxTEXT_ALIGNMENT_RIGHT:
370 return wxT("right");
371 case wxTEXT_ALIGNMENT_CENTER:
372 return wxT("center");
373 case wxTEXT_ALIGNMENT_JUSTIFIED:
374 return wxT("justify");
375 default:
376 return wxT("left");
377 }
378 }
379
380 void wxRichTextHTMLHandler::WriteImage(wxRichTextImage* image, wxOutputStream& stream)
381 {
382 wxTextOutputStream str(stream);
383
384 str << wxT("<img src=\"");
385
386 #if wxUSE_FILESYSTEM
387 if (GetFlags() & wxRICHTEXT_HANDLER_SAVE_IMAGES_TO_MEMORY)
388 {
389 if (!image->GetImage().Ok() && image->GetImageBlock().GetData())
390 image->LoadFromBlock();
391 if (image->GetImage().Ok() && !image->GetImageBlock().GetData())
392 image->MakeBlock();
393
394 if (image->GetImage().Ok())
395 {
396 wxString ext(image->GetImageBlock().GetExtension());
397 wxString tempFilename(wxString::Format(wxT("image%d.%s"), sm_fileCounter, (const wxChar*) ext));
398 wxMemoryFSHandler::AddFile(tempFilename, image->GetImage(), image->GetImageBlock().GetImageType());
399
400 m_imageLocations.Add(tempFilename);
401
402 str << wxT("memory:") << tempFilename;
403 }
404 else
405 str << wxT("memory:?");
406
407 sm_fileCounter ++;
408 }
409 else if (GetFlags() & wxRICHTEXT_HANDLER_SAVE_IMAGES_TO_FILES)
410 {
411 if (!image->GetImage().Ok() && image->GetImageBlock().GetData())
412 image->LoadFromBlock();
413 if (image->GetImage().Ok() && !image->GetImageBlock().GetData())
414 image->MakeBlock();
415
416 if (image->GetImage().Ok())
417 {
418 wxString tempDir(GetTempDir());
419 if (tempDir.IsEmpty())
420 tempDir = wxFileName::GetTempDir();
421
422 wxString ext(image->GetImageBlock().GetExtension());
423 wxString tempFilename(wxString::Format(wxT("%s/image%d.%s"), (const wxChar*) tempDir, sm_fileCounter, (const wxChar*) ext));
424 image->GetImageBlock().Write(tempFilename);
425
426 m_imageLocations.Add(tempFilename);
427
428 str << wxFileSystem::FileNameToURL(tempFilename);
429 }
430 else
431 str << wxT("file:?");
432
433 sm_fileCounter ++;
434 }
435 else // if (GetFlags() & wxRICHTEXT_HANDLER_SAVE_IMAGES_TO_BASE64) // this is implied
436 #endif
437 {
438 str << wxT("data:");
439 str << GetMimeType(image->GetImageBlock().GetImageType());
440 str << wxT(";base64,");
441
442 if (image->GetImage().Ok() && !image->GetImageBlock().GetData())
443 image->MakeBlock();
444
445 wxChar* data = b64enc( image->GetImageBlock().GetData(), image->GetImageBlock().GetDataSize() );
446 str << data;
447
448 delete[] data;
449 }
450
451 str << wxT("\" />");
452 }
453
454 long wxRichTextHTMLHandler::PtToSize(long size)
455 {
456 int i;
457 int len = m_fontSizeMapping.GetCount();
458 for (i = 0; i < len; i++)
459 if (size <= m_fontSizeMapping[i])
460 return i+1;
461 return 7;
462 }
463
464 wxString wxRichTextHTMLHandler::SymbolicIndent(long indent)
465 {
466 wxString in;
467 for(;indent > 0; indent -= 20)
468 in.Append( wxT("&nbsp;") );
469 return in;
470 }
471
472 const wxChar* wxRichTextHTMLHandler::GetMimeType(int imageType)
473 {
474 switch(imageType)
475 {
476 case wxBITMAP_TYPE_BMP:
477 return wxT("image/bmp");
478 case wxBITMAP_TYPE_TIF:
479 return wxT("image/tiff");
480 case wxBITMAP_TYPE_GIF:
481 return wxT("image/gif");
482 case wxBITMAP_TYPE_PNG:
483 return wxT("image/png");
484 case wxBITMAP_TYPE_JPEG:
485 return wxT("image/jpeg");
486 default:
487 return wxT("image/unknown");
488 }
489 }
490
491 // exim-style base64 encoder
492 wxChar* wxRichTextHTMLHandler::b64enc( unsigned char* input, size_t in_len )
493 {
494 // elements of enc64 array must be 8 bit values
495 // otherwise encoder will fail
496 // hmmm.. Does wxT macro define a char as 16 bit value
497 // when compiling with UNICODE option?
498 static const wxChar enc64[] = wxT("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
499 wxChar* output = new wxChar[4*((in_len+2)/3)+1];
500 wxChar* p = output;
501
502 while( in_len-- > 0 )
503 {
504 register wxChar a, b;
505
506 a = *input++;
507
508 *p++ = enc64[ (a >> 2) & 0x3f ];
509
510 if( in_len-- == 0 )
511 {
512 *p++ = enc64[ (a << 4 ) & 0x30 ];
513 *p++ = '=';
514 *p++ = '=';
515 break;
516 }
517
518 b = *input++;
519
520 *p++ = enc64[(( a << 4 ) | ((b >> 4) &0xf )) & 0x3f];
521
522 if( in_len-- == 0 )
523 {
524 *p++ = enc64[ (b << 2) & 0x3f ];
525 *p++ = '=';
526 break;
527 }
528
529 a = *input++;
530
531 *p++ = enc64[ ((( b << 2 ) & 0x3f ) | ((a >> 6)& 0x3)) & 0x3f ];
532
533 *p++ = enc64[ a & 0x3f ];
534 }
535 *p = 0;
536
537 return output;
538 }
539 #endif
540 // wxUSE_STREAMS
541
542 /// Delete the in-memory or temporary files generated by the last operation
543 bool wxRichTextHTMLHandler::DeleteTemporaryImages()
544 {
545 return DeleteTemporaryImages(GetFlags(), m_imageLocations);
546 }
547
548 /// Delete the in-memory or temporary files generated by the last operation
549 bool wxRichTextHTMLHandler::DeleteTemporaryImages(int flags, const wxArrayString& imageLocations)
550 {
551 size_t i;
552 for (i = 0; i < imageLocations.GetCount(); i++)
553 {
554 wxString location = imageLocations[i];
555
556 if (flags & wxRICHTEXT_HANDLER_SAVE_IMAGES_TO_MEMORY)
557 {
558 #if wxUSE_FILESYSTEM
559 wxMemoryFSHandler::RemoveFile(location);
560 #endif
561 }
562 else if (flags & wxRICHTEXT_HANDLER_SAVE_IMAGES_TO_FILES)
563 {
564 if (wxFileExists(location))
565 wxRemoveFile(location);
566 }
567 }
568
569 return true;
570 }
571
572
573 #endif
574 // wxUSE_RICHTEXT
575