added conversions to/from UTF 16/32 LE/BE (patch 809685)
[wxWidgets.git] / src / common / fmapbase.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: common/fmapbase.cpp
3 // Purpose: wxFontMapperBase class implementation
4 // Author: Vadim Zeitlin
5 // Modified by:
6 // Created: 21.06.2003 (extracted from common/fontmap.cpp)
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999-2003 Vadim Zeitlin <vadim@wxwindows.org>
9 // License: wxWindows license
10 ///////////////////////////////////////////////////////////////////////////////
11
12 // ============================================================================
13 // declarations
14 // ============================================================================
15
16 // ----------------------------------------------------------------------------
17 // headers
18 // ----------------------------------------------------------------------------
19
20 // for compilers that support precompilation, includes "wx.h".
21 #include "wx/wxprec.h"
22
23 #ifdef __BORLANDC__
24 #pragma hdrstop
25 #endif
26
27 #if wxUSE_FONTMAP
28
29 #ifndef WX_PRECOMP
30 #include "wx/app.h"
31 #include "wx/log.h"
32 #include "wx/intl.h"
33 #endif //WX_PRECOMP
34
35 #if defined(__WXMSW__)
36 #include "wx/msw/private.h" // includes windows.h for LOGFONT
37 #include "wx/msw/winundef.h"
38 #endif
39
40 #include "wx/fontmap.h"
41 #include "wx/fmappriv.h"
42
43 #include "wx/apptrait.h"
44 #include "wx/module.h"
45
46 // wxMemoryConfig uses wxFileConfig
47 #if wxUSE_CONFIG && wxUSE_FILECONFIG
48 #include "wx/config.h"
49 #include "wx/memconf.h"
50 #endif
51
52 // ----------------------------------------------------------------------------
53 // constants
54 // ----------------------------------------------------------------------------
55
56 // encodings supported by GetEncodingDescription
57 static wxFontEncoding gs_encodings[] =
58 {
59 wxFONTENCODING_ISO8859_1,
60 wxFONTENCODING_ISO8859_2,
61 wxFONTENCODING_ISO8859_3,
62 wxFONTENCODING_ISO8859_4,
63 wxFONTENCODING_ISO8859_5,
64 wxFONTENCODING_ISO8859_6,
65 wxFONTENCODING_ISO8859_7,
66 wxFONTENCODING_ISO8859_8,
67 wxFONTENCODING_ISO8859_9,
68 wxFONTENCODING_ISO8859_10,
69 wxFONTENCODING_ISO8859_11,
70 wxFONTENCODING_ISO8859_12,
71 wxFONTENCODING_ISO8859_13,
72 wxFONTENCODING_ISO8859_14,
73 wxFONTENCODING_ISO8859_15,
74 wxFONTENCODING_KOI8,
75 wxFONTENCODING_CP932,
76 wxFONTENCODING_CP936,
77 wxFONTENCODING_CP949,
78 wxFONTENCODING_CP950,
79 wxFONTENCODING_CP1250,
80 wxFONTENCODING_CP1251,
81 wxFONTENCODING_CP1252,
82 wxFONTENCODING_CP1253,
83 wxFONTENCODING_CP1254,
84 wxFONTENCODING_CP1255,
85 wxFONTENCODING_CP1256,
86 wxFONTENCODING_CP1257,
87 wxFONTENCODING_CP437,
88 wxFONTENCODING_UTF7,
89 wxFONTENCODING_UTF8,
90 wxFONTENCODING_UTF16,
91 wxFONTENCODING_UTF16BE,
92 wxFONTENCODING_UTF16LE,
93 wxFONTENCODING_UTF32,
94 wxFONTENCODING_UTF32BE,
95 wxFONTENCODING_UTF32LE,
96 wxFONTENCODING_EUC_JP,
97 };
98
99 // the descriptions for them
100 static const wxChar* gs_encodingDescs[] =
101 {
102 wxTRANSLATE( "Western European (ISO-8859-1)" ),
103 wxTRANSLATE( "Central European (ISO-8859-2)" ),
104 wxTRANSLATE( "Esperanto (ISO-8859-3)" ),
105 wxTRANSLATE( "Baltic (old) (ISO-8859-4)" ),
106 wxTRANSLATE( "Cyrillic (ISO-8859-5)" ),
107 wxTRANSLATE( "Arabic (ISO-8859-6)" ),
108 wxTRANSLATE( "Greek (ISO-8859-7)" ),
109 wxTRANSLATE( "Hebrew (ISO-8859-8)" ),
110 wxTRANSLATE( "Turkish (ISO-8859-9)" ),
111 wxTRANSLATE( "Nordic (ISO-8859-10)" ),
112 wxTRANSLATE( "Thai (ISO-8859-11)" ),
113 wxTRANSLATE( "Indian (ISO-8859-12)" ),
114 wxTRANSLATE( "Baltic (ISO-8859-13)" ),
115 wxTRANSLATE( "Celtic (ISO-8859-14)" ),
116 wxTRANSLATE( "Western European with Euro (ISO-8859-15)" ),
117 wxTRANSLATE( "KOI8-R" ),
118 wxTRANSLATE( "Windows Japanese (CP 932)" ),
119 wxTRANSLATE( "Windows Chinese Simplified (CP 936)" ),
120 wxTRANSLATE( "Windows Korean (CP 949)" ),
121 wxTRANSLATE( "Windows Chinese Traditional (CP 950)" ),
122 wxTRANSLATE( "Windows Central European (CP 1250)" ),
123 wxTRANSLATE( "Windows Cyrillic (CP 1251)" ),
124 wxTRANSLATE( "Windows Western European (CP 1252)" ),
125 wxTRANSLATE( "Windows Greek (CP 1253)" ),
126 wxTRANSLATE( "Windows Turkish (CP 1254)" ),
127 wxTRANSLATE( "Windows Hebrew (CP 1255)" ),
128 wxTRANSLATE( "Windows Arabic (CP 1256)" ),
129 wxTRANSLATE( "Windows Baltic (CP 1257)" ),
130 wxTRANSLATE( "Windows/DOS OEM (CP 437)" ),
131 wxTRANSLATE( "Unicode 7 bit (UTF-7)" ),
132 wxTRANSLATE( "Unicode 8 bit (UTF-8)" ),
133 wxTRANSLATE( "Unicode 16 bit (UTF-16)" ),
134 wxTRANSLATE( "Unicode 16 bit Big Endian (UTF-16BE)" ),
135 wxTRANSLATE( "Unicode 16 bit Little Endian (UTF-16LE)" ),
136 wxTRANSLATE( "Unicode 32 bit (UTF-32)" ),
137 wxTRANSLATE( "Unicode 32 bit Big Endian (UTF-32BE)" ),
138 wxTRANSLATE( "Unicode 32 bit Little Endian (UTF-32LE)" ),
139 wxTRANSLATE( "Extended Unix Codepage for Japanese (EUC-JP)" ),
140 };
141
142 // and the internal names (these are not translated on purpose!)
143 static const wxChar* gs_encodingNames[] =
144 {
145 wxT( "iso-8859-1" ),
146 wxT( "iso-8859-2" ),
147 wxT( "iso-8859-3" ),
148 wxT( "iso-8859-4" ),
149 wxT( "iso-8859-5" ),
150 wxT( "iso-8859-6" ),
151 wxT( "iso-8859-7" ),
152 wxT( "iso-8859-8" ),
153 wxT( "iso-8859-9" ),
154 wxT( "iso-8859-10" ),
155 wxT( "iso-8859-11" ),
156 wxT( "iso-8859-12" ),
157 wxT( "iso-8859-13" ),
158 wxT( "iso-8859-14" ),
159 wxT( "iso-8859-15" ),
160 wxT( "koi8-r" ),
161 wxT( "windows-932" ),
162 wxT( "windows-936" ),
163 wxT( "windows-949" ),
164 wxT( "windows-950" ),
165 wxT( "windows-1250" ),
166 wxT( "windows-1251" ),
167 wxT( "windows-1252" ),
168 wxT( "windows-1253" ),
169 wxT( "windows-1254" ),
170 wxT( "windows-1255" ),
171 wxT( "windows-1256" ),
172 wxT( "windows-1257" ),
173 wxT( "windows-437" ),
174 wxT( "utf-7" ),
175 wxT( "utf-8" ),
176 wxT( "utf-16" ),
177 wxT( "utf-16be" ),
178 wxT( "utf-16le" ),
179 wxT( "utf-32" ),
180 wxT( "utf-32be" ),
181 wxT( "utf-32le" ),
182 wxT( "euc-jp" ),
183 };
184
185 wxCOMPILE_TIME_ASSERT( WXSIZEOF(gs_encodingDescs) == WXSIZEOF(gs_encodings) &&
186 WXSIZEOF(gs_encodingNames) == WXSIZEOF(gs_encodings),
187 EncodingsArraysNotInSync );
188
189 // ----------------------------------------------------------------------------
190 // private classes
191 // ----------------------------------------------------------------------------
192
193 // clean up the font mapper object
194 class wxFontMapperModule : public wxModule
195 {
196 public:
197 wxFontMapperModule() : wxModule() { }
198 virtual bool OnInit() { return TRUE; }
199 virtual void OnExit() { delete wxFontMapper::Set(NULL); }
200
201 DECLARE_DYNAMIC_CLASS(wxFontMapperModule)
202 };
203
204 IMPLEMENT_DYNAMIC_CLASS(wxFontMapperModule, wxModule)
205
206
207 // ============================================================================
208 // wxFontMapperBase implementation
209 // ============================================================================
210
// the global font mapper object: NULL until Get() creates one or Set()
// installs one
wxFontMapper *wxFontMapperBase::sm_instance = NULL;
212
213 // ----------------------------------------------------------------------------
214 // ctor and dtor
215 // ----------------------------------------------------------------------------
216
217 wxFontMapperBase::wxFontMapperBase()
218 {
219 #if wxUSE_CONFIG && wxUSE_FILECONFIG
220 m_config = NULL;
221 m_configIsDummy = FALSE;
222 #endif // wxUSE_CONFIG
223 }
224
225 wxFontMapperBase::~wxFontMapperBase()
226 {
227 #if wxUSE_CONFIG && wxUSE_FILECONFIG
228 if ( m_configIsDummy )
229 delete m_config;
230 #endif // wxUSE_CONFIG
231 }
232
233 /* static */
234 wxFontMapper *wxFontMapperBase::Get()
235 {
236 if ( !sm_instance )
237 {
238 wxAppTraits *traits = wxTheApp ? wxTheApp->GetTraits() : NULL;
239 if ( traits )
240 {
241 sm_instance = traits->CreateFontMapper();
242
243 wxASSERT_MSG( sm_instance,
244 _T("wxAppTraits::CreateFontMapper() failed") );
245 }
246
247 if ( !sm_instance )
248 {
249 // last resort: we must create something because the existing code
250 // relies on always having a valid font mapper object
251 sm_instance = (wxFontMapper *)new wxFontMapperBase;
252 }
253 }
254
255 return sm_instance;
256 }
257
258 /* static */
259 wxFontMapper *wxFontMapperBase::Set(wxFontMapper *mapper)
260 {
261 wxFontMapper *old = sm_instance;
262 sm_instance = mapper;
263 return old;
264 }
265
266 #if wxUSE_CONFIG && wxUSE_FILECONFIG
267
268 // ----------------------------------------------------------------------------
269 // config usage customisation
270 // ----------------------------------------------------------------------------
271
272 /* static */
273 const wxChar *wxFontMapperBase::GetDefaultConfigPath()
274 {
275 return FONTMAPPER_ROOT_PATH;
276 }
277
278 void wxFontMapperBase::SetConfigPath(const wxString& prefix)
279 {
280 wxCHECK_RET( !prefix.IsEmpty() && prefix[0] == wxCONFIG_PATH_SEPARATOR,
281 wxT("an absolute path should be given to wxFontMapper::SetConfigPath()") );
282
283 m_configRootPath = prefix;
284 }
285
286 // ----------------------------------------------------------------------------
287 // get config object and path for it
288 // ----------------------------------------------------------------------------
289
290 wxConfigBase *wxFontMapperBase::GetConfig()
291 {
292 if ( !m_config )
293 {
294 // try the default
295 m_config = wxConfig::Get(FALSE /*don't create on demand*/ );
296
297 if ( !m_config )
298 {
299 // we still want to have a config object because otherwise we would
300 // keep asking the user the same questions in the interactive mode,
301 // so create a dummy config which won't write to any files/registry
302 // but will allow us to remember the results of the questions at
303 // least during this run
304 m_config = new wxMemoryConfig;
305 m_configIsDummy = TRUE;
306 // VS: we can't call wxConfig::Set(m_config) here because that would
307 // disable automatic wxConfig instance creation if this code was
308 // called before wxApp::OnInit (this happens in wxGTK -- it sets
309 // default wxFont encoding in wxApp::Initialize())
310 }
311 }
312
313 if ( m_configIsDummy && wxConfig::Get(FALSE) != NULL )
314 {
315 // VS: in case we created dummy m_config (see above), we want to switch back
316 // to the real one as soon as one becomes available.
317 delete m_config;
318 m_config = wxConfig::Get(FALSE);
319 m_configIsDummy = FALSE;
320 // FIXME: ideally, we should add keys from dummy config to the real one now,
321 // but it is a low-priority task because typical wxWin application
322 // either doesn't use wxConfig at all or creates wxConfig object in
323 // wxApp::OnInit(), before any real interaction with the user takes
324 // place...
325 }
326
327 return m_config;
328 }
329
330 const wxString& wxFontMapperBase::GetConfigPath()
331 {
332 if ( !m_configRootPath )
333 {
334 // use the default
335 m_configRootPath = GetDefaultConfigPath();
336 }
337
338 return m_configRootPath;
339 }
340
341 // ----------------------------------------------------------------------------
342 // config helpers
343 // ----------------------------------------------------------------------------
344
345 bool wxFontMapperBase::ChangePath(const wxString& pathNew, wxString *pathOld)
346 {
347 wxConfigBase *config = GetConfig();
348 if ( !config )
349 return FALSE;
350
351 *pathOld = config->GetPath();
352
353 wxString path = GetConfigPath();
354 if ( path.IsEmpty() || path.Last() != wxCONFIG_PATH_SEPARATOR )
355 {
356 path += wxCONFIG_PATH_SEPARATOR;
357 }
358
359 wxASSERT_MSG( !pathNew || (pathNew[0] != wxCONFIG_PATH_SEPARATOR),
360 wxT("should be a relative path") );
361
362 path += pathNew;
363
364 config->SetPath(path);
365
366 return TRUE;
367 }
368
369 void wxFontMapperBase::RestorePath(const wxString& pathOld)
370 {
371 GetConfig()->SetPath(pathOld);
372 }
373
374 #endif
375
376 // ----------------------------------------------------------------------------
377 // charset/encoding correspondence
378 // ----------------------------------------------------------------------------
379
380 wxFontEncoding
381 wxFontMapperBase::CharsetToEncoding(const wxString& charset,
382 bool WXUNUSED(interactive))
383 {
384 int enc = NonInteractiveCharsetToEncoding(charset);
385 if ( enc == wxFONTENCODING_UNKNOWN )
386 {
387 // we should return wxFONTENCODING_SYSTEM from here for unknown
388 // encodings
389 enc = wxFONTENCODING_SYSTEM;
390 }
391
392 return (wxFontEncoding)enc;
393 }
394
395 int
396 wxFontMapperBase::NonInteractiveCharsetToEncoding(const wxString& charset)
397 {
398 wxFontEncoding encoding = wxFONTENCODING_SYSTEM;
399
400 // we're going to modify it, make a copy
401 wxString cs = charset;
402
403 #if wxUSE_CONFIG && wxUSE_FILECONFIG
404 // first try the user-defined settings
405 wxFontMapperPathChanger path(this, FONTMAPPER_CHARSET_PATH);
406 if ( path.IsOk() )
407 {
408 wxConfigBase *config = GetConfig();
409
410 // do we have an encoding for this charset?
411 long value = config->Read(charset, -1l);
412 if ( value != -1 )
413 {
414 if ( value == wxFONTENCODING_UNKNOWN )
415 {
416 // don't try to find it, in particular don't ask the user
417 return value;
418 }
419
420 if ( value >= 0 && value <= wxFONTENCODING_MAX )
421 {
422 encoding = (wxFontEncoding)value;
423 }
424 else
425 {
426 wxLogDebug(wxT("corrupted config data: invalid encoding %ld for charset '%s' ignored"),
427 value, charset.c_str());
428 }
429 }
430
431 if ( encoding == wxFONTENCODING_SYSTEM )
432 {
433 // may be we have an alias?
434 config->SetPath(FONTMAPPER_CHARSET_ALIAS_PATH);
435
436 wxString alias = config->Read(charset);
437 if ( !!alias )
438 {
439 // yes, we do - use it instead
440 cs = alias;
441 }
442 }
443 }
444 #endif // wxUSE_CONFIG
445
446 // if didn't find it there, try to recognize it ourselves
447 if ( encoding == wxFONTENCODING_SYSTEM )
448 {
449 // trim any spaces
450 cs.Trim(true);
451 cs.Trim(false);
452
453 // discard the optional quotes
454 if ( !cs.empty() )
455 {
456 if ( cs[0u] == _T('"') && cs.Last() == _T('"') )
457 {
458 cs = wxString(cs.c_str(), cs.length() - 1);
459 }
460 }
461
462 cs.MakeUpper();
463
464 if ( cs.empty() || cs == _T("US-ASCII") )
465 {
466 encoding = wxFONTENCODING_DEFAULT;
467 }
468 else if ( cs == wxT("UTF-7") )
469 {
470 encoding = wxFONTENCODING_UTF7;
471 }
472 else if ( cs == wxT("UTF-8") )
473 {
474 encoding = wxFONTENCODING_UTF8;
475 }
476 else if ( cs == wxT("UTF-16") )
477 {
478 encoding = wxFONTENCODING_UTF16;
479 }
480 else if ( cs == wxT("UTF-16BE") )
481 {
482 encoding = wxFONTENCODING_UTF16BE;
483 }
484 else if ( cs == wxT("UTF-16LE") )
485 {
486 encoding = wxFONTENCODING_UTF16LE;
487 }
488 else if ( cs == wxT("UTF-32") || cs == wxT("UCS-4") )
489 {
490 encoding = wxFONTENCODING_UTF32;
491 }
492 else if ( cs == wxT("UTF-32BE") || cs == wxT("UCS-4BE") )
493 {
494 encoding = wxFONTENCODING_UTF32BE;
495 }
496 else if ( cs == wxT("UTF-32LE") || cs == wxT("UCS-4LE") )
497 {
498 encoding = wxFONTENCODING_UTF32LE;
499 }
500 else if ( cs == wxT("GB2312") )
501 {
502 encoding = wxFONTENCODING_GB2312;
503 }
504 else if ( cs == wxT("BIG5") )
505 {
506 encoding = wxFONTENCODING_BIG5;
507 }
508 else if ( cs == wxT("SJIS") ||
509 cs == wxT("SHIFT_JIS") ||
510 cs == wxT("SHIFT-JIS") )
511 {
512 encoding = wxFONTENCODING_SHIFT_JIS;
513 }
514 else if ( cs == wxT("EUC-JP") ||
515 cs == wxT("EUC_JP") )
516 {
517 encoding = wxFONTENCODING_EUC_JP;
518 }
519 else if ( cs == wxT("KOI8-R") ||
520 cs == wxT("KOI8-U") ||
521 cs == wxT("KOI8-RU") )
522 {
523 // although koi8-ru is not strictly speaking the same as koi8-r,
524 // they are similar enough to make mapping it to koi8 better than
525 // not reckognizing it at all
526 encoding = wxFONTENCODING_KOI8;
527 }
528 else if ( cs.Left(3) == wxT("ISO") )
529 {
530 // the dash is optional (or, to be exact, it is not, but
531 // several brokenmails "forget" it)
532 const wxChar *p = cs.c_str() + 3;
533 if ( *p == wxT('-') )
534 p++;
535
536 // printf( "iso %s\n", (const char*) cs.ToAscii() );
537
538 unsigned int value;
539 if ( wxSscanf(p, wxT("8859-%u"), &value) == 1 )
540 {
541 // printf( "value %d\n", (int)value );
542
543 // make it 0 based and check that it is strictly positive in
544 // the process (no such thing as iso8859-0 encoding)
545 if ( (value-- > 0) &&
546 (value < wxFONTENCODING_ISO8859_MAX -
547 wxFONTENCODING_ISO8859_1) )
548 {
549 // it's a valid ISO8859 encoding
550 value += wxFONTENCODING_ISO8859_1;
551 encoding = (wxFontEncoding)value;
552 }
553 }
554 }
555 else if ( cs.Left(4) == wxT("8859") )
556 {
557 const wxChar *p = cs.c_str();
558
559 unsigned int value;
560 if ( wxSscanf(p, wxT("8859-%u"), &value) == 1 )
561 {
562 // printf( "value %d\n", (int)value );
563
564 // make it 0 based and check that it is strictly positive in
565 // the process (no such thing as iso8859-0 encoding)
566 if ( (value-- > 0) &&
567 (value < wxFONTENCODING_ISO8859_MAX -
568 wxFONTENCODING_ISO8859_1) )
569 {
570 // it's a valid ISO8859 encoding
571 value += wxFONTENCODING_ISO8859_1;
572 encoding = (wxFontEncoding)value;
573 }
574 }
575 }
576 else // check for Windows charsets
577 {
578 size_t len;
579 if ( cs.Left(7) == wxT("WINDOWS") )
580 {
581 len = 7;
582 }
583 else if ( cs.Left(2) == wxT("CP") )
584 {
585 len = 2;
586 }
587 else // not a Windows encoding
588 {
589 len = 0;
590 }
591
592 if ( len )
593 {
594 const wxChar *p = cs.c_str() + len;
595 if ( *p == wxT('-') )
596 p++;
597
598 int value;
599 if ( wxSscanf(p, wxT("%u"), &value) == 1 )
600 {
601 if ( value >= 1250 )
602 {
603 value -= 1250;
604 if ( value < wxFONTENCODING_CP12_MAX -
605 wxFONTENCODING_CP1250 )
606 {
607 // a valid Windows code page
608 value += wxFONTENCODING_CP1250;
609 encoding = (wxFontEncoding)value;
610 }
611 }
612
613 switch ( value )
614 {
615 case 932:
616 encoding = wxFONTENCODING_CP932;
617 break;
618
619 case 936:
620 encoding = wxFONTENCODING_CP936;
621 break;
622
623 case 949:
624 encoding = wxFONTENCODING_CP949;
625 break;
626
627 case 950:
628 encoding = wxFONTENCODING_CP950;
629 break;
630 }
631 }
632 }
633 }
634 //else: unknown
635 }
636
637 return encoding;
638 }
639
640 /* static */
641 size_t wxFontMapperBase::GetSupportedEncodingsCount()
642 {
643 return WXSIZEOF(gs_encodings);
644 }
645
646 /* static */
647 wxFontEncoding wxFontMapperBase::GetEncoding(size_t n)
648 {
649 wxCHECK_MSG( n < WXSIZEOF(gs_encodings), wxFONTENCODING_SYSTEM,
650 _T("wxFontMapper::GetEncoding(): invalid index") );
651
652 return gs_encodings[n];
653 }
654
655 /* static */
656 wxString wxFontMapperBase::GetEncodingDescription(wxFontEncoding encoding)
657 {
658 if ( encoding == wxFONTENCODING_DEFAULT )
659 {
660 return _("Default encoding");
661 }
662
663 const size_t count = WXSIZEOF(gs_encodingDescs);
664
665 for ( size_t i = 0; i < count; i++ )
666 {
667 if ( gs_encodings[i] == encoding )
668 {
669 return wxGetTranslation(gs_encodingDescs[i]);
670 }
671 }
672
673 wxString str;
674 str.Printf(_("Unknown encoding (%d)"), encoding);
675
676 return str;
677 }
678
679 /* static */
680 wxString wxFontMapperBase::GetEncodingName(wxFontEncoding encoding)
681 {
682 if ( encoding == wxFONTENCODING_DEFAULT )
683 {
684 return _("default");
685 }
686
687 const size_t count = WXSIZEOF(gs_encodingNames);
688
689 for ( size_t i = 0; i < count; i++ )
690 {
691 if ( gs_encodings[i] == encoding )
692 {
693 return gs_encodingNames[i];
694 }
695 }
696
697 wxString str;
698 str.Printf(_("unknown-%d"), encoding);
699
700 return str;
701 }
702
703 #endif // wxUSE_FONTMAP
704