/////////////////////////////////////////////////////////////////////////////
// Name:        wx/encconv.h
// Purpose:     wxEncodingConverter class for converting between different
//              font encodings
// Author:      Vaclav Slavik
// Copyright:   (c) 1999 Vaclav Slavik
// Licence:     wxWindows licence
/////////////////////////////////////////////////////////////////////////////
| 9 | |
| 10 | #ifndef _WX_ENCCONV_H_ |
| 11 | #define _WX_ENCCONV_H_ |
| 12 | |
| 13 | #include "wx/defs.h" |
| 14 | |
| 15 | #include "wx/object.h" |
| 16 | #include "wx/fontenc.h" |
| 17 | #include "wx/dynarray.h" |
| 18 | |
// ----------------------------------------------------------------------------
// constants
// ----------------------------------------------------------------------------
| 22 | |
// Conversion methods accepted by wxEncodingConverter::Init(). They select
// what happens to a character that does not exist in the output encoding.
enum
{
    // Copy unconvertible characters to the output unchanged (their integer
    // values stay the same), following the behaviour of GNU Recode.
    wxCONVERT_STRICT,

    // Try some (lossy) substitutions, e.g. replace accented capitals by
    // ordinary capitals, or en-dash/em-dash by '-'.
    wxCONVERT_SUBSTITUTE
};
| 28 | |
| 29 | |
// Platform identifiers accepted by
// wxEncodingConverter::GetPlatformEquivalents().
enum
{
    // The platform this binary was compiled for.
    wxPLATFORM_CURRENT = -1,

    // Explicit platforms, numbered consecutively from 0.
    wxPLATFORM_UNIX = 0,
    wxPLATFORM_WINDOWS,
    wxPLATFORM_OS2,
    wxPLATFORM_MAC
};
| 39 | |
// ----------------------------------------------------------------------------
// types
// ----------------------------------------------------------------------------
| 43 | |
| 44 | WX_DEFINE_ARRAY_INT(wxFontEncoding, wxFontEncodingArray); |
| 45 | |
| 46 | //-------------------------------------------------------------------------------- |
| 47 | // wxEncodingConverter |
| 48 | // This class is capable of converting strings between any two |
| 49 | // 8bit encodings/charsets. It can also convert from/to Unicode |
| 50 | //-------------------------------------------------------------------------------- |
| 51 | |
| 52 | class WXDLLIMPEXP_BASE wxEncodingConverter : public wxObject |
| 53 | { |
| 54 | public: |
| 55 | |
| 56 | wxEncodingConverter(); |
| 57 | virtual ~wxEncodingConverter() { if (m_Table) delete[] m_Table; } |
| 58 | |
| 59 | // Initialize conversion. Both output or input encoding may |
| 60 | // be wxFONTENCODING_UNICODE, but only if wxUSE_WCHAR_T is set to 1. |
| 61 | // |
| 62 | // All subsequent calls to Convert() will interpret it's argument |
| 63 | // as a string in input_enc encoding and will output string in |
| 64 | // output_enc encoding. |
| 65 | // |
| 66 | // You must call this method before calling Convert. You may call |
| 67 | // it more than once in order to switch to another conversion |
| 68 | // |
| 69 | // Method affects behaviour of Convert() in case input character |
| 70 | // cannot be converted because it does not exist in output encoding: |
| 71 | // wxCONVERT_STRICT -- |
| 72 | // follow behaviour of GNU Recode - just copy unconvertable |
| 73 | // characters to output and don't change them (it's integer |
| 74 | // value will stay the same) |
| 75 | // wxCONVERT_SUBSTITUTE -- |
| 76 | // try some (lossy) substitutions - e.g. replace |
| 77 | // unconvertable latin capitals with acute by ordinary |
| 78 | // capitals, replace en-dash or em-dash by '-' etc. |
| 79 | // both modes gurantee that output string will have same length |
| 80 | // as input string |
| 81 | // |
| 82 | // Returns false if given conversion is impossible, true otherwise |
| 83 | // (conversion may be impossible either if you try to convert |
| 84 | // to Unicode with non-Unicode build of wxWidgets or if input |
| 85 | // or output encoding is not supported.) |
| 86 | bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method = wxCONVERT_STRICT); |
| 87 | |
| 88 | // Convert input string according to settings passed to Init. |
| 89 | // Note that you must call Init before using Convert! |
| 90 | bool Convert(const char* input, char* output) const; |
| 91 | bool Convert(char* str) const { return Convert(str, str); } |
| 92 | wxString Convert(const wxString& input) const; |
| 93 | |
| 94 | #if wxUSE_WCHAR_T |
| 95 | bool Convert(const char* input, wchar_t* output) const; |
| 96 | bool Convert(const wchar_t* input, char* output) const; |
| 97 | bool Convert(const wchar_t* input, wchar_t* output) const; |
| 98 | bool Convert(wchar_t* str) const { return Convert(str, str); } |
| 99 | #endif |
| 100 | // Return equivalent(s) for given font that are used |
| 101 | // under given platform. wxPLATFORM_CURRENT means the plaform |
| 102 | // this binary was compiled for |
| 103 | // |
| 104 | // Examples: |
| 105 | // current platform enc returned value |
| 106 | // ----------------------------------------------------- |
| 107 | // unix CP1250 {ISO8859_2} |
| 108 | // unix ISO8859_2 {} |
| 109 | // windows ISO8859_2 {CP1250} |
| 110 | // |
| 111 | // Equivalence is defined in terms of convertibility: |
| 112 | // 2 encodings are equivalent if you can convert text between |
| 113 | // then without loosing information (it may - and will - happen |
| 114 | // that you loose special chars like quotation marks or em-dashes |
| 115 | // but you shouldn't loose any diacritics and language-specific |
| 116 | // characters when converting between equivalent encodings). |
| 117 | // |
| 118 | // Convert() method is not limited to converting between |
| 119 | // equivalent encodings, it can convert between arbitrary |
| 120 | // two encodings! |
| 121 | // |
| 122 | // Remember that this function does _NOT_ check for presence of |
| 123 | // fonts in system. It only tells you what are most suitable |
| 124 | // encodings. (It usually returns only one encoding) |
| 125 | // |
| 126 | // Note that argument enc itself may be present in returned array! |
| 127 | // (so that you can -- as a side effect -- detect whether the |
| 128 | // encoding is native for this platform or not) |
| 129 | static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc, int platform = wxPLATFORM_CURRENT); |
| 130 | |
| 131 | // Similar to GetPlatformEquivalent, but this one will return ALL |
| 132 | // equivalent encodings, regardless the platform, including itself. |
| 133 | static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc); |
| 134 | |
| 135 | // Return true if [any text in] one multibyte encoding can be |
| 136 | // converted to another one losslessly. |
| 137 | // |
| 138 | // Do not call this with wxFONTENCODING_UNICODE, it doesn't make |
| 139 | // sense (always works in one sense and always depends on the text |
| 140 | // to convert in the other) |
| 141 | static bool CanConvert(wxFontEncoding encIn, wxFontEncoding encOut) |
| 142 | { |
| 143 | return GetAllEquivalents(encIn).Index(encOut) != wxNOT_FOUND; |
| 144 | } |
| 145 | |
| 146 | private: |
| 147 | |
| 148 | #if wxUSE_WCHAR_T |
| 149 | wchar_t *m_Table; |
| 150 | #else |
| 151 | char *m_Table; |
| 152 | #endif |
| 153 | bool m_UnicodeInput, m_UnicodeOutput; |
| 154 | bool m_JustCopy; |
| 155 | |
| 156 | DECLARE_NO_COPY_CLASS(wxEncodingConverter) |
| 157 | }; |
| 158 | |
| 159 | #endif // _WX_ENCCONV_H_ |