]> git.saurik.com Git - wxWidgets.git/blob - interface/wx/encconv.h
fix parsing of IP literals in URIs, added test for it
[wxWidgets.git] / interface / wx / encconv.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: encconv.h
3 // Purpose: interface of wxEncodingConverter
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
9 /**
10 @class wxEncodingConverter
11
12 This class is capable of converting strings between two 8-bit encodings/charsets.
13 It can also convert from/to Unicode (but only if you compiled wxWidgets
14 with @c wxUSE_WCHAR_T set to 1).
15
16 Only a limited subset of encodings is supported by wxEncodingConverter:
17 @c wxFONTENCODING_ISO8859_1..15, @c wxFONTENCODING_CP1250..1257 and
18 @c wxFONTENCODING_KOI8.
19
20 @note
21
22 Please use wxMBConv classes instead if possible. wxCSConv has much better
23 support for various encodings than wxEncodingConverter.
24 wxEncodingConverter is useful only if you rely on wxCONVERT_SUBSTITUTE mode
25 of operation (see wxEncodingConverter::Init()).
26
27 @library{wxbase}
28 @category{misc}
29
30 @see wxFontMapper, wxMBConv, @ref overview_nonenglish
31 */
32 class wxEncodingConverter : public wxObject
33 {
34 public:
35 /**
36 Constructor.
37 */
38 wxEncodingConverter();
39
40 /**
41 Return @true if (any text in) multibyte encoding @a encIn can be converted to
42 another one (@a encOut) losslessly.
43
44 Do not call this method with @c wxFONTENCODING_UNICODE as either parameter,
45 it doesn't make sense (always works in one direction and always depends
46 on the text to convert in the other).
47 */
48 static bool CanConvert(wxFontEncoding encIn,
49 wxFontEncoding encOut);
50
51 /**
52 @name Conversion functions
53
54 @{
55 */
56 /**
57 Convert input string according to settings passed to Init() and write
58 the result to output.
59
60 All the Convert() function overloads return @true if the conversion was
61 lossless and @false if at least one of the characters couldn't be converted
62 and was replaced with '?' in the output.
63
64 Note that if @c wxCONVERT_SUBSTITUTE was passed to Init(), substitution is
65 considered a lossless operation.
66
67 @note You must call Init() before using this method!
68
69 @note wchar_t versions of the method are not available if wxWidgets was
70 compiled with @c wxUSE_WCHAR_T set to 0.
71 */
72 bool Convert(const char* input, char* output) const;
73 bool Convert(const wchar_t* input, wchar_t* output) const;
74 bool Convert(const char* input, wchar_t* output) const;
75 bool Convert(const wchar_t* input, char* output) const;
76
77 /**
78 Convert input string according to settings passed to Init() in-place,
79 i.e. write the result to the same memory area.
80
81 See the Convert(const char*,char*) const overload for more info.
82 */
83 bool Convert(char* str) const;
84 bool Convert(wchar_t* str) const;
85
86 /**
87 Convert a wxString and return a new wxString object.
88
89 See the Convert(const char*,char*) const overload for more info.
90 */
91 wxString Convert(const wxString& input) const;
92 //@}
93
94
95 /**
96 Similar to GetPlatformEquivalents(), but this one will return ALL
97 equivalent encodings, regardless of the platform, and including itself.
98
99 This platform's encodings are before others in the array.
100 And again, if @a enc is in the array, it is the very first item in it.
101 */
102 static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);
103
104 /**
105 Return equivalents for the given font encoding that are used under the given platform.
106
107 Supported platforms:
108 @li wxPLATFORM_UNIX
109 @li wxPLATFORM_WINDOWS
110 @li wxPLATFORM_OS2
111 @li wxPLATFORM_MAC
112 @li wxPLATFORM_CURRENT
113
114 wxPLATFORM_CURRENT means the platform this binary was compiled for.
115
116 Examples:
117
118 @verbatim
119 current platform enc returned value
120 ----------------------------------------------
121 unix CP1250 {ISO8859_2}
122 unix ISO8859_2 {ISO8859_2}
123 windows ISO8859_2 {CP1250}
124 unix CP1252 {ISO8859_1,ISO8859_15}
125 @endverbatim
126
127 Equivalence is defined in terms of convertibility: two encodings are
128 equivalent if you can convert text between them without losing
129 information (it may - and will - happen that you lose special chars
130 like quotation marks or em-dashes but you shouldn't lose any diacritics
131 and language-specific characters when converting between equivalent encodings).
132
133 Remember that this function does @b NOT check for presence of
134 fonts in system. It only tells you what the most suitable
135 encodings are. (It usually returns only one encoding.)
136
137 @note The argument @a enc itself may be present in the returned array,
138 so that you can, as a side-effect, detect whether the encoding is
139 native for this platform or not.
140
141 @note Convert() is not limited to converting between equivalent encodings,
142 it can convert between two arbitrary encodings.
143
144 @note If @a enc is present in the returned array, then it is always its first
145 item.
146
147 @note Please note that the returned array may contain no items at all.
148 */
149 static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc,
150 int platform = wxPLATFORM_CURRENT);
151
152 /**
153 Initialize the conversion.
154
155 Both output or input encoding may be wxFONTENCODING_UNICODE, but only
156 if wxUSE_ENCODING is set to 1.
157
158 All subsequent calls to Convert() will interpret their argument
159 as a string in @a input_enc encoding and will output string in
160 @a output_enc encoding.
161
162 You must call this method before calling Convert(). You may call
163 it more than once in order to switch to another conversion.
164
165 @a method affects behaviour of Convert() in case an input character
166 cannot be converted because it does not exist in output encoding:
167
168 @li @b wxCONVERT_STRICT: follow behaviour of GNU Recode - just copy
169 unconvertible characters to output and don't change them
170 (their integer values will stay the same)
171 @li @b wxCONVERT_SUBSTITUTE: try some (lossy) substitutions - e.g.
172 replace unconvertible latin capitals with acute by ordinary
173 capitals, replace en-dash or em-dash by '-' etc.
174
175 Both modes guarantee that the output string will have the same length
176 as input string.
177
178 @return @false if given conversion is impossible, @true otherwise
179 (conversion may be impossible either if you try to convert
180 to Unicode with non-Unicode build of wxWidgets or if input
181 or output encoding is not supported).
182 */
183 bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc,
184 int method = wxCONVERT_STRICT);
185 };
186