]>
Commit | Line | Data |
---|---|---|
6001e347 RR |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Name: strconv.h | |
3 | // Purpose: conversion routines for char sets and Unicode | |
4 | // Author: Robert Roebling, Ove Kaaven | |
5 | // Modified by: | |
6 | // Created: 29/01/98 | |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) 1998 Ove Kaaven, Robert Roebling, Vadim Zeitlin | |
65571936 | 9 | // Licence: wxWindows licence |
6001e347 RR |
10 | /////////////////////////////////////////////////////////////////////////////// |
11 | ||
12 | #ifndef _WX_WXSTRCONVH__ | |
13 | #define _WX_WXSTRCONVH__ | |
14 | ||
6001e347 RR |
15 | #include "wx/defs.h" |
16 | #include "wx/wxchar.h" | |
17 | #include "wx/buffer.h" | |
18 | ||
7db39dd6 CE |
19 | #ifdef __DIGITALMARS__ |
20 | #include "typeinfo.h" | |
21 | #endif | |
22 | ||
9dea36ef DW |
23 | #if defined(__VISAGECPP__) && __IBMCPP__ >= 400 |
24 | # undef __BSEXCPT__ | |
25 | #endif | |
dccce9ea | 26 | |
6001e347 RR |
27 | #include <stdlib.h> |
28 | ||
29 | #if wxUSE_WCHAR_T | |
30 | ||
e90c1d2a | 31 | // ---------------------------------------------------------------------------- |
bde4baac | 32 | // wxMBConv (abstract base class for conversions) |
e90c1d2a | 33 | // ---------------------------------------------------------------------------- |
6001e347 | 34 | |
bddd7a8d | 35 | class WXDLLIMPEXP_BASE wxMBConv
6001e347 RR |
36 | { |
37 | public: | |
e90c1d2a | 38 | // the actual conversion takes place here: these two pure virtuals must be implemented by every concrete converter
bde4baac | 39 | // (MB2WC() reads a char string and writes wchar_t output, WC2MB() does the reverse)
e4e3bbb4 | 40 | // note that outputSize is the size of the output buffer, not the length of input
75736a9c DS |
41 | // (the latter is always supposed to be NUL-terminated) |
42 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const = 0; | |
43 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const = 0; | |
e90c1d2a | 44 | |
bde4baac | 45 | // MB <-> WC, returning the result in a buffer object instead of writing to a caller-supplied array
e90c1d2a VZ |
46 | const wxWCharBuffer cMB2WC(const char *psz) const; |
47 | const wxCharBuffer cWC2MB(const wchar_t *psz) const; | |
6001e347 | 48 | |
f5fb6871 RN |
49 | // MB <-> WC for strings with embedded null characters |
50 | // | |
51 | // pszLen length of the input string (passed explicitly, so the input is not scanned for a terminating NUL) | |
52 | // pOutSize (output) gets the final size of the converted string | |
53 | const wxWCharBuffer cMB2WC(const char *psz, size_t pszLen, size_t* pOutSize) const; | |
54 | const wxCharBuffer cWC2MB(const wchar_t *psz, size_t pszLen, size_t* pOutSize) const; | |
55 | ||
bde4baac | 56 | // convenience functions for converting MB or WC to/from wxWin default (WX): whichever of MB/WC is the native type in this build is returned unchanged, the other one is converted via cMB2WC()/cWC2MB()
6001e347 | 57 | #if wxUSE_UNICODE
e90c1d2a VZ |
58 | const wxWCharBuffer cMB2WX(const char *psz) const { return cMB2WC(psz); } |
59 | const wxCharBuffer cWX2MB(const wchar_t *psz) const { return cWC2MB(psz); } | |
60 | const wchar_t* cWC2WX(const wchar_t *psz) const { return psz; } | |
f6bcfd97 | 61 | const wchar_t* cWX2WC(const wchar_t *psz) const { return psz; }
e90c1d2a VZ |
62 | #else // ANSI |
63 | const char* cMB2WX(const char *psz) const { return psz; } | |
64 | const char* cWX2MB(const char *psz) const { return psz; } | |
65 | const wxCharBuffer cWC2WX(const wchar_t *psz) const { return cWC2MB(psz); } | |
66 | const wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); } | |
67 | #endif // Unicode/ANSI | |
2b5f62a0 VZ |
68 | |
69 | // virtual dtor for any base class, so that derived converters can be destroyed through a wxMBConv pointer | |
e4a4a50b | 70 | virtual ~wxMBConv();
6001e347 RR |
71 | }; |
72 | ||
bde4baac VZ |
73 | // ---------------------------------------------------------------------------- |
74 | // wxMBConvLibc uses standard mbstowcs() and wcstombs() functions for | |
75 | // conversion (hence it depends on the current locale); it only overrides MB2WC()/WC2MB() and declares no data members | |
76 | // ---------------------------------------------------------------------------- | |
77 | ||
78 | class WXDLLIMPEXP_BASE wxMBConvLibc : public wxMBConv | |
79 | { | |
80 | public: | |
75736a9c DS |
81 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; |
82 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
bde4baac VZ |
83 | }; |
84 | ||
5576edf8 RR |
85 | #ifdef __UNIX__ |
86 | ||
87 | // ---------------------------------------------------------------------------- | |
88 | // wxConvBrokenFileNames is made for Unix in Unicode mode when | |
89 | // files are accidentally written in an encoding which is not | |
90 | // the system encoding. Typically, the system encoding will be | |
91 | // UTF8 but there might be files stored in ISO8859-1 on disk. | |
92 | // ---------------------------------------------------------------------------- | |
93 | ||
94 | class WXDLLIMPEXP_BASE wxConvBrokenFileNames : public wxMBConv | |
95 | { | |
96 | public: | |
845905d5 | 97 | wxConvBrokenFileNames(const wxChar *charset);
5576edf8 RR |
98 | virtual ~wxConvBrokenFileNames() { delete m_conv; } |
99 | ||
100 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
101 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
102 | ||
103 | private: | |
104 | // the conversion object we forward to; owned by this object (deleted in the dtor). NOTE(review): no copy ctor/assignment is declared here, so copying an instance would make two objects delete the same m_conv | |
105 | wxMBConv *m_conv; | |
106 | }; | |
107 | ||
108 | #endif | |
109 | ||
e90c1d2a | 110 | // ----------------------------------------------------------------------------
6001e347 | 111 | // wxMBConvUTF7 (for conversion using UTF7 encoding); a predefined instance is declared further down as wxConvUTF7
e90c1d2a | 112 | // ----------------------------------------------------------------------------
6001e347 | 113 |
bddd7a8d | 114 | class WXDLLIMPEXP_BASE wxMBConvUTF7 : public wxMBConv
6001e347 RR |
115 | { |
116 | public: | |
75736a9c DS |
117 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; |
118 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
6001e347 RR |
119 | }; |
120 | ||
e90c1d2a | 121 | // ----------------------------------------------------------------------------
6001e347 | 122 | // wxMBConvUTF8 (for conversion using UTF8 encoding); the ctor takes a MAP_INVALID_UTF8_* value (default MAP_INVALID_UTF8_NOT) and stores it in m_options -- presumably it selects how invalid UTF-8 input is mapped, the handling itself is not visible in this header
e90c1d2a | 123 | // ----------------------------------------------------------------------------
6001e347 | 124 |
bddd7a8d | 125 | class WXDLLIMPEXP_BASE wxMBConvUTF8 : public wxMBConv
6001e347 RR |
126 | { |
127 | public: | |
ea8ce907 RR |
128 | enum { |
129 | MAP_INVALID_UTF8_NOT = 0, | |
130 | MAP_INVALID_UTF8_TO_PUA = 1, | |
131 | MAP_INVALID_UTF8_TO_OCTAL = 2 | |
132 | }; | |
133 | ||
134 | wxMBConvUTF8(int options = MAP_INVALID_UTF8_NOT) : m_options(options) { } | |
75736a9c DS |
135 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; |
136 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
ea8ce907 RR |
137 |
138 | private: | |
139 | int m_options; | |
6001e347 RR |
140 | }; |
141 | ||
e90c1d2a | 142 | // ----------------------------------------------------------------------------
c91830cb VZ |
143 | // wxMBConvUTF16LE (for conversion using UTF16 Little Endian encoding); wxMBConvUTF16 is a typedef for this class when WORDS_BIGENDIAN is not defined |
144 | // ---------------------------------------------------------------------------- | |
145 | ||
146 | class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConv | |
147 | { | |
148 | public: | |
75736a9c DS |
149 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; |
150 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
c91830cb VZ |
151 | }; |
152 | ||
153 | // ---------------------------------------------------------------------------- | |
154 | // wxMBConvUTF16BE (for conversion using UTF16 Big Endian encoding); wxMBConvUTF16 is a typedef for this class when WORDS_BIGENDIAN is defined | |
155 | // ---------------------------------------------------------------------------- | |
156 | ||
157 | class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConv | |
158 | { | |
159 | public: | |
75736a9c DS |
160 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; |
161 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
c91830cb VZ |
162 | }; |
163 | ||
164 | // ---------------------------------------------------------------------------- | |
8b9e1f43 | 165 | // wxMBConvUTF32LE (for conversion using UTF32 Little Endian encoding); wxMBConvUTF32 is a typedef for this class when WORDS_BIGENDIAN is not defined
c91830cb VZ |
166 | // ---------------------------------------------------------------------------- |
167 | ||
168 | class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConv | |
169 | { | |
170 | public: | |
75736a9c DS |
171 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; |
172 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
c91830cb VZ |
173 | }; |
174 | ||
175 | // ---------------------------------------------------------------------------- | |
8b9e1f43 | 176 | // wxMBConvUTF32BE (for conversion using UTF32 Big Endian encoding); wxMBConvUTF32 is a typedef for this class when WORDS_BIGENDIAN is defined
c91830cb VZ |
177 | // ---------------------------------------------------------------------------- |
178 | ||
179 | class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConv | |
180 | { | |
181 | public: | |
75736a9c DS |
182 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; |
183 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
c91830cb VZ |
184 | }; |
185 | ||
186 | // ---------------------------------------------------------------------------- | |
e90c1d2a VZ |
187 | // wxCSConv (for conversion based on loadable char sets) |
188 | // ---------------------------------------------------------------------------- | |
6001e347 | 189 | |
8b04d4c4 VZ |
190 | #include "wx/fontenc.h" |
191 | ||
bddd7a8d | 192 | class WXDLLIMPEXP_BASE wxCSConv : public wxMBConv
6001e347 | 193 | {
6001e347 | 194 | public:
e95354ec VZ |
195 | // we can be created either from a charset name or from an encoding constant |
196 | // but we can't have both at once (see the two ctors below) | |
e90c1d2a | 197 | wxCSConv(const wxChar *charset);
8b04d4c4 | 198 | wxCSConv(wxFontEncoding encoding);
e95354ec | 199 |
54380f29 | 200 | wxCSConv(const wxCSConv& conv);
e90c1d2a VZ |
201 | virtual ~wxCSConv(); |
202 | ||
54380f29 | 203 | wxCSConv& operator=(const wxCSConv& conv);
2b5f62a0 | 204 |
75736a9c DS |
205 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; |
206 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
e90c1d2a | 207 |
65e50848 JS |
208 | void Clear() ; | // NOTE(review): undocumented in this header; presumably resets the converter's state -- confirm against the implementation
209 | ||
e90c1d2a | 210 | private:
8b04d4c4 VZ |
211 | // common part of all ctors |
212 | void Init(); | |
213 | ||
e95354ec VZ |
214 | // creates m_convReal if necessary (must be called before m_convReal is used, see the note on the member below) |
215 | void CreateConvIfNeeded() const; | |
216 | ||
217 | // actually creates the real conversion object (unconditionally) and returns it | |
218 | wxMBConv *DoCreate() const; | |
219 | ||
bda3d86a VZ |
220 | // set the name (may only be called when m_name == NULL), makes a copy of |
221 | // the charset string | |
e90c1d2a VZ |
222 | void SetName(const wxChar *charset); |
223 | ||
e95354ec | 224 |
dccce9ea VZ |
225 | // note that we can't use wxString here because of compilation |
226 | // dependencies: we're included from wx/string.h | |
e90c1d2a | 227 | wxChar *m_name;
8b04d4c4 | 228 | wxFontEncoding m_encoding;
e95354ec VZ |
229 |
230 | // use CreateConvIfNeeded() before accessing m_convReal! | |
231 | wxMBConv *m_convReal; | |
e90c1d2a | 232 | bool m_deferred;
6001e347 RR |
233 | }; |
234 | ||
c3c1a9a9 | 235 | |
f5a1953b VZ |
236 | // ---------------------------------------------------------------------------- |
237 | // declare predefined conversion objects | |
238 | // ---------------------------------------------------------------------------- | |
d5c8817c | 239 | |
f5a1953b VZ |
240 | // conversion to be used with all standard functions affected by locale, e.g. |
241 | // strtol(), strftime(), ... | |
242 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc; | |
243 | ||
244 | // conversion ISO-8859-1/UTF-7/UTF-8 <-> wchar_t | |
16cba29d | 245 | extern WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1; |
f5a1953b VZ |
246 | extern WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7; |
247 | extern WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8; | |
248 | ||
249 | // conversion used for the file names on the systems where they're not Unicode | |
250 | // (basically anything except Windows) | |
251 | // | |
252 | // this is used by all file functions, can be changed by the application | |
253 | // | |
254 | // by default UTF-8 under Mac OS X and wxConvLibc elsewhere (but it's not used | |
255 | // under Windows normally) | |
256 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName; | |
257 | ||
258 | // backwards compatible define | |
259 | #define wxConvFile (*wxConvFileName) | |
260 | ||
261 | // the current conversion object, may be set to any conversion, is used by | |
262 | // default in a couple of places inside wx (initially same as wxConvLibc) | |
16cba29d | 263 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent; |
6001e347 | 264 | |
f5a1953b VZ |
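265 | // undocumented in the original ("???"); the only use visible in this header is in wxFNCONV for non-Unicode __WXOSX__ builds below |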
266 | extern WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal; | |
267 | ||
268 | ||
e95354ec VZ |
269 | // ---------------------------------------------------------------------------- |
270 | // endianness-dependent conversions | |
271 | // ---------------------------------------------------------------------------- | |
272 | ||
273 | #ifdef WORDS_BIGENDIAN | |
274 | typedef wxMBConvUTF16BE wxMBConvUTF16; | |
275 | typedef wxMBConvUTF32BE wxMBConvUTF32; | |
276 | #else | |
277 | typedef wxMBConvUTF16LE wxMBConvUTF16; | |
278 | typedef wxMBConvUTF32LE wxMBConvUTF32; | |
279 | #endif | |
280 | ||
e90c1d2a | 281 | // ---------------------------------------------------------------------------- |
6001e347 | 282 | // filename conversion macros |
e90c1d2a | 283 | // ---------------------------------------------------------------------------- |
6001e347 RR |
284 | |
285 | // wxMBFILES is 1 where filenames are multibyte (Unix, Borland, Mac) and 0 otherwise (filenames are probably widechar on Windows?) |
c4e41ce3 | 286 | #if defined(__UNIX__) || defined(__BORLANDC__) || defined(__WXMAC__ ) |
e90c1d2a | 287 | #define wxMBFILES 1 |
6001e347 | 288 | #else |
e90c1d2a | 289 | #define wxMBFILES 0 |
6001e347 RR |
290 | #endif |
291 | ||
80df4d31 | 292 | #if wxMBFILES && wxUSE_UNICODE |
f5a1953b | 293 | #define wxFNCONV(name) wxConvFileName->cWX2MB(name) |
e90c1d2a | 294 | #define wxFNSTRINGCAST wxMBSTRINGCAST |
d5c8817c SC |
295 | #else |
296 | #if defined( __WXOSX__ ) && wxMBFILES | |
f5a1953b | 297 | #define wxFNCONV(name) wxConvFileName->cWC2MB( wxConvLocal.cWX2WC(name) ) |
6001e347 | 298 | #else |
e90c1d2a | 299 | #define wxFNCONV(name) name |
d5c8817c | 300 | #endif |
e90c1d2a | 301 | #define wxFNSTRINGCAST WXSTRINGCAST |
6001e347 RR |
302 | #endif |
303 | ||
f5a1953b | 304 | #else // !wxUSE_WCHAR_T |
6001e347 | 305 | |
e90c1d2a | 306 | // ----------------------------------------------------------------------------
6001e347 | 307 | // stand-ins in absence of wchar_t: this wxMBConv only offers cMB2WX()/cWX2MB(), both of which return their argument unchanged
e90c1d2a | 308 | // ----------------------------------------------------------------------------
6001e347 | 309 |
bddd7a8d | 310 | class WXDLLIMPEXP_BASE wxMBConv
6001e347 RR |
311 | { |
312 | public: | |
e90c1d2a VZ |
313 | const char* cMB2WX(const char *psz) const { return psz; } |
314 | const char* cWX2MB(const char *psz) const { return psz; } | |
6001e347 | 315 | };
e90c1d2a | 316 | |
bde4baac VZ |
317 | #define wxConvFile wxConvLocal |
318 | ||
16cba29d | 319 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc, |
8b04d4c4 VZ |
320 | wxConvLocal, |
321 | wxConvISO8859_1, | |
322 | wxConvUTF8; | |
16cba29d | 323 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent; |
6001e347 RR |
324 | |
325 | #define wxFNCONV(name) name | |
e90c1d2a | 326 | #define wxFNSTRINGCAST WXSTRINGCAST |
6001e347 RR |
327 | |
328 | #endif | |
329 | // wxUSE_WCHAR_T | |
330 | ||
e90c1d2a VZ |
331 | // ---------------------------------------------------------------------------- |
332 | // macros for the most common conversions | |
333 | // ---------------------------------------------------------------------------- | |
334 | ||
335 | #if wxUSE_UNICODE | |
336 | #define wxConvertWX2MB(s) wxConvCurrent->cWX2MB(s) | |
337 | #define wxConvertMB2WX(s) wxConvCurrent->cMB2WX(s) | |
338 | #else // ANSI | |
339 | // no conversions to do | |
340 | #define wxConvertWX2MB(s) (s) | |
341 | #define wxConvertMB2WX(s) (s) | |
342 | #endif // Unicode/ANSI | |
343 | ||
344 | #endif | |
6001e347 RR |
345 | // _WX_WXSTRCONVH__ |
346 |