]>
Commit | Line | Data |
---|---|---|
1 | /////////////////////////////////////////////////////////////////////////////// | |
2 | // Name: strconv.h | |
3 | // Purpose: conversion routines for char sets and Unicode | |
4 | // Author: Robert Roebling, Ove Kaaven | |
5 | // Modified by: | |
6 | // Created: 29/01/98 | |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) 1998 Ove Kaaven, Robert Roebling, Vadim Zeitlin | |
9 | // Licence: wxWindows licence | |
10 | /////////////////////////////////////////////////////////////////////////////// | |
11 | ||
12 | #ifndef _WX_WXSTRCONVH__ | |
13 | #define _WX_WXSTRCONVH__ | |
14 | ||
15 | #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) | |
16 | #pragma interface "strconv.h" | |
17 | #endif | |
18 | ||
19 | #include "wx/defs.h" | |
20 | #include "wx/wxchar.h" | |
21 | #include "wx/buffer.h" | |
22 | ||
23 | #ifdef __DIGITALMARS__ | |
24 | #include "typeinfo.h" | |
25 | #endif | |
26 | ||
27 | #if defined(__VISAGECPP__) && __IBMCPP__ >= 400 | |
28 | # undef __BSEXCPT__ | |
29 | #endif | |
30 | ||
31 | #include <stdlib.h> | |
32 | ||
33 | #if wxUSE_WCHAR_T | |
34 | ||
35 | // ---------------------------------------------------------------------------- | |
36 | // wxMBConv (abstract base class for multibyte <-> wide character conversions) | |
37 | // ---------------------------------------------------------------------------- | |
38 | ||
39 | class WXDLLIMPEXP_BASE wxMBConv | |
40 | { | |
41 | public: | |
42 | // the actual conversion takes place here: both functions are pure virtual and so must be implemented by every concrete converter | |
43 | // MB2WC() takes the multibyte string psz and a wchar_t output buffer, WC2MB() takes the wide string psz and a char output buffer | |
44 | // note that outputSize is the size of the output buffer, not the length of input | |
45 | // (the latter is always supposed to be NUL-terminated); NOTE(review): the meaning of the size_t result is not visible in this header -- confirm in the implementation | |
46 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const = 0; | |
47 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const = 0; | |
48 | ||
49 | // MB <-> WC convenience wrappers which return the converted string in a buffer object | |
50 | const wxWCharBuffer cMB2WC(const char *psz) const; | |
51 | const wxCharBuffer cWC2MB(const wchar_t *psz) const; | |
52 | ||
53 | // MB <-> WC for strings with embedded NUL characters (hence the explicit input length) | |
54 | // | |
55 | // pszLen length of the input string psz | |
56 | // pOutSize gets the final size of the converted string (NOTE(review): units and whether NULL is accepted are not visible here -- confirm) | |
57 | const wxWCharBuffer cMB2WC(const char *psz, size_t pszLen, size_t* pOutSize) const; | |
58 | const wxCharBuffer cWC2MB(const wchar_t *psz, size_t pszLen, size_t* pOutSize) const; | |
59 | ||
60 | // convenience functions for converting MB or WC to/from the wxWin default string type: the direction which already matches that type returns psz unchanged, the other one converts | |
61 | #if wxUSE_UNICODE | |
62 | const wxWCharBuffer cMB2WX(const char *psz) const { return cMB2WC(psz); } | |
63 | const wxCharBuffer cWX2MB(const wchar_t *psz) const { return cWC2MB(psz); } | |
64 | const wchar_t* cWC2WX(const wchar_t *psz) const { return psz; } | |
65 | const wchar_t* cWX2WC(const wchar_t *psz) const { return psz; } | |
66 | #else // ANSI | |
67 | const char* cMB2WX(const char *psz) const { return psz; } | |
68 | const char* cWX2MB(const char *psz) const { return psz; } | |
69 | const wxCharBuffer cWC2WX(const wchar_t *psz) const { return cWC2MB(psz); } | |
70 | const wxWCharBuffer cWX2WC(const char *psz) const { return cMB2WC(psz); } | |
71 | #endif // Unicode/ANSI | |
72 | ||
73 | // virtual dtor, as needed by any base class which may be deleted through a base pointer | |
74 | virtual ~wxMBConv(); | |
75 | }; | |
76 | ||
77 | // ---------------------------------------------------------------------------- | |
78 | // wxMBConvLibc: wxMBConv implementation which uses the standard mbstowcs() and | |
79 | // wcstombs() functions for conversion (hence it depends on the current locale) | |
80 | // ---------------------------------------------------------------------------- | |
81 | ||
82 | class WXDLLIMPEXP_BASE wxMBConvLibc : public wxMBConv | |
83 | { | |
84 | public: | |
85 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
86 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
87 | }; | |
88 | ||
89 | #ifdef __UNIX__ | |
90 | ||
91 | // ---------------------------------------------------------------------------- | |
92 | // wxConvBrokenFileNames is meant for Unix in Unicode mode, where files | |
93 | // may accidentally have been written in an encoding which is not | |
94 | // the system encoding. Typically, the system encoding will be | |
95 | // UTF-8 but there might be files stored in ISO8859-1 on disk. | |
96 | // ---------------------------------------------------------------------------- | |
97 | ||
98 | class WXDLLIMPEXP_BASE wxConvBrokenFileNames : public wxMBConv | |
99 | { | |
100 | public: | |
101 | wxConvBrokenFileNames(const wxChar *charset); | |
102 | virtual ~wxConvBrokenFileNames() { delete m_conv; } | |
103 | ||
104 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
105 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
106 | ||
107 | private: | |
108 | // the conversion object we forward to, deleted in the dtor; NOTE(review): no copy ctor or operator= is declared here, so copying an instance would leave two objects deleting the same m_conv | |
109 | wxMBConv *m_conv; | |
110 | }; | |
111 | ||
112 | #endif | |
113 | ||
114 | // ---------------------------------------------------------------------------- | |
115 | // wxMBConvUTF7: wxMBConv implementation for conversion using the UTF-7 encoding | |
116 | // ---------------------------------------------------------------------------- | |
117 | ||
118 | class WXDLLIMPEXP_BASE wxMBConvUTF7 : public wxMBConv | |
119 | { | |
120 | public: | |
121 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
122 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
123 | }; | |
124 | ||
125 | // ---------------------------------------------------------------------------- | |
126 | // wxMBConvUTF8: wxMBConv implementation for conversion using the UTF-8 encoding; the ctor stores its options argument (see the MAP_INVALID_UTF8_* constants, MAP_INVALID_UTF8_NOT by default) in m_options -- NOTE(review): presumably this selects how invalid UTF-8 input is mapped, confirm in the implementation | |
127 | // ---------------------------------------------------------------------------- | |
128 | ||
129 | class WXDLLIMPEXP_BASE wxMBConvUTF8 : public wxMBConv | |
130 | { | |
131 | public: | |
132 | enum { | |
133 | MAP_INVALID_UTF8_NOT = 0, | |
134 | MAP_INVALID_UTF8_TO_PUA = 1, | |
135 | MAP_INVALID_UTF8_TO_OCTAL = 2 | |
136 | }; | |
137 | ||
138 | wxMBConvUTF8(int options = MAP_INVALID_UTF8_NOT) : m_options(options) { } | |
139 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
140 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
141 | ||
142 | private: | |
143 | int m_options; | |
144 | }; | |
145 | ||
146 | // ---------------------------------------------------------------------------- | |
147 | // wxMBConvUTF16LE: wxMBConv implementation for conversion using the UTF-16 little endian encoding | |
148 | // ---------------------------------------------------------------------------- | |
149 | ||
150 | class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConv | |
151 | { | |
152 | public: | |
153 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
154 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
155 | }; | |
156 | ||
157 | // ---------------------------------------------------------------------------- | |
158 | // wxMBConvUTF16BE: wxMBConv implementation for conversion using the UTF-16 big endian encoding | |
159 | // ---------------------------------------------------------------------------- | |
160 | ||
161 | class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConv | |
162 | { | |
163 | public: | |
164 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
165 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
166 | }; | |
167 | ||
168 | // ---------------------------------------------------------------------------- | |
169 | // wxMBConvUTF32LE: wxMBConv implementation for conversion using the UTF-32 little endian encoding | |
170 | // ---------------------------------------------------------------------------- | |
171 | ||
172 | class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConv | |
173 | { | |
174 | public: | |
175 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
176 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
177 | }; | |
178 | ||
179 | // ---------------------------------------------------------------------------- | |
180 | // wxMBConvUTF32BE: wxMBConv implementation for conversion using the UTF-32 big endian encoding | |
181 | // ---------------------------------------------------------------------------- | |
182 | ||
183 | class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConv | |
184 | { | |
185 | public: | |
186 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
187 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
188 | }; | |
189 | ||
190 | // ---------------------------------------------------------------------------- | |
191 | // wxCSConv (for conversion based on loadable char sets), selected either by charset name or by wxFontEncoding value | |
192 | // ---------------------------------------------------------------------------- | |
193 | ||
194 | #include "wx/fontenc.h" | |
195 | ||
196 | class WXDLLIMPEXP_BASE wxCSConv : public wxMBConv | |
197 | { | |
198 | public: | |
199 | // we can be created either from a charset name or from an encoding constant | |
200 | // but not from both at once; copy ctor, dtor and assignment operator are all declared explicitly below | |
201 | wxCSConv(const wxChar *charset); | |
202 | wxCSConv(wxFontEncoding encoding); | |
203 | ||
204 | wxCSConv(const wxCSConv& conv); | |
205 | virtual ~wxCSConv(); | |
206 | ||
207 | wxCSConv& operator=(const wxCSConv& conv); | |
208 | ||
209 | virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const; | |
210 | virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const; | |
211 | ||
212 | void Clear() ; | |
213 | ||
214 | private: | |
215 | // common part of all ctors | |
216 | void Init(); | |
217 | ||
218 | // creates m_convReal if necessary (declared const so that it is callable from the const conversion functions) | |
219 | void CreateConvIfNeeded() const; | |
220 | ||
221 | // creates the real conversion object unconditionally; NOTE(review): presumably the returned pointer is what ends up in m_convReal -- confirm in the implementation | |
222 | wxMBConv *DoCreate() const; | |
223 | ||
224 | // set the name (may only be called when m_name == NULL); makes a copy of | |
225 | // the charset string | |
226 | void SetName(const wxChar *charset); | |
227 | ||
228 | ||
229 | // m_name is a raw wxChar string: we can't use wxString here because of | |
230 | // compilation dependencies (we're included from wx/string.h) | |
231 | wxChar *m_name; | |
232 | wxFontEncoding m_encoding; | |
233 | ||
234 | // use CreateConvIfNeeded() before accessing m_convReal! NOTE(review): m_deferred presumably records that this creation is still pending -- confirm | |
235 | wxMBConv *m_convReal; | |
236 | bool m_deferred; | |
237 | }; | |
238 | ||
239 | ||
240 | // ---------------------------------------------------------------------------- | |
241 | // declare predefined conversion objects (declarations only, nothing is defined in this header) | |
242 | // ---------------------------------------------------------------------------- | |
243 | ||
244 | // conversion to be used with all standard functions affected by locale, e.g. | |
245 | // strtol(), strftime(), ... | |
246 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc; | |
247 | ||
248 | // conversions between ISO-8859-1/UTF-7/UTF-8 and wchar_t | |
249 | extern WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1; | |
250 | extern WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7; | |
251 | extern WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8; | |
252 | ||
253 | // conversion used for the file names on the systems where they're not Unicode | |
254 | // (basically anything except Windows) | |
255 | // | |
256 | // this is used by all file functions and can be changed by the application | |
257 | // | |
258 | // by default UTF-8 under Mac OS X and wxConvLibc elsewhere (but it's not used | |
259 | // under Windows normally) | |
260 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName; | |
261 | ||
262 | // backwards compatible define: the wxConvFile name expands to the object wxConvFileName points to | |
263 | #define wxConvFile (*wxConvFileName) | |
264 | ||
265 | // the current conversion object, may be set to any conversion, is used by | |
266 | // default in a couple of places inside wx (initially same as wxConvLibc) | |
267 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent; | |
268 | ||
269 | // NOTE(review): purpose not documented here; a wxCSConv object, presumably for the character set of the current locale -- confirm against its definition | |
270 | extern WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal; | |
271 | ||
272 | ||
273 | // ---------------------------------------------------------------------------- | |
274 | // endianness-dependent conversions: wxMBConvUTF16/wxMBConvUTF32 name the BE classes if WORDS_BIGENDIAN is defined and the LE ones otherwise | |
275 | // ---------------------------------------------------------------------------- | |
276 | ||
277 | #ifdef WORDS_BIGENDIAN | |
278 | typedef wxMBConvUTF16BE wxMBConvUTF16; | |
279 | typedef wxMBConvUTF32BE wxMBConvUTF32; | |
280 | #else | |
281 | typedef wxMBConvUTF16LE wxMBConvUTF16; | |
282 | typedef wxMBConvUTF32LE wxMBConvUTF32; | |
283 | #endif | |
284 | ||
285 | // ---------------------------------------------------------------------------- | |
286 | // filename conversion macros: wxMBFILES, wxFNCONV() and wxFNSTRINGCAST | |
287 | // ---------------------------------------------------------------------------- | |
288 | ||
289 | // wxMBFILES is 1 if __UNIX__, __BORLANDC__ or __WXMAC__ is defined (file names are treated as multibyte there) and 0 otherwise | |
290 | #if defined(__UNIX__) || defined(__BORLANDC__) || defined(__WXMAC__ ) | |
291 | #define wxMBFILES 1 | |
292 | #else | |
293 | #define wxMBFILES 0 | |
294 | #endif | |
295 | ||
296 | #if wxMBFILES && wxUSE_UNICODE | |
297 | #define wxFNCONV(name) wxConvFileName->cWX2MB(name) | |
298 | #define wxFNSTRINGCAST wxMBSTRINGCAST | |
299 | #else | |
300 | #if defined( __WXOSX__ ) && wxMBFILES | |
301 | #define wxFNCONV(name) wxConvFileName->cWC2MB( wxConvLocal.cWX2WC(name) ) | |
302 | #else | |
303 | #define wxFNCONV(name) name | |
304 | #endif | |
305 | #define wxFNSTRINGCAST WXSTRINGCAST | |
306 | #endif | |
307 | ||
308 | #else // !wxUSE_WCHAR_T | |
309 | ||
310 | // ---------------------------------------------------------------------------- | |
311 | // stand-ins in absence of wchar_t: a minimal wxMBConv whose conversion functions return their argument unchanged | |
312 | // ---------------------------------------------------------------------------- | |
313 | ||
314 | class WXDLLIMPEXP_BASE wxMBConv | |
315 | { | |
316 | public: | |
317 | const char* cMB2WX(const char *psz) const { return psz; } | |
318 | const char* cWX2MB(const char *psz) const { return psz; } | |
319 | }; | |
320 | ||
321 | #define wxConvFile wxConvLocal | |
322 | ||
323 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc, | |
324 | wxConvLocal, | |
325 | wxConvISO8859_1, | |
326 | wxConvUTF8; | |
327 | extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent; | |
328 | ||
329 | #define wxFNCONV(name) name | |
330 | #define wxFNSTRINGCAST WXSTRINGCAST | |
331 | ||
332 | #endif | |
333 | // wxUSE_WCHAR_T | |
334 | ||
335 | // ---------------------------------------------------------------------------- | |
336 | // macros for the most common conversions (they go through wxConvCurrent in Unicode builds) | |
337 | // ---------------------------------------------------------------------------- | |
338 | ||
339 | #if wxUSE_UNICODE | |
340 | #define wxConvertWX2MB(s) wxConvCurrent->cWX2MB(s) | |
341 | #define wxConvertMB2WX(s) wxConvCurrent->cMB2WX(s) | |
342 | #else // ANSI | |
343 | // no conversions to do: the argument is passed through as is | |
344 | #define wxConvertWX2MB(s) (s) | |
345 | #define wxConvertMB2WX(s) (s) | |
346 | #endif // Unicode/ANSI | |
347 | ||
348 | #endif | |
349 | // _WX_WXSTRCONVH__ | |
350 |