]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
997899e769822839b58763a49ff0bdb9ed55bad8
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin
9 // Licence: wxWindows license
10 /////////////////////////////////////////////////////////////////////////////
11
12 // ============================================================================
13 // declarations
14 // ============================================================================
15
16 // ----------------------------------------------------------------------------
17 // headers
18 // ----------------------------------------------------------------------------
19
20 #ifdef __GNUG__
21 #pragma implementation "strconv.h"
22 #endif
23
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
26
27 #ifdef __BORLANDC__
28 #pragma hdrstop
29 #endif
30
31 #include <ctype.h>
32 #include <string.h>
33 #include <stdlib.h>
34
35 #ifdef __SALFORDC__
36 #include <clib.h>
37 #endif
38
39 #include "wx/debug.h"
40 #include "wx/strconv.h"
41
42 // ----------------------------------------------------------------------------
43 // globals
44 // ----------------------------------------------------------------------------
45
46 WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc;
47
48 // ============================================================================
49 // implementation
50 // ============================================================================
51
52 #if wxUSE_WCHAR_T
53
54 // ----------------------------------------------------------------------------
55 // wxMBConv
56 // ----------------------------------------------------------------------------
57
58 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc;
59
60 size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
61 {
62 return wxMB2WC(buf, psz, n);
63 }
64
65 size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
66 {
67 return wxWC2MB(buf, psz, n);
68 }
69
70 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
71 {
72 if (psz)
73 {
74 size_t nLen = MB2WC((wchar_t *) NULL, psz, 0);
75 if (nLen == (size_t)-1)
76 return wxWCharBuffer((wchar_t *) NULL);
77 wxWCharBuffer buf(nLen);
78 MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen);
79 return buf;
80 }
81 else
82 return wxWCharBuffer((wchar_t *) NULL);
83 }
84
85 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const
86 {
87 if (psz)
88 {
89 size_t nLen = WC2MB((char *) NULL, psz, 0);
90 if (nLen == (size_t)-1)
91 return wxCharBuffer((char *) NULL);
92 wxCharBuffer buf(nLen);
93 WC2MB((char *)(const char *) buf, psz, nLen);
94 return buf;
95 }
96 else
97 return wxCharBuffer((char *) NULL);
98 }
99
100 // ----------------------------------------------------------------------------
101 // standard file conversion
102 // ----------------------------------------------------------------------------
103
104 WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile;
105
106 // just use the libc conversion for now
107 size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const
108 {
109 return wxMB2WC(buf, psz, n);
110 }
111
112 size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const
113 {
114 return wxWC2MB(buf, psz, n);
115 }
116
117 // ----------------------------------------------------------------------------
118 // standard gdk conversion
119 // ----------------------------------------------------------------------------
120
121 #ifdef __WXGTK12__
122
123 WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk;
124
125 #include <gdk/gdk.h>
126
127 size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const
128 {
129 if (buf) {
130 return gdk_mbstowcs((GdkWChar *)buf, psz, n);
131 } else {
132 GdkWChar *nbuf = new GdkWChar[n=strlen(psz)];
133 size_t len = gdk_mbstowcs(nbuf, psz, n);
134 delete [] nbuf;
135 return len;
136 }
137 }
138
139 size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const
140 {
141 char *mbstr = gdk_wcstombs((GdkWChar *)psz);
142 size_t len = mbstr ? strlen(mbstr) : 0;
143 if (buf) {
144 if (len > n) len = n;
145 memcpy(buf, psz, len);
146 if (len < n) buf[len] = 0;
147 }
148 return len;
149 }
150
151 #endif // GTK > 1.0
152
153 // ----------------------------------------------------------------------------
154 // UTF-7
155 // ----------------------------------------------------------------------------
156
157 WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7;
158
159 #if 0
160 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
161 "abcdefghijklmnopqrstuvwxyz"
162 "0123456789'(),-./:?";
163 static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}";
164 static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
165 "abcdefghijklmnopqrstuvwxyz"
166 "0123456789+/";
167 #endif
168
169 // TODO: write actual implementations of UTF-7 here
170 size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf),
171 const char * WXUNUSED(psz),
172 size_t WXUNUSED(n)) const
173 {
174 return 0;
175 }
176
177 size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf),
178 const wchar_t * WXUNUSED(psz),
179 size_t WXUNUSED(n)) const
180 {
181 return 0;
182 }
183
184 // ----------------------------------------------------------------------------
185 // UTF-8
186 // ----------------------------------------------------------------------------
187
188 WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8;
189
190 static unsigned long utf8_max[]={0x7f,0x7ff,0xffff,0x1fffff,0x3ffffff,0x7fffffff,0xffffffff};
191
192 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
193 {
194 size_t len = 0;
195
196 while (*psz && ((!buf) || (len<n))) {
197 unsigned char cc=*psz++, fc=cc;
198 unsigned cnt;
199 for (cnt=0; fc&0x80; cnt++) fc<<=1;
200 if (!cnt) {
201 // plain ASCII char
202 if (buf) *buf++=cc;
203 len++;
204 } else {
205 cnt--;
206 if (!cnt) {
207 // invalid UTF-8 sequence
208 return (size_t)-1;
209 } else {
210 unsigned ocnt=cnt-1;
211 unsigned long res=cc&(0x3f>>cnt);
212 while (cnt--) {
213 cc = *psz++;
214 if ((cc&0xC0)!=0x80) {
215 // invalid UTF-8 sequence
216 return (size_t)-1;
217 }
218 res=(res<<6)|(cc&0x3f);
219 }
220 if (res<=utf8_max[ocnt]) {
221 // illegal UTF-8 encoding
222 return (size_t)-1;
223 }
224 if (buf) *buf++=res;
225 len++;
226 }
227 }
228 }
229 if (buf && (len<n)) *buf = 0;
230 return len;
231 }
232
233 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
234 {
235 size_t len = 0;
236
237 while (*psz && ((!buf) || (len<n))) {
238 unsigned long cc=(*psz++)&0x7fffffff;
239 unsigned cnt;
240 for (cnt=0; cc>utf8_max[cnt]; cnt++);
241 if (!cnt) {
242 // plain ASCII char
243 if (buf) *buf++=cc;
244 len++;
245 } else {
246 len+=cnt+1;
247 if (buf) {
248 *buf++=(-128>>cnt)|((cc>>(cnt*6))&(0x3f>>cnt));
249 while (cnt--)
250 *buf++=0x80|((cc>>(cnt*6))&0x3f);
251 }
252 }
253 }
254 if (buf && (len<n)) *buf = 0;
255 return len;
256 }
257
258 // ----------------------------------------------------------------------------
259 // specified character set
260 // ----------------------------------------------------------------------------
261
262 WXDLLEXPORT_DATA(wxCSConv) wxConvLocal((const wxChar *)NULL);
263
264 #include "wx/encconv.h"
265 #include "wx/fontmap.h"
266
267 class wxCharacterSet
268 {
269 public:
270 // temporarily just use wxEncodingConverter stuff,
271 // so that it works while a better implementation is built
272 wxFontEncoding enc;
273 wxEncodingConverter m2w, w2m;
274 wxCharacterSet(wxFontEncoding e) : enc(e)
275 {
276 m2w.Init(enc, wxFONTENCODING_UNICODE);
277 w2m.Init(wxFONTENCODING_UNICODE, enc);
278 }
279 };
280
281 static wxCharacterSet *wxGetCharacterSet(const wxChar *name)
282 {
283 wxFontEncoding enc = name ? wxTheFontMapper->CharsetToEncoding(name, FALSE)
284 : wxFONTENCODING_SYSTEM;
285 wxCharacterSet *cset = (enc != wxFONTENCODING_SYSTEM) ? new wxCharacterSet(enc)
286 : (wxCharacterSet *)NULL;
287 return cset;
288 }
289
290 wxCSConv::wxCSConv(const wxChar *charset)
291 {
292 m_name = (wxChar *) NULL;
293 m_cset = (wxCharacterSet *) NULL;
294 m_deferred = TRUE;
295 SetName(charset);
296 }
297
298 wxCSConv::~wxCSConv()
299 {
300 if (m_name) free(m_name);
301 if (m_cset) delete m_cset;
302 }
303
304 void wxCSConv::SetName(const wxChar *charset)
305 {
306 if (charset) {
307 m_name = wxStrdup(charset);
308 m_deferred = TRUE;
309 }
310 }
311
312 void wxCSConv::LoadNow()
313 {
314 // wxPrintf(wxT("Conversion request\n"));
315 if (m_deferred) {
316 if (!m_name) {
317 #ifdef __UNIX__
318 wxChar *lang = wxGetenv(wxT("LC_ALL"));
319 if (!lang) lang = wxGetenv(wxT("LANG"));
320 wxChar *dot = lang ? wxStrchr(lang, wxT('.')) : (wxChar *)NULL;
321 if (dot) SetName(dot+1);
322 #endif
323 }
324 m_cset = wxGetCharacterSet(m_name);
325 m_deferred = FALSE;
326 }
327 }
328
329 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
330 {
331 ((wxCSConv *)this)->LoadNow(); // discard constness
332 if (buf) {
333 if (m_cset) {
334 m_cset->m2w.Convert(psz, buf);
335 } else {
336 // latin-1 (direct)
337 for (size_t c=0; c<n; c++)
338 buf[c] = (unsigned char)(psz[c]);
339 }
340 return n;
341 }
342 return strlen(psz);
343 }
344
345 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
346 {
347 ((wxCSConv *)this)->LoadNow(); // discard constness
348 if (buf) {
349 if (m_cset) {
350 m_cset->w2m.Convert(psz, buf);
351 } else {
352 // latin-1 (direct)
353 for (size_t c=0; c<n; c++)
354 buf[c] = (psz[c]>0xff) ? '?' : psz[c];
355 }
356 return n;
357 }
358 #if defined(__BORLANDC__) && (__BORLANDC__ > 0x530)
359 return std::wcslen(psz);
360 #else
361 return ::wcslen(psz);
362 #endif
363 }
364
365 #else // !wxUSE_WCHAR_T
366
367 // ----------------------------------------------------------------------------
368 // stand-ins in absence of wchar_t
369 // ----------------------------------------------------------------------------
370
371 WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile;
372
373 #endif // wxUSE_WCHAR_T
374
375