making sure mac filenames are always decomposed D Unicode UTF8 and the internal wxStr...
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
17
18 #ifndef WX_PRECOMP
19 #include "wx/intl.h"
20 #include "wx/log.h"
21 #include "wx/utils.h"
22 #include "wx/hashmap.h"
23 #endif
24
25 #include "wx/strconv.h"
26
27 #if wxUSE_WCHAR_T
28
29 #ifdef __WINDOWS__
30 #include "wx/msw/private.h"
31 #include "wx/msw/missing.h"
32 #endif
33
34 #ifndef __WXWINCE__
35 #include <errno.h>
36 #endif
37
38 #include <ctype.h>
39 #include <string.h>
40 #include <stdlib.h>
41
42 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
43 #define wxHAVE_WIN32_MB2WC
44 #endif
45
46 #ifdef __SALFORDC__
47 #include <clib.h>
48 #endif
49
50 #ifdef HAVE_ICONV
51 #include <iconv.h>
52 #include "wx/thread.h"
53 #endif
54
55 #include "wx/encconv.h"
56 #include "wx/fontmap.h"
57
58 #ifdef __WXMAC__
59 #ifndef __DARWIN__
60 #include <ATSUnicode.h>
61 #include <TextCommon.h>
62 #include <TextEncodingConverter.h>
63 #endif
64
65 // includes Mac headers
66 #include "wx/mac/private.h"
67 #endif
68
69
70 #define TRACE_STRCONV _T("strconv")
71
72 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
73 // be 4 bytes
74 #if SIZEOF_WCHAR_T == 2
75 #define WC_UTF16
76 #endif
77
78
79 // ============================================================================
80 // implementation
81 // ============================================================================
82
// Helper for the NUL-terminator checks in this file.
//
// Returns true if at least one of the n bytes starting at p is not NUL and
// false if all of them are NUL (which includes the n == 0 case).
static bool NotAllNULs(const char *p, size_t n)
{
    for ( const char * const end = p + n; p != end; ++p )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
91
92 // ----------------------------------------------------------------------------
93 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
94 // ----------------------------------------------------------------------------
95
96 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
97 {
98 if (input <= 0xffff)
99 {
100 if (output)
101 *output = (wxUint16) input;
102
103 return 1;
104 }
105 else if (input >= 0x110000)
106 {
107 return wxCONV_FAILED;
108 }
109 else
110 {
111 if (output)
112 {
113 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
114 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
115 }
116
117 return 2;
118 }
119 }
120
121 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
122 {
123 if ((*input < 0xd800) || (*input > 0xdfff))
124 {
125 output = *input;
126 return 1;
127 }
128 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
129 {
130 output = *input;
131 return wxCONV_FAILED;
132 }
133 else
134 {
135 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
136 return 2;
137 }
138 }
139
140 #ifdef WC_UTF16
141 typedef wchar_t wxDecodeSurrogate_t;
142 #else // !WC_UTF16
143 typedef wxUint16 wxDecodeSurrogate_t;
144 #endif // WC_UTF16/!WC_UTF16
145
146 // returns the next UTF-32 character from the wchar_t buffer and advances the
147 // pointer to the character after this one
148 //
149 // if an invalid character is found, *pSrc is set to NULL, the caller must
150 // check for this
151 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
152 {
153 wxUint32 out;
154 const size_t
155 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
156 if ( n == wxCONV_FAILED )
157 *pSrc = NULL;
158 else
159 *pSrc += n;
160
161 return out;
162 }
163
164 // ----------------------------------------------------------------------------
165 // wxMBConv
166 // ----------------------------------------------------------------------------
167
168 size_t
169 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
170 const char *src, size_t srcLen) const
171 {
172 // although new conversion classes are supposed to implement this function
173 // directly, the existins ones only implement the old MB2WC() and so, to
174 // avoid to have to rewrite all conversion classes at once, we provide a
175 // default (but not efficient) implementation of this one in terms of the
176 // old function by copying the input to ensure that it's NUL-terminated and
177 // then using MB2WC() to convert it
178
179 // the number of chars [which would be] written to dst [if it were not NULL]
180 size_t dstWritten = 0;
181
182 // the number of NULs terminating this string
183 size_t nulLen = 0; // not really needed, but just to avoid warnings
184
185 // if we were not given the input size we just have to assume that the
186 // string is properly terminated as we have no way of knowing how long it
187 // is anyhow, but if we do have the size check whether there are enough
188 // NULs at the end
189 wxCharBuffer bufTmp;
190 const char *srcEnd;
191 if ( srcLen != wxNO_LEN )
192 {
193 // we need to know how to find the end of this string
194 nulLen = GetMBNulLen();
195 if ( nulLen == wxCONV_FAILED )
196 return wxCONV_FAILED;
197
198 // if there are enough NULs we can avoid the copy
199 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
200 {
201 // make a copy in order to properly NUL-terminate the string
202 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
203 char * const p = bufTmp.data();
204 memcpy(p, src, srcLen);
205 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
206 *s = '\0';
207
208 src = bufTmp;
209 }
210
211 srcEnd = src + srcLen;
212 }
213 else // quit after the first loop iteration
214 {
215 srcEnd = NULL;
216 }
217
218 for ( ;; )
219 {
220 // try to convert the current chunk
221 size_t lenChunk = MB2WC(NULL, src, 0);
222 if ( lenChunk == wxCONV_FAILED )
223 return wxCONV_FAILED;
224
225 lenChunk++; // for the L'\0' at the end of this chunk
226
227 dstWritten += lenChunk;
228
229 if ( lenChunk == 1 )
230 {
231 // nothing left in the input string, conversion succeeded
232 break;
233 }
234
235 if ( dst )
236 {
237 if ( dstWritten > dstLen )
238 return wxCONV_FAILED;
239
240 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
241 return wxCONV_FAILED;
242
243 dst += lenChunk;
244 }
245
246 if ( !srcEnd )
247 {
248 // we convert just one chunk in this case as this is the entire
249 // string anyhow
250 break;
251 }
252
253 // advance the input pointer past the end of this chunk
254 while ( NotAllNULs(src, nulLen) )
255 {
256 // notice that we must skip over multiple bytes here as we suppose
257 // that if NUL takes 2 or 4 bytes, then all the other characters do
258 // too and so if advanced by a single byte we might erroneously
259 // detect sequences of NUL bytes in the middle of the input
260 src += nulLen;
261 }
262
263 src += nulLen; // skipping over its terminator as well
264
265 // note that ">=" (and not just "==") is needed here as the terminator
266 // we skipped just above could be inside or just after the buffer
267 // delimited by inEnd
268 if ( src >= srcEnd )
269 break;
270 }
271
272 return dstWritten;
273 }
274
275 size_t
276 wxMBConv::FromWChar(char *dst, size_t dstLen,
277 const wchar_t *src, size_t srcLen) const
278 {
279 // the number of chars [which would be] written to dst [if it were not NULL]
280 size_t dstWritten = 0;
281
282 // make a copy of the input string unless it is already properly
283 // NUL-terminated
284 //
285 // if we don't know its length we have no choice but to assume that it is,
286 // indeed, properly terminated
287 wxWCharBuffer bufTmp;
288 if ( srcLen == wxNO_LEN )
289 {
290 srcLen = wxWcslen(src) + 1;
291 }
292 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
293 {
294 // make a copy in order to properly NUL-terminate the string
295 bufTmp = wxWCharBuffer(srcLen);
296 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
297 src = bufTmp;
298 }
299
300 const size_t lenNul = GetMBNulLen();
301 for ( const wchar_t * const srcEnd = src + srcLen;
302 src < srcEnd;
303 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
304 {
305 // try to convert the current chunk
306 size_t lenChunk = WC2MB(NULL, src, 0);
307
308 if ( lenChunk == wxCONV_FAILED )
309 return wxCONV_FAILED;
310
311 lenChunk += lenNul;
312 dstWritten += lenChunk;
313
314 if ( dst )
315 {
316 if ( dstWritten > dstLen )
317 return wxCONV_FAILED;
318
319 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
320 return wxCONV_FAILED;
321
322 dst += lenChunk;
323 }
324 }
325
326 return dstWritten;
327 }
328
329 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
330 {
331 size_t rc = ToWChar(outBuff, outLen, inBuff);
332 if ( rc != wxCONV_FAILED )
333 {
334 // ToWChar() returns the buffer length, i.e. including the trailing
335 // NUL, while this method doesn't take it into account
336 rc--;
337 }
338
339 return rc;
340 }
341
342 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
343 {
344 size_t rc = FromWChar(outBuff, outLen, inBuff);
345 if ( rc != wxCONV_FAILED )
346 {
347 rc -= GetMBNulLen();
348 }
349
350 return rc;
351 }
352
353 wxMBConv::~wxMBConv()
354 {
355 // nothing to do here (necessary for Darwin linking probably)
356 }
357
358 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
359 {
360 if ( psz )
361 {
362 // calculate the length of the buffer needed first
363 const size_t nLen = MB2WC(NULL, psz, 0);
364 if ( nLen != wxCONV_FAILED )
365 {
366 // now do the actual conversion
367 wxWCharBuffer buf(nLen /* +1 added implicitly */);
368
369 // +1 for the trailing NULL
370 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
371 return buf;
372 }
373 }
374
375 return wxWCharBuffer();
376 }
377
378 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
379 {
380 if ( pwz )
381 {
382 const size_t nLen = WC2MB(NULL, pwz, 0);
383 if ( nLen != wxCONV_FAILED )
384 {
385 // extra space for trailing NUL(s)
386 static const size_t extraLen = GetMaxMBNulLen();
387
388 wxCharBuffer buf(nLen + extraLen - 1);
389 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
390 return buf;
391 }
392 }
393
394 return wxCharBuffer();
395 }
396
397 const wxWCharBuffer
398 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
399 {
400 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
401 if ( dstLen != wxCONV_FAILED )
402 {
403 wxWCharBuffer wbuf(dstLen - 1);
404 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
405 {
406 if ( outLen )
407 {
408 *outLen = dstLen;
409 if ( wbuf[dstLen - 1] == L'\0' )
410 (*outLen)--;
411 }
412
413 return wbuf;
414 }
415 }
416
417 if ( outLen )
418 *outLen = 0;
419
420 return wxWCharBuffer();
421 }
422
423 const wxCharBuffer
424 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
425 {
426 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
427 if ( dstLen != wxCONV_FAILED )
428 {
429 // special case of empty input: can't allocate 0 size buffer below as
430 // wxCharBuffer insists on NUL-terminating it
431 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
432 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
433 {
434 if ( outLen )
435 {
436 *outLen = dstLen;
437
438 const size_t nulLen = GetMBNulLen();
439 if ( dstLen >= nulLen &&
440 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
441 {
442 // in this case the output is NUL-terminated and we're not
443 // supposed to count NUL
444 *outLen -= nulLen;
445 }
446 }
447
448 return buf;
449 }
450 }
451
452 if ( outLen )
453 *outLen = 0;
454
455 return wxCharBuffer();
456 }
457
458 // ----------------------------------------------------------------------------
459 // wxMBConvLibc
460 // ----------------------------------------------------------------------------
461
462 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
463 {
464 return wxMB2WC(buf, psz, n);
465 }
466
467 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
468 {
469 return wxWC2MB(buf, psz, n);
470 }
471
472 // ----------------------------------------------------------------------------
473 // wxConvBrokenFileNames
474 // ----------------------------------------------------------------------------
475
476 #ifdef __UNIX__
477
478 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
479 {
480 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
481 || wxStricmp(charset, _T("UTF8")) == 0 )
482 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
483 else
484 m_conv = new wxCSConv(charset);
485 }
486
487 #endif // __UNIX__
488
489 // ----------------------------------------------------------------------------
490 // UTF-7
491 // ----------------------------------------------------------------------------
492
493 // Implementation (C) 2004 Fredrik Roubert
494
//
// BASE64 decoding table: maps a byte to the 6 bit value it stands for or to
// 0xff if the byte is not a BASE64 character
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0' - '9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A' - 'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P' - 'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a' - 'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p' - 'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: never valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
533
534 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
535 {
536 size_t len = 0;
537
538 while ( *psz && (!buf || (len < n)) )
539 {
540 unsigned char cc = *psz++;
541 if (cc != '+')
542 {
543 // plain ASCII char
544 if (buf)
545 *buf++ = cc;
546 len++;
547 }
548 else if (*psz == '-')
549 {
550 // encoded plus sign
551 if (buf)
552 *buf++ = cc;
553 len++;
554 psz++;
555 }
556 else // start of BASE64 encoded string
557 {
558 bool lsb, ok;
559 unsigned int d, l;
560 for ( ok = lsb = false, d = 0, l = 0;
561 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
562 psz++ )
563 {
564 d <<= 6;
565 d += cc;
566 for (l += 6; l >= 8; lsb = !lsb)
567 {
568 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
569 if (lsb)
570 {
571 if (buf)
572 *buf++ |= c;
573 len ++;
574 }
575 else
576 {
577 if (buf)
578 *buf = (wchar_t)(c << 8);
579 }
580
581 ok = true;
582 }
583 }
584
585 if ( !ok )
586 {
587 // in valid UTF7 we should have valid characters after '+'
588 return wxCONV_FAILED;
589 }
590
591 if (*psz == '-')
592 psz++;
593 }
594 }
595
596 if ( buf && (len < n) )
597 *buf = '\0';
598
599 return len;
600 }
601
//
// BASE64 encoding table: maps a 6 bit value to the character representing it
//
static const unsigned char utf7enb64[] =
{
    // 0 - 15
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    // 16 - 31
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    // 32 - 47
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    // 48 - 63
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
616
//
// UTF-7 encoding table giving the class of each of the 128 ASCII characters:
//
//      0 - Set D (directly encoded characters)
//      1 - Set O (optional direct characters)
//      2 - whitespace characters (optional)
//      3 - special characters
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,     // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,     // 0x10 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,     // ' ' - '/'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,     // '0' - '?'
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // '@' - 'O'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,     // 'P' - '_'
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // '`' - 'o'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3      // 'p' - 0x7f
};
636
637 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
638 {
639 size_t len = 0;
640
641 while (*psz && ((!buf) || (len < n)))
642 {
643 wchar_t cc = *psz++;
644 if (cc < 0x80 && utf7encode[cc] < 1)
645 {
646 // plain ASCII char
647 if (buf)
648 *buf++ = (char)cc;
649
650 len++;
651 }
652 #ifndef WC_UTF16
653 else if (((wxUint32)cc) > 0xffff)
654 {
655 // no surrogate pair generation (yet?)
656 return wxCONV_FAILED;
657 }
658 #endif
659 else
660 {
661 if (buf)
662 *buf++ = '+';
663
664 len++;
665 if (cc != '+')
666 {
667 // BASE64 encode string
668 unsigned int lsb, d, l;
669 for (d = 0, l = 0; /*nothing*/; psz++)
670 {
671 for (lsb = 0; lsb < 2; lsb ++)
672 {
673 d <<= 8;
674 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
675
676 for (l += 8; l >= 6; )
677 {
678 l -= 6;
679 if (buf)
680 *buf++ = utf7enb64[(d >> l) % 64];
681 len++;
682 }
683 }
684
685 cc = *psz;
686 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
687 break;
688 }
689
690 if (l != 0)
691 {
692 if (buf)
693 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
694
695 len++;
696 }
697 }
698
699 if (buf)
700 *buf++ = '-';
701 len++;
702 }
703 }
704
705 if (buf && (len < n))
706 *buf = 0;
707
708 return len;
709 }
710
711 // ----------------------------------------------------------------------------
712 // UTF-8
713 // ----------------------------------------------------------------------------
714
715 static wxUint32 utf8_max[]=
716 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
717
718 // boundaries of the private use area we use to (temporarily) remap invalid
719 // characters invalid in a UTF-8 encoded string
720 const wxUint32 wxUnicodePUA = 0x100000;
721 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
722
723 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
724 {
725 size_t len = 0;
726
727 while (*psz && ((!buf) || (len < n)))
728 {
729 const char *opsz = psz;
730 bool invalid = false;
731 unsigned char cc = *psz++, fc = cc;
732 unsigned cnt;
733 for (cnt = 0; fc & 0x80; cnt++)
734 fc <<= 1;
735
736 if (!cnt)
737 {
738 // plain ASCII char
739 if (buf)
740 *buf++ = cc;
741 len++;
742
743 // escape the escape character for octal escapes
744 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
745 && cc == '\\' && (!buf || len < n))
746 {
747 if (buf)
748 *buf++ = cc;
749 len++;
750 }
751 }
752 else
753 {
754 cnt--;
755 if (!cnt)
756 {
757 // invalid UTF-8 sequence
758 invalid = true;
759 }
760 else
761 {
762 unsigned ocnt = cnt - 1;
763 wxUint32 res = cc & (0x3f >> cnt);
764 while (cnt--)
765 {
766 cc = *psz;
767 if ((cc & 0xC0) != 0x80)
768 {
769 // invalid UTF-8 sequence
770 invalid = true;
771 break;
772 }
773
774 psz++;
775 res = (res << 6) | (cc & 0x3f);
776 }
777
778 if (invalid || res <= utf8_max[ocnt])
779 {
780 // illegal UTF-8 encoding
781 invalid = true;
782 }
783 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
784 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
785 {
786 // if one of our PUA characters turns up externally
787 // it must also be treated as an illegal sequence
788 // (a bit like you have to escape an escape character)
789 invalid = true;
790 }
791 else
792 {
793 #ifdef WC_UTF16
794 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
795 size_t pa = encode_utf16(res, (wxUint16 *)buf);
796 if (pa == wxCONV_FAILED)
797 {
798 invalid = true;
799 }
800 else
801 {
802 if (buf)
803 buf += pa;
804 len += pa;
805 }
806 #else // !WC_UTF16
807 if (buf)
808 *buf++ = (wchar_t)res;
809 len++;
810 #endif // WC_UTF16/!WC_UTF16
811 }
812 }
813
814 if (invalid)
815 {
816 if (m_options & MAP_INVALID_UTF8_TO_PUA)
817 {
818 while (opsz < psz && (!buf || len < n))
819 {
820 #ifdef WC_UTF16
821 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
822 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
823 wxASSERT(pa != wxCONV_FAILED);
824 if (buf)
825 buf += pa;
826 opsz++;
827 len += pa;
828 #else
829 if (buf)
830 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
831 opsz++;
832 len++;
833 #endif
834 }
835 }
836 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
837 {
838 while (opsz < psz && (!buf || len < n))
839 {
840 if ( buf && len + 3 < n )
841 {
842 unsigned char on = *opsz;
843 *buf++ = L'\\';
844 *buf++ = (wchar_t)( L'0' + on / 0100 );
845 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
846 *buf++ = (wchar_t)( L'0' + on % 010 );
847 }
848
849 opsz++;
850 len += 4;
851 }
852 }
853 else // MAP_INVALID_UTF8_NOT
854 {
855 return wxCONV_FAILED;
856 }
857 }
858 }
859 }
860
861 if (buf && (len < n))
862 *buf = 0;
863
864 return len;
865 }
866
// returns true if wch is an octal digit, i.e. one of L'0' to L'7'
static inline bool isoctal(wchar_t wch)
{
    return !(wch < L'0' || wch > L'7');
}
871
872 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
873 {
874 size_t len = 0;
875
876 while (*psz && ((!buf) || (len < n)))
877 {
878 wxUint32 cc;
879
880 #ifdef WC_UTF16
881 // cast is ok for WC_UTF16
882 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
883 psz += (pa == wxCONV_FAILED) ? 1 : pa;
884 #else
885 cc = (*psz++) & 0x7fffffff;
886 #endif
887
888 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
889 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
890 {
891 if (buf)
892 *buf++ = (char)(cc - wxUnicodePUA);
893 len++;
894 }
895 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
896 && cc == L'\\' && psz[0] == L'\\' )
897 {
898 if (buf)
899 *buf++ = (char)cc;
900 psz++;
901 len++;
902 }
903 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
904 cc == L'\\' &&
905 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
906 {
907 if (buf)
908 {
909 *buf++ = (char) ((psz[0] - L'0') * 0100 +
910 (psz[1] - L'0') * 010 +
911 (psz[2] - L'0'));
912 }
913
914 psz += 3;
915 len++;
916 }
917 else
918 {
919 unsigned cnt;
920 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
921 {
922 }
923
924 if (!cnt)
925 {
926 // plain ASCII char
927 if (buf)
928 *buf++ = (char) cc;
929 len++;
930 }
931 else
932 {
933 len += cnt + 1;
934 if (buf)
935 {
936 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
937 while (cnt--)
938 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
939 }
940 }
941 }
942 }
943
944 if (buf && (len < n))
945 *buf = 0;
946
947 return len;
948 }
949
950 // ============================================================================
951 // UTF-16
952 // ============================================================================
953
954 #ifdef WORDS_BIGENDIAN
955 #define wxMBConvUTF16straight wxMBConvUTF16BE
956 #define wxMBConvUTF16swap wxMBConvUTF16LE
957 #else
958 #define wxMBConvUTF16swap wxMBConvUTF16BE
959 #define wxMBConvUTF16straight wxMBConvUTF16LE
960 #endif
961
962 /* static */
963 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
964 {
965 if ( srcLen == wxNO_LEN )
966 {
967 // count the number of bytes in input, including the trailing NULs
968 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
969 for ( srcLen = 1; *inBuff++; srcLen++ )
970 ;
971
972 srcLen *= BYTES_PER_CHAR;
973 }
974 else // we already have the length
975 {
976 // we can only convert an entire number of UTF-16 characters
977 if ( srcLen % BYTES_PER_CHAR )
978 return wxCONV_FAILED;
979 }
980
981 return srcLen;
982 }
983
984 // case when in-memory representation is UTF-16 too
985 #ifdef WC_UTF16
986
987 // ----------------------------------------------------------------------------
988 // conversions without endianness change
989 // ----------------------------------------------------------------------------
990
991 size_t
992 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
993 const char *src, size_t srcLen) const
994 {
995 // set up the scene for using memcpy() (which is presumably more efficient
996 // than copying the bytes one by one)
997 srcLen = GetLength(src, srcLen);
998 if ( srcLen == wxNO_LEN )
999 return wxCONV_FAILED;
1000
1001 const size_t inLen = srcLen / BYTES_PER_CHAR;
1002 if ( dst )
1003 {
1004 if ( dstLen < inLen )
1005 return wxCONV_FAILED;
1006
1007 memcpy(dst, src, srcLen);
1008 }
1009
1010 return inLen;
1011 }
1012
1013 size_t
1014 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1015 const wchar_t *src, size_t srcLen) const
1016 {
1017 if ( srcLen == wxNO_LEN )
1018 srcLen = wxWcslen(src) + 1;
1019
1020 srcLen *= BYTES_PER_CHAR;
1021
1022 if ( dst )
1023 {
1024 if ( dstLen < srcLen )
1025 return wxCONV_FAILED;
1026
1027 memcpy(dst, src, srcLen);
1028 }
1029
1030 return srcLen;
1031 }
1032
1033 // ----------------------------------------------------------------------------
1034 // endian-reversing conversions
1035 // ----------------------------------------------------------------------------
1036
1037 size_t
1038 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1039 const char *src, size_t srcLen) const
1040 {
1041 srcLen = GetLength(src, srcLen);
1042 if ( srcLen == wxNO_LEN )
1043 return wxCONV_FAILED;
1044
1045 srcLen /= BYTES_PER_CHAR;
1046
1047 if ( dst )
1048 {
1049 if ( dstLen < srcLen )
1050 return wxCONV_FAILED;
1051
1052 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1053 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1054 {
1055 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1056 }
1057 }
1058
1059 return srcLen;
1060 }
1061
1062 size_t
1063 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1064 const wchar_t *src, size_t srcLen) const
1065 {
1066 if ( srcLen == wxNO_LEN )
1067 srcLen = wxWcslen(src) + 1;
1068
1069 srcLen *= BYTES_PER_CHAR;
1070
1071 if ( dst )
1072 {
1073 if ( dstLen < srcLen )
1074 return wxCONV_FAILED;
1075
1076 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1077 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1078 {
1079 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1080 }
1081 }
1082
1083 return srcLen;
1084 }
1085
1086 #else // !WC_UTF16: wchar_t is UTF-32
1087
1088 // ----------------------------------------------------------------------------
1089 // conversions without endianness change
1090 // ----------------------------------------------------------------------------
1091
1092 size_t
1093 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1094 const char *src, size_t srcLen) const
1095 {
1096 srcLen = GetLength(src, srcLen);
1097 if ( srcLen == wxNO_LEN )
1098 return wxCONV_FAILED;
1099
1100 const size_t inLen = srcLen / BYTES_PER_CHAR;
1101 if ( !dst )
1102 {
1103 // optimization: return maximal space which could be needed for this
1104 // string even if the real size could be smaller if the buffer contains
1105 // any surrogates
1106 return inLen;
1107 }
1108
1109 size_t outLen = 0;
1110 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1111 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1112 {
1113 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1114 if ( !inBuff )
1115 return wxCONV_FAILED;
1116
1117 if ( ++outLen > dstLen )
1118 return wxCONV_FAILED;
1119
1120 *dst++ = ch;
1121 }
1122
1123
1124 return outLen;
1125 }
1126
1127 size_t
1128 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1129 const wchar_t *src, size_t srcLen) const
1130 {
1131 if ( srcLen == wxNO_LEN )
1132 srcLen = wxWcslen(src) + 1;
1133
1134 size_t outLen = 0;
1135 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1136 for ( size_t n = 0; n < srcLen; n++ )
1137 {
1138 wxUint16 cc[2];
1139 const size_t numChars = encode_utf16(*src++, cc);
1140 if ( numChars == wxCONV_FAILED )
1141 return wxCONV_FAILED;
1142
1143 outLen += numChars * BYTES_PER_CHAR;
1144 if ( outBuff )
1145 {
1146 if ( outLen > dstLen )
1147 return wxCONV_FAILED;
1148
1149 *outBuff++ = cc[0];
1150 if ( numChars == 2 )
1151 {
1152 // second character of a surrogate
1153 *outBuff++ = cc[1];
1154 }
1155 }
1156 }
1157
1158 return outLen;
1159 }
1160
1161 // ----------------------------------------------------------------------------
1162 // endian-reversing conversions
1163 // ----------------------------------------------------------------------------
1164
1165 size_t
1166 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1167 const char *src, size_t srcLen) const
1168 {
1169 srcLen = GetLength(src, srcLen);
1170 if ( srcLen == wxNO_LEN )
1171 return wxCONV_FAILED;
1172
1173 const size_t inLen = srcLen / BYTES_PER_CHAR;
1174 if ( !dst )
1175 {
1176 // optimization: return maximal space which could be needed for this
1177 // string even if the real size could be smaller if the buffer contains
1178 // any surrogates
1179 return inLen;
1180 }
1181
1182 size_t outLen = 0;
1183 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1184 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1185 {
1186 wxUint32 ch;
1187 wxUint16 tmp[2];
1188
1189 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1190 inBuff++;
1191 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1192
1193 const size_t numChars = decode_utf16(tmp, ch);
1194 if ( numChars == wxCONV_FAILED )
1195 return wxCONV_FAILED;
1196
1197 if ( numChars == 2 )
1198 inBuff++;
1199
1200 if ( ++outLen > dstLen )
1201 return wxCONV_FAILED;
1202
1203 *dst++ = ch;
1204 }
1205
1206
1207 return outLen;
1208 }
1209
1210 size_t
1211 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1212 const wchar_t *src, size_t srcLen) const
1213 {
1214 if ( srcLen == wxNO_LEN )
1215 srcLen = wxWcslen(src) + 1;
1216
1217 size_t outLen = 0;
1218 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1219 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1220 {
1221 wxUint16 cc[2];
1222 const size_t numChars = encode_utf16(*src, cc);
1223 if ( numChars == wxCONV_FAILED )
1224 return wxCONV_FAILED;
1225
1226 outLen += numChars * BYTES_PER_CHAR;
1227 if ( outBuff )
1228 {
1229 if ( outLen > dstLen )
1230 return wxCONV_FAILED;
1231
1232 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1233 if ( numChars == 2 )
1234 {
1235 // second character of a surrogate
1236 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1237 }
1238 }
1239 }
1240
1241 return outLen;
1242 }
1243
1244 #endif // WC_UTF16/!WC_UTF16
1245
1246
1247 // ============================================================================
1248 // UTF-32
1249 // ============================================================================
1250
1251 #ifdef WORDS_BIGENDIAN
1252 #define wxMBConvUTF32straight wxMBConvUTF32BE
1253 #define wxMBConvUTF32swap wxMBConvUTF32LE
1254 #else
1255 #define wxMBConvUTF32swap wxMBConvUTF32BE
1256 #define wxMBConvUTF32straight wxMBConvUTF32LE
1257 #endif
1258
1259
1260 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1261 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1262
1263 /* static */
1264 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1265 {
1266 if ( srcLen == wxNO_LEN )
1267 {
1268 // count the number of bytes in input, including the trailing NULs
1269 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1270 for ( srcLen = 1; *inBuff++; srcLen++ )
1271 ;
1272
1273 srcLen *= BYTES_PER_CHAR;
1274 }
1275 else // we already have the length
1276 {
1277 // we can only convert an entire number of UTF-32 characters
1278 if ( srcLen % BYTES_PER_CHAR )
1279 return wxCONV_FAILED;
1280 }
1281
1282 return srcLen;
1283 }
1284
1285 // case when in-memory representation is UTF-16
1286 #ifdef WC_UTF16
1287
1288 // ----------------------------------------------------------------------------
1289 // conversions without endianness change
1290 // ----------------------------------------------------------------------------
1291
1292 size_t
1293 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1294 const char *src, size_t srcLen) const
1295 {
1296 srcLen = GetLength(src, srcLen);
1297 if ( srcLen == wxNO_LEN )
1298 return wxCONV_FAILED;
1299
1300 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1301 const size_t inLen = srcLen / BYTES_PER_CHAR;
1302 size_t outLen = 0;
1303 for ( size_t n = 0; n < inLen; n++ )
1304 {
1305 wxUint16 cc[2];
1306 const size_t numChars = encode_utf16(*inBuff++, cc);
1307 if ( numChars == wxCONV_FAILED )
1308 return wxCONV_FAILED;
1309
1310 outLen += numChars;
1311 if ( dst )
1312 {
1313 if ( outLen > dstLen )
1314 return wxCONV_FAILED;
1315
1316 *dst++ = cc[0];
1317 if ( numChars == 2 )
1318 {
1319 // second character of a surrogate
1320 *dst++ = cc[1];
1321 }
1322 }
1323 }
1324
1325 return outLen;
1326 }
1327
1328 size_t
1329 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1330 const wchar_t *src, size_t srcLen) const
1331 {
1332 if ( srcLen == wxNO_LEN )
1333 srcLen = wxWcslen(src) + 1;
1334
1335 if ( !dst )
1336 {
1337 // optimization: return maximal space which could be needed for this
1338 // string instead of the exact amount which could be less if there are
1339 // any surrogates in the input
1340 //
1341 // we consider that surrogates are rare enough to make it worthwhile to
1342 // avoid running the loop below at the cost of slightly extra memory
1343 // consumption
1344 return srcLen * BYTES_PER_CHAR;
1345 }
1346
1347 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1348 size_t outLen = 0;
1349 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1350 {
1351 const wxUint32 ch = wxDecodeSurrogate(&src);
1352 if ( !src )
1353 return wxCONV_FAILED;
1354
1355 outLen += BYTES_PER_CHAR;
1356
1357 if ( outLen > dstLen )
1358 return wxCONV_FAILED;
1359
1360 *outBuff++ = ch;
1361 }
1362
1363 return outLen;
1364 }
1365
1366 // ----------------------------------------------------------------------------
1367 // endian-reversing conversions
1368 // ----------------------------------------------------------------------------
1369
1370 size_t
1371 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1372 const char *src, size_t srcLen) const
1373 {
1374 srcLen = GetLength(src, srcLen);
1375 if ( srcLen == wxNO_LEN )
1376 return wxCONV_FAILED;
1377
1378 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1379 const size_t inLen = srcLen / BYTES_PER_CHAR;
1380 size_t outLen = 0;
1381 for ( size_t n = 0; n < inLen; n++, inBuff++ )
1382 {
1383 wxUint16 cc[2];
1384 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1385 if ( numChars == wxCONV_FAILED )
1386 return wxCONV_FAILED;
1387
1388 outLen += numChars;
1389 if ( dst )
1390 {
1391 if ( outLen > dstLen )
1392 return wxCONV_FAILED;
1393
1394 *dst++ = cc[0];
1395 if ( numChars == 2 )
1396 {
1397 // second character of a surrogate
1398 *dst++ = cc[1];
1399 }
1400 }
1401 }
1402
1403 return outLen;
1404 }
1405
1406 size_t
1407 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1408 const wchar_t *src, size_t srcLen) const
1409 {
1410 if ( srcLen == wxNO_LEN )
1411 srcLen = wxWcslen(src) + 1;
1412
1413 if ( !dst )
1414 {
1415 // optimization: return maximal space which could be needed for this
1416 // string instead of the exact amount which could be less if there are
1417 // any surrogates in the input
1418 //
1419 // we consider that surrogates are rare enough to make it worthwhile to
1420 // avoid running the loop below at the cost of slightly extra memory
1421 // consumption
1422 return srcLen*BYTES_PER_CHAR;
1423 }
1424
1425 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1426 size_t outLen = 0;
1427 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1428 {
1429 const wxUint32 ch = wxDecodeSurrogate(&src);
1430 if ( !src )
1431 return wxCONV_FAILED;
1432
1433 outLen += BYTES_PER_CHAR;
1434
1435 if ( outLen > dstLen )
1436 return wxCONV_FAILED;
1437
1438 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1439 }
1440
1441 return outLen;
1442 }
1443
1444 #else // !WC_UTF16: wchar_t is UTF-32
1445
1446 // ----------------------------------------------------------------------------
1447 // conversions without endianness change
1448 // ----------------------------------------------------------------------------
1449
1450 size_t
1451 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1452 const char *src, size_t srcLen) const
1453 {
1454 // use memcpy() as it should be much faster than hand-written loop
1455 srcLen = GetLength(src, srcLen);
1456 if ( srcLen == wxNO_LEN )
1457 return wxCONV_FAILED;
1458
1459 const size_t inLen = srcLen/BYTES_PER_CHAR;
1460 if ( dst )
1461 {
1462 if ( dstLen < inLen )
1463 return wxCONV_FAILED;
1464
1465 memcpy(dst, src, srcLen);
1466 }
1467
1468 return inLen;
1469 }
1470
1471 size_t
1472 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1473 const wchar_t *src, size_t srcLen) const
1474 {
1475 if ( srcLen == wxNO_LEN )
1476 srcLen = wxWcslen(src) + 1;
1477
1478 srcLen *= BYTES_PER_CHAR;
1479
1480 if ( dst )
1481 {
1482 if ( dstLen < srcLen )
1483 return wxCONV_FAILED;
1484
1485 memcpy(dst, src, srcLen);
1486 }
1487
1488 return srcLen;
1489 }
1490
1491 // ----------------------------------------------------------------------------
1492 // endian-reversing conversions
1493 // ----------------------------------------------------------------------------
1494
1495 size_t
1496 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1497 const char *src, size_t srcLen) const
1498 {
1499 srcLen = GetLength(src, srcLen);
1500 if ( srcLen == wxNO_LEN )
1501 return wxCONV_FAILED;
1502
1503 srcLen /= BYTES_PER_CHAR;
1504
1505 if ( dst )
1506 {
1507 if ( dstLen < srcLen )
1508 return wxCONV_FAILED;
1509
1510 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1511 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1512 {
1513 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
1514 }
1515 }
1516
1517 return srcLen;
1518 }
1519
1520 size_t
1521 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1522 const wchar_t *src, size_t srcLen) const
1523 {
1524 if ( srcLen == wxNO_LEN )
1525 srcLen = wxWcslen(src) + 1;
1526
1527 srcLen *= BYTES_PER_CHAR;
1528
1529 if ( dst )
1530 {
1531 if ( dstLen < srcLen )
1532 return wxCONV_FAILED;
1533
1534 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1535 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1536 {
1537 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
1538 }
1539 }
1540
1541 return srcLen;
1542 }
1543
1544 #endif // WC_UTF16/!WC_UTF16
1545
1546
1547 // ============================================================================
1548 // The classes doing conversion using the iconv_xxx() functions
1549 // ============================================================================
1550
1551 #ifdef HAVE_ICONV
1552
1553 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1554 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1555 // (unless there's yet another bug in glibc) the only case when iconv()
1556 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1557 // left in the input buffer -- when _real_ error occurs,
1558 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1559 // iconv() failure.
1560 // [This bug does not appear in glibc 2.2.]
// ICONV_FAILED(cres, bufLeft): true if the iconv() call which returned cres
// and left bufLeft bytes of input unconverted really failed.
//
// The macro parameters are parenthesized so that an expression passed as an
// argument is compared as a whole.
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
    // affected glibc: an error reported with E2BIG and with no input left is
    // the spurious failure described above, not a real one
    #define ICONV_FAILED(cres, bufLeft) (((cres) == (size_t)-1) && \
                                         (errno != E2BIG || (bufLeft) != 0))
#else
    // bufLeft is not used here, it only exists for the workaround above
    #define ICONV_FAILED(cres, bufLeft)  ((cres) == (size_t)-1)
#endif

// cast the address of an input pointer to the pointer type used for the
// second argument of iconv() (ICONV_CONST is defined elsewhere)
#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))

// the value the iconv_t members are compared with to detect a descriptor
// which couldn't be opened
#define ICONV_T_INVALID ((iconv_t)-1)
1571
// Select, according to the size of wchar_t, the macro used to byte-swap one
// wide character (WC_BSWAP) and the font encoding constant used to look up
// the names of the wchar_t charset (WC_ENC).
#if SIZEOF_WCHAR_T == 4
    #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF32
#elif SIZEOF_WCHAR_T == 2
    #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
    #define WC_ENC      wxFONTENCODING_UTF16
#else // sizeof(wchar_t) != 2 nor 4
    // does this ever happen?
    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
#endif
1582
1583 // ----------------------------------------------------------------------------
1584 // wxMBConv_iconv: encapsulates an iconv character set
1585 // ----------------------------------------------------------------------------
1586
1587 class wxMBConv_iconv : public wxMBConv
1588 {
1589 public:
1590 wxMBConv_iconv(const wxChar *name);
1591 virtual ~wxMBConv_iconv();
1592
1593 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1594 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1595
1596 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1597 virtual size_t GetMBNulLen() const;
1598
1599 virtual wxMBConv *Clone() const
1600 {
1601 wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1602 p->m_minMBCharWidth = m_minMBCharWidth;
1603 return p;
1604 }
1605
1606 bool IsOk() const
1607 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1608
1609 protected:
1610 // the iconv handlers used to translate from multibyte
1611 // to wide char and in the other direction
1612 iconv_t m2w,
1613 w2m;
1614
1615 #if wxUSE_THREADS
1616 // guards access to m2w and w2m objects
1617 wxMutex m_iconvMutex;
1618 #endif
1619
1620 private:
1621 // the name (for iconv_open()) of a wide char charset -- if none is
1622 // available on this machine, it will remain NULL
1623 static wxString ms_wcCharsetName;
1624
1625 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1626 // different endian-ness than the native one
1627 static bool ms_wcNeedsSwap;
1628
1629
1630 // name of the encoding handled by this conversion
1631 wxString m_name;
1632
1633 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1634 // initially
1635 size_t m_minMBCharWidth;
1636 };
1637
1638 // make the constructor available for unit testing
1639 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1640 {
1641 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1642 if ( !result->IsOk() )
1643 {
1644 delete result;
1645 return 0;
1646 }
1647
1648 return result;
1649 }
1650
// definitions of wxMBConv_iconv static members: the wchar_t charset name is
// initially empty and is filled in by the constructor when it is first needed
wxString wxMBConv_iconv::ms_wcCharsetName;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1653
1654 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1655 : m_name(name)
1656 {
1657 m_minMBCharWidth = 0;
1658
1659 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1660 // names for the charsets
1661 const wxCharBuffer cname(wxString(name).ToAscii());
1662
1663 // check for charset that represents wchar_t:
1664 if ( ms_wcCharsetName.empty() )
1665 {
1666 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1667
1668 #if wxUSE_FONTMAP
1669 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1670 #else // !wxUSE_FONTMAP
1671 static const wxChar *names[] =
1672 {
1673 #if SIZEOF_WCHAR_T == 4
1674 _T("UCS-4"),
1675 #elif SIZEOF_WCHAR_T = 2
1676 _T("UCS-2"),
1677 #endif
1678 NULL
1679 };
1680 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1681
1682 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1683 {
1684 const wxString nameCS(*names);
1685
1686 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1687 wxString nameXE(nameCS);
1688
1689 #ifdef WORDS_BIGENDIAN
1690 nameXE += _T("BE");
1691 #else // little endian
1692 nameXE += _T("LE");
1693 #endif
1694
1695 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1696 nameXE.c_str());
1697
1698 m2w = iconv_open(nameXE.ToAscii(), cname);
1699 if ( m2w == ICONV_T_INVALID )
1700 {
1701 // try charset w/o bytesex info (e.g. "UCS4")
1702 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1703 nameCS.c_str());
1704 m2w = iconv_open(nameCS.ToAscii(), cname);
1705
1706 // and check for bytesex ourselves:
1707 if ( m2w != ICONV_T_INVALID )
1708 {
1709 char buf[2], *bufPtr;
1710 wchar_t wbuf[2], *wbufPtr;
1711 size_t insz, outsz;
1712 size_t res;
1713
1714 buf[0] = 'A';
1715 buf[1] = 0;
1716 wbuf[0] = 0;
1717 insz = 2;
1718 outsz = SIZEOF_WCHAR_T * 2;
1719 wbufPtr = wbuf;
1720 bufPtr = buf;
1721
1722 res = iconv(
1723 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1724 (char**)&wbufPtr, &outsz);
1725
1726 if (ICONV_FAILED(res, insz))
1727 {
1728 wxLogLastError(wxT("iconv"));
1729 wxLogError(_("Conversion to charset '%s' doesn't work."),
1730 nameCS.c_str());
1731 }
1732 else // ok, can convert to this encoding, remember it
1733 {
1734 ms_wcCharsetName = nameCS;
1735 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1736 }
1737 }
1738 }
1739 else // use charset not requiring byte swapping
1740 {
1741 ms_wcCharsetName = nameXE;
1742 }
1743 }
1744
1745 wxLogTrace(TRACE_STRCONV,
1746 wxT("iconv wchar_t charset is \"%s\"%s"),
1747 ms_wcCharsetName.empty() ? _T("<none>")
1748 : ms_wcCharsetName.c_str(),
1749 ms_wcNeedsSwap ? _T(" (needs swap)")
1750 : _T(""));
1751 }
1752 else // we already have ms_wcCharsetName
1753 {
1754 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1755 }
1756
1757 if ( ms_wcCharsetName.empty() )
1758 {
1759 w2m = ICONV_T_INVALID;
1760 }
1761 else
1762 {
1763 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1764 if ( w2m == ICONV_T_INVALID )
1765 {
1766 wxLogTrace(TRACE_STRCONV,
1767 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1768 ms_wcCharsetName.c_str(), cname.data());
1769 }
1770 }
1771 }
1772
1773 wxMBConv_iconv::~wxMBConv_iconv()
1774 {
1775 if ( m2w != ICONV_T_INVALID )
1776 iconv_close(m2w);
1777 if ( w2m != ICONV_T_INVALID )
1778 iconv_close(w2m);
1779 }
1780
1781 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1782 {
1783 // find the string length: notice that must be done differently for
1784 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1785 size_t inbuf;
1786 const size_t nulLen = GetMBNulLen();
1787 switch ( nulLen )
1788 {
1789 default:
1790 return wxCONV_FAILED;
1791
1792 case 1:
1793 inbuf = strlen(psz); // arguably more optimized than our version
1794 break;
1795
1796 case 2:
1797 case 4:
1798 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1799 // they also have to start at character boundary and not span two
1800 // adjacent characters
1801 const char *p;
1802 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1803 ;
1804 inbuf = p - psz;
1805 break;
1806 }
1807
1808 #if wxUSE_THREADS
1809 // NB: iconv() is MT-safe, but each thread must use it's own iconv_t handle.
1810 // Unfortunately there is a couple of global wxCSConv objects such as
1811 // wxConvLocal that are used all over wx code, so we have to make sure
1812 // the handle is used by at most one thread at the time. Otherwise
1813 // only a few wx classes would be safe to use from non-main threads
1814 // as MB<->WC conversion would fail "randomly".
1815 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1816 #endif // wxUSE_THREADS
1817
1818 size_t outbuf = n * SIZEOF_WCHAR_T;
1819 size_t res, cres;
1820 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1821 wchar_t *bufPtr = buf;
1822 const char *pszPtr = psz;
1823
1824 if (buf)
1825 {
1826 // have destination buffer, convert there
1827 cres = iconv(m2w,
1828 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1829 (char**)&bufPtr, &outbuf);
1830 res = n - (outbuf / SIZEOF_WCHAR_T);
1831
1832 if (ms_wcNeedsSwap)
1833 {
1834 // convert to native endianness
1835 for ( unsigned i = 0; i < res; i++ )
1836 buf[n] = WC_BSWAP(buf[i]);
1837 }
1838
1839 // NUL-terminate the string if there is any space left
1840 if (res < n)
1841 buf[res] = 0;
1842 }
1843 else
1844 {
1845 // no destination buffer... convert using temp buffer
1846 // to calculate destination buffer requirement
1847 wchar_t tbuf[8];
1848 res = 0;
1849
1850 do
1851 {
1852 bufPtr = tbuf;
1853 outbuf = 8 * SIZEOF_WCHAR_T;
1854
1855 cres = iconv(m2w,
1856 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1857 (char**)&bufPtr, &outbuf );
1858
1859 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1860 }
1861 while ((cres == (size_t)-1) && (errno == E2BIG));
1862 }
1863
1864 if (ICONV_FAILED(cres, inbuf))
1865 {
1866 //VS: it is ok if iconv fails, hence trace only
1867 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1868 return wxCONV_FAILED;
1869 }
1870
1871 return res;
1872 }
1873
1874 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1875 {
1876 #if wxUSE_THREADS
1877 // NB: explained in MB2WC
1878 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1879 #endif
1880
1881 size_t inlen = wxWcslen(psz);
1882 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1883 size_t outbuf = n;
1884 size_t res, cres;
1885
1886 wchar_t *tmpbuf = 0;
1887
1888 if (ms_wcNeedsSwap)
1889 {
1890 // need to copy to temp buffer to switch endianness
1891 // (doing WC_BSWAP twice on the original buffer won't help, as it
1892 // could be in read-only memory, or be accessed in some other thread)
1893 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1894 for ( size_t i = 0; i < inlen; i++ )
1895 tmpbuf[n] = WC_BSWAP(psz[i]);
1896
1897 tmpbuf[inlen] = L'\0';
1898 psz = tmpbuf;
1899 }
1900
1901 if (buf)
1902 {
1903 // have destination buffer, convert there
1904 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1905
1906 res = n - outbuf;
1907
1908 // NB: iconv was given only wcslen(psz) characters on input, and so
1909 // it couldn't convert the trailing zero. Let's do it ourselves
1910 // if there's some room left for it in the output buffer.
1911 if (res < n)
1912 buf[0] = 0;
1913 }
1914 else
1915 {
1916 // no destination buffer: convert using temp buffer
1917 // to calculate destination buffer requirement
1918 char tbuf[16];
1919 res = 0;
1920 do
1921 {
1922 buf = tbuf;
1923 outbuf = 16;
1924
1925 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1926
1927 res += 16 - outbuf;
1928 }
1929 while ((cres == (size_t)-1) && (errno == E2BIG));
1930 }
1931
1932 if (ms_wcNeedsSwap)
1933 {
1934 free(tmpbuf);
1935 }
1936
1937 if (ICONV_FAILED(cres, inbuf))
1938 {
1939 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1940 return wxCONV_FAILED;
1941 }
1942
1943 return res;
1944 }
1945
1946 size_t wxMBConv_iconv::GetMBNulLen() const
1947 {
1948 if ( m_minMBCharWidth == 0 )
1949 {
1950 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1951
1952 #if wxUSE_THREADS
1953 // NB: explained in MB2WC
1954 wxMutexLocker lock(self->m_iconvMutex);
1955 #endif
1956
1957 wchar_t *wnul = L"";
1958 char buf[8]; // should be enough for NUL in any encoding
1959 size_t inLen = sizeof(wchar_t),
1960 outLen = WXSIZEOF(buf);
1961 char *inBuff = (char *)wnul;
1962 char *outBuff = buf;
1963 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
1964 {
1965 self->m_minMBCharWidth = (size_t)-1;
1966 }
1967 else // ok
1968 {
1969 self->m_minMBCharWidth = outBuff - buf;
1970 }
1971 }
1972
1973 return m_minMBCharWidth;
1974 }
1975
1976 #endif // HAVE_ICONV
1977
1978
1979 // ============================================================================
1980 // Win32 conversion classes
1981 // ============================================================================
1982
1983 #ifdef wxHAVE_WIN32_MB2WC
1984
// from utils.cpp: functions returning the code page for a charset name or a
// font encoding, used by wxMBConv_win32 constructors to initialize the code
// page and available only if wxUSE_FONTMAP is enabled
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
1990
1991 class wxMBConv_win32 : public wxMBConv
1992 {
1993 public:
1994 wxMBConv_win32()
1995 {
1996 m_CodePage = CP_ACP;
1997 m_minMBCharWidth = 0;
1998 }
1999
2000 wxMBConv_win32(const wxMBConv_win32& conv)
2001 : wxMBConv()
2002 {
2003 m_CodePage = conv.m_CodePage;
2004 m_minMBCharWidth = conv.m_minMBCharWidth;
2005 }
2006
2007 #if wxUSE_FONTMAP
2008 wxMBConv_win32(const wxChar* name)
2009 {
2010 m_CodePage = wxCharsetToCodepage(name);
2011 m_minMBCharWidth = 0;
2012 }
2013
2014 wxMBConv_win32(wxFontEncoding encoding)
2015 {
2016 m_CodePage = wxEncodingToCodepage(encoding);
2017 m_minMBCharWidth = 0;
2018 }
2019 #endif // wxUSE_FONTMAP
2020
2021 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2022 {
2023 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2024 // the behaviour is not compatible with the Unix version (using iconv)
2025 // and break the library itself, e.g. wxTextInputStream::NextChar()
2026 // wouldn't work if reading an incomplete MB char didn't result in an
2027 // error
2028 //
2029 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2030 // Win XP or newer and it is not supported for UTF-[78] so we always
2031 // use our own conversions in this case. See
2032 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2033 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2034 if ( m_CodePage == CP_UTF8 )
2035 {
2036 return wxConvUTF8.MB2WC(buf, psz, n);
2037 }
2038
2039 if ( m_CodePage == CP_UTF7 )
2040 {
2041 return wxConvUTF7.MB2WC(buf, psz, n);
2042 }
2043
2044 int flags = 0;
2045 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2046 IsAtLeastWin2kSP4() )
2047 {
2048 flags = MB_ERR_INVALID_CHARS;
2049 }
2050
2051 const size_t len = ::MultiByteToWideChar
2052 (
2053 m_CodePage, // code page
2054 flags, // flags: fall on error
2055 psz, // input string
2056 -1, // its length (NUL-terminated)
2057 buf, // output string
2058 buf ? n : 0 // size of output buffer
2059 );
2060 if ( !len )
2061 {
2062 // function totally failed
2063 return wxCONV_FAILED;
2064 }
2065
2066 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2067 // check if we succeeded, by doing a double trip:
2068 if ( !flags && buf )
2069 {
2070 const size_t mbLen = strlen(psz);
2071 wxCharBuffer mbBuf(mbLen);
2072 if ( ::WideCharToMultiByte
2073 (
2074 m_CodePage,
2075 0,
2076 buf,
2077 -1,
2078 mbBuf.data(),
2079 mbLen + 1, // size in bytes, not length
2080 NULL,
2081 NULL
2082 ) == 0 ||
2083 strcmp(mbBuf, psz) != 0 )
2084 {
2085 // we didn't obtain the same thing we started from, hence
2086 // the conversion was lossy and we consider that it failed
2087 return wxCONV_FAILED;
2088 }
2089 }
2090
2091 // note that it returns count of written chars for buf != NULL and size
2092 // of the needed buffer for buf == NULL so in either case the length of
2093 // the string (which never includes the terminating NUL) is one less
2094 return len - 1;
2095 }
2096
2097 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2098 {
2099 /*
2100 we have a problem here: by default, WideCharToMultiByte() may
2101 replace characters unrepresentable in the target code page with bad
2102 quality approximations such as turning "1/2" symbol (U+00BD) into
2103 "1" for the code pages which don't have it and we, obviously, want
2104 to avoid this at any price
2105
2106 the trouble is that this function does it _silently_, i.e. it won't
2107 even tell us whether it did or not... Win98/2000 and higher provide
2108 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2109 we have to resort to a round trip, i.e. check that converting back
2110 results in the same string -- this is, of course, expensive but
2111 otherwise we simply can't be sure to not garble the data.
2112 */
2113
2114 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2115 // it doesn't work with CJK encodings (which we test for rather roughly
2116 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2117 // supporting it
2118 BOOL usedDef wxDUMMY_INITIALIZE(false);
2119 BOOL *pUsedDef;
2120 int flags;
2121 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2122 {
2123 // it's our lucky day
2124 flags = WC_NO_BEST_FIT_CHARS;
2125 pUsedDef = &usedDef;
2126 }
2127 else // old system or unsupported encoding
2128 {
2129 flags = 0;
2130 pUsedDef = NULL;
2131 }
2132
2133 const size_t len = ::WideCharToMultiByte
2134 (
2135 m_CodePage, // code page
2136 flags, // either none or no best fit
2137 pwz, // input string
2138 -1, // it is (wide) NUL-terminated
2139 buf, // output buffer
2140 buf ? n : 0, // and its size
2141 NULL, // default "replacement" char
2142 pUsedDef // [out] was it used?
2143 );
2144
2145 if ( !len )
2146 {
2147 // function totally failed
2148 return wxCONV_FAILED;
2149 }
2150
2151 // if we were really converting, check if we succeeded
2152 if ( buf )
2153 {
2154 if ( flags )
2155 {
2156 // check if the conversion failed, i.e. if any replacements
2157 // were done
2158 if ( usedDef )
2159 return wxCONV_FAILED;
2160 }
2161 else // we must resort to double tripping...
2162 {
2163 wxWCharBuffer wcBuf(n);
2164 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2165 wcscmp(wcBuf, pwz) != 0 )
2166 {
2167 // we didn't obtain the same thing we started from, hence
2168 // the conversion was lossy and we consider that it failed
2169 return wxCONV_FAILED;
2170 }
2171 }
2172 }
2173
2174 // see the comment above for the reason of "len - 1"
2175 return len - 1;
2176 }
2177
2178 virtual size_t GetMBNulLen() const
2179 {
2180 if ( m_minMBCharWidth == 0 )
2181 {
2182 int len = ::WideCharToMultiByte
2183 (
2184 m_CodePage, // code page
2185 0, // no flags
2186 L"", // input string
2187 1, // translate just the NUL
2188 NULL, // output buffer
2189 0, // and its size
2190 NULL, // no replacement char
2191 NULL // [out] don't care if it was used
2192 );
2193
2194 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2195 switch ( len )
2196 {
2197 default:
2198 wxLogDebug(_T("Unexpected NUL length %d"), len);
2199 self->m_minMBCharWidth = (size_t)-1;
2200 break;
2201
2202 case 0:
2203 self->m_minMBCharWidth = (size_t)-1;
2204 break;
2205
2206 case 1:
2207 case 2:
2208 case 4:
2209 self->m_minMBCharWidth = len;
2210 break;
2211 }
2212 }
2213
2214 return m_minMBCharWidth;
2215 }
2216
2217 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2218
2219 bool IsOk() const { return m_CodePage != -1; }
2220
2221 private:
2222 static bool CanUseNoBestFit()
2223 {
2224 static int s_isWin98Or2k = -1;
2225
2226 if ( s_isWin98Or2k == -1 )
2227 {
2228 int verMaj, verMin;
2229 switch ( wxGetOsVersion(&verMaj, &verMin) )
2230 {
2231 case wxWIN95:
2232 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2233 break;
2234
2235 case wxWINDOWS_NT:
2236 s_isWin98Or2k = verMaj >= 5;
2237 break;
2238
2239 default:
2240 // unknown: be conservative by default
2241 s_isWin98Or2k = 0;
2242 break;
2243 }
2244
2245 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2246 }
2247
2248 return s_isWin98Or2k == 1;
2249 }
2250
2251 static bool IsAtLeastWin2kSP4()
2252 {
2253 #ifdef __WXWINCE__
2254 return false;
2255 #else
2256 static int s_isAtLeastWin2kSP4 = -1;
2257
2258 if ( s_isAtLeastWin2kSP4 == -1 )
2259 {
2260 OSVERSIONINFOEX ver;
2261
2262 memset(&ver, 0, sizeof(ver));
2263 ver.dwOSVersionInfoSize = sizeof(ver);
2264 GetVersionEx((OSVERSIONINFO*)&ver);
2265
2266 s_isAtLeastWin2kSP4 =
2267 ((ver.dwMajorVersion > 5) || // Vista+
2268 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2269 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2270 ver.wServicePackMajor >= 4)) // 2000 SP4+
2271 ? 1 : 0;
2272 }
2273
2274 return s_isAtLeastWin2kSP4 == 1;
2275 #endif
2276 }
2277
2278
2279 // the code page we're working with
2280 long m_CodePage;
2281
2282 // cached result of GetMBNulLen(), set to 0 initially meaning
2283 // "unknown"
2284 size_t m_minMBCharWidth;
2285 };
2286
2287 #endif // wxHAVE_WIN32_MB2WC
2288
2289 // ============================================================================
2290 // Cocoa conversion classes
2291 // ============================================================================
2292
2293 #if defined(__WXCOCOA__)
2294
// RN:  There is no UTF-32 support in either Core Foundation or Cocoa.
//      Strangely enough, Core Foundation uses UTF-32 internally quite a
//      bit - it's just not public (yet).
2298
2299 #include <CoreFoundation/CFString.h>
2300 #include <CoreFoundation/CFStringEncodingExt.h>
2301
2302 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2303 {
2304 CFStringEncoding enc = kCFStringEncodingInvalidId ;
2305
2306 switch (encoding)
2307 {
2308 case wxFONTENCODING_DEFAULT :
2309 enc = CFStringGetSystemEncoding();
2310 break ;
2311
2312 case wxFONTENCODING_ISO8859_1 :
2313 enc = kCFStringEncodingISOLatin1 ;
2314 break ;
2315 case wxFONTENCODING_ISO8859_2 :
2316 enc = kCFStringEncodingISOLatin2;
2317 break ;
2318 case wxFONTENCODING_ISO8859_3 :
2319 enc = kCFStringEncodingISOLatin3 ;
2320 break ;
2321 case wxFONTENCODING_ISO8859_4 :
2322 enc = kCFStringEncodingISOLatin4;
2323 break ;
2324 case wxFONTENCODING_ISO8859_5 :
2325 enc = kCFStringEncodingISOLatinCyrillic;
2326 break ;
2327 case wxFONTENCODING_ISO8859_6 :
2328 enc = kCFStringEncodingISOLatinArabic;
2329 break ;
2330 case wxFONTENCODING_ISO8859_7 :
2331 enc = kCFStringEncodingISOLatinGreek;
2332 break ;
2333 case wxFONTENCODING_ISO8859_8 :
2334 enc = kCFStringEncodingISOLatinHebrew;
2335 break ;
2336 case wxFONTENCODING_ISO8859_9 :
2337 enc = kCFStringEncodingISOLatin5;
2338 break ;
2339 case wxFONTENCODING_ISO8859_10 :
2340 enc = kCFStringEncodingISOLatin6;
2341 break ;
2342 case wxFONTENCODING_ISO8859_11 :
2343 enc = kCFStringEncodingISOLatinThai;
2344 break ;
2345 case wxFONTENCODING_ISO8859_13 :
2346 enc = kCFStringEncodingISOLatin7;
2347 break ;
2348 case wxFONTENCODING_ISO8859_14 :
2349 enc = kCFStringEncodingISOLatin8;
2350 break ;
2351 case wxFONTENCODING_ISO8859_15 :
2352 enc = kCFStringEncodingISOLatin9;
2353 break ;
2354
2355 case wxFONTENCODING_KOI8 :
2356 enc = kCFStringEncodingKOI8_R;
2357 break ;
2358 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2359 enc = kCFStringEncodingDOSRussian;
2360 break ;
2361
2362 // case wxFONTENCODING_BULGARIAN :
2363 // enc = ;
2364 // break ;
2365
2366 case wxFONTENCODING_CP437 :
2367 enc = kCFStringEncodingDOSLatinUS ;
2368 break ;
2369 case wxFONTENCODING_CP850 :
2370 enc = kCFStringEncodingDOSLatin1;
2371 break ;
2372 case wxFONTENCODING_CP852 :
2373 enc = kCFStringEncodingDOSLatin2;
2374 break ;
2375 case wxFONTENCODING_CP855 :
2376 enc = kCFStringEncodingDOSCyrillic;
2377 break ;
2378 case wxFONTENCODING_CP866 :
2379 enc = kCFStringEncodingDOSRussian ;
2380 break ;
2381 case wxFONTENCODING_CP874 :
2382 enc = kCFStringEncodingDOSThai;
2383 break ;
2384 case wxFONTENCODING_CP932 :
2385 enc = kCFStringEncodingDOSJapanese;
2386 break ;
2387 case wxFONTENCODING_CP936 :
2388 enc = kCFStringEncodingDOSChineseSimplif ;
2389 break ;
2390 case wxFONTENCODING_CP949 :
2391 enc = kCFStringEncodingDOSKorean;
2392 break ;
2393 case wxFONTENCODING_CP950 :
2394 enc = kCFStringEncodingDOSChineseTrad;
2395 break ;
2396 case wxFONTENCODING_CP1250 :
2397 enc = kCFStringEncodingWindowsLatin2;
2398 break ;
2399 case wxFONTENCODING_CP1251 :
2400 enc = kCFStringEncodingWindowsCyrillic ;
2401 break ;
2402 case wxFONTENCODING_CP1252 :
2403 enc = kCFStringEncodingWindowsLatin1 ;
2404 break ;
2405 case wxFONTENCODING_CP1253 :
2406 enc = kCFStringEncodingWindowsGreek;
2407 break ;
2408 case wxFONTENCODING_CP1254 :
2409 enc = kCFStringEncodingWindowsLatin5;
2410 break ;
2411 case wxFONTENCODING_CP1255 :
2412 enc = kCFStringEncodingWindowsHebrew ;
2413 break ;
2414 case wxFONTENCODING_CP1256 :
2415 enc = kCFStringEncodingWindowsArabic ;
2416 break ;
2417 case wxFONTENCODING_CP1257 :
2418 enc = kCFStringEncodingWindowsBalticRim;
2419 break ;
2420 // This only really encodes to UTF7 (if that) evidently
2421 // case wxFONTENCODING_UTF7 :
2422 // enc = kCFStringEncodingNonLossyASCII ;
2423 // break ;
2424 case wxFONTENCODING_UTF8 :
2425 enc = kCFStringEncodingUTF8 ;
2426 break ;
2427 case wxFONTENCODING_EUC_JP :
2428 enc = kCFStringEncodingEUC_JP;
2429 break ;
2430 case wxFONTENCODING_UTF16 :
2431 enc = kCFStringEncodingUnicode ;
2432 break ;
2433 case wxFONTENCODING_MACROMAN :
2434 enc = kCFStringEncodingMacRoman ;
2435 break ;
2436 case wxFONTENCODING_MACJAPANESE :
2437 enc = kCFStringEncodingMacJapanese ;
2438 break ;
2439 case wxFONTENCODING_MACCHINESETRAD :
2440 enc = kCFStringEncodingMacChineseTrad ;
2441 break ;
2442 case wxFONTENCODING_MACKOREAN :
2443 enc = kCFStringEncodingMacKorean ;
2444 break ;
2445 case wxFONTENCODING_MACARABIC :
2446 enc = kCFStringEncodingMacArabic ;
2447 break ;
2448 case wxFONTENCODING_MACHEBREW :
2449 enc = kCFStringEncodingMacHebrew ;
2450 break ;
2451 case wxFONTENCODING_MACGREEK :
2452 enc = kCFStringEncodingMacGreek ;
2453 break ;
2454 case wxFONTENCODING_MACCYRILLIC :
2455 enc = kCFStringEncodingMacCyrillic ;
2456 break ;
2457 case wxFONTENCODING_MACDEVANAGARI :
2458 enc = kCFStringEncodingMacDevanagari ;
2459 break ;
2460 case wxFONTENCODING_MACGURMUKHI :
2461 enc = kCFStringEncodingMacGurmukhi ;
2462 break ;
2463 case wxFONTENCODING_MACGUJARATI :
2464 enc = kCFStringEncodingMacGujarati ;
2465 break ;
2466 case wxFONTENCODING_MACORIYA :
2467 enc = kCFStringEncodingMacOriya ;
2468 break ;
2469 case wxFONTENCODING_MACBENGALI :
2470 enc = kCFStringEncodingMacBengali ;
2471 break ;
2472 case wxFONTENCODING_MACTAMIL :
2473 enc = kCFStringEncodingMacTamil ;
2474 break ;
2475 case wxFONTENCODING_MACTELUGU :
2476 enc = kCFStringEncodingMacTelugu ;
2477 break ;
2478 case wxFONTENCODING_MACKANNADA :
2479 enc = kCFStringEncodingMacKannada ;
2480 break ;
2481 case wxFONTENCODING_MACMALAJALAM :
2482 enc = kCFStringEncodingMacMalayalam ;
2483 break ;
2484 case wxFONTENCODING_MACSINHALESE :
2485 enc = kCFStringEncodingMacSinhalese ;
2486 break ;
2487 case wxFONTENCODING_MACBURMESE :
2488 enc = kCFStringEncodingMacBurmese ;
2489 break ;
2490 case wxFONTENCODING_MACKHMER :
2491 enc = kCFStringEncodingMacKhmer ;
2492 break ;
2493 case wxFONTENCODING_MACTHAI :
2494 enc = kCFStringEncodingMacThai ;
2495 break ;
2496 case wxFONTENCODING_MACLAOTIAN :
2497 enc = kCFStringEncodingMacLaotian ;
2498 break ;
2499 case wxFONTENCODING_MACGEORGIAN :
2500 enc = kCFStringEncodingMacGeorgian ;
2501 break ;
2502 case wxFONTENCODING_MACARMENIAN :
2503 enc = kCFStringEncodingMacArmenian ;
2504 break ;
2505 case wxFONTENCODING_MACCHINESESIMP :
2506 enc = kCFStringEncodingMacChineseSimp ;
2507 break ;
2508 case wxFONTENCODING_MACTIBETAN :
2509 enc = kCFStringEncodingMacTibetan ;
2510 break ;
2511 case wxFONTENCODING_MACMONGOLIAN :
2512 enc = kCFStringEncodingMacMongolian ;
2513 break ;
2514 case wxFONTENCODING_MACETHIOPIC :
2515 enc = kCFStringEncodingMacEthiopic ;
2516 break ;
2517 case wxFONTENCODING_MACCENTRALEUR :
2518 enc = kCFStringEncodingMacCentralEurRoman ;
2519 break ;
2520 case wxFONTENCODING_MACVIATNAMESE :
2521 enc = kCFStringEncodingMacVietnamese ;
2522 break ;
2523 case wxFONTENCODING_MACARABICEXT :
2524 enc = kCFStringEncodingMacExtArabic ;
2525 break ;
2526 case wxFONTENCODING_MACSYMBOL :
2527 enc = kCFStringEncodingMacSymbol ;
2528 break ;
2529 case wxFONTENCODING_MACDINGBATS :
2530 enc = kCFStringEncodingMacDingbats ;
2531 break ;
2532 case wxFONTENCODING_MACTURKISH :
2533 enc = kCFStringEncodingMacTurkish ;
2534 break ;
2535 case wxFONTENCODING_MACCROATIAN :
2536 enc = kCFStringEncodingMacCroatian ;
2537 break ;
2538 case wxFONTENCODING_MACICELANDIC :
2539 enc = kCFStringEncodingMacIcelandic ;
2540 break ;
2541 case wxFONTENCODING_MACROMANIAN :
2542 enc = kCFStringEncodingMacRomanian ;
2543 break ;
2544 case wxFONTENCODING_MACCELTIC :
2545 enc = kCFStringEncodingMacCeltic ;
2546 break ;
2547 case wxFONTENCODING_MACGAELIC :
2548 enc = kCFStringEncodingMacGaelic ;
2549 break ;
2550 // case wxFONTENCODING_MACKEYBOARD :
2551 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2552 // break ;
2553
2554 default :
2555 // because gcc is picky
2556 break ;
2557 }
2558
2559 return enc ;
2560 }
2561
2562 class wxMBConv_cocoa : public wxMBConv
2563 {
2564 public:
2565 wxMBConv_cocoa()
2566 {
2567 Init(CFStringGetSystemEncoding()) ;
2568 }
2569
2570 wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2571 {
2572 m_encoding = conv.m_encoding;
2573 }
2574
2575 #if wxUSE_FONTMAP
2576 wxMBConv_cocoa(const wxChar* name)
2577 {
2578 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2579 }
2580 #endif
2581
2582 wxMBConv_cocoa(wxFontEncoding encoding)
2583 {
2584 Init( wxCFStringEncFromFontEnc(encoding) );
2585 }
2586
2587 ~wxMBConv_cocoa()
2588 {
2589 }
2590
2591 void Init( CFStringEncoding encoding)
2592 {
2593 m_encoding = encoding ;
2594 }
2595
2596 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2597 {
2598 wxASSERT(szUnConv);
2599
2600 CFStringRef theString = CFStringCreateWithBytes (
2601 NULL, //the allocator
2602 (const UInt8*)szUnConv,
2603 strlen(szUnConv),
2604 m_encoding,
2605 false //no BOM/external representation
2606 );
2607
2608 wxASSERT(theString);
2609
2610 size_t nOutLength = CFStringGetLength(theString);
2611
2612 if (szOut == NULL)
2613 {
2614 CFRelease(theString);
2615 return nOutLength;
2616 }
2617
2618 CFRange theRange = { 0, nOutSize };
2619
2620 #if SIZEOF_WCHAR_T == 4
2621 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2622 #endif
2623
2624 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2625
2626 CFRelease(theString);
2627
2628 szUniCharBuffer[nOutLength] = '\0';
2629
2630 #if SIZEOF_WCHAR_T == 4
2631 wxMBConvUTF16 converter;
2632 converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2633 delete [] szUniCharBuffer;
2634 #endif
2635
2636 return nOutLength;
2637 }
2638
2639 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2640 {
2641 wxASSERT(szUnConv);
2642
2643 size_t nRealOutSize;
2644 size_t nBufSize = wxWcslen(szUnConv);
2645 UniChar* szUniBuffer = (UniChar*) szUnConv;
2646
2647 #if SIZEOF_WCHAR_T == 4
2648 wxMBConvUTF16 converter ;
2649 nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2650 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2651 converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
2652 nBufSize /= sizeof(UniChar);
2653 #endif
2654
2655 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2656 NULL, //allocator
2657 szUniBuffer,
2658 nBufSize,
2659 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2660 );
2661
2662 wxASSERT(theString);
2663
2664 //Note that CER puts a BOM when converting to unicode
2665 //so we check and use getchars instead in that case
2666 if (m_encoding == kCFStringEncodingUnicode)
2667 {
2668 if (szOut != NULL)
2669 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2670
2671 nRealOutSize = CFStringGetLength(theString) + 1;
2672 }
2673 else
2674 {
2675 CFStringGetBytes(
2676 theString,
2677 CFRangeMake(0, CFStringGetLength(theString)),
2678 m_encoding,
2679 0, //what to put in characters that can't be converted -
2680 //0 tells CFString to return NULL if it meets such a character
2681 false, //not an external representation
2682 (UInt8*) szOut,
2683 nOutSize,
2684 (CFIndex*) &nRealOutSize
2685 );
2686 }
2687
2688 CFRelease(theString);
2689
2690 #if SIZEOF_WCHAR_T == 4
2691 delete[] szUniBuffer;
2692 #endif
2693
2694 return nRealOutSize - 1;
2695 }
2696
2697 virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2698
2699 bool IsOk() const
2700 {
2701 return m_encoding != kCFStringEncodingInvalidId &&
2702 CFStringIsEncodingAvailable(m_encoding);
2703 }
2704
2705 private:
2706 CFStringEncoding m_encoding ;
2707 };
2708
2709 #endif // defined(__WXCOCOA__)
2710
2711 // ============================================================================
2712 // Mac conversion classes
2713 // ============================================================================
2714
2715 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2716
2717 class wxMBConv_mac : public wxMBConv
2718 {
2719 public:
2720 wxMBConv_mac()
2721 {
2722 Init(CFStringGetSystemEncoding()) ;
2723 }
2724
2725 wxMBConv_mac(const wxMBConv_mac& conv)
2726 {
2727 Init(conv.m_char_encoding);
2728 }
2729
2730 #if wxUSE_FONTMAP
2731 wxMBConv_mac(const wxChar* name)
2732 {
2733 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
2734 }
2735 #endif
2736
2737 wxMBConv_mac(wxFontEncoding encoding)
2738 {
2739 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2740 }
2741
2742 ~wxMBConv_mac()
2743 {
2744 OSStatus status = noErr ;
2745 if (m_MB2WC_converter)
2746 status = TECDisposeConverter(m_MB2WC_converter);
2747 if (m_WC2MB_converter)
2748 status = TECDisposeConverter(m_WC2MB_converter);
2749 }
2750
2751 void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2752 TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
2753 {
2754 m_MB2WC_converter = NULL ;
2755 m_WC2MB_converter = NULL ;
2756 m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
2757 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
2758 }
2759
2760 virtual void CreateIfNeeded() const
2761 {
2762 if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2763 {
2764 OSStatus status = noErr ;
2765 status = TECCreateConverter(&m_MB2WC_converter,
2766 m_char_encoding,
2767 m_unicode_encoding);
2768 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2769 status = TECCreateConverter(&m_WC2MB_converter,
2770 m_unicode_encoding,
2771 m_char_encoding);
2772 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2773 }
2774 }
2775
2776 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2777 {
2778 CreateIfNeeded() ;
2779 OSStatus status = noErr ;
2780 ByteCount byteOutLen ;
2781 ByteCount byteInLen = strlen(psz) + 1;
2782 wchar_t *tbuf = NULL ;
2783 UniChar* ubuf = NULL ;
2784 size_t res = 0 ;
2785
2786 if (buf == NULL)
2787 {
2788 // Apple specs say at least 32
2789 n = wxMax( 32, byteInLen ) ;
2790 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
2791 }
2792
2793 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2794
2795 #if SIZEOF_WCHAR_T == 4
2796 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2797 #else
2798 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2799 #endif
2800
2801 status = TECConvertText(
2802 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2803 (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2804
2805 #if SIZEOF_WCHAR_T == 4
2806 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2807 // is not properly terminated we get random characters at the end
2808 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2809 wxMBConvUTF16 converter ;
2810 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
2811 free( ubuf ) ;
2812 #else
2813 res = byteOutLen / sizeof( UniChar ) ;
2814 #endif
2815
2816 if ( buf == NULL )
2817 free(tbuf) ;
2818
2819 if ( buf && res < n)
2820 buf[res] = 0;
2821
2822 return res ;
2823 }
2824
2825 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2826 {
2827 CreateIfNeeded() ;
2828 OSStatus status = noErr ;
2829 ByteCount byteOutLen ;
2830 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2831
2832 char *tbuf = NULL ;
2833
2834 if (buf == NULL)
2835 {
2836 // Apple specs say at least 32
2837 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2838 tbuf = (char*) malloc( n ) ;
2839 }
2840
2841 ByteCount byteBufferLen = n ;
2842 UniChar* ubuf = NULL ;
2843
2844 #if SIZEOF_WCHAR_T == 4
2845 wxMBConvUTF16 converter ;
2846 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2847 byteInLen = unicharlen ;
2848 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2849 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2850 #else
2851 ubuf = (UniChar*) psz ;
2852 #endif
2853
2854 status = TECConvertText(
2855 m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2856 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2857
2858 #if SIZEOF_WCHAR_T == 4
2859 free( ubuf ) ;
2860 #endif
2861
2862 if ( buf == NULL )
2863 free(tbuf) ;
2864
2865 size_t res = byteOutLen ;
2866 if ( buf && res < n)
2867 {
2868 buf[res] = 0;
2869
2870 //we need to double-trip to verify it didn't insert any ? in place
2871 //of bogus characters
2872 wxWCharBuffer wcBuf(n);
2873 size_t pszlen = wxWcslen(psz);
2874 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2875 wxWcslen(wcBuf) != pszlen ||
2876 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2877 {
2878 // we didn't obtain the same thing we started from, hence
2879 // the conversion was lossy and we consider that it failed
2880 return wxCONV_FAILED;
2881 }
2882 }
2883
2884 return res ;
2885 }
2886
2887 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
2888
2889 bool IsOk() const
2890 {
2891 CreateIfNeeded() ;
2892 return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
2893 }
2894
2895 protected :
2896 mutable TECObjectRef m_MB2WC_converter;
2897 mutable TECObjectRef m_WC2MB_converter;
2898
2899 TextEncodingBase m_char_encoding;
2900 TextEncodingBase m_unicode_encoding;
2901 };
2902
2903 // MB is decomposed (D) normalized UTF8
2904
2905 class wxMBConv_macUTF8D : public wxMBConv_mac
2906 {
2907 public :
2908 wxMBConv_macUTF8D()
2909 {
2910 Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2911 m_uni = NULL;
2912 }
2913
2914 ~wxMBConv_macUTF8D()
2915 {
2916 DisposeUnicodeToTextInfo(&m_uni);
2917 }
2918
2919 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2920 {
2921 CreateIfNeeded() ;
2922 OSStatus status = noErr ;
2923 ByteCount byteOutLen ;
2924 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2925
2926 char *tbuf = NULL ;
2927
2928 if (buf == NULL)
2929 {
2930 // Apple specs say at least 32
2931 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2932 tbuf = (char*) malloc( n ) ;
2933 }
2934
2935 ByteCount byteBufferLen = n ;
2936 UniChar* ubuf = NULL ;
2937
2938 #if SIZEOF_WCHAR_T == 4
2939 wxMBConvUTF16 converter ;
2940 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2941 byteInLen = unicharlen ;
2942 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2943 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2944 #else
2945 ubuf = (UniChar*) psz ;
2946 #endif
2947
2948 // ubuf is a non-decomposed UniChar buffer
2949
2950 ByteCount dcubuflen = byteInLen * 2 + 2 ;
2951 ByteCount dcubufread , dcubufwritten ;
2952 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2953
2954 ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
2955 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
2956
2957 // we now convert that decomposed buffer into UTF8
2958
2959 status = TECConvertText(
2960 m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2961 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2962
2963 free( dcubuf );
2964
2965 #if SIZEOF_WCHAR_T == 4
2966 free( ubuf ) ;
2967 #endif
2968
2969 if ( buf == NULL )
2970 free(tbuf) ;
2971
2972 size_t res = byteOutLen ;
2973 if ( buf && res < n)
2974 {
2975 buf[res] = 0;
2976 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2977 }
2978
2979 return res ;
2980 }
2981
2982 virtual void CreateIfNeeded() const
2983 {
2984 wxMBConv_mac::CreateIfNeeded() ;
2985 if ( m_uni == NULL )
2986 {
2987 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
2988 kUnicodeNoSubset, kTextEncodingDefaultFormat);
2989 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
2990 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
2991 m_map.mappingVersion = kUnicodeUseLatestMapping;
2992
2993 OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
2994 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
2995 }
2996 }
2997 protected :
2998 mutable UnicodeToTextInfo m_uni;
2999 mutable UnicodeMapping m_map;
3000 };
3001 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3002
3003 // ============================================================================
3004 // wxEncodingConverter based conversion classes
3005 // ============================================================================
3006
3007 #if wxUSE_FONTMAP
3008
3009 class wxMBConv_wxwin : public wxMBConv
3010 {
3011 private:
3012 void Init()
3013 {
3014 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3015 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3016 }
3017
3018 public:
3019 // temporarily just use wxEncodingConverter stuff,
3020 // so that it works while a better implementation is built
3021 wxMBConv_wxwin(const wxChar* name)
3022 {
3023 if (name)
3024 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3025 else
3026 m_enc = wxFONTENCODING_SYSTEM;
3027
3028 Init();
3029 }
3030
3031 wxMBConv_wxwin(wxFontEncoding enc)
3032 {
3033 m_enc = enc;
3034
3035 Init();
3036 }
3037
3038 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
3039 {
3040 size_t inbuf = strlen(psz);
3041 if (buf)
3042 {
3043 if (!m2w.Convert(psz, buf))
3044 return wxCONV_FAILED;
3045 }
3046 return inbuf;
3047 }
3048
3049 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
3050 {
3051 const size_t inbuf = wxWcslen(psz);
3052 if (buf)
3053 {
3054 if (!w2m.Convert(psz, buf))
3055 return wxCONV_FAILED;
3056 }
3057
3058 return inbuf;
3059 }
3060
3061 virtual size_t GetMBNulLen() const
3062 {
3063 switch ( m_enc )
3064 {
3065 case wxFONTENCODING_UTF16BE:
3066 case wxFONTENCODING_UTF16LE:
3067 return 2;
3068
3069 case wxFONTENCODING_UTF32BE:
3070 case wxFONTENCODING_UTF32LE:
3071 return 4;
3072
3073 default:
3074 return 1;
3075 }
3076 }
3077
3078 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3079
3080 bool IsOk() const { return m_ok; }
3081
3082 public:
3083 wxFontEncoding m_enc;
3084 wxEncodingConverter m2w, w2m;
3085
3086 private:
3087 // were we initialized successfully?
3088 bool m_ok;
3089
3090 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
3091 };
3092
3093 // make the constructors available for unit testing
3094 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
3095 {
3096 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3097 if ( !result->IsOk() )
3098 {
3099 delete result;
3100 return 0;
3101 }
3102
3103 return result;
3104 }
3105
3106 #endif // wxUSE_FONTMAP
3107
3108 // ============================================================================
3109 // wxCSConv implementation
3110 // ============================================================================
3111
3112 void wxCSConv::Init()
3113 {
3114 m_name = NULL;
3115 m_convReal = NULL;
3116 m_deferred = true;
3117 }
3118
3119 wxCSConv::wxCSConv(const wxChar *charset)
3120 {
3121 Init();
3122
3123 if ( charset )
3124 {
3125 SetName(charset);
3126 }
3127
3128 #if wxUSE_FONTMAP
3129 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3130 #else
3131 m_encoding = wxFONTENCODING_SYSTEM;
3132 #endif
3133 }
3134
3135 wxCSConv::wxCSConv(wxFontEncoding encoding)
3136 {
3137 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3138 {
3139 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3140
3141 encoding = wxFONTENCODING_SYSTEM;
3142 }
3143
3144 Init();
3145
3146 m_encoding = encoding;
3147 }
3148
3149 wxCSConv::~wxCSConv()
3150 {
3151 Clear();
3152 }
3153
3154 wxCSConv::wxCSConv(const wxCSConv& conv)
3155 : wxMBConv()
3156 {
3157 Init();
3158
3159 SetName(conv.m_name);
3160 m_encoding = conv.m_encoding;
3161 }
3162
3163 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3164 {
3165 Clear();
3166
3167 SetName(conv.m_name);
3168 m_encoding = conv.m_encoding;
3169
3170 return *this;
3171 }
3172
3173 void wxCSConv::Clear()
3174 {
3175 free(m_name);
3176 delete m_convReal;
3177
3178 m_name = NULL;
3179 m_convReal = NULL;
3180 }
3181
3182 void wxCSConv::SetName(const wxChar *charset)
3183 {
3184 if (charset)
3185 {
3186 m_name = wxStrdup(charset);
3187 m_deferred = true;
3188 }
3189 }
3190
#if wxUSE_FONTMAP

// map from an encoding to a charset name, used by wxCSConv::DoCreate() to
// remember which name worked with iconv for the encoding (an empty name
// records a failure)
WX_DECLARE_HASH_MAP(
    wxFontEncoding,
    wxString,
    wxIntegerHash,
    wxIntegerEqual,
    wxEncodingNameCache
);

static wxEncodingNameCache gs_nameCache;

#endif // wxUSE_FONTMAP
3198
3199 wxMBConv *wxCSConv::DoCreate() const
3200 {
3201 #if wxUSE_FONTMAP
3202 wxLogTrace(TRACE_STRCONV,
3203 wxT("creating conversion for %s"),
3204 (m_name ? m_name
3205 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
3206 #endif // wxUSE_FONTMAP
3207
3208 // check for the special case of ASCII or ISO8859-1 charset: as we have
3209 // special knowledge of it anyhow, we don't need to create a special
3210 // conversion object
3211 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3212 m_encoding == wxFONTENCODING_DEFAULT )
3213 {
3214 // don't convert at all
3215 return NULL;
3216 }
3217
3218 // we trust OS to do conversion better than we can so try external
3219 // conversion methods first
3220 //
3221 // the full order is:
3222 // 1. OS conversion (iconv() under Unix or Win32 API)
3223 // 2. hard coded conversions for UTF
3224 // 3. wxEncodingConverter as fall back
3225
3226 // step (1)
3227 #ifdef HAVE_ICONV
3228 #if !wxUSE_FONTMAP
3229 if ( m_name )
3230 #endif // !wxUSE_FONTMAP
3231 {
3232 wxString name(m_name);
3233 wxFontEncoding encoding(m_encoding);
3234
3235 if ( !name.empty() )
3236 {
3237 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
3238 if ( conv->IsOk() )
3239 return conv;
3240
3241 delete conv;
3242
3243 #if wxUSE_FONTMAP
3244 encoding =
3245 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3246 #endif // wxUSE_FONTMAP
3247 }
3248 #if wxUSE_FONTMAP
3249 {
3250 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3251 if ( it != gs_nameCache.end() )
3252 {
3253 if ( it->second.empty() )
3254 return NULL;
3255
3256 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
3257 if ( conv->IsOk() )
3258 return conv;
3259
3260 delete conv;
3261 }
3262
3263 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
3264
3265 for ( ; *names; ++names )
3266 {
3267 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
3268 if ( conv->IsOk() )
3269 {
3270 gs_nameCache[encoding] = *names;
3271 return conv;
3272 }
3273
3274 delete conv;
3275 }
3276
3277 gs_nameCache[encoding] = _T(""); // cache the failure
3278 }
3279 #endif // wxUSE_FONTMAP
3280 }
3281 #endif // HAVE_ICONV
3282
3283 #ifdef wxHAVE_WIN32_MB2WC
3284 {
3285 #if wxUSE_FONTMAP
3286 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3287 : new wxMBConv_win32(m_encoding);
3288 if ( conv->IsOk() )
3289 return conv;
3290
3291 delete conv;
3292 #else
3293 return NULL;
3294 #endif
3295 }
3296 #endif // wxHAVE_WIN32_MB2WC
3297
3298 #if defined(__WXMAC__)
3299 {
3300 // leave UTF16 and UTF32 to the built-ins of wx
3301 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
3302 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
3303 {
3304 #if wxUSE_FONTMAP
3305 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3306 : new wxMBConv_mac(m_encoding);
3307 #else
3308 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3309 #endif
3310 if ( conv->IsOk() )
3311 return conv;
3312
3313 delete conv;
3314 }
3315 }
3316 #endif
3317
3318 #if defined(__WXCOCOA__)
3319 {
3320 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3321 {
3322 #if wxUSE_FONTMAP
3323 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3324 : new wxMBConv_cocoa(m_encoding);
3325 #else
3326 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3327 #endif
3328
3329 if ( conv->IsOk() )
3330 return conv;
3331
3332 delete conv;
3333 }
3334 }
3335 #endif
3336 // step (2)
3337 wxFontEncoding enc = m_encoding;
3338 #if wxUSE_FONTMAP
3339 if ( enc == wxFONTENCODING_SYSTEM && m_name )
3340 {
3341 // use "false" to suppress interactive dialogs -- we can be called from
3342 // anywhere and popping up a dialog from here is the last thing we want to
3343 // do
3344 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3345 }
3346 #endif // wxUSE_FONTMAP
3347
3348 switch ( enc )
3349 {
3350 case wxFONTENCODING_UTF7:
3351 return new wxMBConvUTF7;
3352
3353 case wxFONTENCODING_UTF8:
3354 return new wxMBConvUTF8;
3355
3356 case wxFONTENCODING_UTF16BE:
3357 return new wxMBConvUTF16BE;
3358
3359 case wxFONTENCODING_UTF16LE:
3360 return new wxMBConvUTF16LE;
3361
3362 case wxFONTENCODING_UTF32BE:
3363 return new wxMBConvUTF32BE;
3364
3365 case wxFONTENCODING_UTF32LE:
3366 return new wxMBConvUTF32LE;
3367
3368 default:
3369 // nothing to do but put here to suppress gcc warnings
3370 break;
3371 }
3372
3373 // step (3)
3374 #if wxUSE_FONTMAP
3375 {
3376 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3377 : new wxMBConv_wxwin(m_encoding);
3378 if ( conv->IsOk() )
3379 return conv;
3380
3381 delete conv;
3382 }
3383 #endif // wxUSE_FONTMAP
3384
3385 // NB: This is a hack to prevent deadlock. What could otherwise happen
3386 // in Unicode build: wxConvLocal creation ends up being here
3387 // because of some failure and logs the error. But wxLog will try to
3388 // attach timestamp, for which it will need wxConvLocal (to convert
3389 // time to char* and then wchar_t*), but that fails, tries to log
3390 // error, but wxLog has a (already locked) critical section that
3391 // guards static buffer.
3392 static bool alreadyLoggingError = false;
3393 if (!alreadyLoggingError)
3394 {
3395 alreadyLoggingError = true;
3396 wxLogError(_("Cannot convert from the charset '%s'!"),
3397 m_name ? m_name
3398 :
3399 #if wxUSE_FONTMAP
3400 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
3401 #else // !wxUSE_FONTMAP
3402 wxString::Format(_("encoding %s"), m_encoding).c_str()
3403 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3404 );
3405
3406 alreadyLoggingError = false;
3407 }
3408
3409 return NULL;
3410 }
3411
3412 void wxCSConv::CreateConvIfNeeded() const
3413 {
3414 if ( m_deferred )
3415 {
3416 wxCSConv *self = (wxCSConv *)this; // const_cast
3417
3418 #if wxUSE_INTL
3419 // if we don't have neither the name nor the encoding, use the default
3420 // encoding for this system
3421 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3422 {
3423 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
3424 }
3425 #endif // wxUSE_INTL
3426
3427 self->m_convReal = DoCreate();
3428 self->m_deferred = false;
3429 }
3430 }
3431
3432 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3433 {
3434 CreateConvIfNeeded();
3435
3436 if (m_convReal)
3437 return m_convReal->MB2WC(buf, psz, n);
3438
3439 // latin-1 (direct)
3440 size_t len = strlen(psz);
3441
3442 if (buf)
3443 {
3444 for (size_t c = 0; c <= len; c++)
3445 buf[c] = (unsigned char)(psz[c]);
3446 }
3447
3448 return len;
3449 }
3450
3451 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3452 {
3453 CreateConvIfNeeded();
3454
3455 if (m_convReal)
3456 return m_convReal->WC2MB(buf, psz, n);
3457
3458 // latin-1 (direct)
3459 const size_t len = wxWcslen(psz);
3460 if (buf)
3461 {
3462 for (size_t c = 0; c <= len; c++)
3463 {
3464 if (psz[c] > 0xFF)
3465 return wxCONV_FAILED;
3466
3467 buf[c] = (char)psz[c];
3468 }
3469 }
3470 else
3471 {
3472 for (size_t c = 0; c <= len; c++)
3473 {
3474 if (psz[c] > 0xFF)
3475 return wxCONV_FAILED;
3476 }
3477 }
3478
3479 return len;
3480 }
3481
3482 size_t wxCSConv::GetMBNulLen() const
3483 {
3484 CreateConvIfNeeded();
3485
3486 if ( m_convReal )
3487 {
3488 return m_convReal->GetMBNulLen();
3489 }
3490
3491 return 1;
3492 }
3493
3494 // ----------------------------------------------------------------------------
3495 // globals
3496 // ----------------------------------------------------------------------------
3497
3498 #ifdef __WINDOWS__
3499 static wxMBConv_win32 wxConvLibcObj;
3500 #elif defined(__WXMAC__) && !defined(__MACH__)
3501 static wxMBConv_mac wxConvLibcObj ;
3502 #else
3503 static wxMBConvLibc wxConvLibcObj;
3504 #endif
3505
3506 static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
3507 static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
3508 static wxMBConvUTF7 wxConvUTF7Obj;
3509 static wxMBConvUTF8 wxConvUTF8Obj;
3510 #ifdef __WXOSX__
3511 static wxMBConv_macUTF8D wxConvMacUTF8DObj;
3512 #endif
3513 WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
3514 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
3515 WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
3516 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
3517 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
3518 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
3519 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;
3520 WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
3521 #ifdef __WXOSX__
3522 wxConvMacUTF8DObj;
3523 #else
3524 wxConvLibcObj;
3525 #endif
3526
3527 #else // !wxUSE_WCHAR_T
3528
3529 // stand-ins in absence of wchar_t
3530 WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
3531 wxConvISO8859_1,
3532 wxConvLocal,
3533 wxConvUTF8;
3534
3535 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T