]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
added wxUSE_FONTENUM for wxFontEnumerator
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
17
18 #ifdef __BORLANDC__
19 #pragma hdrstop
20 #endif //__BORLANDC__
21
22 #ifndef WX_PRECOMP
23 #include "wx/intl.h"
24 #include "wx/log.h"
25 #include "wx/utils.h"
26 #include "wx/hashmap.h"
27 #endif
28
29 #include "wx/strconv.h"
30
31 #if wxUSE_WCHAR_T
32
33 #ifndef __WXWINCE__
34 #include <errno.h>
35 #endif
36
37 #include <ctype.h>
38 #include <string.h>
39 #include <stdlib.h>
40
41 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
42 #include "wx/msw/private.h"
43 #include "wx/msw/missing.h"
44 #define wxHAVE_WIN32_MB2WC
45 #endif
46
47 #ifdef __SALFORDC__
48 #include <clib.h>
49 #endif
50
51 #ifdef HAVE_ICONV
52 #include <iconv.h>
53 #include "wx/thread.h"
54 #endif
55
56 #include "wx/encconv.h"
57 #include "wx/fontmap.h"
58
59 #ifdef __DARWIN__
60 #include "wx/mac/corefoundation/private/strconv_cf.h"
61 #endif //def __DARWIN__
62
63
64 #define TRACE_STRCONV _T("strconv")
65
66 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
67 // be 4 bytes
68 #if SIZEOF_WCHAR_T == 2
69 #define WC_UTF16
70 #endif
71
72
73 // ============================================================================
74 // implementation
75 // ============================================================================
76
// helper of the conversion functions: returns true if at least one of the n
// bytes starting at p is not NUL and false if all of them are NUL (this
// includes the case of n == 0)
static bool NotAllNULs(const char *p, size_t n)
{
    for ( const char * const end = p + n; p != end; ++p )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
85
86 // ----------------------------------------------------------------------------
87 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
88 // ----------------------------------------------------------------------------
89
90 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
91 {
92 if (input <= 0xffff)
93 {
94 if (output)
95 *output = (wxUint16) input;
96
97 return 1;
98 }
99 else if (input >= 0x110000)
100 {
101 return wxCONV_FAILED;
102 }
103 else
104 {
105 if (output)
106 {
107 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
108 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
109 }
110
111 return 2;
112 }
113 }
114
115 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
116 {
117 if ((*input < 0xd800) || (*input > 0xdfff))
118 {
119 output = *input;
120 return 1;
121 }
122 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
123 {
124 output = *input;
125 return wxCONV_FAILED;
126 }
127 else
128 {
129 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
130 return 2;
131 }
132 }
133
134 #ifdef WC_UTF16
135 typedef wchar_t wxDecodeSurrogate_t;
136 #else // !WC_UTF16
137 typedef wxUint16 wxDecodeSurrogate_t;
138 #endif // WC_UTF16/!WC_UTF16
139
140 // returns the next UTF-32 character from the wchar_t buffer and advances the
141 // pointer to the character after this one
142 //
143 // if an invalid character is found, *pSrc is set to NULL, the caller must
144 // check for this
145 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
146 {
147 wxUint32 out;
148 const size_t
149 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
150 if ( n == wxCONV_FAILED )
151 *pSrc = NULL;
152 else
153 *pSrc += n;
154
155 return out;
156 }
157
158 // ----------------------------------------------------------------------------
159 // wxMBConv
160 // ----------------------------------------------------------------------------
161
162 size_t
163 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
164 const char *src, size_t srcLen) const
165 {
166 // although new conversion classes are supposed to implement this function
167 // directly, the existins ones only implement the old MB2WC() and so, to
168 // avoid to have to rewrite all conversion classes at once, we provide a
169 // default (but not efficient) implementation of this one in terms of the
170 // old function by copying the input to ensure that it's NUL-terminated and
171 // then using MB2WC() to convert it
172
173 // the number of chars [which would be] written to dst [if it were not NULL]
174 size_t dstWritten = 0;
175
176 // the number of NULs terminating this string
177 size_t nulLen = 0; // not really needed, but just to avoid warnings
178
179 // if we were not given the input size we just have to assume that the
180 // string is properly terminated as we have no way of knowing how long it
181 // is anyhow, but if we do have the size check whether there are enough
182 // NULs at the end
183 wxCharBuffer bufTmp;
184 const char *srcEnd;
185 if ( srcLen != wxNO_LEN )
186 {
187 // we need to know how to find the end of this string
188 nulLen = GetMBNulLen();
189 if ( nulLen == wxCONV_FAILED )
190 return wxCONV_FAILED;
191
192 // if there are enough NULs we can avoid the copy
193 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
194 {
195 // make a copy in order to properly NUL-terminate the string
196 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
197 char * const p = bufTmp.data();
198 memcpy(p, src, srcLen);
199 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
200 *s = '\0';
201
202 src = bufTmp;
203 }
204
205 srcEnd = src + srcLen;
206 }
207 else // quit after the first loop iteration
208 {
209 srcEnd = NULL;
210 }
211
212 for ( ;; )
213 {
214 // try to convert the current chunk
215 size_t lenChunk = MB2WC(NULL, src, 0);
216 if ( lenChunk == wxCONV_FAILED )
217 return wxCONV_FAILED;
218
219 lenChunk++; // for the L'\0' at the end of this chunk
220
221 dstWritten += lenChunk;
222
223 if ( lenChunk == 1 )
224 {
225 // nothing left in the input string, conversion succeeded
226 break;
227 }
228
229 if ( dst )
230 {
231 if ( dstWritten > dstLen )
232 return wxCONV_FAILED;
233
234 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
235 return wxCONV_FAILED;
236
237 dst += lenChunk;
238 }
239
240 if ( !srcEnd )
241 {
242 // we convert just one chunk in this case as this is the entire
243 // string anyhow
244 break;
245 }
246
247 // advance the input pointer past the end of this chunk
248 while ( NotAllNULs(src, nulLen) )
249 {
250 // notice that we must skip over multiple bytes here as we suppose
251 // that if NUL takes 2 or 4 bytes, then all the other characters do
252 // too and so if advanced by a single byte we might erroneously
253 // detect sequences of NUL bytes in the middle of the input
254 src += nulLen;
255 }
256
257 src += nulLen; // skipping over its terminator as well
258
259 // note that ">=" (and not just "==") is needed here as the terminator
260 // we skipped just above could be inside or just after the buffer
261 // delimited by inEnd
262 if ( src >= srcEnd )
263 break;
264 }
265
266 return dstWritten;
267 }
268
269 size_t
270 wxMBConv::FromWChar(char *dst, size_t dstLen,
271 const wchar_t *src, size_t srcLen) const
272 {
273 // the number of chars [which would be] written to dst [if it were not NULL]
274 size_t dstWritten = 0;
275
276 // make a copy of the input string unless it is already properly
277 // NUL-terminated
278 //
279 // if we don't know its length we have no choice but to assume that it is,
280 // indeed, properly terminated
281 wxWCharBuffer bufTmp;
282 if ( srcLen == wxNO_LEN )
283 {
284 srcLen = wxWcslen(src) + 1;
285 }
286 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
287 {
288 // make a copy in order to properly NUL-terminate the string
289 bufTmp = wxWCharBuffer(srcLen);
290 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
291 src = bufTmp;
292 }
293
294 const size_t lenNul = GetMBNulLen();
295 for ( const wchar_t * const srcEnd = src + srcLen;
296 src < srcEnd;
297 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
298 {
299 // try to convert the current chunk
300 size_t lenChunk = WC2MB(NULL, src, 0);
301
302 if ( lenChunk == wxCONV_FAILED )
303 return wxCONV_FAILED;
304
305 lenChunk += lenNul;
306 dstWritten += lenChunk;
307
308 if ( dst )
309 {
310 if ( dstWritten > dstLen )
311 return wxCONV_FAILED;
312
313 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
314 return wxCONV_FAILED;
315
316 dst += lenChunk;
317 }
318 }
319
320 return dstWritten;
321 }
322
323 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
324 {
325 size_t rc = ToWChar(outBuff, outLen, inBuff);
326 if ( rc != wxCONV_FAILED )
327 {
328 // ToWChar() returns the buffer length, i.e. including the trailing
329 // NUL, while this method doesn't take it into account
330 rc--;
331 }
332
333 return rc;
334 }
335
336 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
337 {
338 size_t rc = FromWChar(outBuff, outLen, inBuff);
339 if ( rc != wxCONV_FAILED )
340 {
341 rc -= GetMBNulLen();
342 }
343
344 return rc;
345 }
346
347 wxMBConv::~wxMBConv()
348 {
349 // nothing to do here (necessary for Darwin linking probably)
350 }
351
352 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
353 {
354 if ( psz )
355 {
356 // calculate the length of the buffer needed first
357 const size_t nLen = MB2WC(NULL, psz, 0);
358 if ( nLen != wxCONV_FAILED )
359 {
360 // now do the actual conversion
361 wxWCharBuffer buf(nLen /* +1 added implicitly */);
362
363 // +1 for the trailing NULL
364 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
365 return buf;
366 }
367 }
368
369 return wxWCharBuffer();
370 }
371
372 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
373 {
374 if ( pwz )
375 {
376 const size_t nLen = WC2MB(NULL, pwz, 0);
377 if ( nLen != wxCONV_FAILED )
378 {
379 // extra space for trailing NUL(s)
380 static const size_t extraLen = GetMaxMBNulLen();
381
382 wxCharBuffer buf(nLen + extraLen - 1);
383 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
384 return buf;
385 }
386 }
387
388 return wxCharBuffer();
389 }
390
391 const wxWCharBuffer
392 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
393 {
394 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
395 if ( dstLen != wxCONV_FAILED )
396 {
397 wxWCharBuffer wbuf(dstLen - 1);
398 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
399 {
400 if ( outLen )
401 {
402 *outLen = dstLen;
403 if ( wbuf[dstLen - 1] == L'\0' )
404 (*outLen)--;
405 }
406
407 return wbuf;
408 }
409 }
410
411 if ( outLen )
412 *outLen = 0;
413
414 return wxWCharBuffer();
415 }
416
417 const wxCharBuffer
418 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
419 {
420 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
421 if ( dstLen != wxCONV_FAILED )
422 {
423 // special case of empty input: can't allocate 0 size buffer below as
424 // wxCharBuffer insists on NUL-terminating it
425 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
426 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
427 {
428 if ( outLen )
429 {
430 *outLen = dstLen;
431
432 const size_t nulLen = GetMBNulLen();
433 if ( dstLen >= nulLen &&
434 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
435 {
436 // in this case the output is NUL-terminated and we're not
437 // supposed to count NUL
438 *outLen -= nulLen;
439 }
440 }
441
442 return buf;
443 }
444 }
445
446 if ( outLen )
447 *outLen = 0;
448
449 return wxCharBuffer();
450 }
451
452 // ----------------------------------------------------------------------------
453 // wxMBConvLibc
454 // ----------------------------------------------------------------------------
455
456 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
457 {
458 return wxMB2WC(buf, psz, n);
459 }
460
461 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
462 {
463 return wxWC2MB(buf, psz, n);
464 }
465
466 // ----------------------------------------------------------------------------
467 // wxConvBrokenFileNames
468 // ----------------------------------------------------------------------------
469
470 #ifdef __UNIX__
471
472 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxString& charset)
473 {
474 if ( wxStricmp(charset, _T("UTF-8")) == 0 ||
475 wxStricmp(charset, _T("UTF8")) == 0 )
476 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
477 else
478 m_conv = new wxCSConv(charset);
479 }
480
481 #endif // __UNIX__
482
483 // ----------------------------------------------------------------------------
484 // UTF-7
485 // ----------------------------------------------------------------------------
486
487 // Implementation (C) 2004 Fredrik Roubert
488
489 //
490 // BASE64 decoding table
491 //
// maps a byte to the 6 bit value it stands for in BASE64, i.e. 0..25 for
// 'A'..'Z', 26..51 for 'a'..'z', 52..61 for '0'..'9', 62 for '+' and 63 for
// '/'; all the bytes which are not BASE64 characters map to 0xff
static const unsigned char utf7unb64[] =
{
    // 0x00..0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10..0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20..0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30..0x3f: '0'..'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40..0x4f: 'A'..'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50..0x5f: 'P'..'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60..0x6f: 'a'..'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70..0x7f: 'p'..'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80..0xff: no BASE64 characters here
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
527
528 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
529 {
530 size_t len = 0;
531
532 while ( *psz && (!buf || (len < n)) )
533 {
534 unsigned char cc = *psz++;
535 if (cc != '+')
536 {
537 // plain ASCII char
538 if (buf)
539 *buf++ = cc;
540 len++;
541 }
542 else if (*psz == '-')
543 {
544 // encoded plus sign
545 if (buf)
546 *buf++ = cc;
547 len++;
548 psz++;
549 }
550 else // start of BASE64 encoded string
551 {
552 bool lsb, ok;
553 unsigned int d, l;
554 for ( ok = lsb = false, d = 0, l = 0;
555 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
556 psz++ )
557 {
558 d <<= 6;
559 d += cc;
560 for (l += 6; l >= 8; lsb = !lsb)
561 {
562 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
563 if (lsb)
564 {
565 if (buf)
566 *buf++ |= c;
567 len ++;
568 }
569 else
570 {
571 if (buf)
572 *buf = (wchar_t)(c << 8);
573 }
574
575 ok = true;
576 }
577 }
578
579 if ( !ok )
580 {
581 // in valid UTF7 we should have valid characters after '+'
582 return wxCONV_FAILED;
583 }
584
585 if (*psz == '-')
586 psz++;
587 }
588 }
589
590 if ( buf && (len < n) )
591 *buf = '\0';
592
593 return len;
594 }
595
596 //
597 // BASE64 encoding table
598 //
// the BASE64 alphabet: the character at index i encodes the 6 bit value i
static const unsigned char utf7enb64[] =
{
    // values 0..25
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // values 26..51
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // values 52..61
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // values 62 and 63
    '+', '/'
};
610
611 //
612 // UTF-7 encoding table
613 //
614 // 0 - Set D (directly encoded characters)
615 // 1 - Set O (optional direct characters)
616 // 2 - whitespace characters (optional)
617 // 3 - special characters
618 //
// class of each of the ASCII characters, indexed by the character code: 0 for
// the characters which are always encoded directly, 1 for the optional direct
// characters, 2 for (optional) whitespace and 3 for the special characters
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,    // 0x00..0x0f
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,    // 0x10..0x1f
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,    // ' ' .. '/'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,    // '0' .. '?'
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    // '@' .. 'O'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,    // 'P' .. '_'
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    // '`' .. 'o'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3     // 'p' .. 0x7f
};
630
631 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
632 {
633 size_t len = 0;
634
635 while (*psz && ((!buf) || (len < n)))
636 {
637 wchar_t cc = *psz++;
638 if (cc < 0x80 && utf7encode[cc] < 1)
639 {
640 // plain ASCII char
641 if (buf)
642 *buf++ = (char)cc;
643
644 len++;
645 }
646 #ifndef WC_UTF16
647 else if (((wxUint32)cc) > 0xffff)
648 {
649 // no surrogate pair generation (yet?)
650 return wxCONV_FAILED;
651 }
652 #endif
653 else
654 {
655 if (buf)
656 *buf++ = '+';
657
658 len++;
659 if (cc != '+')
660 {
661 // BASE64 encode string
662 unsigned int lsb, d, l;
663 for (d = 0, l = 0; /*nothing*/; psz++)
664 {
665 for (lsb = 0; lsb < 2; lsb ++)
666 {
667 d <<= 8;
668 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
669
670 for (l += 8; l >= 6; )
671 {
672 l -= 6;
673 if (buf)
674 *buf++ = utf7enb64[(d >> l) % 64];
675 len++;
676 }
677 }
678
679 cc = *psz;
680 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
681 break;
682 }
683
684 if (l != 0)
685 {
686 if (buf)
687 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
688
689 len++;
690 }
691 }
692
693 if (buf)
694 *buf++ = '-';
695 len++;
696 }
697 }
698
699 if (buf && (len < n))
700 *buf = 0;
701
702 return len;
703 }
704
705 // ----------------------------------------------------------------------------
706 // UTF-8
707 // ----------------------------------------------------------------------------
708
709 static wxUint32 utf8_max[]=
710 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
711
712 // boundaries of the private use area we use to (temporarily) remap invalid
713 // characters invalid in a UTF-8 encoded string
714 const wxUint32 wxUnicodePUA = 0x100000;
715 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
716
717 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
718 {
719 size_t len = 0;
720
721 while (*psz && ((!buf) || (len < n)))
722 {
723 const char *opsz = psz;
724 bool invalid = false;
725 unsigned char cc = *psz++, fc = cc;
726 unsigned cnt;
727 for (cnt = 0; fc & 0x80; cnt++)
728 fc <<= 1;
729
730 if (!cnt)
731 {
732 // plain ASCII char
733 if (buf)
734 *buf++ = cc;
735 len++;
736
737 // escape the escape character for octal escapes
738 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
739 && cc == '\\' && (!buf || len < n))
740 {
741 if (buf)
742 *buf++ = cc;
743 len++;
744 }
745 }
746 else
747 {
748 cnt--;
749 if (!cnt)
750 {
751 // invalid UTF-8 sequence
752 invalid = true;
753 }
754 else
755 {
756 unsigned ocnt = cnt - 1;
757 wxUint32 res = cc & (0x3f >> cnt);
758 while (cnt--)
759 {
760 cc = *psz;
761 if ((cc & 0xC0) != 0x80)
762 {
763 // invalid UTF-8 sequence
764 invalid = true;
765 break;
766 }
767
768 psz++;
769 res = (res << 6) | (cc & 0x3f);
770 }
771
772 if (invalid || res <= utf8_max[ocnt])
773 {
774 // illegal UTF-8 encoding
775 invalid = true;
776 }
777 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
778 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
779 {
780 // if one of our PUA characters turns up externally
781 // it must also be treated as an illegal sequence
782 // (a bit like you have to escape an escape character)
783 invalid = true;
784 }
785 else
786 {
787 #ifdef WC_UTF16
788 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
789 size_t pa = encode_utf16(res, (wxUint16 *)buf);
790 if (pa == wxCONV_FAILED)
791 {
792 invalid = true;
793 }
794 else
795 {
796 if (buf)
797 buf += pa;
798 len += pa;
799 }
800 #else // !WC_UTF16
801 if (buf)
802 *buf++ = (wchar_t)res;
803 len++;
804 #endif // WC_UTF16/!WC_UTF16
805 }
806 }
807
808 if (invalid)
809 {
810 if (m_options & MAP_INVALID_UTF8_TO_PUA)
811 {
812 while (opsz < psz && (!buf || len < n))
813 {
814 #ifdef WC_UTF16
815 // cast is ok because wchar_t == wxUuint16 if WC_UTF16
816 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
817 wxASSERT(pa != wxCONV_FAILED);
818 if (buf)
819 buf += pa;
820 opsz++;
821 len += pa;
822 #else
823 if (buf)
824 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
825 opsz++;
826 len++;
827 #endif
828 }
829 }
830 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
831 {
832 while (opsz < psz && (!buf || len < n))
833 {
834 if ( buf && len + 3 < n )
835 {
836 unsigned char on = *opsz;
837 *buf++ = L'\\';
838 *buf++ = (wchar_t)( L'0' + on / 0100 );
839 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
840 *buf++ = (wchar_t)( L'0' + on % 010 );
841 }
842
843 opsz++;
844 len += 4;
845 }
846 }
847 else // MAP_INVALID_UTF8_NOT
848 {
849 return wxCONV_FAILED;
850 }
851 }
852 }
853 }
854
855 if (buf && (len < n))
856 *buf = 0;
857
858 return len;
859 }
860
// returns true if the character is one of the octal digits '0'..'7'
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
865
866 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
867 {
868 size_t len = 0;
869
870 while (*psz && ((!buf) || (len < n)))
871 {
872 wxUint32 cc;
873
874 #ifdef WC_UTF16
875 // cast is ok for WC_UTF16
876 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
877 psz += (pa == wxCONV_FAILED) ? 1 : pa;
878 #else
879 cc = (*psz++) & 0x7fffffff;
880 #endif
881
882 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
883 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
884 {
885 if (buf)
886 *buf++ = (char)(cc - wxUnicodePUA);
887 len++;
888 }
889 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
890 && cc == L'\\' && psz[0] == L'\\' )
891 {
892 if (buf)
893 *buf++ = (char)cc;
894 psz++;
895 len++;
896 }
897 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
898 cc == L'\\' &&
899 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
900 {
901 if (buf)
902 {
903 *buf++ = (char) ((psz[0] - L'0') * 0100 +
904 (psz[1] - L'0') * 010 +
905 (psz[2] - L'0'));
906 }
907
908 psz += 3;
909 len++;
910 }
911 else
912 {
913 unsigned cnt;
914 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
915 {
916 }
917
918 if (!cnt)
919 {
920 // plain ASCII char
921 if (buf)
922 *buf++ = (char) cc;
923 len++;
924 }
925 else
926 {
927 len += cnt + 1;
928 if (buf)
929 {
930 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
931 while (cnt--)
932 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
933 }
934 }
935 }
936 }
937
938 if (buf && (len < n))
939 *buf = 0;
940
941 return len;
942 }
943
944 // ============================================================================
945 // UTF-16
946 // ============================================================================
947
948 #ifdef WORDS_BIGENDIAN
949 #define wxMBConvUTF16straight wxMBConvUTF16BE
950 #define wxMBConvUTF16swap wxMBConvUTF16LE
951 #else
952 #define wxMBConvUTF16swap wxMBConvUTF16BE
953 #define wxMBConvUTF16straight wxMBConvUTF16LE
954 #endif
955
956 /* static */
957 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
958 {
959 if ( srcLen == wxNO_LEN )
960 {
961 // count the number of bytes in input, including the trailing NULs
962 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
963 for ( srcLen = 1; *inBuff++; srcLen++ )
964 ;
965
966 srcLen *= BYTES_PER_CHAR;
967 }
968 else // we already have the length
969 {
970 // we can only convert an entire number of UTF-16 characters
971 if ( srcLen % BYTES_PER_CHAR )
972 return wxCONV_FAILED;
973 }
974
975 return srcLen;
976 }
977
978 // case when in-memory representation is UTF-16 too
979 #ifdef WC_UTF16
980
981 // ----------------------------------------------------------------------------
982 // conversions without endianness change
983 // ----------------------------------------------------------------------------
984
985 size_t
986 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
987 const char *src, size_t srcLen) const
988 {
989 // set up the scene for using memcpy() (which is presumably more efficient
990 // than copying the bytes one by one)
991 srcLen = GetLength(src, srcLen);
992 if ( srcLen == wxNO_LEN )
993 return wxCONV_FAILED;
994
995 const size_t inLen = srcLen / BYTES_PER_CHAR;
996 if ( dst )
997 {
998 if ( dstLen < inLen )
999 return wxCONV_FAILED;
1000
1001 memcpy(dst, src, srcLen);
1002 }
1003
1004 return inLen;
1005 }
1006
1007 size_t
1008 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1009 const wchar_t *src, size_t srcLen) const
1010 {
1011 if ( srcLen == wxNO_LEN )
1012 srcLen = wxWcslen(src) + 1;
1013
1014 srcLen *= BYTES_PER_CHAR;
1015
1016 if ( dst )
1017 {
1018 if ( dstLen < srcLen )
1019 return wxCONV_FAILED;
1020
1021 memcpy(dst, src, srcLen);
1022 }
1023
1024 return srcLen;
1025 }
1026
1027 // ----------------------------------------------------------------------------
1028 // endian-reversing conversions
1029 // ----------------------------------------------------------------------------
1030
1031 size_t
1032 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1033 const char *src, size_t srcLen) const
1034 {
1035 srcLen = GetLength(src, srcLen);
1036 if ( srcLen == wxNO_LEN )
1037 return wxCONV_FAILED;
1038
1039 srcLen /= BYTES_PER_CHAR;
1040
1041 if ( dst )
1042 {
1043 if ( dstLen < srcLen )
1044 return wxCONV_FAILED;
1045
1046 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1047 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1048 {
1049 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1050 }
1051 }
1052
1053 return srcLen;
1054 }
1055
1056 size_t
1057 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1058 const wchar_t *src, size_t srcLen) const
1059 {
1060 if ( srcLen == wxNO_LEN )
1061 srcLen = wxWcslen(src) + 1;
1062
1063 srcLen *= BYTES_PER_CHAR;
1064
1065 if ( dst )
1066 {
1067 if ( dstLen < srcLen )
1068 return wxCONV_FAILED;
1069
1070 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1071 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1072 {
1073 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1074 }
1075 }
1076
1077 return srcLen;
1078 }
1079
1080 #else // !WC_UTF16: wchar_t is UTF-32
1081
1082 // ----------------------------------------------------------------------------
1083 // conversions without endianness change
1084 // ----------------------------------------------------------------------------
1085
1086 size_t
1087 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1088 const char *src, size_t srcLen) const
1089 {
1090 srcLen = GetLength(src, srcLen);
1091 if ( srcLen == wxNO_LEN )
1092 return wxCONV_FAILED;
1093
1094 const size_t inLen = srcLen / BYTES_PER_CHAR;
1095 if ( !dst )
1096 {
1097 // optimization: return maximal space which could be needed for this
1098 // string even if the real size could be smaller if the buffer contains
1099 // any surrogates
1100 return inLen;
1101 }
1102
1103 size_t outLen = 0;
1104 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1105 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1106 {
1107 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1108 if ( !inBuff )
1109 return wxCONV_FAILED;
1110
1111 if ( ++outLen > dstLen )
1112 return wxCONV_FAILED;
1113
1114 *dst++ = ch;
1115 }
1116
1117
1118 return outLen;
1119 }
1120
1121 size_t
1122 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1123 const wchar_t *src, size_t srcLen) const
1124 {
1125 if ( srcLen == wxNO_LEN )
1126 srcLen = wxWcslen(src) + 1;
1127
1128 size_t outLen = 0;
1129 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1130 for ( size_t n = 0; n < srcLen; n++ )
1131 {
1132 wxUint16 cc[2];
1133 const size_t numChars = encode_utf16(*src++, cc);
1134 if ( numChars == wxCONV_FAILED )
1135 return wxCONV_FAILED;
1136
1137 outLen += numChars * BYTES_PER_CHAR;
1138 if ( outBuff )
1139 {
1140 if ( outLen > dstLen )
1141 return wxCONV_FAILED;
1142
1143 *outBuff++ = cc[0];
1144 if ( numChars == 2 )
1145 {
1146 // second character of a surrogate
1147 *outBuff++ = cc[1];
1148 }
1149 }
1150 }
1151
1152 return outLen;
1153 }
1154
1155 // ----------------------------------------------------------------------------
1156 // endian-reversing conversions
1157 // ----------------------------------------------------------------------------
1158
1159 size_t
1160 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1161 const char *src, size_t srcLen) const
1162 {
1163 srcLen = GetLength(src, srcLen);
1164 if ( srcLen == wxNO_LEN )
1165 return wxCONV_FAILED;
1166
1167 const size_t inLen = srcLen / BYTES_PER_CHAR;
1168 if ( !dst )
1169 {
1170 // optimization: return maximal space which could be needed for this
1171 // string even if the real size could be smaller if the buffer contains
1172 // any surrogates
1173 return inLen;
1174 }
1175
1176 size_t outLen = 0;
1177 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1178 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1179 {
1180 wxUint32 ch;
1181 wxUint16 tmp[2];
1182
1183 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1184 inBuff++;
1185 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1186
1187 const size_t numChars = decode_utf16(tmp, ch);
1188 if ( numChars == wxCONV_FAILED )
1189 return wxCONV_FAILED;
1190
1191 if ( numChars == 2 )
1192 inBuff++;
1193
1194 if ( ++outLen > dstLen )
1195 return wxCONV_FAILED;
1196
1197 *dst++ = ch;
1198 }
1199
1200
1201 return outLen;
1202 }
1203
1204 size_t
1205 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1206 const wchar_t *src, size_t srcLen) const
1207 {
1208 if ( srcLen == wxNO_LEN )
1209 srcLen = wxWcslen(src) + 1;
1210
1211 size_t outLen = 0;
1212 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1213 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1214 {
1215 wxUint16 cc[2];
1216 const size_t numChars = encode_utf16(*src, cc);
1217 if ( numChars == wxCONV_FAILED )
1218 return wxCONV_FAILED;
1219
1220 outLen += numChars * BYTES_PER_CHAR;
1221 if ( outBuff )
1222 {
1223 if ( outLen > dstLen )
1224 return wxCONV_FAILED;
1225
1226 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1227 if ( numChars == 2 )
1228 {
1229 // second character of a surrogate
1230 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1231 }
1232 }
1233 }
1234
1235 return outLen;
1236 }
1237
1238 #endif // WC_UTF16/!WC_UTF16
1239
1240
1241 // ============================================================================
1242 // UTF-32
1243 // ============================================================================
1244
1245 #ifdef WORDS_BIGENDIAN
1246 #define wxMBConvUTF32straight wxMBConvUTF32BE
1247 #define wxMBConvUTF32swap wxMBConvUTF32LE
1248 #else
1249 #define wxMBConvUTF32swap wxMBConvUTF32BE
1250 #define wxMBConvUTF32straight wxMBConvUTF32LE
1251 #endif
1252
1253
1254 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1255 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1256
1257 /* static */
1258 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1259 {
1260 if ( srcLen == wxNO_LEN )
1261 {
1262 // count the number of bytes in input, including the trailing NULs
1263 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1264 for ( srcLen = 1; *inBuff++; srcLen++ )
1265 ;
1266
1267 srcLen *= BYTES_PER_CHAR;
1268 }
1269 else // we already have the length
1270 {
1271 // we can only convert an entire number of UTF-32 characters
1272 if ( srcLen % BYTES_PER_CHAR )
1273 return wxCONV_FAILED;
1274 }
1275
1276 return srcLen;
1277 }
1278
1279 // case when in-memory representation is UTF-16
1280 #ifdef WC_UTF16
1281
1282 // ----------------------------------------------------------------------------
1283 // conversions without endianness change
1284 // ----------------------------------------------------------------------------
1285
1286 size_t
1287 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1288 const char *src, size_t srcLen) const
1289 {
1290 srcLen = GetLength(src, srcLen);
1291 if ( srcLen == wxNO_LEN )
1292 return wxCONV_FAILED;
1293
1294 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1295 const size_t inLen = srcLen / BYTES_PER_CHAR;
1296 size_t outLen = 0;
1297 for ( size_t n = 0; n < inLen; n++ )
1298 {
1299 wxUint16 cc[2];
1300 const size_t numChars = encode_utf16(*inBuff++, cc);
1301 if ( numChars == wxCONV_FAILED )
1302 return wxCONV_FAILED;
1303
1304 outLen += numChars;
1305 if ( dst )
1306 {
1307 if ( outLen > dstLen )
1308 return wxCONV_FAILED;
1309
1310 *dst++ = cc[0];
1311 if ( numChars == 2 )
1312 {
1313 // second character of a surrogate
1314 *dst++ = cc[1];
1315 }
1316 }
1317 }
1318
1319 return outLen;
1320 }
1321
1322 size_t
1323 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1324 const wchar_t *src, size_t srcLen) const
1325 {
1326 if ( srcLen == wxNO_LEN )
1327 srcLen = wxWcslen(src) + 1;
1328
1329 if ( !dst )
1330 {
1331 // optimization: return maximal space which could be needed for this
1332 // string instead of the exact amount which could be less if there are
1333 // any surrogates in the input
1334 //
1335 // we consider that surrogates are rare enough to make it worthwhile to
1336 // avoid running the loop below at the cost of slightly extra memory
1337 // consumption
1338 return srcLen * BYTES_PER_CHAR;
1339 }
1340
1341 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1342 size_t outLen = 0;
1343 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1344 {
1345 const wxUint32 ch = wxDecodeSurrogate(&src);
1346 if ( !src )
1347 return wxCONV_FAILED;
1348
1349 outLen += BYTES_PER_CHAR;
1350
1351 if ( outLen > dstLen )
1352 return wxCONV_FAILED;
1353
1354 *outBuff++ = ch;
1355 }
1356
1357 return outLen;
1358 }
1359
1360 // ----------------------------------------------------------------------------
1361 // endian-reversing conversions
1362 // ----------------------------------------------------------------------------
1363
1364 size_t
1365 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1366 const char *src, size_t srcLen) const
1367 {
1368 srcLen = GetLength(src, srcLen);
1369 if ( srcLen == wxNO_LEN )
1370 return wxCONV_FAILED;
1371
1372 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1373 const size_t inLen = srcLen / BYTES_PER_CHAR;
1374 size_t outLen = 0;
1375 for ( size_t n = 0; n < inLen; n++, inBuff++ )
1376 {
1377 wxUint16 cc[2];
1378 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1379 if ( numChars == wxCONV_FAILED )
1380 return wxCONV_FAILED;
1381
1382 outLen += numChars;
1383 if ( dst )
1384 {
1385 if ( outLen > dstLen )
1386 return wxCONV_FAILED;
1387
1388 *dst++ = cc[0];
1389 if ( numChars == 2 )
1390 {
1391 // second character of a surrogate
1392 *dst++ = cc[1];
1393 }
1394 }
1395 }
1396
1397 return outLen;
1398 }
1399
1400 size_t
1401 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1402 const wchar_t *src, size_t srcLen) const
1403 {
1404 if ( srcLen == wxNO_LEN )
1405 srcLen = wxWcslen(src) + 1;
1406
1407 if ( !dst )
1408 {
1409 // optimization: return maximal space which could be needed for this
1410 // string instead of the exact amount which could be less if there are
1411 // any surrogates in the input
1412 //
1413 // we consider that surrogates are rare enough to make it worthwhile to
1414 // avoid running the loop below at the cost of slightly extra memory
1415 // consumption
1416 return srcLen*BYTES_PER_CHAR;
1417 }
1418
1419 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1420 size_t outLen = 0;
1421 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1422 {
1423 const wxUint32 ch = wxDecodeSurrogate(&src);
1424 if ( !src )
1425 return wxCONV_FAILED;
1426
1427 outLen += BYTES_PER_CHAR;
1428
1429 if ( outLen > dstLen )
1430 return wxCONV_FAILED;
1431
1432 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1433 }
1434
1435 return outLen;
1436 }
1437
1438 #else // !WC_UTF16: wchar_t is UTF-32
1439
1440 // ----------------------------------------------------------------------------
1441 // conversions without endianness change
1442 // ----------------------------------------------------------------------------
1443
1444 size_t
1445 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1446 const char *src, size_t srcLen) const
1447 {
1448 // use memcpy() as it should be much faster than hand-written loop
1449 srcLen = GetLength(src, srcLen);
1450 if ( srcLen == wxNO_LEN )
1451 return wxCONV_FAILED;
1452
1453 const size_t inLen = srcLen/BYTES_PER_CHAR;
1454 if ( dst )
1455 {
1456 if ( dstLen < inLen )
1457 return wxCONV_FAILED;
1458
1459 memcpy(dst, src, srcLen);
1460 }
1461
1462 return inLen;
1463 }
1464
1465 size_t
1466 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1467 const wchar_t *src, size_t srcLen) const
1468 {
1469 if ( srcLen == wxNO_LEN )
1470 srcLen = wxWcslen(src) + 1;
1471
1472 srcLen *= BYTES_PER_CHAR;
1473
1474 if ( dst )
1475 {
1476 if ( dstLen < srcLen )
1477 return wxCONV_FAILED;
1478
1479 memcpy(dst, src, srcLen);
1480 }
1481
1482 return srcLen;
1483 }
1484
1485 // ----------------------------------------------------------------------------
1486 // endian-reversing conversions
1487 // ----------------------------------------------------------------------------
1488
1489 size_t
1490 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1491 const char *src, size_t srcLen) const
1492 {
1493 srcLen = GetLength(src, srcLen);
1494 if ( srcLen == wxNO_LEN )
1495 return wxCONV_FAILED;
1496
1497 srcLen /= BYTES_PER_CHAR;
1498
1499 if ( dst )
1500 {
1501 if ( dstLen < srcLen )
1502 return wxCONV_FAILED;
1503
1504 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1505 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1506 {
1507 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
1508 }
1509 }
1510
1511 return srcLen;
1512 }
1513
1514 size_t
1515 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1516 const wchar_t *src, size_t srcLen) const
1517 {
1518 if ( srcLen == wxNO_LEN )
1519 srcLen = wxWcslen(src) + 1;
1520
1521 srcLen *= BYTES_PER_CHAR;
1522
1523 if ( dst )
1524 {
1525 if ( dstLen < srcLen )
1526 return wxCONV_FAILED;
1527
1528 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1529 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1530 {
1531 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
1532 }
1533 }
1534
1535 return srcLen;
1536 }
1537
1538 #endif // WC_UTF16/!WC_UTF16
1539
1540
1541 // ============================================================================
1542 // The classes doing conversion using the iconv_xxx() functions
1543 // ============================================================================
1544
1545 #ifdef HAVE_ICONV
1546
// ICONV_FAILED(cres, bufLeft): test whether an iconv() call returning cres and
// leaving bufLeft unconverted input bytes should be treated as an error.
1547 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1548 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1549 // (unless there's yet another bug in glibc) the only case when iconv()
1550 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1551 // left in the input buffer -- when _real_ error occurs,
1552 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1553 // iconv() failure.
1554 // [This bug does not appear in glibc 2.2.]
1555 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1556 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1557 (errno != E2BIG || bufLeft != 0))
1558 #else
1559 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1560 #endif
1561
// cast used for the input buffer argument of iconv(); ICONV_CONST is defined
// outside of this file section (presumably by the build configuration)
1562 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1563
// value the iconv_open() results are compared against to detect failure
1564 #define ICONV_T_INVALID ((iconv_t)-1)
1565
// WC_BSWAP is the byte-swapping macro and WC_ENC the wx font encoding
// corresponding to the size of wchar_t on this platform
1566 #if SIZEOF_WCHAR_T == 4
1567 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1568 #define WC_ENC wxFONTENCODING_UTF32
1569 #elif SIZEOF_WCHAR_T == 2
1570 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1571 #define WC_ENC wxFONTENCODING_UTF16
1572 #else // sizeof(wchar_t) != 2 nor 4
1573 // does this ever happen?
1574 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1575 #endif
1576
1577 // ----------------------------------------------------------------------------
1578 // wxMBConv_iconv: encapsulates an iconv character set
1579 // ----------------------------------------------------------------------------
1580
1581 class wxMBConv_iconv : public wxMBConv
1582 {
1583 public:
1584 wxMBConv_iconv(const char *name);
1585 virtual ~wxMBConv_iconv();
1586
1587 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1588 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1589
1590 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1591 virtual size_t GetMBNulLen() const;
1592
1593 #if wxUSE_UNICODE_UTF8
1594 virtual bool IsUTF8() const;
1595 #endif
1596
1597 virtual wxMBConv *Clone() const
1598 {
1599 wxMBConv_iconv *p = new wxMBConv_iconv(m_name.ToAscii());
1600 p->m_minMBCharWidth = m_minMBCharWidth;
1601 return p;
1602 }
1603
1604 bool IsOk() const
1605 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1606
1607 protected:
1608 // the iconv handlers used to translate from multibyte
1609 // to wide char and in the other direction
1610 iconv_t m2w,
1611 w2m;
1612
1613 #if wxUSE_THREADS
1614 // guards access to m2w and w2m objects
1615 wxMutex m_iconvMutex;
1616 #endif
1617
1618 private:
1619 // the name (for iconv_open()) of a wide char charset -- if none is
1620 // available on this machine, it will remain NULL
1621 static wxString ms_wcCharsetName;
1622
1623 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1624 // different endian-ness than the native one
1625 static bool ms_wcNeedsSwap;
1626
1627
1628 // name of the encoding handled by this conversion
1629 wxString m_name;
1630
1631 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1632 // initially
1633 size_t m_minMBCharWidth;
1634 };
1635
1636 // make the constructor available for unit testing
1637 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name )
1638 {
1639 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1640 if ( !result->IsOk() )
1641 {
1642 delete result;
1643 return 0;
1644 }
1645
1646 return result;
1647 }
1648
// Definitions of the static members shared by all wxMBConv_iconv objects: the
// charset name is initially empty and is filled in by the constructor the
// first time a usable wchar_t charset is found, the swap flag is set there
// too when that charset has to be probed for its byte order.
1649 wxString wxMBConv_iconv::ms_wcCharsetName;
1650 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1651
1652 wxMBConv_iconv::wxMBConv_iconv(const char *name)
1653 : m_name(name)
1654 {
1655 m_minMBCharWidth = 0;
1656
1657 // check for charset that represents wchar_t:
1658 if ( ms_wcCharsetName.empty() )
1659 {
1660 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1661
1662 #if wxUSE_FONTMAP
1663 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1664 #else // !wxUSE_FONTMAP
1665 static const wxChar *names_static[] =
1666 {
1667 #if SIZEOF_WCHAR_T == 4
1668 _T("UCS-4"),
1669 #elif SIZEOF_WCHAR_T = 2
1670 _T("UCS-2"),
1671 #endif
1672 NULL
1673 };
1674 const wxChar **names = names_static;
1675 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1676
1677 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1678 {
1679 const wxString nameCS(*names);
1680
1681 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1682 wxString nameXE(nameCS);
1683
1684 #ifdef WORDS_BIGENDIAN
1685 nameXE += _T("BE");
1686 #else // little endian
1687 nameXE += _T("LE");
1688 #endif
1689
1690 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1691 nameXE.c_str());
1692
1693 m2w = iconv_open(nameXE.ToAscii(), name);
1694 if ( m2w == ICONV_T_INVALID )
1695 {
1696 // try charset w/o bytesex info (e.g. "UCS4")
1697 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1698 nameCS.c_str());
1699 m2w = iconv_open(nameCS.ToAscii(), name);
1700
1701 // and check for bytesex ourselves:
1702 if ( m2w != ICONV_T_INVALID )
1703 {
1704 char buf[2], *bufPtr;
1705 wchar_t wbuf[2], *wbufPtr;
1706 size_t insz, outsz;
1707 size_t res;
1708
1709 buf[0] = 'A';
1710 buf[1] = 0;
1711 wbuf[0] = 0;
1712 insz = 2;
1713 outsz = SIZEOF_WCHAR_T * 2;
1714 wbufPtr = wbuf;
1715 bufPtr = buf;
1716
1717 res = iconv(
1718 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1719 (char**)&wbufPtr, &outsz);
1720
1721 if (ICONV_FAILED(res, insz))
1722 {
1723 wxLogLastError(wxT("iconv"));
1724 wxLogError(_("Conversion to charset '%s' doesn't work."),
1725 nameCS.c_str());
1726 }
1727 else // ok, can convert to this encoding, remember it
1728 {
1729 ms_wcCharsetName = nameCS;
1730 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1731 }
1732 }
1733 }
1734 else // use charset not requiring byte swapping
1735 {
1736 ms_wcCharsetName = nameXE;
1737 }
1738 }
1739
1740 wxLogTrace(TRACE_STRCONV,
1741 wxT("iconv wchar_t charset is \"%s\"%s"),
1742 ms_wcCharsetName.empty() ? wxString("<none>")
1743 : ms_wcCharsetName,
1744 ms_wcNeedsSwap ? _T(" (needs swap)")
1745 : _T(""));
1746 }
1747 else // we already have ms_wcCharsetName
1748 {
1749 m2w = iconv_open(ms_wcCharsetName.ToAscii(), name);
1750 }
1751
1752 if ( ms_wcCharsetName.empty() )
1753 {
1754 w2m = ICONV_T_INVALID;
1755 }
1756 else
1757 {
1758 w2m = iconv_open(name, ms_wcCharsetName.ToAscii());
1759 if ( w2m == ICONV_T_INVALID )
1760 {
1761 wxLogTrace(TRACE_STRCONV,
1762 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1763 ms_wcCharsetName.c_str(), name);
1764 }
1765 }
1766 }
1767
1768 wxMBConv_iconv::~wxMBConv_iconv()
1769 {
1770 if ( m2w != ICONV_T_INVALID )
1771 iconv_close(m2w);
1772 if ( w2m != ICONV_T_INVALID )
1773 iconv_close(w2m);
1774 }
1775
1776 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1777 {
1778 // find the string length: notice that must be done differently for
1779 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1780 size_t inbuf;
1781 const size_t nulLen = GetMBNulLen();
1782 switch ( nulLen )
1783 {
1784 default:
1785 return wxCONV_FAILED;
1786
1787 case 1:
1788 inbuf = strlen(psz); // arguably more optimized than our version
1789 break;
1790
1791 case 2:
1792 case 4:
1793 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1794 // they also have to start at character boundary and not span two
1795 // adjacent characters
1796 const char *p;
1797 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1798 ;
1799 inbuf = p - psz;
1800 break;
1801 }
1802
1803 #if wxUSE_THREADS
1804 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1805 // Unfortunately there are a couple of global wxCSConv objects such as
1806 // wxConvLocal that are used all over wx code, so we have to make sure
1807 // the handle is used by at most one thread at the time. Otherwise
1808 // only a few wx classes would be safe to use from non-main threads
1809 // as MB<->WC conversion would fail "randomly".
1810 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1811 #endif // wxUSE_THREADS
1812
1813 size_t outbuf = n * SIZEOF_WCHAR_T;
1814 size_t res, cres;
1815 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1816 wchar_t *bufPtr = buf;
1817 const char *pszPtr = psz;
1818
1819 if (buf)
1820 {
1821 // have destination buffer, convert there
1822 cres = iconv(m2w,
1823 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1824 (char**)&bufPtr, &outbuf);
1825 res = n - (outbuf / SIZEOF_WCHAR_T);
1826
1827 if (ms_wcNeedsSwap)
1828 {
1829 // convert to native endianness
1830 for ( unsigned i = 0; i < res; i++ )
1831 buf[n] = WC_BSWAP(buf[i]);
1832 }
1833
1834 // NUL-terminate the string if there is any space left
1835 if (res < n)
1836 buf[res] = 0;
1837 }
1838 else
1839 {
1840 // no destination buffer... convert using temp buffer
1841 // to calculate destination buffer requirement
1842 wchar_t tbuf[8];
1843 res = 0;
1844
1845 do
1846 {
1847 bufPtr = tbuf;
1848 outbuf = 8 * SIZEOF_WCHAR_T;
1849
1850 cres = iconv(m2w,
1851 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1852 (char**)&bufPtr, &outbuf );
1853
1854 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1855 }
1856 while ((cres == (size_t)-1) && (errno == E2BIG));
1857 }
1858
1859 if (ICONV_FAILED(cres, inbuf))
1860 {
1861 //VS: it is ok if iconv fails, hence trace only
1862 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1863 return wxCONV_FAILED;
1864 }
1865
1866 return res;
1867 }
1868
1869 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1870 {
1871 #if wxUSE_THREADS
1872 // NB: explained in MB2WC
1873 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1874 #endif
1875
1876 size_t inlen = wxWcslen(psz);
1877 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1878 size_t outbuf = n;
1879 size_t res, cres;
1880
1881 wchar_t *tmpbuf = 0;
1882
1883 if (ms_wcNeedsSwap)
1884 {
1885 // need to copy to temp buffer to switch endianness
1886 // (doing WC_BSWAP twice on the original buffer won't help, as it
1887 // could be in read-only memory, or be accessed in some other thread)
1888 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1889 for ( size_t i = 0; i < inlen; i++ )
1890 tmpbuf[n] = WC_BSWAP(psz[i]);
1891
1892 tmpbuf[inlen] = L'\0';
1893 psz = tmpbuf;
1894 }
1895
1896 if (buf)
1897 {
1898 // have destination buffer, convert there
1899 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1900
1901 res = n - outbuf;
1902
1903 // NB: iconv was given only wcslen(psz) characters on input, and so
1904 // it couldn't convert the trailing zero. Let's do it ourselves
1905 // if there's some room left for it in the output buffer.
1906 if (res < n)
1907 buf[0] = 0;
1908 }
1909 else
1910 {
1911 // no destination buffer: convert using temp buffer
1912 // to calculate destination buffer requirement
1913 char tbuf[16];
1914 res = 0;
1915 do
1916 {
1917 buf = tbuf;
1918 outbuf = 16;
1919
1920 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1921
1922 res += 16 - outbuf;
1923 }
1924 while ((cres == (size_t)-1) && (errno == E2BIG));
1925 }
1926
1927 if (ms_wcNeedsSwap)
1928 {
1929 free(tmpbuf);
1930 }
1931
1932 if (ICONV_FAILED(cres, inbuf))
1933 {
1934 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1935 return wxCONV_FAILED;
1936 }
1937
1938 return res;
1939 }
1940
1941 size_t wxMBConv_iconv::GetMBNulLen() const
1942 {
1943 if ( m_minMBCharWidth == 0 )
1944 {
1945 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1946
1947 #if wxUSE_THREADS
1948 // NB: explained in MB2WC
1949 wxMutexLocker lock(self->m_iconvMutex);
1950 #endif
1951
1952 const wchar_t *wnul = L"";
1953 char buf[8]; // should be enough for NUL in any encoding
1954 size_t inLen = sizeof(wchar_t),
1955 outLen = WXSIZEOF(buf);
1956 char *inBuff = (char *)wnul;
1957 char *outBuff = buf;
1958 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
1959 {
1960 self->m_minMBCharWidth = (size_t)-1;
1961 }
1962 else // ok
1963 {
1964 self->m_minMBCharWidth = outBuff - buf;
1965 }
1966 }
1967
1968 return m_minMBCharWidth;
1969 }
1970
#if wxUSE_UNICODE_UTF8
// Check, ignoring case, whether the charset of this converter is named
// "UTF-8" or "UTF8".
bool wxMBConv_iconv::IsUTF8() const
{
    if ( wxStricmp(m_name, "UTF-8") == 0 )
        return true;

    return wxStricmp(m_name, "UTF8") == 0;
}
#endif
1978
1979 #endif // HAVE_ICONV
1980
1981
1982 // ============================================================================
1983 // Win32 conversion classes
1984 // ============================================================================
1985
1986 #ifdef wxHAVE_WIN32_MB2WC
1987
1988 // from utils.cpp
1989 #if wxUSE_FONTMAP
1990 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const char *charset);
1991 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1992 #endif
1993
1994 class wxMBConv_win32 : public wxMBConv
1995 {
1996 public:
1997 wxMBConv_win32()
1998 {
1999 m_CodePage = CP_ACP;
2000 m_minMBCharWidth = 0;
2001 }
2002
2003 wxMBConv_win32(const wxMBConv_win32& conv)
2004 : wxMBConv()
2005 {
2006 m_CodePage = conv.m_CodePage;
2007 m_minMBCharWidth = conv.m_minMBCharWidth;
2008 }
2009
2010 #if wxUSE_FONTMAP
2011 wxMBConv_win32(const char* name)
2012 {
2013 m_CodePage = wxCharsetToCodepage(name);
2014 m_minMBCharWidth = 0;
2015 }
2016
2017 wxMBConv_win32(wxFontEncoding encoding)
2018 {
2019 m_CodePage = wxEncodingToCodepage(encoding);
2020 m_minMBCharWidth = 0;
2021 }
2022 #endif // wxUSE_FONTMAP
2023
2024 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2025 {
2026 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2027 // the behaviour is not compatible with the Unix version (using iconv)
2028 // and break the library itself, e.g. wxTextInputStream::NextChar()
2029 // wouldn't work if reading an incomplete MB char didn't result in an
2030 // error
2031 //
2032 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2033 // Win XP or newer and it is not supported for UTF-[78] so we always
2034 // use our own conversions in this case. See
2035 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2036 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2037 if ( m_CodePage == CP_UTF8 )
2038 {
2039 return wxMBConvUTF8().MB2WC(buf, psz, n);
2040 }
2041
2042 if ( m_CodePage == CP_UTF7 )
2043 {
2044 return wxMBConvUTF7().MB2WC(buf, psz, n);
2045 }
2046
2047 int flags = 0;
2048 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2049 IsAtLeastWin2kSP4() )
2050 {
2051 flags = MB_ERR_INVALID_CHARS;
2052 }
2053
2054 const size_t len = ::MultiByteToWideChar
2055 (
2056 m_CodePage, // code page
2057 flags, // flags: fall on error
2058 psz, // input string
2059 -1, // its length (NUL-terminated)
2060 buf, // output string
2061 buf ? n : 0 // size of output buffer
2062 );
2063 if ( !len )
2064 {
2065 // function totally failed
2066 return wxCONV_FAILED;
2067 }
2068
2069 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2070 // check if we succeeded, by doing a double trip:
2071 if ( !flags && buf )
2072 {
2073 const size_t mbLen = strlen(psz);
2074 wxCharBuffer mbBuf(mbLen);
2075 if ( ::WideCharToMultiByte
2076 (
2077 m_CodePage,
2078 0,
2079 buf,
2080 -1,
2081 mbBuf.data(),
2082 mbLen + 1, // size in bytes, not length
2083 NULL,
2084 NULL
2085 ) == 0 ||
2086 strcmp(mbBuf, psz) != 0 )
2087 {
2088 // we didn't obtain the same thing we started from, hence
2089 // the conversion was lossy and we consider that it failed
2090 return wxCONV_FAILED;
2091 }
2092 }
2093
2094 // note that it returns count of written chars for buf != NULL and size
2095 // of the needed buffer for buf == NULL so in either case the length of
2096 // the string (which never includes the terminating NUL) is one less
2097 return len - 1;
2098 }
2099
2100 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2101 {
2102 /*
2103 we have a problem here: by default, WideCharToMultiByte() may
2104 replace characters unrepresentable in the target code page with bad
2105 quality approximations such as turning "1/2" symbol (U+00BD) into
2106 "1" for the code pages which don't have it and we, obviously, want
2107 to avoid this at any price
2108
2109 the trouble is that this function does it _silently_, i.e. it won't
2110 even tell us whether it did or not... Win98/2000 and higher provide
2111 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2112 we have to resort to a round trip, i.e. check that converting back
2113 results in the same string -- this is, of course, expensive but
2114 otherwise we simply can't be sure to not garble the data.
2115 */
2116
2117 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2118 // it doesn't work with CJK encodings (which we test for rather roughly
2119 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2120 // supporting it
2121 BOOL usedDef wxDUMMY_INITIALIZE(false);
2122 BOOL *pUsedDef;
2123 int flags;
2124 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2125 {
2126 // it's our lucky day
2127 flags = WC_NO_BEST_FIT_CHARS;
2128 pUsedDef = &usedDef;
2129 }
2130 else // old system or unsupported encoding
2131 {
2132 flags = 0;
2133 pUsedDef = NULL;
2134 }
2135
2136 const size_t len = ::WideCharToMultiByte
2137 (
2138 m_CodePage, // code page
2139 flags, // either none or no best fit
2140 pwz, // input string
2141 -1, // it is (wide) NUL-terminated
2142 buf, // output buffer
2143 buf ? n : 0, // and its size
2144 NULL, // default "replacement" char
2145 pUsedDef // [out] was it used?
2146 );
2147
2148 if ( !len )
2149 {
2150 // function totally failed
2151 return wxCONV_FAILED;
2152 }
2153
2154 // if we were really converting, check if we succeeded
2155 if ( buf )
2156 {
2157 if ( flags )
2158 {
2159 // check if the conversion failed, i.e. if any replacements
2160 // were done
2161 if ( usedDef )
2162 return wxCONV_FAILED;
2163 }
2164 else // we must resort to double tripping...
2165 {
2166 wxWCharBuffer wcBuf(n);
2167 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2168 wcscmp(wcBuf, pwz) != 0 )
2169 {
2170 // we didn't obtain the same thing we started from, hence
2171 // the conversion was lossy and we consider that it failed
2172 return wxCONV_FAILED;
2173 }
2174 }
2175 }
2176
2177 // see the comment above for the reason of "len - 1"
2178 return len - 1;
2179 }
2180
2181 virtual size_t GetMBNulLen() const
2182 {
2183 if ( m_minMBCharWidth == 0 )
2184 {
2185 int len = ::WideCharToMultiByte
2186 (
2187 m_CodePage, // code page
2188 0, // no flags
2189 L"", // input string
2190 1, // translate just the NUL
2191 NULL, // output buffer
2192 0, // and its size
2193 NULL, // no replacement char
2194 NULL // [out] don't care if it was used
2195 );
2196
2197 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2198 switch ( len )
2199 {
2200 default:
2201 wxLogDebug(_T("Unexpected NUL length %d"), len);
2202 self->m_minMBCharWidth = (size_t)-1;
2203 break;
2204
2205 case 0:
2206 self->m_minMBCharWidth = (size_t)-1;
2207 break;
2208
2209 case 1:
2210 case 2:
2211 case 4:
2212 self->m_minMBCharWidth = len;
2213 break;
2214 }
2215 }
2216
2217 return m_minMBCharWidth;
2218 }
2219
2220 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2221
2222 bool IsOk() const { return m_CodePage != -1; }
2223
2224 private:
2225 static bool CanUseNoBestFit()
2226 {
2227 static int s_isWin98Or2k = -1;
2228
2229 if ( s_isWin98Or2k == -1 )
2230 {
2231 int verMaj, verMin;
2232 switch ( wxGetOsVersion(&verMaj, &verMin) )
2233 {
2234 case wxOS_WINDOWS_9X:
2235 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2236 break;
2237
2238 case wxOS_WINDOWS_NT:
2239 s_isWin98Or2k = verMaj >= 5;
2240 break;
2241
2242 default:
2243 // unknown: be conservative by default
2244 s_isWin98Or2k = 0;
2245 break;
2246 }
2247
2248 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2249 }
2250
2251 return s_isWin98Or2k == 1;
2252 }
2253
2254 static bool IsAtLeastWin2kSP4()
2255 {
2256 #ifdef __WXWINCE__
2257 return false;
2258 #else
2259 static int s_isAtLeastWin2kSP4 = -1;
2260
2261 if ( s_isAtLeastWin2kSP4 == -1 )
2262 {
2263 OSVERSIONINFOEX ver;
2264
2265 memset(&ver, 0, sizeof(ver));
2266 ver.dwOSVersionInfoSize = sizeof(ver);
2267 GetVersionEx((OSVERSIONINFO*)&ver);
2268
2269 s_isAtLeastWin2kSP4 =
2270 ((ver.dwMajorVersion > 5) || // Vista+
2271 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2272 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2273 ver.wServicePackMajor >= 4)) // 2000 SP4+
2274 ? 1 : 0;
2275 }
2276
2277 return s_isAtLeastWin2kSP4 == 1;
2278 #endif
2279 }
2280
2281
2282 // the code page we're working with
2283 long m_CodePage;
2284
2285 // cached result of GetMBNulLen(), set to 0 initially meaning
2286 // "unknown"
2287 size_t m_minMBCharWidth;
2288 };
2289
2290 #endif // wxHAVE_WIN32_MB2WC
2291
2292
2293 // ============================================================================
2294 // wxEncodingConverter based conversion classes
2295 // ============================================================================
2296
2297 #if wxUSE_FONTMAP
2298
2299 class wxMBConv_wxwin : public wxMBConv
2300 {
2301 private:
2302 void Init()
2303 {
2304 // Refuse to use broken wxEncodingConverter code for Mac-specific encodings.
2305 // The wxMBConv_cf class does a better job.
2306 m_ok = (m_enc < wxFONTENCODING_MACMIN || m_enc > wxFONTENCODING_MACMAX) &&
2307 m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
2308 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
2309 }
2310
2311 public:
2312 // temporarily just use wxEncodingConverter stuff,
2313 // so that it works while a better implementation is built
2314 wxMBConv_wxwin(const char* name)
2315 {
2316 if (name)
2317 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2318 else
2319 m_enc = wxFONTENCODING_SYSTEM;
2320
2321 Init();
2322 }
2323
2324 wxMBConv_wxwin(wxFontEncoding enc)
2325 {
2326 m_enc = enc;
2327
2328 Init();
2329 }
2330
2331 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
2332 {
2333 size_t inbuf = strlen(psz);
2334 if (buf)
2335 {
2336 if (!m2w.Convert(psz, buf))
2337 return wxCONV_FAILED;
2338 }
2339 return inbuf;
2340 }
2341
2342 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
2343 {
2344 const size_t inbuf = wxWcslen(psz);
2345 if (buf)
2346 {
2347 if (!w2m.Convert(psz, buf))
2348 return wxCONV_FAILED;
2349 }
2350
2351 return inbuf;
2352 }
2353
2354 virtual size_t GetMBNulLen() const
2355 {
2356 switch ( m_enc )
2357 {
2358 case wxFONTENCODING_UTF16BE:
2359 case wxFONTENCODING_UTF16LE:
2360 return 2;
2361
2362 case wxFONTENCODING_UTF32BE:
2363 case wxFONTENCODING_UTF32LE:
2364 return 4;
2365
2366 default:
2367 return 1;
2368 }
2369 }
2370
2371 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
2372
2373 bool IsOk() const { return m_ok; }
2374
2375 public:
2376 wxFontEncoding m_enc;
2377 wxEncodingConverter m2w, w2m;
2378
2379 private:
2380 // were we initialized successfully?
2381 bool m_ok;
2382
2383 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
2384 };
2385
2386 // make the constructors available for unit testing
2387 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const char* name )
2388 {
2389 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
2390 if ( !result->IsOk() )
2391 {
2392 delete result;
2393 return 0;
2394 }
2395
2396 return result;
2397 }
2398
2399 #endif // wxUSE_FONTMAP
2400
2401 // ============================================================================
2402 // wxCSConv implementation
2403 // ============================================================================
2404
2405 void wxCSConv::Init()
2406 {
2407 m_name = NULL;
2408 m_convReal = NULL;
2409 m_deferred = true;
2410 }
2411
2412 wxCSConv::wxCSConv(const wxString& charset)
2413 {
2414 Init();
2415
2416 if ( !charset.empty() )
2417 {
2418 SetName(charset.ToAscii());
2419 }
2420
2421 #if wxUSE_FONTMAP
2422 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
2423 #else
2424 m_encoding = wxFONTENCODING_SYSTEM;
2425 #endif
2426 }
2427
2428 wxCSConv::wxCSConv(wxFontEncoding encoding)
2429 {
2430 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
2431 {
2432 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
2433
2434 encoding = wxFONTENCODING_SYSTEM;
2435 }
2436
2437 Init();
2438
2439 m_encoding = encoding;
2440 }
2441
2442 wxCSConv::~wxCSConv()
2443 {
2444 Clear();
2445 }
2446
2447 wxCSConv::wxCSConv(const wxCSConv& conv)
2448 : wxMBConv()
2449 {
2450 Init();
2451
2452 SetName(conv.m_name);
2453 m_encoding = conv.m_encoding;
2454 }
2455
2456 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
2457 {
2458 Clear();
2459
2460 SetName(conv.m_name);
2461 m_encoding = conv.m_encoding;
2462
2463 return *this;
2464 }
2465
2466 void wxCSConv::Clear()
2467 {
2468 free(m_name);
2469 delete m_convReal;
2470
2471 m_name = NULL;
2472 m_convReal = NULL;
2473 }
2474
2475 void wxCSConv::SetName(const char *charset)
2476 {
2477 if (charset)
2478 {
2479 m_name = strdup(charset);
2480 m_deferred = true;
2481 }
2482 }
2483
#if wxUSE_FONTMAP

// Hash map from a font encoding to a charset name.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// Cache filled by wxCSConv::DoCreate(): for each encoding it stores the name
// under which iconv accepted it, or an empty string if iconv rejected all the
// names of this encoding.
static wxEncodingNameCache gs_nameCache;
#endif
2491
2492 wxMBConv *wxCSConv::DoCreate() const
2493 {
2494 #if wxUSE_FONTMAP
2495 wxLogTrace(TRACE_STRCONV,
2496 wxT("creating conversion for %s"),
2497 (m_name ? m_name
2498 : (const char*)wxFontMapperBase::GetEncodingName(m_encoding).mb_str()));
2499 #endif // wxUSE_FONTMAP
2500
2501 // check for the special case of ASCII or ISO8859-1 charset: as we have
2502 // special knowledge of it anyhow, we don't need to create a special
2503 // conversion object
2504 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
2505 m_encoding == wxFONTENCODING_DEFAULT )
2506 {
2507 // don't convert at all
2508 return NULL;
2509 }
2510
2511 // we trust OS to do conversion better than we can so try external
2512 // conversion methods first
2513 //
2514 // the full order is:
2515 // 1. OS conversion (iconv() under Unix or Win32 API)
2516 // 2. hard coded conversions for UTF
2517 // 3. wxEncodingConverter as fall back
2518
2519 // step (1)
2520 #ifdef HAVE_ICONV
2521 #if !wxUSE_FONTMAP
2522 if ( m_name )
2523 #endif // !wxUSE_FONTMAP
2524 {
2525 #if wxUSE_FONTMAP
2526 wxFontEncoding encoding(m_encoding);
2527 #endif
2528
2529 if ( m_name )
2530 {
2531 wxMBConv_iconv *conv = new wxMBConv_iconv(m_name);
2532 if ( conv->IsOk() )
2533 return conv;
2534
2535 delete conv;
2536
2537 #if wxUSE_FONTMAP
2538 encoding =
2539 wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2540 #endif // wxUSE_FONTMAP
2541 }
2542 #if wxUSE_FONTMAP
2543 {
2544 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
2545 if ( it != gs_nameCache.end() )
2546 {
2547 if ( it->second.empty() )
2548 return NULL;
2549
2550 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second.ToAscii());
2551 if ( conv->IsOk() )
2552 return conv;
2553
2554 delete conv;
2555 }
2556
2557 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
2558 // CS : in case this does not return valid names (eg for MacRoman)
2559 // encoding got a 'failure' entry in the cache all the same,
2560 // although it just has to be created using a different method, so
2561 // only store failed iconv creation attempts (or perhaps we
2562 // shoulnd't do this at all ?)
2563 if ( names[0] != NULL )
2564 {
2565 for ( ; *names; ++names )
2566 {
2567 // FIXME-UTF8: wxFontMapperBase::GetAllEncodingNames()
2568 // will need changes that will obsolete this
2569 wxString name(*names);
2570 wxMBConv_iconv *conv = new wxMBConv_iconv(name.ToAscii());
2571 if ( conv->IsOk() )
2572 {
2573 gs_nameCache[encoding] = *names;
2574 return conv;
2575 }
2576
2577 delete conv;
2578 }
2579
2580 gs_nameCache[encoding] = _T(""); // cache the failure
2581 }
2582 }
2583 #endif // wxUSE_FONTMAP
2584 }
2585 #endif // HAVE_ICONV
2586
2587 #ifdef wxHAVE_WIN32_MB2WC
2588 {
2589 #if wxUSE_FONTMAP
2590 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
2591 : new wxMBConv_win32(m_encoding);
2592 if ( conv->IsOk() )
2593 return conv;
2594
2595 delete conv;
2596 #else
2597 return NULL;
2598 #endif
2599 }
2600 #endif // wxHAVE_WIN32_MB2WC
2601
2602 #ifdef __DARWIN__
2603 {
2604 // leave UTF16 and UTF32 to the built-ins of wx
2605 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
2606 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
2607 {
2608 #if wxUSE_FONTMAP
2609 wxMBConv_cf *conv = m_name ? new wxMBConv_cf(m_name)
2610 : new wxMBConv_cf(m_encoding);
2611 #else
2612 wxMBConv_cf *conv = new wxMBConv_cf(m_encoding);
2613 #endif
2614
2615 if ( conv->IsOk() )
2616 return conv;
2617
2618 delete conv;
2619 }
2620 }
2621 #endif // __DARWIN__
2622
2623 // step (2)
2624 wxFontEncoding enc = m_encoding;
2625 #if wxUSE_FONTMAP
2626 if ( enc == wxFONTENCODING_SYSTEM && m_name )
2627 {
2628 // use "false" to suppress interactive dialogs -- we can be called from
2629 // anywhere and popping up a dialog from here is the last thing we want to
2630 // do
2631 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
2632 }
2633 #endif // wxUSE_FONTMAP
2634
2635 switch ( enc )
2636 {
2637 case wxFONTENCODING_UTF7:
2638 return new wxMBConvUTF7;
2639
2640 case wxFONTENCODING_UTF8:
2641 return new wxMBConvUTF8;
2642
2643 case wxFONTENCODING_UTF16BE:
2644 return new wxMBConvUTF16BE;
2645
2646 case wxFONTENCODING_UTF16LE:
2647 return new wxMBConvUTF16LE;
2648
2649 case wxFONTENCODING_UTF32BE:
2650 return new wxMBConvUTF32BE;
2651
2652 case wxFONTENCODING_UTF32LE:
2653 return new wxMBConvUTF32LE;
2654
2655 default:
2656 // nothing to do but put here to suppress gcc warnings
2657 break;
2658 }
2659
2660 // step (3)
2661 #if wxUSE_FONTMAP
2662 {
2663 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
2664 : new wxMBConv_wxwin(m_encoding);
2665 if ( conv->IsOk() )
2666 return conv;
2667
2668 delete conv;
2669 }
2670 #endif // wxUSE_FONTMAP
2671
2672 // NB: This is a hack to prevent deadlock. What could otherwise happen
2673 // in Unicode build: wxConvLocal creation ends up being here
2674 // because of some failure and logs the error. But wxLog will try to
2675 // attach a timestamp, for which it will need wxConvLocal (to convert
2676 // time to char* and then wchar_t*), but that fails, tries to log the
2677 // error, but wxLog has an (already locked) critical section that
2678 // guards the static buffer.
2679 static bool alreadyLoggingError = false;
2680 if (!alreadyLoggingError)
2681 {
2682 alreadyLoggingError = true;
2683 wxLogError(_("Cannot convert from the charset '%s'!"),
2684 m_name ? m_name
2685 :
2686 #if wxUSE_FONTMAP
2687 (const char*)wxFontMapperBase::GetEncodingDescription(m_encoding).ToAscii()
2688 #else // !wxUSE_FONTMAP
2689 (const char*)wxString::Format(_("encoding %i"), m_encoding).ToAscii()
2690 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
2691 );
2692
2693 alreadyLoggingError = false;
2694 }
2695
2696 return NULL;
2697 }
2698
2699 void wxCSConv::CreateConvIfNeeded() const
2700 {
2701 if ( m_deferred )
2702 {
2703 wxCSConv *self = (wxCSConv *)this; // const_cast
2704
2705 // if we don't have neither the name nor the encoding, use the default
2706 // encoding for this system
2707 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
2708 {
2709 #if wxUSE_INTL
2710 self->m_encoding = wxLocale::GetSystemEncoding();
2711 #else
2712 // fallback to some reasonable default:
2713 self->m_encoding = wxFONTENCODING_ISO8859_1;
2714 #endif // wxUSE_INTL
2715 }
2716
2717 self->m_convReal = DoCreate();
2718 self->m_deferred = false;
2719 }
2720 }
2721
2722 bool wxCSConv::IsOk() const
2723 {
2724 CreateConvIfNeeded();
2725
2726 // special case: no convReal created for wxFONTENCODING_ISO8859_1
2727 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
2728 return true; // always ok as we do it ourselves
2729
2730 // m_convReal->IsOk() is called at its own creation, so we know it must
2731 // be ok if m_convReal is non-NULL
2732 return m_convReal != NULL;
2733 }
2734
2735 size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
2736 const char *src, size_t srcLen) const
2737 {
2738 CreateConvIfNeeded();
2739
2740 if (m_convReal)
2741 return m_convReal->ToWChar(dst, dstLen, src, srcLen);
2742
2743 // latin-1 (direct)
2744 return wxMBConv::ToWChar(dst, dstLen, src, srcLen);
2745 }
2746
2747 size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
2748 const wchar_t *src, size_t srcLen) const
2749 {
2750 CreateConvIfNeeded();
2751
2752 if (m_convReal)
2753 return m_convReal->FromWChar(dst, dstLen, src, srcLen);
2754
2755 // latin-1 (direct)
2756 return wxMBConv::FromWChar(dst, dstLen, src, srcLen);
2757 }
2758
2759 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
2760 {
2761 CreateConvIfNeeded();
2762
2763 if (m_convReal)
2764 return m_convReal->MB2WC(buf, psz, n);
2765
2766 // latin-1 (direct)
2767 size_t len = strlen(psz);
2768
2769 if (buf)
2770 {
2771 for (size_t c = 0; c <= len; c++)
2772 buf[c] = (unsigned char)(psz[c]);
2773 }
2774
2775 return len;
2776 }
2777
2778 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
2779 {
2780 CreateConvIfNeeded();
2781
2782 if (m_convReal)
2783 return m_convReal->WC2MB(buf, psz, n);
2784
2785 // latin-1 (direct)
2786 const size_t len = wxWcslen(psz);
2787 if (buf)
2788 {
2789 for (size_t c = 0; c <= len; c++)
2790 {
2791 if (psz[c] > 0xFF)
2792 return wxCONV_FAILED;
2793
2794 buf[c] = (char)psz[c];
2795 }
2796 }
2797 else
2798 {
2799 for (size_t c = 0; c <= len; c++)
2800 {
2801 if (psz[c] > 0xFF)
2802 return wxCONV_FAILED;
2803 }
2804 }
2805
2806 return len;
2807 }
2808
2809 size_t wxCSConv::GetMBNulLen() const
2810 {
2811 CreateConvIfNeeded();
2812
2813 if ( m_convReal )
2814 {
2815 return m_convReal->GetMBNulLen();
2816 }
2817
2818 // otherwise, we are ISO-8859-1
2819 return 1;
2820 }
2821
2822 #if wxUSE_UNICODE_UTF8
2823 bool wxCSConv::IsUTF8() const
2824 {
2825 CreateConvIfNeeded();
2826
2827 if ( m_convReal )
2828 {
2829 return m_convReal->IsUTF8();
2830 }
2831
2832 // otherwise, we are ISO-8859-1
2833 return false;
2834 }
2835 #endif
2836
2837
2838 #if wxUSE_UNICODE
2839
2840 wxWCharBuffer wxSafeConvertMB2WX(const char *s)
2841 {
2842 if ( !s )
2843 return wxWCharBuffer();
2844
2845 wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
2846 if ( !wbuf )
2847 wbuf = wxMBConvUTF8().cMB2WX(s);
2848 if ( !wbuf )
2849 wbuf = wxConvISO8859_1.cMB2WX(s);
2850
2851 return wbuf;
2852 }
2853
2854 wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
2855 {
2856 if ( !ws )
2857 return wxCharBuffer();
2858
2859 wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
2860 if ( !buf )
2861 buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
2862
2863 return buf;
2864 }
2865
2866 #endif // wxUSE_UNICODE
2867
2868 // ----------------------------------------------------------------------------
2869 // globals
2870 // ----------------------------------------------------------------------------
2871
// NB: The reason why we create the converter objects in this convoluted way,
//     using a factory function instead of a global variable, is that they
//     may be used at static initialization time (some of them are used by
//     wxString ctors and there may be a global wxString object), in other
//     words, possibly _before_ the global converter object would be
//     initialized.
2878
// NOTE(review): these names are presumably defined as macros by the header;
// they are undefined here so that they can be used as plain identifiers
// below -- confirm against wx/strconv.h
#undef wxConvLibc
#undef wxConvUTF8
#undef wxConvUTF7
#undef wxConvLocal
#undef wxConvISO8859_1

// For a converter called "name" this defines:
//  - a global pointer name##Ptr of type klass*, initially NULL
//  - a function wxGet_##name##Ptr() returning the address of a function
//    local static object of type impl_klass constructed with ctor_args
//  - a file-static pointer initialized by calling this function
#define WX_DEFINE_GLOBAL_CONV2(klass, impl_klass, name, ctor_args)      \
    WXDLLIMPEXP_DATA_BASE(klass*) name##Ptr = NULL;                     \
    WXDLLIMPEXP_BASE klass* wxGet_##name##Ptr()                         \
    {                                                                   \
        static impl_klass name##Obj ctor_args;                          \
        return &name##Obj;                                              \
    }                                                                   \
    /* this ensures that all global converter objects are created */    \
    /* by the time static initialization is done, i.e. before any */    \
    /* thread is launched: */                                           \
    static klass* gs_##name##instance = wxGet_##name##Ptr()

// Same as WX_DEFINE_GLOBAL_CONV2() when the object has the same type as the
// pointer through which it is accessed.
#define WX_DEFINE_GLOBAL_CONV(klass, name, ctor_args) \
    WX_DEFINE_GLOBAL_CONV2(klass, klass, name, ctor_args)
2899
// wxConvLibc is implemented by wxMBConv_win32 under Windows and by
// wxMBConvLibc elsewhere
#ifdef __WINDOWS__
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConv_win32, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#else
    WX_DEFINE_GLOBAL_CONV2(wxMBConv, wxMBConvLibc, wxConvLibc, wxEMPTY_PARAMETER_VALUE);
#endif

WX_DEFINE_GLOBAL_CONV(wxMBConvUTF8, wxConvUTF8, wxEMPTY_PARAMETER_VALUE);
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, wxEMPTY_PARAMETER_VALUE);

WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));

// wxConvCurrent initially points to the libc converter and wxConvUI to the
// wxConvLocal one
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr();
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr();

#ifdef __DARWIN__
// The xnu kernel always communicates file paths in decomposed UTF-8.
// WARNING: Are we sure that CFString's conversion will cause decomposition?
static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8);
#endif

// converter for file names: the CoreFoundation-based UTF-8 converter defined
// above under Darwin, the libc converter on the other platforms
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
#ifdef __DARWIN__
                                    &wxConvMacUTF8DObj;
#else // !__DARWIN__
                                    wxGet_wxConvLibcPtr();
#endif // __DARWIN__/!__DARWIN__
2927
2928 #else // !wxUSE_WCHAR_T
2929
// FIXME-UTF8: remove this, wxUSE_WCHAR_T is required now
// stand-ins in absence of wchar_t: all the global converters are plain
// wxMBConv objects in this case
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
2936
2937 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T