]> git.saurik.com Git - wxWidgets.git/blob - src/common/strconv.cpp
WinCE build fixes.
[wxWidgets.git] / src / common / strconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id$
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
17
18 #ifndef WX_PRECOMP
19 #include "wx/intl.h"
20 #include "wx/log.h"
21 #include "wx/utils.h"
22 #include "wx/hashmap.h"
23 #endif
24
25 #include "wx/strconv.h"
26
27 #if wxUSE_WCHAR_T
28
29 #ifdef __WINDOWS__
30 #include "wx/msw/private.h"
31 #include "wx/msw/missing.h"
32 #endif
33
34 #ifndef __WXWINCE__
35 #include <errno.h>
36 #endif
37
38 #include <ctype.h>
39 #include <string.h>
40 #include <stdlib.h>
41
42 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
43 #define wxHAVE_WIN32_MB2WC
44 #endif
45
46 #ifdef __SALFORDC__
47 #include <clib.h>
48 #endif
49
50 #ifdef HAVE_ICONV
51 #include <iconv.h>
52 #include "wx/thread.h"
53 #endif
54
55 #include "wx/encconv.h"
56 #include "wx/fontmap.h"
57
58 #ifdef __WXMAC__
59 #ifndef __DARWIN__
60 #include <ATSUnicode.h>
61 #include <TextCommon.h>
62 #include <TextEncodingConverter.h>
63 #endif
64
65 // includes Mac headers
66 #include "wx/mac/private.h"
67 #endif
68
69
70 #define TRACE_STRCONV _T("strconv")
71
72 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
73 // be 4 bytes
74 #if SIZEOF_WCHAR_T == 2
75 #define WC_UTF16
76 #endif
77
78
79 // ============================================================================
80 // implementation
81 // ============================================================================
82
// helper used when looking for string terminators: returns true if at least
// one of the n bytes starting at p is not NUL and false if all of them are
// NULs (in particular, false is returned when n is 0)
static bool NotAllNULs(const char *p, size_t n)
{
    for ( const char * const end = p + n; p != end; ++p )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
91
92 // ----------------------------------------------------------------------------
93 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
94 // ----------------------------------------------------------------------------
95
96 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
97 {
98 if (input <= 0xffff)
99 {
100 if (output)
101 *output = (wxUint16) input;
102
103 return 1;
104 }
105 else if (input >= 0x110000)
106 {
107 return wxCONV_FAILED;
108 }
109 else
110 {
111 if (output)
112 {
113 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
114 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
115 }
116
117 return 2;
118 }
119 }
120
121 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
122 {
123 if ((*input < 0xd800) || (*input > 0xdfff))
124 {
125 output = *input;
126 return 1;
127 }
128 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
129 {
130 output = *input;
131 return wxCONV_FAILED;
132 }
133 else
134 {
135 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
136 return 2;
137 }
138 }
139
140 #ifdef WC_UTF16
141 typedef wchar_t wxDecodeSurrogate_t;
142 #else // !WC_UTF16
143 typedef wxUint16 wxDecodeSurrogate_t;
144 #endif // WC_UTF16/!WC_UTF16
145
146 // returns the next UTF-32 character from the wchar_t buffer and advances the
147 // pointer to the character after this one
148 //
149 // if an invalid character is found, *pSrc is set to NULL, the caller must
150 // check for this
151 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
152 {
153 wxUint32 out;
154 const size_t
155 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
156 if ( n == wxCONV_FAILED )
157 *pSrc = NULL;
158 else
159 *pSrc += n;
160
161 return out;
162 }
163
164 // ----------------------------------------------------------------------------
165 // wxMBConv
166 // ----------------------------------------------------------------------------
167
168 size_t
169 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
170 const char *src, size_t srcLen) const
171 {
172 // although new conversion classes are supposed to implement this function
173 // directly, the existins ones only implement the old MB2WC() and so, to
174 // avoid to have to rewrite all conversion classes at once, we provide a
175 // default (but not efficient) implementation of this one in terms of the
176 // old function by copying the input to ensure that it's NUL-terminated and
177 // then using MB2WC() to convert it
178
179 // the number of chars [which would be] written to dst [if it were not NULL]
180 size_t dstWritten = 0;
181
182 // the number of NULs terminating this string
183 size_t nulLen = 0; // not really needed, but just to avoid warnings
184
185 // if we were not given the input size we just have to assume that the
186 // string is properly terminated as we have no way of knowing how long it
187 // is anyhow, but if we do have the size check whether there are enough
188 // NULs at the end
189 wxCharBuffer bufTmp;
190 const char *srcEnd;
191 if ( srcLen != wxNO_LEN )
192 {
193 // we need to know how to find the end of this string
194 nulLen = GetMBNulLen();
195 if ( nulLen == wxCONV_FAILED )
196 return wxCONV_FAILED;
197
198 // if there are enough NULs we can avoid the copy
199 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
200 {
201 // make a copy in order to properly NUL-terminate the string
202 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
203 char * const p = bufTmp.data();
204 memcpy(p, src, srcLen);
205 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
206 *s = '\0';
207
208 src = bufTmp;
209 }
210
211 srcEnd = src + srcLen;
212 }
213 else // quit after the first loop iteration
214 {
215 srcEnd = NULL;
216 }
217
218 for ( ;; )
219 {
220 // try to convert the current chunk
221 size_t lenChunk = MB2WC(NULL, src, 0);
222 if ( lenChunk == wxCONV_FAILED )
223 return wxCONV_FAILED;
224
225 lenChunk++; // for the L'\0' at the end of this chunk
226
227 dstWritten += lenChunk;
228
229 if ( lenChunk == 1 )
230 {
231 // nothing left in the input string, conversion succeeded
232 break;
233 }
234
235 if ( dst )
236 {
237 if ( dstWritten > dstLen )
238 return wxCONV_FAILED;
239
240 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
241 return wxCONV_FAILED;
242
243 dst += lenChunk;
244 }
245
246 if ( !srcEnd )
247 {
248 // we convert just one chunk in this case as this is the entire
249 // string anyhow
250 break;
251 }
252
253 // advance the input pointer past the end of this chunk
254 while ( NotAllNULs(src, nulLen) )
255 {
256 // notice that we must skip over multiple bytes here as we suppose
257 // that if NUL takes 2 or 4 bytes, then all the other characters do
258 // too and so if advanced by a single byte we might erroneously
259 // detect sequences of NUL bytes in the middle of the input
260 src += nulLen;
261 }
262
263 src += nulLen; // skipping over its terminator as well
264
265 // note that ">=" (and not just "==") is needed here as the terminator
266 // we skipped just above could be inside or just after the buffer
267 // delimited by inEnd
268 if ( src >= srcEnd )
269 break;
270 }
271
272 return dstWritten;
273 }
274
275 size_t
276 wxMBConv::FromWChar(char *dst, size_t dstLen,
277 const wchar_t *src, size_t srcLen) const
278 {
279 // the number of chars [which would be] written to dst [if it were not NULL]
280 size_t dstWritten = 0;
281
282 // make a copy of the input string unless it is already properly
283 // NUL-terminated
284 //
285 // if we don't know its length we have no choice but to assume that it is,
286 // indeed, properly terminated
287 wxWCharBuffer bufTmp;
288 if ( srcLen == wxNO_LEN )
289 {
290 srcLen = wxWcslen(src) + 1;
291 }
292 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
293 {
294 // make a copy in order to properly NUL-terminate the string
295 bufTmp = wxWCharBuffer(srcLen);
296 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
297 src = bufTmp;
298 }
299
300 const size_t lenNul = GetMBNulLen();
301 for ( const wchar_t * const srcEnd = src + srcLen;
302 src < srcEnd;
303 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
304 {
305 // try to convert the current chunk
306 size_t lenChunk = WC2MB(NULL, src, 0);
307
308 if ( lenChunk == wxCONV_FAILED )
309 return wxCONV_FAILED;
310
311 lenChunk += lenNul;
312 dstWritten += lenChunk;
313
314 if ( dst )
315 {
316 if ( dstWritten > dstLen )
317 return wxCONV_FAILED;
318
319 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
320 return wxCONV_FAILED;
321
322 dst += lenChunk;
323 }
324 }
325
326 return dstWritten;
327 }
328
329 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
330 {
331 size_t rc = ToWChar(outBuff, outLen, inBuff);
332 if ( rc != wxCONV_FAILED )
333 {
334 // ToWChar() returns the buffer length, i.e. including the trailing
335 // NUL, while this method doesn't take it into account
336 rc--;
337 }
338
339 return rc;
340 }
341
342 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
343 {
344 size_t rc = FromWChar(outBuff, outLen, inBuff);
345 if ( rc != wxCONV_FAILED )
346 {
347 rc -= GetMBNulLen();
348 }
349
350 return rc;
351 }
352
353 wxMBConv::~wxMBConv()
354 {
355 // nothing to do here (necessary for Darwin linking probably)
356 }
357
358 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
359 {
360 if ( psz )
361 {
362 // calculate the length of the buffer needed first
363 const size_t nLen = MB2WC(NULL, psz, 0);
364 if ( nLen != wxCONV_FAILED )
365 {
366 // now do the actual conversion
367 wxWCharBuffer buf(nLen /* +1 added implicitly */);
368
369 // +1 for the trailing NULL
370 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
371 return buf;
372 }
373 }
374
375 return wxWCharBuffer();
376 }
377
378 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
379 {
380 if ( pwz )
381 {
382 const size_t nLen = WC2MB(NULL, pwz, 0);
383 if ( nLen != wxCONV_FAILED )
384 {
385 // extra space for trailing NUL(s)
386 static const size_t extraLen = GetMaxMBNulLen();
387
388 wxCharBuffer buf(nLen + extraLen - 1);
389 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
390 return buf;
391 }
392 }
393
394 return wxCharBuffer();
395 }
396
397 const wxWCharBuffer
398 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
399 {
400 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
401 if ( dstLen != wxCONV_FAILED )
402 {
403 wxWCharBuffer wbuf(dstLen - 1);
404 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
405 {
406 if ( outLen )
407 {
408 *outLen = dstLen;
409 if ( wbuf[dstLen - 1] == L'\0' )
410 (*outLen)--;
411 }
412
413 return wbuf;
414 }
415 }
416
417 if ( outLen )
418 *outLen = 0;
419
420 return wxWCharBuffer();
421 }
422
423 const wxCharBuffer
424 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
425 {
426 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
427 if ( dstLen != wxCONV_FAILED )
428 {
429 // special case of empty input: can't allocate 0 size buffer below as
430 // wxCharBuffer insists on NUL-terminating it
431 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
432 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
433 {
434 if ( outLen )
435 {
436 *outLen = dstLen;
437
438 const size_t nulLen = GetMBNulLen();
439 if ( dstLen >= nulLen &&
440 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
441 {
442 // in this case the output is NUL-terminated and we're not
443 // supposed to count NUL
444 *outLen -= nulLen;
445 }
446 }
447
448 return buf;
449 }
450 }
451
452 if ( outLen )
453 *outLen = 0;
454
455 return wxCharBuffer();
456 }
457
458 // ----------------------------------------------------------------------------
459 // wxMBConvLibc
460 // ----------------------------------------------------------------------------
461
462 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
463 {
464 return wxMB2WC(buf, psz, n);
465 }
466
467 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
468 {
469 return wxWC2MB(buf, psz, n);
470 }
471
472 // ----------------------------------------------------------------------------
473 // wxConvBrokenFileNames
474 // ----------------------------------------------------------------------------
475
476 #ifdef __UNIX__
477
478 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
479 {
480 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
481 || wxStricmp(charset, _T("UTF8")) == 0 )
482 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
483 else
484 m_conv = new wxCSConv(charset);
485 }
486
487 #endif // __UNIX__
488
489 // ----------------------------------------------------------------------------
490 // UTF-7
491 // ----------------------------------------------------------------------------
492
493 // Implementation (C) 2004 Fredrik Roubert
494
495 //
496 // BASE64 decoding table
497 //
// Indexed by the byte value (the table has 256 entries, 8 per row): gives the
// 6 bit value of the bytes which are BASE64 digits ('A'-'Z' map to
// 0x00-0x19, 'a'-'z' to 0x1a-0x33, '0'-'9' to 0x34-0x3d, '+' to 0x3e and '/'
// to 0x3f) and 0xff for all the other bytes. wxMBConvUTF7::MB2WC() uses the
// 0xff entries to detect the end of a BASE64 run.
498 static const unsigned char utf7unb64[] =
499 {
500 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
501 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
502 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
503 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
504 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
505 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
506 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
507 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
508 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
509 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
510 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
511 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
512 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
513 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
514 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
515 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
516 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
517 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
518 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
519 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
520 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
521 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
522 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
523 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
524 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
525 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
526 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
527 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
528 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
529 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
530 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
531 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
532 };
533
// Decodes the NUL-terminated UTF-7 string psz to wide characters.
//
// If buf is NULL, nothing is stored and only the length of the result is
// computed, otherwise the decoding loop stops as soon as n characters have
// been produced. The trailing NUL is only written if there is still room for
// it (len < n).
//
// Returns the number of characters decoded, not counting the trailing NUL,
// or wxCONV_FAILED if a '+' starting a BASE64 run is followed by less than
// 8 bits of BASE64 data.
//
// NOTE(review): n is only checked at the top of the outer loop, so a BASE64
// run can store (and count) more than n characters in buf -- confirm that the
// callers always pass a big enough buffer.
534 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
535 {
536 size_t len = 0;
537
538 while ( *psz && (!buf || (len < n)) )
539 {
540 unsigned char cc = *psz++;
541 if (cc != '+')
542 {
543 // anything but '+' is copied as is (bytes >= 0x80 included)
544 if (buf)
545 *buf++ = cc;
546 len++;
547 }
548 else if (*psz == '-')
549 {
550 // "+-" is the encoded form of the plus sign itself
551 if (buf)
552 *buf++ = cc;
553 len++;
554 psz++;
555 }
556 else // start of BASE64 encoded string
557 {
// d accumulates the decoded bits, l is the number of bits in it which
// were not consumed yet, lsb is true when the next byte extracted is
// the low byte of the current character and ok becomes true when at
// least one byte could be extracted
558 bool lsb, ok;
559 unsigned int d, l;
560 for ( ok = lsb = false, d = 0, l = 0;
561 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
562 psz++ )
563 {
564 d <<= 6;
565 d += cc;
// extract all the complete bytes available: the characters are stored
// high byte first, so the first byte initializes *buf and the second
// one completes it and advances to the next character
566 for (l += 6; l >= 8; lsb = !lsb)
567 {
568 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
569 if (lsb)
570 {
571 if (buf)
572 *buf++ |= c;
573 len ++;
574 }
575 else
576 {
577 if (buf)
578 *buf = (wchar_t)(c << 8);
579 }
580
581 ok = true;
582 }
583 }
584
585 if ( !ok )
586 {
587 // in valid UTF7 we should have valid characters after '+'
588 return wxCONV_FAILED;
589 }
590
// the '-' terminating the run is optional and is skipped if present
591 if (*psz == '-')
592 psz++;
593 }
594 }
595
596 if ( buf && (len < n) )
597 *buf = '\0';
598
599 return len;
600 }
601
602 //
603 // BASE64 encoding table
604 //
// Maps a 6 bit value (0..63) to the corresponding BASE64 digit, this is the
// inverse of the non-0xff part of utf7unb64.
605 static const unsigned char utf7enb64[] =
606 {
607 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
608 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
609 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
610 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
611 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
612 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
613 'w', 'x', 'y', 'z', '0', '1', '2', '3',
614 '4', '5', '6', '7', '8', '9', '+', '/'
615 };
616
617 //
618 // UTF-7 encoding table
619 //
620 // 0 - Set D (directly encoded characters)
621 // 1 - Set O (optional direct characters)
622 // 2 - whitespace characters (optional)
623 // 3 - special characters
624 //
// Indexed by the character code (0..127, 16 entries per row), the values are
// the classes 0..3 listed above. wxMBConvUTF7::WC2MB() only writes the
// characters of class 0 directly and BASE64-encodes all the others.
625 static const unsigned char utf7encode[128] =
626 {
627 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
628 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
629 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
631 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
633 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
635 };
636
// Encodes the NUL-terminated wide string psz in UTF-7.
//
// The characters of class 0 in utf7encode are written as is, '+' is written
// as "+-" and any other character starts a "+...-" BASE64 run which continues
// until the next character of class 0 or the end of the string.
//
// If buf is NULL, nothing is stored and only the length of the result is
// computed, otherwise the encoding loop stops as soon as n bytes have been
// produced. The trailing NUL is only written if there is still room for it.
//
// Returns the number of bytes produced, not counting the trailing NUL, or
// wxCONV_FAILED if wchar_t is not UTF-16 and a character greater than 0xffff
// is found outside of a BASE64 run.
//
// NOTE(review): n is only checked at the top of the outer loop, so a BASE64
// run can write past the first n bytes of buf -- confirm that the callers
// always pass a big enough buffer.
// NOTE(review): the characters following the first one inside a BASE64 run
// are not compared with 0xffff, only their low 16 bits are encoded.
637 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
638 {
639 size_t len = 0;
640
641 while (*psz && ((!buf) || (len < n)))
642 {
643 wchar_t cc = *psz++;
644 if (cc < 0x80 && utf7encode[cc] < 1)
645 {
646 // directly encoded character
647 if (buf)
648 *buf++ = (char)cc;
649
650 len++;
651 }
652 #ifndef WC_UTF16
653 else if (((wxUint32)cc) > 0xffff)
654 {
655 // no surrogate pair generation (yet?)
656 return wxCONV_FAILED;
657 }
658 #endif
659 else
660 {
661 if (buf)
662 *buf++ = '+';
663
664 len++;
665 if (cc != '+')
666 {
667 // BASE64 encode string
// d accumulates the bits to encode and l is the number of bits in it
// which were not output yet
668 unsigned int lsb, d, l;
669 for (d = 0, l = 0; /*nothing*/; psz++)
670 {
// add the 2 bytes of the current character, high byte first, and output
// a BASE64 digit for each complete group of 6 bits
671 for (lsb = 0; lsb < 2; lsb ++)
672 {
673 d <<= 8;
674 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
675
676 for (l += 8; l >= 6; )
677 {
678 l -= 6;
679 if (buf)
680 *buf++ = utf7enb64[(d >> l) % 64];
681 len++;
682 }
683 }
684
// the run ends at the end of the string or before the next character
// which can be written directly, psz is only advanced past the
// characters which are included in the run
685 cc = *psz;
686 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
687 break;
688 }
689
// output the remaining 2 or 4 bits, padded with zero bits on the right
690 if (l != 0)
691 {
692 if (buf)
693 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
694
695 len++;
696 }
697 }
698
// this terminates the BASE64 run or completes "+-" for the plus sign
699 if (buf)
700 *buf++ = '-';
701 len++;
702 }
703 }
704
705 if (buf && (len < n))
706 *buf = 0;
707
708 return len;
709 }
710
711 // ----------------------------------------------------------------------------
712 // UTF-8
713 // ----------------------------------------------------------------------------
714
// utf8_max[i] is the greatest value which can be encoded using i + 1 bytes
// in UTF-8: wxMBConvUTF8::WC2MB() uses this table to choose the length of
// the sequence and wxMBConvUTF8::MB2WC() to reject the sequences which are
// longer than necessary for the value they encode
715 static wxUint32 utf8_max[]=
716 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
717
718 // boundaries of the private use area we use to (temporarily) remap the bytes
719 // which are invalid in a UTF-8 encoded string: each such byte b is mapped to
// the character wxUnicodePUA + b
720 const wxUint32 wxUnicodePUA = 0x100000;
721 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
722
// Decodes the NUL-terminated UTF-8 string psz to wide characters.
//
// If buf is NULL, nothing is stored and only the length of the result is
// computed, otherwise the decoding loop stops as soon as n characters have
// been produced. The trailing NUL is only written if there is still room for
// it (len < n).
//
// The handling of the invalid sequences depends on m_options: each of their
// bytes is either mapped to the character wxUnicodePUA + byte
// (MAP_INVALID_UTF8_TO_PUA) or written as a backslash followed by 3 octal
// digits (MAP_INVALID_UTF8_TO_OCTAL, in which case the backslashes present
// in the input are doubled), otherwise the conversion fails.
//
// Returns the number of wide characters, not counting the trailing NUL, or
// wxCONV_FAILED.
//
// NOTE(review): if WC_UTF16 is defined, encode_utf16() may store 2 units in
// buf even if len + 1 == n as only len < n is checked before calling it.
// NOTE(review): in the octal case len is increased by 4 even if the escape
// was not stored for lack of space, so the return value can be greater than
// the number of characters actually written.
723 size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
724 {
725 size_t len = 0;
726
727 while (*psz && ((!buf) || (len < n)))
728 {
// opsz remembers the start of the sequence for the invalid input handling
729 const char *opsz = psz;
730 bool invalid = false;
731 unsigned char cc = *psz++, fc = cc;
// count the leading 1 bits of the first byte
732 unsigned cnt;
733 for (cnt = 0; fc & 0x80; cnt++)
734 fc <<= 1;
735
736 if (!cnt)
737 {
738 // plain ASCII char
739 if (buf)
740 *buf++ = cc;
741 len++;
742
743 // escape the escape character for octal escapes
744 if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
745 && cc == '\\' && (!buf || len < n))
746 {
747 if (buf)
748 *buf++ = cc;
749 len++;
750 }
751 }
752 else
753 {
// from now on cnt is the number of continuation bytes expected
754 cnt--;
755 if (!cnt)
756 {
757 // invalid UTF-8 sequence: continuation byte without a lead byte
758 invalid = true;
759 }
760 else
761 {
// ocnt is the index in utf8_max of the greatest value which could have
// been encoded using a sequence shorter by one byte
762 unsigned ocnt = cnt - 1;
763 wxUint32 res = cc & (0x3f >> cnt);
764 while (cnt--)
765 {
766 cc = *psz;
767 if ((cc & 0xC0) != 0x80)
768 {
769 // invalid UTF-8 sequence
770 invalid = true;
771 break;
772 }
773
774 psz++;
775 res = (res << 6) | (cc & 0x3f);
776 }
777
778 if (invalid || res <= utf8_max[ocnt])
779 {
780 // illegal UTF-8 encoding (truncated or longer than necessary)
781 invalid = true;
782 }
783 else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
784 res >= wxUnicodePUA && res < wxUnicodePUAEnd)
785 {
786 // if one of our PUA characters turns up externally
787 // it must also be treated as an illegal sequence
788 // (a bit like you have to escape an escape character)
789 invalid = true;
790 }
791 else
792 {
793 #ifdef WC_UTF16
794 // cast is ok because wchar_t == wxUint16 if WC_UTF16
795 size_t pa = encode_utf16(res, (wxUint16 *)buf);
796 if (pa == wxCONV_FAILED)
797 {
798 invalid = true;
799 }
800 else
801 {
802 if (buf)
803 buf += pa;
804 len += pa;
805 }
806 #else // !WC_UTF16
807 if (buf)
808 *buf++ = (wchar_t)res;
809 len++;
810 #endif // WC_UTF16/!WC_UTF16
811 }
812 }
813
814 if (invalid)
815 {
// the bytes from opsz up to (not including) psz form the invalid sequence
816 if (m_options & MAP_INVALID_UTF8_TO_PUA)
817 {
818 while (opsz < psz && (!buf || len < n))
819 {
820 #ifdef WC_UTF16
821 // cast is ok because wchar_t == wxUint16 if WC_UTF16
822 size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
823 wxASSERT(pa != wxCONV_FAILED);
824 if (buf)
825 buf += pa;
826 opsz++;
827 len += pa;
828 #else
829 if (buf)
830 *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
831 opsz++;
832 len++;
833 #endif
834 }
835 }
836 else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
837 {
838 while (opsz < psz && (!buf || len < n))
839 {
// the escape is only stored if all 4 of its characters fit in the buffer
840 if ( buf && len + 3 < n )
841 {
842 unsigned char on = *opsz;
843 *buf++ = L'\\';
844 *buf++ = (wchar_t)( L'0' + on / 0100 );
845 *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
846 *buf++ = (wchar_t)( L'0' + on % 010 );
847 }
848
849 opsz++;
850 len += 4;
851 }
852 }
853 else // MAP_INVALID_UTF8_NOT
854 {
855 return wxCONV_FAILED;
856 }
857 }
858 }
859 }
860
861 if (buf && (len < n))
862 *buf = 0;
863
864 return len;
865 }
866
// returns true if wch is one of the octal digits L'0'..L'7'
static inline bool isoctal(wchar_t wch)
{
    return !(wch < L'0' || wch > L'7');
}
871
// Encodes the NUL-terminated wide string psz in UTF-8.
//
// If buf is NULL, nothing is stored and only the length of the result is
// computed, otherwise the encoding loop stops as soon as n bytes have been
// produced. The trailing NUL is only written if there is still room for it.
//
// Depending on m_options, the mappings done by MB2WC() for the invalid input
// are undone: the characters in the wxUnicodePUA..wxUnicodePUAEnd range
// become single bytes again (MAP_INVALID_UTF8_TO_PUA) or, with
// MAP_INVALID_UTF8_TO_OCTAL, a double backslash becomes a single one and a
// backslash followed by 3 octal digits becomes the byte with this value.
//
// Returns the number of bytes produced, not counting the trailing NUL.
//
// NOTE(review): n is only checked at the top of the loop, so a multibyte
// sequence can be written past the first n bytes of buf -- confirm that the
// callers always pass a big enough buffer.
872 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
873 {
874 size_t len = 0;
875
876 while (*psz && ((!buf) || (len < n)))
877 {
878 wxUint32 cc;
879
880 #ifdef WC_UTF16
881 // cast is ok for WC_UTF16
// if the surrogate can't be decoded, cc is set to this unit itself and
// only this unit is skipped
882 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
883 psz += (pa == wxCONV_FAILED) ? 1 : pa;
884 #else
885 cc = (*psz++) & 0x7fffffff;
886 #endif
887
888 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
889 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
890 {
// restore the original byte
891 if (buf)
892 *buf++ = (char)(cc - wxUnicodePUA);
893 len++;
894 }
895 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
896 && cc == L'\\' && psz[0] == L'\\' )
897 {
// escaped backslash: output just one of them
898 if (buf)
899 *buf++ = (char)cc;
900 psz++;
901 len++;
902 }
903 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
904 cc == L'\\' &&
905 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
906 {
// octal escape: output the byte whose value is given by the 3 digits
// (the result is truncated to char)
907 if (buf)
908 {
909 *buf++ = (char) ((psz[0] - L'0') * 0100 +
910 (psz[1] - L'0') * 010 +
911 (psz[2] - L'0'));
912 }
913
914 psz += 3;
915 len++;
916 }
917 else
918 {
// cnt is the number of continuation bytes needed to encode cc
919 unsigned cnt;
920 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
921 {
922 }
923
924 if (!cnt)
925 {
926 // plain ASCII char
927 if (buf)
928 *buf++ = (char) cc;
929 len++;
930 }
931 else
932 {
933 len += cnt + 1;
934 if (buf)
935 {
// the lead byte has cnt + 1 high bits set and contains the most
// significant bits of cc, each of the continuation bytes has the form
// 10xxxxxx and contains 6 more bits of it
936 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
937 while (cnt--)
938 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
939 }
940 }
941 }
942 }
943
944 if (buf && (len < n))
945 *buf = 0;
946
947 return len;
948 }
949
950 // ============================================================================
951 // UTF-16
952 // ============================================================================
953
954 #ifdef WORDS_BIGENDIAN
955 #define wxMBConvUTF16straight wxMBConvUTF16BE
956 #define wxMBConvUTF16swap wxMBConvUTF16LE
957 #else
958 #define wxMBConvUTF16swap wxMBConvUTF16BE
959 #define wxMBConvUTF16straight wxMBConvUTF16LE
960 #endif
961
962 /* static */
963 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
964 {
965 if ( srcLen == wxNO_LEN )
966 {
967 // count the number of bytes in input, including the trailing NULs
968 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
969 for ( srcLen = 1; *inBuff++; srcLen++ )
970 ;
971
972 srcLen *= BYTES_PER_CHAR;
973 }
974 else // we already have the length
975 {
976 // we can only convert an entire number of UTF-16 characters
977 if ( srcLen % BYTES_PER_CHAR )
978 return wxCONV_FAILED;
979 }
980
981 return srcLen;
982 }
983
984 // case when in-memory representation is UTF-16 too
985 #ifdef WC_UTF16
986
987 // ----------------------------------------------------------------------------
988 // conversions without endianness change
989 // ----------------------------------------------------------------------------
990
991 size_t
992 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
993 const char *src, size_t srcLen) const
994 {
995 // set up the scene for using memcpy() (which is presumably more efficient
996 // than copying the bytes one by one)
997 srcLen = GetLength(src, srcLen);
998 if ( srcLen == wxNO_LEN )
999 return wxCONV_FAILED;
1000
1001 const size_t inLen = srcLen / BYTES_PER_CHAR;
1002 if ( dst )
1003 {
1004 if ( dstLen < inLen )
1005 return wxCONV_FAILED;
1006
1007 memcpy(dst, src, srcLen);
1008 }
1009
1010 return inLen;
1011 }
1012
1013 size_t
1014 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1015 const wchar_t *src, size_t srcLen) const
1016 {
1017 if ( srcLen == wxNO_LEN )
1018 srcLen = wxWcslen(src) + 1;
1019
1020 srcLen *= BYTES_PER_CHAR;
1021
1022 if ( dst )
1023 {
1024 if ( dstLen < srcLen )
1025 return wxCONV_FAILED;
1026
1027 memcpy(dst, src, srcLen);
1028 }
1029
1030 return srcLen;
1031 }
1032
1033 // ----------------------------------------------------------------------------
1034 // endian-reversing conversions
1035 // ----------------------------------------------------------------------------
1036
1037 size_t
1038 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1039 const char *src, size_t srcLen) const
1040 {
1041 srcLen = GetLength(src, srcLen);
1042 if ( srcLen == wxNO_LEN )
1043 return wxCONV_FAILED;
1044
1045 srcLen /= BYTES_PER_CHAR;
1046
1047 if ( dst )
1048 {
1049 if ( dstLen < srcLen )
1050 return wxCONV_FAILED;
1051
1052 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1053 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1054 {
1055 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1056 }
1057 }
1058
1059 return srcLen;
1060 }
1061
1062 size_t
1063 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1064 const wchar_t *src, size_t srcLen) const
1065 {
1066 if ( srcLen == wxNO_LEN )
1067 srcLen = wxWcslen(src) + 1;
1068
1069 srcLen *= BYTES_PER_CHAR;
1070
1071 if ( dst )
1072 {
1073 if ( dstLen < srcLen )
1074 return wxCONV_FAILED;
1075
1076 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1077 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1078 {
1079 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1080 }
1081 }
1082
1083 return srcLen;
1084 }
1085
1086 #else // !WC_UTF16: wchar_t is UTF-32
1087
1088 // ----------------------------------------------------------------------------
1089 // conversions without endianness change
1090 // ----------------------------------------------------------------------------
1091
1092 size_t
1093 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1094 const char *src, size_t srcLen) const
1095 {
1096 srcLen = GetLength(src, srcLen);
1097 if ( srcLen == wxNO_LEN )
1098 return wxCONV_FAILED;
1099
1100 const size_t inLen = srcLen / BYTES_PER_CHAR;
1101 if ( !dst )
1102 {
1103 // optimization: return maximal space which could be needed for this
1104 // string even if the real size could be smaller if the buffer contains
1105 // any surrogates
1106 return inLen;
1107 }
1108
1109 size_t outLen = 0;
1110 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1111 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1112 {
1113 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1114 if ( !inBuff )
1115 return wxCONV_FAILED;
1116
1117 if ( ++outLen > dstLen )
1118 return wxCONV_FAILED;
1119
1120 *dst++ = ch;
1121 }
1122
1123
1124 return outLen;
1125 }
1126
1127 size_t
1128 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1129 const wchar_t *src, size_t srcLen) const
1130 {
1131 if ( srcLen == wxNO_LEN )
1132 srcLen = wxWcslen(src) + 1;
1133
1134 size_t outLen = 0;
1135 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1136 for ( size_t n = 0; n < srcLen; n++ )
1137 {
1138 wxUint16 cc[2];
1139 const size_t numChars = encode_utf16(*src++, cc);
1140 if ( numChars == wxCONV_FAILED )
1141 return wxCONV_FAILED;
1142
1143 outLen += numChars * BYTES_PER_CHAR;
1144 if ( outBuff )
1145 {
1146 if ( outLen > dstLen )
1147 return wxCONV_FAILED;
1148
1149 *outBuff++ = cc[0];
1150 if ( numChars == 2 )
1151 {
1152 // second character of a surrogate
1153 *outBuff++ = cc[1];
1154 }
1155 }
1156 }
1157
1158 return outLen;
1159 }
1160
1161 // ----------------------------------------------------------------------------
1162 // endian-reversing conversions
1163 // ----------------------------------------------------------------------------
1164
1165 size_t
1166 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1167 const char *src, size_t srcLen) const
1168 {
1169 srcLen = GetLength(src, srcLen);
1170 if ( srcLen == wxNO_LEN )
1171 return wxCONV_FAILED;
1172
1173 const size_t inLen = srcLen / BYTES_PER_CHAR;
1174 if ( !dst )
1175 {
1176 // optimization: return maximal space which could be needed for this
1177 // string even if the real size could be smaller if the buffer contains
1178 // any surrogates
1179 return inLen;
1180 }
1181
1182 size_t outLen = 0;
1183 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1184 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1185 {
1186 wxUint32 ch;
1187 wxUint16 tmp[2];
1188
1189 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1190 inBuff++;
1191 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1192
1193 const size_t numChars = decode_utf16(tmp, ch);
1194 if ( numChars == wxCONV_FAILED )
1195 return wxCONV_FAILED;
1196
1197 if ( numChars == 2 )
1198 inBuff++;
1199
1200 if ( ++outLen > dstLen )
1201 return wxCONV_FAILED;
1202
1203 *dst++ = ch;
1204 }
1205
1206
1207 return outLen;
1208 }
1209
1210 size_t
1211 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1212 const wchar_t *src, size_t srcLen) const
1213 {
1214 if ( srcLen == wxNO_LEN )
1215 srcLen = wxWcslen(src) + 1;
1216
1217 size_t outLen = 0;
1218 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1219 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1220 {
1221 wxUint16 cc[2];
1222 const size_t numChars = encode_utf16(*src, cc);
1223 if ( numChars == wxCONV_FAILED )
1224 return wxCONV_FAILED;
1225
1226 outLen += numChars * BYTES_PER_CHAR;
1227 if ( outBuff )
1228 {
1229 if ( outLen > dstLen )
1230 return wxCONV_FAILED;
1231
1232 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1233 if ( numChars == 2 )
1234 {
1235 // second character of a surrogate
1236 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1237 }
1238 }
1239 }
1240
1241 return outLen;
1242 }
1243
1244 #endif // WC_UTF16/!WC_UTF16
1245
1246
1247 // ============================================================================
1248 // UTF-32
1249 // ============================================================================
1250
1251 #ifdef WORDS_BIGENDIAN
1252 #define wxMBConvUTF32straight wxMBConvUTF32BE
1253 #define wxMBConvUTF32swap wxMBConvUTF32LE
1254 #else
1255 #define wxMBConvUTF32swap wxMBConvUTF32BE
1256 #define wxMBConvUTF32straight wxMBConvUTF32LE
1257 #endif
1258
1259
1260 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1261 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1262
1263 /* static */
1264 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1265 {
1266 if ( srcLen == wxNO_LEN )
1267 {
1268 // count the number of bytes in input, including the trailing NULs
1269 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1270 for ( srcLen = 1; *inBuff++; srcLen++ )
1271 ;
1272
1273 srcLen *= BYTES_PER_CHAR;
1274 }
1275 else // we already have the length
1276 {
1277 // we can only convert an entire number of UTF-32 characters
1278 if ( srcLen % BYTES_PER_CHAR )
1279 return wxCONV_FAILED;
1280 }
1281
1282 return srcLen;
1283 }
1284
1285 // case when in-memory representation is UTF-16
1286 #ifdef WC_UTF16
1287
1288 // ----------------------------------------------------------------------------
1289 // conversions without endianness change
1290 // ----------------------------------------------------------------------------
1291
1292 size_t
1293 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1294 const char *src, size_t srcLen) const
1295 {
1296 srcLen = GetLength(src, srcLen);
1297 if ( srcLen == wxNO_LEN )
1298 return wxCONV_FAILED;
1299
1300 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1301 const size_t inLen = srcLen / BYTES_PER_CHAR;
1302 size_t outLen = 0;
1303 for ( size_t n = 0; n < inLen; n++ )
1304 {
1305 wxUint16 cc[2];
1306 const size_t numChars = encode_utf16(*inBuff++, cc);
1307 if ( numChars == wxCONV_FAILED )
1308 return wxCONV_FAILED;
1309
1310 outLen += numChars;
1311 if ( dst )
1312 {
1313 if ( outLen > dstLen )
1314 return wxCONV_FAILED;
1315
1316 *dst++ = cc[0];
1317 if ( numChars == 2 )
1318 {
1319 // second character of a surrogate
1320 *dst++ = cc[1];
1321 }
1322 }
1323 }
1324
1325 return outLen;
1326 }
1327
1328 size_t
1329 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1330 const wchar_t *src, size_t srcLen) const
1331 {
1332 if ( srcLen == wxNO_LEN )
1333 srcLen = wxWcslen(src) + 1;
1334
1335 if ( !dst )
1336 {
1337 // optimization: return maximal space which could be needed for this
1338 // string instead of the exact amount which could be less if there are
1339 // any surrogates in the input
1340 //
1341 // we consider that surrogates are rare enough to make it worthwhile to
1342 // avoid running the loop below at the cost of slightly extra memory
1343 // consumption
1344 return srcLen * BYTES_PER_CHAR;
1345 }
1346
1347 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1348 size_t outLen = 0;
1349 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1350 {
1351 const wxUint32 ch = wxDecodeSurrogate(&src);
1352 if ( !src )
1353 return wxCONV_FAILED;
1354
1355 outLen += BYTES_PER_CHAR;
1356
1357 if ( outLen > dstLen )
1358 return wxCONV_FAILED;
1359
1360 *outBuff++ = ch;
1361 }
1362
1363 return outLen;
1364 }
1365
1366 // ----------------------------------------------------------------------------
1367 // endian-reversing conversions
1368 // ----------------------------------------------------------------------------
1369
1370 size_t
1371 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1372 const char *src, size_t srcLen) const
1373 {
1374 srcLen = GetLength(src, srcLen);
1375 if ( srcLen == wxNO_LEN )
1376 return wxCONV_FAILED;
1377
1378 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1379 const size_t inLen = srcLen / BYTES_PER_CHAR;
1380 size_t outLen = 0;
1381 for ( size_t n = 0; n < inLen; n++, inBuff++ )
1382 {
1383 wxUint16 cc[2];
1384 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1385 if ( numChars == wxCONV_FAILED )
1386 return wxCONV_FAILED;
1387
1388 outLen += numChars;
1389 if ( dst )
1390 {
1391 if ( outLen > dstLen )
1392 return wxCONV_FAILED;
1393
1394 *dst++ = cc[0];
1395 if ( numChars == 2 )
1396 {
1397 // second character of a surrogate
1398 *dst++ = cc[1];
1399 }
1400 }
1401 }
1402
1403 return outLen;
1404 }
1405
1406 size_t
1407 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1408 const wchar_t *src, size_t srcLen) const
1409 {
1410 if ( srcLen == wxNO_LEN )
1411 srcLen = wxWcslen(src) + 1;
1412
1413 if ( !dst )
1414 {
1415 // optimization: return maximal space which could be needed for this
1416 // string instead of the exact amount which could be less if there are
1417 // any surrogates in the input
1418 //
1419 // we consider that surrogates are rare enough to make it worthwhile to
1420 // avoid running the loop below at the cost of slightly extra memory
1421 // consumption
1422 return srcLen*BYTES_PER_CHAR;
1423 }
1424
1425 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1426 size_t outLen = 0;
1427 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1428 {
1429 const wxUint32 ch = wxDecodeSurrogate(&src);
1430 if ( !src )
1431 return wxCONV_FAILED;
1432
1433 outLen += BYTES_PER_CHAR;
1434
1435 if ( outLen > dstLen )
1436 return wxCONV_FAILED;
1437
1438 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1439 }
1440
1441 return outLen;
1442 }
1443
1444 #else // !WC_UTF16: wchar_t is UTF-32
1445
1446 // ----------------------------------------------------------------------------
1447 // conversions without endianness change
1448 // ----------------------------------------------------------------------------
1449
1450 size_t
1451 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1452 const char *src, size_t srcLen) const
1453 {
1454 // use memcpy() as it should be much faster than hand-written loop
1455 srcLen = GetLength(src, srcLen);
1456 if ( srcLen == wxNO_LEN )
1457 return wxCONV_FAILED;
1458
1459 const size_t inLen = srcLen/BYTES_PER_CHAR;
1460 if ( dst )
1461 {
1462 if ( dstLen < inLen )
1463 return wxCONV_FAILED;
1464
1465 memcpy(dst, src, srcLen);
1466 }
1467
1468 return inLen;
1469 }
1470
1471 size_t
1472 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1473 const wchar_t *src, size_t srcLen) const
1474 {
1475 if ( srcLen == wxNO_LEN )
1476 srcLen = wxWcslen(src) + 1;
1477
1478 srcLen *= BYTES_PER_CHAR;
1479
1480 if ( dst )
1481 {
1482 if ( dstLen < srcLen )
1483 return wxCONV_FAILED;
1484
1485 memcpy(dst, src, srcLen);
1486 }
1487
1488 return srcLen;
1489 }
1490
1491 // ----------------------------------------------------------------------------
1492 // endian-reversing conversions
1493 // ----------------------------------------------------------------------------
1494
1495 size_t
1496 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1497 const char *src, size_t srcLen) const
1498 {
1499 srcLen = GetLength(src, srcLen);
1500 if ( srcLen == wxNO_LEN )
1501 return wxCONV_FAILED;
1502
1503 srcLen /= BYTES_PER_CHAR;
1504
1505 if ( dst )
1506 {
1507 if ( dstLen < srcLen )
1508 return wxCONV_FAILED;
1509
1510 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1511 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1512 {
1513 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
1514 }
1515 }
1516
1517 return srcLen;
1518 }
1519
1520 size_t
1521 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1522 const wchar_t *src, size_t srcLen) const
1523 {
1524 if ( srcLen == wxNO_LEN )
1525 srcLen = wxWcslen(src) + 1;
1526
1527 srcLen *= BYTES_PER_CHAR;
1528
1529 if ( dst )
1530 {
1531 if ( dstLen < srcLen )
1532 return wxCONV_FAILED;
1533
1534 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1535 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1536 {
1537 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
1538 }
1539 }
1540
1541 return srcLen;
1542 }
1543
1544 #endif // WC_UTF16/!WC_UTF16
1545
1546
1547 // ============================================================================
1548 // The classes doing conversion using the iconv_xxx() functions
1549 // ============================================================================
1550
1551 #ifdef HAVE_ICONV
1552
1553 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1554 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1555 // (unless there's yet another bug in glibc) the only case when iconv()
1556 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1557 // left in the input buffer -- when _real_ error occurs,
1558 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1559 // iconv() failure.
1560 // [This bug does not appear in glibc 2.2.]
1561 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1562 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1563 (errno != E2BIG || bufLeft != 0))
1564 #else
1565 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1566 #endif
1567
1568 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1569
1570 #define ICONV_T_INVALID ((iconv_t)-1)
1571
1572 #if SIZEOF_WCHAR_T == 4
1573 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1574 #define WC_ENC wxFONTENCODING_UTF32
1575 #elif SIZEOF_WCHAR_T == 2
1576 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1577 #define WC_ENC wxFONTENCODING_UTF16
1578 #else // sizeof(wchar_t) != 2 nor 4
1579 // does this ever happen?
1580 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1581 #endif
1582
1583 // ----------------------------------------------------------------------------
1584 // wxMBConv_iconv: encapsulates an iconv character set
1585 // ----------------------------------------------------------------------------
1586
1587 class wxMBConv_iconv : public wxMBConv
1588 {
1589 public:
1590 wxMBConv_iconv(const wxChar *name);
1591 virtual ~wxMBConv_iconv();
1592
1593 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1594 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1595
1596 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1597 virtual size_t GetMBNulLen() const;
1598
1599 virtual wxMBConv *Clone() const
1600 {
1601 wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1602 p->m_minMBCharWidth = m_minMBCharWidth;
1603 return p;
1604 }
1605
1606 bool IsOk() const
1607 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1608
1609 protected:
1610 // the iconv handlers used to translate from multibyte
1611 // to wide char and in the other direction
1612 iconv_t m2w,
1613 w2m;
1614
1615 #if wxUSE_THREADS
1616 // guards access to m2w and w2m objects
1617 wxMutex m_iconvMutex;
1618 #endif
1619
1620 private:
1621 // the name (for iconv_open()) of a wide char charset -- if none is
1622 // available on this machine, it will remain NULL
1623 static wxString ms_wcCharsetName;
1624
1625 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1626 // different endian-ness than the native one
1627 static bool ms_wcNeedsSwap;
1628
1629
1630 // name of the encoding handled by this conversion
1631 wxString m_name;
1632
1633 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1634 // initially
1635 size_t m_minMBCharWidth;
1636 };
1637
1638 // make the constructor available for unit testing
1639 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1640 {
1641 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1642 if ( !result->IsOk() )
1643 {
1644 delete result;
1645 return 0;
1646 }
1647
1648 return result;
1649 }
1650
1651 wxString wxMBConv_iconv::ms_wcCharsetName;
1652 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1653
1654 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1655 : m_name(name)
1656 {
1657 m_minMBCharWidth = 0;
1658
1659 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1660 // names for the charsets
1661 const wxCharBuffer cname(wxString(name).ToAscii());
1662
1663 // check for charset that represents wchar_t:
1664 if ( ms_wcCharsetName.empty() )
1665 {
1666 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1667
1668 #if wxUSE_FONTMAP
1669 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1670 #else // !wxUSE_FONTMAP
1671 static const wxChar *names_static[] =
1672 {
1673 #if SIZEOF_WCHAR_T == 4
1674 _T("UCS-4"),
1675 #elif SIZEOF_WCHAR_T = 2
1676 _T("UCS-2"),
1677 #endif
1678 NULL
1679 };
1680 const wxChar **names = names_static;
1681 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1682
1683 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1684 {
1685 const wxString nameCS(*names);
1686
1687 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1688 wxString nameXE(nameCS);
1689
1690 #ifdef WORDS_BIGENDIAN
1691 nameXE += _T("BE");
1692 #else // little endian
1693 nameXE += _T("LE");
1694 #endif
1695
1696 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1697 nameXE.c_str());
1698
1699 m2w = iconv_open(nameXE.ToAscii(), cname);
1700 if ( m2w == ICONV_T_INVALID )
1701 {
1702 // try charset w/o bytesex info (e.g. "UCS4")
1703 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1704 nameCS.c_str());
1705 m2w = iconv_open(nameCS.ToAscii(), cname);
1706
1707 // and check for bytesex ourselves:
1708 if ( m2w != ICONV_T_INVALID )
1709 {
1710 char buf[2], *bufPtr;
1711 wchar_t wbuf[2], *wbufPtr;
1712 size_t insz, outsz;
1713 size_t res;
1714
1715 buf[0] = 'A';
1716 buf[1] = 0;
1717 wbuf[0] = 0;
1718 insz = 2;
1719 outsz = SIZEOF_WCHAR_T * 2;
1720 wbufPtr = wbuf;
1721 bufPtr = buf;
1722
1723 res = iconv(
1724 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1725 (char**)&wbufPtr, &outsz);
1726
1727 if (ICONV_FAILED(res, insz))
1728 {
1729 wxLogLastError(wxT("iconv"));
1730 wxLogError(_("Conversion to charset '%s' doesn't work."),
1731 nameCS.c_str());
1732 }
1733 else // ok, can convert to this encoding, remember it
1734 {
1735 ms_wcCharsetName = nameCS;
1736 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1737 }
1738 }
1739 }
1740 else // use charset not requiring byte swapping
1741 {
1742 ms_wcCharsetName = nameXE;
1743 }
1744 }
1745
1746 wxLogTrace(TRACE_STRCONV,
1747 wxT("iconv wchar_t charset is \"%s\"%s"),
1748 ms_wcCharsetName.empty() ? _T("<none>")
1749 : ms_wcCharsetName.c_str(),
1750 ms_wcNeedsSwap ? _T(" (needs swap)")
1751 : _T(""));
1752 }
1753 else // we already have ms_wcCharsetName
1754 {
1755 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1756 }
1757
1758 if ( ms_wcCharsetName.empty() )
1759 {
1760 w2m = ICONV_T_INVALID;
1761 }
1762 else
1763 {
1764 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1765 if ( w2m == ICONV_T_INVALID )
1766 {
1767 wxLogTrace(TRACE_STRCONV,
1768 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1769 ms_wcCharsetName.c_str(), cname.data());
1770 }
1771 }
1772 }
1773
1774 wxMBConv_iconv::~wxMBConv_iconv()
1775 {
1776 if ( m2w != ICONV_T_INVALID )
1777 iconv_close(m2w);
1778 if ( w2m != ICONV_T_INVALID )
1779 iconv_close(w2m);
1780 }
1781
1782 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1783 {
1784 // find the string length: notice that must be done differently for
1785 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1786 size_t inbuf;
1787 const size_t nulLen = GetMBNulLen();
1788 switch ( nulLen )
1789 {
1790 default:
1791 return wxCONV_FAILED;
1792
1793 case 1:
1794 inbuf = strlen(psz); // arguably more optimized than our version
1795 break;
1796
1797 case 2:
1798 case 4:
1799 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1800 // they also have to start at character boundary and not span two
1801 // adjacent characters
1802 const char *p;
1803 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1804 ;
1805 inbuf = p - psz;
1806 break;
1807 }
1808
1809 #if wxUSE_THREADS
1810 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1811 // Unfortunately there are a couple of global wxCSConv objects such as
1812 // wxConvLocal that are used all over wx code, so we have to make sure
1813 // the handle is used by at most one thread at the time. Otherwise
1814 // only a few wx classes would be safe to use from non-main threads
1815 // as MB<->WC conversion would fail "randomly".
1816 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1817 #endif // wxUSE_THREADS
1818
1819 size_t outbuf = n * SIZEOF_WCHAR_T;
1820 size_t res, cres;
1821 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1822 wchar_t *bufPtr = buf;
1823 const char *pszPtr = psz;
1824
1825 if (buf)
1826 {
1827 // have destination buffer, convert there
1828 cres = iconv(m2w,
1829 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1830 (char**)&bufPtr, &outbuf);
1831 res = n - (outbuf / SIZEOF_WCHAR_T);
1832
1833 if (ms_wcNeedsSwap)
1834 {
1835 // convert to native endianness
1836 for ( unsigned i = 0; i < res; i++ )
1837 buf[n] = WC_BSWAP(buf[i]);
1838 }
1839
1840 // NUL-terminate the string if there is any space left
1841 if (res < n)
1842 buf[res] = 0;
1843 }
1844 else
1845 {
1846 // no destination buffer... convert using temp buffer
1847 // to calculate destination buffer requirement
1848 wchar_t tbuf[8];
1849 res = 0;
1850
1851 do
1852 {
1853 bufPtr = tbuf;
1854 outbuf = 8 * SIZEOF_WCHAR_T;
1855
1856 cres = iconv(m2w,
1857 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1858 (char**)&bufPtr, &outbuf );
1859
1860 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1861 }
1862 while ((cres == (size_t)-1) && (errno == E2BIG));
1863 }
1864
1865 if (ICONV_FAILED(cres, inbuf))
1866 {
1867 //VS: it is ok if iconv fails, hence trace only
1868 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1869 return wxCONV_FAILED;
1870 }
1871
1872 return res;
1873 }
1874
1875 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1876 {
1877 #if wxUSE_THREADS
1878 // NB: explained in MB2WC
1879 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1880 #endif
1881
1882 size_t inlen = wxWcslen(psz);
1883 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1884 size_t outbuf = n;
1885 size_t res, cres;
1886
1887 wchar_t *tmpbuf = 0;
1888
1889 if (ms_wcNeedsSwap)
1890 {
1891 // need to copy to temp buffer to switch endianness
1892 // (doing WC_BSWAP twice on the original buffer won't help, as it
1893 // could be in read-only memory, or be accessed in some other thread)
1894 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1895 for ( size_t i = 0; i < inlen; i++ )
1896 tmpbuf[n] = WC_BSWAP(psz[i]);
1897
1898 tmpbuf[inlen] = L'\0';
1899 psz = tmpbuf;
1900 }
1901
1902 if (buf)
1903 {
1904 // have destination buffer, convert there
1905 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1906
1907 res = n - outbuf;
1908
1909 // NB: iconv was given only wcslen(psz) characters on input, and so
1910 // it couldn't convert the trailing zero. Let's do it ourselves
1911 // if there's some room left for it in the output buffer.
1912 if (res < n)
1913 buf[0] = 0;
1914 }
1915 else
1916 {
1917 // no destination buffer: convert using temp buffer
1918 // to calculate destination buffer requirement
1919 char tbuf[16];
1920 res = 0;
1921 do
1922 {
1923 buf = tbuf;
1924 outbuf = 16;
1925
1926 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1927
1928 res += 16 - outbuf;
1929 }
1930 while ((cres == (size_t)-1) && (errno == E2BIG));
1931 }
1932
1933 if (ms_wcNeedsSwap)
1934 {
1935 free(tmpbuf);
1936 }
1937
1938 if (ICONV_FAILED(cres, inbuf))
1939 {
1940 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1941 return wxCONV_FAILED;
1942 }
1943
1944 return res;
1945 }
1946
1947 size_t wxMBConv_iconv::GetMBNulLen() const
1948 {
1949 if ( m_minMBCharWidth == 0 )
1950 {
1951 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1952
1953 #if wxUSE_THREADS
1954 // NB: explained in MB2WC
1955 wxMutexLocker lock(self->m_iconvMutex);
1956 #endif
1957
1958 wchar_t *wnul = L"";
1959 char buf[8]; // should be enough for NUL in any encoding
1960 size_t inLen = sizeof(wchar_t),
1961 outLen = WXSIZEOF(buf);
1962 char *inBuff = (char *)wnul;
1963 char *outBuff = buf;
1964 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
1965 {
1966 self->m_minMBCharWidth = (size_t)-1;
1967 }
1968 else // ok
1969 {
1970 self->m_minMBCharWidth = outBuff - buf;
1971 }
1972 }
1973
1974 return m_minMBCharWidth;
1975 }
1976
1977 #endif // HAVE_ICONV
1978
1979
1980 // ============================================================================
1981 // Win32 conversion classes
1982 // ============================================================================
1983
1984 #ifdef wxHAVE_WIN32_MB2WC
1985
1986 // from utils.cpp
1987 #if wxUSE_FONTMAP
1988 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1989 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1990 #endif
1991
1992 class wxMBConv_win32 : public wxMBConv
1993 {
1994 public:
1995 wxMBConv_win32()
1996 {
1997 m_CodePage = CP_ACP;
1998 m_minMBCharWidth = 0;
1999 }
2000
2001 wxMBConv_win32(const wxMBConv_win32& conv)
2002 : wxMBConv()
2003 {
2004 m_CodePage = conv.m_CodePage;
2005 m_minMBCharWidth = conv.m_minMBCharWidth;
2006 }
2007
2008 #if wxUSE_FONTMAP
2009 wxMBConv_win32(const wxChar* name)
2010 {
2011 m_CodePage = wxCharsetToCodepage(name);
2012 m_minMBCharWidth = 0;
2013 }
2014
2015 wxMBConv_win32(wxFontEncoding encoding)
2016 {
2017 m_CodePage = wxEncodingToCodepage(encoding);
2018 m_minMBCharWidth = 0;
2019 }
2020 #endif // wxUSE_FONTMAP
2021
2022 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2023 {
2024 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2025 // the behaviour is not compatible with the Unix version (using iconv)
2026 // and break the library itself, e.g. wxTextInputStream::NextChar()
2027 // wouldn't work if reading an incomplete MB char didn't result in an
2028 // error
2029 //
2030 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2031 // Win XP or newer and it is not supported for UTF-[78] so we always
2032 // use our own conversions in this case. See
2033 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2034 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2035 if ( m_CodePage == CP_UTF8 )
2036 {
2037 return wxConvUTF8.MB2WC(buf, psz, n);
2038 }
2039
2040 if ( m_CodePage == CP_UTF7 )
2041 {
2042 return wxConvUTF7.MB2WC(buf, psz, n);
2043 }
2044
2045 int flags = 0;
2046 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2047 IsAtLeastWin2kSP4() )
2048 {
2049 flags = MB_ERR_INVALID_CHARS;
2050 }
2051
2052 const size_t len = ::MultiByteToWideChar
2053 (
2054 m_CodePage, // code page
2055 flags, // flags: fall on error
2056 psz, // input string
2057 -1, // its length (NUL-terminated)
2058 buf, // output string
2059 buf ? n : 0 // size of output buffer
2060 );
2061 if ( !len )
2062 {
2063 // function totally failed
2064 return wxCONV_FAILED;
2065 }
2066
2067 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2068 // check if we succeeded, by doing a double trip:
2069 if ( !flags && buf )
2070 {
2071 const size_t mbLen = strlen(psz);
2072 wxCharBuffer mbBuf(mbLen);
2073 if ( ::WideCharToMultiByte
2074 (
2075 m_CodePage,
2076 0,
2077 buf,
2078 -1,
2079 mbBuf.data(),
2080 mbLen + 1, // size in bytes, not length
2081 NULL,
2082 NULL
2083 ) == 0 ||
2084 strcmp(mbBuf, psz) != 0 )
2085 {
2086 // we didn't obtain the same thing we started from, hence
2087 // the conversion was lossy and we consider that it failed
2088 return wxCONV_FAILED;
2089 }
2090 }
2091
2092 // note that it returns count of written chars for buf != NULL and size
2093 // of the needed buffer for buf == NULL so in either case the length of
2094 // the string (which never includes the terminating NUL) is one less
2095 return len - 1;
2096 }
2097
2098 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2099 {
2100 /*
2101 we have a problem here: by default, WideCharToMultiByte() may
2102 replace characters unrepresentable in the target code page with bad
2103 quality approximations such as turning "1/2" symbol (U+00BD) into
2104 "1" for the code pages which don't have it and we, obviously, want
2105 to avoid this at any price
2106
2107 the trouble is that this function does it _silently_, i.e. it won't
2108 even tell us whether it did or not... Win98/2000 and higher provide
2109 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2110 we have to resort to a round trip, i.e. check that converting back
2111 results in the same string -- this is, of course, expensive but
2112 otherwise we simply can't be sure to not garble the data.
2113 */
2114
2115 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2116 // it doesn't work with CJK encodings (which we test for rather roughly
2117 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2118 // supporting it
2119 BOOL usedDef wxDUMMY_INITIALIZE(false);
2120 BOOL *pUsedDef;
2121 int flags;
2122 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2123 {
2124 // it's our lucky day
2125 flags = WC_NO_BEST_FIT_CHARS;
2126 pUsedDef = &usedDef;
2127 }
2128 else // old system or unsupported encoding
2129 {
2130 flags = 0;
2131 pUsedDef = NULL;
2132 }
2133
2134 const size_t len = ::WideCharToMultiByte
2135 (
2136 m_CodePage, // code page
2137 flags, // either none or no best fit
2138 pwz, // input string
2139 -1, // it is (wide) NUL-terminated
2140 buf, // output buffer
2141 buf ? n : 0, // and its size
2142 NULL, // default "replacement" char
2143 pUsedDef // [out] was it used?
2144 );
2145
2146 if ( !len )
2147 {
2148 // function totally failed
2149 return wxCONV_FAILED;
2150 }
2151
2152 // if we were really converting, check if we succeeded
2153 if ( buf )
2154 {
2155 if ( flags )
2156 {
2157 // check if the conversion failed, i.e. if any replacements
2158 // were done
2159 if ( usedDef )
2160 return wxCONV_FAILED;
2161 }
2162 else // we must resort to double tripping...
2163 {
2164 wxWCharBuffer wcBuf(n);
2165 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2166 wcscmp(wcBuf, pwz) != 0 )
2167 {
2168 // we didn't obtain the same thing we started from, hence
2169 // the conversion was lossy and we consider that it failed
2170 return wxCONV_FAILED;
2171 }
2172 }
2173 }
2174
2175 // see the comment above for the reason of "len - 1"
2176 return len - 1;
2177 }
2178
2179 virtual size_t GetMBNulLen() const
2180 {
2181 if ( m_minMBCharWidth == 0 )
2182 {
2183 int len = ::WideCharToMultiByte
2184 (
2185 m_CodePage, // code page
2186 0, // no flags
2187 L"", // input string
2188 1, // translate just the NUL
2189 NULL, // output buffer
2190 0, // and its size
2191 NULL, // no replacement char
2192 NULL // [out] don't care if it was used
2193 );
2194
2195 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2196 switch ( len )
2197 {
2198 default:
2199 wxLogDebug(_T("Unexpected NUL length %d"), len);
2200 self->m_minMBCharWidth = (size_t)-1;
2201 break;
2202
2203 case 0:
2204 self->m_minMBCharWidth = (size_t)-1;
2205 break;
2206
2207 case 1:
2208 case 2:
2209 case 4:
2210 self->m_minMBCharWidth = len;
2211 break;
2212 }
2213 }
2214
2215 return m_minMBCharWidth;
2216 }
2217
2218 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2219
2220 bool IsOk() const { return m_CodePage != -1; }
2221
2222 private:
2223 static bool CanUseNoBestFit()
2224 {
2225 static int s_isWin98Or2k = -1;
2226
2227 if ( s_isWin98Or2k == -1 )
2228 {
2229 int verMaj, verMin;
2230 switch ( wxGetOsVersion(&verMaj, &verMin) )
2231 {
2232 case wxOS_WINDOWS_9X:
2233 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2234 break;
2235
2236 case wxOS_WINDOWS_NT:
2237 s_isWin98Or2k = verMaj >= 5;
2238 break;
2239
2240 default:
2241 // unknown: be conservative by default
2242 s_isWin98Or2k = 0;
2243 break;
2244 }
2245
2246 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2247 }
2248
2249 return s_isWin98Or2k == 1;
2250 }
2251
2252 static bool IsAtLeastWin2kSP4()
2253 {
2254 #ifdef __WXWINCE__
2255 return false;
2256 #else
2257 static int s_isAtLeastWin2kSP4 = -1;
2258
2259 if ( s_isAtLeastWin2kSP4 == -1 )
2260 {
2261 OSVERSIONINFOEX ver;
2262
2263 memset(&ver, 0, sizeof(ver));
2264 ver.dwOSVersionInfoSize = sizeof(ver);
2265 GetVersionEx((OSVERSIONINFO*)&ver);
2266
2267 s_isAtLeastWin2kSP4 =
2268 ((ver.dwMajorVersion > 5) || // Vista+
2269 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2270 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2271 ver.wServicePackMajor >= 4)) // 2000 SP4+
2272 ? 1 : 0;
2273 }
2274
2275 return s_isAtLeastWin2kSP4 == 1;
2276 #endif
2277 }
2278
2279
2280 // the code page we're working with
2281 long m_CodePage;
2282
2283 // cached result of GetMBNulLen(), set to 0 initially meaning
2284 // "unknown"
2285 size_t m_minMBCharWidth;
2286 };
2287
2288 #endif // wxHAVE_WIN32_MB2WC
2289
2290 // ============================================================================
2291 // Cocoa conversion classes
2292 // ============================================================================
2293
2294 #if defined(__WXCOCOA__)
2295
2296 // RN: There is no UTF-32 support in either Core Foundation or Cocoa.
2297 // Strangely enough, internally Core Foundation uses
2298 // UTF-32 internally quite a bit - its just not public (yet).
2299
2300 #include <CoreFoundation/CFString.h>
2301 #include <CoreFoundation/CFStringEncodingExt.h>
2302
2303 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2304 {
2305 CFStringEncoding enc = kCFStringEncodingInvalidId ;
2306
2307 switch (encoding)
2308 {
2309 case wxFONTENCODING_DEFAULT :
2310 enc = CFStringGetSystemEncoding();
2311 break ;
2312
2313 case wxFONTENCODING_ISO8859_1 :
2314 enc = kCFStringEncodingISOLatin1 ;
2315 break ;
2316 case wxFONTENCODING_ISO8859_2 :
2317 enc = kCFStringEncodingISOLatin2;
2318 break ;
2319 case wxFONTENCODING_ISO8859_3 :
2320 enc = kCFStringEncodingISOLatin3 ;
2321 break ;
2322 case wxFONTENCODING_ISO8859_4 :
2323 enc = kCFStringEncodingISOLatin4;
2324 break ;
2325 case wxFONTENCODING_ISO8859_5 :
2326 enc = kCFStringEncodingISOLatinCyrillic;
2327 break ;
2328 case wxFONTENCODING_ISO8859_6 :
2329 enc = kCFStringEncodingISOLatinArabic;
2330 break ;
2331 case wxFONTENCODING_ISO8859_7 :
2332 enc = kCFStringEncodingISOLatinGreek;
2333 break ;
2334 case wxFONTENCODING_ISO8859_8 :
2335 enc = kCFStringEncodingISOLatinHebrew;
2336 break ;
2337 case wxFONTENCODING_ISO8859_9 :
2338 enc = kCFStringEncodingISOLatin5;
2339 break ;
2340 case wxFONTENCODING_ISO8859_10 :
2341 enc = kCFStringEncodingISOLatin6;
2342 break ;
2343 case wxFONTENCODING_ISO8859_11 :
2344 enc = kCFStringEncodingISOLatinThai;
2345 break ;
2346 case wxFONTENCODING_ISO8859_13 :
2347 enc = kCFStringEncodingISOLatin7;
2348 break ;
2349 case wxFONTENCODING_ISO8859_14 :
2350 enc = kCFStringEncodingISOLatin8;
2351 break ;
2352 case wxFONTENCODING_ISO8859_15 :
2353 enc = kCFStringEncodingISOLatin9;
2354 break ;
2355
2356 case wxFONTENCODING_KOI8 :
2357 enc = kCFStringEncodingKOI8_R;
2358 break ;
2359 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2360 enc = kCFStringEncodingDOSRussian;
2361 break ;
2362
2363 // case wxFONTENCODING_BULGARIAN :
2364 // enc = ;
2365 // break ;
2366
2367 case wxFONTENCODING_CP437 :
2368 enc = kCFStringEncodingDOSLatinUS ;
2369 break ;
2370 case wxFONTENCODING_CP850 :
2371 enc = kCFStringEncodingDOSLatin1;
2372 break ;
2373 case wxFONTENCODING_CP852 :
2374 enc = kCFStringEncodingDOSLatin2;
2375 break ;
2376 case wxFONTENCODING_CP855 :
2377 enc = kCFStringEncodingDOSCyrillic;
2378 break ;
2379 case wxFONTENCODING_CP866 :
2380 enc = kCFStringEncodingDOSRussian ;
2381 break ;
2382 case wxFONTENCODING_CP874 :
2383 enc = kCFStringEncodingDOSThai;
2384 break ;
2385 case wxFONTENCODING_CP932 :
2386 enc = kCFStringEncodingDOSJapanese;
2387 break ;
2388 case wxFONTENCODING_CP936 :
2389 enc = kCFStringEncodingDOSChineseSimplif ;
2390 break ;
2391 case wxFONTENCODING_CP949 :
2392 enc = kCFStringEncodingDOSKorean;
2393 break ;
2394 case wxFONTENCODING_CP950 :
2395 enc = kCFStringEncodingDOSChineseTrad;
2396 break ;
2397 case wxFONTENCODING_CP1250 :
2398 enc = kCFStringEncodingWindowsLatin2;
2399 break ;
2400 case wxFONTENCODING_CP1251 :
2401 enc = kCFStringEncodingWindowsCyrillic ;
2402 break ;
2403 case wxFONTENCODING_CP1252 :
2404 enc = kCFStringEncodingWindowsLatin1 ;
2405 break ;
2406 case wxFONTENCODING_CP1253 :
2407 enc = kCFStringEncodingWindowsGreek;
2408 break ;
2409 case wxFONTENCODING_CP1254 :
2410 enc = kCFStringEncodingWindowsLatin5;
2411 break ;
2412 case wxFONTENCODING_CP1255 :
2413 enc = kCFStringEncodingWindowsHebrew ;
2414 break ;
2415 case wxFONTENCODING_CP1256 :
2416 enc = kCFStringEncodingWindowsArabic ;
2417 break ;
2418 case wxFONTENCODING_CP1257 :
2419 enc = kCFStringEncodingWindowsBalticRim;
2420 break ;
2421 // This only really encodes to UTF7 (if that) evidently
2422 // case wxFONTENCODING_UTF7 :
2423 // enc = kCFStringEncodingNonLossyASCII ;
2424 // break ;
2425 case wxFONTENCODING_UTF8 :
2426 enc = kCFStringEncodingUTF8 ;
2427 break ;
2428 case wxFONTENCODING_EUC_JP :
2429 enc = kCFStringEncodingEUC_JP;
2430 break ;
2431 case wxFONTENCODING_UTF16 :
2432 enc = kCFStringEncodingUnicode ;
2433 break ;
2434 case wxFONTENCODING_MACROMAN :
2435 enc = kCFStringEncodingMacRoman ;
2436 break ;
2437 case wxFONTENCODING_MACJAPANESE :
2438 enc = kCFStringEncodingMacJapanese ;
2439 break ;
2440 case wxFONTENCODING_MACCHINESETRAD :
2441 enc = kCFStringEncodingMacChineseTrad ;
2442 break ;
2443 case wxFONTENCODING_MACKOREAN :
2444 enc = kCFStringEncodingMacKorean ;
2445 break ;
2446 case wxFONTENCODING_MACARABIC :
2447 enc = kCFStringEncodingMacArabic ;
2448 break ;
2449 case wxFONTENCODING_MACHEBREW :
2450 enc = kCFStringEncodingMacHebrew ;
2451 break ;
2452 case wxFONTENCODING_MACGREEK :
2453 enc = kCFStringEncodingMacGreek ;
2454 break ;
2455 case wxFONTENCODING_MACCYRILLIC :
2456 enc = kCFStringEncodingMacCyrillic ;
2457 break ;
2458 case wxFONTENCODING_MACDEVANAGARI :
2459 enc = kCFStringEncodingMacDevanagari ;
2460 break ;
2461 case wxFONTENCODING_MACGURMUKHI :
2462 enc = kCFStringEncodingMacGurmukhi ;
2463 break ;
2464 case wxFONTENCODING_MACGUJARATI :
2465 enc = kCFStringEncodingMacGujarati ;
2466 break ;
2467 case wxFONTENCODING_MACORIYA :
2468 enc = kCFStringEncodingMacOriya ;
2469 break ;
2470 case wxFONTENCODING_MACBENGALI :
2471 enc = kCFStringEncodingMacBengali ;
2472 break ;
2473 case wxFONTENCODING_MACTAMIL :
2474 enc = kCFStringEncodingMacTamil ;
2475 break ;
2476 case wxFONTENCODING_MACTELUGU :
2477 enc = kCFStringEncodingMacTelugu ;
2478 break ;
2479 case wxFONTENCODING_MACKANNADA :
2480 enc = kCFStringEncodingMacKannada ;
2481 break ;
2482 case wxFONTENCODING_MACMALAJALAM :
2483 enc = kCFStringEncodingMacMalayalam ;
2484 break ;
2485 case wxFONTENCODING_MACSINHALESE :
2486 enc = kCFStringEncodingMacSinhalese ;
2487 break ;
2488 case wxFONTENCODING_MACBURMESE :
2489 enc = kCFStringEncodingMacBurmese ;
2490 break ;
2491 case wxFONTENCODING_MACKHMER :
2492 enc = kCFStringEncodingMacKhmer ;
2493 break ;
2494 case wxFONTENCODING_MACTHAI :
2495 enc = kCFStringEncodingMacThai ;
2496 break ;
2497 case wxFONTENCODING_MACLAOTIAN :
2498 enc = kCFStringEncodingMacLaotian ;
2499 break ;
2500 case wxFONTENCODING_MACGEORGIAN :
2501 enc = kCFStringEncodingMacGeorgian ;
2502 break ;
2503 case wxFONTENCODING_MACARMENIAN :
2504 enc = kCFStringEncodingMacArmenian ;
2505 break ;
2506 case wxFONTENCODING_MACCHINESESIMP :
2507 enc = kCFStringEncodingMacChineseSimp ;
2508 break ;
2509 case wxFONTENCODING_MACTIBETAN :
2510 enc = kCFStringEncodingMacTibetan ;
2511 break ;
2512 case wxFONTENCODING_MACMONGOLIAN :
2513 enc = kCFStringEncodingMacMongolian ;
2514 break ;
2515 case wxFONTENCODING_MACETHIOPIC :
2516 enc = kCFStringEncodingMacEthiopic ;
2517 break ;
2518 case wxFONTENCODING_MACCENTRALEUR :
2519 enc = kCFStringEncodingMacCentralEurRoman ;
2520 break ;
2521 case wxFONTENCODING_MACVIATNAMESE :
2522 enc = kCFStringEncodingMacVietnamese ;
2523 break ;
2524 case wxFONTENCODING_MACARABICEXT :
2525 enc = kCFStringEncodingMacExtArabic ;
2526 break ;
2527 case wxFONTENCODING_MACSYMBOL :
2528 enc = kCFStringEncodingMacSymbol ;
2529 break ;
2530 case wxFONTENCODING_MACDINGBATS :
2531 enc = kCFStringEncodingMacDingbats ;
2532 break ;
2533 case wxFONTENCODING_MACTURKISH :
2534 enc = kCFStringEncodingMacTurkish ;
2535 break ;
2536 case wxFONTENCODING_MACCROATIAN :
2537 enc = kCFStringEncodingMacCroatian ;
2538 break ;
2539 case wxFONTENCODING_MACICELANDIC :
2540 enc = kCFStringEncodingMacIcelandic ;
2541 break ;
2542 case wxFONTENCODING_MACROMANIAN :
2543 enc = kCFStringEncodingMacRomanian ;
2544 break ;
2545 case wxFONTENCODING_MACCELTIC :
2546 enc = kCFStringEncodingMacCeltic ;
2547 break ;
2548 case wxFONTENCODING_MACGAELIC :
2549 enc = kCFStringEncodingMacGaelic ;
2550 break ;
2551 // case wxFONTENCODING_MACKEYBOARD :
2552 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2553 // break ;
2554
2555 default :
2556 // because gcc is picky
2557 break ;
2558 }
2559
2560 return enc ;
2561 }
2562
2563 class wxMBConv_cocoa : public wxMBConv
2564 {
2565 public:
2566 wxMBConv_cocoa()
2567 {
2568 Init(CFStringGetSystemEncoding()) ;
2569 }
2570
2571 wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2572 {
2573 m_encoding = conv.m_encoding;
2574 }
2575
2576 #if wxUSE_FONTMAP
2577 wxMBConv_cocoa(const wxChar* name)
2578 {
2579 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2580 }
2581 #endif
2582
2583 wxMBConv_cocoa(wxFontEncoding encoding)
2584 {
2585 Init( wxCFStringEncFromFontEnc(encoding) );
2586 }
2587
2588 ~wxMBConv_cocoa()
2589 {
2590 }
2591
2592 void Init( CFStringEncoding encoding)
2593 {
2594 m_encoding = encoding ;
2595 }
2596
2597 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2598 {
2599 wxASSERT(szUnConv);
2600
2601 CFStringRef theString = CFStringCreateWithBytes (
2602 NULL, //the allocator
2603 (const UInt8*)szUnConv,
2604 strlen(szUnConv),
2605 m_encoding,
2606 false //no BOM/external representation
2607 );
2608
2609 wxASSERT(theString);
2610
2611 size_t nOutLength = CFStringGetLength(theString);
2612
2613 if (szOut == NULL)
2614 {
2615 CFRelease(theString);
2616 return nOutLength;
2617 }
2618
2619 CFRange theRange = { 0, nOutSize };
2620
2621 #if SIZEOF_WCHAR_T == 4
2622 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2623 #endif
2624
2625 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2626
2627 CFRelease(theString);
2628
2629 szUniCharBuffer[nOutLength] = '\0';
2630
2631 #if SIZEOF_WCHAR_T == 4
2632 wxMBConvUTF16 converter;
2633 converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2634 delete [] szUniCharBuffer;
2635 #endif
2636
2637 return nOutLength;
2638 }
2639
2640 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2641 {
2642 wxASSERT(szUnConv);
2643
2644 size_t nRealOutSize;
2645 size_t nBufSize = wxWcslen(szUnConv);
2646 UniChar* szUniBuffer = (UniChar*) szUnConv;
2647
2648 #if SIZEOF_WCHAR_T == 4
2649 wxMBConvUTF16 converter ;
2650 nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2651 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2652 converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
2653 nBufSize /= sizeof(UniChar);
2654 #endif
2655
2656 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2657 NULL, //allocator
2658 szUniBuffer,
2659 nBufSize,
2660 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2661 );
2662
2663 wxASSERT(theString);
2664
2665 //Note that CER puts a BOM when converting to unicode
2666 //so we check and use getchars instead in that case
2667 if (m_encoding == kCFStringEncodingUnicode)
2668 {
2669 if (szOut != NULL)
2670 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2671
2672 nRealOutSize = CFStringGetLength(theString) + 1;
2673 }
2674 else
2675 {
2676 CFStringGetBytes(
2677 theString,
2678 CFRangeMake(0, CFStringGetLength(theString)),
2679 m_encoding,
2680 0, //what to put in characters that can't be converted -
2681 //0 tells CFString to return NULL if it meets such a character
2682 false, //not an external representation
2683 (UInt8*) szOut,
2684 nOutSize,
2685 (CFIndex*) &nRealOutSize
2686 );
2687 }
2688
2689 CFRelease(theString);
2690
2691 #if SIZEOF_WCHAR_T == 4
2692 delete[] szUniBuffer;
2693 #endif
2694
2695 return nRealOutSize - 1;
2696 }
2697
2698 virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2699
2700 bool IsOk() const
2701 {
2702 return m_encoding != kCFStringEncodingInvalidId &&
2703 CFStringIsEncodingAvailable(m_encoding);
2704 }
2705
2706 private:
2707 CFStringEncoding m_encoding ;
2708 };
2709
2710 #endif // defined(__WXCOCOA__)
2711
2712 // ============================================================================
2713 // Mac conversion classes
2714 // ============================================================================
2715
2716 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2717
2718 class wxMBConv_mac : public wxMBConv
2719 {
2720 public:
2721 wxMBConv_mac()
2722 {
2723 Init(CFStringGetSystemEncoding()) ;
2724 }
2725
2726 wxMBConv_mac(const wxMBConv_mac& conv)
2727 {
2728 Init(conv.m_char_encoding);
2729 }
2730
2731 #if wxUSE_FONTMAP
2732 wxMBConv_mac(const wxChar* name)
2733 {
2734 Init( wxMacGetSystemEncFromFontEnc( wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) );
2735 }
2736 #endif
2737
2738 wxMBConv_mac(wxFontEncoding encoding)
2739 {
2740 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2741 }
2742
2743 ~wxMBConv_mac()
2744 {
2745 OSStatus status = noErr ;
2746 if (m_MB2WC_converter)
2747 status = TECDisposeConverter(m_MB2WC_converter);
2748 if (m_WC2MB_converter)
2749 status = TECDisposeConverter(m_WC2MB_converter);
2750 }
2751
2752 void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2753 TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
2754 {
2755 m_MB2WC_converter = NULL ;
2756 m_WC2MB_converter = NULL ;
2757 m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
2758 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
2759 }
2760
2761 virtual void CreateIfNeeded() const
2762 {
2763 if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL )
2764 {
2765 OSStatus status = noErr ;
2766 status = TECCreateConverter(&m_MB2WC_converter,
2767 m_char_encoding,
2768 m_unicode_encoding);
2769 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2770 status = TECCreateConverter(&m_WC2MB_converter,
2771 m_unicode_encoding,
2772 m_char_encoding);
2773 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2774 }
2775 }
2776
2777 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2778 {
2779 CreateIfNeeded() ;
2780 OSStatus status = noErr ;
2781 ByteCount byteOutLen ;
2782 ByteCount byteInLen = strlen(psz) + 1;
2783 wchar_t *tbuf = NULL ;
2784 UniChar* ubuf = NULL ;
2785 size_t res = 0 ;
2786
2787 if (buf == NULL)
2788 {
2789 // Apple specs say at least 32
2790 n = wxMax( 32, byteInLen ) ;
2791 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
2792 }
2793
2794 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2795
2796 #if SIZEOF_WCHAR_T == 4
2797 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2798 #else
2799 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2800 #endif
2801
2802 status = TECConvertText(
2803 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2804 (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2805
2806 #if SIZEOF_WCHAR_T == 4
2807 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2808 // is not properly terminated we get random characters at the end
2809 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2810 wxMBConvUTF16 converter ;
2811 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
2812 free( ubuf ) ;
2813 #else
2814 res = byteOutLen / sizeof( UniChar ) ;
2815 #endif
2816
2817 if ( buf == NULL )
2818 free(tbuf) ;
2819
2820 if ( buf && res < n)
2821 buf[res] = 0;
2822
2823 return res ;
2824 }
2825
2826 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2827 {
2828 CreateIfNeeded() ;
2829 OSStatus status = noErr ;
2830 ByteCount byteOutLen ;
2831 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2832
2833 char *tbuf = NULL ;
2834
2835 if (buf == NULL)
2836 {
2837 // Apple specs say at least 32
2838 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2839 tbuf = (char*) malloc( n ) ;
2840 }
2841
2842 ByteCount byteBufferLen = n ;
2843 UniChar* ubuf = NULL ;
2844
2845 #if SIZEOF_WCHAR_T == 4
2846 wxMBConvUTF16 converter ;
2847 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2848 byteInLen = unicharlen ;
2849 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2850 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2851 #else
2852 ubuf = (UniChar*) psz ;
2853 #endif
2854
2855 status = TECConvertText(
2856 m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2857 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2858
2859 #if SIZEOF_WCHAR_T == 4
2860 free( ubuf ) ;
2861 #endif
2862
2863 if ( buf == NULL )
2864 free(tbuf) ;
2865
2866 size_t res = byteOutLen ;
2867 if ( buf && res < n)
2868 {
2869 buf[res] = 0;
2870
2871 //we need to double-trip to verify it didn't insert any ? in place
2872 //of bogus characters
2873 wxWCharBuffer wcBuf(n);
2874 size_t pszlen = wxWcslen(psz);
2875 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2876 wxWcslen(wcBuf) != pszlen ||
2877 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2878 {
2879 // we didn't obtain the same thing we started from, hence
2880 // the conversion was lossy and we consider that it failed
2881 return wxCONV_FAILED;
2882 }
2883 }
2884
2885 return res ;
2886 }
2887
2888 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
2889
2890 bool IsOk() const
2891 {
2892 CreateIfNeeded() ;
2893 return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
2894 }
2895
2896 protected :
2897 mutable TECObjectRef m_MB2WC_converter;
2898 mutable TECObjectRef m_WC2MB_converter;
2899
2900 TextEncodingBase m_char_encoding;
2901 TextEncodingBase m_unicode_encoding;
2902 };
2903
2904 // MB is decomposed (D) normalized UTF8
2905
2906 class wxMBConv_macUTF8D : public wxMBConv_mac
2907 {
2908 public :
2909 wxMBConv_macUTF8D()
2910 {
2911 Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2912 m_uni = NULL;
2913 m_uniBack = NULL ;
2914 }
2915
2916 ~wxMBConv_macUTF8D()
2917 {
2918 if (m_uni!=NULL)
2919 DisposeUnicodeToTextInfo(&m_uni);
2920 if (m_uniBack!=NULL)
2921 DisposeUnicodeToTextInfo(&m_uniBack);
2922 }
2923
2924 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2925 {
2926 CreateIfNeeded() ;
2927 OSStatus status = noErr ;
2928 ByteCount byteOutLen ;
2929 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2930
2931 char *tbuf = NULL ;
2932
2933 if (buf == NULL)
2934 {
2935 // Apple specs say at least 32
2936 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2937 tbuf = (char*) malloc( n ) ;
2938 }
2939
2940 ByteCount byteBufferLen = n ;
2941 UniChar* ubuf = NULL ;
2942
2943 #if SIZEOF_WCHAR_T == 4
2944 wxMBConvUTF16 converter ;
2945 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2946 byteInLen = unicharlen ;
2947 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2948 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2949 #else
2950 ubuf = (UniChar*) psz ;
2951 #endif
2952
2953 // ubuf is a non-decomposed UniChar buffer
2954
2955 ByteCount dcubuflen = byteInLen * 2 + 2 ;
2956 ByteCount dcubufread , dcubufwritten ;
2957 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2958
2959 ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
2960 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
2961
2962 // we now convert that decomposed buffer into UTF8
2963
2964 status = TECConvertText(
2965 m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2966 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2967
2968 free( dcubuf );
2969
2970 #if SIZEOF_WCHAR_T == 4
2971 free( ubuf ) ;
2972 #endif
2973
2974 if ( buf == NULL )
2975 free(tbuf) ;
2976
2977 size_t res = byteOutLen ;
2978 if ( buf && res < n)
2979 {
2980 buf[res] = 0;
2981 // don't test for round-trip fidelity yet, we cannot guarantee it yet
2982 }
2983
2984 return res ;
2985 }
2986
2987 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2988 {
2989 CreateIfNeeded() ;
2990 OSStatus status = noErr ;
2991 ByteCount byteOutLen ;
2992 ByteCount byteInLen = strlen(psz) + 1;
2993 wchar_t *tbuf = NULL ;
2994 UniChar* ubuf = NULL ;
2995 size_t res = 0 ;
2996
2997 if (buf == NULL)
2998 {
2999 // Apple specs say at least 32
3000 n = wxMax( 32, byteInLen ) ;
3001 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3002 }
3003
3004 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
3005
3006 #if SIZEOF_WCHAR_T == 4
3007 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3008 #else
3009 ubuf = (UniChar*) (buf ? buf : tbuf) ;
3010 #endif
3011
3012 ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3013 ByteCount dcubufread , dcubufwritten ;
3014 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
3015
3016 status = TECConvertText(
3017 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3018 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3019 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3020 // is not properly terminated we get random characters at the end
3021 dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3022
3023 // now from the decomposed UniChar to properly composed uniChar
3024 ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
3025 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ;
3026
3027 free( dcubuf );
3028 byteOutLen = dcubufwritten ;
3029 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3030
3031
3032 #if SIZEOF_WCHAR_T == 4
3033 wxMBConvUTF16 converter ;
3034 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3035 free( ubuf ) ;
3036 #else
3037 res = byteOutLen / sizeof( UniChar ) ;
3038 #endif
3039
3040 if ( buf == NULL )
3041 free(tbuf) ;
3042
3043 if ( buf && res < n)
3044 buf[res] = 0;
3045
3046 return res ;
3047 }
3048
3049 virtual void CreateIfNeeded() const
3050 {
3051 wxMBConv_mac::CreateIfNeeded() ;
3052 if ( m_uni == NULL )
3053 {
3054 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3055 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3056 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3057 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3058 m_map.mappingVersion = kUnicodeUseLatestMapping;
3059
3060 OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
3061 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3062
3063 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3064 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3065 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3066 kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3067 m_map.mappingVersion = kUnicodeUseLatestMapping;
3068 err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
3069 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3070 }
3071 }
3072 protected :
3073 mutable UnicodeToTextInfo m_uni;
3074 mutable UnicodeToTextInfo m_uniBack;
3075 mutable UnicodeMapping m_map;
3076 };
3077 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3078
3079 // ============================================================================
3080 // wxEncodingConverter based conversion classes
3081 // ============================================================================
3082
3083 #if wxUSE_FONTMAP
3084
3085 class wxMBConv_wxwin : public wxMBConv
3086 {
3087 private:
3088 void Init()
3089 {
3090 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3091 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3092 }
3093
3094 public:
3095 // temporarily just use wxEncodingConverter stuff,
3096 // so that it works while a better implementation is built
3097 wxMBConv_wxwin(const wxChar* name)
3098 {
3099 if (name)
3100 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3101 else
3102 m_enc = wxFONTENCODING_SYSTEM;
3103
3104 Init();
3105 }
3106
3107 wxMBConv_wxwin(wxFontEncoding enc)
3108 {
3109 m_enc = enc;
3110
3111 Init();
3112 }
3113
3114 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
3115 {
3116 size_t inbuf = strlen(psz);
3117 if (buf)
3118 {
3119 if (!m2w.Convert(psz, buf))
3120 return wxCONV_FAILED;
3121 }
3122 return inbuf;
3123 }
3124
3125 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
3126 {
3127 const size_t inbuf = wxWcslen(psz);
3128 if (buf)
3129 {
3130 if (!w2m.Convert(psz, buf))
3131 return wxCONV_FAILED;
3132 }
3133
3134 return inbuf;
3135 }
3136
3137 virtual size_t GetMBNulLen() const
3138 {
3139 switch ( m_enc )
3140 {
3141 case wxFONTENCODING_UTF16BE:
3142 case wxFONTENCODING_UTF16LE:
3143 return 2;
3144
3145 case wxFONTENCODING_UTF32BE:
3146 case wxFONTENCODING_UTF32LE:
3147 return 4;
3148
3149 default:
3150 return 1;
3151 }
3152 }
3153
3154 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3155
3156 bool IsOk() const { return m_ok; }
3157
3158 public:
3159 wxFontEncoding m_enc;
3160 wxEncodingConverter m2w, w2m;
3161
3162 private:
3163 // were we initialized successfully?
3164 bool m_ok;
3165
3166 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
3167 };
3168
3169 // make the constructors available for unit testing
3170 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
3171 {
3172 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3173 if ( !result->IsOk() )
3174 {
3175 delete result;
3176 return 0;
3177 }
3178
3179 return result;
3180 }
3181
3182 #endif // wxUSE_FONTMAP
3183
3184 // ============================================================================
3185 // wxCSConv implementation
3186 // ============================================================================
3187
3188 void wxCSConv::Init()
3189 {
3190 m_name = NULL;
3191 m_convReal = NULL;
3192 m_deferred = true;
3193 }
3194
3195 wxCSConv::wxCSConv(const wxChar *charset)
3196 {
3197 Init();
3198
3199 if ( charset )
3200 {
3201 SetName(charset);
3202 }
3203
3204 #if wxUSE_FONTMAP
3205 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3206 #else
3207 m_encoding = wxFONTENCODING_SYSTEM;
3208 #endif
3209 }
3210
3211 wxCSConv::wxCSConv(wxFontEncoding encoding)
3212 {
3213 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3214 {
3215 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3216
3217 encoding = wxFONTENCODING_SYSTEM;
3218 }
3219
3220 Init();
3221
3222 m_encoding = encoding;
3223 }
3224
// Release the charset name and the real converter (see Clear()).
wxCSConv::~wxCSConv()
{
    Clear();
}
3229
3230 wxCSConv::wxCSConv(const wxCSConv& conv)
3231 : wxMBConv()
3232 {
3233 Init();
3234
3235 SetName(conv.m_name);
3236 m_encoding = conv.m_encoding;
3237 }
3238
3239 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3240 {
3241 Clear();
3242
3243 SetName(conv.m_name);
3244 m_encoding = conv.m_encoding;
3245
3246 return *this;
3247 }
3248
3249 void wxCSConv::Clear()
3250 {
3251 free(m_name);
3252 delete m_convReal;
3253
3254 m_name = NULL;
3255 m_convReal = NULL;
3256 }
3257
3258 void wxCSConv::SetName(const wxChar *charset)
3259 {
3260 if (charset)
3261 {
3262 m_name = wxStrdup(charset);
3263 m_deferred = true;
3264 }
3265 }
3266
3267 #if wxUSE_FONTMAP
3268
// map from a font encoding to a charset name (wxString)
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// used by wxCSConv::DoCreate() to remember, for each encoding, the name
// accepted by iconv or an empty string if none of the names worked
static wxEncodingNameCache gs_nameCache;
3273 #endif
3274
// Create the object really doing the conversion for m_name/m_encoding.
//
// Returns a new converter or NULL, in which case the callers in this class
// fall back to the direct Latin-1 conversion. NULL is returned for
// ISO8859-1/default encodings and if no converter could be created (an
// error is logged in the latter case if execution reaches the end of the
// function).
wxMBConv *wxCSConv::DoCreate() const
{
#if wxUSE_FONTMAP
    wxLogTrace(TRACE_STRCONV,
               wxT("creating conversion for %s"),
               (m_name ? m_name
                       : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
#endif // wxUSE_FONTMAP

    // check for the special case of ASCII or ISO8859-1 charset: as we have
    // special knowledge of it anyhow, we don't need to create a special
    // conversion object
    if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
            m_encoding == wxFONTENCODING_DEFAULT )
    {
        // don't convert at all
        return NULL;
    }

    // we trust OS to do conversion better than we can so try external
    // conversion methods first
    //
    // the full order is:
    //      1. OS conversion (iconv() under Unix or Win32 API)
    //      2. hard coded conversions for UTF
    //      3. wxEncodingConverter as fall back

    // step (1)
#ifdef HAVE_ICONV
#if !wxUSE_FONTMAP
    if ( m_name )
#endif // !wxUSE_FONTMAP
    {
        wxString name(m_name);
#if wxUSE_FONTMAP
        wxFontEncoding encoding(m_encoding);
#endif

        // first try the name we were given as is
        if ( !name.empty() )
        {
            wxMBConv_iconv *conv = new wxMBConv_iconv(name);
            if ( conv->IsOk() )
                return conv;

            delete conv;

#if wxUSE_FONTMAP
            encoding =
                wxFontMapperBase::Get()->CharsetToEncoding(name, false);
#endif // wxUSE_FONTMAP
        }
#if wxUSE_FONTMAP
        {
            // then check if we already know which name to use for this
            // encoding: an empty cached name means that none of them worked
            const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
            if ( it != gs_nameCache.end() )
            {
                if ( it->second.empty() )
                    return NULL;

                wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
                if ( conv->IsOk() )
                    return conv;

                delete conv;
            }

            const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
            // CS : in case this does not return valid names (eg for MacRoman) encoding
            // got a 'failure' entry in the cache all the same, although it just has to
            // be created using a different method, so only store failed iconv creation
            // attempts (or perhaps we shouldn't do this at all ?)
            if ( names[0] != NULL )
            {
                // finally try all the names known for this encoding
                for ( ; *names; ++names )
                {
                    wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
                    if ( conv->IsOk() )
                    {
                        gs_nameCache[encoding] = *names;
                        return conv;
                    }

                    delete conv;
                }

                gs_nameCache[encoding] = _T(""); // cache the failure
            }
        }
#endif // wxUSE_FONTMAP
    }
#endif // HAVE_ICONV

#ifdef wxHAVE_WIN32_MB2WC
    {
#if wxUSE_FONTMAP
        wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
                                      : new wxMBConv_win32(m_encoding);
        if ( conv->IsOk() )
            return conv;

        delete conv;
#else
        // NB: this makes all the code below unreachable in this configuration
        return NULL;
#endif
    }
#endif // wxHAVE_WIN32_MB2WC

#if defined(__WXMAC__)
    {
        // leave UTF16 and UTF32 to the built-ins of wx
        if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
            ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
        {
#if wxUSE_FONTMAP
            wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
                                        : new wxMBConv_mac(m_encoding);
#else
            wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
#endif
            if ( conv->IsOk() )
                 return conv;

            delete conv;
        }
    }
#endif

#if defined(__WXCOCOA__)
    {
        if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
        {
#if wxUSE_FONTMAP
            wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
                                          : new wxMBConv_cocoa(m_encoding);
#else
            wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
#endif

            if ( conv->IsOk() )
                 return conv;

            delete conv;
        }
    }
#endif
    // step (2)
    wxFontEncoding enc = m_encoding;
#if wxUSE_FONTMAP
    if ( enc == wxFONTENCODING_SYSTEM && m_name )
    {
        // use "false" to suppress interactive dialogs -- we can be called from
        // anywhere and popping up a dialog from here is the last thing we want to
        // do
        enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
    }
#endif // wxUSE_FONTMAP

    switch ( enc )
    {
        case wxFONTENCODING_UTF7:
             return new wxMBConvUTF7;

        case wxFONTENCODING_UTF8:
             return new wxMBConvUTF8;

        case wxFONTENCODING_UTF16BE:
             return new wxMBConvUTF16BE;

        case wxFONTENCODING_UTF16LE:
             return new wxMBConvUTF16LE;

        case wxFONTENCODING_UTF32BE:
             return new wxMBConvUTF32BE;

        case wxFONTENCODING_UTF32LE:
             return new wxMBConvUTF32LE;

        default:
             // nothing to do but put here to suppress gcc warnings
             break;
    }

    // step (3)
#if wxUSE_FONTMAP
    {
        wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
                                      : new wxMBConv_wxwin(m_encoding);
        if ( conv->IsOk() )
            return conv;

        delete conv;
    }
#endif // wxUSE_FONTMAP

    // NB: This is a hack to prevent deadlock. What could otherwise happen
    //     in Unicode build: wxConvLocal creation ends up being here
    //     because of some failure and logs the error. But wxLog will try to
    //     attach a timestamp, for which it will need wxConvLocal (to convert
    //     time to char* and then wchar_t*), but that fails, tries to log the
    //     error, but wxLog has an (already locked) critical section that
    //     guards the static buffer.
    static bool alreadyLoggingError = false;
    if (!alreadyLoggingError)
    {
        alreadyLoggingError = true;
        wxLogError(_("Cannot convert from the charset '%s'!"),
                   m_name ? m_name
                      :
#if wxUSE_FONTMAP
                         wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
#else // !wxUSE_FONTMAP
                         wxString::Format(_("encoding %i"), m_encoding).c_str()
#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
              );

        alreadyLoggingError = false;
    }

    return NULL;
}
3495
3496 void wxCSConv::CreateConvIfNeeded() const
3497 {
3498 if ( m_deferred )
3499 {
3500 wxCSConv *self = (wxCSConv *)this; // const_cast
3501
3502 #if wxUSE_INTL
3503 // if we don't have neither the name nor the encoding, use the default
3504 // encoding for this system
3505 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3506 {
3507 self->m_name = wxStrdup(wxLocale::GetSystemEncodingName());
3508 }
3509 #endif // wxUSE_INTL
3510
3511 self->m_convReal = DoCreate();
3512 self->m_deferred = false;
3513 }
3514 }
3515
3516 size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3517 {
3518 CreateConvIfNeeded();
3519
3520 if (m_convReal)
3521 return m_convReal->MB2WC(buf, psz, n);
3522
3523 // latin-1 (direct)
3524 size_t len = strlen(psz);
3525
3526 if (buf)
3527 {
3528 for (size_t c = 0; c <= len; c++)
3529 buf[c] = (unsigned char)(psz[c]);
3530 }
3531
3532 return len;
3533 }
3534
3535 size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3536 {
3537 CreateConvIfNeeded();
3538
3539 if (m_convReal)
3540 return m_convReal->WC2MB(buf, psz, n);
3541
3542 // latin-1 (direct)
3543 const size_t len = wxWcslen(psz);
3544 if (buf)
3545 {
3546 for (size_t c = 0; c <= len; c++)
3547 {
3548 if (psz[c] > 0xFF)
3549 return wxCONV_FAILED;
3550
3551 buf[c] = (char)psz[c];
3552 }
3553 }
3554 else
3555 {
3556 for (size_t c = 0; c <= len; c++)
3557 {
3558 if (psz[c] > 0xFF)
3559 return wxCONV_FAILED;
3560 }
3561 }
3562
3563 return len;
3564 }
3565
3566 size_t wxCSConv::GetMBNulLen() const
3567 {
3568 CreateConvIfNeeded();
3569
3570 if ( m_convReal )
3571 {
3572 return m_convReal->GetMBNulLen();
3573 }
3574
3575 return 1;
3576 }
3577
3578 // ----------------------------------------------------------------------------
3579 // globals
3580 // ----------------------------------------------------------------------------
3581
// the object behind wxConvLibc: its class depends on the platform
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// the objects behind the other predefined converters
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;
#if defined(__WXMAC__) && defined(TARGET_CARBON)
static wxMBConv_macUTF8D wxConvMacUTF8DObj;
#endif
// the exported converters are references or pointers to the static objects
// defined above
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;
// wxConvFileName points to the decomposed UTF-8 converter under Carbon, to
// the plain UTF-8 one for the other __WXOSX__ builds and to the libc
// converter everywhere else
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
#if defined(__WXMAC__) && defined(TARGET_CARBON)
                                    wxConvMacUTF8DObj;
#else
                                    wxConvUTF8Obj;
#endif
#else
                                    wxConvLibcObj;
#endif
3614
3615 #else // !wxUSE_WCHAR_T
3616
3617 // stand-ins in absence of wchar_t
// without wchar_t support all the predefined converters are plain wxMBConv
// objects
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;
3622
3623 #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T