depends on the system.}
\end{twocollist}
+
+\wxheading{Constants}
+
+\texttt{wxCONV\_FAILED} value is defined as \texttt{(size\_t)$-1$} and is
+returned by the conversion functions instead of the length of the converted
+string if the conversion fails.
+
+
\wxheading{Derived from}
No base class
\helpref{wxEncodingConverter}{wxencodingconverter},
\helpref{wxMBConv classes overview}{mbconvclasses}
+
\latexignore{\rtfignore{\wxheading{Members}}}
\func{}{wxMBConv}{\void}
-Constructor.
+Trivial default constructor.
+
\membersection{wxMBConv::MB2WC}\label{wxmbconvmb2wc}
\constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{out}, \param{const char *}{in}, \param{size\_t }{outLen}}
+\deprecated{\helpref{ToWChar}{wxmbconvtowchar}}
+
Converts from a string \arg{in} in multibyte encoding to Unicode putting up to
\arg{outLen} characters into the buffer \arg{out}.
\constfunc{virtual size\_t}{WC2MB}{\param{char* }{buf}, \param{const wchar\_t* }{psz}, \param{size\_t }{n}}
+\deprecated{\helpref{FromWChar}{wxmbconvfromwchar}}
+
Converts from Unicode to multibyte encoding. The semantics of this function
(including the return value meaning) is the same as for
\helpref{MB2WC}{wxmbconvmb2wc}.
return type (without const).
+\membersection{wxMBConv::FromWChar}\label{wxmbconvfromwchar}
+
+\constfunc{virtual size\_t}{FromWChar}{\param{char *}{dst}, \param{size\_t }{dstLen}, \param{const wchar\_t *}{src}, \param{size\_t }{srcLen = $-1$}}
+
+The most general function for converting a wide string to a multibyte string.
+The main case is when \arg{dst} is not \NULL and \arg{srcLen} is not $-1$: then
+the function converts exactly \arg{srcLen} wide characters starting at
+\arg{src} into a multibyte string which it outputs to \arg{dst}. If the length
+of the resulting multibyte string is greater than \arg{dstLen}, an error is
+returned. Note that if the \arg{srcLen} wide characters don't include \NUL
+characters, the resulting multibyte string is not \NUL-terminated either.
+
+If \arg{srcLen} is $-1$, the function supposes that the wide string is properly
+\NUL-terminated and converts the entire string, including the trailing \NUL.
+In this case the multibyte string is also \NUL-terminated (i.e. as necessary
+for the encoding handled by this conversion).
+
+Finally, if \arg{dst} is \NULL, the function returns the length of the needed
+buffer.
+
+\wxheading{Return value}
+
+The number of characters written to \arg{dst} (or the number of characters
+which would have been written to it if it were non-\NULL) on success or
+\texttt{wxCONV\_FAILED} on error.
+
+
+\membersection{wxMBConv::GetMaxMBNulLen}\label{wxmbconvgetmaxmbnullen}
+
+\func{static size\_t}{GetMaxMBNulLen}{\void}
+
+Returns the maximal value which can be returned by
+\helpref{GetMBNulLen}{wxmbconvgetmbnullen} for any conversion object. Currently
+this value is $4$.
+
+This method can be used to allocate the buffer with enough space for the
+trailing \NUL characters for any encoding.
+
+
\membersection{wxMBConv::GetMBNulLen}\label{wxmbconvgetmbnullen}
\constfunc{size\_t}{GetMBNulLen}{\void}
The other cases are not currently supported and $-1$ is returned for them.
+\membersection{wxMBConv::ToWChar}\label{wxmbconvtowchar}
+
+\constfunc{virtual size\_t}{ToWChar}{\param{wchar\_t *}{dst}, \param{size\_t }{dstLen}, \param{const char *}{src}, \param{size\_t }{srcLen = $-1$}}
+
+This function is the counterpart of \helpref{FromWChar}{wxmbconvfromwchar}: it
+follows the same conventions for \arg{dst}, \arg{dstLen}, \arg{srcLen} and the
+return value, but converts the multibyte string \arg{src} to a wide string.
+
+
#if wxUSE_WCHAR_T
+// the error value returned by wxMBConv methods
+#define wxCONV_FAILED ((size_t)-1)
+
// ----------------------------------------------------------------------------
// wxMBConv (abstract base class for conversions)
// ----------------------------------------------------------------------------
class WXDLLIMPEXP_BASE wxMBConv
{
public:
- // The functions doing actual conversion. On success, the return value is
- // the length (i.e. the number of characters, not bytes, and not counting
- // the trailing L'\0') of the converted string. On failure, (size_t)-1 is
- // returned. In the special case when outputBuf is NULL the return value is
- // the same one but nothing is written to the buffer.
+ // The functions doing actual conversion from/to narrow to/from wide
+ // character strings.
//
- // Note that outLen is the length of the output buffer, not the length of
- // the input (which is always supposed to be terminated by one or more
- // NULs, as appropriate for the encoding)!
- virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const = 0;
- virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const = 0;
+ // On success, the return value is the length (i.e. the number of
+ // characters, not bytes) of the converted string including any trailing
+ // L'\0' or (possibly multiple) '\0'(s). If the conversion fails or if
+ // there is not enough space for everything, including the trailing NUL
+ // character(s), in the output buffer, (size_t)-1 is returned.
+ //
+ // In the special case when dst is NULL (the value of dstLen is ignored
+ // then) the return value is the length of the needed buffer but nothing
+ // happens otherwise. If srcLen is -1, the entire string, up to and
+ // including the trailing NUL(s), is converted, otherwise exactly srcLen
+ // bytes are.
+ //
+ // Typical usage:
+ //
+ // size_t dstLen = conv.ToWChar(NULL, 0, src);
+ // if ( dstLen == wxCONV_FAILED )
+ // ... handle error ...
+ // wchar_t *wbuf = new wchar_t[dstLen];
+ // conv.ToWChar(wbuf, dstLen, src);
+ //
+ virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen = -1) const;
+
+ virtual size_t FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen = -1) const;
+
- // MB <-> WC
+ // Convenience functions for translating NUL-terminated strings: returns
+ // the buffer containing the converted string or NULL pointer if the
+ // conversion failed.
const wxWCharBuffer cMB2WC(const char *in) const;
const wxCharBuffer cWC2MB(const wchar_t *in) const;
- // Functions converting strings which may contain embedded NULs and don't
- // have to be NUL-terminated.
+ // Convenience functions for converting strings which may contain embedded
+ // NULs and don't have to be NUL-terminated.
//
// inLen is the length of the buffer including trailing NUL if any: if the
// last 4 bytes of the buffer are all NULs, these functions are more
// anything else is not supported currently and -1 should be returned
virtual size_t GetMBNulLen() const { return 1; }
+ // return the maximal value currently returned by GetMBNulLen() for any
+ // encoding
+ static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; }
+
+
+ // The old conversion functions. The existing classes currently mostly
+ // implement these ones but we're in transition to using To/FromWChar()
+ // instead and any new classes should implement just the new functions.
+ // For now, however, we provide default implementation of To/FromWChar() in
+ // this base class in terms of MB2WC/WC2MB() to avoid having to rewrite all
+ // the conversions at once.
+ //
+ // On success, the return value is the length (i.e. the number of
+ // characters, not bytes) not counting the trailing NUL(s) of the converted
+ // string. On failure, (size_t)-1 is returned. In the special case when
+ // outputBuf is NULL the return value is the same one but nothing is
+ // written to the buffer.
+ //
+ // Note that outLen is the length of the output buffer, not the length of
+ // the input (which is always supposed to be terminated by one or more
+ // NULs, as appropriate for the encoding)!
+ virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const = 0;
+ virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const = 0;
+
+
// virtual dtor for any base class
virtual ~wxMBConv();
};
// wxMBConv
// ----------------------------------------------------------------------------
-wxMBConv::~wxMBConv()
+size_t
+wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
+ const char *src, size_t srcLen) const
{
- // nothing to do here (necessary for Darwin linking probably)
-}
+ // although new conversion classes are supposed to implement this function
+ // directly, the existing ones only implement the old MB2WC() and so, to
+ // avoid having to rewrite all conversion classes at once, we provide a
+ // default (but not efficient) implementation of this one in terms of the
+ // old function by copying the input to ensure that it's NUL-terminated and
+ // then using MB2WC() to convert it
-const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
-{
- if ( psz )
- {
- // calculate the length of the buffer needed first
- size_t nLen = MB2WC(NULL, psz, 0);
- if ( nLen != (size_t)-1 )
- {
- // now do the actual conversion
- wxWCharBuffer buf(nLen);
- nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
- if ( nLen != (size_t)-1 )
- {
- return buf;
- }
- }
- }
-
- wxWCharBuffer buf((wchar_t *)NULL);
-
- return buf;
-}
-
-const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
-{
- if ( pwz )
- {
- size_t nLen = WC2MB(NULL, pwz, 0);
- if ( nLen != (size_t)-1 )
- {
- wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
- nLen = WC2MB(buf.data(), pwz, nLen + 4);
- if ( nLen != (size_t)-1 )
- {
- return buf;
- }
- }
- }
-
- wxCharBuffer buf((char *)NULL);
-
- return buf;
-}
-
-const wxWCharBuffer
-wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
-{
- // the currently accumulated wide characters
- wxWCharBuffer wbuf;
-
- // the current length of wbuf
- size_t lenBuf = 0;
+ // the number of chars [which would be] written to dst [if it were not NULL]
+ size_t dstWritten = 0;
// the number of NULs terminating this string
- size_t nulLen wxDUMMY_INITIALIZE(0);
-
- // make a copy of the input string unless it is already properly
- // NUL-terminated
- wxCharBuffer bufTmp;
+ size_t nulLen wxDUMMY_INITIALIZE(0);
// if we were not given the input size we just have to assume that the
// string is properly terminated as we have no way of knowing how long it
// is anyhow, but if we do have the size check whether there are enough
// NULs at the end
- if ( inLen != (size_t)-1 )
+ wxCharBuffer bufTmp;
+ const char *srcEnd;
+ if ( srcLen != (size_t)-1 )
{
// we need to know how to find the end of this string
nulLen = GetMBNulLen();
- if ( nulLen == (size_t)-1 )
- return wbuf;
+ if ( nulLen == wxCONV_FAILED )
+ return wxCONV_FAILED;
// if there are enough NULs we can avoid the copy
- if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
+ if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
{
// make a copy in order to properly NUL-terminate the string
- bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
+ bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
char * const p = bufTmp.data();
- memcpy(p, in, inLen);
- for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
+ memcpy(p, src, srcLen);
+ for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
*s = '\0';
+
+ src = bufTmp;
}
- }
- if ( bufTmp )
- in = bufTmp;
+ srcEnd = src + srcLen;
+ }
+ else // quit after the first loop iteration
+ {
+ srcEnd = NULL;
+ }
- size_t lenChunk;
- for ( const char * const inEnd = in + inLen;; )
+ for ( ;; )
{
// try to convert the current chunk
- lenChunk = MB2WC(NULL, in, 0);
+ size_t lenChunk = MB2WC(NULL, src, 0);
if ( lenChunk == 0 )
{
// nothing left in the input string, conversion succeeded
break;
}
- if ( lenChunk == (size_t)-1 )
- break;
+ if ( lenChunk == wxCONV_FAILED )
+ return wxCONV_FAILED;
// if we already have a previous chunk, leave the NUL separating it
// from this one
- if ( lenBuf )
- lenBuf++;
-
- const size_t lenBufNew = lenBuf + lenChunk;
- if ( !wbuf.extend(lenBufNew) )
+ if ( dstWritten )
{
- lenChunk = (size_t)-1;
- break;
+ dstWritten++;
+ if ( dst )
+ dst++;
}
- lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
- if ( lenChunk == (size_t)-1 )
- break;
+ dstWritten += lenChunk;
+
+ if ( dst )
+ {
+ if ( dstWritten > dstLen )
+ return wxCONV_FAILED;
+
+ lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
+ if ( lenChunk == wxCONV_FAILED )
+ return wxCONV_FAILED;
- lenBuf = lenBufNew;
+ dst += lenChunk;
+ }
- if ( inLen == (size_t)-1 )
+ if ( !srcEnd )
{
- // convert only one chunk in this case, as we suppose that the
- // string is NUL-terminated and so inEnd is not used at all
+ // we convert the entire string in this case, as we suppose that the
+ // string is NUL-terminated and so srcEnd is not used at all
break;
}
// advance the input pointer past the end of this chunk
- while ( NotAllNULs(in, nulLen) )
+ while ( NotAllNULs(src, nulLen) )
{
// notice that we must skip over multiple bytes here as we suppose
// that if NUL takes 2 or 4 bytes, then all the other characters do
// too and so if advanced by a single byte we might erroneously
// detect sequences of NUL bytes in the middle of the input
- in += nulLen;
+ src += nulLen;
}
- in += nulLen; // skipping over its terminator as well
+ src += nulLen; // skipping over its terminator as well
// note that ">=" (and not just "==") is needed here as the terminator
// we skipped just above could be inside or just after the buffer
// delimited by inEnd
- if ( in >= inEnd )
+ if ( src >= srcEnd )
break;
}
- if ( lenChunk == (size_t)-1 )
- {
- // conversion failed
- lenBuf = 0;
- wbuf.reset();
- }
-
- if ( outLen )
- *outLen = lenBuf;
-
- return wbuf;
+ return dstWritten;
}
-const wxCharBuffer
-wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
+size_t
+wxMBConv::FromWChar(char *dst, size_t dstLen,
+ const wchar_t *src, size_t srcLen) const
{
- // the currently accumulated multibyte characters
- wxCharBuffer buf;
-
- // the current length of buf
- size_t lenBuf = 0;
+ // the number of chars [which would be] written to dst [if it were not NULL]
+ size_t dstWritten = 0;
// make a copy of the input string unless it is already properly
// NUL-terminated
// if we don't know its length we have no choice but to assume that it is,
// indeed, properly terminated
wxWCharBuffer bufTmp;
- if ( inLen == (size_t)-1 )
+ if ( srcLen == (size_t)-1 )
{
- inLen = wxWcslen(in) + 1;
+ srcLen = wxWcslen(src) + 1;
}
- else if ( inLen != 0 && in[inLen - 1] != L'\0' )
+ else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
{
// make a copy in order to properly NUL-terminate the string
- bufTmp = wxWCharBuffer(inLen);
- memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
+ bufTmp = wxWCharBuffer(srcLen);
+ memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t));
+ src = bufTmp;
+ }
+
+ const size_t lenNul = GetMBNulLen();
+ for ( const wchar_t * const srcEnd = src + srcLen;
+ src < srcEnd;
+ src += wxWcslen(src) + 1 /* skip L'\0' too */ )
+ {
+ // try to convert the current chunk
+ size_t lenChunk = WC2MB(NULL, src, 0);
+
+ if ( lenChunk == wxCONV_FAILED )
+ return wxCONV_FAILED;
+
+ lenChunk += lenNul;
+ dstWritten += lenChunk;
+
+ if ( dst )
+ {
+ if ( dstWritten > dstLen )
+ return wxCONV_FAILED;
+
+ if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
+ return wxCONV_FAILED;
+
+ dst += lenChunk;
+ }
}
- if ( bufTmp )
- in = bufTmp;
+ return dstWritten;
+}
+
+wxMBConv::~wxMBConv()
+{
+ // nothing to do here (necessary for Darwin linking probably)
+}
- for ( const wchar_t * const inEnd = in + inLen;; )
+const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
+{
+ if ( psz )
{
- // try to convert the current chunk, if anything left
- size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
- if ( lenChunk == 0 )
+ // calculate the length of the buffer needed first
+ const size_t nLen = MB2WC(NULL, psz, 0);
+ if ( nLen != wxCONV_FAILED )
{
- // nothing left in the input string, conversion succeeded
- if ( outLen )
- *outLen = lenBuf ? lenBuf - 1 : lenBuf;
+ // now do the actual conversion
+ wxWCharBuffer buf(nLen /* +1 added implicitly */);
- return buf;
+ // +1 for the trailing NULL
+ if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
+ return buf;
}
+ }
- if ( lenChunk == (size_t)-1 )
- break;
+ return wxWCharBuffer();
+}
- const size_t lenBufNew = lenBuf + lenChunk;
- if ( !buf.extend(lenBufNew) )
- break;
+const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
+{
+ if ( pwz )
+ {
+ const size_t nLen = WC2MB(NULL, pwz, 0);
+ if ( nLen != wxCONV_FAILED )
+ {
+ // extra space for trailing NUL(s)
+ static const size_t extraLen = GetMaxMBNulLen();
- lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
- if ( lenChunk == (size_t)-1 )
- break;
+ wxCharBuffer buf(nLen + extraLen - 1);
+ if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
+ return buf;
+ }
+ }
+
+ return wxCharBuffer();
+}
+
+// Convert a multibyte string which may contain embedded NULs to wide
+// characters using ToWChar().
+//
+// in is the input string and inLen its length in bytes, or (size_t)-1 if the
+// string is NUL-terminated. On success the converted buffer is returned and,
+// if outLen is not NULL, *outLen receives the length reported by ToWChar().
+// On failure an empty buffer is returned and *outLen is set to 0.
+const wxWCharBuffer
+wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
+{
+    // the first pass only computes the size of the buffer we need
+    const size_t dstLen = ToWChar(NULL, 0, in, inLen);
+    if ( dstLen != wxCONV_FAILED )
+    {
+        // the buffer holds dstLen + 1 characters (1 is added implicitly), so
+        // terminate it ourselves: ToWChar() doesn't NUL-terminate its output
+        // when the input isn't NUL-terminated either
+        wxWCharBuffer wbuf(dstLen);
+        wbuf.data()[dstLen] = L'\0';
+
+        // wxCONV_FAILED is non-zero, so the result must be compared with it
+        // explicitly: testing it for truth would treat a failure as success
+        // (and a successful conversion producing 0 characters as a failure)
+        if ( ToWChar(wbuf.data(), dstLen, in, inLen) != wxCONV_FAILED )
+        {
+            if ( outLen )
+                *outLen = dstLen;
+            return wbuf;
+        }
+    }
+
+    if ( outLen )
+        *outLen = 0;
+
+    return wxWCharBuffer();
+}
- // chunk successfully converted, go to the next one
- in += wxWcslen(in) + 1 /* skip NUL too */;
- lenBuf = lenBufNew + 1;
+const wxCharBuffer
+wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
+{
+ const size_t dstLen = FromWChar(NULL, 0, in, inLen);
+ if ( dstLen != wxCONV_FAILED )
+ {
+ wxCharBuffer buf(dstLen);
+ if ( FromWChar(buf.data(), dstLen, in, inLen) )
+ {
+ if ( outLen )
+ *outLen = dstLen;
+ return buf;
+ }
}
- // conversion failed
if ( outLen )
*outLen = 0;