From: Vadim Zeitlin Date: Tue, 4 Apr 2006 12:35:21 +0000 (+0000) Subject: added new To/FromWChar() API with more reasonable semantics than old MB2WC/WC2MB... X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/483b0434bfa1b10d2522ac0affe8758e629c21ee added new To/FromWChar() API with more reasonable semantics than old MB2WC/WC2MB; for now both coexist and the change is/should be backwards compatible git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38541 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- diff --git a/docs/latex/wx/mbconv.tex b/docs/latex/wx/mbconv.tex index f1cc6811e2..cf9e3553bf 100644 --- a/docs/latex/wx/mbconv.tex +++ b/docs/latex/wx/mbconv.tex @@ -34,6 +34,14 @@ current locale.} depends on the system.} \end{twocollist} + +\wxheading{Constants} + +\texttt{wxCONV\_FAILED} value is defined as \texttt{(size\_t)$-1$} and is +returned by the conversion functions instead of the length of the converted +string if the conversion fails. + + \wxheading{Derived from} No base class @@ -48,6 +56,7 @@ No base class \helpref{wxEncodingConverter}{wxencodingconverter}, \helpref{wxMBConv classes overview}{mbconvclasses} + \latexignore{\rtfignore{\wxheading{Members}}} @@ -55,12 +64,15 @@ No base class \func{}{wxMBConv}{\void} -Constructor. +Trivial default constructor. + \membersection{wxMBConv::MB2WC}\label{wxmbconvmb2wc} \constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{out}, \param{const char *}{in}, \param{size\_t }{outLen}} +\deprecated{\helpref{ToWChar}{wxmbconvtowchar}} + Converts from a string \arg{in} in multibyte encoding to Unicode putting up to \arg{outLen} characters into the buffer \arg{out}. @@ -89,6 +101,8 @@ The length of the converted string \emph{excluding} the trailing \NUL. \constfunc{virtual size\_t}{WC2MB}{\param{char* }{buf}, \param{const wchar\_t* }{psz}, \param{size\_t }{n}} +\deprecated{\helpref{FromWChar}{wxmbconvfromwchar}} + Converts from Unicode to multibyte encoding. 
The semantics of this function (including the return value meaning) is the same as for \helpref{MB2WC}{wxmbconvmb2wc}. @@ -191,6 +205,45 @@ result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct return type (without const). +\membersection{wxMBConv::FromWChar}\label{wxmbconvfromwchar} + +\constfunc{virtual size\_t}{FromWChar}{\param{char *}{dst}, \param{size\_t }{dstLen}, \param{const wchar\_t *}{src}, \param{size\_t }{srcLen = $-1$}} + +The most general function for converting a wide string to a multibyte string. +The main case is when \arg{dst} is not \NULL and \arg{srcLen} is not $-1$: then +the function converts exactly \arg{srcLen} wide characters starting at \arg{src} +into a multibyte string which it outputs to \arg{dst}. If the length of the +resulting multibyte string is greater than \arg{dstLen}, an error is returned. +Note that if these \arg{srcLen} characters don't include \NUL characters, the +resulting multibyte string is not \NUL-terminated either. + +If \arg{srcLen} is $-1$, the function supposes that the wide string is properly +\NUL-terminated and converts the entire string, including the trailing \NUL. +In this case the multibyte string is also \NUL-terminated (as necessary for the +encoding handled by this conversion). + +Finally, if \arg{dst} is \NULL, the function returns the length of the needed +buffer. + +\wxheading{Return value} + +The number of characters written to \arg{dst} (or the number of characters +which would have been written to it if it were non-\NULL) on success or +\texttt{wxCONV\_FAILED} on error. + + +\membersection{wxMBConv::GetMaxMBNulLen}\label{wxmbconvgetmaxmbnullen} + +\func{static size\_t}{GetMaxMBNulLen}{\void} + +Returns the maximal value which can be returned by +\helpref{GetMBNulLen}{wxmbconvgetmbnullen} for any conversion object. Currently +this value is $4$. + +This method can be used to allocate the buffer with enough space for the +trailing \NUL characters for any encoding. 
+ + \membersection{wxMBConv::GetMBNulLen}\label{wxmbconvgetmbnullen} \constfunc{size\_t}{GetMBNulLen}{\void} @@ -201,3 +254,11 @@ which the string is terminated with $2$ and $4$ \NUL characters respectively. The other cases are not currently supported and $-1$ is returned for them. +\membersection{wxMBConv::ToWChar}\label{wxmbconvtowchar} + +\constfunc{virtual size\_t}{ToWChar}{\param{wchar\_t *}{dst}, \param{size\_t }{dstLen}, \param{const char *}{src}, \param{size\_t }{srcLen = $-1$}} + +This function has the same semantics as \helpref{FromWChar}{wxmbconvfromwchar} +except that it converts a multibyte string to a wide one. + + diff --git a/include/wx/strconv.h b/include/wx/strconv.h index a5f0423c3a..61738e0cff 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -28,6 +28,9 @@ #if wxUSE_WCHAR_T +// the error value returned by wxMBConv methods +#define wxCONV_FAILED ((size_t)-1) + // ---------------------------------------------------------------------------- // wxMBConv (abstract base class for conversions) // ---------------------------------------------------------------------------- @@ -35,24 +38,43 @@ class WXDLLIMPEXP_BASE wxMBConv { public: - // The functions doing actual conversion. On success, the return value is - // the length (i.e. the number of characters, not bytes, and not counting - // the trailing L'\0') of the converted string. On failure, (size_t)-1 is - // returned. In the special case when outputBuf is NULL the return value is - // the same one but nothing is written to the buffer. + // The functions doing actual conversion from/to narrow to/from wide + // character strings. // - // Note that outLen is the length of the output buffer, not the length of - // the input (which is always supposed to be terminated by one or more - // NULs, as appropriate for the encoding)! 
- virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const = 0; - virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const = 0; + // On success, the return value is the length (i.e. the number of + // characters, not bytes) of the converted string including any trailing + // L'\0' or (possibly multiple) '\0'(s). If the conversion fails or if + // there is not enough space for everything, including the trailing NUL + // character(s), in the output buffer, (size_t)-1 is returned. + // + // In the special case when dst is NULL (dstLen is ignored then) the + // return value is the length of the needed buffer but nothing is written + // anywhere. If srcLen is -1, the entire string, up to and including the + // trailing NUL(s), is converted, otherwise exactly srcLen bytes are. + // + // Typical usage: + // + // size_t dstLen = conv.ToWChar(NULL, 0, src); + // if ( dstLen == wxCONV_FAILED ) + // ... handle error ... + // wchar_t *wbuf = new wchar_t[dstLen]; + // conv.ToWChar(wbuf, dstLen, src); + // + virtual size_t ToWChar(wchar_t *dst, size_t dstLen, + const char *src, size_t srcLen = -1) const; + + virtual size_t FromWChar(char *dst, size_t dstLen, + const wchar_t *src, size_t srcLen = -1) const; + + - // MB <-> WC + // Convenience functions for translating NUL-terminated strings: returns + // the buffer containing the converted string or NULL pointer if the + // conversion failed. const wxWCharBuffer cMB2WC(const char *in) const; const wxCharBuffer cWC2MB(const wchar_t *in) const; - // Functions converting strings which may contain embedded NULs and don't - // have to be NUL-terminated. + // Convenience functions for converting strings which may contain embedded + // NULs and don't have to be NUL-terminated. 
// // inLen is the length of the buffer including trailing NUL if any: if the // last 4 bytes of the buffer are all NULs, these functions are more @@ -94,6 +116,31 @@ public: // anything else is not supported currently and -1 should be returned virtual size_t GetMBNulLen() const { return 1; } + // return the maximal value currently returned by GetMBNulLen() for any + // encoding + static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; } + + + // The old conversion functions. The existing classes currently mostly + // implement these ones but we're in transition to using To/FromWChar() + // instead and any new classes should implement just the new functions. + // For now, however, we provide default implementation of To/FromWChar() in + // this base class in terms of MB2WC/WC2MB() to avoid having to rewrite all + // the conversions at once. + // + // On success, the return value is the length (i.e. the number of + // characters, not bytes) not counting the trailing NUL(s) of the converted + // string. On failure, (size_t)-1 is returned. In the special case when + // outputBuf is NULL the return value is the same one but nothing is + // written to the buffer. + // + // Note that outLen is the length of the output buffer, not the length of + // the input (which is always supposed to be terminated by one or more + // NULs, as appropriate for the encoding)! 
+ virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const = 0; + virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const = 0; + + // virtual dtor for any base class virtual ~wxMBConv(); }; diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 39a6996c19..f610bf76e0 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -147,176 +147,127 @@ static size_t decode_utf16(const wxUint16* input, wxUint32& output) // wxMBConv // ---------------------------------------------------------------------------- -wxMBConv::~wxMBConv() +size_t +wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, + const char *src, size_t srcLen) const { - // nothing to do here (necessary for Darwin linking probably) -} + // although new conversion classes are supposed to implement this function + // directly, the existins ones only implement the old MB2WC() and so, to + // avoid to have to rewrite all conversion classes at once, we provide a + // default (but not efficient) implementation of this one in terms of the + // old function by copying the input to ensure that it's NUL-terminated and + // then using MB2WC() to convert it -const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const -{ - if ( psz ) - { - // calculate the length of the buffer needed first - size_t nLen = MB2WC(NULL, psz, 0); - if ( nLen != (size_t)-1 ) - { - // now do the actual conversion - wxWCharBuffer buf(nLen); - nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL - if ( nLen != (size_t)-1 ) - { - return buf; - } - } - } - - wxWCharBuffer buf((wchar_t *)NULL); - - return buf; -} - -const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const -{ - if ( pwz ) - { - size_t nLen = WC2MB(NULL, pwz, 0); - if ( nLen != (size_t)-1 ) - { - wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero - nLen = WC2MB(buf.data(), pwz, nLen + 4); - if ( nLen != (size_t)-1 ) - { - return buf; - } - } - } - - wxCharBuffer buf((char *)NULL); - - return buf; -} - 
-const wxWCharBuffer -wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const -{ - // the currently accumulated wide characters - wxWCharBuffer wbuf; - - // the current length of wbuf - size_t lenBuf = 0; + // the number of chars [which would be] written to dst [if it were not NULL] + size_t dstWritten = 0; // the number of NULs terminating this string - size_t nulLen wxDUMMY_INITIALIZE(0); - - // make a copy of the input string unless it is already properly - // NUL-terminated - wxCharBuffer bufTmp; + size_t nulLen wxDUMMY_INITIALIZE(0); // if we were not given the input size we just have to assume that the // string is properly terminated as we have no way of knowing how long it // is anyhow, but if we do have the size check whether there are enough // NULs at the end - if ( inLen != (size_t)-1 ) + wxCharBuffer bufTmp; + const char *srcEnd; + if ( srcLen != (size_t)-1 ) { // we need to know how to find the end of this string nulLen = GetMBNulLen(); - if ( nulLen == (size_t)-1 ) - return wbuf; + if ( nulLen == wxCONV_FAILED ) + return wxCONV_FAILED; // if there are enough NULs we can avoid the copy - if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) ) + if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) ) { // make a copy in order to properly NUL-terminate the string - bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */); + bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */); char * const p = bufTmp.data(); - memcpy(p, in, inLen); - for ( char *s = p + inLen; s < p + inLen + nulLen; s++ ) + memcpy(p, src, srcLen); + for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ ) *s = '\0'; + + src = bufTmp; } - } - if ( bufTmp ) - in = bufTmp; + srcEnd = src + srcLen; + } + else // quit after the first loop iteration + { + srcEnd = NULL; + } - size_t lenChunk; - for ( const char * const inEnd = in + inLen;; ) + for ( ;; ) { // try to convert the current chunk - lenChunk = MB2WC(NULL, in, 0); + size_t 
lenChunk = MB2WC(NULL, src, 0); if ( lenChunk == 0 ) { // nothing left in the input string, conversion succeeded break; } - if ( lenChunk == (size_t)-1 ) - break; + if ( lenChunk == wxCONV_FAILED ) + return wxCONV_FAILED; // if we already have a previous chunk, leave the NUL separating it // from this one - if ( lenBuf ) - lenBuf++; - - const size_t lenBufNew = lenBuf + lenChunk; - if ( !wbuf.extend(lenBufNew) ) + if ( dstWritten ) { - lenChunk = (size_t)-1; - break; + dstWritten++; + if ( dst ) + dst++; } - lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */); - if ( lenChunk == (size_t)-1 ) - break; + dstWritten += lenChunk; + + if ( dst ) + { + if ( dstWritten > dstLen ) + return wxCONV_FAILED; + + lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */); + if ( lenChunk == wxCONV_FAILED ) + return wxCONV_FAILED; - lenBuf = lenBufNew; + dst += lenChunk; + } - if ( inLen == (size_t)-1 ) + if ( !srcEnd ) { - // convert only one chunk in this case, as we suppose that the - // string is NUL-terminated and so inEnd is not used at all + // we convert the entire string in this cas, as we suppose that the + // string is NUL-terminated and so srcEnd is not used at all break; } // advance the input pointer past the end of this chunk - while ( NotAllNULs(in, nulLen) ) + while ( NotAllNULs(src, nulLen) ) { // notice that we must skip over multiple bytes here as we suppose // that if NUL takes 2 or 4 bytes, then all the other characters do // too and so if advanced by a single byte we might erroneously // detect sequences of NUL bytes in the middle of the input - in += nulLen; + src += nulLen; } - in += nulLen; // skipping over its terminator as well + src += nulLen; // skipping over its terminator as well // note that ">=" (and not just "==") is needed here as the terminator // we skipped just above could be inside or just after the buffer // delimited by inEnd - if ( in >= inEnd ) + if ( src >= srcEnd ) break; } - if ( lenChunk == (size_t)-1 ) - { - // 
conversion failed - lenBuf = 0; - wbuf.reset(); - } - - if ( outLen ) - *outLen = lenBuf; - - return wbuf; + return dstWritten; } -const wxCharBuffer -wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const +size_t +wxMBConv::FromWChar(char *dst, size_t dstLen, + const wchar_t *src, size_t srcLen) const { - // the currently accumulated multibyte characters - wxCharBuffer buf; - - // the current length of buf - size_t lenBuf = 0; + // the number of chars [which would be] written to dst [if it were not NULL] + size_t dstWritten = 0; // make a copy of the input string unless it is already properly // NUL-terminated @@ -324,50 +275,127 @@ wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const // if we don't know its length we have no choice but to assume that it is, // indeed, properly terminated wxWCharBuffer bufTmp; - if ( inLen == (size_t)-1 ) + if ( srcLen == (size_t)-1 ) { - inLen = wxWcslen(in) + 1; + srcLen = wxWcslen(src) + 1; } - else if ( inLen != 0 && in[inLen - 1] != L'\0' ) + else if ( srcLen != 0 && src[srcLen - 1] != L'\0' ) { // make a copy in order to properly NUL-terminate the string - bufTmp = wxWCharBuffer(inLen); - memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t)); + bufTmp = wxWCharBuffer(srcLen); + memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t)); + src = bufTmp; + } + + const size_t lenNul = GetMBNulLen(); + for ( const wchar_t * const srcEnd = src + srcLen; + src < srcEnd; + src += wxWcslen(src) + 1 /* skip L'\0' too */ ) + { + // try to convert the current chunk + size_t lenChunk = WC2MB(NULL, src, 0); + + if ( lenChunk == wxCONV_FAILED ) + return wxCONV_FAILED; + + lenChunk += lenNul; + dstWritten += lenChunk; + + if ( dst ) + { + if ( dstWritten > dstLen ) + return wxCONV_FAILED; + + if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED ) + return wxCONV_FAILED; + + dst += lenChunk; + } } - if ( bufTmp ) - in = bufTmp; + return dstWritten; +} + +wxMBConv::~wxMBConv() +{ + // nothing to do here (necessary for 
Darwin linking probably) +} - for ( const wchar_t * const inEnd = in + inLen;; ) +const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const +{ + if ( psz ) { - // try to convert the current chunk, if anything left - size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0; - if ( lenChunk == 0 ) + // calculate the length of the buffer needed first + const size_t nLen = MB2WC(NULL, psz, 0); + if ( nLen != wxCONV_FAILED ) { - // nothing left in the input string, conversion succeeded - if ( outLen ) - *outLen = lenBuf ? lenBuf - 1 : lenBuf; + // now do the actual conversion + wxWCharBuffer buf(nLen /* +1 added implicitly */); - return buf; + // +1 for the trailing NULL + if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED ) + return buf; } + } - if ( lenChunk == (size_t)-1 ) - break; + return wxWCharBuffer(); +} - const size_t lenBufNew = lenBuf + lenChunk; - if ( !buf.extend(lenBufNew) ) - break; +const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const +{ + if ( pwz ) + { + const size_t nLen = WC2MB(NULL, pwz, 0); + if ( nLen != wxCONV_FAILED ) + { + // extra space for trailing NUL(s) + static const size_t extraLen = GetMaxMBNulLen(); - lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */); - if ( lenChunk == (size_t)-1 ) - break; + wxCharBuffer buf(nLen + extraLen - 1); + if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED ) + return buf; + } + } + + return wxCharBuffer(); +} + +const wxWCharBuffer +wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const +{ + const size_t dstLen = ToWChar(NULL, 0, in, inLen); + if ( dstLen != wxCONV_FAILED ) + { + wxWCharBuffer wbuf(dstLen); + if ( ToWChar(wbuf.data(), dstLen, in, inLen) ) + { + if ( outLen ) + *outLen = dstLen; + return wbuf; + } + } + + if ( outLen ) + *outLen = 0; + + return wxWCharBuffer(); +} - // chunk successfully converted, go to the next one - in += wxWcslen(in) + 1 /* skip NUL too */; - lenBuf = lenBufNew + 1; +const wxCharBuffer +wxMBConv::cWC2MB(const 
wchar_t *in, size_t inLen, size_t *outLen) const +{ + const size_t dstLen = FromWChar(NULL, 0, in, inLen); + if ( dstLen != wxCONV_FAILED ) + { + wxCharBuffer buf(dstLen); + if ( FromWChar(buf.data(), dstLen, in, inLen) ) + { + if ( outLen ) + *outLen = dstLen; + return buf; + } } - // conversion failed if ( outLen ) *outLen = 0;