added new To/FromWChar() API with more reasonable semantics than old MB2WC/WC2MB...

author Vadim Zeitlin <vadim@wxwidgets.org>

Tue, 4 Apr 2006 12:35:21 +0000 (12:35 +0000)

committer Vadim Zeitlin <vadim@wxwidgets.org>

Tue, 4 Apr 2006 12:35:21 +0000 (12:35 +0000)
author Vadim Zeitlin <vadim@wxwidgets.org>
Tue, 4 Apr 2006 12:35:21 +0000 (12:35 +0000)
committer Vadim Zeitlin <vadim@wxwidgets.org>
Tue, 4 Apr 2006 12:35:21 +0000 (12:35 +0000)
diff --git a/docs/latex/wx/mbconv.tex b/docs/latex/wx/mbconv.tex

index f1cc6811e206cbfe738c1043f0c10af4b5eb68e6..cf9e3553bf21f1d48844233320ad2dc1b6948c60 100644 (file)
--- a/docs/latex/wx/mbconv.tex
+++ b/docs/latex/wx/mbconv.tex
@@ -34,6 +34,14 @@ current locale.}
  depends on the system.}
  \end{twocollist}
  
+
+\wxheading{Constants}
+
+\texttt{wxCONV\_FAILED} value is defined as \texttt{(size\_t)$-1$} and is
+returned by the conversion functions instead of the length of the converted
+string if the conversion fails.
+
+
  \wxheading{Derived from}
  
  No base class
@@ -48,6 +56,7 @@ No base class
  \helpref{wxEncodingConverter}{wxencodingconverter}, 
  \helpref{wxMBConv classes overview}{mbconvclasses}
  
+
  \latexignore{\rtfignore{\wxheading{Members}}}
  
  
@@ -55,12 +64,15 @@ No base class
  
  \func{}{wxMBConv}{\void}
  
-Constructor.
+Trivial default constructor.
+
  
  \membersection{wxMBConv::MB2WC}\label{wxmbconvmb2wc}
  
  \constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{out}, \param{const char *}{in}, \param{size\_t }{outLen}}
  
+\deprecated{\helpref{ToWChar}{wxmbconvtowchar}}
+
  Converts from a string \arg{in} in multibyte encoding to Unicode putting up to 
  \arg{outLen} characters into the buffer \arg{out}.
  
@@ -89,6 +101,8 @@ The length of the converted string \emph{excluding} the trailing \NUL.
  
  \constfunc{virtual size\_t}{WC2MB}{\param{char* }{buf}, \param{const wchar\_t* }{psz}, \param{size\_t }{n}}
  
+\deprecated{\helpref{FromWChar}{wxmbconvfromwchar}}
+
  Converts from Unicode to multibyte encoding. The semantics of this function
  (including the return value meaning) is the same as for 
  \helpref{MB2WC}{wxmbconvmb2wc}.
@@ -191,6 +205,45 @@ result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
  return type (without const).
  
  
+\membersection{wxMBConv::FromWChar}\label{wxmbconvfromwchar}
+
+\constfunc{virtual size\_t}{FromWChar}{\param{char *}{dst}, \param{size\_t }{dstLen}, \param{const wchar\_t *}{src}, \param{size\_t }{srcLen = $-1$}}
+
+The most general function for converting a wide string to a multibyte string.
+The main case is when \arg{dst} is not \NULL and \arg{srcLen} is not $-1$: then
+the function converts exactly \arg{srcLen} wide characters starting at \arg{src}
+into a multibyte string which it outputs to \arg{dst}. If the length of the
+resulting multibyte string is greater than \arg{dstLen}, an error is returned.
+Note that if the \arg{srcLen} wide characters don't include a \NUL character,
+the resulting multibyte string is not \NUL-terminated either.
+
+If \arg{srcLen} is $-1$, the function supposes that the string is properly
+\NUL-terminated and converts the entire string, including the trailing \NUL.
+In this case the multibyte string is also \NUL-terminated (as necessary for the
+encoding handled by this conversion).
+
+Finally, if \arg{dst} is \NULL, the function returns the length of the needed
+buffer.
+
+\wxheading{Return value}
+
+The number of characters written to \arg{dst} (or the number of characters
+which would have been written to it if it were non-\NULL) on success or 
+\texttt{wxCONV\_FAILED} on error.
+
+
+\membersection{wxMBConv::GetMaxMBNulLen}\label{wxmbconvgetmaxmbnullen}
+
+\func{static size\_t}{GetMaxMBNulLen}{\void}
+
+Returns the maximal value which can be returned by 
+\helpref{GetMBNulLen}{wxmbconvgetmbnullen} for any conversion object. Currently
+this value is $4$.
+
+This method can be used to allocate the buffer with enough space for the
+trailing \NUL characters for any encoding.
+
+
  \membersection{wxMBConv::GetMBNulLen}\label{wxmbconvgetmbnullen}
  
  \constfunc{size\_t}{GetMBNulLen}{\void}
@@ -201,3 +254,11 @@ which the string is terminated with $2$ and $4$ \NUL characters respectively.
  The other cases are not currently supported and $-1$ is returned for them.
  
  
+\membersection{wxMBConv::ToWChar}\label{wxmbconvtowchar}
+
+\constfunc{virtual size\_t}{ToWChar}{\param{wchar\_t *}{dst}, \param{size\_t }{dstLen}, \param{const char *}{src}, \param{size\_t }{srcLen = $-1$}}
+
+Converts the multibyte string \arg{src} to a wide string in \arg{dst}; the parameters
+and return value have the same meaning as for \helpref{FromWChar}{wxmbconvfromwchar}.
+
+
diff --git a/include/wx/strconv.h b/include/wx/strconv.h

index a5f0423c3a916aae4b5df1fe1025f2284f18db7e..61738e0cff54fa1213a91b8f6e88d6187af1f03f 100644 (file)
--- a/include/wx/strconv.h
+++ b/include/wx/strconv.h
@@ -28,6 +28,9 @@
  
  #if wxUSE_WCHAR_T
  
+// the error value returned by wxMBConv methods
+#define wxCONV_FAILED ((size_t)-1)
+
  // ----------------------------------------------------------------------------
  // wxMBConv (abstract base class for conversions)
  // ----------------------------------------------------------------------------
@@ -35,24 +38,43 @@
  class WXDLLIMPEXP_BASE wxMBConv
  {
  public:
-    // The functions doing actual conversion. On success, the return value is
-    // the length (i.e. the number of characters, not bytes, and not counting
-    // the trailing L'\0') of the converted string. On failure, (size_t)-1 is
-    // returned. In the special case when outputBuf is NULL the return value is
-    // the same one but nothing is written to the buffer.
+    // The functions doing actual conversion from/to narrow to/from wide
+    // character strings.
      //
-    // Note that outLen is the length of the output buffer, not the length of
-    // the input (which is always supposed to be terminated by one or more
-    // NULs, as appropriate for the encoding)!
-    virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const = 0;
-    virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const = 0;
+    // On success, the return value is the length (i.e. the number of
+    // characters, not bytes) of the converted string including any trailing
+    // L'\0' or (possibly multiple) '\0'(s). If the conversion fails or if
+    // there is not enough space for everything, including the trailing NUL
+    // character(s), in the output buffer, (size_t)-1 is returned.
+    //
+    // In the special case when dstLen is 0 (dst may be NULL then) the
+    // return value is the length of the needed buffer but nothing happens
+    // otherwise. If srcLen is -1, the entire string, up to and including the
+    // trailing NUL(s), is converted, otherwise exactly srcLen bytes are.
+    //
+    // Typical usage:
+    //
+    //          size_t dstLen = conv.ToWChar(NULL, 0, src);
+    //          if ( dstLen == wxCONV_FAILED )
+    //              ... handle error ...
+    //          wchar_t *wbuf = new wchar_t[dstLen];
+    //          conv.ToWChar(wbuf, dstLen, src);
+    //
+    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
+                           const char *src, size_t srcLen = -1) const;
+
+    virtual size_t FromWChar(char *dst, size_t dstLen,
+                             const wchar_t *src, size_t srcLen = -1) const;
+
  
-    // MB <-> WC
+    // Convenience functions for translating NUL-terminated strings: returns
+    // the buffer containing the converted string or NULL pointer if the
+    // conversion failed.
      const wxWCharBuffer cMB2WC(const char *in) const;
      const wxCharBuffer cWC2MB(const wchar_t *in) const;
  
-    // Functions converting strings which may contain embedded NULs and don't
-    // have to be NUL-terminated.
+    // Convenience functions for converting strings which may contain embedded
+    // NULs and don't have to be NUL-terminated.
      //
      // inLen is the length of the buffer including trailing NUL if any: if the
      // last 4 bytes of the buffer are all NULs, these functions are more
@@ -94,6 +116,31 @@ public:
      // anything else is not supported currently and -1 should be returned
      virtual size_t GetMBNulLen() const { return 1; }
  
+    // return the maximal value currently returned by GetMBNulLen() for any
+    // encoding
+    static size_t GetMaxMBNulLen() { return 4 /* for UTF-32 */; }
+
+
+    // The old conversion functions. The existing classes currently mostly
+    // implement these ones but we're in transition to using To/FromWChar()
+    // instead and any new classes should implement just the new functions.
+    // For now, however, we provide default implementation of To/FromWChar() in
+    // this base class in terms of MB2WC/WC2MB() to avoid having to rewrite all
+    // the conversions at once.
+    //
+    // On success, the return value is the length (i.e. the number of
+    // characters, not bytes) not counting the trailing NUL(s) of the converted
+    // string. On failure, (size_t)-1 is returned. In the special case when
+    // out is NULL the return value is the same one but nothing is
+    // written to the buffer.
+    //
+    // Note that outLen is the length of the output buffer, not the length of
+    // the input (which is always supposed to be terminated by one or more
+    // NULs, as appropriate for the encoding)!
+    virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const = 0;
+    virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const = 0;
+
+
      // virtual dtor for any base class
      virtual ~wxMBConv();
  };
diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp

index 39a6996c19cfe311411d2c62b0d504b3d6b465c6..f610bf76e0a0b1116956b92d603fc3c7dc4e2d63 100644 (file)
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -147,176 +147,127 @@ static size_t decode_utf16(const wxUint16* input, wxUint32& output)
  // wxMBConv
  // ----------------------------------------------------------------------------
  
-wxMBConv::~wxMBConv()
+size_t
+wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
+                  const char *src, size_t srcLen) const
  {
-    // nothing to do here (necessary for Darwin linking probably)
-}
+    // although new conversion classes are supposed to implement this function
+    // directly, the existing ones only implement the old MB2WC() and so, to
+    // avoid having to rewrite all conversion classes at once, we provide a
+    // default (but not efficient) implementation of this one in terms of the
+    // old function by copying the input to ensure that it's NUL-terminated and
+    // then using MB2WC() to convert it
  
-const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
-{
-    if ( psz )
-    {
-        // calculate the length of the buffer needed first
-        size_t nLen = MB2WC(NULL, psz, 0);
-        if ( nLen != (size_t)-1 )
-        {
-            // now do the actual conversion
-            wxWCharBuffer buf(nLen);
-            nLen = MB2WC(buf.data(), psz, nLen + 1); // with the trailing NULL
-            if ( nLen != (size_t)-1 )
-            {
-                return buf;
-            }
-        }
-    }
-
-    wxWCharBuffer buf((wchar_t *)NULL);
-
-    return buf;
-}
-
-const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
-{
-    if ( pwz )
-    {
-        size_t nLen = WC2MB(NULL, pwz, 0);
-        if ( nLen != (size_t)-1 )
-        {
-            wxCharBuffer buf(nLen+3);       // space for a wxUint32 trailing zero
-            nLen = WC2MB(buf.data(), pwz, nLen + 4);
-            if ( nLen != (size_t)-1 )
-            {
-                return buf;
-            }
-        }
-    }
-
-    wxCharBuffer buf((char *)NULL);
-
-    return buf;
-}
-
-const wxWCharBuffer
-wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
-{
-    // the currently accumulated wide characters
-    wxWCharBuffer wbuf;
-
-    // the current length of wbuf
-    size_t lenBuf = 0;
+    // the number of chars [which would be] written to dst [if it were not NULL]
+    size_t dstWritten = 0;
  
      // the number of NULs terminating this string
-    size_t nulLen   wxDUMMY_INITIALIZE(0);
-
-    // make a copy of the input string unless it is already properly
-    // NUL-terminated
-    wxCharBuffer bufTmp;
+    size_t nulLen wxDUMMY_INITIALIZE(0);
  
      // if we were not given the input size we just have to assume that the
      // string is properly terminated as we have no way of knowing how long it
      // is anyhow, but if we do have the size check whether there are enough
      // NULs at the end
-    if ( inLen != (size_t)-1 )
+    wxCharBuffer bufTmp;
+    const char *srcEnd;
+    if ( srcLen != (size_t)-1 )
      {
          // we need to know how to find the end of this string
          nulLen = GetMBNulLen();
-        if ( nulLen == (size_t)-1 )
-            return wbuf;
+        if ( nulLen == wxCONV_FAILED )
+            return wxCONV_FAILED;
  
          // if there are enough NULs we can avoid the copy
-        if ( inLen < nulLen || NotAllNULs(in + inLen - nulLen, nulLen) )
+        if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
          {
              // make a copy in order to properly NUL-terminate the string
-            bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
+            bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
              char * const p = bufTmp.data();
-            memcpy(p, in, inLen);
-            for ( char *s = p + inLen; s < p + inLen + nulLen; s++ )
+            memcpy(p, src, srcLen);
+            for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
                  *s = '\0';
+
+            src = bufTmp;
          }
-    }
  
-    if ( bufTmp )
-        in = bufTmp;
+        srcEnd = src + srcLen;
+    }
+    else // quit after the first loop iteration
+    {
+        srcEnd = NULL;
+    }
  
-    size_t lenChunk;
-    for ( const char * const inEnd = in + inLen;; )
+    for ( ;; )
      {
          // try to convert the current chunk
-        lenChunk = MB2WC(NULL, in, 0);
+        size_t lenChunk = MB2WC(NULL, src, 0);
          if ( lenChunk == 0 )
          {
              // nothing left in the input string, conversion succeeded
              break;
          }
  
-        if ( lenChunk == (size_t)-1 )
-            break;
+        if ( lenChunk == wxCONV_FAILED )
+            return wxCONV_FAILED;
  
          // if we already have a previous chunk, leave the NUL separating it
          // from this one
-        if ( lenBuf )
-            lenBuf++;
-
-        const size_t lenBufNew = lenBuf + lenChunk;
-        if ( !wbuf.extend(lenBufNew) )
+        if ( dstWritten )
          {
-            lenChunk = (size_t)-1;
-            break;
+            dstWritten++;
+            if ( dst )
+                dst++;
          }
  
-        lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
-        if ( lenChunk == (size_t)-1 )
-            break;
+        dstWritten += lenChunk;
+
+        if ( dst )
+        {
+            if ( dstWritten > dstLen )
+                return wxCONV_FAILED;
+
+            lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
+            if ( lenChunk == wxCONV_FAILED )
+                return wxCONV_FAILED;
  
-        lenBuf = lenBufNew;
+            dst += lenChunk;
+        }
  
-        if ( inLen == (size_t)-1 )
+        if ( !srcEnd )
          {
-            // convert only one chunk in this case, as we suppose that the
-            // string is NUL-terminated and so inEnd is not used at all
+            // we convert the entire string in this case, as we suppose that the
+            // string is NUL-terminated and so srcEnd is not used at all
              break;
          }
  
          // advance the input pointer past the end of this chunk
-        while ( NotAllNULs(in, nulLen) )
+        while ( NotAllNULs(src, nulLen) )
          {
              // notice that we must skip over multiple bytes here as we suppose
              // that if NUL takes 2 or 4 bytes, then all the other characters do
              // too and so if advanced by a single byte we might erroneously
              // detect sequences of NUL bytes in the middle of the input
-            in += nulLen;
+            src += nulLen;
          }
  
-        in += nulLen; // skipping over its terminator as well
+        src += nulLen; // skipping over its terminator as well
  
          // note that ">=" (and not just "==") is needed here as the terminator
          // we skipped just above could be inside or just after the buffer
          // delimited by inEnd
-        if ( in >= inEnd )
+        if ( src >= srcEnd )
              break;
      }
  
-    if ( lenChunk == (size_t)-1 )
-    {
-        // conversion failed
-        lenBuf = 0;
-        wbuf.reset();
-    }
-
-    if ( outLen )
-        *outLen = lenBuf;
-
-    return wbuf;
+    return dstWritten;
  }
  
-const wxCharBuffer
-wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
+size_t
+wxMBConv::FromWChar(char *dst, size_t dstLen,
+                    const wchar_t *src, size_t srcLen) const
  {
-    // the currently accumulated multibyte characters
-    wxCharBuffer buf;
-
-    // the current length of buf
-    size_t lenBuf = 0;
+    // the number of chars [which would be] written to dst [if it were not NULL]
+    size_t dstWritten = 0;
  
      // make a copy of the input string unless it is already properly
      // NUL-terminated
@@ -324,50 +275,127 @@ wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
      // if we don't know its length we have no choice but to assume that it is,
      // indeed, properly terminated
      wxWCharBuffer bufTmp;
-    if ( inLen == (size_t)-1 )
+    if ( srcLen == (size_t)-1 )
      {
-        inLen = wxWcslen(in) + 1;
+        srcLen = wxWcslen(src) + 1;
      }
-    else if ( inLen != 0 && in[inLen - 1] != L'\0' )
+    else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
      {
          // make a copy in order to properly NUL-terminate the string
-        bufTmp = wxWCharBuffer(inLen);
-        memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
+        bufTmp = wxWCharBuffer(srcLen);
+        memcpy(bufTmp.data(), src, srcLen*sizeof(wchar_t));
+        src = bufTmp;
+    }
+
+    const size_t lenNul = GetMBNulLen();
+    for ( const wchar_t * const srcEnd = src + srcLen;
+          src < srcEnd;
+          src += wxWcslen(src) + 1 /* skip L'\0' too */ )
+    {
+        // try to convert the current chunk
+        size_t lenChunk = WC2MB(NULL, src, 0);
+
+        if ( lenChunk == wxCONV_FAILED )
+            return wxCONV_FAILED;
+
+        lenChunk += lenNul;
+        dstWritten += lenChunk;
+
+        if ( dst )
+        {
+            if ( dstWritten > dstLen )
+                return wxCONV_FAILED;
+
+            if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
+                return wxCONV_FAILED;
+
+            dst += lenChunk;
+        }
      }
  
-    if ( bufTmp )
-        in = bufTmp;
+    return dstWritten;
+}
+
+wxMBConv::~wxMBConv()
+{
+    // nothing to do here (necessary for Darwin linking probably)
+}
  
-    for ( const wchar_t * const inEnd = in + inLen;; )
+const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
+{
+    if ( psz )
      {
-        // try to convert the current chunk, if anything left
-        size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
-        if ( lenChunk == 0 )
+        // calculate the length of the buffer needed first
+        const size_t nLen = MB2WC(NULL, psz, 0);
+        if ( nLen != wxCONV_FAILED )
          {
-            // nothing left in the input string, conversion succeeded
-            if ( outLen )
-                *outLen = lenBuf ? lenBuf - 1 : lenBuf;
+            // now do the actual conversion
+            wxWCharBuffer buf(nLen /* +1 added implicitly */);
  
-            return buf;
+            // +1 for the trailing NULL
+            if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
+                return buf;
          }
+    }
  
-        if ( lenChunk == (size_t)-1 )
-            break;
+    return wxWCharBuffer();
+}
  
-        const size_t lenBufNew = lenBuf + lenChunk;
-        if ( !buf.extend(lenBufNew) )
-            break;
+const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
+{
+    if ( pwz )
+    {
+        const size_t nLen = WC2MB(NULL, pwz, 0);
+        if ( nLen != wxCONV_FAILED )
+        {
+            // extra space for trailing NUL(s)
+            static const size_t extraLen = GetMaxMBNulLen();
  
-        lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
-        if ( lenChunk == (size_t)-1 )
-            break;
+            wxCharBuffer buf(nLen + extraLen - 1);
+            if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
+                return buf;
+        }
+    }
+
+    return wxCharBuffer();
+}
+
+const wxWCharBuffer
+wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
+{
+    const size_t dstLen = ToWChar(NULL, 0, in, inLen);
+    if ( dstLen != wxCONV_FAILED )
+    {
+        wxWCharBuffer wbuf(dstLen);
+        if ( ToWChar(wbuf.data(), dstLen, in, inLen) )
+        {
+            if ( outLen )
+                *outLen = dstLen;
+            return wbuf;
+        }
+    }
+
+    if ( outLen )
+        *outLen = 0;
+
+    return wxWCharBuffer();
+}
  
-        // chunk successfully converted, go to the next one
-        in += wxWcslen(in) + 1 /* skip NUL too */;
-        lenBuf = lenBufNew + 1;
+const wxCharBuffer
+wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
+{
+    const size_t dstLen = FromWChar(NULL, 0, in, inLen);
+    if ( dstLen != wxCONV_FAILED )
+    {
+        wxCharBuffer buf(dstLen);
+        if ( FromWChar(buf.data(), dstLen, in, inLen) )
+        {
+            if ( outLen )
+                *outLen = dstLen;
+            return buf;
+        }
      }
  
-    // conversion failed
      if ( outLen )
          *outLen = 0;
author	Vadim Zeitlin <vadim@wxwidgets.org>
	Tue, 4 Apr 2006 12:35:21 +0000 (12:35 +0000)
committer	Vadim Zeitlin <vadim@wxwidgets.org>
	Tue, 4 Apr 2006 12:35:21 +0000 (12:35 +0000)
docs/latex/wx/mbconv.tex		patch \| blob \| blame \| history
include/wx/strconv.h		patch \| blob \| blame \| history
src/common/strconv.cpp		patch \| blob \| blame \| history