// Convenience functions for converting strings which may contain embedded
// NULs and don't have to be NUL-terminated.
//
- // inLen is the length of the buffer including trailing NUL if any: if the
- // last 4 bytes of the buffer are all NULs, these functions are more
- // efficient as they avoid copying the string, but otherwise a copy is made
- // internally which could be quite bad for (very) long strings.
+ // inLen is the length of the buffer including trailing NUL if any or
+ // wxNO_LEN if the input is NUL-terminated.
//
// outLen receives, if not NULL, the length of the converted string or 0 if
// the conversion failed (returning 0 and not -1 in this case makes it
// difficult to distinguish between failed conversion and empty input but
- // this is done for backwards compatibility)
+ // this is done for backwards compatibility). Notice that the rules for
+ // whether or not outLen accounts for the last NUL are the same as for
+ // To/FromWChar() above: if inLen is specified, outLen is exactly the
+ // number of characters converted, whether the last one of them was NUL or
+ // not. But if inLen == wxNO_LEN then outLen doesn't account for the last
+ // NUL even though it is present.
const wxWCharBuffer
cMB2WC(const char *in, size_t inLen, size_t *outLen) const;
const wxCharBuffer
const wchar_t* src,
size_t srcLen = wxNO_LEN) const;
- //@{
/**
- Converts from multibyte encoding to Unicode by calling MB2WC() and
+ Converts from multibyte encoding to Unicode by calling ToWChar() and
allocating a temporary wxWCharBuffer to hold the result.
- The first overload takes a @c NUL-terminated input string. The second
- one takes a string of exactly the specified length and the string may
- include or not the trailing @c NUL character(s). If the string is not
- @c NUL-terminated, a temporary @c NUL-terminated copy of it suitable
- for passing to wxMBConv::MB2WC is made, so it is more efficient to
- ensure that the string is does have the appropriate number of @c NUL
- bytes (which is usually 1 but may be 2 or 4 for UTF-16 or UTF-32, see
- wxMBConv::GetMBNulLen), especially for long strings.
-
- If @a outLen is not-@NULL, it receives the length of the converted
- string.
+ This function is a convenient wrapper around ToWChar() as it takes care
+ of allocating the buffer of the necessary size itself. Its parameters
+ have the same meaning as for ToWChar(), in particular @a inLen can be
+ specified explicitly in which case exactly that many characters are
+ converted and @a outLen receives (if non-@NULL) exactly the
+ corresponding number of wide characters, whether the last one of them
+ is @c NUL or not. However if @a inLen is @c wxNO_LEN, then @a outLen
+ doesn't count the trailing @c NUL even though it is always present in
+ this case.
+
+ Finally notice that if the conversion fails, the returned buffer is
+ invalid and @a outLen is set to 0 (and not @c wxCONV_FAILED, for
+ compatibility reasons).
*/
- const wxWCharBuffer cMB2WC(const char* in) const;
- const wxWCharBuffer cMB2WC(const char* in, size_t inLen, size_t *outLen) const;
- //@}
+ const wxWCharBuffer cMB2WC(const char* in,
+ size_t inLen = wxNO_LEN,
+ size_t *outLen = NULL) const;
//@{
/**
const wxWCharBuffer cMB2WX(const char* psz) const;
//@}
- //@{
/**
- Converts from Unicode to multibyte encoding by calling WC2MB and
+ Converts from Unicode to multibyte encoding by calling FromWChar() and
allocating a temporary wxCharBuffer to hold the result.
- The second overload of this function allows to convert a string of the
- given length @e inLen, whether it is @c NUL-terminated or not (for wide
- character strings, unlike for the multibyte ones, a single @c NUL is
- always enough). But notice that just as with @ref wxMBConv::mb2wc
- cMB2WC, it is more efficient to pass an already terminated string to
- this function as otherwise a copy is made internally. If @a outLen is
- not-@NULL, it receives the length of the converted string.
+ This function is a convenient wrapper around FromWChar() as it takes
+ care of allocating the buffer of the necessary size itself.
+
+ Its parameters have the same meaning as the corresponding parameters of
+ FromWChar(), please see the description of cMB2WC() for more details.
*/
- const wxCharBuffer cWC2MB(const wchar_t* in) const;
- const wxCharBuffer cWC2MB(const wchar_t* in, size_t inLen, size_t *outLen) const;
- //@}
+ const wxCharBuffer cWC2MB(const wchar_t* in,
+ size_t inLen = wxNO_LEN,
+ size_t *outLen = NULL) const;
//@{
/**
if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED;
- lenChunk++; // for the L'\0' at the end of this chunk
-
dstWritten += lenChunk;
+ if ( !srcEnd )
+ dstWritten++;
- if ( lenChunk == 1 )
+ if ( !lenChunk )
{
// nothing left in the input string, conversion succeeded
break;
if ( dstWritten > dstLen )
return wxCONV_FAILED;
- if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
+ // +1 is for trailing NUL
+ if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
return wxCONV_FAILED;
dst += lenChunk;
+ if ( !srcEnd )
+ dst++;
}
if ( !srcEnd )
// the number of chars [which would be] written to dst [if it were not NULL]
size_t dstWritten = 0;
+ // if we don't know its length we have no choice but to assume that it is
+ // NUL-terminated (notice that it can still be NUL-terminated even if
+ // explicit length is given but it doesn't change our return value)
+ const bool isNulTerminated = srcLen == wxNO_LEN;
+
// make a copy of the input string unless it is already properly
// NUL-terminated
- //
- // if we don't know its length we have no choice but to assume that it is,
- // indeed, properly terminated
wxWCharBuffer bufTmp;
- if ( srcLen == wxNO_LEN )
+ if ( isNulTerminated )
{
srcLen = wxWcslen(src) + 1;
}
if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED;
- lenChunk += lenNul;
dstWritten += lenChunk;
+ if ( isNulTerminated )
+ dstWritten += lenNul;
if ( dst )
{
if ( dstWritten > dstLen )
return wxCONV_FAILED;
- if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
+ if ( WC2MB(dst, src, lenChunk + lenNul) == wxCONV_FAILED )
return wxCONV_FAILED;
dst += lenChunk;
+ if ( isNulTerminated )
+ dst += lenNul;
}
}
// because we want the buffer to always be NUL-terminated, even if the
// input isn't (as otherwise the caller has no way to know its length)
wxWCharBuffer wbuf(dstLen);
- wbuf.data()[dstLen - 1] = L'\0';
+ wbuf.data()[dstLen] = L'\0';
if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
{
if ( outLen )
{
*outLen = dstLen;
- if ( wbuf[dstLen - 1] == L'\0' )
+
+ // we also need to handle NUL-terminated input strings
+ // specially: for them the returned length excludes the
+ // trailing NUL; however, if we're asked to convert a specific
+ // number of characters, we return the length of the resulting
+ // output even if it's NUL-terminated
+ if ( inLen == wxNO_LEN )
(*outLen)--;
}
{
*outLen = dstLen;
- if ( dstLen >= nulLen &&
- !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
+ if ( inLen == wxNO_LEN )
{
- // in this case the output is NUL-terminated and we're not
- // supposed to count NUL
+ // in this case both input and output are NUL-terminated
+ // and we're not supposed to count NUL
*outLen -= nulLen;
}
}