- For all wxInputStreams, Eof() becomes true after an attempt has been made
to read _past_ the end of file.
- wxCHECK family of macros now must be followed by a semicolon
+- wxMBConv::cMB2WC() and cWC2MB() take size of the input buffer and return
+ length of the converted string in all cases now.
+
Deprecated methods since 2.6.x and their replacements
-----------------------------------------------------
-%
-% automatically generated by HelpGen from
-% ../include/wx/strconv.h at 25/Mar/00 10:20:56
-%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Name: mbconv.tex
+%% Purpose: wxMBConv documentation
+%% Author: Ove Kaaven, Vadim Zeitlin
+%% Created: 2000-03-25
+%% RCS-ID: $Id$
+%% Copyright: (c) 2000 Ove Kaaven
+%% (c) 2003-2006 Vadim Zeitlin
+%% License: wxWindows license
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
\section{\class{wxMBConv}}\label{wxmbconv}
This class is the base class of a hierarchy of classes capable of converting
-text strings between multibyte (SBCS or DBCS) encodings and Unicode. It is itself
-a wrapper around the standard libc mbstowcs() and wcstombs() routines, and has
-one predefined instance, {\bf wxConvLibc}.
+text strings between multibyte (SBCS or DBCS) encodings and Unicode.
+
+In the documentation for this and related classes please notice that the
+\emph{length} of the string refers to the number of characters in the string
+not counting the terminating \NUL, if any, while the \emph{size} of the string
+is the total number of bytes in the string, including any trailing {\NUL}s.
+Thus, the length of the wide character string \texttt{L"foo"} is $3$ while its
+size can be either $8$ or $16$ depending on whether \texttt{wchar\_t} is $2$
+bytes (as under Windows) or $4$ (Unix).
+
+\wxheading{Global variables}
+
+There are several predefined instances of this class:
+\begin{twocollist}
+\twocolitem{\textbf{wxConvLibc}}{Uses the standard ANSI C \texttt{mbstowcs()} and
+\texttt{wcstombs()} functions to perform the conversions; thus depends on the
+current locale.}
+\twocolitem{\textbf{wxConvFile}}{The appropriate conversion for the file names,
+depends on the system.}
+\end{twocollist}
\wxheading{Derived from}
\membersection{wxMBConv::MB2WC}\label{wxmbconvmb2wc}
-\constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{outputBuf}, \param{const char *}{psz}, \param{size\_t }{outputSize}}
+\constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{out}, \param{const char *}{in}, \param{size\_t }{outLen}}
+
+Converts from a string \arg{in} in multibyte encoding to Unicode putting up to
+\arg{outLen} characters into the buffer \arg{out}.
-Converts from a string {\it psz} in multibyte encoding to Unicode putting the
-output into the buffer {\it outputBuf} of the maximum size {\it outputSize} (in wide
-characters, not bytes). If {\it outputBuf} is {\tt NULL}, only the length of the
-string which would result from the conversion is calculated and returned.
-Note that this is the length and not size, i.e. the returned value does
-{\bf not} include the trailing NUL. But when the function is called with a
-non-{\tt NULL} {\it outputBuf}, the {\it outputSize} parameter should be the size of the buffer
-and so it {\bf should} take into account the trailing NUL.
+If \arg{out} is \NULL, only the length of the string which would result from
+the conversion is calculated and returned. Note that this is the length and not
+size, i.e. the returned value does \emph{not} include the trailing \NUL. But
+when the function is called with a non-\NULL \arg{out} buffer, the \arg{outLen}
+parameter should be one more than the returned length so that the string can
+be properly \NUL-terminated.
\wxheading{Parameters}
-\docparam{outputBuf}{the output buffer, may be {\tt NULL} if the caller is only
+\docparam{out}{The output buffer, may be \NULL if the caller is only
interested in the length of the resulting string}
-\docparam{psz}{the {\tt NUL}-terminated input string, cannot be {\tt NULL}}
+\docparam{in}{The \NUL-terminated input string, cannot be \NULL}
-\docparam{outputSize}{the size of the output buffer (in wide characters, {\bf including} the
-NUL) , ignored if {\it outputBuf} is {\tt NULL}}
+\docparam{outLen}{The length of the output buffer, \emph{including} the
+trailing \NUL; ignored if \arg{out} is \NULL}
\wxheading{Return value}
-The length of the converted string (in wide characters, {\bf excluding} the NUL)
+The length of the converted string \emph{excluding} the trailing {\NUL}.
+
\membersection{wxMBConv::WC2MB}\label{wxmbconvwc2mb}
(including the return value meaning) is the same as for
\helpref{MB2WC}{wxmbconvmb2wc}.
-Notice that when the function is called with a non-{\tt NULL} buffer, the
-{\it n} parameter should be the size of the buffer and so it {\bf should} take
+Notice that when the function is called with a non-\NULL buffer, the
+{\it n} parameter should be the size of the buffer and so it \emph{should} take
into account the trailing NUL, which might take two or four bytes for some
-encodings (UTF-16 and UTF-32).
+encodings (UTF-16 and UTF-32) and not one.
+
\membersection{wxMBConv::cMB2WC}\label{wxmbconvcmb2wc}
-\constfunc{const wxWCharBuffer}{cMB2WC}{\param{const char* }{psz}}
+\constfunc{const wxWCharBuffer}{cMB2WC}{\param{const char *}{in}}
+
+\constfunc{const wxWCharBuffer}{cMB2WC}{\param{const char *}{in}, \param{size\_t }{inLen}, \param{size\_t }{*outLen}}
+
+Converts from multibyte encoding to Unicode by calling
+\helpref{MB2WC}{wxmbconvmb2wc}, allocating a temporary wxWCharBuffer to hold
+the result.
+
+The first overload takes a \NUL-terminated input string. The second one takes a
+string of exactly the specified length and the string may or may not include
+the trailing {\NUL}s. If the string is not \NUL-terminated, a temporary
+\NUL-terminated copy of it suitable for passing to \helpref{MB2WC}{wxmbconvmb2wc}
+is made, so it is more efficient to ensure that the string does have the
+appropriate number of \NUL bytes (which is usually $1$ but may be $2$ or $4$
+for UTF-16 or UTF-32), especially for long strings.
+
+If \arg{outLen} is not \NULL, it receives the length of the converted
+string.
-Converts from multibyte encoding to Unicode by calling MB2WC,
-allocating a temporary wxWCharBuffer to hold the result.
\membersection{wxMBConv::cWC2MB}\label{wxmbconvcwc2mb}
-\constfunc{const wxCharBuffer}{cWC2MB}{\param{const wchar\_t* }{psz}}
+\constfunc{const wxCharBuffer}{cWC2MB}{\param{const wchar\_t* }{in}}
+
+\constfunc{const wxCharBuffer}{cWC2MB}{\param{const wchar\_t* }{in}, \param{size\_t }{inLen}, \param{size\_t }{*outLen}}
Converts from Unicode to multibyte encoding by calling WC2MB,
allocating a temporary wxCharBuffer to hold the result.
+The second overload of this function allows converting a string of the given
+length \arg{inLen}, whether it is \NUL-terminated or not (for wide character
+strings, unlike for the multibyte ones, a single \NUL is always enough).
+But notice that just as with \helpref{cMB2WC}{wxmbconvcmb2wc}, it is more
+efficient to pass an already terminated string to this function as otherwise a
+copy is made internally.
+
+If \arg{outLen} is not \NULL, it receives the length of the converted
+string.
+
+
\membersection{wxMBConv::cMB2WX}\label{wxmbconvcmb2wx}
\constfunc{const char*}{cMB2WX}{\param{const char* }{psz}}
result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
return type (without const).
+
\membersection{wxMBConv::cWX2MB}\label{wxmbconvcwx2mb}
\constfunc{const char*}{cWX2MB}{\param{const wxChar* }{psz}}
result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
return type (without const).
+
\membersection{wxMBConv::cWC2WX}\label{wxmbconvcwc2wx}
\constfunc{const wchar\_t*}{cWC2WX}{\param{const wchar\_t* }{psz}}
result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
return type (without const).
+
\membersection{wxMBConv::cWX2WC}\label{wxmbconvcwx2wc}
\constfunc{const wchar\_t*}{cWX2WC}{\param{const wxChar* }{psz}}
class WXDLLIMPEXP_BASE wxMBConv
{
public:
- // the actual conversion takes place here
+ // The functions doing actual conversion. On success, the return value is
+ // the length (i.e. the number of characters, not bytes, and not counting
+ // the trailing L'\0') of the converted string. On failure, (size_t)-1 is
+ // returned. In the special case when out is NULL the return value is
+ // the same one but nothing is written to the buffer.
//
- // note that outputSize is the size of the output buffer, not the length of input
- // (the latter is always supposed to be NUL-terminated)
- virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const = 0;
- virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const = 0;
+ // Note that outLen is the length of the output buffer, not the length of
+ // the input (which is always supposed to be terminated by one or more
+ // NULs, as appropriate for the encoding)!
+ virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const = 0;
+ virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const = 0;
// MB <-> WC
- const wxWCharBuffer cMB2WC(const char *psz) const;
- const wxCharBuffer cWC2MB(const wchar_t *psz) const;
+ const wxWCharBuffer cMB2WC(const char *in) const;
+ const wxCharBuffer cWC2MB(const wchar_t *in) const;
- // MB <-> WC for strings with embedded null characters
+ // Functions converting strings which may contain embedded NULs and don't
+ // have to be NUL-terminated.
//
- // pszLen length of the input string
- // pOutSize gets the final size of the converted string
- const wxWCharBuffer cMB2WC(const char *psz, size_t pszLen, size_t* pOutSize) const;
- const wxCharBuffer cWC2MB(const wchar_t *psz, size_t pszLen, size_t* pOutSize) const;
+ // inLen is the length of the buffer including trailing NUL if any: if the
+ // last 4 bytes of the buffer are all NULs, these functions are more
+ // efficient as they avoid copying the string, but otherwise a copy is made
+ // internally which could be quite bad for (very) long strings.
+ //
+ // outLen receives, if not NULL, the length of the converted string or 0 if
+ // the conversion failed (returning 0 and not -1 in this case makes it
+ // difficult to distinguish between failed conversion and empty input but
+ // this is done for backwards compatibility)
+ const wxWCharBuffer
+ cMB2WC(const char *in, size_t inLen, size_t *outLen) const;
+ const wxCharBuffer
+ cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const;
// convenience functions for converting MB or WC to/from wxWin default
#if wxUSE_UNICODE
// virtual dtor for any base class
virtual ~wxMBConv();
+
+private:
+ // this function must return the multibyte representation of L'\0'
+ //
+ // on error, nulLen should be set to -1
+ virtual const char *GetMBNul(size_t *nulLen) const
+ {
+ *nulLen = 1;
+
+ return "";
+ }
};
// ----------------------------------------------------------------------------
wxConvBrokenFileNames(const wxChar *charset);
virtual ~wxConvBrokenFileNames() { delete m_conv; }
- virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
- virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
+ virtual size_t MB2WC(wchar_t *out, const char *in, size_t outLen) const
+ {
+ return m_conv->MB2WC(out, in, outLen);
+ }
+
+ virtual size_t WC2MB(char *out, const wchar_t *in, size_t outLen) const
+ {
+ return m_conv->WC2MB(out, in, outLen);
+ }
private:
+ virtual wxCharBuffer GetMBNul(size_t *nulLen) const
+ {
+ return m_conv->GetMBNul(nulLen);
+ }
+
+
// the conversion object we forward to
wxMBConv *m_conv;
};
-#endif
+#endif // __UNIX__
// ----------------------------------------------------------------------------
// wxMBConvUTF7 (for conversion using UTF7 encoding)
class WXDLLIMPEXP_BASE wxMBConvUTF8 : public wxMBConv
{
public:
- enum {
+ enum {
MAP_INVALID_UTF8_NOT = 0,
MAP_INVALID_UTF8_TO_PUA = 1,
MAP_INVALID_UTF8_TO_OCTAL = 2
wxMBConvUTF8(int options = MAP_INVALID_UTF8_NOT) : m_options(options) { }
virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
-
+
private:
int m_options;
};
+// ----------------------------------------------------------------------------
+// wxMBConvUTF16Base: for both LE and BE variants
+// ----------------------------------------------------------------------------
+
+class WXDLLIMPEXP_BASE wxMBConvUTF16Base : public wxMBConv
+{
+private:
+ virtual const char *GetMBNul(size_t *nulLen) const
+ {
+ *nulLen = 2;
+ return "\0";
+ }
+};
+
// ----------------------------------------------------------------------------
// wxMBConvUTF16LE (for conversion using UTF16 Little Endian encoding)
// ----------------------------------------------------------------------------
-class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConv
+class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConvUTF16Base
{
public:
virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
// wxMBConvUTF16BE (for conversion using UTF16 Big Endian encoding)
// ----------------------------------------------------------------------------
-class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConv
+class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConvUTF16Base
{
public:
virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
virtual size_t WC2MB(char *outputBuf, const wchar_t *psz, size_t outputSize) const;
};
+// ----------------------------------------------------------------------------
+// wxMBConvUTF32Base: base class for both LE and BE variants
+// ----------------------------------------------------------------------------
+
+class WXDLLIMPEXP_BASE wxMBConvUTF32Base : public wxMBConv
+{
+private:
+ virtual const char *GetMBNul(size_t *nulLen) const
+ {
+ *nulLen = 4;
+ return "\0\0\0";
+ }
+};
+
// ----------------------------------------------------------------------------
// wxMBConvUTF32LE (for conversion using UTF32 Little Endian encoding)
// ----------------------------------------------------------------------------
-class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConv
+class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConvUTF32Base
{
public:
virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
// wxMBConvUTF32BE (for conversion using UTF32 Big Endian encoding)
// ----------------------------------------------------------------------------
-class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConv
+class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConvUTF32Base
{
public:
virtual size_t MB2WC(wchar_t *outputBuf, const char *psz, size_t outputSize) const;
// charset string
void SetName(const wxChar *charset);
+ virtual const char *GetMBNul(size_t *nulLen) const;
+
// note that we can't use wxString here because of compilation
// dependencies: we're included from wx/string.h
return buf;
}
-const wxWCharBuffer wxMBConv::cMB2WC(const char *szString, size_t nStringLen, size_t* pOutSize) const
+const wxWCharBuffer
+wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
{
- wxASSERT(pOutSize != NULL);
-
- const char* szEnd = szString + nStringLen + 1;
- const char* szPos = szString;
- const char* szStart = szPos;
-
- size_t nActualLength = 0;
- size_t nCurrentSize = nStringLen; //try normal size first (should never resize?)
-
- wxWCharBuffer theBuffer(nCurrentSize);
+ // the currently accumulated wide characters
+ wxWCharBuffer wbuf;
+
+ // the current length of wbuf
+ size_t lenBuf = 0;
+
+ // we need to know the representation of L'\0' for this conversion
+ size_t nulLen;
+ const char * const nul = GetMBNul(&nulLen);
+ if ( nulLen == (size_t)-1 || nulLen == 0 )
+ return wxWCharBuffer();
+
+ // make a copy of the input string unless it is already properly
+ // NUL-terminated
+ wxCharBuffer bufTmp;
+
+ // now we can compute the input size if we were not given it: notice that
+ // in this case the string must be properly NUL-terminated, of course, as
+ // otherwise we have no way of knowing how long it is
+ if ( inLen == (size_t)-1 )
+ {
+ // not the most efficient algorithm but it shouldn't matter as normally
+ // there are not many NULs in the string and so normally memcmp()
+ // should stop on the first character
+ for ( const char *p = in; ; p++ )
+ {
+ if ( memcmp(p, nul, nulLen) == 0 )
+ break;
+ }
- //Convert the string until the length() is reached, continuing the
- //loop every time a null character is reached
- while(szPos != szEnd)
+ inLen = p - in + nulLen;
+ }
+ else // we already have the size
{
- wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
+ // check if it's not already NUL-terminated too to avoid the copy
+ if ( inLen < nulLen || memcmp(in + inLen - nulLen, nul, nulLen) != 0 )
+ {
+ // make a copy in order to properly NUL-terminate the string
+ bufTmp = wxCharBuffer(inLen + nulLen - 1 /* 1 will be added */);
+ memcpy(bufTmp.data(), in, inLen);
+ memcpy(bufTmp.data() + inLen, nul, nulLen);
+ }
+ }
- //Get the length of the current (sub)string
- size_t nLen = MB2WC(NULL, szPos, 0);
+ if ( bufTmp )
+ in = bufTmp;
- //Invalid conversion?
- if( nLen == (size_t)-1 )
+ for ( const char * const inEnd = in + inLen;; )
+ {
+ // try to convert the current chunk if anything left
+ size_t lenChunk = in < inEnd ? MB2WC(NULL, in, 0) : 0;
+ if ( lenChunk == 0 )
{
- *pOutSize = 0;
- theBuffer.data()[0u] = wxT('\0');
- return theBuffer;
+ // nothing left in the input string, conversion succeeded
+ if ( outLen )
+ {
+ // we shouldn't include the last NUL in the result length
+ *outLen = lenBuf ? lenBuf - 1 : 0;
+ }
+
+ return wbuf;
}
+ if ( lenChunk == (size_t)-1 )
+ break;
- //Increase the actual length (+1 for current null character)
- nActualLength += nLen + 1;
+ const size_t lenBufNew = lenBuf + lenChunk;
+ if ( !wbuf.extend(lenBufNew) )
+ break;
- //if buffer too big, realloc the buffer
- if (nActualLength > (nCurrentSize+1))
- {
- wxWCharBuffer theNewBuffer(nCurrentSize << 1);
- memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize * sizeof(wchar_t));
- theBuffer = theNewBuffer;
- nCurrentSize <<= 1;
- }
+ lenChunk = MB2WC(wbuf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
+ if ( lenChunk == (size_t)-1 )
+ break;
- //Convert the current (sub)string
- if ( MB2WC(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
- {
- *pOutSize = 0;
- theBuffer.data()[0u] = wxT('\0');
- return theBuffer;
- }
+ // +1 for the embedded NUL (if something follows)
+ lenBuf = lenBufNew + 1;
+
+ // advance the input pointer past the end of this chunk
+ while ( memcmp(in, nul, nulLen) != 0 )
+ in++;
- //Increment to next (sub)string
- //Note that we have to use strlen instead of nLen here
- //because XX2XX gives us the size of the output buffer,
- //which is not necessarily the length of the string
- szPos += strlen(szPos) + 1;
+ in += nulLen; // skipping over its terminator as well
}
- //success - return actual length and the buffer
- *pOutSize = nActualLength;
- return theBuffer;
+ // conversion failed
+ if ( outLen )
+ *outLen = 0;
+
+ return wxWCharBuffer();
}
-const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *szString, size_t nStringLen, size_t* pOutSize) const
+const wxCharBuffer
+wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
{
- wxASSERT(pOutSize != NULL);
-
- const wchar_t* szEnd = szString + nStringLen + 1;
- const wchar_t* szPos = szString;
- const wchar_t* szStart = szPos;
-
- size_t nActualLength = 0;
- size_t nCurrentSize = nStringLen << 2; //try * 4 first
+ // the currently accumulated multibyte characters
+ wxCharBuffer buf;
- wxCharBuffer theBuffer(nCurrentSize);
+ // the current length of buf
+ size_t lenBuf = 0;
- //Convert the string until the length() is reached, continuing the
- //loop every time a null character is reached
- while(szPos != szEnd)
+ // make a copy of the input string unless it is already properly
+ // NUL-terminated
+ //
+ // if we don't know its length we have no choice but to assume that it is,
+ // indeed, properly terminated
+ wxWCharBuffer bufTmp;
+ if ( inLen == (size_t)-1 )
{
- wxASSERT(szPos < szEnd); //something is _really_ screwed up if this rings true
+ inLen = wxWcslen(in) + 1;
+ }
+ else if ( inLen != 0 && in[inLen - 1] != L'\0' )
+ {
+ // make a copy in order to properly NUL-terminate the string
+ bufTmp = wxWCharBuffer(inLen);
+ memcpy(bufTmp.data(), in, inLen*sizeof(wchar_t));
+ }
- //Get the length of the current (sub)string
- size_t nLen = WC2MB(NULL, szPos, 0);
+ if ( bufTmp )
+ in = bufTmp;
- //Invalid conversion?
- if( nLen == (size_t)-1 )
+ for ( const wchar_t * const inEnd = in + inLen;; )
+ {
+ // try to convert the current chunk, if anything left
+ size_t lenChunk = in < inEnd ? WC2MB(NULL, in, 0) : 0;
+ if ( lenChunk == 0 )
{
- *pOutSize = 0;
- theBuffer.data()[0u] = wxT('\0');
- return theBuffer;
+ // nothing left in the input string, conversion succeeded
+ if ( outLen )
+ *outLen = lenBuf ? lenBuf - 1 : lenBuf;
+
+ return buf;
}
- //Increase the actual length (+1 for current null character)
- nActualLength += nLen + 1;
+ if ( lenChunk == (size_t)-1 )
+ break;
- //if buffer too big, realloc the buffer
- if (nActualLength > (nCurrentSize+1))
- {
- wxCharBuffer theNewBuffer(nCurrentSize << 1);
- memcpy(theNewBuffer.data(), theBuffer.data(), nCurrentSize);
- theBuffer = theNewBuffer;
- nCurrentSize <<= 1;
- }
+ const size_t lenBufNew = lenBuf + lenChunk;
+ if ( !buf.extend(lenBufNew) )
+ break;
- //Convert the current (sub)string
- if(WC2MB(&theBuffer.data()[szPos - szStart], szPos, nLen + 1) == (size_t)-1 )
- {
- *pOutSize = 0;
- theBuffer.data()[0u] = wxT('\0');
- return theBuffer;
- }
+ lenChunk = WC2MB(buf.data() + lenBuf, in, lenChunk + 1 /* for NUL */);
+ if ( lenChunk == (size_t)-1 )
+ break;
- //Increment to next (sub)string
- //Note that we have to use wxWcslen instead of nLen here
- //because XX2XX gives us the size of the output buffer,
- //which is not necessarily the length of the string
- szPos += wxWcslen(szPos) + 1;
+ // chunk successfully converted, go to the next one
+ in += wxWcslen(in) + 1 /* skip NUL too */;
+ lenBuf = lenBufNew + 1;
}
- //success - return actual length and the buffer
- *pOutSize = nActualLength;
- return theBuffer;
+ // conversion failed
+ if ( outLen )
+ *outLen = 0;
+
+ return wxCharBuffer();
}
// ----------------------------------------------------------------------------
return wxWC2MB(buf, psz, n);
}
-#ifdef __UNIX__
-
// ----------------------------------------------------------------------------
// wxConvBrokenFileNames
// ----------------------------------------------------------------------------
+#ifdef __UNIX__
+
wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
{
if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
m_conv = new wxCSConv(charset);
}
-size_t
-wxConvBrokenFileNames::MB2WC(wchar_t *outputBuf,
- const char *psz,
- size_t outputSize) const
-{
- return m_conv->MB2WC( outputBuf, psz, outputSize );
-}
-
-size_t
-wxConvBrokenFileNames::WC2MB(char *outputBuf,
- const wchar_t *psz,
- size_t outputSize) const
-{
- return m_conv->WC2MB( outputBuf, psz, outputSize );
-}
-
-#endif
+#endif // __UNIX__
// ----------------------------------------------------------------------------
// UTF-7
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
-
-
size_t len = 0;
while (*psz && ((!buf) || (len < n)))
// swap 16bit MB to 16bit String
size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
- size_t len=0;
+ size_t len = 0;
- while (*psz && (!buf || len < n))
+ while ( *psz && (!buf || len < n) )
{
- if (buf)
+ if ( buf )
{
*buf++ = ((char*)psz)[1];
*buf++ = ((char*)psz)[0];
}
- len += sizeof(wxUint16);
+ len += 2;
psz++;
}
- if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
+
+ if ( buf && len < n )
+ *buf = '\0';
return len;
}
#endif
private:
+ virtual const char *GetMBNul(size_t *nulLen) const;
+
// the name (for iconv_open()) of a wide char charset -- if none is
// available on this machine, it will remain NULL
static wxString ms_wcCharsetName;
// true if the wide char encoding we use (i.e. ms_wcCharsetName) has
// different endian-ness than the native one
static bool ms_wcNeedsSwap;
+
+ // NUL representation
+ size_t m_nulLen;
+ char m_nulBuf[8];
};
// make the constructor available for unit testing
wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
{
+ m_nulLen = (size_t)-2;
+
// iconv operates with chars, not wxChars, but luckily it uses only ASCII
// names for the charsets
const wxCharBuffer cname(wxString(name).ToAscii());
return res;
}
+const char *wxMBConv_iconv::GetMBNul(size_t *nulLen) const
+{
+ if ( m_nulLen == (size_t)-2 )
+ {
+ wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
+
+#if wxUSE_THREADS
+ // NB: explained in MB2WC
+ wxMutexLocker lock(self->m_iconvMutex);
+#endif
+
+ size_t inLen = 1,
+ outLen = WXSIZEOF(m_nulBuf);
+ self->m_nulLen = iconv(w2m, ICONV_CHAR_CAST(L""), &inLen,
+ &self->m_nulBuf, &outLen);
+ }
+
+ *nulLen = m_nulLen;
+ return m_nulBuf;
+}
+
#endif // HAVE_ICONV
wxMBConv_win32()
{
m_CodePage = CP_ACP;
+ m_nulLen = (size_t)-2;
}
#if wxUSE_FONTMAP
wxMBConv_win32(const wxChar* name)
{
m_CodePage = wxCharsetToCodepage(name);
+ m_nulLen = (size_t)-2;
}
wxMBConv_win32(wxFontEncoding encoding)
{
m_CodePage = wxEncodingToCodepage(encoding);
+ m_nulLen = (size_t)-2;
}
-#endif
+#endif // wxUSE_FONTMAP
size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
#endif
}
+ virtual const char *GetMBNul(size_t *nulLen) const
+ {
+ if ( m_nulLen == (size_t)-2 )
+ {
+ wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
+
+ self->m_nulLen = ::WideCharToMultiByte
+ (
+ m_CodePage, // code page
+ 0, // no flags
+ L"", // input string
+ 1, // translate just NUL
+ self->m_nulBuf, // output buffer
+ WXSIZEOF(m_nulBuf), // and its size
+ NULL, // "replacement" char
+ NULL // [out] was it used?
+ );
+
+ if ( m_nulLen == 0 )
+ self->m_nulLen = (size_t)-1;
+ }
+
+ *nulLen = m_nulLen;
+ return m_nulBuf;
+ }
+
long m_CodePage;
+ size_t m_nulLen;
+ char m_nulBuf[8];
};
#endif // wxHAVE_WIN32_MB2WC
wxFontEncoding m_enc;
wxEncodingConverter m2w, w2m;
+private:
+ virtual const char *GetMBNul(size_t *nulLen) const
+ {
+ switch ( m_enc )
+ {
+ case wxFONTENCODING_UTF16BE:
+ case wxFONTENCODING_UTF16LE:
+ *nulLen = 2;
+ return "\0";
+
+ case wxFONTENCODING_UTF32BE:
+ case wxFONTENCODING_UTF32LE:
+ *nulLen = 4;
+ return "\0\0\0";
+
+ default:
+ *nulLen = 1;
+ return "";
+ }
+ }
+
// were we initialized successfully?
bool m_ok;
return len;
}
+const char *wxCSConv::GetMBNul(size_t *nulLen) const
+{
+ CreateConvIfNeeded();
+
+ if ( m_convReal )
+ {
+ // cast needed just to call private function of m_convReal
+ return ((wxCSConv *)m_convReal)->GetMBNul(nulLen);
+ }
+
+ *nulLen = 1;
+ return "";
+}
+
// ----------------------------------------------------------------------------
// globals
// ----------------------------------------------------------------------------
// from multibyte string
wxString::wxString(const char *psz, wxMBConv& conv, size_t nLength)
{
- // if nLength != npos, then we have to make a NULL-terminated copy
- // of first nLength bytes of psz first because the input buffer to MB2WC
- // must always be NULL-terminated:
- wxCharBuffer inBuf((const char *)NULL);
- if (nLength != npos)
- {
- wxASSERT( psz != NULL );
- wxCharBuffer tmp(nLength);
- memcpy(tmp.data(), psz, nLength);
- tmp.data()[nLength] = '\0';
- inBuf = tmp;
- psz = inBuf.data();
- }
-
- // first get the size of the buffer we need
- size_t nLen;
- if ( psz )
- {
- // calculate the needed size ourselves or use the provided one
- if (nLength == npos)
- nLen = strlen(psz);
- else
- nLen = nLength;
- }
- else
- {
- // nothing to convert
- nLen = 0;
- }
-
-
// anything to do?
- if ( (nLen != 0) && (nLen != (size_t)-1) )
+ if ( psz && nLength != 0 )
{
- //Convert string
- size_t nRealSize;
- wxWCharBuffer theBuffer = conv.cMB2WC(psz, nLen, &nRealSize);
+ if ( nLength == npos )
+ {
+ nLength = (size_t)-1;
+ }
+ else if ( nLength == length() )
+ {
+ // this is important to avoid copying the string in cMB2WC: we're
+ // already NUL-terminated so we can pass this NUL with the data
+ nLength++;
+ }
+
+ size_t nLenWide;
+ wxWCharBuffer wbuf = conv.cMB2WC(psz, nLength, &nLenWide);
- //Copy
- if (nRealSize)
- assign( theBuffer.data() , nRealSize - 1 );
+ if ( nLenWide )
+ assign(wbuf, nLenWide);
}
}
//Convert wxString in Unicode mode to a multi-byte string
const wxCharBuffer wxString::mb_str(wxMBConv& conv) const
{
- size_t dwOutSize;
- return conv.cWC2MB(c_str(), length(), &dwOutSize);
+ return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
}
#else // ANSI
#if wxUSE_WCHAR_T
+
// from wide string
wxString::wxString(const wchar_t *pwz, wxMBConv& conv, size_t nLength)
{
- // if nLength != npos, then we have to make a NULL-terminated copy
- // of first nLength chars of psz first because the input buffer to WC2MB
- // must always be NULL-terminated:
- wxWCharBuffer inBuf((const wchar_t *)NULL);
- if (nLength != npos)
- {
- wxASSERT( pwz != NULL );
- wxWCharBuffer tmp(nLength);
- memcpy(tmp.data(), pwz, nLength * sizeof(wchar_t));
- tmp.data()[nLength] = '\0';
- inBuf = tmp;
- pwz = inBuf.data();
- }
-
- // first get the size of the buffer we need
- size_t nLen;
- if ( pwz )
- {
- // calculate the needed size ourselves or use the provided one
- if (nLength == npos)
- nLen = wxWcslen(pwz);
- else
- nLen = nLength;
- }
- else
- {
- // nothing to convert
- nLen = 0;
- }
-
// anything to do?
- if ( (nLen != 0) && (nLen != (size_t)-1) )
+ if ( pwz && nLength != 0 )
{
- //Convert string
- size_t nRealSize;
- wxCharBuffer theBuffer = conv.cWC2MB(pwz, nLen, &nRealSize);
+ if ( nLength == npos )
+ {
+ nLength = (size_t)-1;
+ }
+ else if ( nLength == length() )
+ {
+ // this is important to avoid copying the string in cWC2MB: we're
+ // already NUL-terminated so we can pass this NUL with the data
+ nLength++;
+ }
+
+ size_t nLenMB;
+ wxCharBuffer buf = conv.cWC2MB(pwz, nLength, &nLenMB);
- //Copy
- if (nRealSize)
- assign( theBuffer.data() , nRealSize - 1 );
+ if ( nLenMB )
+ assign(buf, nLenMB);
}
}
//mode is not enabled and wxUSE_WCHAR_T is enabled
const wxWCharBuffer wxString::wc_str(wxMBConv& conv) const
{
- size_t dwOutSize;
- return conv.cMB2WC(c_str(), length(), &dwOutSize);
+ return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
}
#endif // wxUSE_WCHAR_T