]> git.saurik.com Git - wxWidgets.git/blame - docs/latex/wx/mbconv.tex
minor corrections and note about Unicode (patch 1555691)
[wxWidgets.git] / docs / latex / wx / mbconv.tex
CommitLineData
eec47cc6
VZ
1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2%% Name: mbconv.tex
3%% Purpose: wxMBConv documentation
4%% Author: Ove Kaaven, Vadim Zeitlin
5%% Created: 2000-03-25
6%% RCS-ID: $Id$
7%% Copyright: (c) 2000 Ove Kaaven
8%% (c) 2003-2006 Vadim Zeitlin
9%% License: wxWindows license
10%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
11
f6bcfd97
BP
12
13\section{\class{wxMBConv}}\label{wxmbconv}
14
15This class is the base class of a hierarchy of classes capable of converting
eec47cc6
VZ
16text strings between multibyte (SBCS or DBCS) encodings and Unicode.
17
18In the documentation for this and related classes please notice that
19\emph{length} of the string refers to the number of characters in the string
20not counting the terminating \NUL, if any, while the \emph{size} of the string
5e51fb4c 21is the total number of bytes in the string, including any trailing \NUL.
eec47cc6
VZ
22Thus, the length of the wide character string \texttt{L"foo"} is $3$ while its size can
23be either $8$ or $16$ depending on whether \texttt{wchar\_t} is $2$ bytes (as
24under Windows) or $4$ (Unix).
25
26\wxheading{Global variables}
27
28There are several predefined instances of this class:
29\begin{twocollist}
30\twocolitem{\textbf{wxConvLibc}}{Uses the standard ANSI C \texttt{mbstowcs()} and
31\texttt{wcstombs()} functions to perform the conversions; thus depends on the
32current locale.}
ef95ce41
VZ
33\twocolitem{\textbf{wxConvLocal}}{Another conversion corresponding to the
34current locale but this one uses the best available conversion.}
d5bef0a3
VZ
35\twocolitem{\textbf{wxConvUI}}{The conversion used for the standard UI elements
36such as menu items and buttons. This is a pointer which is initially set to
37\texttt{wxConvLocal} as the program uses the current locale by default but can
38be set to some specific conversion if the program needs to use a specific
39encoding for its UI.}
ef95ce41
VZ
40\twocolitem{\textbf{wxConvISO8859\_1}}{Conversion to and from ISO-8859-1 (Latin I)
41encoding.}
42\twocolitem{\textbf{wxConvUTF8}}{Conversion to and from UTF-8 encoding.}
eec47cc6
VZ
43\twocolitem{\textbf{wxConvFile}}{The appropriate conversion for the file names,
44depends on the system.}
ef95ce41 45% \twocolitem{\textbf{wxConvCurrent}}{Not really clear what is it for...}
eec47cc6 46\end{twocollist}
f6bcfd97 47
483b0434
VZ
48
49\wxheading{Constants}
50
51\texttt{wxCONV\_FAILED} value is defined as \texttt{(size\_t)$-1$} and is
52returned by the conversion functions instead of the length of the converted
53string if the conversion fails.
54
55
f6bcfd97
BP
56\wxheading{Derived from}
57
58No base class
59
60\wxheading{Include files}
61
62<wx/strconv.h>
63
64\wxheading{See also}
65
66\helpref{wxCSConv}{wxcsconv},
67\helpref{wxEncodingConverter}{wxencodingconverter},
68\helpref{wxMBConv classes overview}{mbconvclasses}
69
483b0434 70
f6bcfd97
BP
71\latexignore{\rtfignore{\wxheading{Members}}}
72
73
74\membersection{wxMBConv::wxMBConv}\label{wxmbconvwxmbconv}
75
76\func{}{wxMBConv}{\void}
77
483b0434
VZ
78Trivial default constructor.
79
f6bcfd97
BP
80
81\membersection{wxMBConv::MB2WC}\label{wxmbconvmb2wc}
82
eec47cc6
VZ
83\constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{out}, \param{const char *}{in}, \param{size\_t }{outLen}}
84
483b0434
VZ
85\deprecated{\helpref{ToWChar}{wxmbconvtowchar}}
86
eec47cc6
VZ
87Converts from a string \arg{in} in multibyte encoding to Unicode putting up to
88\arg{outLen} characters into the buffer \arg{out}.
f6bcfd97 89
eec47cc6
VZ
90If \arg{out} is \NULL, only the length of the string which would result from
91the conversion is calculated and returned. Note that this is the length and not
92size, i.e. the returned value does \emph{not} include the trailing \NUL. But
93when the function is called with a non-\NULL \arg{out} buffer, the \arg{outLen}
94parameter should be one more to allow the string to be properly \NUL-terminated.
2b5f62a0
VZ
95
96\wxheading{Parameters}
97
eec47cc6 98\docparam{out}{The output buffer, may be \NULL if the caller is only
2b5f62a0
VZ
99interested in the length of the resulting string}
100
eec47cc6 101\docparam{in}{The \NUL-terminated input string, cannot be \NULL}
2b5f62a0 102
eec47cc6
VZ
103\docparam{outLen}{The length of the output buffer but \emph{including}
104\NUL, ignored if \arg{out} is \NULL}
2b5f62a0
VZ
105
106\wxheading{Return value}
107
5e51fb4c 108The length of the converted string \emph{excluding} the trailing \NUL.
eec47cc6 109
f6bcfd97
BP
110
111\membersection{wxMBConv::WC2MB}\label{wxmbconvwc2mb}
112
113\constfunc{virtual size\_t}{WC2MB}{\param{char* }{buf}, \param{const wchar\_t* }{psz}, \param{size\_t }{n}}
114
483b0434
VZ
115\deprecated{\helpref{FromWChar}{wxmbconvfromwchar}}
116
2b5f62a0
VZ
117Converts from Unicode to multibyte encoding. The semantics of this function
118(including the return value meaning) is the same as for
119\helpref{MB2WC}{wxmbconvmb2wc}.
120
eec47cc6
VZ
121Notice that when the function is called with a non-\NULL buffer, the
122\arg{n} parameter should be the size of the buffer and so it \emph{should} take
5e51fb4c 123into account the trailing \NUL, which might take two or four bytes for some
eec47cc6
VZ
124encodings (UTF-16 and UTF-32) and not one.
125
f6bcfd97
BP
126
127\membersection{wxMBConv::cMB2WC}\label{wxmbconvcmb2wc}
128
eec47cc6
VZ
129\constfunc{const wxWCharBuffer}{cMB2WC}{\param{const char *}{in}}
130
131\constfunc{const wxWCharBuffer}{cMB2WC}{\param{const char *}{in}, \param{size\_t }{inLen}, \param{size\_t }{*outLen}}
132
133Converts from multibyte encoding to Unicode by calling
134\helpref{MB2WC}{wxmbconvmb2wc}, allocating a temporary wxWCharBuffer to hold
135the result.
136
137The first overload takes a \NUL-terminated input string. The second one takes a
138string of exactly the specified length and the string may or may not include the
5e51fb4c 139trailing \NUL character(s). If the string is not \NUL-terminated, a temporary
eec47cc6
VZ
140\NUL-terminated copy of it suitable for passing to \helpref{MB2WC}{wxmbconvmb2wc}
141is made, so it is more efficient to ensure that the string does have the
142appropriate number of \NUL bytes (which is usually $1$ but may be $2$ or $4$
7ef3ab50
VZ
143for UTF-16 or UTF-32, see \helpref{GetMBNulLen}{wxmbconvgetmbnullen}),
144especially for long strings.
eec47cc6
VZ
145
146If \arg{outLen} is non-\NULL, it receives the length of the converted
147string.
f6bcfd97 148
f6bcfd97
BP
149
150\membersection{wxMBConv::cWC2MB}\label{wxmbconvcwc2mb}
151
eec47cc6
VZ
152\constfunc{const wxCharBuffer}{cWC2MB}{\param{const wchar\_t* }{in}}
153
154\constfunc{const wxCharBuffer}{cWC2MB}{\param{const wchar\_t* }{in}, \param{size\_t }{inLen}, \param{size\_t }{*outLen}}
f6bcfd97
BP
155
156Converts from Unicode to multibyte encoding by calling WC2MB,
157allocating a temporary wxCharBuffer to hold the result.
158
eec47cc6
VZ
159The second overload of this function allows converting a string of the given
160length \arg{inLen}, whether it is \NUL-terminated or not (for wide character
161strings, unlike for the multibyte ones, a single \NUL is always enough).
162But notice that just as with \helpref{cMB2WC}{wxmbconvcmb2wc}, it is more
163efficient to pass an already terminated string to this function as otherwise a
164copy is made internally.
165
166If \arg{outLen} is non-\NULL, it receives the length of the converted
167string.
168
169
f6bcfd97
BP
170\membersection{wxMBConv::cMB2WX}\label{wxmbconvcmb2wx}
171
172\constfunc{const char*}{cMB2WX}{\param{const char* }{psz}}
173
174\constfunc{const wxWCharBuffer}{cMB2WX}{\param{const char* }{psz}}
175
176Converts from multibyte encoding to the current wxChar type
177(which depends on whether wxUSE\_UNICODE is set to 1). If wxChar is char,
178it returns the parameter unaltered. If wxChar is wchar\_t, it returns the
179result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
180return type (without const).
181
eec47cc6 182
f6bcfd97
BP
183\membersection{wxMBConv::cWX2MB}\label{wxmbconvcwx2mb}
184
185\constfunc{const char*}{cWX2MB}{\param{const wxChar* }{psz}}
186
187\constfunc{const wxCharBuffer}{cWX2MB}{\param{const wxChar* }{psz}}
188
189Converts from the current wxChar type to multibyte encoding. If wxChar is char,
190it returns the parameter unaltered. If wxChar is wchar\_t, it returns the
191result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
192return type (without const).
193
eec47cc6 194
f6bcfd97
BP
195\membersection{wxMBConv::cWC2WX}\label{wxmbconvcwc2wx}
196
197\constfunc{const wchar\_t*}{cWC2WX}{\param{const wchar\_t* }{psz}}
198
199\constfunc{const wxCharBuffer}{cWC2WX}{\param{const wchar\_t* }{psz}}
200
201Converts from Unicode to the current wxChar type. If wxChar is wchar\_t,
202it returns the parameter unaltered. If wxChar is char, it returns the
203result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
204return type (without const).
205
eec47cc6 206
f6bcfd97
BP
207\membersection{wxMBConv::cWX2WC}\label{wxmbconvcwx2wc}
208
209\constfunc{const wchar\_t*}{cWX2WC}{\param{const wxChar* }{psz}}
210
211\constfunc{const wxWCharBuffer}{cWX2WC}{\param{const wxChar* }{psz}}
212
213Converts from the current wxChar type to Unicode. If wxChar is wchar\_t,
214it returns the parameter unaltered. If wxChar is char, it returns the
215result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
216return type (without const).
217
7ef3ab50 218
483b0434
VZ
219\membersection{wxMBConv::FromWChar}\label{wxmbconvfromwchar}
220
221\constfunc{virtual size\_t}{FromWChar}{\param{wchar\_t *}{dst}, \param{size\_t }{dstLen}, \param{const char *}{src}, \param{size\_t }{srcLen = $-1$}}
222
223The most general function for converting a multibyte string to a wide string.
224The main case is when \arg{dst} is not \NULL and \arg{srcLen} is not $-1$: then
225the function converts exactly \arg{srcLen} bytes starting at \arg{src} into
226a wide string which it outputs to \arg{dst}. If the length of the resulting wide
227string is greater than \arg{dstLen}, an error is returned. Note that if
228\arg{srcLen} bytes don't include \NUL characters, the resulting wide string is
229not \NUL-terminated either.
230
231If \arg{srcLen} is $-1$, the function supposes that the string is properly
232(i.e. as necessary for the encoding handled by this conversion) \NUL-terminated
233and converts the entire string, including any trailing \NUL bytes. In this case
234the wide string is also \NUL-terminated.
235
236Finally, if \arg{dst} is \NULL, the function returns the length of the needed
237buffer.
238
239\wxheading{Return value}
240
241The number of characters written to \arg{dst} (or the number of characters
242which would have been written to it if it were non-\NULL) on success or
243\texttt{wxCONV\_FAILED} on error.
244
245
246\membersection{wxMBConv::GetMaxMBNulLen}\label{wxmbconvgetmaxmbnullen}
247
248\func{const size\_t}{GetMaxMBNulLen}{\void}
249
250Returns the maximal value which can be returned by
251\helpref{GetMBNulLen}{wxmbconvgetmbnullen} for any conversion object. Currently
252this value is $4$.
253
254This method can be used to allocate the buffer with enough space for the
255trailing \NUL characters for any encoding.
256
257
7ef3ab50
VZ
258\membersection{wxMBConv::GetMBNulLen}\label{wxmbconvgetmbnullen}
259
260\constfunc{size\_t}{GetMBNulLen}{\void}
261
262This function returns $1$ for most of the multibyte encodings in which the
263string is terminated by a single \NUL, $2$ for UTF-16 and $4$ for UTF-32 for
264which the string is terminated with $2$ and $4$ \NUL characters respectively.
265The other cases are not currently supported and $-1$ is returned for them.
266
267
483b0434
VZ
268\membersection{wxMBConv::ToWChar}\label{wxmbconvtowchar}
269
270\constfunc{virtual size\_t}{ToWChar}{\param{char *}{dst}, \param{size\_t }{dstLen}, \param{const wchar\_t *}{src}, \param{size\_t }{srcLen = $-1$}}
271
272This function has the same semantics as \helpref{FromWChar}{wxmbconvfromwchar}
273except that it converts a wide string to a multibyte one.
274
275