]>
Commit | Line | Data |
---|---|---|
1 | .\" Copyright (c) 1993 | |
2 | .\" The Regents of the University of California. All rights reserved. | |
3 | .\" | |
4 | .\" This code is derived from software contributed to Berkeley by | |
5 | .\" Donn Seeley of BSDI. | |
6 | .\" | |
7 | .\" Redistribution and use in source and binary forms, with or without | |
8 | .\" modification, are permitted provided that the following conditions | |
9 | .\" are met: | |
10 | .\" 1. Redistributions of source code must retain the above copyright | |
11 | .\" notice, this list of conditions and the following disclaimer. | |
12 | .\" 2. Redistributions in binary form must reproduce the above copyright | |
13 | .\" notice, this list of conditions and the following disclaimer in the | |
14 | .\" documentation and/or other materials provided with the distribution. | |
15 | .\" 3. All advertising materials mentioning features or use of this software | |
16 | .\" must display the following acknowledgement: | |
17 | .\" This product includes software developed by the University of | |
18 | .\" California, Berkeley and its contributors. | |
19 | .\" 4. Neither the name of the University nor the names of its contributors | |
20 | .\" may be used to endorse or promote products derived from this software | |
21 | .\" without specific prior written permission. | |
22 | .\" | |
23 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
24 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
25 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
26 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
27 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
28 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
29 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
30 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
31 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
32 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
33 | .\" SUCH DAMAGE. | |
34 | .\" | |
35 | .\" @(#)multibyte.3 8.1 (Berkeley) 6/4/93 | |
36 | .\" $FreeBSD: src/lib/libc/locale/multibyte.3,v 1.19 2003/02/06 11:04:46 charnier Exp $ | |
37 | .\" | |
38 | .Dd October 6, 2002 | |
39 | .Dt MULTIBYTE 3 | |
40 | .Os | |
41 | .Sh NAME | |
42 | .Nm mblen , | |
43 | .Nm mbstowcs , | |
44 | .Nm mbtowc , | |
45 | .Nm wcstombs , | |
46 | .Nm wctomb | |
47 | .Nd multibyte character support for C | |
48 | .Sh LIBRARY | |
49 | .Lb libc | |
50 | .Sh SYNOPSIS | |
51 | .In stdlib.h | |
52 | .Ft int | |
53 | .Fn mblen "const char *mbchar" "size_t nbytes" | |
54 | .Ft size_t | |
55 | .Fn mbstowcs "wchar_t * restrict wcstring" "const char * restrict mbstring" "size_t nwchars" | |
56 | .Ft int | |
57 | .Fn mbtowc "wchar_t * restrict wcharp" "const char * restrict mbchar" "size_t nbytes" | |
58 | .Ft size_t | |
59 | .Fn wcstombs "char * restrict mbstring" "const wchar_t * restrict wcstring" "size_t nbytes" | |
60 | .Ft int | |
61 | .Fn wctomb "char *mbchar" "wchar_t wchar" | |
62 | .Sh DESCRIPTION | |
63 | The basic elements of some written natural languages such as Chinese | |
64 | cannot be represented uniquely with single C | |
65 | .Va char Ns s . | |
66 | The C standard supports two different ways of dealing with | |
67 | extended natural language encodings, | |
68 | .Em wide | |
69 | characters and | |
70 | .Em multibyte | |
71 | characters. | |
72 | Wide characters are an internal representation | |
73 | which allows each basic element to map | |
74 | to a single object of type | |
75 | .Va wchar_t . | |
76 | Multibyte characters are used for input and output | |
77 | and code each basic element as a sequence of C | |
78 | .Va char Ns s . | |
79 | Individual basic elements may map into one or more | |
80 | (up to | |
81 | .Dv MB_LEN_MAX ) | |
82 | bytes in a multibyte character. | |
83 | .Pp | |
84 | The current locale | |
85 | .Pq Xr setlocale 3 | |
86 | governs the interpretation of wide and multibyte characters. | |
87 | The locale category | |
88 | .Dv LC_CTYPE | |
89 | specifically controls this interpretation. | |
90 | The | |
91 | .Va wchar_t | |
92 | type is wide enough to hold the largest value | |
93 | in the wide character representations for all locales. | |
94 | .Pp | |
95 | Multibyte strings may contain | |
96 | .Sq shift | |
97 | indicators to switch to and from | |
98 | particular modes within the given representation. | |
99 | If explicit bytes are used to signal shifting, | |
100 | these are not recognized as separate characters | |
101 | but are lumped with a neighboring character. | |
102 | There is always a distinguished | |
103 | .Sq initial | |
104 | shift state. | |
105 | The | |
106 | .Fn mbstowcs | |
107 | and | |
108 | .Fn wcstombs | |
109 | functions assume that multibyte strings are interpreted | |
110 | starting from the initial shift state. | |
111 | The | |
112 | .Fn mblen , | |
113 | .Fn mbtowc | |
114 | and | |
115 | .Fn wctomb | |
116 | functions maintain static shift state internally. | |
117 | A call with a null | |
118 | .Fa mbchar | |
119 | pointer returns nonzero if the current locale requires shift states, | |
120 | zero otherwise; | |
121 | if shift states are required, the shift state is reset to the initial state. | |
122 | The internal shift states are undefined after a call to | |
123 | .Fn setlocale | |
124 | with the | |
125 | .Dv LC_CTYPE | |
126 | or | |
127 | .Dv LC_ALL | |
128 | categories. | |
129 | .Pp | |
130 | For convenience in processing, | |
131 | the wide character with value 0 | |
132 | (the null wide character) | |
133 | is recognized as the wide character string terminator, | |
134 | and the character with value 0 | |
135 | (the null byte) | |
136 | is recognized as the multibyte character string terminator. | |
137 | Null bytes are not permitted within multibyte characters. | |
138 | .Pp | |
139 | The | |
140 | .Fn mblen | |
141 | function computes the length in bytes | |
142 | of a multibyte character | |
143 | .Fa mbchar . | |
144 | Up to | |
145 | .Fa nbytes | |
146 | bytes are examined. | |
147 | .Pp | |
148 | The | |
149 | .Fn mbtowc | |
150 | function converts a multibyte character | |
151 | .Fa mbchar | |
152 | into a wide character and stores the result | |
153 | in the object pointed to by | |
154 | .Fa wcharp . | |
155 | Up to | |
156 | .Fa nbytes | |
157 | bytes are examined. | |
158 | .Pp | |
159 | The | |
160 | .Fn wctomb | |
161 | function converts a wide character | |
162 | .Fa wchar | |
163 | into a multibyte character and stores | |
164 | the result in | |
165 | .Fa mbchar . | |
166 | The object pointed to by | |
167 | .Fa mbchar | |
168 | must be large enough to accommodate the multibyte character. | |
169 | .Pp | |
170 | The | |
171 | .Fn mbstowcs | |
172 | function converts a multibyte character string | |
173 | .Fa mbstring | |
174 | into a wide character string | |
175 | .Fa wcstring . | |
176 | No more than | |
177 | .Fa nwchars | |
178 | wide characters are stored. | |
179 | A terminating null wide character is appended if there is room. | |
180 | .Pp | |
181 | The | |
182 | .Fn wcstombs | |
183 | function converts a wide character string | |
184 | .Fa wcstring | |
185 | into a multibyte character string | |
186 | .Fa mbstring . | |
187 | Up to | |
188 | .Fa nbytes | |
189 | bytes are stored in | |
190 | .Fa mbstring . | |
191 | Partial multibyte characters at the end of the string are not stored. | |
192 | The multibyte character string is null terminated if there is room. | |
193 | .Sh RETURN VALUES | |
194 | If | |
195 | .Fa mbchar | |
196 | is | |
197 | .Dv NULL , | |
198 | the | |
199 | .Fn mblen , | |
200 | .Fn mbtowc | |
201 | and | |
202 | .Fn wctomb | |
203 | functions return nonzero if shift states are supported, | |
204 | zero otherwise. | |
205 | If | |
206 | .Fa mbchar | |
207 | is valid, | |
208 | then these functions return | |
209 | the number of bytes processed in | |
210 | .Fa mbchar , | |
211 | or \-1 if no multibyte character | |
212 | could be recognized or converted. | |
213 | .Pp | |
214 | The | |
215 | .Fn mbstowcs | |
216 | function returns the number of wide characters converted, | |
217 | not counting any terminating null wide character. | |
218 | The | |
219 | .Fn wcstombs | |
220 | function returns the number of bytes converted, | |
221 | not counting any terminating null byte. | |
222 | If any invalid multibyte or unconvertible wide characters are encountered, | |
223 | both functions return (size_t)\-1. | |
224 | .Sh SEE ALSO | |
225 | .Xr btowc 3 , | |
226 | .Xr mbrlen 3 , | |
227 | .Xr mbrtowc 3 , | |
228 | .Xr mbrune 3 , | |
229 | .Xr mbsrtowcs 3 , | |
230 | .Xr rune 3 , | |
231 | .Xr setlocale 3 , | |
232 | .Xr wcrtomb 3 , | |
233 | .Xr wcsrtombs 3 , | |
234 | .Xr euc 4 , | |
235 | .Xr utf2 4 , | |
236 | .Xr utf8 5 | |
237 | .Sh STANDARDS | |
238 | The | |
239 | .Fn mblen , | |
240 | .Fn mbstowcs , | |
241 | .Fn mbtowc , | |
242 | .Fn wcstombs | |
243 | and | |
244 | .Fn wctomb | |
245 | functions conform to | |
246 | .St -isoC . | |
247 | .Sh BUGS | |
248 | The current implementation does not support shift states. |