]>
Commit | Line | Data |
---|---|---|
974e3884 A |
1 | .\" $NetBSD: vis.3,v 1.39 2013/02/20 20:05:26 christos Exp $ |
2 | .\" $FreeBSD$ | |
3 | .\" | |
5b2abdfb A |
4 | .\" Copyright (c) 1989, 1991, 1993 |
5 | .\" The Regents of the University of California. All rights reserved. | |
6 | .\" | |
7 | .\" Redistribution and use in source and binary forms, with or without | |
8 | .\" modification, are permitted provided that the following conditions | |
9 | .\" are met: | |
10 | .\" 1. Redistributions of source code must retain the above copyright | |
11 | .\" notice, this list of conditions and the following disclaimer. | |
12 | .\" 2. Redistributions in binary form must reproduce the above copyright | |
13 | .\" notice, this list of conditions and the following disclaimer in the | |
14 | .\" documentation and/or other materials provided with the distribution. | |
974e3884 | 15 | .\" 3. Neither the name of the University nor the names of its contributors |
5b2abdfb A |
16 | .\" may be used to endorse or promote products derived from this software |
17 | .\" without specific prior written permission. | |
18 | .\" | |
19 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
20 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
21 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
22 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
23 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
24 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
25 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
26 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
27 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
28 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
29 | .\" SUCH DAMAGE. | |
30 | .\" | |
974e3884 | 31 | .\" @(#)vis.3 8.1 (Berkeley) 6/9/93 |
5b2abdfb | 32 | .\" |
974e3884 | 33 | .Dd February 19, 2013 |
5b2abdfb A |
34 | .Dt VIS 3 |
35 | .Os | |
36 | .Sh NAME | |
974e3884 A |
37 | .Nm vis , |
38 | .Nm nvis , | |
39 | .Nm strvis , | |
40 | .Nm strnvis , | |
41 | .Nm strvisx , | |
42 | .Nm strnvisx , | |
43 | .Nm strenvisx , | |
44 | .Nm svis , | |
45 | .Nm snvis , | |
46 | .Nm strsvis , | |
47 | .Nm strsnvis , | |
48 | .Nm strsvisx , | |
49 | .Nm strsnvisx , | |
50 | .Nm strsenvisx | |
5b2abdfb A |
51 | .Nd visually encode characters |
52 | .Sh LIBRARY | |
53 | .Lb libc | |
54 | .Sh SYNOPSIS | |
55 | .In vis.h | |
56 | .Ft char * | |
57 | .Fn vis "char *dst" "int c" "int flag" "int nextc" | |
974e3884 A |
58 | .Ft char * |
59 | .Fn nvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc" | |
5b2abdfb A |
60 | .Ft int |
61 | .Fn strvis "char *dst" "const char *src" "int flag" | |
62 | .Ft int | |
974e3884 A |
63 | .Fn strnvis "char *dst" "size_t dlen" "const char *src" "int flag" |
64 | .Ft int | |
5b2abdfb | 65 | .Fn strvisx "char *dst" "const char *src" "size_t len" "int flag" |
974e3884 A |
66 | .Ft int |
67 | .Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" | |
68 | .Ft int | |
69 | .Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr" | |
70 | .Ft char * | |
71 | .Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra" | |
72 | .Ft char * | |
73 | .Fn snvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc" "const char *extra" | |
74 | .Ft int | |
75 | .Fn strsvis "char *dst" "const char *src" "int flag" "const char *extra" | |
76 | .Ft int | |
77 | .Fn strsnvis "char *dst" "size_t dlen" "const char *src" "int flag" "const char *extra" | |
78 | .Ft int | |
79 | .Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra" | |
80 | .Ft int | |
81 | .Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" | |
82 | .Ft int | |
83 | .Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr" | |
5b2abdfb A |
84 | .Sh DESCRIPTION |
85 | The | |
86 | .Fn vis | |
87 | function | |
88 | copies into | |
89 | .Fa dst | |
90 | a string which represents the character | |
91 | .Fa c . | |
92 | If | |
93 | .Fa c | |
3d9156a7 | 94 | needs no encoding, it is copied in unaltered. |
974e3884 | 95 | The string is null terminated, and a pointer to the end of the string is |
3d9156a7 A |
96 | returned. |
97 | The maximum length of any encoding is four | |
974e3884 | 98 | bytes (not including the trailing |
5b2abdfb A |
99 | .Dv NUL ) ; |
100 | thus, when | |
101 | encoding a set of characters into a buffer, the size of the buffer should | |
974e3884 | 102 | be four times the number of bytes encoded, plus one for the trailing |
5b2abdfb | 103 | .Dv NUL . |
974e3884 | 104 | The flag parameter is used for altering the default range of |
5b2abdfb A |
105 | characters considered for encoding and for altering the visual |
106 | representation. | |
107 | The additional character, | |
108 | .Fa nextc , | |
109 | is only used when selecting the | |
110 | .Dv VIS_CSTYLE | |
111 | encoding format (explained below). | |
112 | .Pp | |
113 | The | |
974e3884 A |
114 | .Fn strvis , |
115 | .Fn strnvis , | |
116 | .Fn strvisx , | |
5b2abdfb | 117 | and |
974e3884 | 118 | .Fn strnvisx |
5b2abdfb A |
119 | functions copy into |
120 | .Fa dst | |
121 | a visual representation of | |
122 | the string | |
123 | .Fa src . | |
124 | The | |
125 | .Fn strvis | |
974e3884 A |
126 | and |
127 | .Fn strnvis | |
128 | functions encode characters from | |
5b2abdfb A |
129 | .Fa src |
130 | up to the | |
131 | first | |
132 | .Dv NUL . | |
133 | The | |
134 | .Fn strvisx | |
974e3884 A |
135 | and |
136 | .Fn strnvisx | |
137 | functions encode exactly | |
5b2abdfb A |
138 | .Fa len |
139 | characters from | |
140 | .Fa src | |
141 | (this | |
142 | is useful for encoding a block of data that may contain | |
143 | .Dv NUL Ns 's ) . | |
144 | Both forms | |
145 | .Dv NUL | |
146 | terminate | |
147 | .Fa dst . | |
148 | The size of | |
149 | .Fa dst | |
150 | must be four times the number | |
974e3884 | 151 | of bytes encoded from |
5b2abdfb A |
152 | .Fa src |
153 | (plus one for the | |
154 | .Dv NUL ) . | |
155 | Both | |
974e3884 A |
156 | forms return the number of characters in |
157 | .Fa dst | |
158 | (not including the trailing | |
5b2abdfb | 159 | .Dv NUL ) . |
974e3884 A |
160 | The |
161 | .Dq Nm n | |
162 | versions of the functions also take an additional argument | |
163 | .Fa dlen | |
164 | that indicates the length of the | |
165 | .Fa dst | |
166 | buffer. | |
167 | If | |
168 | .Fa dlen | |
169 | is not large enough to fit the converted string then the | |
170 | .Fn strnvis | |
171 | and | |
172 | .Fn strnvisx | |
173 | functions return \-1 and set | |
174 | .Va errno | |
175 | to | |
176 | .Dv ENOSPC . | |
177 | The | |
178 | .Fn strenvisx | |
179 | function takes an additional argument, | |
180 | .Fa cerr_ptr , | |
181 | that is used to pass in and out a multibyte conversion error flag. | |
182 | This is useful when processing one character at a time and | |
183 | the current locale may be set to something other | |
184 | than the locale of the characters in the input data. | |
185 | .Pp | |
186 | The functions | |
187 | .Fn svis , | |
188 | .Fn snvis , | |
189 | .Fn strsvis , | |
190 | .Fn strsnvis , | |
191 | .Fn strsvisx , | |
192 | .Fn strsnvisx , | |
193 | and | |
194 | .Fn strsenvisx | |
195 | correspond to | |
196 | .Fn vis , | |
197 | .Fn nvis , | |
198 | .Fn strvis , | |
199 | .Fn strnvis , | |
200 | .Fn strvisx , | |
201 | .Fn strnvisx , | |
202 | and | |
203 | .Fn strenvisx | |
204 | but have an additional argument | |
205 | .Fa extra , | |
206 | pointing to a | |
207 | .Dv NUL | |
208 | terminated list of characters. | |
209 | These characters will be encoded or backslash-escaped when copied into | |
210 | .Fa dst . | |
211 | These functions are useful e.g. to remove the special meaning | |
212 | of certain characters to shells. | |
5b2abdfb A |
213 | .Pp |
214 | The encoding is a unique, invertible representation composed entirely of | |
215 | graphic characters; it can be decoded back into the original form using | |
216 | the | |
974e3884 | 217 | .Xr unvis 3 , |
5b2abdfb | 218 | .Xr strunvis 3 |
974e3884 A |
219 | or |
220 | .Xr strnunvis 3 | |
5b2abdfb A |
221 | functions. |
222 | .Pp | |
223 | There are two parameters that can be controlled: the range of | |
974e3884 A |
224 | characters that are encoded (applies only to |
225 | .Fn vis , | |
226 | .Fn nvis , | |
227 | .Fn strvis , | |
228 | .Fn strnvis , | |
229 | .Fn strvisx , | |
230 | and | |
231 | .Fn strnvisx ) , | |
232 | and the type of representation used. | |
233 | By default, all non-graphic characters, | |
234 | except space, tab, and newline, are encoded (see | |
235 | .Xr isgraph 3 ) . | |
5b2abdfb A |
236 | The following flags |
237 | alter this: | |
238 | .Bl -tag -width VIS_WHITEX | |
3d9156a7 | 239 | .It Dv VIS_GLOB |
974e3884 | 240 | Also encode the magic characters |
3d9156a7 A |
241 | .Ql ( * , |
242 | .Ql \&? , | |
243 | .Ql \&[ | |
244 | and | |
245 | .Ql # ) | |
246 | recognized by | |
247 | .Xr glob 3 . | |
5b2abdfb A |
248 | .It Dv VIS_SP |
249 | Also encode space. | |
250 | .It Dv VIS_TAB | |
251 | Also encode tab. | |
252 | .It Dv VIS_NL | |
253 | Also encode newline. | |
254 | .It Dv VIS_WHITE | |
255 | Synonym for | |
256 | .Dv VIS_SP | |
257 | \&| | |
258 | .Dv VIS_TAB | |
259 | \&| | |
260 | .Dv VIS_NL . | |
261 | .It Dv VIS_SAFE | |
974e3884 A |
262 | Only encode |
263 | .Dq unsafe | |
264 | characters. | |
265 | Unsafe means control characters which may cause common terminals to perform | |
3d9156a7 | 266 | unexpected functions. |
974e3884 A |
267 | Currently this form allows space, tab, newline, backspace, bell, and |
268 | return \(em in addition to all graphic characters \(em unencoded. | |
5b2abdfb A |
269 | .El |
270 | .Pp | |
974e3884 A |
271 | (The above flags have no effect for |
272 | .Fn svis , | |
273 | .Fn snvis , | |
274 | .Fn strsvis , | |
275 | .Fn strsnvis , | |
276 | .Fn strsvisx , | |
277 | and | |
278 | .Fn strsnvisx . | |
279 | When using these functions, place all graphic characters to be | |
280 | encoded in an array pointed to by | |
281 | .Fa extra . | |
282 | In general, the backslash character should be included in this array, see the | |
283 | warning on the use of the | |
284 | .Dv VIS_NOSLASH | |
285 | flag below). | |
286 | .Pp | |
5b2abdfb | 287 | There are five forms of encoding. |
974e3884 | 288 | All forms use the backslash character |
5b2abdfb A |
289 | .Ql \e |
290 | to introduce a special | |
974e3884 A |
291 | sequence; two backslashes are used to represent a real backslash, |
292 | except | |
293 | .Dv VIS_HTTPSTYLE | |
294 | that uses | |
295 | .Ql % , | |
296 | or | |
297 | .Dv VIS_MIMESTYLE | |
298 | that uses | |
299 | .Ql = . | |
5b2abdfb | 300 | These are the visual formats: |
974e3884 | 301 | .Bl -tag -width VIS_CSTYLE |
5b2abdfb A |
302 | .It (default) |
303 | Use an | |
304 | .Ql M | |
305 | to represent meta characters (characters with the 8th | |
3d9156a7 | 306 | bit set), and use caret |
5b2abdfb | 307 | .Ql ^ |
974e3884 A |
308 | to represent control characters (see |
309 | .Xr iscntrl 3 ) . | |
5b2abdfb A |
310 | The following formats are used: |
311 | .Bl -tag -width xxxxx | |
312 | .It Dv \e^C | |
313 | Represents the control character | |
314 | .Ql C . | |
315 | Spans characters | |
316 | .Ql \e000 | |
317 | through | |
318 | .Ql \e037 , | |
319 | and | |
320 | .Ql \e177 | |
321 | (as | |
322 | .Ql \e^? ) . | |
323 | .It Dv \eM-C | |
324 | Represents character | |
325 | .Ql C | |
326 | with the 8th bit set. | |
327 | Spans characters | |
328 | .Ql \e241 | |
329 | through | |
330 | .Ql \e376 . | |
331 | .It Dv \eM^C | |
332 | Represents control character | |
333 | .Ql C | |
334 | with the 8th bit set. | |
335 | Spans characters | |
336 | .Ql \e200 | |
337 | through | |
338 | .Ql \e237 , | |
339 | and | |
340 | .Ql \e377 | |
341 | (as | |
342 | .Ql \eM^? ) . | |
343 | .It Dv \e040 | |
344 | Represents | |
345 | .Tn ASCII | |
346 | space. | |
347 | .It Dv \e240 | |
348 | Represents Meta-space. | |
349 | .El | |
350 | .Pp | |
351 | .It Dv VIS_CSTYLE | |
352 | Use C-style backslash sequences to represent standard non-printable | |
353 | characters. | |
354 | The following sequences are used to represent the indicated characters: | |
974e3884 A |
355 | .Bd -unfilled -offset indent |
356 | .Li \ea Tn \(em BEL No (007) | |
357 | .Li \eb Tn \(em BS No (010) | |
358 | .Li \ef Tn \(em NP No (014) | |
359 | .Li \en Tn \(em NL No (012) | |
360 | .Li \er Tn \(em CR No (015) | |
361 | .Li \es Tn \(em SP No (040) | |
362 | .Li \et Tn \(em HT No (011) | |
363 | .Li \ev Tn \(em VT No (013) | |
364 | .Li \e0 Tn \(em NUL No (000) | |
365 | .Ed | |
5b2abdfb | 366 | .Pp |
9385eb3d A |
367 | When using this format, the |
368 | .Fa nextc | |
974e3884 | 369 | parameter is looked at to determine if a |
5b2abdfb A |
370 | .Dv NUL |
371 | character can be encoded as | |
372 | .Ql \e0 | |
373 | instead of | |
374 | .Ql \e000 . | |
375 | If | |
376 | .Fa nextc | |
377 | is an octal digit, the latter representation is used to | |
378 | avoid ambiguity. | |
5b2abdfb | 379 | .It Dv VIS_OCTAL |
3d9156a7 A |
380 | Use a three digit octal sequence. |
381 | The form is | |
5b2abdfb A |
382 | .Ql \eddd |
383 | where | |
974e3884 | 384 | .Em d |
5b2abdfb | 385 | represents an octal digit. |
974e3884 A |
386 | .It Dv VIS_HTTPSTYLE |
387 | Use URI encoding as described in RFC 1738. | |
388 | The form is | |
389 | .Ql %xx | |
390 | where | |
391 | .Em x | |
392 | represents a lower case hexadecimal digit. | |
393 | .It Dv VIS_MIMESTYLE | |
394 | Use MIME Quoted-Printable encoding as described in RFC 2045, only don't | |
395 | break lines and don't handle CRLF. | |
396 | The form is | |
397 | .Ql =XX | |
398 | where | |
399 | .Em X | |
400 | represents an upper case hexadecimal digit. | |
5b2abdfb A |
401 | .El |
402 | .Pp | |
403 | There is one additional flag, | |
404 | .Dv VIS_NOSLASH , | |
405 | which inhibits the | |
406 | doubling of backslashes and the backslash before the default | |
407 | format (that is, control characters are represented by | |
408 | .Ql ^C | |
409 | and | |
410 | meta characters as | |
411 | .Ql M-C ) . | |
412 | With this flag set, the encoding is | |
413 | ambiguous and non-invertible. | |
974e3884 A |
414 | .Sh MULTIBYTE CHARACTER SUPPORT |
415 | These functions support multibyte character input. | |
416 | The encoding conversion is influenced by the setting of the | |
417 | .Ev LC_CTYPE | |
418 | environment variable which defines the set of characters | |
419 | that can be copied without encoding. | |
420 | .Pp | |
421 | When 8-bit data is present in the input, | |
422 | .Ev LC_CTYPE | |
423 | must be set to the correct locale or to the C locale. | |
424 | If the locales of the data and the conversion are mismatched, | |
425 | multibyte character recognition may fail and encoding will be performed | |
426 | byte-by-byte instead. | |
427 | .Pp | |
428 | As noted above, | |
429 | .Fa dst | |
430 | must be four times the number of bytes processed from | |
431 | .Fa src . | |
432 | But note that each multibyte character can be up to | |
433 | .Dv MB_LEN_MAX | |
434 | bytes | |
435 | .\" (see | |
436 | .\" .Xr multibyte 3 ) | |
437 | so in terms of multibyte characters, | |
438 | .Fa dst | |
439 | must be four times | |
440 | .Dv MB_LEN_MAX | |
441 | times the number of characters processed from | |
442 | .Fa src . | |
443 | .Sh ENVIRONMENT | |
444 | .Bl -tag -width ".Ev LC_CTYPE" | |
445 | .It Ev LC_CTYPE | |
446 | Specify the locale of the input data. | |
447 | Set to C if the input data locale is unknown. | |
448 | .El | |
449 | .Sh ERRORS | |
450 | The functions | |
451 | .Fn nvis | |
452 | and | |
453 | .Fn snvis | |
454 | will return | |
455 | .Dv NULL | |
456 | and the functions | |
457 | .Fn strnvis , | |
458 | .Fn strnvisx , | |
459 | .Fn strsnvis , | |
460 | and | |
461 | .Fn strsnvisx | |
462 | will return \-1 when the | |
463 | .Fa dlen | |
464 | destination buffer size is not enough to perform the conversion while | |
465 | setting | |
466 | .Va errno | |
467 | to: | |
468 | .Bl -tag -width ".Bq Er ENOSPC" | |
469 | .It Bq Er ENOSPC | |
470 | The destination buffer size is not large enough to perform the conversion. | |
471 | .El | |
5b2abdfb A |
472 | .Sh SEE ALSO |
473 | .Xr unvis 1 , | |
974e3884 A |
474 | .Xr vis 1 , |
475 | .Xr glob 3 , | |
476 | .\" .Xr multibyte 3 , | |
5b2abdfb A |
477 | .Xr unvis 3 |
478 | .Rs | |
974e3884 A |
479 | .%A T. Berners-Lee |
480 | .%T Uniform Resource Locators (URL) | |
481 | .%O "RFC 1738" | |
482 | .Re | |
483 | .Rs | |
484 | .%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies" | |
485 | .%O "RFC 2045" | |
5b2abdfb A |
486 | .Re |
487 | .Sh HISTORY | |
974e3884 A |
488 | The |
489 | .Fn vis , | |
490 | .Fn strvis , | |
491 | and | |
492 | .Fn strvisx | |
493 | functions first appeared in | |
5b2abdfb | 494 | .Bx 4.4 . |
3d9156a7 | 495 | The |
974e3884 A |
496 | .Fn svis , |
497 | .Fn strsvis , | |
498 | .Fn strsvisx , | |
499 | .Fn nvis , | |
500 | .Fn strnvis , | |
501 | .Fn strnvisx , | |
502 | .Fn snvis , | |
503 | .Fn strsnvis , | |
504 | and | |
505 | .Fn strsnvisx | |
506 | functions as well as multibyte character support were added in OS X 10.12. |