]>
Commit | Line | Data |
---|---|---|
974e3884 A |
1 | .\" $NetBSD: unvis.3,v 1.27 2012/12/15 07:34:36 wiz Exp $ |
2 | .\" $FreeBSD$ | |
3 | .\" | |
5b2abdfb A |
4 | .\" Copyright (c) 1989, 1991, 1993 |
5 | .\" The Regents of the University of California. All rights reserved. | |
6 | .\" | |
7 | .\" Redistribution and use in source and binary forms, with or without | |
8 | .\" modification, are permitted provided that the following conditions | |
9 | .\" are met: | |
10 | .\" 1. Redistributions of source code must retain the above copyright | |
11 | .\" notice, this list of conditions and the following disclaimer. | |
12 | .\" 2. Redistributions in binary form must reproduce the above copyright | |
13 | .\" notice, this list of conditions and the following disclaimer in the | |
14 | .\" documentation and/or other materials provided with the distribution. | |
974e3884 | 15 | .\" 3. Neither the name of the University nor the names of its contributors |
5b2abdfb A |
16 | .\" may be used to endorse or promote products derived from this software |
17 | .\" without specific prior written permission. | |
18 | .\" | |
19 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
20 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
21 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
22 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
23 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
24 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
25 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
26 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
27 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
28 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
29 | .\" SUCH DAMAGE. | |
30 | .\" | |
31 | .\" @(#)unvis.3 8.2 (Berkeley) 12/11/93 | |
5b2abdfb | 32 | .\" |
974e3884 | 33 | .Dd March 12, 2011 |
5b2abdfb A |
34 | .Dt UNVIS 3 |
35 | .Os | |
36 | .Sh NAME | |
37 | .Nm unvis , | |
38 | .Nm strunvis | |
39 | .Nd decode a visual representation of characters | |
40 | .Sh LIBRARY | |
41 | .Lb libc | |
42 | .Sh SYNOPSIS | |
43 | .In vis.h | |
44 | .Ft int | |
45 | .Fn unvis "char *cp" "int c" "int *astate" "int flag" | |
46 | .Ft int | |
47 | .Fn strunvis "char *dst" "const char *src" | |
48 | .Ft int | |
974e3884 A |
49 | .Fn strnunvis "char *dst" "size_t dlen" "const char *src" |
50 | .Ft int | |
5b2abdfb | 51 | .Fn strunvisx "char *dst" "const char *src" "int flag" |
974e3884 A |
52 | .Ft int |
53 | .Fn strnunvisx "char *dst" "size_t dlen" "const char *src" "int flag" | |
5b2abdfb A |
54 | .Sh DESCRIPTION |
55 | The | |
56 | .Fn unvis , | |
57 | .Fn strunvis | |
58 | and | |
59 | .Fn strunvisx | |
60 | functions | |
61 | are used to decode a visual representation of characters, as produced | |
62 | by the | |
63 | .Xr vis 3 | |
64 | function, back into | |
3d9156a7 | 65 | the original form. |
974e3884 A |
66 | .Pp |
67 | The | |
68 | .Fn unvis | |
69 | function is called with successive characters in | |
70 | .Ar c | |
71 | until a valid sequence is recognized, at which time the decoded | |
72 | character is available at the character pointed to by | |
73 | .Ar cp . | |
5b2abdfb A |
74 | .Pp |
75 | The | |
76 | .Fn strunvis | |
974e3884 A |
77 | function decodes the characters pointed to by |
78 | .Ar src | |
79 | into the buffer pointed to by | |
80 | .Ar dst . | |
81 | The | |
82 | .Fn strunvis | |
83 | function simply copies | |
84 | .Ar src | |
5b2abdfb | 85 | to |
974e3884 | 86 | .Ar dst , |
5b2abdfb A |
87 | decoding any escape sequences along the way, |
88 | and returns the number of characters placed into | |
974e3884 | 89 | .Ar dst , |
5b2abdfb | 90 | or \-1 if an |
3d9156a7 A |
91 | invalid escape sequence was detected. |
92 | The size of | |
974e3884 A |
93 | .Ar dst |
94 | should be equal to the size of | |
95 | .Ar src | |
96 | (that is, no expansion takes place during decoding). | |
5b2abdfb A |
97 | .Pp |
98 | The | |
99 | .Fn strunvisx | |
100 | function does the same as the | |
101 | .Fn strunvis | |
102 | function, | |
103 | but it allows you to add a flag that specifies the style the string | |
974e3884 | 104 | .Ar src |
5b2abdfb | 105 | is encoded with. |
974e3884 A |
106 | Currently, the supported flags are: |
107 | .Dv VIS_HTTPSTYLE | |
108 | and | |
109 | .Dv VIS_MIMESTYLE . | |
5b2abdfb A |
110 | .Pp |
111 | The | |
112 | .Fn unvis | |
974e3884 A |
113 | function implements a state machine that can be used to decode an |
114 | arbitrary stream of bytes. | |
115 | All state associated with the bytes being decoded is stored outside the | |
5b2abdfb A |
116 | .Fn unvis |
117 | function (that is, a pointer to the state is passed in), so | |
3d9156a7 | 118 | calls decoding different streams can be freely intermixed. |
974e3884 | 119 | To start decoding a stream of bytes, first initialize an integer to zero. |
3d9156a7 | 120 | Call |
5b2abdfb A |
121 | .Fn unvis |
122 | with each successive byte, along with a pointer | |
123 | to this integer, and a pointer to a destination character. | |
124 | The | |
125 | .Fn unvis | |
974e3884 | 126 | function has several return codes that must be handled properly. |
3d9156a7 | 127 | They are: |
5b2abdfb | 128 | .Bl -tag -width UNVIS_VALIDPUSH |
974e3884 | 129 | .It Li \&0 No (zero) |
5b2abdfb | 130 | Another character is necessary; nothing has been recognized yet. |
3d9156a7 | 131 | .It Dv UNVIS_VALID |
5b2abdfb | 132 | A valid character has been recognized and is available at the location |
974e3884 A |
133 | pointed to by |
134 | .Fa cp . | |
3d9156a7 | 135 | .It Dv UNVIS_VALIDPUSH |
5b2abdfb | 136 | A valid character has been recognized and is available at the location |
974e3884 A |
137 | pointed to by |
138 | .Fa cp ; | |
139 | however, the character currently passed in should be passed in again. | |
3d9156a7 A |
140 | .It Dv UNVIS_NOCHAR |
141 | A valid sequence was detected, but no character was produced. | |
974e3884 | 142 | This return code is necessary to indicate a logical break between characters. |
3d9156a7 | 143 | .It Dv UNVIS_SYNBAD |
974e3884 | 144 | An invalid escape sequence was detected, or the decoder is in an unknown state. |
3d9156a7 | 145 | The decoder is placed into the starting state. |
5b2abdfb A |
146 | .El |
147 | .Pp | |
148 | When all bytes in the stream have been processed, call | |
149 | .Fn unvis | |
974e3884 | 150 | one more time with flag set to |
5b2abdfb A |
151 | .Dv UNVIS_END |
152 | to extract any remaining character (the character passed in is ignored). | |
153 | .Pp | |
154 | The | |
9385eb3d | 155 | .Fa flag |
5b2abdfb A |
156 | argument is also used to specify the encoding style of the source. |
157 | If set to | |
974e3884 A |
158 | .Dv VIS_HTTPSTYLE |
159 | or | |
160 | .Dv VIS_HTTP1808 , | |
5b2abdfb A |
161 | .Fn unvis |
162 | will decode URI strings as specified in RFC 1808. | |
974e3884 A |
163 | If set to |
164 | .Dv VIS_HTTP1866 , | |
165 | .Fn unvis | |
166 | will decode entity references and numeric character references | |
167 | as specified in RFC 1866. | |
168 | If set to | |
169 | .Dv VIS_MIMESTYLE , | |
170 | .Fn unvis | |
171 | will decode MIME Quoted-Printable strings as specified in RFC 2045. | |
172 | If set to | |
173 | .Dv VIS_NOESCAPE , | |
174 | .Fn unvis | |
175 | will not decode | |
176 | .Ql \e | |
177 | quoted characters. | |
5b2abdfb A |
178 | .Pp |
179 | The following code fragment illustrates a proper use of | |
180 | .Fn unvis . | |
181 | .Bd -literal -offset indent | |
182 | int state = 0; | |
183 | char out; | |
184 | ||
185 | while ((ch = getchar()) != EOF) { | |
186 | again: | |
974e3884 | 187 | switch(unvis(\*[Am]out, ch, \*[Am]state, 0)) { |
5b2abdfb A |
188 | case 0: |
189 | case UNVIS_NOCHAR: | |
190 | break; | |
191 | case UNVIS_VALID: | |
974e3884 | 192 | (void)putchar(out); |
5b2abdfb A |
193 | break; |
194 | case UNVIS_VALIDPUSH: | |
974e3884 | 195 | (void)putchar(out); |
5b2abdfb A |
196 | goto again; |
197 | case UNVIS_SYNBAD: | |
974e3884 | 198 | errx(EXIT_FAILURE, "Bad character sequence!"); |
5b2abdfb A |
199 | } |
200 | } | |
974e3884 A |
201 | if (unvis(\*[Am]out, '\e0', \*[Am]state, UNVIS_END) == UNVIS_VALID) |
202 | (void)putchar(out); | |
5b2abdfb | 203 | .Ed |
974e3884 A |
204 | .Sh ERRORS |
205 | The functions | |
206 | .Fn strunvis , | |
207 | .Fn strnunvis , | |
208 | .Fn strunvisx , | |
209 | and | |
210 | .Fn strnunvisx | |
211 | will return \-1 on error and set | |
212 | .Va errno | |
213 | to: | |
214 | .Bl -tag -width Er | |
215 | .It Bq Er EINVAL | |
216 | An invalid escape sequence was detected, or the decoder is in an unknown state. | |
217 | .El | |
218 | .Pp | |
219 | In addition, the functions | |
220 | .Fn strnunvis | |
221 | and | |
222 | .Fn strnunvisx | |
223 | can also set | |
224 | .Va errno | |
225 | on error to: | |
226 | .Bl -tag -width Er | |
227 | .It Bq Er ENOSPC | |
228 | Not enough space to perform the conversion. | |
229 | .El | |
5b2abdfb | 230 | .Sh SEE ALSO |
974e3884 | 231 | .Xr unvis 1 , |
5b2abdfb A |
232 | .Xr vis 1 , |
233 | .Xr vis 3 | |
234 | .Rs | |
235 | .%A R. Fielding | |
236 | .%T Relative Uniform Resource Locators | |
237 | .%O RFC1808 | |
238 | .Re | |
239 | .Sh HISTORY | |
240 | The | |
241 | .Fn unvis | |
242 | function | |
243 | first appeared in | |
244 | .Bx 4.4 . | |
974e3884 A |
245 | The |
246 | .Fn strnunvis | |
247 | and | |
248 | .Fn strnunvisx | |
249 | functions appeared in OS X 10.12. | |
250 | .Sh BUGS | |
251 | The names | |
252 | .Dv VIS_HTTP1808 | |
253 | and | |
254 | .Dv VIS_HTTP1866 | |
255 | are wrong. | |
256 | Percent-encoding was defined in RFC 1738, the original RFC for URLs. | |
257 | RFC 1866 defines HTML 2.0, an application of SGML, from which it | |
258 | inherits concepts of numeric character references and entity | |
259 | references. |