]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-344.21.73.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * Copyright (c) 1982, 1986, 1989, 1990, 1993
27 * The Regents of the University of California. All rights reserved.
28 *
29 * sendfile(2) and related extensions:
30 * Copyright (c) 1998, David Greenman. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
61 */
62
63
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/filedesc.h>
68 #include <sys/proc.h>
69 #include <sys/file.h>
70 #include <sys/buf.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #if KTRACE
77 #include <sys/ktrace.h>
78 #endif
79 #include <sys/kernel.h>
80
81 #include <sys/kdebug.h>
82
#if KDEBUG

/*
 * kdebug trace codes for the socket layer.  Each code is built with
 * NETDBG_CODE() in the DBG_NETSOCK class; the DBG_FNC_* codes are the
 * ones passed to KERNEL_DEBUG() by the send and receive paths in this
 * file.
 */
#define DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)

#define DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)

#define DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))

#endif /* KDEBUG */
97
98 struct getsockname_args {
99 int fdes;
100 caddr_t asa;
101 int *alen;
102 };
103
104 struct getsockopt_args {
105 int s;
106 int level;
107 int name;
108 caddr_t val;
109 int *avalsize;
110 } ;
111
112 struct accept_args {
113 int s;
114 caddr_t name;
115 int *anamelen;
116 };
117
118 struct getpeername_args {
119 int fdes;
120 caddr_t asa;
121 int *alen;
122 };
123
124
125 /* ARGSUSED */
126
127 #if SENDFILE
128 static void sf_buf_init(void *arg);
129 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
130 static struct sf_buf *sf_buf_alloc(void);
131 static void sf_buf_ref(caddr_t addr, u_int size);
132 static void sf_buf_free(caddr_t addr, u_int size);
133
134 static SLIST_HEAD(, sf_buf) sf_freelist;
135 static vm_offset_t sf_base;
136 static struct sf_buf *sf_bufs;
137 static int sf_buf_alloc_want;
138 #endif
139
140 static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags, register_t *retval));
141 static int recvit __P((struct proc *p, int s, struct msghdr *mp,
142 caddr_t namelenp, register_t *retval));
143
144 static int accept1 __P((struct proc *p, struct accept_args *uap, register_t *retval, int compat));
145 static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
146 register_t *retval, int compat));
147 static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
148 register_t *retval, int compat));
149
150 /*
151 * System call interface to the socket abstraction.
152 */
153 #if COMPAT_43 || defined(COMPAT_SUNOS)
154 #define COMPAT_OLDSOCK
155 #endif
156
157 extern struct fileops socketops;
158
159 struct socket_args {
160 int domain;
161 int type;
162 int protocol;
163 };
164 int
165 socket(p, uap, retval)
166 struct proc *p;
167 register struct socket_args *uap;
168 register_t *retval;
169 {
170 struct filedesc *fdp = p->p_fd;
171 struct socket *so;
172 struct file *fp;
173 int fd, error;
174
175 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
176 error = falloc(p, &fp, &fd);
177 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
178
179 if (error)
180 return (error);
181 fp->f_flag = FREAD|FWRITE;
182 fp->f_type = DTYPE_SOCKET;
183 fp->f_ops = &socketops;
184 if (error = socreate(uap->domain, &so, uap->type,
185 uap->protocol)) {
186 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
187 fdrelse(p, fd);
188 ffree(fp);
189 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
190 } else {
191 fp->f_data = (caddr_t)so;
192 *fdflags(p, fd) &= ~UF_RESERVED;
193 *retval = fd;
194 }
195 return (error);
196 }
197
198 struct bind_args {
199 int s;
200 caddr_t name;
201 int namelen;
202 };
203
204 /* ARGSUSED */
205 int
206 bind(p, uap, retval)
207 struct proc *p;
208 register struct bind_args *uap;
209 register_t *retval;
210 {
211 struct file *fp;
212 struct sockaddr *sa;
213 int error;
214
215 error = getsock(p->p_fd, uap->s, &fp);
216 if (error)
217 return (error);
218 error = getsockaddr(&sa, uap->name, uap->namelen);
219 if (error)
220 return (error);
221 error = sobind((struct socket *)fp->f_data, sa);
222 FREE(sa, M_SONAME);
223 return (error);
224 }
225
226 struct listen_args {
227 int s;
228 int backlog;
229 };
230
231
232
233 int
234 listen(p, uap, retval)
235 struct proc *p;
236 register struct listen_args *uap;
237 register_t *retval;
238 {
239 struct file *fp;
240 int error;
241
242 error = getsock(p->p_fd, uap->s, &fp);
243 if (error)
244 return (error);
245 return (solisten((struct socket *)fp->f_data, uap->backlog));
246 }
247
248 #ifndef COMPAT_OLDSOCK
249 #define accept1 accept
250 #endif
251
252
253
254 int
255 accept1(p, uap, retval, compat)
256 struct proc *p;
257 register struct accept_args *uap;
258 register_t *retval;
259 int compat;
260 {
261 struct file *fp;
262 struct sockaddr *sa;
263 u_int namelen;
264 int error, s;
265 struct socket *head, *so;
266 int fd;
267 short fflag; /* type must match fp->f_flag */
268 int tmpfd;
269
270 if (uap->name) {
271 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
272 sizeof (namelen));
273 if(error)
274 return (error);
275 }
276 error = getsock(p->p_fd, uap->s, &fp);
277 if (error)
278 return (error);
279 s = splnet();
280 head = (struct socket *)fp->f_data;
281 if ((head->so_options & SO_ACCEPTCONN) == 0) {
282 splx(s);
283 return (EINVAL);
284 }
285 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
286 splx(s);
287 return (EWOULDBLOCK);
288 }
289 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
290 if (head->so_state & SS_CANTRCVMORE) {
291 head->so_error = ECONNABORTED;
292 break;
293 }
294 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
295 "accept", 0);
296 if (error) {
297 splx(s);
298 return (error);
299 }
300 }
301 if (head->so_error) {
302 error = head->so_error;
303 head->so_error = 0;
304 splx(s);
305 return (error);
306 }
307
308
309 /*
310 * At this point we know that there is at least one connection
311 * ready to be accepted. Remove it from the queue prior to
312 * allocating the file descriptor for it since falloc() may
313 * block allowing another process to accept the connection
314 * instead.
315 */
316 so = TAILQ_FIRST(&head->so_comp);
317 TAILQ_REMOVE(&head->so_comp, so, so_list);
318 head->so_qlen--;
319
320 fflag = fp->f_flag;
321 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
322 error = falloc(p, &fp, &fd);
323 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
324 if (error) {
325 /*
326 * Probably ran out of file descriptors. Put the
327 * unaccepted connection back onto the queue and
328 * do another wakeup so some other process might
329 * have a chance at it.
330 */
331 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
332 head->so_qlen++;
333 wakeup_one(&head->so_timeo);
334 splx(s);
335 return (error);
336 } else {
337 *fdflags(p, fd) &= ~UF_RESERVED;
338 *retval = fd;
339 }
340
341 so->so_state &= ~SS_COMP;
342 so->so_head = NULL;
343 fp->f_type = DTYPE_SOCKET;
344 fp->f_flag = fflag;
345 fp->f_ops = &socketops;
346 fp->f_data = (caddr_t)so;
347 sa = 0;
348 (void) soaccept(so, &sa);
349 if (sa == 0) {
350 namelen = 0;
351 if (uap->name)
352 goto gotnoname;
353 return 0;
354 }
355 if (uap->name) {
356 /* check sa_len before it is destroyed */
357 if (namelen > sa->sa_len)
358 namelen = sa->sa_len;
359 #ifdef COMPAT_OLDSOCK
360 if (compat)
361 ((struct osockaddr *)sa)->sa_family =
362 sa->sa_family;
363 #endif
364 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
365 if (!error)
366 gotnoname:
367 error = copyout((caddr_t)&namelen,
368 (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
369 }
370 FREE(sa, M_SONAME);
371 splx(s);
372 return (error);
373 }
374
#ifdef COMPAT_OLDSOCK
/*
 * accept: the accept system call; runs accept1() with compat == 0.
 *
 * This wrapper only exists when COMPAT_OLDSOCK is defined.  Otherwise
 * accept1 is #defined to accept above, so accept1() itself is the
 * system call and a second definition here would collide with it.
 */
int
accept(p, uap, retval)
	struct proc *p;
	struct accept_args *uap;
	register_t *retval;
{

	return (accept1(p, uap, retval, 0));
}
#endif /* COMPAT_OLDSOCK */
384
#ifdef COMPAT_OLDSOCK
/*
 * oaccept: old-style accept; runs accept1() with compat == 1 so the
 * returned address is rewritten in the osockaddr layout.
 */
int
oaccept(p, uap, retval)
	struct proc *p;
	struct accept_args *uap;
	register_t *retval;
{
	int error;

	error = accept1(p, uap, retval, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
396
397 struct connect_args {
398 int s;
399 caddr_t name;
400 int namelen;
401 };
402 /* ARGSUSED */
403 int
404 connect(p, uap, retval)
405 struct proc *p;
406 register struct connect_args *uap;
407 register_t *retval;
408 {
409 struct file *fp;
410 register struct socket *so;
411 struct sockaddr *sa;
412 int error, s;
413
414 error = getsock(p->p_fd, uap->s, &fp);
415 if (error)
416 return (error);
417 so = (struct socket *)fp->f_data;
418 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
419 return (EALREADY);
420 error = getsockaddr(&sa, uap->name, uap->namelen);
421 if (error)
422 return (error);
423 error = soconnect(so, sa);
424 if (error)
425 goto bad;
426 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
427 FREE(sa, M_SONAME);
428 return (EINPROGRESS);
429 }
430 s = splnet();
431 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
432 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
433 "connec", 0);
434 if (error)
435 break;
436 }
437 if (error == 0) {
438 error = so->so_error;
439 so->so_error = 0;
440 }
441 splx(s);
442 bad:
443 so->so_state &= ~SS_ISCONNECTING;
444 FREE(sa, M_SONAME);
445 if (error == ERESTART)
446 error = EINTR;
447 return (error);
448 }
449
450 struct socketpair_args {
451 int domain;
452 int type;
453 int protocol;
454 int *rsv;
455 };
456 int
457 socketpair(p, uap, retval)
458 struct proc *p;
459 register struct socketpair_args *uap;
460 register_t *retval;
461 {
462 register struct filedesc *fdp = p->p_fd;
463 struct file *fp1, *fp2;
464 struct socket *so1, *so2;
465 int fd, error, sv[2];
466
467 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
468 if (error)
469 return (error);
470 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
471 if (error)
472 goto free1;
473 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
474 error = falloc(p, &fp1, &fd);
475 if (error)
476 goto free2;
477 sv[0] = fd;
478 fp1->f_flag = FREAD|FWRITE;
479 fp1->f_type = DTYPE_SOCKET;
480 fp1->f_ops = &socketops;
481 fp1->f_data = (caddr_t)so1;
482 error = falloc(p, &fp2, &fd);
483 if (error)
484 goto free3;
485 fp2->f_flag = FREAD|FWRITE;
486 fp2->f_type = DTYPE_SOCKET;
487 fp2->f_ops = &socketops;
488 fp2->f_data = (caddr_t)so2;
489 sv[1] = fd;
490 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
491 error = soconnect2(so1, so2);
492 if (error) {
493 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
494 goto free4;
495 }
496
497 if (uap->type == SOCK_DGRAM) {
498 /*
499 * Datagram socket connection is asymmetric.
500 */
501 error = soconnect2(so2, so1);
502 if (error) {
503 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
504 goto free4;
505 }
506 }
507 *fdflags(p, sv[0]) &= ~UF_RESERVED;
508 *fdflags(p, sv[1]) &= ~UF_RESERVED;
509 error = copyout((caddr_t)sv, (caddr_t)uap->rsv,
510 2 * sizeof (int));
511 #if 0 /* old pipe(2) syscall compatability, unused these days */
512 retval[0] = sv[0]; /* XXX ??? */
513 retval[1] = sv[1]; /* XXX ??? */
514 #endif /* 0 */
515 return (error);
516 free4:
517 fdrelse(p, sv[1]);
518 ffree(fp2);
519 free3:
520 fdrelse(p, sv[0]);
521 ffree(fp1);
522 free2:
523 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
524 (void)soclose(so2);
525 free1:
526 (void)soclose(so1);
527 return (error);
528 }
529
530 static int
531 sendit(p, s, mp, flags, retsize)
532 register struct proc *p;
533 int s;
534 register struct msghdr *mp;
535 int flags;
536 register_t *retsize;
537 {
538 struct file *fp;
539 struct uio auio;
540 register struct iovec *iov;
541 register int i;
542 struct mbuf *control;
543 struct sockaddr *to;
544 int len, error;
545 struct socket *so;
546 #if KTRACE
547 struct iovec *ktriov = NULL;
548 struct uio ktruio;
549 #endif
550
551 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0,0,0,0,0);
552
553 if (error = getsock(p->p_fd, s, &fp))
554 {
555 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
556 return (error);
557 }
558
559 auio.uio_iov = mp->msg_iov;
560 auio.uio_iovcnt = mp->msg_iovlen;
561 auio.uio_segflg = UIO_USERSPACE;
562 auio.uio_rw = UIO_WRITE;
563 auio.uio_procp = p;
564 auio.uio_offset = 0; /* XXX */
565 auio.uio_resid = 0;
566 iov = mp->msg_iov;
567 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
568 if (iov->iov_len < 0)
569 {
570 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0);
571 return (EINVAL);
572 }
573
574 if ((auio.uio_resid += iov->iov_len) < 0)
575 {
576 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0);
577 return (EINVAL);
578 }
579 }
580 if (mp->msg_name) {
581 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
582 if (error) {
583 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
584 return (error);
585 }
586 } else
587 to = 0;
588 if (mp->msg_control) {
589 if (mp->msg_controllen < sizeof(struct cmsghdr)
590 #ifdef COMPAT_OLDSOCK
591 && mp->msg_flags != MSG_COMPAT
592 #endif
593 ) {
594 error = EINVAL;
595 goto bad;
596 }
597 error = sockargs(&control, mp->msg_control,
598 mp->msg_controllen, MT_CONTROL);
599 if (error)
600 goto bad;
601 #ifdef COMPAT_OLDSOCK
602 if (mp->msg_flags == MSG_COMPAT) {
603 register struct cmsghdr *cm;
604
605 M_PREPEND(control, sizeof(*cm), M_WAIT);
606 if (control == 0) {
607 error = ENOBUFS;
608 goto bad;
609 } else {
610 cm = mtod(control, struct cmsghdr *);
611 cm->cmsg_len = control->m_len;
612 cm->cmsg_level = SOL_SOCKET;
613 cm->cmsg_type = SCM_RIGHTS;
614 }
615 }
616 #endif
617 } else
618 control = 0;
619
620 #if KTRACE
621 if (KTRPOINT(p, KTR_GENIO)) {
622 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
623
624 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
625 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
626 ktruio = auio;
627 }
628 #endif
629 len = auio.uio_resid;
630 so = (struct socket *)fp->f_data;
631 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
632 flags);
633 if (error) {
634 if (auio.uio_resid != len && (error == ERESTART ||
635 error == EINTR || error == EWOULDBLOCK))
636 error = 0;
637 /* Generation of SIGPIPE can be controlled per socket */
638 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
639 psignal(p, SIGPIPE);
640 }
641 if (error == 0)
642 *retsize = len - auio.uio_resid;
643 #if KTRACE
644 if (ktriov != NULL) {
645 if (error == 0) {
646 ktruio.uio_iov = ktriov;
647 ktruio.uio_resid = retsize[0];
648 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error, -1);
649 }
650 FREE(ktriov, M_TEMP);
651 }
652 #endif
653 bad:
654 if (to)
655 FREE(to, M_SONAME);
656 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
657 return (error);
658 }
659
660
661 struct sendto_args {
662 int s;
663 caddr_t buf;
664 size_t len;
665 int flags;
666 caddr_t to;
667 int tolen;
668 };
669
670 int
671 sendto(p, uap, retval)
672 struct proc *p;
673 register struct sendto_args /* {
674 int s;
675 caddr_t buf;
676 size_t len;
677 int flags;
678 caddr_t to;
679 int tolen;
680 } */ *uap;
681 register_t *retval;
682
683 {
684 struct msghdr msg;
685 struct iovec aiov;
686 int stat;
687
688 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0,0,0,0,0);
689
690 msg.msg_name = uap->to;
691 msg.msg_namelen = uap->tolen;
692 msg.msg_iov = &aiov;
693 msg.msg_iovlen = 1;
694 msg.msg_control = 0;
695 #ifdef COMPAT_OLDSOCK
696 msg.msg_flags = 0;
697 #endif
698 aiov.iov_base = uap->buf;
699 aiov.iov_len = uap->len;
700 stat = sendit(p, uap->s, &msg, uap->flags, retval);
701 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, stat, *retval,0,0,0);
702 return(stat);
703 }
704
705 #ifdef COMPAT_OLDSOCK
706 struct osend_args {
707 int s;
708 caddr_t buf;
709 int len;
710 int flags;
711 };
712
713 int
714 osend(p, uap, retval)
715 struct proc *p;
716 register struct osend_args /* {
717 int s;
718 caddr_t buf;
719 int len;
720 int flags;
721 } */ *uap;
722 register_t *retval;
723
724 {
725 struct msghdr msg;
726 struct iovec aiov;
727
728 msg.msg_name = 0;
729 msg.msg_namelen = 0;
730 msg.msg_iov = &aiov;
731 msg.msg_iovlen = 1;
732 aiov.iov_base = uap->buf;
733 aiov.iov_len = uap->len;
734 msg.msg_control = 0;
735 msg.msg_flags = 0;
736 return (sendit(p, uap->s, &msg, uap->flags, retval));
737 }
738 struct osendmsg_args {
739 int s;
740 caddr_t msg;
741 int flags;
742 };
743
744 int
745 osendmsg(p, uap, retval)
746 struct proc *p;
747 register struct osendmsg_args /* {
748 int s;
749 caddr_t msg;
750 int flags;
751 } */ *uap;
752 register_t *retval;
753
754 {
755 struct msghdr msg;
756 struct iovec aiov[UIO_SMALLIOV], *iov;
757 int error;
758
759 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
760 if (error)
761 return (error);
762 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
763 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
764 return (EMSGSIZE);
765 MALLOC(iov, struct iovec *,
766 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
767 M_WAITOK);
768 } else
769 iov = aiov;
770 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
771 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
772 if (error)
773 goto done;
774 msg.msg_flags = MSG_COMPAT;
775 msg.msg_iov = iov;
776 error = sendit(p, uap->s, &msg, uap->flags, retval);
777 done:
778 if (iov != aiov)
779 FREE(iov, M_IOV);
780 return (error);
781 }
782 #endif
783
784 struct sendmsg_args {
785 int s;
786 caddr_t msg;
787 int flags;
788 };
789
790 int
791 sendmsg(p, uap, retval)
792 struct proc *p;
793 register struct sendmsg_args *uap;
794 register_t *retval;
795 {
796 struct msghdr msg;
797 struct iovec aiov[UIO_SMALLIOV], *iov;
798 int error;
799
800 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0,0,0,0,0);
801 if (error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)))
802 {
803 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
804 return (error);
805 }
806
807 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
808 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
809 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
810 return (EMSGSIZE);
811 }
812 MALLOC(iov, struct iovec *,
813 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
814 M_WAITOK);
815 } else
816 iov = aiov;
817 if (msg.msg_iovlen &&
818 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
819 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
820 goto done;
821 msg.msg_iov = iov;
822 #ifdef COMPAT_OLDSOCK
823 msg.msg_flags = 0;
824 #endif
825 error = sendit(p, uap->s, &msg, uap->flags, retval);
826 done:
827 if (iov != aiov)
828 FREE(iov, M_IOV);
829 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
830 return (error);
831 }
832
833 static int
834 recvit(p, s, mp, namelenp, retval)
835 register struct proc *p;
836 int s;
837 register struct msghdr *mp;
838 caddr_t namelenp;
839 register_t *retval;
840 {
841 struct file *fp;
842 struct uio auio;
843 register struct iovec *iov;
844 register int i;
845 int len, error;
846 struct mbuf *m, *control = 0;
847 caddr_t ctlbuf;
848 struct socket *so;
849 struct sockaddr *fromsa = 0;
850 #if KTRACE
851 struct iovec *ktriov = NULL;
852 struct uio ktruio;
853 #endif
854
855 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0,0,0,0,0);
856 if (error = getsock(p->p_fd, s, &fp))
857 {
858 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
859 return (error);
860 }
861
862 auio.uio_iov = mp->msg_iov;
863 auio.uio_iovcnt = mp->msg_iovlen;
864 auio.uio_segflg = UIO_USERSPACE;
865 auio.uio_rw = UIO_READ;
866 auio.uio_procp = p;
867 auio.uio_offset = 0; /* XXX */
868 auio.uio_resid = 0;
869 iov = mp->msg_iov;
870 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
871 if ((auio.uio_resid += iov->iov_len) < 0) {
872 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL,0,0,0,0);
873 return (EINVAL);
874 }
875 }
876 #if KTRACE
877 if (KTRPOINT(p, KTR_GENIO)) {
878 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
879
880 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
881 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
882 ktruio = auio;
883 }
884 #endif
885 len = auio.uio_resid;
886 so = (struct socket *)fp->f_data;
887 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
888 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
889 &mp->msg_flags);
890 if (error) {
891 if (auio.uio_resid != len && (error == ERESTART ||
892 error == EINTR || error == EWOULDBLOCK))
893 error = 0;
894 }
895 #if KTRACE
896 if (ktriov != NULL) {
897 if (error == 0) {
898 ktruio.uio_iov = ktriov;
899 ktruio.uio_resid = len - auio.uio_resid;
900 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error, -1);
901 }
902 FREE(ktriov, M_TEMP);
903 }
904 #endif
905 if (error)
906 goto out;
907 *retval = len - auio.uio_resid;
908 if (mp->msg_name) {
909 len = mp->msg_namelen;
910 if (len <= 0 || fromsa == 0)
911 len = 0;
912 else {
913 #ifndef MIN
914 #define MIN(a,b) ((a)>(b)?(b):(a))
915 #endif
916 /* save sa_len before it is destroyed by MSG_COMPAT */
917 len = MIN(len, fromsa->sa_len);
918 #ifdef COMPAT_OLDSOCK
919 if (mp->msg_flags & MSG_COMPAT)
920 ((struct osockaddr *)fromsa)->sa_family =
921 fromsa->sa_family;
922 #endif
923 error = copyout(fromsa,
924 (caddr_t)mp->msg_name, (unsigned)len);
925 if (error)
926 goto out;
927 }
928 mp->msg_namelen = len;
929 if (namelenp &&
930 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
931 #ifdef COMPAT_OLDSOCK
932 if (mp->msg_flags & MSG_COMPAT)
933 error = 0; /* old recvfrom didn't check */
934 else
935 #endif
936 goto out;
937 }
938 }
939 if (mp->msg_control) {
940 #ifdef COMPAT_OLDSOCK
941 /*
942 * We assume that old recvmsg calls won't receive access
943 * rights and other control info, esp. as control info
944 * is always optional and those options didn't exist in 4.3.
945 * If we receive rights, trim the cmsghdr; anything else
946 * is tossed.
947 */
948 if (control && mp->msg_flags & MSG_COMPAT) {
949 if (mtod(control, struct cmsghdr *)->cmsg_level !=
950 SOL_SOCKET ||
951 mtod(control, struct cmsghdr *)->cmsg_type !=
952 SCM_RIGHTS) {
953 mp->msg_controllen = 0;
954 goto out;
955 }
956 control->m_len -= sizeof (struct cmsghdr);
957 control->m_data += sizeof (struct cmsghdr);
958 }
959 #endif
960 len = mp->msg_controllen;
961 m = control;
962 mp->msg_controllen = 0;
963 ctlbuf = (caddr_t) mp->msg_control;
964
965 while (m && len > 0) {
966 unsigned int tocopy;
967
968 if (len >= m->m_len)
969 tocopy = m->m_len;
970 else {
971 mp->msg_flags |= MSG_CTRUNC;
972 tocopy = len;
973 }
974
975 if (error = copyout((caddr_t)mtod(m, caddr_t),
976 ctlbuf, tocopy))
977 goto out;
978
979 ctlbuf += tocopy;
980 len -= tocopy;
981 m = m->m_next;
982 }
983 mp->msg_controllen = ctlbuf - mp->msg_control;
984 }
985 out:
986 if (fromsa)
987 FREE(fromsa, M_SONAME);
988 if (control)
989 m_freem(control);
990 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
991 return (error);
992 }
993
994
995 struct recvfrom_args {
996 int s;
997 caddr_t buf;
998 size_t len;
999 int flags;
1000 caddr_t from;
1001 int *fromlenaddr;
1002 };
1003
1004 int
1005 recvfrom(p, uap, retval)
1006 struct proc *p;
1007 register struct recvfrom_args /* {
1008 int s;
1009 caddr_t buf;
1010 size_t len;
1011 int flags;
1012 caddr_t from;
1013 int *fromlenaddr;
1014 } */ *uap;
1015 register_t *retval;
1016 {
1017 struct msghdr msg;
1018 struct iovec aiov;
1019 int error;
1020
1021 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0,0,0,0,0);
1022
1023 if (uap->fromlenaddr) {
1024 error = copyin((caddr_t)uap->fromlenaddr,
1025 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1026 if (error)
1027 return (error);
1028 } else
1029 msg.msg_namelen = 0;
1030 msg.msg_name = uap->from;
1031 msg.msg_iov = &aiov;
1032 msg.msg_iovlen = 1;
1033 aiov.iov_base = uap->buf;
1034 aiov.iov_len = uap->len;
1035 msg.msg_control = 0;
1036 msg.msg_flags = uap->flags;
1037 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error,0,0,0,0);
1038 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval));
1039 }
1040
#ifdef COMPAT_OLDSOCK
/*
 * orecvfrom: old recvfrom(); sets MSG_COMPAT in the caller's flags and
 * defers to recvfrom().  retval must be forwarded: recvfrom() hands it
 * to recvit(), which stores the byte count through it.
 */
int
orecvfrom(p, uap, retval)
	struct proc *p;
	struct recvfrom_args *uap;
	register_t *retval;
{

	uap->flags |= MSG_COMPAT;
	return (recvfrom(p, uap, retval));
}
#endif
1053
1054
1055 #ifdef COMPAT_OLDSOCK
1056 struct orecv_args {
1057 int s;
1058 caddr_t buf;
1059 int len;
1060 int flags;
1061 };
1062
1063 int
1064 orecv(p, uap, retval)
1065 struct proc *p;
1066 struct orecv_args *uap;
1067 register_t *retval;
1068 {
1069 struct msghdr msg;
1070 struct iovec aiov;
1071
1072 msg.msg_name = 0;
1073 msg.msg_namelen = 0;
1074 msg.msg_iov = &aiov;
1075 msg.msg_iovlen = 1;
1076 aiov.iov_base = uap->buf;
1077 aiov.iov_len = uap->len;
1078 msg.msg_control = 0;
1079 msg.msg_flags = uap->flags;
1080 return (recvit(p, uap->s, &msg, (caddr_t)0, retval));
1081 }
1082
1083 /*
1084 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1085 * overlays the new one, missing only the flags, and with the (old) access
1086 * rights where the control fields are now.
1087 */
1088 struct orecvmsg_args {
1089 int s;
1090 struct omsghdr *msg;
1091 int flags;
1092 };
1093
1094 int
1095 orecvmsg(p, uap, retval)
1096 struct proc *p;
1097 struct orecvmsg_args *uap;
1098 register_t *retval;
1099 {
1100 struct msghdr msg;
1101 struct iovec aiov[UIO_SMALLIOV], *iov;
1102 int error;
1103
1104 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1105 sizeof (struct omsghdr));
1106 if (error)
1107 return (error);
1108 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1109 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
1110 return (EMSGSIZE);
1111 MALLOC(iov, struct iovec *,
1112 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1113 M_WAITOK);
1114 } else
1115 iov = aiov;
1116 msg.msg_flags = uap->flags | MSG_COMPAT;
1117 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1118 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1119 if (error)
1120 goto done;
1121 msg.msg_iov = iov;
1122 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen, retval);
1123
1124 if (msg.msg_controllen && error == 0)
1125 error = copyout((caddr_t)&msg.msg_controllen,
1126 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1127 done:
1128 if (iov != aiov)
1129 FREE(iov, M_IOV);
1130 return (error);
1131 }
1132 #endif
1133
1134 struct recvmsg_args {
1135 int s;
1136 struct msghdr *msg;
1137 int flags;
1138 };
1139
1140 int
1141 recvmsg(p, uap, retval)
1142 struct proc *p;
1143 struct recvmsg_args *uap;
1144 register_t *retval;
1145 {
1146 struct msghdr msg;
1147 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1148 register int error;
1149
1150 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0,0,0,0,0);
1151 if (error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1152 sizeof (msg)))
1153 {
1154 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1155 return (error);
1156 }
1157
1158 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1159 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1160 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
1161 return (EMSGSIZE);
1162 }
1163 MALLOC(iov, struct iovec *,
1164 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1165 M_WAITOK);
1166 } else
1167 iov = aiov;
1168 #ifdef COMPAT_OLDSOCK
1169 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1170 #else
1171 msg.msg_flags = uap->flags;
1172 #endif
1173 uiov = msg.msg_iov;
1174 msg.msg_iov = iov;
1175 error = copyin((caddr_t)uiov, (caddr_t)iov,
1176 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1177 if (error)
1178 goto done;
1179 error = recvit(p, uap->s, &msg, (caddr_t)0, retval);
1180 if (!error) {
1181 msg.msg_iov = uiov;
1182 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1183 }
1184 done:
1185 if (iov != aiov)
1186 FREE(iov, M_IOV);
1187 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1188 return (error);
1189 }
1190
1191 /* ARGSUSED */
1192 struct shutdown_args {
1193 int s;
1194 int how;
1195 };
1196
1197 int
1198 shutdown(p, uap, retval)
1199 struct proc *p;
1200 struct shutdown_args *uap;
1201 register_t *retval;
1202 {
1203 struct file *fp;
1204 int error;
1205
1206 error = getsock(p->p_fd, uap->s, &fp);
1207 if (error)
1208 return (error);
1209 return (soshutdown((struct socket *)fp->f_data, uap->how));
1210 }
1211
1212
1213
1214
1215
1216 /* ARGSUSED */
1217 struct setsockopt_args {
1218 int s;
1219 int level;
1220 int name;
1221 caddr_t val;
1222 int valsize;
1223 };
1224
1225 int
1226 setsockopt(p, uap, retval)
1227 struct proc *p;
1228 struct setsockopt_args *uap;
1229 register_t *retval;
1230 {
1231 struct file *fp;
1232 struct sockopt sopt;
1233 int error;
1234
1235 if (uap->val == 0 && uap->valsize != 0)
1236 return (EFAULT);
1237 if (uap->valsize < 0)
1238 return (EINVAL);
1239
1240 error = getsock(p->p_fd, uap->s, &fp);
1241 if (error)
1242 return (error);
1243
1244 sopt.sopt_dir = SOPT_SET;
1245 sopt.sopt_level = uap->level;
1246 sopt.sopt_name = uap->name;
1247 sopt.sopt_val = uap->val;
1248 sopt.sopt_valsize = uap->valsize;
1249 sopt.sopt_p = p;
1250
1251 return (sosetopt((struct socket *)fp->f_data, &sopt));
1252 }
1253
1254
1255
1256 int
1257 getsockopt(p, uap, retval)
1258 struct proc *p;
1259 struct getsockopt_args *uap;
1260 register_t *retval;
1261 {
1262 int valsize, error;
1263 struct file *fp;
1264 struct sockopt sopt;
1265
1266 error = getsock(p->p_fd, uap->s, &fp);
1267 if (error)
1268 return (error);
1269 if (uap->val) {
1270 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1271 sizeof (valsize));
1272 if (error)
1273 return (error);
1274 if (valsize < 0)
1275 return (EINVAL);
1276 } else
1277 valsize = 0;
1278
1279 sopt.sopt_dir = SOPT_GET;
1280 sopt.sopt_level = uap->level;
1281 sopt.sopt_name = uap->name;
1282 sopt.sopt_val = uap->val;
1283 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1284 sopt.sopt_p = p;
1285
1286 error = sogetopt((struct socket *)fp->f_data, &sopt);
1287 if (error == 0) {
1288 valsize = sopt.sopt_valsize;
1289 error = copyout((caddr_t)&valsize,
1290 (caddr_t)uap->avalsize, sizeof (valsize));
1291 }
1292 return (error);
1293 }
1294
1295
1296
1297 struct pipe_args {
1298 int dummy;
1299 };
1300 /* ARGSUSED */
1301 int
1302 pipe(p, uap, retval)
1303 struct proc *p;
1304 struct pipe_args *uap;
1305 register_t *retval;
1306 {
1307 struct file *rf, *wf;
1308 struct socket *rso, *wso;
1309 int fd, error;
1310
1311 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1312 if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) {
1313 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1314 return (error);
1315 }
1316 if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) {
1317 goto free1;
1318 }
1319 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1320 error = falloc(p, &rf, &fd);
1321 if (error)
1322 goto free2;
1323 retval[0] = fd;
1324 rf->f_flag = FREAD;
1325 rf->f_type = DTYPE_SOCKET;
1326 rf->f_ops = &socketops;
1327 rf->f_data = (caddr_t)rso;
1328 if (error = falloc(p, &wf, &fd))
1329 goto free3;
1330 wf->f_flag = FWRITE;
1331 wf->f_type = DTYPE_SOCKET;
1332 wf->f_ops = &socketops;
1333 wf->f_data = (caddr_t)wso;
1334 retval[1] = fd;
1335
1336 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1337 error = unp_connect2(wso, rso);
1338 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1339 if (error)
1340 goto free4;
1341 *fdflags(p, retval[0]) &= ~UF_RESERVED;
1342 *fdflags(p, retval[1]) &= ~UF_RESERVED;
1343 return (0);
1344 free4:
1345 fdrelse(p, retval[1]);
1346 ffree(wf);
1347 free3:
1348 fdrelse(p, retval[0]);
1349 ffree(rf);
1350 free2:
1351 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1352 (void)soclose(wso);
1353 free1:
1354 (void)soclose(rso);
1355
1356 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1357 return (error);
1358 }
1359
1360
1361 /*
1362 * Get socket name.
1363 */
1364 /* ARGSUSED */
1365 static int
1366 getsockname1(p, uap, retval, compat)
1367 struct proc *p;
1368 register struct getsockname_args *uap;
1369 register_t *retval;
1370 int compat;
1371 {
1372 struct file *fp;
1373 register struct socket *so;
1374 struct sockaddr *sa;
1375 u_int len;
1376 int error;
1377
1378 error = getsock(p->p_fd, uap->fdes, &fp);
1379 if (error)
1380 return (error);
1381 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1382 if (error)
1383 return (error);
1384 so = (struct socket *)fp->f_data;
1385 sa = 0;
1386 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1387 if (error)
1388 goto bad;
1389 if (sa == 0) {
1390 len = 0;
1391 goto gotnothing;
1392 }
1393
1394 len = MIN(len, sa->sa_len);
1395 #ifdef COMPAT_OLDSOCK
1396 if (compat)
1397 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1398 #endif
1399 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1400 if (error == 0)
1401 gotnothing:
1402 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1403 sizeof (len));
1404 bad:
1405 if (sa)
1406 FREE(sa, M_SONAME);
1407 return (error);
1408 }
1409
1410 int
1411 getsockname(p, uap, retval)
1412 struct proc *p;
1413 struct getsockname_args *uap;
1414 register_t *retval;
1415 {
1416
1417 return (getsockname1(p, uap, retval, 0));
1418 }
1419
1420 #ifdef COMPAT_OLDSOCK
1421 int
1422 ogetsockname(p, uap, retval)
1423 struct proc *p;
1424 struct getsockname_args *uap;
1425 register_t *retval;
1426 {
1427
1428 return (getsockname1(p, uap, retval, 1));
1429 }
1430 #endif /* COMPAT_OLDSOCK */
1431
1432 /*
1433 * Get name of peer for connected socket.
1434 */
1435 /* ARGSUSED */
1436 int
1437 getpeername1(p, uap, retval, compat)
1438 struct proc *p;
1439 register struct getpeername_args *uap;
1440 register_t *retval;
1441 int compat;
1442 {
1443 struct file *fp;
1444 register struct socket *so;
1445 struct sockaddr *sa;
1446 u_int len;
1447 int error;
1448
1449 error = getsock(p->p_fd, uap->fdes, &fp);
1450 if (error)
1451 return (error);
1452 so = (struct socket *)fp->f_data;
1453 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
1454 return (ENOTCONN);
1455 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1456 if (error)
1457 return (error);
1458 sa = 0;
1459 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1460 if (error)
1461 goto bad;
1462 if (sa == 0) {
1463 len = 0;
1464 goto gotnothing;
1465 }
1466 len = MIN(len, sa->sa_len);
1467 #ifdef COMPAT_OLDSOCK
1468 if (compat)
1469 ((struct osockaddr *)sa)->sa_family =
1470 sa->sa_family;
1471 #endif
1472 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1473 if (error)
1474 goto bad;
1475 gotnothing:
1476 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1477 bad:
1478 if (sa) FREE(sa, M_SONAME);
1479 return (error);
1480 }
1481
1482 int
1483 getpeername(p, uap, retval)
1484 struct proc *p;
1485 struct getpeername_args *uap;
1486 register_t *retval;
1487 {
1488
1489 return (getpeername1(p, uap, retval, 0));
1490 }
1491
1492 #ifdef COMPAT_OLDSOCK
1493 int
1494 ogetpeername(p, uap, retval)
1495 struct proc *p;
1496 struct ogetpeername_args *uap;
1497 register_t *retval;
1498 {
1499
1500 /* XXX uap should have type `getpeername_args *' to begin with. */
1501 return (getpeername1(p, (struct getpeername_args *)uap, retval, 1));
1502 }
1503 #endif /* COMPAT_OLDSOCK */
1504
1505 int
1506 sockargs(mp, buf, buflen, type)
1507 struct mbuf **mp;
1508 caddr_t buf;
1509 int buflen, type;
1510 {
1511 register struct sockaddr *sa;
1512 register struct mbuf *m;
1513 int error;
1514
1515 if ((u_int)buflen > MLEN) {
1516 #ifdef COMPAT_OLDSOCK
1517 if (type == MT_SONAME && (u_int)buflen <= 112)
1518 buflen = MLEN; /* unix domain compat. hack */
1519 else
1520 #endif
1521 return (EINVAL);
1522 }
1523 m = m_get(M_WAIT, type);
1524 if (m == NULL)
1525 return (ENOBUFS);
1526 m->m_len = buflen;
1527 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1528 if (error)
1529 (void) m_free(m);
1530 else {
1531 *mp = m;
1532 if (type == MT_SONAME) {
1533 sa = mtod(m, struct sockaddr *);
1534
1535 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1536 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1537 sa->sa_family = sa->sa_len;
1538 #endif
1539 sa->sa_len = buflen;
1540 }
1541 }
1542 return (error);
1543 }
1544
1545 int
1546 getsockaddr(namp, uaddr, len)
1547 struct sockaddr **namp;
1548 caddr_t uaddr;
1549 size_t len;
1550 {
1551 struct sockaddr *sa;
1552 int error;
1553
1554 if (len > SOCK_MAXADDRLEN)
1555 return ENAMETOOLONG;
1556
1557 if (len == 0)
1558 return EINVAL;
1559
1560 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1561 error = copyin(uaddr, sa, len);
1562 if (error) {
1563 FREE(sa, M_SONAME);
1564 } else {
1565 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1566 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1567 sa->sa_family = sa->sa_len;
1568 #endif
1569 sa->sa_len = len;
1570 *namp = sa;
1571 }
1572 return error;
1573 }
1574
1575 int
1576 getsock(fdp, fdes, fpp)
1577 struct filedesc *fdp;
1578 int fdes;
1579 struct file **fpp;
1580 {
1581 register struct file *fp;
1582
1583 if ((unsigned)fdes >= fdp->fd_nfiles ||
1584 (fp = fdp->fd_ofiles[fdes]) == NULL ||
1585 (fdp->fd_ofileflags[fdes] & UF_RESERVED))
1586 return (EBADF);
1587 if (fp->f_type != DTYPE_SOCKET)
1588 return (ENOTSOCK);
1589 *fpp = fp;
1590 return (0);
1591 }
1592
1593 #if SENDFILE
1594 /*
1595 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1596 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1597 * been made static, but may be useful in the future for doing zero-copy in
1598 * other parts of the networking code.
1599 */
1600 static void
1601 sf_buf_init(void *arg)
1602 {
1603 int i;
1604
1605 SLIST_INIT(&sf_freelist);
1606 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1607 sf_bufs = _MALLOC(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
1608 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
1609 for (i = 0; i < nsfbufs; i++) {
1610 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1611 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1612 }
1613 }
1614
1615 /*
1616 * Get an sf_buf from the freelist. Will block if none are available.
1617 */
1618 static struct sf_buf *
1619 sf_buf_alloc()
1620 {
1621 struct sf_buf *sf;
1622 int s;
1623
1624 s = splimp();
1625 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1626 sf_buf_alloc_want = 1;
1627 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1628 }
1629 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1630 splx(s);
1631 sf->refcnt = 1;
1632 return (sf);
1633 }
1634
1635 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1636 static void
1637 sf_buf_ref(caddr_t addr, u_int size)
1638 {
1639 struct sf_buf *sf;
1640
1641 sf = dtosf(addr);
1642 if (sf->refcnt == 0)
1643 panic("sf_buf_ref: referencing a free sf_buf");
1644 sf->refcnt++;
1645 }
1646
1647 /*
1648 * Lose a reference to an sf_buf. When none left, detach mapped page
1649 * and release resources back to the system.
1650 *
1651 * Must be called at splimp.
1652 */
1653 static void
1654 sf_buf_free(caddr_t addr, u_int size)
1655 {
1656 struct sf_buf *sf;
1657 struct vm_page *m;
1658 int s;
1659
1660 sf = dtosf(addr);
1661 if (sf->refcnt == 0)
1662 panic("sf_buf_free: freeing free sf_buf");
1663 sf->refcnt--;
1664 if (sf->refcnt == 0) {
1665 pmap_qremove((vm_offset_t)addr, 1);
1666 m = sf->m;
1667 s = splvm();
1668 vm_page_unwire(m, 0);
1669 /*
1670 * Check for the object going away on us. This can
1671 * happen since we don't hold a reference to it.
1672 * If so, we're responsible for freeing the page.
1673 */
1674 if (m->wire_count == 0 && m->object == NULL)
1675 vm_page_lock_queues();
1676 vm_page_free(m);
1677 vm_page_unlock_queues();
1678 splx(s);
1679 sf->m = NULL;
1680 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1681 if (sf_buf_alloc_want) {
1682 sf_buf_alloc_want = 0;
1683 wakeup(&sf_freelist);
1684 }
1685 }
1686 }
1687
1688 /*
1689 * sendfile(2).
1690 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1691 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1692 *
1693 * Send a file specified by 'fd' and starting at 'offset' to a socket
1694 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1695 * nbytes == 0. Optionally add a header and/or trailer to the socket
1696 * output. If specified, write the total number of bytes sent into *sbytes.
1697 */
1698 int
1699 sendfile(struct proc *p, struct sendfile_args *uap)
1700 {
1701 struct file *fp;
1702 struct filedesc *fdp = p->p_fd;
1703 struct vnode *vp;
1704 struct vm_object *obj;
1705 struct socket *so;
1706 struct mbuf *m;
1707 struct sf_buf *sf;
1708 struct vm_page *pg;
1709 struct writev_args nuap;
1710 struct sf_hdtr hdtr;
1711 off_t off, xfsize, sbytes = 0;
1712 int error = 0, s;
1713
1714 /*
1715 * Do argument checking. Must be a regular file in, stream
1716 * type and connected socket out, positive offset.
1717 */
1718 if (((u_int)uap->fd) >= fdp->fd_nfiles ||
1719 (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
1720 (fp->f_flag & FREAD) == 0) {
1721 error = EBADF;
1722 goto done;
1723 }
1724 if (fp->f_type != DTYPE_VNODE) {
1725 error = EINVAL;
1726 goto done;
1727 }
1728 vp = (struct vnode *)fp->f_data;
1729 obj = vp->v_object;
1730 if (vp->v_type != VREG || obj == NULL) {
1731 error = EINVAL;
1732 goto done;
1733 }
1734 error = getsock(p->p_fd, uap->s, &fp);
1735 if (error)
1736 goto done;
1737 so = (struct socket *)fp->f_data;
1738 if (so->so_type != SOCK_STREAM) {
1739 error = EINVAL;
1740 goto done;
1741 }
1742 if ((so->so_state & SS_ISCONNECTED) == 0) {
1743 error = ENOTCONN;
1744 goto done;
1745 }
1746 if (uap->offset < 0) {
1747 error = EINVAL;
1748 goto done;
1749 }
1750
1751 /*
1752 * If specified, get the pointer to the sf_hdtr struct for
1753 * any headers/trailers.
1754 */
1755 if (uap->hdtr != NULL) {
1756 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1757 if (error)
1758 goto done;
1759 /*
1760 * Send any headers. Wimp out and use writev(2).
1761 */
1762 if (hdtr.headers != NULL) {
1763 nuap.fd = uap->s;
1764 nuap.iovp = hdtr.headers;
1765 nuap.iovcnt = hdtr.hdr_cnt;
1766 error = writev(p, &nuap);
1767 if (error)
1768 goto done;
1769 sbytes += p->p_retval[0];
1770 }
1771 }
1772
1773 /*
1774 * Protect against multiple writers to the socket.
1775 */
1776 (void) sblock(&so->so_snd, M_WAIT);
1777
1778 /*
1779 * Loop through the pages in the file, starting with the requested
1780 * offset. Get a file page (do I/O if necessary), map the file page
1781 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1782 * it on the socket.
1783 */
1784 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1785 vm_object_offset_t pindex;
1786 vm_object_offset_t pgoff;
1787
1788 pindex = OFF_TO_IDX(off);
1789 retry_lookup:
1790 /*
1791 * Calculate the amount to transfer. Not to exceed a page,
1792 * the EOF, or the passed in nbytes.
1793 */
1794 xfsize = obj->un_pager.vnp.vnp_size - off;
1795 if (xfsize > PAGE_SIZE_64)
1796 xfsize = PAGE_SIZE;
1797 pgoff = (vm_object_offset_t)(off & PAGE_MASK_64);
1798 if (PAGE_SIZE - pgoff < xfsize)
1799 xfsize = PAGE_SIZE_64 - pgoff;
1800 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1801 xfsize = uap->nbytes - sbytes;
1802 if (xfsize <= 0)
1803 break;
1804 /*
1805 * Optimize the non-blocking case by looking at the socket space
1806 * before going to the extra work of constituting the sf_buf.
1807 */
1808 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1809 if (so->so_state & SS_CANTSENDMORE)
1810 error = EPIPE;
1811 else
1812 error = EAGAIN;
1813 sbunlock(&so->so_snd);
1814 goto done;
1815 }
1816 /*
1817 * Attempt to look up the page. If the page doesn't exist or the
1818 * part we're interested in isn't valid, then read it from disk.
1819 * If some other part of the kernel has this page (i.e. it's busy),
1820 * then disk I/O may be occuring on it, so wait and retry.
1821 */
1822 pg = vm_page_lookup(obj, pindex);
1823 if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
1824 !vm_page_is_valid(pg, pgoff, xfsize))) {
1825 struct uio auio;
1826 struct iovec aiov;
1827 int bsize;
1828
1829 if (pg == NULL) {
1830 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1831 if (pg == NULL) {
1832 VM_WAIT;
1833 goto retry_lookup;
1834 }
1835 /*
1836 * don't just clear PG_BUSY manually -
1837 * vm_page_alloc() should be considered opaque,
1838 * use the VM routine provided to clear
1839 * PG_BUSY.
1840 */
1841 vm_page_wakeup(pg);
1842
1843 }
1844 /*
1845 * Ensure that our page is still around when the I/O completes.
1846 */
1847 vm_page_io_start(pg);
1848 vm_page_wire(pg);
1849 /*
1850 * Get the page from backing store.
1851 */
1852 bsize = vp->v_mount->mnt_stat.f_iosize;
1853 auio.uio_iov = &aiov;
1854 auio.uio_iovcnt = 1;
1855 aiov.iov_base = 0;
1856 aiov.iov_len = MAXBSIZE;
1857 auio.uio_resid = MAXBSIZE;
1858 auio.uio_offset = trunc_page(off);
1859 auio.uio_segflg = UIO_NOCOPY;
1860 auio.uio_rw = UIO_READ;
1861 auio.uio_procp = p;
1862 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
1863 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1864 p->p_ucred);
1865 VOP_UNLOCK(vp, 0, p);
1866 vm_page_flag_clear(pg, PG_ZERO);
1867 vm_page_io_finish(pg);
1868 if (error) {
1869 vm_page_unwire(pg, 0);
1870 /*
1871 * See if anyone else might know about this page.
1872 * If not and it is not valid, then free it.
1873 */
1874 if (pg->wire_count == 0 && pg->valid == 0 &&
1875 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1876 pg->hold_count == 0)
1877 vm_page_lock_queues();
1878 vm_page_free(pg);
1879 vm_page_unlock_queues();
1880 sbunlock(&so->so_snd);
1881 goto done;
1882 }
1883 } else {
1884 if ((pg->flags & PG_BUSY) || pg->busy) {
1885 s = splvm();
1886 if ((pg->flags & PG_BUSY) || pg->busy) {
1887 /*
1888 * Page is busy. Wait and retry.
1889 */
1890 vm_page_flag_set(pg, PG_WANTED);
1891 tsleep(pg, PVM, "sfpbsy", 0);
1892 splx(s);
1893 goto retry_lookup;
1894 }
1895 splx(s);
1896 }
1897 /*
1898 * Protect from having the page ripped out from beneath us.
1899 */
1900 vm_page_wire(pg);
1901 }
1902 /*
1903 * Allocate a kernel virtual page and insert the physical page
1904 * into it.
1905 */
1906 sf = sf_buf_alloc();
1907 sf->m = pg;
1908 pmap_qenter(sf->kva, &pg, 1);
1909 /*
1910 * Get an mbuf header and set it up as having external storage.
1911 */
1912 MGETHDR(m, M_WAIT, MT_DATA);
1913 m->m_ext.ext_free = sf_buf_free;
1914 m->m_ext.ext_ref = sf_buf_ref;
1915 m->m_ext.ext_buf = (void *)sf->kva;
1916 m->m_ext.ext_size = PAGE_SIZE;
1917 m->m_data = (char *) sf->kva + pgoff;
1918 m->m_flags |= M_EXT;
1919 m->m_pkthdr.len = m->m_len = xfsize;
1920 /*
1921 * Add the buffer to the socket buffer chain.
1922 */
1923 s = splnet();
1924 retry_space:
1925 /*
1926 * Make sure that the socket is still able to take more data.
1927 * CANTSENDMORE being true usually means that the connection
1928 * was closed. so_error is true when an error was sensed after
1929 * a previous send.
1930 * The state is checked after the page mapping and buffer
1931 * allocation above since those operations may block and make
1932 * any socket checks stale. From this point forward, nothing
1933 * blocks before the pru_send (or more accurately, any blocking
1934 * results in a loop back to here to re-check).
1935 */
1936 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1937 if (so->so_state & SS_CANTSENDMORE) {
1938 error = EPIPE;
1939 } else {
1940 error = so->so_error;
1941 so->so_error = 0;
1942 }
1943 m_freem(m);
1944 sbunlock(&so->so_snd);
1945 splx(s);
1946 goto done;
1947 }
1948 /*
1949 * Wait for socket space to become available. We do this just
1950 * after checking the connection state above in order to avoid
1951 * a race condition with sbwait().
1952 */
1953 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1954 if (so->so_state & SS_NBIO) {
1955 m_freem(m);
1956 sbunlock(&so->so_snd);
1957 splx(s);
1958 error = EAGAIN;
1959 goto done;
1960 }
1961 error = sbwait(&so->so_snd);
1962 /*
1963 * An error from sbwait usually indicates that we've
1964 * been interrupted by a signal. If we've sent anything
1965 * then return bytes sent, otherwise return the error.
1966 */
1967 if (error) {
1968 m_freem(m);
1969 sbunlock(&so->so_snd);
1970 splx(s);
1971 goto done;
1972 }
1973 goto retry_space;
1974 }
1975 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
1976 splx(s);
1977 if (error) {
1978 sbunlock(&so->so_snd);
1979 goto done;
1980 }
1981 }
1982 sbunlock(&so->so_snd);
1983
1984 /*
1985 * Send trailers. Wimp out and use writev(2).
1986 */
1987 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1988 nuap.fd = uap->s;
1989 nuap.iovp = hdtr.trailers;
1990 nuap.iovcnt = hdtr.trl_cnt;
1991 error = writev(p, &nuap);
1992 if (error)
1993 goto done;
1994 sbytes += p->p_retval[0];
1995 }
1996
1997 done:
1998 if (uap->sbytes != NULL) {
1999 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2000 }
2001 return (error);
2002 }
2003
2004 #endif