]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-517.7.7.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Copyright (c) 1982, 1986, 1989, 1990, 1993
24 * The Regents of the University of California. All rights reserved.
25 *
26 * sendfile(2) and related extensions:
27 * Copyright (c) 1998, David Greenman. All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions
31 * are met:
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * 2. Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in the
36 * documentation and/or other materials provided with the distribution.
37 * 3. All advertising materials mentioning features or use of this software
38 * must display the following acknowledgement:
39 * This product includes software developed by the University of
40 * California, Berkeley and its contributors.
41 * 4. Neither the name of the University nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
58 */
59
60
61
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/filedesc.h>
65 #include <sys/proc.h>
66 #include <sys/file.h>
67 #include <sys/buf.h>
68 #include <sys/malloc.h>
69 #include <sys/mbuf.h>
70 #include <sys/protosw.h>
71 #include <sys/socket.h>
72 #include <sys/socketvar.h>
73 #if KTRACE
74 #include <sys/ktrace.h>
75 #endif
76 #include <sys/kernel.h>
77
78 #include <bsm/audit_kernel.h>
79
80 #include <sys/kdebug.h>
81
#if KDEBUG

/*
 * kdebug trace codes used by this file.  Every code is built with
 * NETDBG_CODE() in the DBG_NETSOCK class.
 */

/* Layer in/out begin and end codes (sub-codes 0..3). */
#define DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)

/* Send-side functions: sendmsg(), sendto() and the shared sendit(). */
#define DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)

/* Receive-side functions: recvfrom(), recvmsg() and the shared recvit(). */
#define DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))

#endif /* KDEBUG */
96
97 struct getsockname_args {
98 int fdes;
99 caddr_t asa;
100 socklen_t *alen;
101 };
102
103 struct getsockopt_args {
104 int s;
105 int level;
106 int name;
107 caddr_t val;
108 socklen_t *avalsize;
109 } ;
110
111 struct accept_args {
112 int s;
113 caddr_t name;
114 socklen_t *anamelen;
115 };
116
117 struct getpeername_args {
118 int fdes;
119 caddr_t asa;
120 socklen_t *alen;
121 };
122
123
124 /* ARGSUSED */
125
126 #if SENDFILE
127 static void sf_buf_init(void *arg);
128 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
129 static struct sf_buf *sf_buf_alloc(void);
130 static void sf_buf_ref(caddr_t addr, u_int size);
131 static void sf_buf_free(caddr_t addr, u_int size);
132
133 static SLIST_HEAD(, sf_buf) sf_freelist;
134 static vm_offset_t sf_base;
135 static struct sf_buf *sf_bufs;
136 static int sf_buf_alloc_want;
137 #endif
138
139 static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags, register_t *retval));
140 static int recvit __P((struct proc *p, int s, struct msghdr *mp,
141 caddr_t namelenp, register_t *retval));
142
143 static int accept1 __P((struct proc *p, struct accept_args *uap, register_t *retval, int compat));
144 static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
145 register_t *retval, int compat));
146 static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
147 register_t *retval, int compat));
148
149 /*
150 * System call interface to the socket abstraction.
151 */
152 #if COMPAT_43 || defined(COMPAT_SUNOS)
153 #define COMPAT_OLDSOCK
154 #endif
155
156 extern struct fileops socketops;
157
158 struct socket_args {
159 int domain;
160 int type;
161 int protocol;
162 };
163 int
164 socket(p, uap, retval)
165 struct proc *p;
166 register struct socket_args *uap;
167 register_t *retval;
168 {
169 struct filedesc *fdp = p->p_fd;
170 struct socket *so;
171 struct file *fp;
172 int fd, error;
173
174 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
175 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
176 error = falloc(p, &fp, &fd);
177 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
178
179 if (error)
180 return (error);
181 fp->f_flag = FREAD|FWRITE;
182 fp->f_type = DTYPE_SOCKET;
183 fp->f_ops = &socketops;
184 if (error = socreate(uap->domain, &so, uap->type,
185 uap->protocol)) {
186 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
187 fdrelse(p, fd);
188 ffree(fp);
189 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
190 } else {
191 fp->f_data = (caddr_t)so;
192 *fdflags(p, fd) &= ~UF_RESERVED;
193 *retval = fd;
194 }
195 return (error);
196 }
197
198 struct bind_args {
199 int s;
200 caddr_t name;
201 socklen_t namelen;
202 };
203
204 /* ARGSUSED */
205 int
206 bind(p, uap, retval)
207 struct proc *p;
208 register struct bind_args *uap;
209 register_t *retval;
210 {
211 struct file *fp;
212 struct sockaddr *sa;
213 int error;
214
215 AUDIT_ARG(fd, uap->s);
216 error = getsock(p->p_fd, uap->s, &fp);
217 if (error)
218 return (error);
219 error = getsockaddr(&sa, uap->name, uap->namelen);
220 if (error)
221 return (error);
222 AUDIT_ARG(sockaddr, p, sa);
223 if (fp->f_data != NULL)
224 error = sobind((struct socket *)fp->f_data, sa);
225 else
226 error = EBADF;
227 FREE(sa, M_SONAME);
228 return (error);
229 }
230
231 struct listen_args {
232 int s;
233 int backlog;
234 };
235
236
237
238 int
239 listen(p, uap, retval)
240 struct proc *p;
241 register struct listen_args *uap;
242 register_t *retval;
243 {
244 struct file *fp;
245 int error;
246
247 AUDIT_ARG(fd, uap->s);
248 error = getsock(p->p_fd, uap->s, &fp);
249 if (error)
250 return (error);
251 if (fp->f_data != NULL)
252 return (solisten((struct socket *)fp->f_data, uap->backlog));
253 else
254 return (EBADF);
255 }
256
257 #ifndef COMPAT_OLDSOCK
258 #define accept1 accept
259 #endif
260
261
262
263 int
264 accept1(p, uap, retval, compat)
265 struct proc *p;
266 register struct accept_args *uap;
267 register_t *retval;
268 int compat;
269 {
270 struct file *fp;
271 struct sockaddr *sa;
272 u_int namelen;
273 int error, s;
274 struct socket *head, *so;
275 int fd;
276 short fflag; /* type must match fp->f_flag */
277 int tmpfd;
278
279 AUDIT_ARG(fd, uap->s);
280 if (uap->name) {
281 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
282 sizeof (namelen));
283 if(error)
284 return (error);
285 }
286 error = getsock(p->p_fd, uap->s, &fp);
287 if (error)
288 return (error);
289 s = splnet();
290 head = (struct socket *)fp->f_data;
291 if (head == NULL) {
292 splx(s);
293 return (EBADF);
294 }
295 if ((head->so_options & SO_ACCEPTCONN) == 0) {
296 splx(s);
297 return (EINVAL);
298 }
299 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
300 splx(s);
301 return (EWOULDBLOCK);
302 }
303 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
304 if (head->so_state & SS_CANTRCVMORE) {
305 head->so_error = ECONNABORTED;
306 break;
307 }
308 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
309 "accept", 0);
310 if (error) {
311 splx(s);
312 return (error);
313 }
314 }
315 if (head->so_error) {
316 error = head->so_error;
317 head->so_error = 0;
318 splx(s);
319 return (error);
320 }
321
322
323 /*
324 * At this point we know that there is at least one connection
325 * ready to be accepted. Remove it from the queue prior to
326 * allocating the file descriptor for it since falloc() may
327 * block allowing another process to accept the connection
328 * instead.
329 */
330 so = TAILQ_FIRST(&head->so_comp);
331 TAILQ_REMOVE(&head->so_comp, so, so_list);
332 head->so_qlen--;
333
334 fflag = fp->f_flag;
335 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
336 error = falloc(p, &fp, &fd);
337 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
338 if (error) {
339 /*
340 * Probably ran out of file descriptors. Put the
341 * unaccepted connection back onto the queue and
342 * do another wakeup so some other process might
343 * have a chance at it.
344 */
345 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
346 head->so_qlen++;
347 wakeup_one(&head->so_timeo);
348 splx(s);
349 return (error);
350 } else {
351 *fdflags(p, fd) &= ~UF_RESERVED;
352 *retval = fd;
353 }
354
355 so->so_state &= ~SS_COMP;
356 so->so_head = NULL;
357 fp->f_type = DTYPE_SOCKET;
358 fp->f_flag = fflag;
359 fp->f_ops = &socketops;
360 fp->f_data = (caddr_t)so;
361 sa = 0;
362 (void) soaccept(so, &sa);
363 if (sa == 0) {
364 namelen = 0;
365 if (uap->name)
366 goto gotnoname;
367 return 0;
368 }
369 AUDIT_ARG(sockaddr, p, sa);
370 if (uap->name) {
371 /* check sa_len before it is destroyed */
372 if (namelen > sa->sa_len)
373 namelen = sa->sa_len;
374 #ifdef COMPAT_OLDSOCK
375 if (compat)
376 ((struct osockaddr *)sa)->sa_family =
377 sa->sa_family;
378 #endif
379 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
380 if (!error)
381 gotnoname:
382 error = copyout((caddr_t)&namelen,
383 (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
384 }
385 FREE(sa, M_SONAME);
386 splx(s);
387 return (error);
388 }
389
/*
 * accept system call: accept1() with compat == 0.
 *
 * The wrapper is only needed when COMPAT_OLDSOCK is defined.  Without
 * it, the "#define accept1 accept" above already makes the accept1()
 * definition the accept() entry point, and a second definition here
 * would clash with it.
 */
#ifdef COMPAT_OLDSOCK
int
accept(p, uap, retval)
	struct proc *p;
	struct accept_args *uap;
	register_t *retval;
{

	return (accept1(p, uap, retval, 0));
}
#endif /* COMPAT_OLDSOCK */
399
#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept entry point: accept1() with compat == 1, which makes
 * accept1() rewrite the returned address through struct osockaddr.
 */
int
oaccept(p, uap, retval)
	struct proc *p;
	struct accept_args *uap;
	register_t *retval;
{
	int error;

	error = accept1(p, uap, retval, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
411
412 struct connect_args {
413 int s;
414 caddr_t name;
415 socklen_t namelen;
416 };
417 /* ARGSUSED */
418 int
419 connect(p, uap, retval)
420 struct proc *p;
421 register struct connect_args *uap;
422 register_t *retval;
423 {
424 struct file *fp;
425 register struct socket *so;
426 struct sockaddr *sa;
427 int error, s;
428
429 AUDIT_ARG(fd, uap->s);
430 error = getsock(p->p_fd, uap->s, &fp);
431 if (error)
432 return (error);
433 so = (struct socket *)fp->f_data;
434 if (so == NULL)
435 return (EBADF);
436 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
437 return (EALREADY);
438 error = getsockaddr(&sa, uap->name, uap->namelen);
439 if (error)
440 return (error);
441 AUDIT_ARG(sockaddr, p, sa);
442 error = soconnect(so, sa);
443 if (error)
444 goto bad;
445 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
446 FREE(sa, M_SONAME);
447 return (EINPROGRESS);
448 }
449 s = splnet();
450 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
451 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
452 "connec", 0);
453 if (error)
454 break;
455 }
456 if (error == 0) {
457 error = so->so_error;
458 so->so_error = 0;
459 }
460 splx(s);
461 bad:
462 so->so_state &= ~SS_ISCONNECTING;
463 FREE(sa, M_SONAME);
464 if (error == ERESTART)
465 error = EINTR;
466 return (error);
467 }
468
469 struct socketpair_args {
470 int domain;
471 int type;
472 int protocol;
473 int *rsv;
474 };
475 int
476 socketpair(p, uap, retval)
477 struct proc *p;
478 register struct socketpair_args *uap;
479 register_t *retval;
480 {
481 register struct filedesc *fdp = p->p_fd;
482 struct file *fp1, *fp2;
483 struct socket *so1, *so2;
484 int fd, error, sv[2];
485
486 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
487 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
488 if (error)
489 return (error);
490 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
491 if (error)
492 goto free1;
493 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
494 error = falloc(p, &fp1, &fd);
495 if (error)
496 goto free2;
497 sv[0] = fd;
498 fp1->f_flag = FREAD|FWRITE;
499 fp1->f_type = DTYPE_SOCKET;
500 fp1->f_ops = &socketops;
501 fp1->f_data = (caddr_t)so1;
502 error = falloc(p, &fp2, &fd);
503 if (error)
504 goto free3;
505 fp2->f_flag = FREAD|FWRITE;
506 fp2->f_type = DTYPE_SOCKET;
507 fp2->f_ops = &socketops;
508 fp2->f_data = (caddr_t)so2;
509 sv[1] = fd;
510 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
511 error = soconnect2(so1, so2);
512 if (error) {
513 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
514 goto free4;
515 }
516
517 if (uap->type == SOCK_DGRAM) {
518 /*
519 * Datagram socket connection is asymmetric.
520 */
521 error = soconnect2(so2, so1);
522 if (error) {
523 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
524 goto free4;
525 }
526 }
527 *fdflags(p, sv[0]) &= ~UF_RESERVED;
528 *fdflags(p, sv[1]) &= ~UF_RESERVED;
529 error = copyout((caddr_t)sv, (caddr_t)uap->rsv,
530 2 * sizeof (int));
531 #if 0 /* old pipe(2) syscall compatability, unused these days */
532 retval[0] = sv[0]; /* XXX ??? */
533 retval[1] = sv[1]; /* XXX ??? */
534 #endif /* 0 */
535 return (error);
536 free4:
537 fdrelse(p, sv[1]);
538 ffree(fp2);
539 free3:
540 fdrelse(p, sv[0]);
541 ffree(fp1);
542 free2:
543 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
544 (void)soclose(so2);
545 free1:
546 (void)soclose(so1);
547 return (error);
548 }
549
550 static int
551 sendit(p, s, mp, flags, retsize)
552 register struct proc *p;
553 int s;
554 register struct msghdr *mp;
555 int flags;
556 register_t *retsize;
557 {
558 struct file *fp;
559 struct uio auio;
560 register struct iovec *iov;
561 register int i;
562 struct mbuf *control;
563 struct sockaddr *to;
564 int len, error;
565 struct socket *so;
566 #if KTRACE
567 struct iovec *ktriov = NULL;
568 struct uio ktruio;
569 #endif
570
571 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0,0,0,0,0);
572
573 if (error = getsock(p->p_fd, s, &fp))
574 {
575 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
576 return (error);
577 }
578
579 auio.uio_iov = mp->msg_iov;
580 auio.uio_iovcnt = mp->msg_iovlen;
581 auio.uio_segflg = UIO_USERSPACE;
582 auio.uio_rw = UIO_WRITE;
583 auio.uio_procp = p;
584 auio.uio_offset = 0; /* XXX */
585 auio.uio_resid = 0;
586 iov = mp->msg_iov;
587 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
588 if (iov->iov_len < 0)
589 {
590 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0);
591 return (EINVAL);
592 }
593
594 if ((auio.uio_resid += iov->iov_len) < 0)
595 {
596 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0);
597 return (EINVAL);
598 }
599 }
600 if (mp->msg_name) {
601 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
602 if (error) {
603 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
604 return (error);
605 }
606 AUDIT_ARG(sockaddr, p, to);
607 } else
608 to = 0;
609 if (mp->msg_control) {
610 if (mp->msg_controllen < sizeof(struct cmsghdr)
611 #ifdef COMPAT_OLDSOCK
612 && mp->msg_flags != MSG_COMPAT
613 #endif
614 ) {
615 error = EINVAL;
616 goto bad;
617 }
618 error = sockargs(&control, mp->msg_control,
619 mp->msg_controllen, MT_CONTROL);
620 if (error)
621 goto bad;
622 #ifdef COMPAT_OLDSOCK
623 if (mp->msg_flags == MSG_COMPAT) {
624 register struct cmsghdr *cm;
625
626 M_PREPEND(control, sizeof(*cm), M_WAIT);
627 if (control == 0) {
628 error = ENOBUFS;
629 goto bad;
630 } else {
631 cm = mtod(control, struct cmsghdr *);
632 cm->cmsg_len = control->m_len;
633 cm->cmsg_level = SOL_SOCKET;
634 cm->cmsg_type = SCM_RIGHTS;
635 }
636 }
637 #endif
638 } else
639 control = 0;
640
641 #if KTRACE
642 if (KTRPOINT(p, KTR_GENIO)) {
643 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
644
645 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
646 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
647 ktruio = auio;
648 }
649 #endif
650 len = auio.uio_resid;
651 so = (struct socket *)fp->f_data;
652 if (so == NULL)
653 error = EBADF;
654 else
655 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
656 flags);
657 if (error) {
658 if (auio.uio_resid != len && (error == ERESTART ||
659 error == EINTR || error == EWOULDBLOCK))
660 error = 0;
661 /* Generation of SIGPIPE can be controlled per socket */
662 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
663 psignal(p, SIGPIPE);
664 }
665 if (error == 0)
666 *retsize = len - auio.uio_resid;
667 #if KTRACE
668 if (ktriov != NULL) {
669 if (error == 0) {
670 ktruio.uio_iov = ktriov;
671 ktruio.uio_resid = retsize[0];
672 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error, -1);
673 }
674 FREE(ktriov, M_TEMP);
675 }
676 #endif
677 bad:
678 if (to)
679 FREE(to, M_SONAME);
680 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
681 return (error);
682 }
683
684
685 struct sendto_args {
686 int s;
687 caddr_t buf;
688 size_t len;
689 int flags;
690 caddr_t to;
691 int tolen;
692 };
693
694 int
695 sendto(p, uap, retval)
696 struct proc *p;
697 register struct sendto_args /* {
698 int s;
699 caddr_t buf;
700 size_t len;
701 int flags;
702 caddr_t to;
703 int tolen;
704 } */ *uap;
705 register_t *retval;
706
707 {
708 struct msghdr msg;
709 struct iovec aiov;
710 int stat;
711
712 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0,0,0,0,0);
713 AUDIT_ARG(fd, uap->s);
714
715 msg.msg_name = uap->to;
716 msg.msg_namelen = uap->tolen;
717 msg.msg_iov = &aiov;
718 msg.msg_iovlen = 1;
719 msg.msg_control = 0;
720 #ifdef COMPAT_OLDSOCK
721 msg.msg_flags = 0;
722 #endif
723 aiov.iov_base = uap->buf;
724 aiov.iov_len = uap->len;
725 stat = sendit(p, uap->s, &msg, uap->flags, retval);
726 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, stat, *retval,0,0,0);
727 return(stat);
728 }
729
730 #ifdef COMPAT_OLDSOCK
731 struct osend_args {
732 int s;
733 caddr_t buf;
734 int len;
735 int flags;
736 };
737
738 int
739 osend(p, uap, retval)
740 struct proc *p;
741 register struct osend_args /* {
742 int s;
743 caddr_t buf;
744 int len;
745 int flags;
746 } */ *uap;
747 register_t *retval;
748
749 {
750 struct msghdr msg;
751 struct iovec aiov;
752
753 msg.msg_name = 0;
754 msg.msg_namelen = 0;
755 msg.msg_iov = &aiov;
756 msg.msg_iovlen = 1;
757 aiov.iov_base = uap->buf;
758 aiov.iov_len = uap->len;
759 msg.msg_control = 0;
760 msg.msg_flags = 0;
761 return (sendit(p, uap->s, &msg, uap->flags, retval));
762 }
763 struct osendmsg_args {
764 int s;
765 caddr_t msg;
766 int flags;
767 };
768
769 int
770 osendmsg(p, uap, retval)
771 struct proc *p;
772 register struct osendmsg_args /* {
773 int s;
774 caddr_t msg;
775 int flags;
776 } */ *uap;
777 register_t *retval;
778
779 {
780 struct msghdr msg;
781 struct iovec aiov[UIO_SMALLIOV], *iov;
782 int error;
783
784 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
785 if (error)
786 return (error);
787 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
788 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
789 return (EMSGSIZE);
790 MALLOC(iov, struct iovec *,
791 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
792 M_WAITOK);
793 } else
794 iov = aiov;
795 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
796 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
797 if (error)
798 goto done;
799 msg.msg_flags = MSG_COMPAT;
800 msg.msg_iov = iov;
801 error = sendit(p, uap->s, &msg, uap->flags, retval);
802 done:
803 if (iov != aiov)
804 FREE(iov, M_IOV);
805 return (error);
806 }
807 #endif
808
809 struct sendmsg_args {
810 int s;
811 caddr_t msg;
812 int flags;
813 };
814
815 int
816 sendmsg(p, uap, retval)
817 struct proc *p;
818 register struct sendmsg_args *uap;
819 register_t *retval;
820 {
821 struct msghdr msg;
822 struct iovec aiov[UIO_SMALLIOV], *iov;
823 int error;
824
825 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0,0,0,0,0);
826 AUDIT_ARG(fd, uap->s);
827 if (error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)))
828 {
829 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
830 return (error);
831 }
832
833 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
834 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
835 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
836 return (EMSGSIZE);
837 }
838 MALLOC(iov, struct iovec *,
839 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
840 M_WAITOK);
841 } else
842 iov = aiov;
843 if (msg.msg_iovlen &&
844 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
845 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
846 goto done;
847 msg.msg_iov = iov;
848 #ifdef COMPAT_OLDSOCK
849 msg.msg_flags = 0;
850 #endif
851 error = sendit(p, uap->s, &msg, uap->flags, retval);
852 done:
853 if (iov != aiov)
854 FREE(iov, M_IOV);
855 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
856 return (error);
857 }
858
859 static int
860 recvit(p, s, mp, namelenp, retval)
861 register struct proc *p;
862 int s;
863 register struct msghdr *mp;
864 caddr_t namelenp;
865 register_t *retval;
866 {
867 struct file *fp;
868 struct uio auio;
869 register struct iovec *iov;
870 register int i;
871 int len, error;
872 struct mbuf *m, *control = 0;
873 caddr_t ctlbuf;
874 struct socket *so;
875 struct sockaddr *fromsa = 0;
876 #if KTRACE
877 struct iovec *ktriov = NULL;
878 struct uio ktruio;
879 #endif
880
881 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0,0,0,0,0);
882 if (error = getsock(p->p_fd, s, &fp))
883 {
884 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
885 return (error);
886 }
887
888 auio.uio_iov = mp->msg_iov;
889 auio.uio_iovcnt = mp->msg_iovlen;
890 auio.uio_segflg = UIO_USERSPACE;
891 auio.uio_rw = UIO_READ;
892 auio.uio_procp = p;
893 auio.uio_offset = 0; /* XXX */
894 auio.uio_resid = 0;
895 iov = mp->msg_iov;
896 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
897 if ((auio.uio_resid += iov->iov_len) < 0) {
898 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL,0,0,0,0);
899 return (EINVAL);
900 }
901 }
902 #if KTRACE
903 if (KTRPOINT(p, KTR_GENIO)) {
904 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
905
906 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
907 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
908 ktruio = auio;
909 }
910 #endif
911 len = auio.uio_resid;
912 so = (struct socket *)fp->f_data;
913 if (so == NULL)
914 error = EBADF;
915 else
916 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
917 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
918 &mp->msg_flags);
919 AUDIT_ARG(sockaddr, p, fromsa);
920 if (error) {
921 if (auio.uio_resid != len && (error == ERESTART ||
922 error == EINTR || error == EWOULDBLOCK))
923 error = 0;
924 }
925 #if KTRACE
926 if (ktriov != NULL) {
927 if (error == 0) {
928 ktruio.uio_iov = ktriov;
929 ktruio.uio_resid = len - auio.uio_resid;
930 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error, -1);
931 }
932 FREE(ktriov, M_TEMP);
933 }
934 #endif
935 if (error)
936 goto out;
937 *retval = len - auio.uio_resid;
938 if (mp->msg_name) {
939 len = mp->msg_namelen;
940 if (len <= 0 || fromsa == 0)
941 len = 0;
942 else {
943 #ifndef MIN
944 #define MIN(a,b) ((a)>(b)?(b):(a))
945 #endif
946 /* save sa_len before it is destroyed by MSG_COMPAT */
947 len = MIN(len, fromsa->sa_len);
948 #ifdef COMPAT_OLDSOCK
949 if (mp->msg_flags & MSG_COMPAT)
950 ((struct osockaddr *)fromsa)->sa_family =
951 fromsa->sa_family;
952 #endif
953 error = copyout(fromsa,
954 (caddr_t)mp->msg_name, (unsigned)len);
955 if (error)
956 goto out;
957 }
958 mp->msg_namelen = len;
959 if (namelenp &&
960 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
961 #ifdef COMPAT_OLDSOCK
962 if (mp->msg_flags & MSG_COMPAT)
963 error = 0; /* old recvfrom didn't check */
964 else
965 #endif
966 goto out;
967 }
968 }
969 if (mp->msg_control) {
970 #ifdef COMPAT_OLDSOCK
971 /*
972 * We assume that old recvmsg calls won't receive access
973 * rights and other control info, esp. as control info
974 * is always optional and those options didn't exist in 4.3.
975 * If we receive rights, trim the cmsghdr; anything else
976 * is tossed.
977 */
978 if (control && mp->msg_flags & MSG_COMPAT) {
979 if (mtod(control, struct cmsghdr *)->cmsg_level !=
980 SOL_SOCKET ||
981 mtod(control, struct cmsghdr *)->cmsg_type !=
982 SCM_RIGHTS) {
983 mp->msg_controllen = 0;
984 goto out;
985 }
986 control->m_len -= sizeof (struct cmsghdr);
987 control->m_data += sizeof (struct cmsghdr);
988 }
989 #endif
990 len = mp->msg_controllen;
991 m = control;
992 mp->msg_controllen = 0;
993 ctlbuf = (caddr_t) mp->msg_control;
994
995 while (m && len > 0) {
996 unsigned int tocopy;
997
998 if (len >= m->m_len)
999 tocopy = m->m_len;
1000 else {
1001 mp->msg_flags |= MSG_CTRUNC;
1002 tocopy = len;
1003 }
1004
1005 if (error = copyout((caddr_t)mtod(m, caddr_t),
1006 ctlbuf, tocopy))
1007 goto out;
1008
1009 ctlbuf += tocopy;
1010 len -= tocopy;
1011 m = m->m_next;
1012 }
1013 mp->msg_controllen = ctlbuf - mp->msg_control;
1014 }
1015 out:
1016 if (fromsa)
1017 FREE(fromsa, M_SONAME);
1018 if (control)
1019 m_freem(control);
1020 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
1021 return (error);
1022 }
1023
1024
1025 struct recvfrom_args {
1026 int s;
1027 caddr_t buf;
1028 size_t len;
1029 int flags;
1030 caddr_t from;
1031 int *fromlenaddr;
1032 };
1033
1034 int
1035 recvfrom(p, uap, retval)
1036 struct proc *p;
1037 register struct recvfrom_args /* {
1038 int s;
1039 caddr_t buf;
1040 size_t len;
1041 int flags;
1042 caddr_t from;
1043 int *fromlenaddr;
1044 } */ *uap;
1045 register_t *retval;
1046 {
1047 struct msghdr msg;
1048 struct iovec aiov;
1049 int error;
1050
1051 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0,0,0,0,0);
1052 AUDIT_ARG(fd, uap->s);
1053
1054 if (uap->fromlenaddr) {
1055 error = copyin((caddr_t)uap->fromlenaddr,
1056 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1057 if (error)
1058 return (error);
1059 } else
1060 msg.msg_namelen = 0;
1061 msg.msg_name = uap->from;
1062 msg.msg_iov = &aiov;
1063 msg.msg_iovlen = 1;
1064 aiov.iov_base = uap->buf;
1065 aiov.iov_len = uap->len;
1066 msg.msg_control = 0;
1067 msg.msg_flags = uap->flags;
1068 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error,0,0,0,0);
1069 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval));
1070 }
1071
#ifdef COMPAT_OLDSOCK
/*
 * Old-style recvfrom: sets MSG_COMPAT in the caller's flags (the
 * argument block is modified in place) and defers to recvfrom().
 */
int
orecvfrom(p, uap, retval)
	struct proc *p;
	struct recvfrom_args *uap;
	register_t *retval;
{
	int error;

	uap->flags |= MSG_COMPAT;
	error = recvfrom(p, uap, retval);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1084
1085
1086 #ifdef COMPAT_OLDSOCK
1087 struct orecv_args {
1088 int s;
1089 caddr_t buf;
1090 int len;
1091 int flags;
1092 };
1093
1094 int
1095 orecv(p, uap, retval)
1096 struct proc *p;
1097 struct orecv_args *uap;
1098 register_t *retval;
1099 {
1100 struct msghdr msg;
1101 struct iovec aiov;
1102
1103 msg.msg_name = 0;
1104 msg.msg_namelen = 0;
1105 msg.msg_iov = &aiov;
1106 msg.msg_iovlen = 1;
1107 aiov.iov_base = uap->buf;
1108 aiov.iov_len = uap->len;
1109 msg.msg_control = 0;
1110 msg.msg_flags = uap->flags;
1111 return (recvit(p, uap->s, &msg, (caddr_t)0, retval));
1112 }
1113
1114 /*
1115 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1116 * overlays the new one, missing only the flags, and with the (old) access
1117 * rights where the control fields are now.
1118 */
1119 struct orecvmsg_args {
1120 int s;
1121 struct omsghdr *msg;
1122 int flags;
1123 };
1124
1125 int
1126 orecvmsg(p, uap, retval)
1127 struct proc *p;
1128 struct orecvmsg_args *uap;
1129 register_t *retval;
1130 {
1131 struct msghdr msg;
1132 struct iovec aiov[UIO_SMALLIOV], *iov;
1133 int error;
1134
1135 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1136 sizeof (struct omsghdr));
1137 if (error)
1138 return (error);
1139 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1140 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
1141 return (EMSGSIZE);
1142 MALLOC(iov, struct iovec *,
1143 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1144 M_WAITOK);
1145 } else
1146 iov = aiov;
1147 msg.msg_flags = uap->flags | MSG_COMPAT;
1148 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1149 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1150 if (error)
1151 goto done;
1152 msg.msg_iov = iov;
1153 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen, retval);
1154
1155 if (msg.msg_controllen && error == 0)
1156 error = copyout((caddr_t)&msg.msg_controllen,
1157 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1158 done:
1159 if (iov != aiov)
1160 FREE(iov, M_IOV);
1161 return (error);
1162 }
1163 #endif
1164
1165 struct recvmsg_args {
1166 int s;
1167 struct msghdr *msg;
1168 int flags;
1169 };
1170
1171 int
1172 recvmsg(p, uap, retval)
1173 struct proc *p;
1174 struct recvmsg_args *uap;
1175 register_t *retval;
1176 {
1177 struct msghdr msg;
1178 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1179 register int error;
1180
1181 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0,0,0,0,0);
1182 AUDIT_ARG(fd, uap->s);
1183 if (error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1184 sizeof (msg)))
1185 {
1186 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1187 return (error);
1188 }
1189
1190 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1191 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1192 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
1193 return (EMSGSIZE);
1194 }
1195 MALLOC(iov, struct iovec *,
1196 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1197 M_WAITOK);
1198 } else
1199 iov = aiov;
1200 #ifdef COMPAT_OLDSOCK
1201 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1202 #else
1203 msg.msg_flags = uap->flags;
1204 #endif
1205 uiov = msg.msg_iov;
1206 msg.msg_iov = iov;
1207 error = copyin((caddr_t)uiov, (caddr_t)iov,
1208 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1209 if (error)
1210 goto done;
1211 error = recvit(p, uap->s, &msg, (caddr_t)0, retval);
1212 if (!error) {
1213 msg.msg_iov = uiov;
1214 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1215 }
1216 done:
1217 if (iov != aiov)
1218 FREE(iov, M_IOV);
1219 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1220 return (error);
1221 }
1222
1223 /* ARGSUSED */
1224 struct shutdown_args {
1225 int s;
1226 int how;
1227 };
1228
1229 int
1230 shutdown(p, uap, retval)
1231 struct proc *p;
1232 struct shutdown_args *uap;
1233 register_t *retval;
1234 {
1235 struct file *fp;
1236 int error;
1237
1238 AUDIT_ARG(fd, uap->s);
1239 error = getsock(p->p_fd, uap->s, &fp);
1240 if (error)
1241 return (error);
1242 if (fp->f_data == NULL)
1243 return (EBADF);
1244 return (soshutdown((struct socket *)fp->f_data, uap->how));
1245 }
1246
1247
1248
1249
1250
1251 /* ARGSUSED */
1252 struct setsockopt_args {
1253 int s;
1254 int level;
1255 int name;
1256 caddr_t val;
1257 socklen_t valsize;
1258 };
1259
1260 int
1261 setsockopt(p, uap, retval)
1262 struct proc *p;
1263 struct setsockopt_args *uap;
1264 register_t *retval;
1265 {
1266 struct file *fp;
1267 struct sockopt sopt;
1268 int error;
1269
1270 AUDIT_ARG(fd, uap->s);
1271 if (uap->val == 0 && uap->valsize != 0)
1272 return (EFAULT);
1273 if (uap->valsize < 0)
1274 return (EINVAL);
1275
1276 error = getsock(p->p_fd, uap->s, &fp);
1277 if (error)
1278 return (error);
1279
1280 sopt.sopt_dir = SOPT_SET;
1281 sopt.sopt_level = uap->level;
1282 sopt.sopt_name = uap->name;
1283 sopt.sopt_val = uap->val;
1284 sopt.sopt_valsize = uap->valsize;
1285 sopt.sopt_p = p;
1286
1287 if (fp->f_data == NULL)
1288 return (EBADF);
1289 return (sosetopt((struct socket *)fp->f_data, &sopt));
1290 }
1291
1292
1293
1294 int
1295 getsockopt(p, uap, retval)
1296 struct proc *p;
1297 struct getsockopt_args *uap;
1298 register_t *retval;
1299 {
1300 int valsize, error;
1301 struct file *fp;
1302 struct sockopt sopt;
1303
1304 error = getsock(p->p_fd, uap->s, &fp);
1305 if (error)
1306 return (error);
1307 if (uap->val) {
1308 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1309 sizeof (valsize));
1310 if (error)
1311 return (error);
1312 if (valsize < 0)
1313 return (EINVAL);
1314 } else
1315 valsize = 0;
1316
1317 sopt.sopt_dir = SOPT_GET;
1318 sopt.sopt_level = uap->level;
1319 sopt.sopt_name = uap->name;
1320 sopt.sopt_val = uap->val;
1321 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1322 sopt.sopt_p = p;
1323
1324 if (fp->f_data == NULL)
1325 return (EBADF);
1326 error = sogetopt((struct socket *)fp->f_data, &sopt);
1327 if (error == 0) {
1328 valsize = sopt.sopt_valsize;
1329 error = copyout((caddr_t)&valsize,
1330 (caddr_t)uap->avalsize, sizeof (valsize));
1331 }
1332 return (error);
1333 }
1334
1335
1336
1337 struct pipe_args {
1338 int dummy;
1339 };
1340 /* ARGSUSED */
1341 int
1342 pipe(p, uap, retval)
1343 struct proc *p;
1344 struct pipe_args *uap;
1345 register_t *retval;
1346 {
1347 struct file *rf, *wf;
1348 struct socket *rso, *wso;
1349 int fd, error;
1350
1351 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1352 if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) {
1353 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1354 return (error);
1355 }
1356 if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) {
1357 goto free1;
1358 }
1359 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1360 error = falloc(p, &rf, &fd);
1361 if (error)
1362 goto free2;
1363 retval[0] = fd;
1364 rf->f_flag = FREAD;
1365 rf->f_type = DTYPE_SOCKET;
1366 rf->f_ops = &socketops;
1367 rf->f_data = (caddr_t)rso;
1368 if (error = falloc(p, &wf, &fd))
1369 goto free3;
1370 wf->f_flag = FWRITE;
1371 wf->f_type = DTYPE_SOCKET;
1372 wf->f_ops = &socketops;
1373 wf->f_data = (caddr_t)wso;
1374 retval[1] = fd;
1375
1376 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1377 error = unp_connect2(wso, rso);
1378 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1379 if (error)
1380 goto free4;
1381 *fdflags(p, retval[0]) &= ~UF_RESERVED;
1382 *fdflags(p, retval[1]) &= ~UF_RESERVED;
1383 return (0);
1384 free4:
1385 fdrelse(p, retval[1]);
1386 ffree(wf);
1387 free3:
1388 fdrelse(p, retval[0]);
1389 ffree(rf);
1390 free2:
1391 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1392 (void)soclose(wso);
1393 free1:
1394 (void)soclose(rso);
1395
1396 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1397 return (error);
1398 }
1399
1400
1401 /*
1402 * Get socket name.
1403 */
1404 /* ARGSUSED */
1405 static int
1406 getsockname1(p, uap, retval, compat)
1407 struct proc *p;
1408 register struct getsockname_args *uap;
1409 register_t *retval;
1410 int compat;
1411 {
1412 struct file *fp;
1413 register struct socket *so;
1414 struct sockaddr *sa;
1415 u_int len;
1416 int error;
1417
1418 error = getsock(p->p_fd, uap->fdes, &fp);
1419 if (error)
1420 return (error);
1421 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1422 if (error)
1423 return (error);
1424 so = (struct socket *)fp->f_data;
1425 if (so == NULL)
1426 return (EBADF);
1427 sa = 0;
1428 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1429 if (error)
1430 goto bad;
1431 if (sa == 0) {
1432 len = 0;
1433 goto gotnothing;
1434 }
1435
1436 len = MIN(len, sa->sa_len);
1437 #ifdef COMPAT_OLDSOCK
1438 if (compat)
1439 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1440 #endif
1441 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1442 if (error == 0)
1443 gotnothing:
1444 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1445 sizeof (len));
1446 bad:
1447 if (sa)
1448 FREE(sa, M_SONAME);
1449 return (error);
1450 }
1451
1452 int
1453 getsockname(p, uap, retval)
1454 struct proc *p;
1455 struct getsockname_args *uap;
1456 register_t *retval;
1457 {
1458
1459 return (getsockname1(p, uap, retval, 0));
1460 }
1461
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI variant of getsockname: forwards to getsockname1()
 * with the compat flag set.
 */
int
ogetsockname(p, uap, retval)
	struct proc *p;
	struct getsockname_args *uap;
	register_t *retval;
{
	return getsockname1(p, uap, retval, 1);
}
#endif /* COMPAT_OLDSOCK */
1473
1474 /*
1475 * Get name of peer for connected socket.
1476 */
1477 /* ARGSUSED */
1478 int
1479 getpeername1(p, uap, retval, compat)
1480 struct proc *p;
1481 register struct getpeername_args *uap;
1482 register_t *retval;
1483 int compat;
1484 {
1485 struct file *fp;
1486 register struct socket *so;
1487 struct sockaddr *sa;
1488 u_int len;
1489 int error;
1490
1491 error = getsock(p->p_fd, uap->fdes, &fp);
1492 if (error)
1493 return (error);
1494 so = (struct socket *)fp->f_data;
1495 if (so == NULL)
1496 return (EBADF);
1497 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
1498 return (ENOTCONN);
1499 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1500 if (error)
1501 return (error);
1502 sa = 0;
1503 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1504 if (error)
1505 goto bad;
1506 if (sa == 0) {
1507 len = 0;
1508 goto gotnothing;
1509 }
1510 len = MIN(len, sa->sa_len);
1511 #ifdef COMPAT_OLDSOCK
1512 if (compat)
1513 ((struct osockaddr *)sa)->sa_family =
1514 sa->sa_family;
1515 #endif
1516 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1517 if (error)
1518 goto bad;
1519 gotnothing:
1520 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1521 bad:
1522 if (sa) FREE(sa, M_SONAME);
1523 return (error);
1524 }
1525
1526 int
1527 getpeername(p, uap, retval)
1528 struct proc *p;
1529 struct getpeername_args *uap;
1530 register_t *retval;
1531 {
1532
1533 return (getpeername1(p, uap, retval, 0));
1534 }
1535
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI variant of getpeername: forwards to getpeername1()
 * with the compat flag set.
 */
int
ogetpeername(p, uap, retval)
	struct proc *p;
	struct ogetpeername_args *uap;
	register_t *retval;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return getpeername1(p, args, retval, 1);
}
#endif /* COMPAT_OLDSOCK */
1548
1549 int
1550 sockargs(mp, buf, buflen, type)
1551 struct mbuf **mp;
1552 caddr_t buf;
1553 int buflen, type;
1554 {
1555 register struct sockaddr *sa;
1556 register struct mbuf *m;
1557 int error;
1558
1559 if ((u_int)buflen > MLEN) {
1560 #ifdef COMPAT_OLDSOCK
1561 if (type == MT_SONAME && (u_int)buflen <= 112)
1562 buflen = MLEN; /* unix domain compat. hack */
1563 else
1564 #endif
1565 return (EINVAL);
1566 }
1567 m = m_get(M_WAIT, type);
1568 if (m == NULL)
1569 return (ENOBUFS);
1570 m->m_len = buflen;
1571 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1572 if (error)
1573 (void) m_free(m);
1574 else {
1575 *mp = m;
1576 if (type == MT_SONAME) {
1577 sa = mtod(m, struct sockaddr *);
1578
1579 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1580 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1581 sa->sa_family = sa->sa_len;
1582 #endif
1583 sa->sa_len = buflen;
1584 }
1585 }
1586 return (error);
1587 }
1588
1589 int
1590 getsockaddr(namp, uaddr, len)
1591 struct sockaddr **namp;
1592 caddr_t uaddr;
1593 size_t len;
1594 {
1595 struct sockaddr *sa;
1596 int error;
1597
1598 if (len > SOCK_MAXADDRLEN)
1599 return ENAMETOOLONG;
1600
1601 if (len == 0)
1602 return EINVAL;
1603
1604 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1605 error = copyin(uaddr, sa, len);
1606 if (error) {
1607 FREE(sa, M_SONAME);
1608 } else {
1609 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1610 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1611 sa->sa_family = sa->sa_len;
1612 #endif
1613 sa->sa_len = len;
1614 *namp = sa;
1615 }
1616 return error;
1617 }
1618
1619 int
1620 getsock(fdp, fdes, fpp)
1621 struct filedesc *fdp;
1622 int fdes;
1623 struct file **fpp;
1624 {
1625 register struct file *fp;
1626
1627 if ((unsigned)fdes >= fdp->fd_nfiles ||
1628 (fp = fdp->fd_ofiles[fdes]) == NULL ||
1629 (fdp->fd_ofileflags[fdes] & UF_RESERVED))
1630 return (EBADF);
1631 if (fp->f_type != DTYPE_SOCKET)
1632 return (ENOTSOCK);
1633 *fpp = fp;
1634 return (0);
1635 }
1636
1637 #if SENDFILE
1638 /*
1639 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1640 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1641 * been made static, but may be useful in the future for doing zero-copy in
1642 * other parts of the networking code.
1643 */
1644 static void
1645 sf_buf_init(void *arg)
1646 {
1647 int i;
1648
1649 SLIST_INIT(&sf_freelist);
1650 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1651 sf_bufs = _MALLOC(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
1652 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
1653 for (i = 0; i < nsfbufs; i++) {
1654 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1655 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1656 }
1657 }
1658
1659 /*
1660 * Get an sf_buf from the freelist. Will block if none are available.
1661 */
1662 static struct sf_buf *
1663 sf_buf_alloc()
1664 {
1665 struct sf_buf *sf;
1666 int s;
1667
1668 s = splimp();
1669 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1670 sf_buf_alloc_want = 1;
1671 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1672 }
1673 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1674 splx(s);
1675 sf->refcnt = 1;
1676 return (sf);
1677 }
1678
1679 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1680 static void
1681 sf_buf_ref(caddr_t addr, u_int size)
1682 {
1683 struct sf_buf *sf;
1684
1685 sf = dtosf(addr);
1686 if (sf->refcnt == 0)
1687 panic("sf_buf_ref: referencing a free sf_buf");
1688 sf->refcnt++;
1689 }
1690
1691 /*
1692 * Lose a reference to an sf_buf. When none left, detach mapped page
1693 * and release resources back to the system.
1694 *
1695 * Must be called at splimp.
1696 */
1697 static void
1698 sf_buf_free(caddr_t addr, u_int size)
1699 {
1700 struct sf_buf *sf;
1701 struct vm_page *m;
1702 int s;
1703
1704 sf = dtosf(addr);
1705 if (sf->refcnt == 0)
1706 panic("sf_buf_free: freeing free sf_buf");
1707 sf->refcnt--;
1708 if (sf->refcnt == 0) {
1709 pmap_qremove((vm_offset_t)addr, 1);
1710 m = sf->m;
1711 s = splvm();
1712 vm_page_unwire(m, 0);
1713 /*
1714 * Check for the object going away on us. This can
1715 * happen since we don't hold a reference to it.
1716 * If so, we're responsible for freeing the page.
1717 */
1718 if (m->wire_count == 0 && m->object == NULL)
1719 vm_page_lock_queues();
1720 vm_page_free(m);
1721 vm_page_unlock_queues();
1722 splx(s);
1723 sf->m = NULL;
1724 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1725 if (sf_buf_alloc_want) {
1726 sf_buf_alloc_want = 0;
1727 wakeup(&sf_freelist);
1728 }
1729 }
1730 }
1731
1732 /*
1733 * sendfile(2).
1734 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1735 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1736 *
1737 * Send a file specified by 'fd' and starting at 'offset' to a socket
1738 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1739 * nbytes == 0. Optionally add a header and/or trailer to the socket
1740 * output. If specified, write the total number of bytes sent into *sbytes.
1741 */
1742 int
1743 sendfile(struct proc *p, struct sendfile_args *uap)
1744 {
1745 struct file *fp;
1746 struct filedesc *fdp = p->p_fd;
1747 struct vnode *vp;
1748 struct vm_object *obj;
1749 struct socket *so;
1750 struct mbuf *m;
1751 struct sf_buf *sf;
1752 struct vm_page *pg;
1753 struct writev_args nuap;
1754 struct sf_hdtr hdtr;
1755 off_t off, xfsize, sbytes = 0;
1756 int error = 0, s;
1757
1758 /*
1759 * Do argument checking. Must be a regular file in, stream
1760 * type and connected socket out, positive offset.
1761 */
1762 if (((u_int)uap->fd) >= fdp->fd_nfiles ||
1763 (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
1764 (fp->f_flag & FREAD) == 0) {
1765 error = EBADF;
1766 goto done;
1767 }
1768 if (fp->f_type != DTYPE_VNODE) {
1769 error = EINVAL;
1770 goto done;
1771 }
1772 vp = (struct vnode *)fp->f_data;
1773 obj = vp->v_object;
1774 if (vp->v_type != VREG || obj == NULL) {
1775 error = EINVAL;
1776 goto done;
1777 }
1778 error = getsock(p->p_fd, uap->s, &fp);
1779 if (error)
1780 goto done;
1781 so = (struct socket *)fp->f_data;
1782 if (so == NULL) {
1783 error = EBADF;
1784 goto done;
1785 }
1786 if (so->so_type != SOCK_STREAM) {
1787 error = EINVAL;
1788 goto done;
1789 }
1790 if ((so->so_state & SS_ISCONNECTED) == 0) {
1791 error = ENOTCONN;
1792 goto done;
1793 }
1794 if (uap->offset < 0) {
1795 error = EINVAL;
1796 goto done;
1797 }
1798
1799 /*
1800 * If specified, get the pointer to the sf_hdtr struct for
1801 * any headers/trailers.
1802 */
1803 if (uap->hdtr != NULL) {
1804 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1805 if (error)
1806 goto done;
1807 /*
1808 * Send any headers. Wimp out and use writev(2).
1809 */
1810 if (hdtr.headers != NULL) {
1811 nuap.fd = uap->s;
1812 nuap.iovp = hdtr.headers;
1813 nuap.iovcnt = hdtr.hdr_cnt;
1814 error = writev(p, &nuap);
1815 if (error)
1816 goto done;
1817 sbytes += p->p_retval[0];
1818 }
1819 }
1820
1821 /*
1822 * Protect against multiple writers to the socket.
1823 */
1824 (void) sblock(&so->so_snd, M_WAIT);
1825
1826 /*
1827 * Loop through the pages in the file, starting with the requested
1828 * offset. Get a file page (do I/O if necessary), map the file page
1829 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1830 * it on the socket.
1831 */
1832 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1833 vm_object_offset_t pindex;
1834 vm_object_offset_t pgoff;
1835
1836 pindex = OFF_TO_IDX(off);
1837 retry_lookup:
1838 /*
1839 * Calculate the amount to transfer. Not to exceed a page,
1840 * the EOF, or the passed in nbytes.
1841 */
1842 xfsize = obj->un_pager.vnp.vnp_size - off;
1843 if (xfsize > PAGE_SIZE_64)
1844 xfsize = PAGE_SIZE;
1845 pgoff = (vm_object_offset_t)(off & PAGE_MASK_64);
1846 if (PAGE_SIZE - pgoff < xfsize)
1847 xfsize = PAGE_SIZE_64 - pgoff;
1848 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1849 xfsize = uap->nbytes - sbytes;
1850 if (xfsize <= 0)
1851 break;
1852 /*
1853 * Optimize the non-blocking case by looking at the socket space
1854 * before going to the extra work of constituting the sf_buf.
1855 */
1856 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1857 if (so->so_state & SS_CANTSENDMORE)
1858 error = EPIPE;
1859 else
1860 error = EAGAIN;
1861 sbunlock(&so->so_snd);
1862 goto done;
1863 }
1864 /*
1865 * Attempt to look up the page. If the page doesn't exist or the
1866 * part we're interested in isn't valid, then read it from disk.
1867 * If some other part of the kernel has this page (i.e. it's busy),
1868 * then disk I/O may be occuring on it, so wait and retry.
1869 */
1870 pg = vm_page_lookup(obj, pindex);
1871 if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
1872 !vm_page_is_valid(pg, pgoff, xfsize))) {
1873 struct uio auio;
1874 struct iovec aiov;
1875 int bsize;
1876
1877 if (pg == NULL) {
1878 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1879 if (pg == NULL) {
1880 VM_WAIT;
1881 goto retry_lookup;
1882 }
1883 /*
1884 * don't just clear PG_BUSY manually -
1885 * vm_page_alloc() should be considered opaque,
1886 * use the VM routine provided to clear
1887 * PG_BUSY.
1888 */
1889 vm_page_wakeup(pg);
1890
1891 }
1892 /*
1893 * Ensure that our page is still around when the I/O completes.
1894 */
1895 vm_page_io_start(pg);
1896 vm_page_wire(pg);
1897 /*
1898 * Get the page from backing store.
1899 */
1900 bsize = vp->v_mount->mnt_stat.f_iosize;
1901 auio.uio_iov = &aiov;
1902 auio.uio_iovcnt = 1;
1903 aiov.iov_base = 0;
1904 aiov.iov_len = MAXBSIZE;
1905 auio.uio_resid = MAXBSIZE;
1906 auio.uio_offset = trunc_page(off);
1907 auio.uio_segflg = UIO_NOCOPY;
1908 auio.uio_rw = UIO_READ;
1909 auio.uio_procp = p;
1910 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
1911 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1912 p->p_ucred);
1913 VOP_UNLOCK(vp, 0, p);
1914 vm_page_flag_clear(pg, PG_ZERO);
1915 vm_page_io_finish(pg);
1916 if (error) {
1917 vm_page_unwire(pg, 0);
1918 /*
1919 * See if anyone else might know about this page.
1920 * If not and it is not valid, then free it.
1921 */
1922 if (pg->wire_count == 0 && pg->valid == 0 &&
1923 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1924 pg->hold_count == 0)
1925 vm_page_lock_queues();
1926 vm_page_free(pg);
1927 vm_page_unlock_queues();
1928 sbunlock(&so->so_snd);
1929 goto done;
1930 }
1931 } else {
1932 if ((pg->flags & PG_BUSY) || pg->busy) {
1933 s = splvm();
1934 if ((pg->flags & PG_BUSY) || pg->busy) {
1935 /*
1936 * Page is busy. Wait and retry.
1937 */
1938 vm_page_flag_set(pg, PG_WANTED);
1939 tsleep(pg, PVM, "sfpbsy", 0);
1940 splx(s);
1941 goto retry_lookup;
1942 }
1943 splx(s);
1944 }
1945 /*
1946 * Protect from having the page ripped out from beneath us.
1947 */
1948 vm_page_wire(pg);
1949 }
1950 /*
1951 * Allocate a kernel virtual page and insert the physical page
1952 * into it.
1953 */
1954 sf = sf_buf_alloc();
1955 sf->m = pg;
1956 pmap_qenter(sf->kva, &pg, 1);
1957 /*
1958 * Get an mbuf header and set it up as having external storage.
1959 */
1960 MGETHDR(m, M_WAIT, MT_DATA);
1961 m->m_ext.ext_free = sf_buf_free;
1962 m->m_ext.ext_ref = sf_buf_ref;
1963 m->m_ext.ext_buf = (void *)sf->kva;
1964 m->m_ext.ext_size = PAGE_SIZE;
1965 m->m_data = (char *) sf->kva + pgoff;
1966 m->m_flags |= M_EXT;
1967 m->m_pkthdr.len = m->m_len = xfsize;
1968 /*
1969 * Add the buffer to the socket buffer chain.
1970 */
1971 s = splnet();
1972 retry_space:
1973 /*
1974 * Make sure that the socket is still able to take more data.
1975 * CANTSENDMORE being true usually means that the connection
1976 * was closed. so_error is true when an error was sensed after
1977 * a previous send.
1978 * The state is checked after the page mapping and buffer
1979 * allocation above since those operations may block and make
1980 * any socket checks stale. From this point forward, nothing
1981 * blocks before the pru_send (or more accurately, any blocking
1982 * results in a loop back to here to re-check).
1983 */
1984 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1985 if (so->so_state & SS_CANTSENDMORE) {
1986 error = EPIPE;
1987 } else {
1988 error = so->so_error;
1989 so->so_error = 0;
1990 }
1991 m_freem(m);
1992 sbunlock(&so->so_snd);
1993 splx(s);
1994 goto done;
1995 }
1996 /*
1997 * Wait for socket space to become available. We do this just
1998 * after checking the connection state above in order to avoid
1999 * a race condition with sbwait().
2000 */
2001 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2002 if (so->so_state & SS_NBIO) {
2003 m_freem(m);
2004 sbunlock(&so->so_snd);
2005 splx(s);
2006 error = EAGAIN;
2007 goto done;
2008 }
2009 error = sbwait(&so->so_snd);
2010 /*
2011 * An error from sbwait usually indicates that we've
2012 * been interrupted by a signal. If we've sent anything
2013 * then return bytes sent, otherwise return the error.
2014 */
2015 if (error) {
2016 m_freem(m);
2017 sbunlock(&so->so_snd);
2018 splx(s);
2019 goto done;
2020 }
2021 goto retry_space;
2022 }
2023 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
2024 splx(s);
2025 if (error) {
2026 sbunlock(&so->so_snd);
2027 goto done;
2028 }
2029 }
2030 sbunlock(&so->so_snd);
2031
2032 /*
2033 * Send trailers. Wimp out and use writev(2).
2034 */
2035 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2036 nuap.fd = uap->s;
2037 nuap.iovp = hdtr.trailers;
2038 nuap.iovcnt = hdtr.trl_cnt;
2039 error = writev(p, &nuap);
2040 if (error)
2041 goto done;
2042 sbytes += p->p_retval[0];
2043 }
2044
2045 done:
2046 if (uap->sbytes != NULL) {
2047 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2048 }
2049 return (error);
2050 }
2051
2052 #endif