]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-201.42.3.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Copyright (c) 1982, 1986, 1989, 1990, 1993
24 * The Regents of the University of California. All rights reserved.
25 *
26 * sendfile(2) and related extensions:
27 * Copyright (c) 1998, David Greenman. All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions
31 * are met:
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * 2. Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in the
36 * documentation and/or other materials provided with the distribution.
37 * 3. All advertising materials mentioning features or use of this software
38 * must display the following acknowledgement:
39 * This product includes software developed by the University of
40 * California, Berkeley and its contributors.
41 * 4. Neither the name of the University nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
58 */
59
60
61
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/filedesc.h>
65 #include <sys/proc.h>
66 #include <sys/file.h>
67 #include <sys/buf.h>
68 #include <sys/malloc.h>
69 #include <sys/mbuf.h>
70 #include <sys/protosw.h>
71 #include <sys/socket.h>
72 #include <sys/socketvar.h>
73 #if KTRACE
74 #include <sys/ktrace.h>
75 #endif
76 #include <sys/kernel.h>
77
78 #include <sys/kdebug.h>
79
#if KDEBUG

/*
 * kdebug trace codes for the socket layer, all built with NETDBG_CODE()
 * in the DBG_NETSOCK class.
 */

/* Layer entry/exit markers. */
#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)

/*
 * Per-function codes.  The send/receive system calls in this file OR these
 * with DBG_FUNC_START / DBG_FUNC_END when calling KERNEL_DEBUG().
 */
#define	DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define	DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define	DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define	DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define	DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define	DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))

#endif /* KDEBUG */
94
95 struct getsockname_args {
96 int fdes;
97 caddr_t asa;
98 int *alen;
99 };
100
101 struct getsockopt_args {
102 int s;
103 int level;
104 int name;
105 caddr_t val;
106 int *avalsize;
107 } ;
108
109 struct accept_args {
110 int s;
111 caddr_t name;
112 int *anamelen;
113 };
114
115 struct getpeername_args {
116 int fdes;
117 caddr_t asa;
118 int *alen;
119 };
120
121
122 /* ARGSUSED */
123
#if SENDFILE
/*
 * Forward declarations and file-scope state for the sendfile buffer pool.
 * Only compiled when SENDFILE is enabled.
 */
static void		 sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
static struct sf_buf	*sf_buf_alloc(void);
static void		 sf_buf_ref(caddr_t addr, u_int size);
static void		 sf_buf_free(caddr_t addr, u_int size);

static SLIST_HEAD(, sf_buf) sf_freelist;
static vm_offset_t	 sf_base;
static struct sf_buf	*sf_bufs;
static int		 sf_buf_alloc_want;
#endif /* SENDFILE */
136
137 static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags, register_t *retval));
138 static int recvit __P((struct proc *p, int s, struct msghdr *mp,
139 caddr_t namelenp, register_t *retval));
140
141 static int accept1 __P((struct proc *p, struct accept_args *uap, register_t *retval, int compat));
142 static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
143 register_t *retval, int compat));
144 static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
145 register_t *retval, int compat));
146
147 /*
148 * System call interface to the socket abstraction.
149 */
/*
 * COMPAT_OLDSOCK enables the old-style (o*) socket entry points and the
 * MSG_COMPAT handling in this file.
 */
#if COMPAT_43 || defined(COMPAT_SUNOS)
#define	COMPAT_OLDSOCK
#endif

/* File operations vector installed in f_ops for every socket descriptor. */
extern struct fileops socketops;
155
156 struct socket_args {
157 int domain;
158 int type;
159 int protocol;
160 };
161 int
162 socket(p, uap, retval)
163 struct proc *p;
164 register struct socket_args *uap;
165 register_t *retval;
166 {
167 struct filedesc *fdp = p->p_fd;
168 struct socket *so;
169 struct file *fp;
170 int fd, error;
171
172 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
173 error = falloc(p, &fp, &fd);
174 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
175
176 if (error)
177 return (error);
178 fp->f_flag = FREAD|FWRITE;
179 fp->f_type = DTYPE_SOCKET;
180 fp->f_ops = &socketops;
181 if (error = socreate(uap->domain, &so, uap->type,
182 uap->protocol)) {
183 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
184 fdrelse(p, fd);
185 ffree(fp);
186 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
187 } else {
188 fp->f_data = (caddr_t)so;
189 *fdflags(p, fd) &= ~UF_RESERVED;
190 *retval = fd;
191 }
192 return (error);
193 }
194
195 struct bind_args {
196 int s;
197 caddr_t name;
198 int namelen;
199 };
200
201 /* ARGSUSED */
202 int
203 bind(p, uap, retval)
204 struct proc *p;
205 register struct bind_args *uap;
206 register_t *retval;
207 {
208 struct file *fp;
209 struct sockaddr *sa;
210 int error;
211
212 error = getsock(p->p_fd, uap->s, &fp);
213 if (error)
214 return (error);
215 error = getsockaddr(&sa, uap->name, uap->namelen);
216 if (error)
217 return (error);
218 error = sobind((struct socket *)fp->f_data, sa);
219 FREE(sa, M_SONAME);
220 return (error);
221 }
222
223 struct listen_args {
224 int s;
225 int backlog;
226 };
227
228
229
230 int
231 listen(p, uap, retval)
232 struct proc *p;
233 register struct listen_args *uap;
234 register_t *retval;
235 {
236 struct file *fp;
237 int error;
238
239 error = getsock(p->p_fd, uap->s, &fp);
240 if (error)
241 return (error);
242 return (solisten((struct socket *)fp->f_data, uap->backlog));
243 }
244
245 #ifndef COMPAT_OLDSOCK
246 #define accept1 accept
247 #endif
248
249
250
251 int
252 accept1(p, uap, retval, compat)
253 struct proc *p;
254 register struct accept_args *uap;
255 register_t *retval;
256 int compat;
257 {
258 struct file *fp;
259 struct sockaddr *sa;
260 u_int namelen;
261 int error, s;
262 struct socket *head, *so;
263 int fd;
264 short fflag; /* type must match fp->f_flag */
265 int tmpfd;
266
267 if (uap->name) {
268 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
269 sizeof (namelen));
270 if(error)
271 return (error);
272 }
273 error = getsock(p->p_fd, uap->s, &fp);
274 if (error)
275 return (error);
276 s = splnet();
277 head = (struct socket *)fp->f_data;
278 if ((head->so_options & SO_ACCEPTCONN) == 0) {
279 splx(s);
280 return (EINVAL);
281 }
282 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
283 splx(s);
284 return (EWOULDBLOCK);
285 }
286 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
287 if (head->so_state & SS_CANTRCVMORE) {
288 head->so_error = ECONNABORTED;
289 break;
290 }
291 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
292 "accept", 0);
293 if (error) {
294 splx(s);
295 return (error);
296 }
297 }
298 if (head->so_error) {
299 error = head->so_error;
300 head->so_error = 0;
301 splx(s);
302 return (error);
303 }
304
305
306 /*
307 * At this point we know that there is at least one connection
308 * ready to be accepted. Remove it from the queue prior to
309 * allocating the file descriptor for it since falloc() may
310 * block allowing another process to accept the connection
311 * instead.
312 */
313 so = TAILQ_FIRST(&head->so_comp);
314 TAILQ_REMOVE(&head->so_comp, so, so_list);
315 head->so_qlen--;
316
317 fflag = fp->f_flag;
318 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
319 error = falloc(p, &fp, &fd);
320 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
321 if (error) {
322 /*
323 * Probably ran out of file descriptors. Put the
324 * unaccepted connection back onto the queue and
325 * do another wakeup so some other process might
326 * have a chance at it.
327 */
328 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
329 head->so_qlen++;
330 wakeup_one(&head->so_timeo);
331 splx(s);
332 return (error);
333 } else {
334 *fdflags(p, fd) &= ~UF_RESERVED;
335 *retval = fd;
336 }
337
338 so->so_state &= ~SS_COMP;
339 so->so_head = NULL;
340 fp->f_type = DTYPE_SOCKET;
341 fp->f_flag = fflag;
342 fp->f_ops = &socketops;
343 fp->f_data = (caddr_t)so;
344 sa = 0;
345 (void) soaccept(so, &sa);
346 if (sa == 0) {
347 namelen = 0;
348 if (uap->name)
349 goto gotnoname;
350 return 0;
351 }
352 if (uap->name) {
353 /* check sa_len before it is destroyed */
354 if (namelen > sa->sa_len)
355 namelen = sa->sa_len;
356 #ifdef COMPAT_OLDSOCK
357 if (compat)
358 ((struct osockaddr *)sa)->sa_family =
359 sa->sa_family;
360 #endif
361 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
362 if (!error)
363 gotnoname:
364 error = copyout((caddr_t)&namelen,
365 (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
366 }
367 FREE(sa, M_SONAME);
368 splx(s);
369 return (error);
370 }
371
372 int
373 accept(p, uap, retval)
374 struct proc *p;
375 struct accept_args *uap;
376 register_t *retval;
377 {
378
379 return (accept1(p, uap, retval, 0));
380 }
381
#ifdef COMPAT_OLDSOCK
/*
 * oaccept: old-style accept entry point; runs accept1() with the compat
 * flag set so the address is returned in struct osockaddr layout.
 */
int
oaccept(p, uap, retval)
	struct proc *p;
	struct accept_args *uap;
	register_t *retval;
{
	int error;

	error = accept1(p, uap, retval, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
393
394 struct connect_args {
395 int s;
396 caddr_t name;
397 int namelen;
398 };
399 /* ARGSUSED */
400 int
401 connect(p, uap, retval)
402 struct proc *p;
403 register struct connect_args *uap;
404 register_t *retval;
405 {
406 struct file *fp;
407 register struct socket *so;
408 struct sockaddr *sa;
409 int error, s;
410
411 error = getsock(p->p_fd, uap->s, &fp);
412 if (error)
413 return (error);
414 so = (struct socket *)fp->f_data;
415 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
416 return (EALREADY);
417 error = getsockaddr(&sa, uap->name, uap->namelen);
418 if (error)
419 return (error);
420 error = soconnect(so, sa);
421 if (error)
422 goto bad;
423 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
424 FREE(sa, M_SONAME);
425 return (EINPROGRESS);
426 }
427 s = splnet();
428 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
429 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
430 "connec", 0);
431 if (error)
432 break;
433 }
434 if (error == 0) {
435 error = so->so_error;
436 so->so_error = 0;
437 }
438 splx(s);
439 bad:
440 so->so_state &= ~SS_ISCONNECTING;
441 FREE(sa, M_SONAME);
442 if (error == ERESTART)
443 error = EINTR;
444 return (error);
445 }
446
447 struct socketpair_args {
448 int domain;
449 int type;
450 int protocol;
451 int *rsv;
452 };
453 int
454 socketpair(p, uap, retval)
455 struct proc *p;
456 register struct socketpair_args *uap;
457 register_t *retval;
458 {
459 register struct filedesc *fdp = p->p_fd;
460 struct file *fp1, *fp2;
461 struct socket *so1, *so2;
462 int fd, error, sv[2];
463
464 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
465 if (error)
466 return (error);
467 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
468 if (error)
469 goto free1;
470 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
471 error = falloc(p, &fp1, &fd);
472 if (error)
473 goto free2;
474 sv[0] = fd;
475 fp1->f_flag = FREAD|FWRITE;
476 fp1->f_type = DTYPE_SOCKET;
477 fp1->f_ops = &socketops;
478 fp1->f_data = (caddr_t)so1;
479 error = falloc(p, &fp2, &fd);
480 if (error)
481 goto free3;
482 fp2->f_flag = FREAD|FWRITE;
483 fp2->f_type = DTYPE_SOCKET;
484 fp2->f_ops = &socketops;
485 fp2->f_data = (caddr_t)so2;
486 sv[1] = fd;
487 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
488 error = soconnect2(so1, so2);
489 if (error) {
490 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
491 goto free4;
492 }
493
494 if (uap->type == SOCK_DGRAM) {
495 /*
496 * Datagram socket connection is asymmetric.
497 */
498 error = soconnect2(so2, so1);
499 if (error) {
500 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
501 goto free4;
502 }
503 }
504 *fdflags(p, sv[0]) &= ~UF_RESERVED;
505 *fdflags(p, sv[1]) &= ~UF_RESERVED;
506 error = copyout((caddr_t)sv, (caddr_t)uap->rsv,
507 2 * sizeof (int));
508 #if 0 /* old pipe(2) syscall compatability, unused these days */
509 retval[0] = sv[0]; /* XXX ??? */
510 retval[1] = sv[1]; /* XXX ??? */
511 #endif /* 0 */
512 return (error);
513 free4:
514 fdrelse(p, sv[1]);
515 ffree(fp2);
516 free3:
517 fdrelse(p, sv[0]);
518 ffree(fp1);
519 free2:
520 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
521 (void)soclose(so2);
522 free1:
523 (void)soclose(so1);
524 return (error);
525 }
526
527 static int
528 sendit(p, s, mp, flags, retsize)
529 register struct proc *p;
530 int s;
531 register struct msghdr *mp;
532 int flags;
533 register_t *retsize;
534 {
535 struct file *fp;
536 struct uio auio;
537 register struct iovec *iov;
538 register int i;
539 struct mbuf *control;
540 struct sockaddr *to;
541 int len, error;
542 struct socket *so;
543 #if KTRACE
544 struct iovec *ktriov = NULL;
545 #endif
546
547 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0,0,0,0,0);
548
549 if (error = getsock(p->p_fd, s, &fp))
550 {
551 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
552 return (error);
553 }
554
555 auio.uio_iov = mp->msg_iov;
556 auio.uio_iovcnt = mp->msg_iovlen;
557 auio.uio_segflg = UIO_USERSPACE;
558 auio.uio_rw = UIO_WRITE;
559 auio.uio_procp = p;
560 auio.uio_offset = 0; /* XXX */
561 auio.uio_resid = 0;
562 iov = mp->msg_iov;
563 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
564 if (iov->iov_len < 0)
565 {
566 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0);
567 return (EINVAL);
568 }
569
570 if ((auio.uio_resid += iov->iov_len) < 0)
571 {
572 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0);
573 return (EINVAL);
574 }
575 }
576 if (mp->msg_name) {
577 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
578 if (error) {
579 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
580 return (error);
581 }
582 } else
583 to = 0;
584 if (mp->msg_control) {
585 if (mp->msg_controllen < sizeof(struct cmsghdr)
586 #ifdef COMPAT_OLDSOCK
587 && mp->msg_flags != MSG_COMPAT
588 #endif
589 ) {
590 error = EINVAL;
591 goto bad;
592 }
593 error = sockargs(&control, mp->msg_control,
594 mp->msg_controllen, MT_CONTROL);
595 if (error)
596 goto bad;
597 #ifdef COMPAT_OLDSOCK
598 if (mp->msg_flags == MSG_COMPAT) {
599 register struct cmsghdr *cm;
600
601 M_PREPEND(control, sizeof(*cm), M_WAIT);
602 if (control == 0) {
603 error = ENOBUFS;
604 goto bad;
605 } else {
606 cm = mtod(control, struct cmsghdr *);
607 cm->cmsg_len = control->m_len;
608 cm->cmsg_level = SOL_SOCKET;
609 cm->cmsg_type = SCM_RIGHTS;
610 }
611 }
612 #endif
613 } else
614 control = 0;
615
616 len = auio.uio_resid;
617 so = (struct socket *)fp->f_data;
618 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
619 flags);
620 if (error) {
621 if (auio.uio_resid != len && (error == ERESTART ||
622 error == EINTR || error == EWOULDBLOCK))
623 error = 0;
624 if (error == EPIPE)
625 psignal(p, SIGPIPE);
626 }
627 if (error == 0)
628 *retsize = len - auio.uio_resid;
629 #if KTRACE
630 if (ktriov != NULL) {
631 if (error == 0)
632 ktrgenio(p->p_tracep, s, UIO_WRITE,
633 ktriov, *retsize, error);
634 FREE(ktriov, M_TEMP);
635 }
636 #endif
637 bad:
638 if (to)
639 FREE(to, M_SONAME);
640 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
641 return (error);
642 }
643
644
645 struct sendto_args {
646 int s;
647 caddr_t buf;
648 size_t len;
649 int flags;
650 caddr_t to;
651 int tolen;
652 };
653
654 int
655 sendto(p, uap, retval)
656 struct proc *p;
657 register struct sendto_args /* {
658 int s;
659 caddr_t buf;
660 size_t len;
661 int flags;
662 caddr_t to;
663 int tolen;
664 } */ *uap;
665 register_t *retval;
666
667 {
668 struct msghdr msg;
669 struct iovec aiov;
670 int stat;
671
672 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0,0,0,0,0);
673
674 msg.msg_name = uap->to;
675 msg.msg_namelen = uap->tolen;
676 msg.msg_iov = &aiov;
677 msg.msg_iovlen = 1;
678 msg.msg_control = 0;
679 #ifdef COMPAT_OLDSOCK
680 msg.msg_flags = 0;
681 #endif
682 aiov.iov_base = uap->buf;
683 aiov.iov_len = uap->len;
684 stat = sendit(p, uap->s, &msg, uap->flags, retval);
685 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, stat, *retval,0,0,0);
686 return(stat);
687 }
688
689 #ifdef COMPAT_OLDSOCK
690 struct osend_args {
691 int s;
692 caddr_t buf;
693 int len;
694 int flags;
695 };
696
697 int
698 osend(p, uap, retval)
699 struct proc *p;
700 register struct osend_args /* {
701 int s;
702 caddr_t buf;
703 int len;
704 int flags;
705 } */ *uap;
706 register_t *retval;
707
708 {
709 struct msghdr msg;
710 struct iovec aiov;
711
712 msg.msg_name = 0;
713 msg.msg_namelen = 0;
714 msg.msg_iov = &aiov;
715 msg.msg_iovlen = 1;
716 aiov.iov_base = uap->buf;
717 aiov.iov_len = uap->len;
718 msg.msg_control = 0;
719 msg.msg_flags = 0;
720 return (sendit(p, uap->s, &msg, uap->flags, retval));
721 }
722 struct osendmsg_args {
723 int s;
724 caddr_t msg;
725 int flags;
726 };
727
728 int
729 osendmsg(p, uap, retval)
730 struct proc *p;
731 register struct osendmsg_args /* {
732 int s;
733 caddr_t msg;
734 int flags;
735 } */ *uap;
736 register_t *retval;
737
738 {
739 struct msghdr msg;
740 struct iovec aiov[UIO_SMALLIOV], *iov;
741 int error;
742
743 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
744 if (error)
745 return (error);
746 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
747 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
748 return (EMSGSIZE);
749 MALLOC(iov, struct iovec *,
750 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
751 M_WAITOK);
752 } else
753 iov = aiov;
754 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
755 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
756 if (error)
757 goto done;
758 msg.msg_flags = MSG_COMPAT;
759 msg.msg_iov = iov;
760 error = sendit(p, uap->s, &msg, uap->flags, retval);
761 done:
762 if (iov != aiov)
763 FREE(iov, M_IOV);
764 return (error);
765 }
766 #endif
767
768 struct sendmsg_args {
769 int s;
770 caddr_t msg;
771 int flags;
772 };
773
774 int
775 sendmsg(p, uap, retval)
776 struct proc *p;
777 register struct sendmsg_args *uap;
778 register_t *retval;
779 {
780 struct msghdr msg;
781 struct iovec aiov[UIO_SMALLIOV], *iov;
782 int error;
783
784 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0,0,0,0,0);
785 if (error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)))
786 {
787 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
788 return (error);
789 }
790
791 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
792 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
793 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
794 return (EMSGSIZE);
795 }
796 MALLOC(iov, struct iovec *,
797 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
798 M_WAITOK);
799 } else
800 iov = aiov;
801 if (msg.msg_iovlen &&
802 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
803 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
804 goto done;
805 msg.msg_iov = iov;
806 #ifdef COMPAT_OLDSOCK
807 msg.msg_flags = 0;
808 #endif
809 error = sendit(p, uap->s, &msg, uap->flags, retval);
810 done:
811 if (iov != aiov)
812 FREE(iov, M_IOV);
813 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
814 return (error);
815 }
816
817 static int
818 recvit(p, s, mp, namelenp, retval)
819 register struct proc *p;
820 int s;
821 register struct msghdr *mp;
822 caddr_t namelenp;
823 register_t *retval;
824 {
825 struct file *fp;
826 struct uio auio;
827 register struct iovec *iov;
828 register int i;
829 int len, error;
830 struct mbuf *m, *control = 0;
831 caddr_t ctlbuf;
832 struct socket *so;
833 struct sockaddr *fromsa = 0;
834 #if KTRACE
835 struct iovec *ktriov = NULL;
836 #endif
837
838 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0,0,0,0,0);
839 if (error = getsock(p->p_fd, s, &fp))
840 {
841 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
842 return (error);
843 }
844
845 auio.uio_iov = mp->msg_iov;
846 auio.uio_iovcnt = mp->msg_iovlen;
847 auio.uio_segflg = UIO_USERSPACE;
848 auio.uio_rw = UIO_READ;
849 auio.uio_procp = p;
850 auio.uio_offset = 0; /* XXX */
851 auio.uio_resid = 0;
852 iov = mp->msg_iov;
853 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
854 if ((auio.uio_resid += iov->iov_len) < 0) {
855 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL,0,0,0,0);
856 return (EINVAL);
857 }
858 }
859 #if KTRACE
860 if (KTRPOINT(p, KTR_GENIO)) {
861 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
862
863 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
864 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
865 }
866 #endif
867 len = auio.uio_resid;
868 so = (struct socket *)fp->f_data;
869 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
870 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
871 &mp->msg_flags);
872 if (error) {
873 if (auio.uio_resid != len && (error == ERESTART ||
874 error == EINTR || error == EWOULDBLOCK))
875 error = 0;
876 }
877 #if KTRACE
878 if (ktriov != NULL) {
879 if (error == 0)
880 ktrgenio(p->p_tracep, s, UIO_WRITE,
881 ktriov, len - auio.uio_resid, error);
882 FREE(ktriov, M_TEMP);
883 }
884 #endif
885 if (error)
886 goto out;
887 *retval = len - auio.uio_resid;
888 if (mp->msg_name) {
889 len = mp->msg_namelen;
890 if (len <= 0 || fromsa == 0)
891 len = 0;
892 else {
893 #ifndef MIN
894 #define MIN(a,b) ((a)>(b)?(b):(a))
895 #endif
896 /* save sa_len before it is destroyed by MSG_COMPAT */
897 len = MIN(len, fromsa->sa_len);
898 #ifdef COMPAT_OLDSOCK
899 if (mp->msg_flags & MSG_COMPAT)
900 ((struct osockaddr *)fromsa)->sa_family =
901 fromsa->sa_family;
902 #endif
903 error = copyout(fromsa,
904 (caddr_t)mp->msg_name, (unsigned)len);
905 if (error)
906 goto out;
907 }
908 mp->msg_namelen = len;
909 if (namelenp &&
910 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
911 #ifdef COMPAT_OLDSOCK
912 if (mp->msg_flags & MSG_COMPAT)
913 error = 0; /* old recvfrom didn't check */
914 else
915 #endif
916 goto out;
917 }
918 }
919 if (mp->msg_control) {
920 #ifdef COMPAT_OLDSOCK
921 /*
922 * We assume that old recvmsg calls won't receive access
923 * rights and other control info, esp. as control info
924 * is always optional and those options didn't exist in 4.3.
925 * If we receive rights, trim the cmsghdr; anything else
926 * is tossed.
927 */
928 if (control && mp->msg_flags & MSG_COMPAT) {
929 if (mtod(control, struct cmsghdr *)->cmsg_level !=
930 SOL_SOCKET ||
931 mtod(control, struct cmsghdr *)->cmsg_type !=
932 SCM_RIGHTS) {
933 mp->msg_controllen = 0;
934 goto out;
935 }
936 control->m_len -= sizeof (struct cmsghdr);
937 control->m_data += sizeof (struct cmsghdr);
938 }
939 #endif
940 len = mp->msg_controllen;
941 m = control;
942 mp->msg_controllen = 0;
943 ctlbuf = (caddr_t) mp->msg_control;
944
945 while (m && len > 0) {
946 unsigned int tocopy;
947
948 if (len >= m->m_len)
949 tocopy = m->m_len;
950 else {
951 mp->msg_flags |= MSG_CTRUNC;
952 tocopy = len;
953 }
954
955 if (error = copyout((caddr_t)mtod(m, caddr_t),
956 ctlbuf, tocopy))
957 goto out;
958
959 ctlbuf += tocopy;
960 len -= tocopy;
961 m = m->m_next;
962 }
963 mp->msg_controllen = ctlbuf - mp->msg_control;
964 }
965 out:
966 if (fromsa)
967 FREE(fromsa, M_SONAME);
968 if (control)
969 m_freem(control);
970 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
971 return (error);
972 }
973
974
975 struct recvfrom_args {
976 int s;
977 caddr_t buf;
978 size_t len;
979 int flags;
980 caddr_t from;
981 int *fromlenaddr;
982 };
983
984 int
985 recvfrom(p, uap, retval)
986 struct proc *p;
987 register struct recvfrom_args /* {
988 int s;
989 caddr_t buf;
990 size_t len;
991 int flags;
992 caddr_t from;
993 int *fromlenaddr;
994 } */ *uap;
995 register_t *retval;
996 {
997 struct msghdr msg;
998 struct iovec aiov;
999 int error;
1000
1001 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0,0,0,0,0);
1002
1003 if (uap->fromlenaddr) {
1004 error = copyin((caddr_t)uap->fromlenaddr,
1005 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1006 if (error)
1007 return (error);
1008 } else
1009 msg.msg_namelen = 0;
1010 msg.msg_name = uap->from;
1011 msg.msg_iov = &aiov;
1012 msg.msg_iovlen = 1;
1013 aiov.iov_base = uap->buf;
1014 aiov.iov_len = uap->len;
1015 msg.msg_control = 0;
1016 msg.msg_flags = uap->flags;
1017 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error,0,0,0,0);
1018 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval));
1019 }
1020
#ifdef COMPAT_OLDSOCK
/*
 * orecvfrom: old-style recvfrom.  Sets MSG_COMPAT in the caller's flags
 * and forwards to recvfrom(), passing retval through so the byte count
 * reaches the caller.
 */
int
orecvfrom(p, uap, retval)
	struct proc *p;
	struct recvfrom_args *uap;
	register_t *retval;
{

	uap->flags |= MSG_COMPAT;
	return (recvfrom(p, uap, retval));
}
#endif /* COMPAT_OLDSOCK */
1033
1034
1035 #ifdef COMPAT_OLDSOCK
1036 int
1037 orecv(p, uap, retval)
1038 struct proc *p;
1039 register struct orecv_args {
1040 int s;
1041 caddr_t buf;
1042 int len;
1043 int flags;
1044 } *uap;
1045 register_t *retval;
1046 {
1047 struct msghdr msg;
1048 struct iovec aiov;
1049
1050 msg.msg_name = 0;
1051 msg.msg_namelen = 0;
1052 msg.msg_iov = &aiov;
1053 msg.msg_iovlen = 1;
1054 aiov.iov_base = uap->buf;
1055 aiov.iov_len = uap->len;
1056 msg.msg_control = 0;
1057 msg.msg_flags = uap->flags;
1058 return (recvit(p, uap->s, &msg, (caddr_t)0, retval));
1059 }
1060
1061 /*
1062 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1063 * overlays the new one, missing only the flags, and with the (old) access
1064 * rights where the control fields are now.
1065 */
1066 int
1067 orecvmsg(p, uap, retval)
1068 struct proc *p;
1069 register struct orecvmsg_args {
1070 int s;
1071 struct omsghdr *msg;
1072 int flags;
1073 } *uap;
1074 register_t *retval;
1075 {
1076 struct msghdr msg;
1077 struct iovec aiov[UIO_SMALLIOV], *iov;
1078 int error;
1079
1080 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1081 sizeof (struct omsghdr));
1082 if (error)
1083 return (error);
1084 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1085 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
1086 return (EMSGSIZE);
1087 MALLOC(iov, struct iovec *,
1088 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1089 M_WAITOK);
1090 } else
1091 iov = aiov;
1092 msg.msg_flags = uap->flags | MSG_COMPAT;
1093 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1094 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1095 if (error)
1096 goto done;
1097 msg.msg_iov = iov;
1098 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen, retval);
1099
1100 if (msg.msg_controllen && error == 0)
1101 error = copyout((caddr_t)&msg.msg_controllen,
1102 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1103 done:
1104 if (iov != aiov)
1105 FREE(iov, M_IOV);
1106 return (error);
1107 }
1108 #endif
1109
1110 int
1111 recvmsg(p, uap, retval)
1112 struct proc *p;
1113 register struct recvmsg_args {
1114 int s;
1115 struct msghdr *msg;
1116 int flags;
1117 } *uap;
1118 register_t *retval;
1119 {
1120 struct msghdr msg;
1121 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1122 register int error;
1123
1124 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0,0,0,0,0);
1125 if (error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1126 sizeof (msg)))
1127 {
1128 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1129 return (error);
1130 }
1131
1132 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1133 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1134 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
1135 return (EMSGSIZE);
1136 }
1137 MALLOC(iov, struct iovec *,
1138 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1139 M_WAITOK);
1140 } else
1141 iov = aiov;
1142 #ifdef COMPAT_OLDSOCK
1143 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1144 #else
1145 msg.msg_flags = uap->flags;
1146 #endif
1147 uiov = msg.msg_iov;
1148 msg.msg_iov = iov;
1149 error = copyin((caddr_t)uiov, (caddr_t)iov,
1150 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1151 if (error)
1152 goto done;
1153 error = recvit(p, uap->s, &msg, (caddr_t)0, retval);
1154 if (!error) {
1155 msg.msg_iov = uiov;
1156 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1157 }
1158 done:
1159 if (iov != aiov)
1160 FREE(iov, M_IOV);
1161 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1162 return (error);
1163 }
1164
1165 /* ARGSUSED */
1166 int
1167 shutdown(p, uap, retval)
1168 struct proc *p;
1169 register struct shutdown_args {
1170 int s;
1171 int how;
1172 } *uap;
1173 register_t *retval;
1174 {
1175 struct file *fp;
1176 int error;
1177
1178 error = getsock(p->p_fd, uap->s, &fp);
1179 if (error)
1180 return (error);
1181 return (soshutdown((struct socket *)fp->f_data, uap->how));
1182 }
1183
1184
1185
1186
1187
1188 /* ARGSUSED */
1189 int
1190 setsockopt(p, uap, retval)
1191 struct proc *p;
1192 register struct setsockopt_args {
1193 int s;
1194 int level;
1195 int name;
1196 caddr_t val;
1197 int valsize;
1198 } *uap;
1199 register_t *retval;
1200 {
1201 struct file *fp;
1202 struct sockopt sopt;
1203 int error;
1204
1205 if (uap->val == 0 && uap->valsize != 0)
1206 return (EFAULT);
1207 if (uap->valsize < 0)
1208 return (EINVAL);
1209
1210 error = getsock(p->p_fd, uap->s, &fp);
1211 if (error)
1212 return (error);
1213
1214 sopt.sopt_dir = SOPT_SET;
1215 sopt.sopt_level = uap->level;
1216 sopt.sopt_name = uap->name;
1217 sopt.sopt_val = uap->val;
1218 sopt.sopt_valsize = uap->valsize;
1219 sopt.sopt_p = p;
1220
1221 return (sosetopt((struct socket *)fp->f_data, &sopt));
1222 }
1223
1224
1225
1226 int
1227 getsockopt(p, uap, retval)
1228 struct proc *p;
1229 struct getsockopt_args *uap;
1230 register_t *retval;
1231 {
1232 int valsize, error;
1233 struct file *fp;
1234 struct sockopt sopt;
1235
1236 error = getsock(p->p_fd, uap->s, &fp);
1237 if (error)
1238 return (error);
1239 if (uap->val) {
1240 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1241 sizeof (valsize));
1242 if (error)
1243 return (error);
1244 if (valsize < 0)
1245 return (EINVAL);
1246 } else
1247 valsize = 0;
1248
1249 sopt.sopt_dir = SOPT_GET;
1250 sopt.sopt_level = uap->level;
1251 sopt.sopt_name = uap->name;
1252 sopt.sopt_val = uap->val;
1253 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1254 sopt.sopt_p = p;
1255
1256 error = sogetopt((struct socket *)fp->f_data, &sopt);
1257 if (error == 0) {
1258 valsize = sopt.sopt_valsize;
1259 error = copyout((caddr_t)&valsize,
1260 (caddr_t)uap->avalsize, sizeof (valsize));
1261 }
1262 return (error);
1263 }
1264
1265
1266
1267 struct pipe_args {
1268 int dummy;
1269 };
1270 /* ARGSUSED */
1271 int
1272 pipe(p, uap, retval)
1273 struct proc *p;
1274 struct pipe_args *uap;
1275 register_t *retval;
1276 {
1277 struct file *rf, *wf;
1278 struct socket *rso, *wso;
1279 int fd, error;
1280
1281 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1282 if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) {
1283 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1284 return (error);
1285 }
1286 if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) {
1287 goto free1;
1288 }
1289 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1290 error = falloc(p, &rf, &fd);
1291 if (error)
1292 goto free2;
1293 retval[0] = fd;
1294 rf->f_flag = FREAD;
1295 rf->f_type = DTYPE_SOCKET;
1296 rf->f_ops = &socketops;
1297 rf->f_data = (caddr_t)rso;
1298 if (error = falloc(p, &wf, &fd))
1299 goto free3;
1300 wf->f_flag = FWRITE;
1301 wf->f_type = DTYPE_SOCKET;
1302 wf->f_ops = &socketops;
1303 wf->f_data = (caddr_t)wso;
1304 retval[1] = fd;
1305
1306 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1307 error = unp_connect2(wso, rso);
1308 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1309 if (error)
1310 goto free4;
1311 *fdflags(p, retval[0]) &= ~UF_RESERVED;
1312 *fdflags(p, retval[1]) &= ~UF_RESERVED;
1313 return (0);
1314 free4:
1315 fdrelse(p, retval[1]);
1316 ffree(wf);
1317 free3:
1318 fdrelse(p, retval[0]);
1319 ffree(rf);
1320 free2:
1321 thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
1322 (void)soclose(wso);
1323 free1:
1324 (void)soclose(rso);
1325
1326 thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
1327 return (error);
1328 }
1329
1330
1331 /*
1332 * Get socket name.
1333 */
1334 /* ARGSUSED */
1335 static int
1336 getsockname1(p, uap, retval, compat)
1337 struct proc *p;
1338 register struct getsockname_args *uap;
1339 register_t *retval;
1340 int compat;
1341 {
1342 struct file *fp;
1343 register struct socket *so;
1344 struct sockaddr *sa;
1345 u_int len;
1346 int error;
1347
1348 error = getsock(p->p_fd, uap->fdes, &fp);
1349 if (error)
1350 return (error);
1351 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1352 if (error)
1353 return (error);
1354 so = (struct socket *)fp->f_data;
1355 sa = 0;
1356 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1357 if (error)
1358 goto bad;
1359 if (sa == 0) {
1360 len = 0;
1361 goto gotnothing;
1362 }
1363
1364 len = MIN(len, sa->sa_len);
1365 #ifdef COMPAT_OLDSOCK
1366 if (compat)
1367 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1368 #endif
1369 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1370 if (error == 0)
1371 gotnothing:
1372 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1373 sizeof (len));
1374 bad:
1375 if (sa)
1376 FREE(sa, M_SONAME);
1377 return (error);
1378 }
1379
1380 int
1381 getsockname(p, uap, retval)
1382 struct proc *p;
1383 struct getsockname_args *uap;
1384 register_t *retval;
1385 {
1386
1387 return (getsockname1(p, uap, retval, 0));
1388 }
1389
1390 #ifdef COMPAT_OLDSOCK
1391 int
1392 ogetsockname(p, uap, retval)
1393 struct proc *p;
1394 struct getsockname_args *uap;
1395 register_t *retval;
1396 {
1397
1398 return (getsockname1(p, uap, retval, 1));
1399 }
1400 #endif /* COMPAT_OLDSOCK */
1401
1402 /*
1403 * Get name of peer for connected socket.
1404 */
1405 /* ARGSUSED */
1406 int
1407 getpeername1(p, uap, retval, compat)
1408 struct proc *p;
1409 register struct getpeername_args *uap;
1410 register_t *retval;
1411 int compat;
1412 {
1413 struct file *fp;
1414 register struct socket *so;
1415 struct sockaddr *sa;
1416 u_int len;
1417 int error;
1418
1419 error = getsock(p->p_fd, uap->fdes, &fp);
1420 if (error)
1421 return (error);
1422 so = (struct socket *)fp->f_data;
1423 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
1424 return (ENOTCONN);
1425 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1426 if (error)
1427 return (error);
1428 sa = 0;
1429 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1430 if (error)
1431 goto bad;
1432 if (sa == 0) {
1433 len = 0;
1434 goto gotnothing;
1435 }
1436 len = MIN(len, sa->sa_len);
1437 #ifdef COMPAT_OLDSOCK
1438 if (compat)
1439 ((struct osockaddr *)sa)->sa_family =
1440 sa->sa_family;
1441 #endif
1442 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1443 if (error)
1444 goto bad;
1445 gotnothing:
1446 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1447 bad:
1448 if (sa) FREE(sa, M_SONAME);
1449 return (error);
1450 }
1451
1452 int
1453 getpeername(p, uap, retval)
1454 struct proc *p;
1455 struct getpeername_args *uap;
1456 register_t *retval;
1457 {
1458
1459 return (getpeername1(p, uap, retval, 0));
1460 }
1461
1462 #ifdef COMPAT_OLDSOCK
1463 int
1464 ogetpeername(p, uap, retval)
1465 struct proc *p;
1466 struct ogetpeername_args *uap;
1467 register_t *retval;
1468 {
1469
1470 /* XXX uap should have type `getpeername_args *' to begin with. */
1471 return (getpeername1(p, (struct getpeername_args *)uap, retval, 1));
1472 }
1473 #endif /* COMPAT_OLDSOCK */
1474
1475 int
1476 sockargs(mp, buf, buflen, type)
1477 struct mbuf **mp;
1478 caddr_t buf;
1479 int buflen, type;
1480 {
1481 register struct sockaddr *sa;
1482 register struct mbuf *m;
1483 int error;
1484
1485 if ((u_int)buflen > MLEN) {
1486 #ifdef COMPAT_OLDSOCK
1487 if (type == MT_SONAME && (u_int)buflen <= 112)
1488 buflen = MLEN; /* unix domain compat. hack */
1489 else
1490 #endif
1491 return (EINVAL);
1492 }
1493 m = m_get(M_WAIT, type);
1494 if (m == NULL)
1495 return (ENOBUFS);
1496 m->m_len = buflen;
1497 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1498 if (error)
1499 (void) m_free(m);
1500 else {
1501 *mp = m;
1502 if (type == MT_SONAME) {
1503 sa = mtod(m, struct sockaddr *);
1504
1505 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1506 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1507 sa->sa_family = sa->sa_len;
1508 #endif
1509 sa->sa_len = buflen;
1510 }
1511 }
1512 return (error);
1513 }
1514
1515 int
1516 getsockaddr(namp, uaddr, len)
1517 struct sockaddr **namp;
1518 caddr_t uaddr;
1519 size_t len;
1520 {
1521 struct sockaddr *sa;
1522 int error;
1523
1524 if (len > SOCK_MAXADDRLEN)
1525 return ENAMETOOLONG;
1526
1527 if (len == 0)
1528 return EINVAL;
1529
1530 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1531 error = copyin(uaddr, sa, len);
1532 if (error) {
1533 FREE(sa, M_SONAME);
1534 } else {
1535 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1536 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1537 sa->sa_family = sa->sa_len;
1538 #endif
1539 sa->sa_len = len;
1540 *namp = sa;
1541 }
1542 return error;
1543 }
1544
1545 int
1546 getsock(fdp, fdes, fpp)
1547 struct filedesc *fdp;
1548 int fdes;
1549 struct file **fpp;
1550 {
1551 register struct file *fp;
1552
1553 if ((unsigned)fdes >= fdp->fd_nfiles ||
1554 (fp = fdp->fd_ofiles[fdes]) == NULL ||
1555 (fdp->fd_ofileflags[fdes] & UF_RESERVED))
1556 return (EBADF);
1557 if (fp->f_type != DTYPE_SOCKET)
1558 return (ENOTSOCK);
1559 *fpp = fp;
1560 return (0);
1561 }
1562
1563 #if SENDFILE
1564 /*
1565 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1566 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1567 * been made static, but may be useful in the future for doing zero-copy in
1568 * other parts of the networking code.
1569 */
1570 static void
1571 sf_buf_init(void *arg)
1572 {
1573 int i;
1574
1575 SLIST_INIT(&sf_freelist);
1576 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1577 sf_bufs = _MALLOC(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
1578 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
1579 for (i = 0; i < nsfbufs; i++) {
1580 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1581 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1582 }
1583 }
1584
1585 /*
1586 * Get an sf_buf from the freelist. Will block if none are available.
1587 */
1588 static struct sf_buf *
1589 sf_buf_alloc()
1590 {
1591 struct sf_buf *sf;
1592 int s;
1593
1594 s = splimp();
1595 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1596 sf_buf_alloc_want = 1;
1597 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1598 }
1599 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1600 splx(s);
1601 sf->refcnt = 1;
1602 return (sf);
1603 }
1604
1605 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1606 static void
1607 sf_buf_ref(caddr_t addr, u_int size)
1608 {
1609 struct sf_buf *sf;
1610
1611 sf = dtosf(addr);
1612 if (sf->refcnt == 0)
1613 panic("sf_buf_ref: referencing a free sf_buf");
1614 sf->refcnt++;
1615 }
1616
1617 /*
1618 * Lose a reference to an sf_buf. When none left, detach mapped page
1619 * and release resources back to the system.
1620 *
1621 * Must be called at splimp.
1622 */
1623 static void
1624 sf_buf_free(caddr_t addr, u_int size)
1625 {
1626 struct sf_buf *sf;
1627 struct vm_page *m;
1628 int s;
1629
1630 sf = dtosf(addr);
1631 if (sf->refcnt == 0)
1632 panic("sf_buf_free: freeing free sf_buf");
1633 sf->refcnt--;
1634 if (sf->refcnt == 0) {
1635 pmap_qremove((vm_offset_t)addr, 1);
1636 m = sf->m;
1637 s = splvm();
1638 vm_page_unwire(m, 0);
1639 /*
1640 * Check for the object going away on us. This can
1641 * happen since we don't hold a reference to it.
1642 * If so, we're responsible for freeing the page.
1643 */
1644 if (m->wire_count == 0 && m->object == NULL)
1645 vm_page_lock_queues();
1646 vm_page_free(m);
1647 vm_page_unlock_queues();
1648 splx(s);
1649 sf->m = NULL;
1650 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1651 if (sf_buf_alloc_want) {
1652 sf_buf_alloc_want = 0;
1653 wakeup(&sf_freelist);
1654 }
1655 }
1656 }
1657
1658 /*
1659 * sendfile(2).
1660 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1661 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1662 *
1663 * Send a file specified by 'fd' and starting at 'offset' to a socket
1664 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1665 * nbytes == 0. Optionally add a header and/or trailer to the socket
1666 * output. If specified, write the total number of bytes sent into *sbytes.
1667 */
1668 int
1669 sendfile(struct proc *p, struct sendfile_args *uap)
1670 {
1671 struct file *fp;
1672 struct filedesc *fdp = p->p_fd;
1673 struct vnode *vp;
1674 struct vm_object *obj;
1675 struct socket *so;
1676 struct mbuf *m;
1677 struct sf_buf *sf;
1678 struct vm_page *pg;
1679 struct writev_args nuap;
1680 struct sf_hdtr hdtr;
1681 off_t off, xfsize, sbytes = 0;
1682 int error = 0, s;
1683
1684 /*
1685 * Do argument checking. Must be a regular file in, stream
1686 * type and connected socket out, positive offset.
1687 */
1688 if (((u_int)uap->fd) >= fdp->fd_nfiles ||
1689 (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
1690 (fp->f_flag & FREAD) == 0) {
1691 error = EBADF;
1692 goto done;
1693 }
1694 if (fp->f_type != DTYPE_VNODE) {
1695 error = EINVAL;
1696 goto done;
1697 }
1698 vp = (struct vnode *)fp->f_data;
1699 obj = vp->v_object;
1700 if (vp->v_type != VREG || obj == NULL) {
1701 error = EINVAL;
1702 goto done;
1703 }
1704 error = getsock(p->p_fd, uap->s, &fp);
1705 if (error)
1706 goto done;
1707 so = (struct socket *)fp->f_data;
1708 if (so->so_type != SOCK_STREAM) {
1709 error = EINVAL;
1710 goto done;
1711 }
1712 if ((so->so_state & SS_ISCONNECTED) == 0) {
1713 error = ENOTCONN;
1714 goto done;
1715 }
1716 if (uap->offset < 0) {
1717 error = EINVAL;
1718 goto done;
1719 }
1720
1721 /*
1722 * If specified, get the pointer to the sf_hdtr struct for
1723 * any headers/trailers.
1724 */
1725 if (uap->hdtr != NULL) {
1726 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1727 if (error)
1728 goto done;
1729 /*
1730 * Send any headers. Wimp out and use writev(2).
1731 */
1732 if (hdtr.headers != NULL) {
1733 nuap.fd = uap->s;
1734 nuap.iovp = hdtr.headers;
1735 nuap.iovcnt = hdtr.hdr_cnt;
1736 error = writev(p, &nuap);
1737 if (error)
1738 goto done;
1739 sbytes += p->p_retval[0];
1740 }
1741 }
1742
1743 /*
1744 * Protect against multiple writers to the socket.
1745 */
1746 (void) sblock(&so->so_snd, M_WAIT);
1747
1748 /*
1749 * Loop through the pages in the file, starting with the requested
1750 * offset. Get a file page (do I/O if necessary), map the file page
1751 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1752 * it on the socket.
1753 */
1754 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1755 vm_object_offset_t pindex;
1756 vm_object_offset_t pgoff;
1757
1758 pindex = OFF_TO_IDX(off);
1759 retry_lookup:
1760 /*
1761 * Calculate the amount to transfer. Not to exceed a page,
1762 * the EOF, or the passed in nbytes.
1763 */
1764 xfsize = obj->un_pager.vnp.vnp_size - off;
1765 if (xfsize > PAGE_SIZE_64)
1766 xfsize = PAGE_SIZE;
1767 pgoff = (vm_object_offset_t)(off & PAGE_MASK_64);
1768 if (PAGE_SIZE - pgoff < xfsize)
1769 xfsize = PAGE_SIZE_64 - pgoff;
1770 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1771 xfsize = uap->nbytes - sbytes;
1772 if (xfsize <= 0)
1773 break;
1774 /*
1775 * Optimize the non-blocking case by looking at the socket space
1776 * before going to the extra work of constituting the sf_buf.
1777 */
1778 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1779 if (so->so_state & SS_CANTSENDMORE)
1780 error = EPIPE;
1781 else
1782 error = EAGAIN;
1783 sbunlock(&so->so_snd);
1784 goto done;
1785 }
1786 /*
1787 * Attempt to look up the page. If the page doesn't exist or the
1788 * part we're interested in isn't valid, then read it from disk.
1789 * If some other part of the kernel has this page (i.e. it's busy),
1790 * then disk I/O may be occuring on it, so wait and retry.
1791 */
1792 pg = vm_page_lookup(obj, pindex);
1793 if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
1794 !vm_page_is_valid(pg, pgoff, xfsize))) {
1795 struct uio auio;
1796 struct iovec aiov;
1797 int bsize;
1798
1799 if (pg == NULL) {
1800 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1801 if (pg == NULL) {
1802 VM_WAIT;
1803 goto retry_lookup;
1804 }
1805 /*
1806 * don't just clear PG_BUSY manually -
1807 * vm_page_alloc() should be considered opaque,
1808 * use the VM routine provided to clear
1809 * PG_BUSY.
1810 */
1811 vm_page_wakeup(pg);
1812
1813 }
1814 /*
1815 * Ensure that our page is still around when the I/O completes.
1816 */
1817 vm_page_io_start(pg);
1818 vm_page_wire(pg);
1819 /*
1820 * Get the page from backing store.
1821 */
1822 bsize = vp->v_mount->mnt_stat.f_iosize;
1823 auio.uio_iov = &aiov;
1824 auio.uio_iovcnt = 1;
1825 aiov.iov_base = 0;
1826 aiov.iov_len = MAXBSIZE;
1827 auio.uio_resid = MAXBSIZE;
1828 auio.uio_offset = trunc_page(off);
1829 auio.uio_segflg = UIO_NOCOPY;
1830 auio.uio_rw = UIO_READ;
1831 auio.uio_procp = p;
1832 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
1833 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1834 p->p_ucred);
1835 VOP_UNLOCK(vp, 0, p);
1836 vm_page_flag_clear(pg, PG_ZERO);
1837 vm_page_io_finish(pg);
1838 if (error) {
1839 vm_page_unwire(pg, 0);
1840 /*
1841 * See if anyone else might know about this page.
1842 * If not and it is not valid, then free it.
1843 */
1844 if (pg->wire_count == 0 && pg->valid == 0 &&
1845 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1846 pg->hold_count == 0)
1847 vm_page_lock_queues();
1848 vm_page_free(pg);
1849 vm_page_unlock_queues();
1850 sbunlock(&so->so_snd);
1851 goto done;
1852 }
1853 } else {
1854 if ((pg->flags & PG_BUSY) || pg->busy) {
1855 s = splvm();
1856 if ((pg->flags & PG_BUSY) || pg->busy) {
1857 /*
1858 * Page is busy. Wait and retry.
1859 */
1860 vm_page_flag_set(pg, PG_WANTED);
1861 tsleep(pg, PVM, "sfpbsy", 0);
1862 splx(s);
1863 goto retry_lookup;
1864 }
1865 splx(s);
1866 }
1867 /*
1868 * Protect from having the page ripped out from beneath us.
1869 */
1870 vm_page_wire(pg);
1871 }
1872 /*
1873 * Allocate a kernel virtual page and insert the physical page
1874 * into it.
1875 */
1876 sf = sf_buf_alloc();
1877 sf->m = pg;
1878 pmap_qenter(sf->kva, &pg, 1);
1879 /*
1880 * Get an mbuf header and set it up as having external storage.
1881 */
1882 MGETHDR(m, M_WAIT, MT_DATA);
1883 m->m_ext.ext_free = sf_buf_free;
1884 m->m_ext.ext_ref = sf_buf_ref;
1885 m->m_ext.ext_buf = (void *)sf->kva;
1886 m->m_ext.ext_size = PAGE_SIZE;
1887 m->m_data = (char *) sf->kva + pgoff;
1888 m->m_flags |= M_EXT;
1889 m->m_pkthdr.len = m->m_len = xfsize;
1890 /*
1891 * Add the buffer to the socket buffer chain.
1892 */
1893 s = splnet();
1894 retry_space:
1895 /*
1896 * Make sure that the socket is still able to take more data.
1897 * CANTSENDMORE being true usually means that the connection
1898 * was closed. so_error is true when an error was sensed after
1899 * a previous send.
1900 * The state is checked after the page mapping and buffer
1901 * allocation above since those operations may block and make
1902 * any socket checks stale. From this point forward, nothing
1903 * blocks before the pru_send (or more accurately, any blocking
1904 * results in a loop back to here to re-check).
1905 */
1906 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1907 if (so->so_state & SS_CANTSENDMORE) {
1908 error = EPIPE;
1909 } else {
1910 error = so->so_error;
1911 so->so_error = 0;
1912 }
1913 m_freem(m);
1914 sbunlock(&so->so_snd);
1915 splx(s);
1916 goto done;
1917 }
1918 /*
1919 * Wait for socket space to become available. We do this just
1920 * after checking the connection state above in order to avoid
1921 * a race condition with sbwait().
1922 */
1923 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1924 if (so->so_state & SS_NBIO) {
1925 m_freem(m);
1926 sbunlock(&so->so_snd);
1927 splx(s);
1928 error = EAGAIN;
1929 goto done;
1930 }
1931 error = sbwait(&so->so_snd);
1932 /*
1933 * An error from sbwait usually indicates that we've
1934 * been interrupted by a signal. If we've sent anything
1935 * then return bytes sent, otherwise return the error.
1936 */
1937 if (error) {
1938 m_freem(m);
1939 sbunlock(&so->so_snd);
1940 splx(s);
1941 goto done;
1942 }
1943 goto retry_space;
1944 }
1945 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
1946 splx(s);
1947 if (error) {
1948 sbunlock(&so->so_snd);
1949 goto done;
1950 }
1951 }
1952 sbunlock(&so->so_snd);
1953
1954 /*
1955 * Send trailers. Wimp out and use writev(2).
1956 */
1957 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1958 nuap.fd = uap->s;
1959 nuap.iovp = hdtr.trailers;
1960 nuap.iovcnt = hdtr.trl_cnt;
1961 error = writev(p, &nuap);
1962 if (error)
1963 goto done;
1964 sbytes += p->p_retval[0];
1965 }
1966
1967 done:
1968 if (uap->sbytes != NULL) {
1969 copyout(&sbytes, uap->sbytes, sizeof(off_t));
1970 }
1971 return (error);
1972 }
1973
1974 #endif