]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
xnu-792.10.96.tar.gz
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Copyright (c) 1982, 1986, 1989, 1990, 1993
24 * The Regents of the University of California. All rights reserved.
25 *
26 * sendfile(2) and related extensions:
27 * Copyright (c) 1998, David Greenman. All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions
31 * are met:
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * 2. Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in the
36 * documentation and/or other materials provided with the distribution.
37 * 3. All advertising materials mentioning features or use of this software
38 * must display the following acknowledgement:
39 * This product includes software developed by the University of
40 * California, Berkeley and its contributors.
41 * 4. Neither the name of the University nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
58 */
59
60
61
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/filedesc.h>
65 #include <sys/proc_internal.h>
66 #include <sys/file_internal.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <kern/lock.h>
70 #include <sys/domain.h>
71 #include <sys/protosw.h>
72 #include <sys/signalvar.h>
73 #include <sys/socket.h>
74 #include <sys/socketvar.h>
75 #if KTRACE
76 #include <sys/ktrace.h>
77 #endif
78 #include <sys/kernel.h>
79 #include <sys/uio_internal.h>
80
81 #include <bsm/audit_kernel.h>
82
83 #include <sys/kdebug.h>
84 #include <sys/sysproto.h>
85
/*
 * Shorthand for fields reached through a descriptor entry's f_fglob
 * pointer: each macro expands to f_fglob->fg_<field>, which lets the code
 * below write fp->f_flag, fp->f_type, fp->f_ops, fp->f_data and so on.
 */
86 #define f_flag f_fglob->fg_flag
87 #define f_type f_fglob->fg_type
88 #define f_msgcount f_fglob->fg_msgcount
89 #define f_cred f_fglob->fg_cred
90 #define f_ops f_fglob->fg_ops
91 #define f_offset f_fglob->fg_offset
92 #define f_data f_fglob->fg_data
93 #if KDEBUG
94
/*
 * Trace codes handed to KERNEL_DEBUG() by the send/receive paths in this
 * file.  All are built with NETDBG_CODE() under DBG_NETSOCK; the
 * DBG_FNC_* values are OR'ed with DBG_FUNC_START / DBG_FUNC_END at the
 * call sites.  The DBG_LAYER_* codes are not referenced in the part of
 * the file visible here.
 */
95 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
96 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
97 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
98 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
99 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
100 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
101 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
102 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
103 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
104 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
105
106 #endif
107
108
/*
 * Radar 4056224 workaround switch.  When enabled, sendto() and recvfrom()
 * fall back to write()/read() if the descriptor turns out to be a pipe.
 * last_pid_4056224 holds the highest pid for which the "uses send/recv on
 * a pipe" message has been printed; a process is reported only when its
 * pid is greater than that value.
 */
109 #define HACK_FOR_4056224 1
110 #if HACK_FOR_4056224
111 static pid_t last_pid_4056224 = 0;
112 #endif /* HACK_FOR_4056224 */
113
114
/*
 * Declarations compiled only when SENDFILE is set: the sf_buf helper
 * prototypes, a SYSINIT entry that registers sf_buf_init, and the
 * file-scope sf_buf bookkeeping variables.
 * NOTE(review): none of these are referenced in the portion of the file
 * visible here; their definitions presumably live further down -- confirm.
 */
115 #if SENDFILE
116 static void sf_buf_init(void *arg);
117 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
118 static struct sf_buf *sf_buf_alloc(void);
119 static void sf_buf_ref(caddr_t addr, u_int size);
120 static void sf_buf_free(caddr_t addr, u_int size);
121
122 static SLIST_HEAD(, sf_buf) sf_freelist;
123 static vm_offset_t sf_base;
124 static struct sf_buf *sf_bufs;
125 static int sf_buf_alloc_want;
126 #endif
127
/*
 * Forward declarations for the file-local workers.
 *
 * sendit() and recvit() are the shared back ends of the send and receive
 * system calls: callers build a user_msghdr plus a uio and pass both in.
 * accept1() takes a trailing `compat` flag that selects 4.3BSD sockaddr
 * layout on copy-out; getsockname1() and getpeername1() are declared with
 * the same trailing flag.
 */
128 static int sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
129 int flags, register_t *retval);
130 static int recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
131 user_addr_t namelenp, register_t *retval);
132
133 static int accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat);
134 static int getsockname1(struct proc *p, struct getsockname_args *uap,
135 register_t *retval, int compat);
136 static int getpeername1(struct proc *p, struct getpeername_args *uap,
137 register_t *retval, int compat);
138
139
140 #if COMPAT_43_SOCKET
/*
 * Argument blocks and prototypes for the old-style (4.3BSD compatible)
 * socket entry points.  In the part of the file visible here osend(),
 * osendmsg(), orecv() and orecvmsg() are stubs that return ENOTSUP, while
 * oaccept() and orecvfrom() forward to the current implementations with
 * the compatibility behaviour switched on.
 */
141 struct orecvmsg_args {
142 int s;
143 struct omsghdr *msg;
144 int flags;
145 };
146 struct osendmsg_args {
147 int s;
148 caddr_t msg;
149 int flags;
150 };
151 struct osend_args {
152 int s;
153 caddr_t buf;
154 int len;
155 int flags;
156 };
157 struct orecv_args {
158 int s;
159 caddr_t buf;
160 int len;
161 int flags;
162 };
163
164 int oaccept(struct proc *p, struct accept_args *uap, register_t *retval);
165 int ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval);
166 int ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval);
167 int orecv(struct proc *p, struct orecv_args *uap, register_t *retval);
168 int orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval);
169 int orecvmsg(struct proc *p, struct orecvmsg_args *uap, register_t *retval);
170 int osend(struct proc *p, struct osend_args *uap, register_t *retval);
171 int osendmsg(struct proc *p, struct osendmsg_args *uap, register_t *retval);
172 #endif // COMPAT_43_SOCKET
173
174 /*
175 * System call interface to the socket abstraction.
176 */
177
178 extern struct fileops socketops;
179
180 int
181 socket(p, uap, retval)
182 struct proc *p;
183 register struct socket_args *uap;
184 register_t *retval;
185 {
186 struct socket *so;
187 struct fileproc *fp;
188 int fd, error;
189
190 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
191
192 error = falloc(p, &fp, &fd);
193 if (error) {
194 return (error);
195 }
196 fp->f_flag = FREAD|FWRITE;
197 fp->f_type = DTYPE_SOCKET;
198 fp->f_ops = &socketops;
199
200 error = socreate(uap->domain, &so, uap->type, uap->protocol);
201 if (error) {
202 fp_free(p, fd, fp);
203 } else {
204 fp->f_data = (caddr_t)so;
205
206 proc_fdlock(p);
207 *fdflags(p, fd) &= ~UF_RESERVED;
208
209 fp_drop(p, fd, fp, 1);
210 proc_fdunlock(p);
211
212 *retval = fd;
213 }
214 return (error);
215 }
216
217 /* ARGSUSED */
218 int
219 bind(struct proc *p, struct bind_args *uap, __unused register_t *retval)
220 {
221 struct sockaddr *sa;
222 struct socket *so;
223 int error;
224
225 AUDIT_ARG(fd, uap->s);
226 error = file_socket(uap->s, &so);
227 if (error)
228 return (error);
229 error = getsockaddr(&sa, uap->name, uap->namelen);
230 if (error)
231 goto out;
232 AUDIT_ARG(sockaddr, p, sa);
233 if (so != NULL)
234 error = sobind(so, sa);
235 else
236 error = EBADF;
237 FREE(sa, M_SONAME);
238 out:
239 file_drop(uap->s);
240 return (error);
241 }
242
243
244 int
245 listen(__unused struct proc *p, register struct listen_args *uap,
246 __unused register_t *retval)
247 {
248 int error;
249 struct socket * so;
250
251 AUDIT_ARG(fd, uap->s);
252 error = file_socket(uap->s, &so);
253 if (error)
254 return (error);
255 if (so != NULL)
256 error = solisten(so, uap->backlog);
257 else
258 error = EBADF;
259 file_drop(uap->s);
260 return (error);
261 }
262
263 #if !COMPAT_43_SOCKET
264 #define accept1 accept
265 #endif
266
267
268
269 int
270 accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat)
271 {
272 struct fileproc *fp;
273 struct sockaddr *sa;
274 socklen_t namelen;
275 int error;
276 struct socket *head, *so = NULL;
277 lck_mtx_t *mutex_held;
278 int fd = uap->s;
279 int newfd;;
280 short fflag; /* type must match fp->f_flag */
281 int dosocklock = 0;
282
283 AUDIT_ARG(fd, uap->s);
284 if (uap->name) {
285 error = copyin(uap->anamelen, (caddr_t)&namelen,
286 sizeof(socklen_t));
287 if(error)
288 return (error);
289 }
290 error = fp_getfsock(p, fd, &fp, &head);
291 if (error) {
292 if (error == EOPNOTSUPP)
293 error = ENOTSOCK;
294 return (error);
295 }
296 if (head == NULL) {
297 error = EBADF;
298 goto out;
299 }
300
301 socket_lock(head, 1);
302
303 if (head->so_proto->pr_getlock != NULL) {
304 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
305 dosocklock = 1;
306 }
307 else {
308 mutex_held = head->so_proto->pr_domain->dom_mtx;
309 dosocklock = 0;
310 }
311
312
313 if ((head->so_options & SO_ACCEPTCONN) == 0) {
314 socket_unlock(head, 1);
315 error = EINVAL;
316 goto out;
317 }
318 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
319 socket_unlock(head, 1);
320 error = EWOULDBLOCK;
321 goto out;
322 }
323 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
324 if (head->so_state & SS_CANTRCVMORE) {
325 head->so_error = ECONNABORTED;
326 break;
327 }
328 if (head->so_usecount < 1)
329 panic("accept1: head=%x refcount=%d\n", head, head->so_usecount);
330 error = msleep((caddr_t)&head->so_timeo, mutex_held, PSOCK | PCATCH,
331 "accept", 0);
332 if (head->so_usecount < 1)
333 panic("accept1: 2 head=%x refcount=%d\n", head, head->so_usecount);
334 if ((head->so_state & SS_DRAINING)) {
335 error = ECONNABORTED;
336 }
337 if (error) {
338 socket_unlock(head, 1);
339 goto out;
340 }
341 }
342 if (head->so_error) {
343 error = head->so_error;
344 head->so_error = 0;
345 socket_unlock(head, 1);
346 goto out;
347 }
348
349
350 /*
351 * At this point we know that there is at least one connection
352 * ready to be accepted. Remove it from the queue prior to
353 * allocating the file descriptor for it since falloc() may
354 * block allowing another process to accept the connection
355 * instead.
356 */
357 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
358 so = TAILQ_FIRST(&head->so_comp);
359 TAILQ_REMOVE(&head->so_comp, so, so_list);
360 head->so_qlen--;
361 socket_unlock(head, 0); /* unlock head to avoid deadlock with select, keep a ref on head */
362 fflag = fp->f_flag;
363 proc_fdlock(p);
364 error = falloc_locked(p, &fp, &newfd, 1);
365 if (error) {
366 /*
367 * Probably ran out of file descriptors. Put the
368 * unaccepted connection back onto the queue and
369 * do another wakeup so some other process might
370 * have a chance at it.
371 */
372 proc_fdunlock(p);
373 socket_lock(head, 0);
374 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
375 head->so_qlen++;
376 wakeup_one((caddr_t)&head->so_timeo);
377 socket_unlock(head, 1);
378 goto out;
379 }
380 *fdflags(p, newfd) &= ~UF_RESERVED;
381 *retval = newfd;
382 fp->f_type = DTYPE_SOCKET;
383 fp->f_flag = fflag;
384 fp->f_ops = &socketops;
385 fp->f_data = (caddr_t)so;
386 fp_drop(p, newfd, fp, 1);
387 proc_fdunlock(p);
388 socket_lock(head, 0);
389 if (dosocklock)
390 socket_lock(so, 1);
391 so->so_state &= ~SS_COMP;
392 so->so_head = NULL;
393 sa = 0;
394 (void) soacceptlock(so, &sa, 0);
395 socket_unlock(head, 1);
396 if (sa == 0) {
397 namelen = 0;
398 if (uap->name)
399 goto gotnoname;
400 if (dosocklock)
401 socket_unlock(so, 1);
402 error = 0;
403 goto out;
404 }
405 AUDIT_ARG(sockaddr, p, sa);
406 if (uap->name) {
407 /* check sa_len before it is destroyed */
408 if (namelen > sa->sa_len)
409 namelen = sa->sa_len;
410 #if COMPAT_43_SOCKET
411 if (compat)
412 ((struct osockaddr *)sa)->sa_family =
413 sa->sa_family;
414 #endif
415 error = copyout(sa, uap->name, namelen);
416 if (!error)
417 gotnoname:
418 error = copyout((caddr_t)&namelen, uap->anamelen,
419 sizeof(socklen_t));
420 }
421 FREE(sa, M_SONAME);
422 if (dosocklock)
423 socket_unlock(so, 1);
424 out:
425 file_drop(fd);
426 return (error);
427 }
428
429 int
430 accept(struct proc *p, struct accept_args *uap, register_t *retval)
431 {
432
433 return (accept1(p, uap, retval, 0));
434 }
435
#if COMPAT_43_SOCKET
/*
 * oaccept: old-format entry point; runs accept1() with the compatibility
 * flag set so the returned address uses the osockaddr layout.
 */
int
oaccept(struct proc *p, struct accept_args *uap, register_t *retval)
{
	const int compat = 1;

	return (accept1(p, uap, retval, compat));
}
#endif /* COMPAT_43_SOCKET */
444
445 /* ARGSUSED */
446 int
447 connect(struct proc *p, struct connect_args *uap, __unused register_t *retval)
448 {
449 struct socket *so;
450 struct sockaddr *sa;
451 lck_mtx_t *mutex_held;
452 int error;
453 int fd = uap->s;
454
455 AUDIT_ARG(fd, uap->s);
456 error = file_socket( fd, &so);
457 if (error)
458 return (error);
459 if (so == NULL) {
460 error = EBADF;
461 goto out;
462 }
463
464 socket_lock(so, 1);
465
466 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
467 socket_unlock(so, 1);
468 error = EALREADY;
469 goto out;
470 }
471 error = getsockaddr(&sa, uap->name, uap->namelen);
472 if (error) {
473 socket_unlock(so, 1);
474 goto out;
475 }
476 AUDIT_ARG(sockaddr, p, sa);
477 error = soconnectlock(so, sa, 0);
478 if (error)
479 goto bad;
480 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
481 FREE(sa, M_SONAME);
482 socket_unlock(so, 1);
483 error = EINPROGRESS;
484 goto out;
485 }
486 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
487 if (so->so_proto->pr_getlock != NULL)
488 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
489 else
490 mutex_held = so->so_proto->pr_domain->dom_mtx;
491 error = msleep((caddr_t)&so->so_timeo, mutex_held, PSOCK | PCATCH,
492 "connec", 0);
493 if ((so->so_state & SS_DRAINING)) {
494 error = ECONNABORTED;
495 }
496 if (error)
497 break;
498 }
499 if (error == 0) {
500 error = so->so_error;
501 so->so_error = 0;
502 }
503 bad:
504 so->so_state &= ~SS_ISCONNECTING;
505 socket_unlock(so, 1);
506 FREE(sa, M_SONAME);
507 if (error == ERESTART)
508 error = EINTR;
509 out:
510 file_drop(fd);
511 return (error);
512 }
513
514 int
515 socketpair(struct proc *p, struct socketpair_args *uap, __unused register_t *retval)
516 {
517 struct fileproc *fp1, *fp2;
518 struct socket *so1, *so2;
519 int fd, error, sv[2];
520
521 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
522 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
523 if (error)
524 return (error);
525 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
526 if (error)
527 goto free1;
528
529 error = falloc(p, &fp1, &fd);
530 if (error) {
531 goto free2;
532 }
533 fp1->f_flag = FREAD|FWRITE;
534 fp1->f_type = DTYPE_SOCKET;
535 fp1->f_ops = &socketops;
536 fp1->f_data = (caddr_t)so1;
537 sv[0] = fd;
538
539 error = falloc(p, &fp2, &fd);
540 if (error) {
541 goto free3;
542 }
543 fp2->f_flag = FREAD|FWRITE;
544 fp2->f_type = DTYPE_SOCKET;
545 fp2->f_ops = &socketops;
546 fp2->f_data = (caddr_t)so2;
547 sv[1] = fd;
548
549 error = soconnect2(so1, so2);
550 if (error) {
551 goto free4;
552 }
553 if (uap->type == SOCK_DGRAM) {
554 /*
555 * Datagram socket connection is asymmetric.
556 */
557 error = soconnect2(so2, so1);
558 if (error) {
559 goto free4;
560 }
561 }
562
563 proc_fdlock(p);
564 *fdflags(p, sv[0]) &= ~UF_RESERVED;
565 *fdflags(p, sv[1]) &= ~UF_RESERVED;
566 fp_drop(p, sv[0], fp1, 1);
567 fp_drop(p, sv[1], fp2, 1);
568 proc_fdunlock(p);
569
570 error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof(int));
571 #if 0 /* old pipe(2) syscall compatability, unused these days */
572 retval[0] = sv[0]; /* XXX ??? */
573 retval[1] = sv[1]; /* XXX ??? */
574 #endif /* 0 */
575 return (error);
576 free4:
577 fp_free(p, sv[1], fp2);
578 free3:
579 fp_free(p, sv[0], fp1);
580 free2:
581 (void)soclose(so2);
582 free1:
583 (void)soclose(so1);
584 return (error);
585 }
586
587 static int
588 sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
589 int flags, register_t *retval)
590 {
591 struct mbuf *control;
592 struct sockaddr *to;
593 int error;
594 struct socket *so;
595 user_ssize_t len;
596 #if KTRACE
597 uio_t ktruio = NULL;
598 #endif
599
600 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0,0,0,0,0);
601
602 error = file_socket(s, &so);
603 if (error )
604 {
605 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
606 return (error);
607 }
608
609 if (mp->msg_name) {
610 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
611 if (error) {
612 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
613 goto out;
614 }
615 AUDIT_ARG(sockaddr, p, to);
616 } else {
617 to = 0;
618 }
619 if (mp->msg_control) {
620 if (mp->msg_controllen < ((socklen_t)sizeof(struct cmsghdr))
621 #if COMPAT_43_SOCKET
622 && !(mp->msg_flags & MSG_COMPAT)
623 #endif
624 ) {
625 error = EINVAL;
626 goto bad;
627 }
628 error = sockargs(&control, mp->msg_control,
629 mp->msg_controllen, MT_CONTROL);
630 if (error)
631 goto bad;
632 #if COMPAT_43_SOCKET
633 if (mp->msg_flags & MSG_COMPAT) {
634 register struct cmsghdr *cm;
635
636 M_PREPEND(control, sizeof(*cm), M_WAIT);
637 if (control == 0) {
638 error = ENOBUFS;
639 goto bad;
640 } else {
641 cm = mtod(control, struct cmsghdr *);
642 cm->cmsg_len = control->m_len;
643 cm->cmsg_level = SOL_SOCKET;
644 cm->cmsg_type = SCM_RIGHTS;
645 }
646 }
647 #endif
648 } else {
649 control = 0;
650 }
651
652 #if KTRACE
653 if (KTRPOINT(p, KTR_GENIO)) {
654 ktruio = uio_duplicate(uiop);
655 }
656 #endif
657
658 len = uio_resid(uiop);
659 if (so == NULL)
660 error = EBADF;
661 else
662 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
663 flags);
664 if (error) {
665 if (uio_resid(uiop) != len && (error == ERESTART ||
666 error == EINTR || error == EWOULDBLOCK))
667 error = 0;
668 /* Generation of SIGPIPE can be controlled per socket */
669 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
670 psignal(p, SIGPIPE);
671 }
672 if (error == 0)
673 *retval = (int)(len - uio_resid(uiop));
674 bad:
675 #if KTRACE
676 if (ktruio != NULL) {
677 if (error == 0) {
678 uio_setresid(ktruio, retval[0]);
679 ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error);
680 }
681 uio_free(ktruio);
682 }
683 #endif
684 if (to)
685 FREE(to, M_SONAME);
686 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
687 out:
688 file_drop(s);
689 return (error);
690 }
691
692
693 int
694 sendto(struct proc *p, struct sendto_args *uap, register_t *retval)
695 {
696 struct user_msghdr msg;
697 int error;
698 uio_t auio = NULL;
699
700 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0,0,0,0,0);
701 AUDIT_ARG(fd, uap->s);
702
703 auio = uio_create(1, 0,
704 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
705 UIO_WRITE);
706 if (auio == NULL) {
707 return (ENOMEM);
708 }
709 uio_addiov(auio, uap->buf, uap->len);
710
711 msg.msg_name = uap->to;
712 msg.msg_namelen = uap->tolen;
713 /* no need to set up msg_iov. sendit uses uio_t we send it */
714 msg.msg_iov = 0;
715 msg.msg_iovlen = 0;
716 msg.msg_control = 0;
717 msg.msg_flags = 0;
718
719 error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
720
721 if (auio != NULL) {
722 uio_free(auio);
723 }
724
725 #if HACK_FOR_4056224
726 /*
727 * Radar 4056224
728 * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
729 * This will be removed in the release following Tiger
730 */
731 if (error == ENOTSOCK) {
732 struct fileproc *fp;
733
734 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
735 (void) fp_drop(p, uap->s, fp,0);
736
737 if (fp->f_type == DTYPE_PIPE) {
738 struct write_args write_uap;
739 user_ssize_t write_retval;
740
741 if (p->p_pid > last_pid_4056224) {
742 last_pid_4056224 = p->p_pid;
743
744 printf("%s[%d] uses send/recv on a pipe\n",
745 p->p_comm, p->p_pid);
746 }
747
748 bzero(&write_uap, sizeof(struct write_args));
749 write_uap.fd = uap->s;
750 write_uap.cbuf = uap->buf;
751 write_uap.nbyte = uap->len;
752
753 error = write(p, &write_uap, &write_retval);
754 *retval = (int)write_retval;
755 }
756 }
757 }
758 #endif /* HACK_FOR_4056224 */
759
760 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval,0,0,0);
761
762 return(error);
763 }
764
765 #if COMPAT_43_SOCKET
766 int
767 osend(__unused struct proc *p,
768 __unused struct osend_args *uap,
769 __unused register_t *retval)
770 {
771 /* these are no longer supported and in fact
772 * there is no way to call it directly.
773 * LP64todo - remove this once we're sure there are no clients
774 */
775 return (ENOTSUP);
776 }
777
778 int
779 osendmsg(__unused struct proc *p,
780 __unused struct osendmsg_args *uap,
781 __unused register_t *retval)
782 {
783 /* these are no longer supported and in fact
784 * there is no way to call it directly.
785 * LP64todo - remove this once we're sure there are no clients
786 */
787 return (ENOTSUP);
788 }
789 #endif
790
791
792 int
793 sendmsg(struct proc *p, register struct sendmsg_args *uap, register_t *retval)
794 {
795 struct msghdr msg;
796 struct user_msghdr user_msg;
797 caddr_t msghdrp;
798 int size_of_msghdr;
799 int error;
800 int size_of_iovec;
801 uio_t auio = NULL;
802 struct user_iovec *iovp;
803
804 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0,0,0,0,0);
805 AUDIT_ARG(fd, uap->s);
806 if (IS_64BIT_PROCESS(p)) {
807 msghdrp = (caddr_t) &user_msg;
808 size_of_msghdr = sizeof(user_msg);
809 size_of_iovec = sizeof(struct user_iovec);
810 }
811 else {
812 msghdrp = (caddr_t) &msg;
813 size_of_msghdr = sizeof(msg);
814 size_of_iovec = sizeof(struct iovec);
815 }
816 error = copyin(uap->msg, msghdrp, size_of_msghdr);
817 if (error)
818 {
819 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
820 return (error);
821 }
822
823 /* only need to copy if user process is not 64-bit */
824 if (!IS_64BIT_PROCESS(p)) {
825 user_msg.msg_flags = msg.msg_flags;
826 user_msg.msg_controllen = msg.msg_controllen;
827 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
828 user_msg.msg_iovlen = msg.msg_iovlen;
829 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
830 user_msg.msg_namelen = msg.msg_namelen;
831 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
832 }
833
834 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
835 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
836 return (EMSGSIZE);
837 }
838
839 /* allocate a uio large enough to hold the number of iovecs passed */
840 auio = uio_create(user_msg.msg_iovlen, 0,
841 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
842 UIO_WRITE);
843 if (auio == NULL) {
844 error = ENOBUFS;
845 goto done;
846 }
847
848 if (user_msg.msg_iovlen) {
849 /* get location of iovecs within the uio. then copyin the iovecs from
850 * user space.
851 */
852 iovp = uio_iovsaddr(auio);
853 if (iovp == NULL) {
854 error = ENOBUFS;
855 goto done;
856 }
857 error = copyin(user_msg.msg_iov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec));
858 if (error)
859 goto done;
860 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
861
862 /* finish setup of uio_t */
863 uio_calculateresid(auio);
864 }
865 else {
866 user_msg.msg_iov = 0;
867 }
868
869 #if COMPAT_43_SOCKET
870 user_msg.msg_flags = 0;
871 #endif
872 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
873 done:
874 if (auio != NULL) {
875 uio_free(auio);
876 }
877 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
878
879 return (error);
880 }
881
882 static int
883 recvit(p, s, mp, uiop, namelenp, retval)
884 register struct proc *p;
885 int s;
886 register struct user_msghdr *mp;
887 uio_t uiop;
888 user_addr_t namelenp;
889 register_t *retval;
890 {
891 int len, error;
892 struct mbuf *m, *control = 0;
893 user_addr_t ctlbuf;
894 struct socket *so;
895 struct sockaddr *fromsa = 0;
896 struct fileproc *fp;
897 #if KTRACE
898 uio_t ktruio = NULL;
899 #endif
900
901 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0,0,0,0,0);
902 proc_fdlock(p);
903 if ( (error = fp_lookup(p, s, &fp, 1)) ) {
904 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
905 proc_fdunlock(p);
906 return (error);
907 }
908 if (fp->f_type != DTYPE_SOCKET) {
909 fp_drop(p, s, fp,1);
910 proc_fdunlock(p);
911 return(ENOTSOCK);
912 }
913
914 so = (struct socket *)fp->f_data;
915
916 proc_fdunlock(p);
917 if (uio_resid(uiop) < 0) {
918 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL,0,0,0,0);
919 error = EINVAL;
920 goto out1;
921 }
922 #if KTRACE
923 if (KTRPOINT(p, KTR_GENIO)) {
924 ktruio = uio_duplicate(uiop);
925 }
926 #endif
927
928 len = uio_resid(uiop);
929 if (so == NULL)
930 error = EBADF;
931 else {
932 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
933 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
934 &mp->msg_flags);
935 }
936 AUDIT_ARG(sockaddr, p, fromsa);
937 if (error) {
938 if (uio_resid(uiop) != len && (error == ERESTART ||
939 error == EINTR || error == EWOULDBLOCK))
940 error = 0;
941 }
942 #if KTRACE
943 if (ktruio != NULL) {
944 if (error == 0) {
945 uio_setresid(ktruio, len - uio_resid(uiop));
946 ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error);
947 }
948 uio_free(ktruio);
949 }
950 #endif
951 if (error)
952 goto out;
953 *retval = len - uio_resid(uiop);
954 if (mp->msg_name) {
955 len = mp->msg_namelen;
956 if (len <= 0 || fromsa == 0)
957 len = 0;
958 else {
959 #ifndef MIN
960 #define MIN(a,b) ((a)>(b)?(b):(a))
961 #endif
962 /* save sa_len before it is destroyed by MSG_COMPAT */
963 len = MIN(len, fromsa->sa_len);
964 #if COMPAT_43_SOCKET
965 if (mp->msg_flags & MSG_COMPAT)
966 ((struct osockaddr *)fromsa)->sa_family =
967 fromsa->sa_family;
968 #endif
969 error = copyout(fromsa, mp->msg_name, (unsigned)len);
970 if (error)
971 goto out;
972 }
973 mp->msg_namelen = len;
974 if (namelenp &&
975 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
976 #if COMPAT_43_SOCKET
977 if (mp->msg_flags & MSG_COMPAT)
978 error = 0; /* old recvfrom didn't check */
979 else
980 #endif
981 goto out;
982 }
983 }
984 if (mp->msg_control) {
985 #if COMPAT_43_SOCKET
986 /*
987 * We assume that old recvmsg calls won't receive access
988 * rights and other control info, esp. as control info
989 * is always optional and those options didn't exist in 4.3.
990 * If we receive rights, trim the cmsghdr; anything else
991 * is tossed.
992 */
993 if (control && mp->msg_flags & MSG_COMPAT) {
994 if (mtod(control, struct cmsghdr *)->cmsg_level !=
995 SOL_SOCKET ||
996 mtod(control, struct cmsghdr *)->cmsg_type !=
997 SCM_RIGHTS) {
998 mp->msg_controllen = 0;
999 goto out;
1000 }
1001 control->m_len -= sizeof (struct cmsghdr);
1002 control->m_data += sizeof (struct cmsghdr);
1003 }
1004 #endif
1005 len = mp->msg_controllen;
1006 m = control;
1007 mp->msg_controllen = 0;
1008 ctlbuf = mp->msg_control;
1009
1010 while (m && len > 0) {
1011 unsigned int tocopy;
1012
1013 if (len >= m->m_len)
1014 tocopy = m->m_len;
1015 else {
1016 mp->msg_flags |= MSG_CTRUNC;
1017 tocopy = len;
1018 }
1019
1020 error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf, tocopy);
1021 if (error)
1022 goto out;
1023
1024 ctlbuf += tocopy;
1025 len -= tocopy;
1026 m = m->m_next;
1027 }
1028 mp->msg_controllen = ctlbuf - mp->msg_control;
1029 }
1030 out:
1031 if (fromsa)
1032 FREE(fromsa, M_SONAME);
1033 if (control)
1034 m_freem(control);
1035 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
1036 out1:
1037 fp_drop(p, s, fp, 0);
1038 return (error);
1039 }
1040
1041
1042 int
1043 recvfrom(p, uap, retval)
1044 struct proc *p;
1045 register struct recvfrom_args /* {
1046 int s;
1047 caddr_t buf;
1048 size_t len;
1049 int flags;
1050 caddr_t from;
1051 int *fromlenaddr;
1052 } */ *uap;
1053 register_t *retval;
1054 {
1055 struct user_msghdr msg;
1056 int error;
1057 uio_t auio = NULL;
1058
1059 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0,0,0,0,0);
1060 AUDIT_ARG(fd, uap->s);
1061
1062 if (uap->fromlenaddr) {
1063 error = copyin(uap->fromlenaddr,
1064 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1065 if (error)
1066 return (error);
1067 } else
1068 msg.msg_namelen = 0;
1069 msg.msg_name = uap->from;
1070 auio = uio_create(1, 0,
1071 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1072 UIO_READ);
1073 if (auio == NULL) {
1074 return (ENOMEM);
1075 }
1076
1077 uio_addiov(auio, uap->buf, uap->len);
1078 /* no need to set up msg_iov. recvit uses uio_t we send it */
1079 msg.msg_iov = 0;
1080 msg.msg_iovlen = 0;
1081 msg.msg_control = 0;
1082 msg.msg_controllen = 0;
1083 msg.msg_flags = uap->flags;
1084 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1085 if (auio != NULL) {
1086 uio_free(auio);
1087 }
1088
1089 #if HACK_FOR_4056224
1090 /*
1091 * Radar 4056224
1092 * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
1093 * This will be removed in the release following Tiger
1094 */
1095 if (error == ENOTSOCK && proc_is64bit(p) == 0) {
1096 struct fileproc *fp;
1097
1098 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
1099 (void) fp_drop(p, uap->s, fp,0);
1100
1101 if (fp->f_type == DTYPE_PIPE) {
1102 struct read_args read_uap;
1103 user_ssize_t read_retval;
1104
1105 if (p->p_pid > last_pid_4056224) {
1106 last_pid_4056224 = p->p_pid;
1107
1108 printf("%s[%d] uses send/recv on a pipe\n",
1109 p->p_comm, p->p_pid);
1110 }
1111
1112 bzero(&read_uap, sizeof(struct read_args));
1113 read_uap.fd = uap->s;
1114 read_uap.cbuf = uap->buf;
1115 read_uap.nbyte = uap->len;
1116
1117 error = read(p, &read_uap, &read_retval);
1118 *retval = (int)read_retval;
1119 }
1120 }
1121 }
1122 #endif /* HACK_FOR_4056224 */
1123
1124 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error,0,0,0,0);
1125
1126 return (error);
1127 }
1128
#if COMPAT_43_SOCKET
/*
 * orecvfrom: old-format entry point.  Marks the request with MSG_COMPAT
 * (modifying the caller's argument block in place) and then runs the
 * regular recvfrom().
 */
int
orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval)
{
	uap->flags = uap->flags | MSG_COMPAT;

	return (recvfrom(p, uap, retval));
}
#endif
1138
1139
1140 #if COMPAT_43_SOCKET
1141 int
1142 orecv(__unused struct proc *p, __unused struct orecv_args *uap,
1143 __unused register_t *retval)
1144 {
1145 /* these are no longer supported and in fact
1146 * there is no way to call it directly.
1147 * LP64todo - remove this once we're sure there are no clients
1148 */
1149
1150 return (ENOTSUP);
1151 }
1152
1153 /*
1154 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1155 * overlays the new one, missing only the flags, and with the (old) access
1156 * rights where the control fields are now.
1157 */
1158 int
1159 orecvmsg(__unused struct proc *p, __unused struct orecvmsg_args *uap,
1160 __unused register_t *retval)
1161 {
1162 /* these are no longer supported and in fact
1163 * there is no way to call it directly.
1164 * LP64todo - remove this once we're sure there are no clients
1165 */
1166
1167 return (ENOTSUP);
1168
1169 }
1170 #endif
1171
1172 int
1173 recvmsg(p, uap, retval)
1174 struct proc *p;
1175 struct recvmsg_args *uap;
1176 register_t *retval;
1177 {
1178 struct msghdr msg;
1179 struct user_msghdr user_msg;
1180 caddr_t msghdrp;
1181 int size_of_msghdr;
1182 user_addr_t uiov;
1183 register int error;
1184 int size_of_iovec;
1185 uio_t auio = NULL;
1186 struct user_iovec *iovp;
1187
1188 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0,0,0,0,0);
1189 AUDIT_ARG(fd, uap->s);
1190 if (IS_64BIT_PROCESS(p)) {
1191 msghdrp = (caddr_t) &user_msg;
1192 size_of_msghdr = sizeof(user_msg);
1193 size_of_iovec = sizeof(struct user_iovec);
1194 }
1195 else {
1196 msghdrp = (caddr_t) &msg;
1197 size_of_msghdr = sizeof(msg);
1198 size_of_iovec = sizeof(struct iovec);
1199 }
1200 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1201 if (error)
1202 {
1203 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1204 return (error);
1205 }
1206
1207 /* only need to copy if user process is not 64-bit */
1208 if (!IS_64BIT_PROCESS(p)) {
1209 user_msg.msg_flags = msg.msg_flags;
1210 user_msg.msg_controllen = msg.msg_controllen;
1211 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
1212 user_msg.msg_iovlen = msg.msg_iovlen;
1213 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
1214 user_msg.msg_namelen = msg.msg_namelen;
1215 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
1216 }
1217
1218 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1219 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
1220 return (EMSGSIZE);
1221 }
1222
1223 #if COMPAT_43_SOCKET
1224 user_msg.msg_flags = uap->flags &~ MSG_COMPAT;
1225 #else
1226 user_msg.msg_flags = uap->flags;
1227 #endif
1228
1229 /* allocate a uio large enough to hold the number of iovecs passed */
1230 auio = uio_create(user_msg.msg_iovlen, 0,
1231 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1232 UIO_READ);
1233 if (auio == NULL) {
1234 error = ENOMEM;
1235 goto done;
1236 }
1237
1238 /* get location of iovecs within the uio. then copyin the iovecs from
1239 * user space.
1240 */
1241 iovp = uio_iovsaddr(auio);
1242 if (iovp == NULL) {
1243 error = ENOMEM;
1244 goto done;
1245 }
1246 uiov = user_msg.msg_iov;
1247 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1248 error = copyin(uiov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec));
1249 if (error)
1250 goto done;
1251
1252 /* finish setup of uio_t */
1253 uio_calculateresid(auio);
1254
1255 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1256 if (!error) {
1257 user_msg.msg_iov = uiov;
1258 /* only need to copy if user process is not 64-bit */
1259 if (!IS_64BIT_PROCESS(p)) {
1260 // LP64todo - do all these change? if not, then no need to copy all of them!
1261 msg.msg_flags = user_msg.msg_flags;
1262 msg.msg_controllen = user_msg.msg_controllen;
1263 msg.msg_control = CAST_DOWN(caddr_t, user_msg.msg_control);
1264 msg.msg_iovlen = user_msg.msg_iovlen;
1265 msg.msg_iov = (struct iovec *) CAST_DOWN(caddr_t, user_msg.msg_iov);
1266 msg.msg_namelen = user_msg.msg_namelen;
1267 msg.msg_name = CAST_DOWN(caddr_t, user_msg.msg_name);
1268 }
1269 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1270 }
1271 done:
1272 if (auio != NULL) {
1273 uio_free(auio);
1274 }
1275 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1276 return (error);
1277 }
1278
1279 /* ARGSUSED */
1280 int
1281 shutdown(__unused struct proc *p, struct shutdown_args *uap, __unused register_t *retval)
1282 {
1283 struct socket * so;
1284 int error;
1285
1286 AUDIT_ARG(fd, uap->s);
1287 error = file_socket(uap->s, &so);
1288 if (error)
1289 return (error);
1290 if (so == NULL) {
1291 error = EBADF;
1292 goto out;
1293 }
1294 error = soshutdown((struct socket *)so, uap->how);
1295 out:
1296 file_drop(uap->s);
1297 return(error);
1298 }
1299
1300
1301
1302
1303
1304 /* ARGSUSED */
1305 int
1306 setsockopt(struct proc *p, struct setsockopt_args *uap, __unused register_t *retval)
1307 {
1308 struct socket * so;
1309 struct sockopt sopt;
1310 int error;
1311
1312 AUDIT_ARG(fd, uap->s);
1313 if (uap->val == 0 && uap->valsize != 0)
1314 return (EFAULT);
1315 if (uap->valsize < 0)
1316 return (EINVAL);
1317
1318 error = file_socket(uap->s, &so);
1319 if (error)
1320 return (error);
1321
1322 sopt.sopt_dir = SOPT_SET;
1323 sopt.sopt_level = uap->level;
1324 sopt.sopt_name = uap->name;
1325 sopt.sopt_val = uap->val;
1326 sopt.sopt_valsize = uap->valsize;
1327 sopt.sopt_p = p;
1328
1329 if (so == NULL) {
1330 error = EINVAL;
1331 goto out;
1332 }
1333 error = sosetopt(so, &sopt);
1334 out:
1335 file_drop(uap->s);
1336 return(error);
1337 }
1338
1339
1340
1341 int
1342 getsockopt(struct proc *p, struct getsockopt_args *uap, __unused register_t *retval)
1343 {
1344 int error;
1345 socklen_t valsize;
1346 struct sockopt sopt;
1347 struct socket * so;
1348
1349 error = file_socket(uap->s, &so);
1350 if (error)
1351 return (error);
1352 if (uap->val) {
1353 error = copyin(uap->avalsize, (caddr_t)&valsize, sizeof (valsize));
1354 if (error)
1355 goto out;
1356 if (valsize < 0) {
1357 error = EINVAL;
1358 goto out;
1359 }
1360 } else
1361 valsize = 0;
1362
1363 sopt.sopt_dir = SOPT_GET;
1364 sopt.sopt_level = uap->level;
1365 sopt.sopt_name = uap->name;
1366 sopt.sopt_val = uap->val;
1367 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1368 sopt.sopt_p = p;
1369
1370 if (so == NULL) {
1371 error = EBADF;
1372 goto out;
1373 }
1374 error = sogetopt((struct socket *)so, &sopt);
1375 if (error == 0) {
1376 valsize = sopt.sopt_valsize;
1377 error = copyout((caddr_t)&valsize, uap->avalsize, sizeof (valsize));
1378 }
1379 out:
1380 file_drop(uap->s);
1381 return (error);
1382 }
1383
1384
1385 /*
1386 * Get socket name.
1387 */
1388 /* ARGSUSED */
1389 static int
1390 getsockname1(__unused struct proc *p, struct getsockname_args *uap, __unused register_t *retval,
1391 int compat)
1392 {
1393 struct socket *so;
1394 struct sockaddr *sa;
1395 socklen_t len;
1396 int error;
1397
1398 error = file_socket(uap->fdes, &so);
1399 if (error)
1400 return (error);
1401 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
1402 if (error)
1403 goto out;
1404 if (so == NULL) {
1405 error = EBADF;
1406 goto out;
1407 }
1408 sa = 0;
1409 socket_lock(so, 1);
1410 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1411 if (error == 0)
1412 {
1413 struct socket_filter_entry *filter;
1414 int filtered = 0;
1415 for (filter = so->so_filt; filter && error == 0;
1416 filter = filter->sfe_next_onsocket) {
1417 if (filter->sfe_filter->sf_filter.sf_getsockname) {
1418 if (!filtered) {
1419 filtered = 1;
1420 sflt_use(so);
1421 socket_unlock(so, 0);
1422 }
1423 error = filter->sfe_filter->sf_filter.sf_getsockname(filter->sfe_cookie,
1424 so, &sa);
1425 }
1426 }
1427
1428 if (error == EJUSTRETURN)
1429 error = 0;
1430
1431 if (filtered) {
1432 socket_lock(so, 0);
1433 sflt_unuse(so);
1434 }
1435 }
1436 socket_unlock(so, 1);
1437 if (error)
1438 goto bad;
1439 if (sa == 0) {
1440 len = 0;
1441 goto gotnothing;
1442 }
1443
1444 len = MIN(len, sa->sa_len);
1445 #if COMPAT_43_SOCKET
1446 if (compat)
1447 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1448 #endif
1449 error = copyout((caddr_t)sa, uap->asa, len);
1450 if (error == 0)
1451 gotnothing:
1452 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1453 bad:
1454 if (sa)
1455 FREE(sa, M_SONAME);
1456 out:
1457 file_drop(uap->fdes);
1458 return (error);
1459 }
1460
1461 int
1462 getsockname(struct proc *p, struct getsockname_args *uap, register_t *retval)
1463 {
1464 return (getsockname1(p, uap, retval, 0));
1465 }
1466
1467 #if COMPAT_43_SOCKET
1468 int
1469 ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval)
1470 {
1471 return (getsockname1(p, uap, retval, 1));
1472 }
1473 #endif /* COMPAT_43_SOCKET */
1474
1475 /*
1476 * Get name of peer for connected socket.
1477 */
1478 /* ARGSUSED */
1479 int
1480 getpeername1(__unused struct proc *p, struct getpeername_args *uap, __unused register_t *retval,
1481 int compat)
1482 {
1483 struct socket *so;
1484 struct sockaddr *sa;
1485 socklen_t len;
1486 int error;
1487
1488 error = file_socket(uap->fdes, &so);
1489 if (error)
1490 return (error);
1491 if (so == NULL) {
1492 error = EBADF;
1493 goto out;
1494 }
1495
1496 socket_lock(so, 1);
1497
1498 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1499 socket_unlock(so, 1);
1500 error = ENOTCONN;
1501 goto out;
1502 }
1503 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
1504 if (error) {
1505 socket_unlock(so, 1);
1506 goto out;
1507 }
1508 sa = 0;
1509 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1510 if (error == 0)
1511 {
1512 struct socket_filter_entry *filter;
1513 int filtered = 0;
1514 for (filter = so->so_filt; filter && error == 0;
1515 filter = filter->sfe_next_onsocket) {
1516 if (filter->sfe_filter->sf_filter.sf_getpeername) {
1517 if (!filtered) {
1518 filtered = 1;
1519 sflt_use(so);
1520 socket_unlock(so, 0);
1521 }
1522 error = filter->sfe_filter->sf_filter.sf_getpeername(filter->sfe_cookie,
1523 so, &sa);
1524 }
1525 }
1526
1527 if (error == EJUSTRETURN)
1528 error = 0;
1529
1530 if (filtered) {
1531 socket_lock(so, 0);
1532 sflt_unuse(so);
1533 }
1534 }
1535 socket_unlock(so, 1);
1536 if (error)
1537 goto bad;
1538 if (sa == 0) {
1539 len = 0;
1540 goto gotnothing;
1541 }
1542 len = MIN(len, sa->sa_len);
1543 #if COMPAT_43_SOCKET
1544 if (compat)
1545 ((struct osockaddr *)sa)->sa_family =
1546 sa->sa_family;
1547 #endif
1548 error = copyout(sa, uap->asa, len);
1549 if (error)
1550 goto bad;
1551 gotnothing:
1552 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1553 bad:
1554 if (sa) FREE(sa, M_SONAME);
1555 out:
1556 file_drop(uap->fdes);
1557 return (error);
1558 }
1559
1560 int
1561 getpeername(struct proc *p, struct getpeername_args *uap, register_t *retval)
1562 {
1563
1564 return (getpeername1(p, uap, retval, 0));
1565 }
1566
1567 #if COMPAT_43_SOCKET
1568 int
1569 ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval)
1570 {
1571
1572 return (getpeername1(p, uap, retval, 1));
1573 }
1574 #endif /* COMPAT_43_SOCKET */
1575
1576 int
1577 sockargs(mp, data, buflen, type)
1578 struct mbuf **mp;
1579 user_addr_t data;
1580 int buflen, type;
1581 {
1582 register struct sockaddr *sa;
1583 register struct mbuf *m;
1584 int error;
1585
1586 if ((u_int)buflen > MLEN) {
1587 #if COMPAT_43_SOCKET
1588 if (type == MT_SONAME && (u_int)buflen <= 112)
1589 buflen = MLEN; /* unix domain compat. hack */
1590 else
1591 #endif
1592 if ((u_int)buflen > MCLBYTES)
1593 return (EINVAL);
1594 }
1595 m = m_get(M_WAIT, type);
1596 if (m == NULL)
1597 return (ENOBUFS);
1598 if ((u_int)buflen > MLEN) {
1599 MCLGET(m, M_WAIT);
1600 if ((m->m_flags & M_EXT) == 0) {
1601 m_free(m);
1602 return ENOBUFS;
1603 }
1604 }
1605 m->m_len = buflen;
1606 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
1607 if (error)
1608 (void) m_free(m);
1609 else {
1610 *mp = m;
1611 if (type == MT_SONAME) {
1612 sa = mtod(m, struct sockaddr *);
1613
1614 #if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
1615 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1616 sa->sa_family = sa->sa_len;
1617 #endif
1618 sa->sa_len = buflen;
1619 }
1620 }
1621 return (error);
1622 }
1623
1624 /*
1625 * Given a user_addr_t of length len, allocate and fill out a *sa.
1626 */
1627 int
1628 getsockaddr(struct sockaddr **namp, user_addr_t uaddr, size_t len)
1629 {
1630 struct sockaddr *sa;
1631 int error;
1632
1633 if (len > SOCK_MAXADDRLEN)
1634 return ENAMETOOLONG;
1635
1636 if (len == 0)
1637 return EINVAL;
1638
1639 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1640 if (sa == NULL) {
1641 return ENOMEM;
1642 }
1643 error = copyin(uaddr, (caddr_t)sa, len);
1644 if (error) {
1645 FREE(sa, M_SONAME);
1646 } else {
1647 #if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
1648 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1649 sa->sa_family = sa->sa_len;
1650 #endif
1651 sa->sa_len = len;
1652 *namp = sa;
1653 }
1654 return error;
1655 }
1656
1657
1658 #if SENDFILE
1659 /*
1660 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1661 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1662 * been made static, but may be useful in the future for doing zero-copy in
1663 * other parts of the networking code.
1664 */
1665 static void
1666 sf_buf_init(void *arg)
1667 {
1668 int i;
1669
1670 SLIST_INIT(&sf_freelist);
1671 kmem_alloc_pageable(kernel_map, &sf_base, nsfbufs * PAGE_SIZE);
1672 MALLOC(sf_bufs, struct sf_buf *, nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT|M_ZERO);
1673 if (sf_bufs == NULL)
1674 return; /* XXX silently fail leaving sf_bufs NULL */
1675
1676 for (i = 0; i < nsfbufs; i++) {
1677 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1678 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1679 }
1680 }
1681
1682 /*
1683 * Get an sf_buf from the freelist. Will block if none are available.
1684 */
1685 static struct sf_buf *
1686 sf_buf_alloc()
1687 {
1688 struct sf_buf *sf;
1689
1690 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1691 sf_buf_alloc_want = 1;
1692 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1693 }
1694 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1695 sf->refcnt = 1;
1696 return (sf);
1697 }
1698
1699 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1700 static void
1701 sf_buf_ref(caddr_t addr, u_int size)
1702 {
1703 struct sf_buf *sf;
1704
1705 sf = dtosf(addr);
1706 if (sf->refcnt == 0)
1707 panic("sf_buf_ref: referencing a free sf_buf");
1708 sf->refcnt++;
1709 }
1710
1711 /*
1712 * Lose a reference to an sf_buf. When none left, detach mapped page
1713 * and release resources back to the system.
1714 *
1715 * Must be called at splimp.
1716 */
1717 static void
1718 sf_buf_free(caddr_t addr, u_int size)
1719 {
1720 struct sf_buf *sf;
1721 struct vm_page *m;
1722
1723 sf = dtosf(addr);
1724 if (sf->refcnt == 0)
1725 panic("sf_buf_free: freeing free sf_buf");
1726 sf->refcnt--;
1727 if (sf->refcnt == 0) {
1728 pmap_qremove((vm_offset_t)addr, 1);
1729 m = sf->m;
1730 vm_page_unwire(m, 0);
1731 /*
1732 * Check for the object going away on us. This can
1733 * happen since we don't hold a reference to it.
1734 * If so, we're responsible for freeing the page.
1735 */
1736 if (m->wire_count == 0 && m->object == NULL)
1737 vm_page_lock_queues();
1738 vm_page_free(m);
1739 vm_page_unlock_queues();
1740 sf->m = NULL;
1741 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1742 if (sf_buf_alloc_want) {
1743 sf_buf_alloc_want = 0;
1744 wakeup(&sf_freelist);
1745 }
1746 }
1747 }
1748
1749 /*
1750 * sendfile(2).
1751 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1752 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1753 *
1754 * Send a file specified by 'fd' and starting at 'offset' to a socket
1755 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1756 * nbytes == 0. Optionally add a header and/or trailer to the socket
1757 * output. If specified, write the total number of bytes sent into *sbytes.
1758 */
1759 int
1760 sendfile(struct proc *p, struct sendfile_args *uap)
1761 {
1762 struct fileproc *fp;
1763 struct vnode *vp;
1764 struct vm_object *obj;
1765 struct socket *so;
1766 struct mbuf *m;
1767 struct sf_buf *sf;
1768 struct vm_page *pg;
1769 struct writev_args nuap;
1770 struct sf_hdtr hdtr;
1771 off_t off, xfsize, sbytes = 0;
1772 int error = 0, s;
1773
1774 if (sf_bufs == NULL) {
1775 /* Fail if initialization failed */
1776 return ENOSYS;
1777 }
1778
1779 /*
1780 * Do argument checking. Must be a regular file in, stream
1781 * type and connected socket out, positive offset.
1782 */
1783 if (error = fp_getfvp(p, uap->fd, &fp, &vp))
1784 goto done;
1785 if (fp->f_flag & FREAD) == 0) {
1786 error = EBADF;
1787 goto done1;
1788 }
1789 obj = vp->v_object;
1790 if (vp->v_type != VREG || obj == NULL) {
1791 error = EINVAL;
1792 goto done1;
1793 }
1794 error = file_socket(uap->s, &so);
1795 if (error)
1796 goto done1;
1797 if (so == NULL) {
1798 error = EBADF;
1799 goto done2;
1800 }
1801
1802 socket_lock(so, 1);
1803
1804 if (so->so_type != SOCK_STREAM) {
1805 error = EINVAL;
1806 goto done3;
1807 }
1808 if ((so->so_state & SS_ISCONNECTED) == 0) {
1809 error = ENOTCONN;
1810 goto done3;
1811 }
1812 if (uap->offset < 0) {
1813 error = EINVAL;
1814 goto done3;
1815 }
1816
1817 /*
1818 * If specified, get the pointer to the sf_hdtr struct for
1819 * any headers/trailers.
1820 */
1821 if (uap->hdtr != NULL) {
1822 error = copyin(CAST_USER_ADDR_T(uap->hdtr), &hdtr, sizeof(hdtr));
1823 if (error)
1824 goto done3;
1825 /*
1826 * Send any headers. Wimp out and use writev(2).
1827 */
1828 if (hdtr.headers != NULL) {
1829 nuap.fd = uap->s;
1830 nuap.iovp = hdtr.headers;
1831 nuap.iovcnt = hdtr.hdr_cnt;
1832 error = writev(p, &nuap);
1833 if (error)
1834 goto done3;
1835 sbytes += p->p_retval[0];
1836 }
1837 }
1838
1839 /*
1840 * Protect against multiple writers to the socket.
1841 */
1842 (void) sblock(&so->so_snd, M_WAIT);
1843
1844 /*
1845 * Loop through the pages in the file, starting with the requested
1846 * offset. Get a file page (do I/O if necessary), map the file page
1847 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1848 * it on the socket.
1849 */
1850 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1851 vm_object_offset_t pindex;
1852 vm_object_offset_t pgoff;
1853
1854 pindex = OFF_TO_IDX(off);
1855 retry_lookup:
1856 /*
1857 * Calculate the amount to transfer. Not to exceed a page,
1858 * the EOF, or the passed in nbytes.
1859 */
1860 xfsize = obj->un_pager.vnp.vnp_size - off;
1861 if (xfsize > PAGE_SIZE_64)
1862 xfsize = PAGE_SIZE;
1863 pgoff = (vm_object_offset_t)(off & PAGE_MASK_64);
1864 if (PAGE_SIZE - pgoff < xfsize)
1865 xfsize = PAGE_SIZE_64 - pgoff;
1866 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1867 xfsize = uap->nbytes - sbytes;
1868 if (xfsize <= 0)
1869 break;
1870 /*
1871 * Optimize the non-blocking case by looking at the socket space
1872 * before going to the extra work of constituting the sf_buf.
1873 */
1874 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1875 if (so->so_state & SS_CANTSENDMORE)
1876 error = EPIPE;
1877 else
1878 error = EAGAIN;
1879 sbunlock(&so->so_snd, 0); /* will release lock */
1880 goto done2;
1881 }
1882 /*
1883 * Attempt to look up the page. If the page doesn't exist or the
1884 * part we're interested in isn't valid, then read it from disk.
1885 * If some other part of the kernel has this page (i.e. it's busy),
1886 * then disk I/O may be occuring on it, so wait and retry.
1887 */
1888 pg = vm_page_lookup(obj, pindex);
1889 if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
1890 !vm_page_is_valid(pg, pgoff, xfsize))) {
1891 struct uio auio;
1892 struct iovec aiov;
1893 int bsize;
1894
1895 if (pg == NULL) {
1896 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1897 if (pg == NULL) {
1898 VM_WAIT;
1899 goto retry_lookup;
1900 }
1901 /*
1902 * don't just clear PG_BUSY manually -
1903 * vm_page_alloc() should be considered opaque,
1904 * use the VM routine provided to clear
1905 * PG_BUSY.
1906 */
1907 vm_page_wakeup(pg);
1908
1909 }
1910 /*
1911 * Ensure that our page is still around when the I/O completes.
1912 */
1913 vm_page_io_start(pg);
1914 vm_page_wire(pg);
1915 /*
1916 * Get the page from backing store.
1917 */
1918 bsize = vp->v_mount->mnt_vfsstat.f_iosize;
1919 auio.uio_iov = &aiov;
1920 auio.uio_iovcnt = 1;
1921 aiov.iov_base = 0;
1922 aiov.iov_len = MAXBSIZE;
1923 auio.uio_offset = trunc_page(off);
1924 auio.uio_segflg = UIO_NOCOPY;
1925 auio.uio_rw = UIO_READ;
1926 uio_setresid(&auio, MAXBSIZE);
1927 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1928 p->p_ucred);
1929 vm_page_flag_clear(pg, PG_ZERO);
1930 vm_page_io_finish(pg);
1931 if (error) {
1932 vm_page_unwire(pg, 0);
1933 /*
1934 * See if anyone else might know about this page.
1935 * If not and it is not valid, then free it.
1936 */
1937 if (pg->wire_count == 0 && pg->valid == 0 &&
1938 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1939 pg->hold_count == 0)
1940 vm_page_lock_queues();
1941 vm_page_free(pg);
1942 vm_page_unlock_queues();
1943 sbunlock(&so->so_snd, 0); /* will release socket lock */
1944 goto done2;
1945 }
1946 } else {
1947 if ((pg->flags & PG_BUSY) || pg->busy) {
1948 s = splvm();
1949 if ((pg->flags & PG_BUSY) || pg->busy) {
1950 /*
1951 * Page is busy. Wait and retry.
1952 */
1953 vm_page_flag_set(pg, PG_WANTED);
1954 tsleep(pg, PVM, "sfpbsy", 0);
1955 goto retry_lookup;
1956 }
1957 }
1958 /*
1959 * Protect from having the page ripped out from beneath us.
1960 */
1961 vm_page_wire(pg);
1962 }
1963 /*
1964 * Allocate a kernel virtual page and insert the physical page
1965 * into it.
1966 */
1967 sf = sf_buf_alloc();
1968 sf->m = pg;
1969 pmap_qenter(sf->kva, &pg, 1);
1970 /*
1971 * Get an mbuf header and set it up as having external storage.
1972 */
1973 MGETHDR(m, M_WAIT, MT_DATA);
1974 if (m == NULL) {
1975 error = ENOBUFS;
1976 sbunlock(&so->so_snd, 0); /* will release socket lock */
1977 goto done2;
1978 }
1979 m->m_ext.ext_free = sf_buf_free;
1980 m->m_ext.ext_ref = sf_buf_ref;
1981 m->m_ext.ext_buf = (void *)sf->kva;
1982 m->m_ext.ext_size = PAGE_SIZE;
1983 m->m_data = (char *) sf->kva + pgoff;
1984 m->m_flags |= M_EXT;
1985 m->m_pkthdr.len = m->m_len = xfsize;
1986 /*
1987 * Add the buffer to the socket buffer chain.
1988 */
1989 retry_space:
1990 /*
1991 * Make sure that the socket is still able to take more data.
1992 * CANTSENDMORE being true usually means that the connection
1993 * was closed. so_error is true when an error was sensed after
1994 * a previous send.
1995 * The state is checked after the page mapping and buffer
1996 * allocation above since those operations may block and make
1997 * any socket checks stale. From this point forward, nothing
1998 * blocks before the pru_send (or more accurately, any blocking
1999 * results in a loop back to here to re-check).
2000 */
2001 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2002 if (so->so_state & SS_CANTSENDMORE) {
2003 error = EPIPE;
2004 } else {
2005 error = so->so_error;
2006 so->so_error = 0;
2007 }
2008 m_freem(m);
2009 sbunlock(&so->so_snd, 0); /* will release socket lock */
2010 goto done2;
2011 }
2012 /*
2013 * Wait for socket space to become available. We do this just
2014 * after checking the connection state above in order to avoid
2015 * a race condition with sbwait().
2016 */
2017 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2018 if (so->so_state & SS_NBIO) {
2019 m_freem(m);
2020 sbunlock(&so->so_snd, 0); /* will release socket lock */
2021 error = EAGAIN;
2022 goto done2;
2023 }
2024 error = sbwait(&so->so_snd);
2025 /*
2026 * An error from sbwait usually indicates that we've
2027 * been interrupted by a signal. If we've sent anything
2028 * then return bytes sent, otherwise return the error.
2029 */
2030 if (error) {
2031 m_freem(m);
2032 sbunlock(&so->so_snd, 0);
2033 goto done2;
2034 }
2035 goto retry_space;
2036 }
2037 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
2038 splx(s);
2039 if (error) {
2040 sbunlock(&so->so_snd, 0); /* will release socket lock */
2041 goto done2;
2042 }
2043 }
2044 sbunlock(&so->so_snd, 0); /* will release socket lock */
2045
2046 /*
2047 * Send trailers. Wimp out and use writev(2).
2048 */
2049 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2050 nuap.fd = uap->s;
2051 nuap.iovp = hdtr.trailers;
2052 nuap.iovcnt = hdtr.trl_cnt;
2053 error = writev(p, &nuap);
2054 if (error)
2055 goto done2;
2056 sbytes += p->p_retval[0];
2057 }
2058 done2:
2059 file_drop(uap->s);
2060 done1:
2061 file_drop(uap->fd);
2062 done:
2063 if (uap->sbytes != NULL) {
2064 /* XXX this appears bogus for some early failure conditions */
2065 copyout(&sbytes, CAST_USER_ADDR_T(uap->sbytes), sizeof(off_t));
2066 }
2067 return (error);
2068 done3:
2069 socket_unlock(so, 1);
2070 goto done2;
2071 }
2072
2073 #endif