]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/uipc_syscalls.c
caed4ee613f9818fff7ec22a8adc51783fcbf411
[apple/xnu.git] / bsd / kern / uipc_syscalls.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
64 */
65
66
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/filedesc.h>
71 #include <sys/proc_internal.h>
72 #include <sys/file_internal.h>
73 #include <sys/malloc.h>
74 #include <sys/mbuf.h>
75 #include <kern/lock.h>
76 #include <sys/domain.h>
77 #include <sys/protosw.h>
78 #include <sys/signalvar.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
81 #if KTRACE
82 #include <sys/ktrace.h>
83 #endif
84 #include <sys/kernel.h>
85 #include <sys/uio_internal.h>
86
87 #include <bsm/audit_kernel.h>
88
89 #include <sys/kdebug.h>
90 #include <sys/sysproto.h>
91
/*
 * Shorthand field accessors.  Each f_* name expands to the matching fg_*
 * member reached through f_fglob, so the code below can write fp->f_flag,
 * fp->f_type, fp->f_ops, fp->f_data, ... on a struct fileproc pointer.
 */
92 #define f_flag f_fglob->fg_flag
93 #define f_type f_fglob->fg_type
94 #define f_msgcount f_fglob->fg_msgcount
95 #define f_cred f_fglob->fg_cred
96 #define f_ops f_fglob->fg_ops
97 #define f_offset f_fglob->fg_offset
98 #define f_data f_fglob->fg_data
99 #if KDEBUG
100
/*
 * Trace codes, all built with NETDBG_CODE(DBG_NETSOCK, ...).  The DBG_FNC_*
 * values are passed to KERNEL_DEBUG() (combined with DBG_FUNC_START or
 * DBG_FUNC_END) by the send and receive routines in this file; the
 * DBG_LAYER_* values are not referenced in the part of the file shown here.
 */
101 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
102 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
103 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
104 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
105 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
106 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
107 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
108 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
109 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
110 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
111
112 #endif
113
114
/*
 * Radar 4056224 workaround switch.  When enabled, sendto() and recvfrom()
 * below retry an ENOTSOCK failure on a pipe descriptor through write() and
 * read().  last_pid_4056224 remembers the largest pid that has already been
 * reported with printf(); a process is only logged when its pid is greater
 * than this value.
 */
115 #define HACK_FOR_4056224 1
116 #if HACK_FOR_4056224
117 static pid_t last_pid_4056224 = 0;
118 #endif /* HACK_FOR_4056224 */
119
120
/*
 * Forward declarations and file-scope state for the sf_buf helpers, compiled
 * only when SENDFILE is set.  sf_buf_init is registered through SYSINIT();
 * none of these names are used in the part of the file shown here.
 */
121 #if SENDFILE
122 static void sf_buf_init(void *arg);
123 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
124 static struct sf_buf *sf_buf_alloc(void);
125 static void sf_buf_ref(caddr_t addr, u_int size);
126 static void sf_buf_free(caddr_t addr, u_int size);
127
/* NOTE(review): presumably the free list, mapping base and backing array of the sf_buf pool -- confirm against the sf_buf code. */
128 static SLIST_HEAD(, sf_buf) sf_freelist;
129 static vm_offset_t sf_base;
130 static struct sf_buf *sf_bufs;
131 static int sf_buf_alloc_want;
132 #endif
133
/*
 * sendit() and recvit() are the shared back ends of the send and receive
 * system calls: sendto()/sendmsg() call sendit(), recvfrom() calls recvit().
 * Both take the already-built user_msghdr plus the uio describing the data.
 */
134 static int sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
135 int flags, register_t *retval);
136 static int recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
137 user_addr_t namelenp, register_t *retval);
138
/*
 * Workers that take an extra `compat' flag.  accept1() is called with 0 by
 * accept() and with 1 by oaccept(); when COMPAT_43_SOCKET is set a non-zero
 * flag makes it rewrite the returned address through struct osockaddr.
 * getsockname1() and getpeername1() are defined outside the part of the
 * file shown here.
 */
139 static int accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat);
140 static int getsockname1(struct proc *p, struct getsockname_args *uap,
141 register_t *retval, int compat);
142 static int getpeername1(struct proc *p, struct getpeername_args *uap,
143 register_t *retval, int compat);
144
145
/*
 * Argument structures and prototypes for the old (4.3BSD-style) socket
 * entry points, compiled only when COMPAT_43_SOCKET is set.
 */
146 #if COMPAT_43_SOCKET
/* Arguments of orecvmsg(); the implementation in this file ignores them. */
147 struct orecvmsg_args {
148 int s;
149 struct omsghdr *msg;
150 int flags;
151 };
/* Arguments of osendmsg(); the implementation in this file ignores them. */
152 struct osendmsg_args {
153 int s;
154 caddr_t msg;
155 int flags;
156 };
/* Arguments of osend(); the implementation in this file ignores them. */
157 struct osend_args {
158 int s;
159 caddr_t buf;
160 int len;
161 int flags;
162 };
/* Arguments of orecv(); the implementation in this file ignores them. */
163 struct orecv_args {
164 int s;
165 caddr_t buf;
166 int len;
167 int flags;
168 };
169
/*
 * In this file oaccept() calls accept1() with compat set, orecvfrom() adds
 * MSG_COMPAT to the flags and calls recvfrom(), and orecv(), orecvmsg(),
 * osend() and osendmsg() simply return ENOTSUP.  ogetpeername() and
 * ogetsockname() are defined outside the part of the file shown here.
 */
170 int oaccept(struct proc *p, struct accept_args *uap, register_t *retval);
171 int ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval);
172 int ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval);
173 int orecv(struct proc *p, struct orecv_args *uap, register_t *retval);
174 int orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval);
175 int orecvmsg(struct proc *p, struct orecvmsg_args *uap, register_t *retval);
176 int osend(struct proc *p, struct osend_args *uap, register_t *retval);
177 int osendmsg(struct proc *p, struct osendmsg_args *uap, register_t *retval);
178 #endif // COMPAT_43_SOCKET
179
180 /*
181 * System call interface to the socket abstraction.
182 */
183
/* File operations vector stored in f_ops of every socket-backed file created by socket(), accept1() and socketpair(). */
184 extern struct fileops socketops;
185
186 int
187 socket(p, uap, retval)
188 struct proc *p;
189 register struct socket_args *uap;
190 register_t *retval;
191 {
192 struct socket *so;
193 struct fileproc *fp;
194 int fd, error;
195
196 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
197
198 error = falloc(p, &fp, &fd);
199 if (error) {
200 return (error);
201 }
202 fp->f_flag = FREAD|FWRITE;
203 fp->f_type = DTYPE_SOCKET;
204 fp->f_ops = &socketops;
205
206 error = socreate(uap->domain, &so, uap->type, uap->protocol);
207 if (error) {
208 fp_free(p, fd, fp);
209 } else {
210 fp->f_data = (caddr_t)so;
211
212 proc_fdlock(p);
213 *fdflags(p, fd) &= ~UF_RESERVED;
214
215 fp_drop(p, fd, fp, 1);
216 proc_fdunlock(p);
217
218 *retval = fd;
219 }
220 return (error);
221 }
222
223 /* ARGSUSED */
224 int
225 bind(struct proc *p, struct bind_args *uap, __unused register_t *retval)
226 {
227 struct sockaddr *sa;
228 struct socket *so;
229 int error;
230
231 AUDIT_ARG(fd, uap->s);
232 error = file_socket(uap->s, &so);
233 if (error)
234 return (error);
235 error = getsockaddr(&sa, uap->name, uap->namelen);
236 if (error)
237 goto out;
238 AUDIT_ARG(sockaddr, p, sa);
239 if (so != NULL)
240 error = sobind(so, sa);
241 else
242 error = EBADF;
243 FREE(sa, M_SONAME);
244 out:
245 file_drop(uap->s);
246 return (error);
247 }
248
249
250 int
251 listen(__unused struct proc *p, register struct listen_args *uap,
252 __unused register_t *retval)
253 {
254 int error;
255 struct socket * so;
256
257 AUDIT_ARG(fd, uap->s);
258 error = file_socket(uap->s, &so);
259 if (error)
260 return (error);
261 if (so != NULL)
262 error = solisten(so, uap->backlog);
263 else
264 error = EBADF;
265 file_drop(uap->s);
266 return (error);
267 }
268
269 #if !COMPAT_43_SOCKET
270 #define accept1 accept
271 #endif
272
273
274
275 int
276 accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat)
277 {
278 struct fileproc *fp;
279 struct sockaddr *sa;
280 socklen_t namelen;
281 int error;
282 struct socket *head, *so = NULL;
283 lck_mtx_t *mutex_held;
284 int fd = uap->s;
285 int newfd;;
286 short fflag; /* type must match fp->f_flag */
287 int dosocklock = 0;
288
289 AUDIT_ARG(fd, uap->s);
290 if (uap->name) {
291 error = copyin(uap->anamelen, (caddr_t)&namelen,
292 sizeof(socklen_t));
293 if(error)
294 return (error);
295 }
296 error = fp_getfsock(p, fd, &fp, &head);
297 if (error) {
298 if (error == EOPNOTSUPP)
299 error = ENOTSOCK;
300 return (error);
301 }
302 if (head == NULL) {
303 error = EBADF;
304 goto out;
305 }
306
307 socket_lock(head, 1);
308
309 if (head->so_proto->pr_getlock != NULL) {
310 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
311 dosocklock = 1;
312 }
313 else {
314 mutex_held = head->so_proto->pr_domain->dom_mtx;
315 dosocklock = 0;
316 }
317
318
319 if ((head->so_options & SO_ACCEPTCONN) == 0) {
320 socket_unlock(head, 1);
321 error = EINVAL;
322 goto out;
323 }
324 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
325 socket_unlock(head, 1);
326 error = EWOULDBLOCK;
327 goto out;
328 }
329 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
330 if (head->so_state & SS_CANTRCVMORE) {
331 head->so_error = ECONNABORTED;
332 break;
333 }
334 if (head->so_usecount < 1)
335 panic("accept1: head=%x refcount=%d\n", head, head->so_usecount);
336 error = msleep((caddr_t)&head->so_timeo, mutex_held, PSOCK | PCATCH,
337 "accept", 0);
338 if (head->so_usecount < 1)
339 panic("accept1: 2 head=%x refcount=%d\n", head, head->so_usecount);
340 if ((head->so_state & SS_DRAINING)) {
341 error = ECONNABORTED;
342 }
343 if (error) {
344 socket_unlock(head, 1);
345 goto out;
346 }
347 }
348 if (head->so_error) {
349 error = head->so_error;
350 head->so_error = 0;
351 socket_unlock(head, 1);
352 goto out;
353 }
354
355
356 /*
357 * At this point we know that there is at least one connection
358 * ready to be accepted. Remove it from the queue prior to
359 * allocating the file descriptor for it since falloc() may
360 * block allowing another process to accept the connection
361 * instead.
362 */
363 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
364 so = TAILQ_FIRST(&head->so_comp);
365 TAILQ_REMOVE(&head->so_comp, so, so_list);
366 head->so_qlen--;
367 socket_unlock(head, 0); /* unlock head to avoid deadlock with select, keep a ref on head */
368 fflag = fp->f_flag;
369 proc_fdlock(p);
370 error = falloc_locked(p, &fp, &newfd, 1);
371 if (error) {
372 /*
373 * Probably ran out of file descriptors. Put the
374 * unaccepted connection back onto the queue and
375 * do another wakeup so some other process might
376 * have a chance at it.
377 */
378 proc_fdunlock(p);
379 socket_lock(head, 0);
380 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
381 head->so_qlen++;
382 wakeup_one((caddr_t)&head->so_timeo);
383 socket_unlock(head, 1);
384 goto out;
385 }
386 *fdflags(p, newfd) &= ~UF_RESERVED;
387 *retval = newfd;
388 fp->f_type = DTYPE_SOCKET;
389 fp->f_flag = fflag;
390 fp->f_ops = &socketops;
391 fp->f_data = (caddr_t)so;
392 fp_drop(p, newfd, fp, 1);
393 proc_fdunlock(p);
394 socket_lock(head, 0);
395 if (dosocklock)
396 socket_lock(so, 1);
397 so->so_state &= ~SS_COMP;
398 so->so_head = NULL;
399 sa = 0;
400 (void) soacceptlock(so, &sa, 0);
401 socket_unlock(head, 1);
402 if (sa == 0) {
403 namelen = 0;
404 if (uap->name)
405 goto gotnoname;
406 if (dosocklock)
407 socket_unlock(so, 1);
408 error = 0;
409 goto out;
410 }
411 AUDIT_ARG(sockaddr, p, sa);
412 if (uap->name) {
413 /* check sa_len before it is destroyed */
414 if (namelen > sa->sa_len)
415 namelen = sa->sa_len;
416 #if COMPAT_43_SOCKET
417 if (compat)
418 ((struct osockaddr *)sa)->sa_family =
419 sa->sa_family;
420 #endif
421 error = copyout(sa, uap->name, namelen);
422 if (!error)
423 gotnoname:
424 error = copyout((caddr_t)&namelen, uap->anamelen,
425 sizeof(socklen_t));
426 }
427 FREE(sa, M_SONAME);
428 if (dosocklock)
429 socket_unlock(so, 1);
430 out:
431 file_drop(fd);
432 return (error);
433 }
434
435 int
436 accept(struct proc *p, struct accept_args *uap, register_t *retval)
437 {
438
439 return (accept1(p, uap, retval, 0));
440 }
441
#if COMPAT_43_SOCKET
/*
 * oaccept - old-style accept entry point; runs accept1() with the compat
 * flag set and returns its result unchanged.
 */
int
oaccept(struct proc *p, struct accept_args *uap, register_t *retval)
{
	int rc = accept1(p, uap, retval, 1);

	return (rc);
}
#endif /* COMPAT_43_SOCKET */
450
451 /* ARGSUSED */
452 int
453 connect(struct proc *p, struct connect_args *uap, __unused register_t *retval)
454 {
455 struct socket *so;
456 struct sockaddr *sa;
457 lck_mtx_t *mutex_held;
458 int error;
459 int fd = uap->s;
460
461 AUDIT_ARG(fd, uap->s);
462 error = file_socket( fd, &so);
463 if (error)
464 return (error);
465 if (so == NULL) {
466 error = EBADF;
467 goto out;
468 }
469
470 socket_lock(so, 1);
471
472 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
473 socket_unlock(so, 1);
474 error = EALREADY;
475 goto out;
476 }
477 error = getsockaddr(&sa, uap->name, uap->namelen);
478 if (error) {
479 socket_unlock(so, 1);
480 goto out;
481 }
482 AUDIT_ARG(sockaddr, p, sa);
483 error = soconnectlock(so, sa, 0);
484 if (error)
485 goto bad;
486 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
487 FREE(sa, M_SONAME);
488 socket_unlock(so, 1);
489 error = EINPROGRESS;
490 goto out;
491 }
492 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
493 if (so->so_proto->pr_getlock != NULL)
494 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
495 else
496 mutex_held = so->so_proto->pr_domain->dom_mtx;
497 error = msleep((caddr_t)&so->so_timeo, mutex_held, PSOCK | PCATCH,
498 "connec", 0);
499 if ((so->so_state & SS_DRAINING)) {
500 error = ECONNABORTED;
501 }
502 if (error)
503 break;
504 }
505 if (error == 0) {
506 error = so->so_error;
507 so->so_error = 0;
508 }
509 bad:
510 so->so_state &= ~SS_ISCONNECTING;
511 socket_unlock(so, 1);
512 FREE(sa, M_SONAME);
513 if (error == ERESTART)
514 error = EINTR;
515 out:
516 file_drop(fd);
517 return (error);
518 }
519
520 int
521 socketpair(struct proc *p, struct socketpair_args *uap, __unused register_t *retval)
522 {
523 struct fileproc *fp1, *fp2;
524 struct socket *so1, *so2;
525 int fd, error, sv[2];
526
527 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
528 error = socreate(uap->domain, &so1, uap->type, uap->protocol);
529 if (error)
530 return (error);
531 error = socreate(uap->domain, &so2, uap->type, uap->protocol);
532 if (error)
533 goto free1;
534
535 error = falloc(p, &fp1, &fd);
536 if (error) {
537 goto free2;
538 }
539 fp1->f_flag = FREAD|FWRITE;
540 fp1->f_type = DTYPE_SOCKET;
541 fp1->f_ops = &socketops;
542 fp1->f_data = (caddr_t)so1;
543 sv[0] = fd;
544
545 error = falloc(p, &fp2, &fd);
546 if (error) {
547 goto free3;
548 }
549 fp2->f_flag = FREAD|FWRITE;
550 fp2->f_type = DTYPE_SOCKET;
551 fp2->f_ops = &socketops;
552 fp2->f_data = (caddr_t)so2;
553 sv[1] = fd;
554
555 error = soconnect2(so1, so2);
556 if (error) {
557 goto free4;
558 }
559 if (uap->type == SOCK_DGRAM) {
560 /*
561 * Datagram socket connection is asymmetric.
562 */
563 error = soconnect2(so2, so1);
564 if (error) {
565 goto free4;
566 }
567 }
568
569 proc_fdlock(p);
570 *fdflags(p, sv[0]) &= ~UF_RESERVED;
571 *fdflags(p, sv[1]) &= ~UF_RESERVED;
572 fp_drop(p, sv[0], fp1, 1);
573 fp_drop(p, sv[1], fp2, 1);
574 proc_fdunlock(p);
575
576 error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof(int));
577 #if 0 /* old pipe(2) syscall compatability, unused these days */
578 retval[0] = sv[0]; /* XXX ??? */
579 retval[1] = sv[1]; /* XXX ??? */
580 #endif /* 0 */
581 return (error);
582 free4:
583 fp_free(p, sv[1], fp2);
584 free3:
585 fp_free(p, sv[0], fp1);
586 free2:
587 (void)soclose(so2);
588 free1:
589 (void)soclose(so1);
590 return (error);
591 }
592
593 static int
594 sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
595 int flags, register_t *retval)
596 {
597 struct mbuf *control;
598 struct sockaddr *to;
599 int error;
600 struct socket *so;
601 user_ssize_t len;
602 #if KTRACE
603 uio_t ktruio = NULL;
604 #endif
605
606 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0,0,0,0,0);
607
608 error = file_socket(s, &so);
609 if (error )
610 {
611 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
612 return (error);
613 }
614
615 if (mp->msg_name) {
616 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
617 if (error) {
618 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
619 goto out;
620 }
621 AUDIT_ARG(sockaddr, p, to);
622 } else {
623 to = 0;
624 }
625 if (mp->msg_control) {
626 if (mp->msg_controllen < ((socklen_t)sizeof(struct cmsghdr))
627 #if COMPAT_43_SOCKET
628 && !(mp->msg_flags & MSG_COMPAT)
629 #endif
630 ) {
631 error = EINVAL;
632 goto bad;
633 }
634 error = sockargs(&control, mp->msg_control,
635 mp->msg_controllen, MT_CONTROL);
636 if (error)
637 goto bad;
638 #if COMPAT_43_SOCKET
639 if (mp->msg_flags & MSG_COMPAT) {
640 register struct cmsghdr *cm;
641
642 M_PREPEND(control, sizeof(*cm), M_WAIT);
643 if (control == 0) {
644 error = ENOBUFS;
645 goto bad;
646 } else {
647 cm = mtod(control, struct cmsghdr *);
648 cm->cmsg_len = control->m_len;
649 cm->cmsg_level = SOL_SOCKET;
650 cm->cmsg_type = SCM_RIGHTS;
651 }
652 }
653 #endif
654 } else {
655 control = 0;
656 }
657
658 #if KTRACE
659 if (KTRPOINT(p, KTR_GENIO)) {
660 ktruio = uio_duplicate(uiop);
661 }
662 #endif
663
664 len = uio_resid(uiop);
665 if (so == NULL)
666 error = EBADF;
667 else
668 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
669 flags);
670 if (error) {
671 if (uio_resid(uiop) != len && (error == ERESTART ||
672 error == EINTR || error == EWOULDBLOCK))
673 error = 0;
674 /* Generation of SIGPIPE can be controlled per socket */
675 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
676 psignal(p, SIGPIPE);
677 }
678 if (error == 0)
679 *retval = (int)(len - uio_resid(uiop));
680 bad:
681 #if KTRACE
682 if (ktruio != NULL) {
683 if (error == 0) {
684 uio_setresid(ktruio, retval[0]);
685 ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error);
686 }
687 uio_free(ktruio);
688 }
689 #endif
690 if (to)
691 FREE(to, M_SONAME);
692 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
693 out:
694 file_drop(s);
695 return (error);
696 }
697
698
699 int
700 sendto(struct proc *p, struct sendto_args *uap, register_t *retval)
701 {
702 struct user_msghdr msg;
703 int error;
704 uio_t auio = NULL;
705
706 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0,0,0,0,0);
707 AUDIT_ARG(fd, uap->s);
708
709 auio = uio_create(1, 0,
710 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
711 UIO_WRITE);
712 if (auio == NULL) {
713 return (ENOMEM);
714 }
715 uio_addiov(auio, uap->buf, uap->len);
716
717 msg.msg_name = uap->to;
718 msg.msg_namelen = uap->tolen;
719 /* no need to set up msg_iov. sendit uses uio_t we send it */
720 msg.msg_iov = 0;
721 msg.msg_iovlen = 0;
722 msg.msg_control = 0;
723 msg.msg_flags = 0;
724
725 error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
726
727 if (auio != NULL) {
728 uio_free(auio);
729 }
730
731 #if HACK_FOR_4056224
732 /*
733 * Radar 4056224
734 * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
735 * This will be removed in the release following Tiger
736 */
737 if (error == ENOTSOCK) {
738 struct fileproc *fp;
739
740 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
741 (void) fp_drop(p, uap->s, fp,0);
742
743 if (fp->f_type == DTYPE_PIPE) {
744 struct write_args write_uap;
745 user_ssize_t write_retval;
746
747 if (p->p_pid > last_pid_4056224) {
748 last_pid_4056224 = p->p_pid;
749
750 printf("%s[%d] uses send/recv on a pipe\n",
751 p->p_comm, p->p_pid);
752 }
753
754 bzero(&write_uap, sizeof(struct write_args));
755 write_uap.fd = uap->s;
756 write_uap.cbuf = uap->buf;
757 write_uap.nbyte = uap->len;
758
759 error = write(p, &write_uap, &write_retval);
760 *retval = (int)write_retval;
761 }
762 }
763 }
764 #endif /* HACK_FOR_4056224 */
765
766 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval,0,0,0);
767
768 return(error);
769 }
770
#if COMPAT_43_SOCKET
/*
 * osend - retired entry point.  It is no longer supported and cannot be
 * called directly; every argument is ignored and ENOTSUP is returned.
 * LP64todo - remove this once we're sure there are no clients
 */
int
osend(__unused struct proc *p,
    __unused struct osend_args *uap,
    __unused register_t *retval)
{
	return ENOTSUP;
}

/*
 * osendmsg - retired entry point.  It is no longer supported and cannot be
 * called directly; every argument is ignored and ENOTSUP is returned.
 * LP64todo - remove this once we're sure there are no clients
 */
int
osendmsg(__unused struct proc *p,
    __unused struct osendmsg_args *uap,
    __unused register_t *retval)
{
	return ENOTSUP;
}
#endif
796
797
798 int
799 sendmsg(struct proc *p, register struct sendmsg_args *uap, register_t *retval)
800 {
801 struct msghdr msg;
802 struct user_msghdr user_msg;
803 caddr_t msghdrp;
804 int size_of_msghdr;
805 int error;
806 int size_of_iovec;
807 uio_t auio = NULL;
808 struct user_iovec *iovp;
809
810 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0,0,0,0,0);
811 AUDIT_ARG(fd, uap->s);
812 if (IS_64BIT_PROCESS(p)) {
813 msghdrp = (caddr_t) &user_msg;
814 size_of_msghdr = sizeof(user_msg);
815 size_of_iovec = sizeof(struct user_iovec);
816 }
817 else {
818 msghdrp = (caddr_t) &msg;
819 size_of_msghdr = sizeof(msg);
820 size_of_iovec = sizeof(struct iovec);
821 }
822 error = copyin(uap->msg, msghdrp, size_of_msghdr);
823 if (error)
824 {
825 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
826 return (error);
827 }
828
829 /* only need to copy if user process is not 64-bit */
830 if (!IS_64BIT_PROCESS(p)) {
831 user_msg.msg_flags = msg.msg_flags;
832 user_msg.msg_controllen = msg.msg_controllen;
833 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
834 user_msg.msg_iovlen = msg.msg_iovlen;
835 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
836 user_msg.msg_namelen = msg.msg_namelen;
837 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
838 }
839
840 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
841 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
842 return (EMSGSIZE);
843 }
844
845 /* allocate a uio large enough to hold the number of iovecs passed */
846 auio = uio_create(user_msg.msg_iovlen, 0,
847 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
848 UIO_WRITE);
849 if (auio == NULL) {
850 error = ENOBUFS;
851 goto done;
852 }
853
854 if (user_msg.msg_iovlen) {
855 /* get location of iovecs within the uio. then copyin the iovecs from
856 * user space.
857 */
858 iovp = uio_iovsaddr(auio);
859 if (iovp == NULL) {
860 error = ENOBUFS;
861 goto done;
862 }
863 error = copyin(user_msg.msg_iov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec));
864 if (error)
865 goto done;
866 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
867
868 /* finish setup of uio_t */
869 uio_calculateresid(auio);
870 }
871 else {
872 user_msg.msg_iov = 0;
873 }
874
875 #if COMPAT_43_SOCKET
876 user_msg.msg_flags = 0;
877 #endif
878 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
879 done:
880 if (auio != NULL) {
881 uio_free(auio);
882 }
883 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
884
885 return (error);
886 }
887
888 static int
889 recvit(p, s, mp, uiop, namelenp, retval)
890 register struct proc *p;
891 int s;
892 register struct user_msghdr *mp;
893 uio_t uiop;
894 user_addr_t namelenp;
895 register_t *retval;
896 {
897 int len, error;
898 struct mbuf *m, *control = 0;
899 user_addr_t ctlbuf;
900 struct socket *so;
901 struct sockaddr *fromsa = 0;
902 struct fileproc *fp;
903 #if KTRACE
904 uio_t ktruio = NULL;
905 #endif
906
907 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0,0,0,0,0);
908 proc_fdlock(p);
909 if ( (error = fp_lookup(p, s, &fp, 1)) ) {
910 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
911 proc_fdunlock(p);
912 return (error);
913 }
914 if (fp->f_type != DTYPE_SOCKET) {
915 fp_drop(p, s, fp,1);
916 proc_fdunlock(p);
917 return(ENOTSOCK);
918 }
919
920 so = (struct socket *)fp->f_data;
921
922 proc_fdunlock(p);
923 if (uio_resid(uiop) < 0) {
924 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL,0,0,0,0);
925 error = EINVAL;
926 goto out1;
927 }
928 #if KTRACE
929 if (KTRPOINT(p, KTR_GENIO)) {
930 ktruio = uio_duplicate(uiop);
931 }
932 #endif
933
934 len = uio_resid(uiop);
935 if (so == NULL)
936 error = EBADF;
937 else {
938 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
939 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
940 &mp->msg_flags);
941 }
942 AUDIT_ARG(sockaddr, p, fromsa);
943 if (error) {
944 if (uio_resid(uiop) != len && (error == ERESTART ||
945 error == EINTR || error == EWOULDBLOCK))
946 error = 0;
947 }
948 #if KTRACE
949 if (ktruio != NULL) {
950 if (error == 0) {
951 uio_setresid(ktruio, len - uio_resid(uiop));
952 ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error);
953 }
954 uio_free(ktruio);
955 }
956 #endif
957 if (error)
958 goto out;
959 *retval = len - uio_resid(uiop);
960 if (mp->msg_name) {
961 len = mp->msg_namelen;
962 if (len <= 0 || fromsa == 0)
963 len = 0;
964 else {
965 #ifndef MIN
966 #define MIN(a,b) ((a)>(b)?(b):(a))
967 #endif
968 /* save sa_len before it is destroyed by MSG_COMPAT */
969 len = MIN(len, fromsa->sa_len);
970 #if COMPAT_43_SOCKET
971 if (mp->msg_flags & MSG_COMPAT)
972 ((struct osockaddr *)fromsa)->sa_family =
973 fromsa->sa_family;
974 #endif
975 error = copyout(fromsa, mp->msg_name, (unsigned)len);
976 if (error)
977 goto out;
978 }
979 mp->msg_namelen = len;
980 if (namelenp &&
981 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
982 #if COMPAT_43_SOCKET
983 if (mp->msg_flags & MSG_COMPAT)
984 error = 0; /* old recvfrom didn't check */
985 else
986 #endif
987 goto out;
988 }
989 }
990 if (mp->msg_control) {
991 #if COMPAT_43_SOCKET
992 /*
993 * We assume that old recvmsg calls won't receive access
994 * rights and other control info, esp. as control info
995 * is always optional and those options didn't exist in 4.3.
996 * If we receive rights, trim the cmsghdr; anything else
997 * is tossed.
998 */
999 if (control && mp->msg_flags & MSG_COMPAT) {
1000 if (mtod(control, struct cmsghdr *)->cmsg_level !=
1001 SOL_SOCKET ||
1002 mtod(control, struct cmsghdr *)->cmsg_type !=
1003 SCM_RIGHTS) {
1004 mp->msg_controllen = 0;
1005 goto out;
1006 }
1007 control->m_len -= sizeof (struct cmsghdr);
1008 control->m_data += sizeof (struct cmsghdr);
1009 }
1010 #endif
1011 len = mp->msg_controllen;
1012 m = control;
1013 mp->msg_controllen = 0;
1014 ctlbuf = mp->msg_control;
1015
1016 while (m && len > 0) {
1017 unsigned int tocopy;
1018
1019 if (len >= m->m_len)
1020 tocopy = m->m_len;
1021 else {
1022 mp->msg_flags |= MSG_CTRUNC;
1023 tocopy = len;
1024 }
1025
1026 error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf, tocopy);
1027 if (error)
1028 goto out;
1029
1030 ctlbuf += tocopy;
1031 len -= tocopy;
1032 m = m->m_next;
1033 }
1034 mp->msg_controllen = ctlbuf - mp->msg_control;
1035 }
1036 out:
1037 if (fromsa)
1038 FREE(fromsa, M_SONAME);
1039 if (control)
1040 m_freem(control);
1041 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
1042 out1:
1043 fp_drop(p, s, fp, 0);
1044 return (error);
1045 }
1046
1047
1048 int
1049 recvfrom(p, uap, retval)
1050 struct proc *p;
1051 register struct recvfrom_args /* {
1052 int s;
1053 caddr_t buf;
1054 size_t len;
1055 int flags;
1056 caddr_t from;
1057 int *fromlenaddr;
1058 } */ *uap;
1059 register_t *retval;
1060 {
1061 struct user_msghdr msg;
1062 int error;
1063 uio_t auio = NULL;
1064
1065 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0,0,0,0,0);
1066 AUDIT_ARG(fd, uap->s);
1067
1068 if (uap->fromlenaddr) {
1069 error = copyin(uap->fromlenaddr,
1070 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
1071 if (error)
1072 return (error);
1073 } else
1074 msg.msg_namelen = 0;
1075 msg.msg_name = uap->from;
1076 auio = uio_create(1, 0,
1077 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1078 UIO_READ);
1079 if (auio == NULL) {
1080 return (ENOMEM);
1081 }
1082
1083 uio_addiov(auio, uap->buf, uap->len);
1084 /* no need to set up msg_iov. recvit uses uio_t we send it */
1085 msg.msg_iov = 0;
1086 msg.msg_iovlen = 0;
1087 msg.msg_control = 0;
1088 msg.msg_controllen = 0;
1089 msg.msg_flags = uap->flags;
1090 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
1091 if (auio != NULL) {
1092 uio_free(auio);
1093 }
1094
1095 #if HACK_FOR_4056224
1096 /*
1097 * Radar 4056224
1098 * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
1099 * This will be removed in the release following Tiger
1100 */
1101 if (error == ENOTSOCK && proc_is64bit(p) == 0) {
1102 struct fileproc *fp;
1103
1104 if (fp_lookup(p, uap->s, &fp, 0) == 0) {
1105 (void) fp_drop(p, uap->s, fp,0);
1106
1107 if (fp->f_type == DTYPE_PIPE) {
1108 struct read_args read_uap;
1109 user_ssize_t read_retval;
1110
1111 if (p->p_pid > last_pid_4056224) {
1112 last_pid_4056224 = p->p_pid;
1113
1114 printf("%s[%d] uses send/recv on a pipe\n",
1115 p->p_comm, p->p_pid);
1116 }
1117
1118 bzero(&read_uap, sizeof(struct read_args));
1119 read_uap.fd = uap->s;
1120 read_uap.cbuf = uap->buf;
1121 read_uap.nbyte = uap->len;
1122
1123 error = read(p, &read_uap, &read_retval);
1124 *retval = (int)read_retval;
1125 }
1126 }
1127 }
1128 #endif /* HACK_FOR_4056224 */
1129
1130 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error,0,0,0,0);
1131
1132 return (error);
1133 }
1134
#if COMPAT_43_SOCKET
/*
 * orecvfrom - old-style recvfrom entry point: adds MSG_COMPAT to the
 * caller's flags (modifying *uap in place) and defers to recvfrom().
 */
int
orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval)
{
	uap->flags = uap->flags | MSG_COMPAT;

	return (recvfrom(p, uap, retval));
}
#endif
1144
1145
#if COMPAT_43_SOCKET
/*
 * orecv - retired entry point.  It is no longer supported and cannot be
 * called directly; every argument is ignored and ENOTSUP is returned.
 * LP64todo - remove this once we're sure there are no clients
 */
int
orecv(__unused struct proc *p, __unused struct orecv_args *uap,
    __unused register_t *retval)
{
	return ENOTSUP;
}

/*
 * orecvmsg - retired old-recvmsg entry point.  It is no longer supported
 * and cannot be called directly; every argument is ignored and ENOTSUP is
 * returned.
 * LP64todo - remove this once we're sure there are no clients
 */
int
orecvmsg(__unused struct proc *p, __unused struct orecvmsg_args *uap,
    __unused register_t *retval)
{
	return ENOTSUP;
}
#endif
1177
1178 int
1179 recvmsg(p, uap, retval)
1180 struct proc *p;
1181 struct recvmsg_args *uap;
1182 register_t *retval;
1183 {
1184 struct msghdr msg;
1185 struct user_msghdr user_msg;
1186 caddr_t msghdrp;
1187 int size_of_msghdr;
1188 user_addr_t uiov;
1189 register int error;
1190 int size_of_iovec;
1191 uio_t auio = NULL;
1192 struct user_iovec *iovp;
1193
1194 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0,0,0,0,0);
1195 AUDIT_ARG(fd, uap->s);
1196 if (IS_64BIT_PROCESS(p)) {
1197 msghdrp = (caddr_t) &user_msg;
1198 size_of_msghdr = sizeof(user_msg);
1199 size_of_iovec = sizeof(struct user_iovec);
1200 }
1201 else {
1202 msghdrp = (caddr_t) &msg;
1203 size_of_msghdr = sizeof(msg);
1204 size_of_iovec = sizeof(struct iovec);
1205 }
1206 error = copyin(uap->msg, msghdrp, size_of_msghdr);
1207 if (error)
1208 {
1209 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1210 return (error);
1211 }
1212
1213 /* only need to copy if user process is not 64-bit */
1214 if (!IS_64BIT_PROCESS(p)) {
1215 user_msg.msg_flags = msg.msg_flags;
1216 user_msg.msg_controllen = msg.msg_controllen;
1217 user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
1218 user_msg.msg_iovlen = msg.msg_iovlen;
1219 user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
1220 user_msg.msg_namelen = msg.msg_namelen;
1221 user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
1222 }
1223
1224 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
1225 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
1226 return (EMSGSIZE);
1227 }
1228
1229 #if COMPAT_43_SOCKET
1230 user_msg.msg_flags = uap->flags &~ MSG_COMPAT;
1231 #else
1232 user_msg.msg_flags = uap->flags;
1233 #endif
1234
1235 /* allocate a uio large enough to hold the number of iovecs passed */
1236 auio = uio_create(user_msg.msg_iovlen, 0,
1237 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
1238 UIO_READ);
1239 if (auio == NULL) {
1240 error = ENOMEM;
1241 goto done;
1242 }
1243
1244 /* get location of iovecs within the uio. then copyin the iovecs from
1245 * user space.
1246 */
1247 iovp = uio_iovsaddr(auio);
1248 if (iovp == NULL) {
1249 error = ENOMEM;
1250 goto done;
1251 }
1252 uiov = user_msg.msg_iov;
1253 user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
1254 error = copyin(uiov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec));
1255 if (error)
1256 goto done;
1257
1258 /* finish setup of uio_t */
1259 uio_calculateresid(auio);
1260
1261 error = recvit(p, uap->s, &user_msg, auio, 0, retval);
1262 if (!error) {
1263 user_msg.msg_iov = uiov;
1264 /* only need to copy if user process is not 64-bit */
1265 if (!IS_64BIT_PROCESS(p)) {
1266 // LP64todo - do all these change? if not, then no need to copy all of them!
1267 msg.msg_flags = user_msg.msg_flags;
1268 msg.msg_controllen = user_msg.msg_controllen;
1269 msg.msg_control = CAST_DOWN(caddr_t, user_msg.msg_control);
1270 msg.msg_iovlen = user_msg.msg_iovlen;
1271 msg.msg_iov = (struct iovec *) CAST_DOWN(caddr_t, user_msg.msg_iov);
1272 msg.msg_namelen = user_msg.msg_namelen;
1273 msg.msg_name = CAST_DOWN(caddr_t, user_msg.msg_name);
1274 }
1275 error = copyout(msghdrp, uap->msg, size_of_msghdr);
1276 }
1277 done:
1278 if (auio != NULL) {
1279 uio_free(auio);
1280 }
1281 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
1282 return (error);
1283 }
1284
1285 /* ARGSUSED */
1286 int
1287 shutdown(__unused struct proc *p, struct shutdown_args *uap, __unused register_t *retval)
1288 {
1289 struct socket * so;
1290 int error;
1291
1292 AUDIT_ARG(fd, uap->s);
1293 error = file_socket(uap->s, &so);
1294 if (error)
1295 return (error);
1296 if (so == NULL) {
1297 error = EBADF;
1298 goto out;
1299 }
1300 error = soshutdown((struct socket *)so, uap->how);
1301 out:
1302 file_drop(uap->s);
1303 return(error);
1304 }
1305
1306
1307
1308
1309
1310 /* ARGSUSED */
1311 int
1312 setsockopt(struct proc *p, struct setsockopt_args *uap, __unused register_t *retval)
1313 {
1314 struct socket * so;
1315 struct sockopt sopt;
1316 int error;
1317
1318 AUDIT_ARG(fd, uap->s);
1319 if (uap->val == 0 && uap->valsize != 0)
1320 return (EFAULT);
1321 if (uap->valsize < 0)
1322 return (EINVAL);
1323
1324 error = file_socket(uap->s, &so);
1325 if (error)
1326 return (error);
1327
1328 sopt.sopt_dir = SOPT_SET;
1329 sopt.sopt_level = uap->level;
1330 sopt.sopt_name = uap->name;
1331 sopt.sopt_val = uap->val;
1332 sopt.sopt_valsize = uap->valsize;
1333 sopt.sopt_p = p;
1334
1335 if (so == NULL) {
1336 error = EINVAL;
1337 goto out;
1338 }
1339 error = sosetopt(so, &sopt);
1340 out:
1341 file_drop(uap->s);
1342 return(error);
1343 }
1344
1345
1346
1347 int
1348 getsockopt(struct proc *p, struct getsockopt_args *uap, __unused register_t *retval)
1349 {
1350 int error;
1351 socklen_t valsize;
1352 struct sockopt sopt;
1353 struct socket * so;
1354
1355 error = file_socket(uap->s, &so);
1356 if (error)
1357 return (error);
1358 if (uap->val) {
1359 error = copyin(uap->avalsize, (caddr_t)&valsize, sizeof (valsize));
1360 if (error)
1361 goto out;
1362 if (valsize < 0) {
1363 error = EINVAL;
1364 goto out;
1365 }
1366 } else
1367 valsize = 0;
1368
1369 sopt.sopt_dir = SOPT_GET;
1370 sopt.sopt_level = uap->level;
1371 sopt.sopt_name = uap->name;
1372 sopt.sopt_val = uap->val;
1373 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1374 sopt.sopt_p = p;
1375
1376 if (so == NULL) {
1377 error = EBADF;
1378 goto out;
1379 }
1380 error = sogetopt((struct socket *)so, &sopt);
1381 if (error == 0) {
1382 valsize = sopt.sopt_valsize;
1383 error = copyout((caddr_t)&valsize, uap->avalsize, sizeof (valsize));
1384 }
1385 out:
1386 file_drop(uap->s);
1387 return (error);
1388 }
1389
1390
1391 /*
1392 * Get socket name.
1393 */
1394 /* ARGSUSED */
1395 static int
1396 getsockname1(__unused struct proc *p, struct getsockname_args *uap, __unused register_t *retval,
1397 int compat)
1398 {
1399 struct socket *so;
1400 struct sockaddr *sa;
1401 socklen_t len;
1402 int error;
1403
1404 error = file_socket(uap->fdes, &so);
1405 if (error)
1406 return (error);
1407 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
1408 if (error)
1409 goto out;
1410 if (so == NULL) {
1411 error = EBADF;
1412 goto out;
1413 }
1414 sa = 0;
1415 socket_lock(so, 1);
1416 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1417 if (error == 0)
1418 {
1419 struct socket_filter_entry *filter;
1420 int filtered = 0;
1421 for (filter = so->so_filt; filter && error == 0;
1422 filter = filter->sfe_next_onsocket) {
1423 if (filter->sfe_filter->sf_filter.sf_getsockname) {
1424 if (!filtered) {
1425 filtered = 1;
1426 sflt_use(so);
1427 socket_unlock(so, 0);
1428 }
1429 error = filter->sfe_filter->sf_filter.sf_getsockname(filter->sfe_cookie,
1430 so, &sa);
1431 }
1432 }
1433
1434 if (error == EJUSTRETURN)
1435 error = 0;
1436
1437 if (filtered) {
1438 socket_lock(so, 0);
1439 sflt_unuse(so);
1440 }
1441 }
1442 socket_unlock(so, 1);
1443 if (error)
1444 goto bad;
1445 if (sa == 0) {
1446 len = 0;
1447 goto gotnothing;
1448 }
1449
1450 len = MIN(len, sa->sa_len);
1451 #if COMPAT_43_SOCKET
1452 if (compat)
1453 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1454 #endif
1455 error = copyout((caddr_t)sa, uap->asa, len);
1456 if (error == 0)
1457 gotnothing:
1458 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1459 bad:
1460 if (sa)
1461 FREE(sa, M_SONAME);
1462 out:
1463 file_drop(uap->fdes);
1464 return (error);
1465 }
1466
1467 int
1468 getsockname(struct proc *p, struct getsockname_args *uap, register_t *retval)
1469 {
1470 return (getsockname1(p, uap, retval, 0));
1471 }
1472
1473 #if COMPAT_43_SOCKET
1474 int
1475 ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval)
1476 {
1477 return (getsockname1(p, uap, retval, 1));
1478 }
1479 #endif /* COMPAT_43_SOCKET */
1480
1481 /*
1482 * Get name of peer for connected socket.
1483 */
1484 /* ARGSUSED */
1485 int
1486 getpeername1(__unused struct proc *p, struct getpeername_args *uap, __unused register_t *retval,
1487 int compat)
1488 {
1489 struct socket *so;
1490 struct sockaddr *sa;
1491 socklen_t len;
1492 int error;
1493
1494 error = file_socket(uap->fdes, &so);
1495 if (error)
1496 return (error);
1497 if (so == NULL) {
1498 error = EBADF;
1499 goto out;
1500 }
1501
1502 socket_lock(so, 1);
1503
1504 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1505 socket_unlock(so, 1);
1506 error = ENOTCONN;
1507 goto out;
1508 }
1509 error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
1510 if (error) {
1511 socket_unlock(so, 1);
1512 goto out;
1513 }
1514 sa = 0;
1515 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1516 if (error == 0)
1517 {
1518 struct socket_filter_entry *filter;
1519 int filtered = 0;
1520 for (filter = so->so_filt; filter && error == 0;
1521 filter = filter->sfe_next_onsocket) {
1522 if (filter->sfe_filter->sf_filter.sf_getpeername) {
1523 if (!filtered) {
1524 filtered = 1;
1525 sflt_use(so);
1526 socket_unlock(so, 0);
1527 }
1528 error = filter->sfe_filter->sf_filter.sf_getpeername(filter->sfe_cookie,
1529 so, &sa);
1530 }
1531 }
1532
1533 if (error == EJUSTRETURN)
1534 error = 0;
1535
1536 if (filtered) {
1537 socket_lock(so, 0);
1538 sflt_unuse(so);
1539 }
1540 }
1541 socket_unlock(so, 1);
1542 if (error)
1543 goto bad;
1544 if (sa == 0) {
1545 len = 0;
1546 goto gotnothing;
1547 }
1548 len = MIN(len, sa->sa_len);
1549 #if COMPAT_43_SOCKET
1550 if (compat)
1551 ((struct osockaddr *)sa)->sa_family =
1552 sa->sa_family;
1553 #endif
1554 error = copyout(sa, uap->asa, len);
1555 if (error)
1556 goto bad;
1557 gotnothing:
1558 error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
1559 bad:
1560 if (sa) FREE(sa, M_SONAME);
1561 out:
1562 file_drop(uap->fdes);
1563 return (error);
1564 }
1565
1566 int
1567 getpeername(struct proc *p, struct getpeername_args *uap, register_t *retval)
1568 {
1569
1570 return (getpeername1(p, uap, retval, 0));
1571 }
1572
1573 #if COMPAT_43_SOCKET
1574 int
1575 ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval)
1576 {
1577
1578 return (getpeername1(p, uap, retval, 1));
1579 }
1580 #endif /* COMPAT_43_SOCKET */
1581
1582 int
1583 sockargs(mp, data, buflen, type)
1584 struct mbuf **mp;
1585 user_addr_t data;
1586 int buflen, type;
1587 {
1588 register struct sockaddr *sa;
1589 register struct mbuf *m;
1590 int error;
1591
1592 if ((u_int)buflen > MLEN) {
1593 #if COMPAT_43_SOCKET
1594 if (type == MT_SONAME && (u_int)buflen <= 112)
1595 buflen = MLEN; /* unix domain compat. hack */
1596 else
1597 #endif
1598 if ((u_int)buflen > MCLBYTES)
1599 return (EINVAL);
1600 }
1601 m = m_get(M_WAIT, type);
1602 if (m == NULL)
1603 return (ENOBUFS);
1604 if ((u_int)buflen > MLEN) {
1605 MCLGET(m, M_WAIT);
1606 if ((m->m_flags & M_EXT) == 0) {
1607 m_free(m);
1608 return ENOBUFS;
1609 }
1610 }
1611 m->m_len = buflen;
1612 error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
1613 if (error)
1614 (void) m_free(m);
1615 else {
1616 *mp = m;
1617 if (type == MT_SONAME) {
1618 sa = mtod(m, struct sockaddr *);
1619
1620 #if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
1621 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1622 sa->sa_family = sa->sa_len;
1623 #endif
1624 sa->sa_len = buflen;
1625 }
1626 }
1627 return (error);
1628 }
1629
1630 /*
1631 * Given a user_addr_t of length len, allocate and fill out a *sa.
1632 */
1633 int
1634 getsockaddr(struct sockaddr **namp, user_addr_t uaddr, size_t len)
1635 {
1636 struct sockaddr *sa;
1637 int error;
1638
1639 if (len > SOCK_MAXADDRLEN)
1640 return ENAMETOOLONG;
1641
1642 if (len == 0)
1643 return EINVAL;
1644
1645 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1646 if (sa == NULL) {
1647 return ENOMEM;
1648 }
1649 error = copyin(uaddr, (caddr_t)sa, len);
1650 if (error) {
1651 FREE(sa, M_SONAME);
1652 } else {
1653 #if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
1654 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1655 sa->sa_family = sa->sa_len;
1656 #endif
1657 sa->sa_len = len;
1658 *namp = sa;
1659 }
1660 return error;
1661 }
1662
1663
1664 #if SENDFILE
1665 /*
1666 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1667 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1668 * been made static, but may be useful in the future for doing zero-copy in
1669 * other parts of the networking code.
1670 */
1671 static void
1672 sf_buf_init(void *arg)
1673 {
1674 int i;
1675
1676 SLIST_INIT(&sf_freelist);
1677 kmem_alloc_pageable(kernel_map, &sf_base, nsfbufs * PAGE_SIZE);
1678 MALLOC(sf_bufs, struct sf_buf *, nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT|M_ZERO);
1679 if (sf_bufs == NULL)
1680 return; /* XXX silently fail leaving sf_bufs NULL */
1681
1682 for (i = 0; i < nsfbufs; i++) {
1683 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1684 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1685 }
1686 }
1687
1688 /*
1689 * Get an sf_buf from the freelist. Will block if none are available.
1690 */
1691 static struct sf_buf *
1692 sf_buf_alloc()
1693 {
1694 struct sf_buf *sf;
1695
1696 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1697 sf_buf_alloc_want = 1;
1698 tsleep(&sf_freelist, PVM, "sfbufa", 0);
1699 }
1700 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1701 sf->refcnt = 1;
1702 return (sf);
1703 }
1704
1705 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1706 static void
1707 sf_buf_ref(caddr_t addr, u_int size)
1708 {
1709 struct sf_buf *sf;
1710
1711 sf = dtosf(addr);
1712 if (sf->refcnt == 0)
1713 panic("sf_buf_ref: referencing a free sf_buf");
1714 sf->refcnt++;
1715 }
1716
1717 /*
1718 * Lose a reference to an sf_buf. When none left, detach mapped page
1719 * and release resources back to the system.
1720 *
1721 * Must be called at splimp.
1722 */
1723 static void
1724 sf_buf_free(caddr_t addr, u_int size)
1725 {
1726 struct sf_buf *sf;
1727 struct vm_page *m;
1728
1729 sf = dtosf(addr);
1730 if (sf->refcnt == 0)
1731 panic("sf_buf_free: freeing free sf_buf");
1732 sf->refcnt--;
1733 if (sf->refcnt == 0) {
1734 pmap_qremove((vm_offset_t)addr, 1);
1735 m = sf->m;
1736 vm_page_unwire(m, 0);
1737 /*
1738 * Check for the object going away on us. This can
1739 * happen since we don't hold a reference to it.
1740 * If so, we're responsible for freeing the page.
1741 */
1742 if (m->wire_count == 0 && m->object == NULL)
1743 vm_page_lock_queues();
1744 vm_page_free(m);
1745 vm_page_unlock_queues();
1746 sf->m = NULL;
1747 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1748 if (sf_buf_alloc_want) {
1749 sf_buf_alloc_want = 0;
1750 wakeup(&sf_freelist);
1751 }
1752 }
1753 }
1754
1755 /*
1756 * sendfile(2).
1757 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1758 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1759 *
1760 * Send a file specified by 'fd' and starting at 'offset' to a socket
1761 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1762 * nbytes == 0. Optionally add a header and/or trailer to the socket
1763 * output. If specified, write the total number of bytes sent into *sbytes.
1764 */
1765 int
1766 sendfile(struct proc *p, struct sendfile_args *uap)
1767 {
1768 struct fileproc *fp;
1769 struct vnode *vp;
1770 struct vm_object *obj;
1771 struct socket *so;
1772 struct mbuf *m;
1773 struct sf_buf *sf;
1774 struct vm_page *pg;
1775 struct writev_args nuap;
1776 struct sf_hdtr hdtr;
1777 off_t off, xfsize, sbytes = 0;
1778 int error = 0, s;
1779 kauth_cred_t safecred;
1780
1781 if (sf_bufs == NULL) {
1782 /* Fail if initialization failed */
1783 return ENOSYS;
1784 }
1785
1786 /*
1787 * Do argument checking. Must be a regular file in, stream
1788 * type and connected socket out, positive offset.
1789 */
1790 if (error = fp_getfvp(p, uap->fd, &fp, &vp))
1791 goto done;
1792 if (fp->f_flag & FREAD) == 0) {
1793 error = EBADF;
1794 goto done1;
1795 }
1796 obj = vp->v_object;
1797 if (vp->v_type != VREG || obj == NULL) {
1798 error = EINVAL;
1799 goto done1;
1800 }
1801 error = file_socket(uap->s, &so);
1802 if (error)
1803 goto done1;
1804 if (so == NULL) {
1805 error = EBADF;
1806 goto done2;
1807 }
1808
1809 socket_lock(so, 1);
1810
1811 if (so->so_type != SOCK_STREAM) {
1812 error = EINVAL;
1813 goto done3;
1814 }
1815 if ((so->so_state & SS_ISCONNECTED) == 0) {
1816 error = ENOTCONN;
1817 goto done3;
1818 }
1819 if (uap->offset < 0) {
1820 error = EINVAL;
1821 goto done3;
1822 }
1823
1824 /*
1825 * If specified, get the pointer to the sf_hdtr struct for
1826 * any headers/trailers.
1827 */
1828 if (uap->hdtr != NULL) {
1829 error = copyin(CAST_USER_ADDR_T(uap->hdtr), &hdtr, sizeof(hdtr));
1830 if (error)
1831 goto done3;
1832 /*
1833 * Send any headers. Wimp out and use writev(2).
1834 */
1835 if (hdtr.headers != NULL) {
1836 nuap.fd = uap->s;
1837 nuap.iovp = hdtr.headers;
1838 nuap.iovcnt = hdtr.hdr_cnt;
1839 error = writev(p, &nuap);
1840 if (error)
1841 goto done3;
1842 sbytes += p->p_retval[0];
1843 }
1844 }
1845
1846 /*
1847 * Protect against multiple writers to the socket.
1848 */
1849 (void) sblock(&so->so_snd, M_WAIT);
1850
1851 /*
1852 * Loop through the pages in the file, starting with the requested
1853 * offset. Get a file page (do I/O if necessary), map the file page
1854 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1855 * it on the socket.
1856 */
1857 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1858 vm_object_offset_t pindex;
1859 vm_object_offset_t pgoff;
1860
1861 pindex = OFF_TO_IDX(off);
1862 retry_lookup:
1863 /*
1864 * Calculate the amount to transfer. Not to exceed a page,
1865 * the EOF, or the passed in nbytes.
1866 */
1867 xfsize = obj->un_pager.vnp.vnp_size - off;
1868 if (xfsize > PAGE_SIZE_64)
1869 xfsize = PAGE_SIZE;
1870 pgoff = (vm_object_offset_t)(off & PAGE_MASK_64);
1871 if (PAGE_SIZE - pgoff < xfsize)
1872 xfsize = PAGE_SIZE_64 - pgoff;
1873 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1874 xfsize = uap->nbytes - sbytes;
1875 if (xfsize <= 0)
1876 break;
1877 /*
1878 * Optimize the non-blocking case by looking at the socket space
1879 * before going to the extra work of constituting the sf_buf.
1880 */
1881 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1882 if (so->so_state & SS_CANTSENDMORE)
1883 error = EPIPE;
1884 else
1885 error = EAGAIN;
1886 sbunlock(&so->so_snd, 0); /* will release lock */
1887 goto done2;
1888 }
1889 /*
1890 * Attempt to look up the page. If the page doesn't exist or the
1891 * part we're interested in isn't valid, then read it from disk.
1892 * If some other part of the kernel has this page (i.e. it's busy),
1893 * then disk I/O may be occuring on it, so wait and retry.
1894 */
1895 pg = vm_page_lookup(obj, pindex);
1896 if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
1897 !vm_page_is_valid(pg, pgoff, xfsize))) {
1898 struct uio auio;
1899 struct iovec aiov;
1900 int bsize;
1901
1902 if (pg == NULL) {
1903 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1904 if (pg == NULL) {
1905 VM_WAIT;
1906 goto retry_lookup;
1907 }
1908 /*
1909 * don't just clear PG_BUSY manually -
1910 * vm_page_alloc() should be considered opaque,
1911 * use the VM routine provided to clear
1912 * PG_BUSY.
1913 */
1914 vm_page_wakeup(pg);
1915
1916 }
1917 /*
1918 * Ensure that our page is still around when the I/O completes.
1919 */
1920 vm_page_io_start(pg);
1921 vm_page_wire(pg);
1922 /*
1923 * Get the page from backing store.
1924 */
1925 bsize = vp->v_mount->mnt_vfsstat.f_iosize;
1926 auio.uio_iov = &aiov;
1927 auio.uio_iovcnt = 1;
1928 aiov.iov_base = 0;
1929 aiov.iov_len = MAXBSIZE;
1930 auio.uio_offset = trunc_page(off);
1931 auio.uio_segflg = UIO_NOCOPY;
1932 auio.uio_rw = UIO_READ;
1933 uio_setresid(&auio, MAXBSIZE);
1934 safecred = kauth_cred_proc_ref(p);
1935 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1936 safecred);
1937 kauth_cred_unref(&safecred);
1938 vm_page_flag_clear(pg, PG_ZERO);
1939 vm_page_io_finish(pg);
1940 if (error) {
1941 vm_page_unwire(pg, 0);
1942 /*
1943 * See if anyone else might know about this page.
1944 * If not and it is not valid, then free it.
1945 */
1946 if (pg->wire_count == 0 && pg->valid == 0 &&
1947 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1948 pg->hold_count == 0)
1949 vm_page_lock_queues();
1950 vm_page_free(pg);
1951 vm_page_unlock_queues();
1952 sbunlock(&so->so_snd, 0); /* will release socket lock */
1953 goto done2;
1954 }
1955 } else {
1956 if ((pg->flags & PG_BUSY) || pg->busy) {
1957 s = splvm();
1958 if ((pg->flags & PG_BUSY) || pg->busy) {
1959 /*
1960 * Page is busy. Wait and retry.
1961 */
1962 vm_page_flag_set(pg, PG_WANTED);
1963 tsleep(pg, PVM, "sfpbsy", 0);
1964 goto retry_lookup;
1965 }
1966 }
1967 /*
1968 * Protect from having the page ripped out from beneath us.
1969 */
1970 vm_page_wire(pg);
1971 }
1972 /*
1973 * Allocate a kernel virtual page and insert the physical page
1974 * into it.
1975 */
1976 sf = sf_buf_alloc();
1977 sf->m = pg;
1978 pmap_qenter(sf->kva, &pg, 1);
1979 /*
1980 * Get an mbuf header and set it up as having external storage.
1981 */
1982 MGETHDR(m, M_WAIT, MT_DATA);
1983 if (m == NULL) {
1984 error = ENOBUFS;
1985 sbunlock(&so->so_snd, 0); /* will release socket lock */
1986 goto done2;
1987 }
1988 m->m_ext.ext_free = sf_buf_free;
1989 m->m_ext.ext_ref = sf_buf_ref;
1990 m->m_ext.ext_buf = (void *)sf->kva;
1991 m->m_ext.ext_size = PAGE_SIZE;
1992 m->m_data = (char *) sf->kva + pgoff;
1993 m->m_flags |= M_EXT;
1994 m->m_pkthdr.len = m->m_len = xfsize;
1995 /*
1996 * Add the buffer to the socket buffer chain.
1997 */
1998 retry_space:
1999 /*
2000 * Make sure that the socket is still able to take more data.
2001 * CANTSENDMORE being true usually means that the connection
2002 * was closed. so_error is true when an error was sensed after
2003 * a previous send.
2004 * The state is checked after the page mapping and buffer
2005 * allocation above since those operations may block and make
2006 * any socket checks stale. From this point forward, nothing
2007 * blocks before the pru_send (or more accurately, any blocking
2008 * results in a loop back to here to re-check).
2009 */
2010 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
2011 if (so->so_state & SS_CANTSENDMORE) {
2012 error = EPIPE;
2013 } else {
2014 error = so->so_error;
2015 so->so_error = 0;
2016 }
2017 m_freem(m);
2018 sbunlock(&so->so_snd, 0); /* will release socket lock */
2019 goto done2;
2020 }
2021 /*
2022 * Wait for socket space to become available. We do this just
2023 * after checking the connection state above in order to avoid
2024 * a race condition with sbwait().
2025 */
2026 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2027 if (so->so_state & SS_NBIO) {
2028 m_freem(m);
2029 sbunlock(&so->so_snd, 0); /* will release socket lock */
2030 error = EAGAIN;
2031 goto done2;
2032 }
2033 error = sbwait(&so->so_snd);
2034 /*
2035 * An error from sbwait usually indicates that we've
2036 * been interrupted by a signal. If we've sent anything
2037 * then return bytes sent, otherwise return the error.
2038 */
2039 if (error) {
2040 m_freem(m);
2041 sbunlock(&so->so_snd, 0);
2042 goto done2;
2043 }
2044 goto retry_space;
2045 }
2046 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
2047 splx(s);
2048 if (error) {
2049 sbunlock(&so->so_snd, 0); /* will release socket lock */
2050 goto done2;
2051 }
2052 }
2053 sbunlock(&so->so_snd, 0); /* will release socket lock */
2054
2055 /*
2056 * Send trailers. Wimp out and use writev(2).
2057 */
2058 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2059 nuap.fd = uap->s;
2060 nuap.iovp = hdtr.trailers;
2061 nuap.iovcnt = hdtr.trl_cnt;
2062 error = writev(p, &nuap);
2063 if (error)
2064 goto done2;
2065 sbytes += p->p_retval[0];
2066 }
2067 done2:
2068 file_drop(uap->s);
2069 done1:
2070 file_drop(uap->fd);
2071 done:
2072 if (uap->sbytes != NULL) {
2073 /* XXX this appears bogus for some early failure conditions */
2074 copyout(&sbytes, CAST_USER_ADDR_T(uap->sbytes), sizeof(off_t));
2075 }
2076 return (error);
2077 done3:
2078 socket_unlock(so, 1);
2079 goto done2;
2080 }
2081
2082 #endif