2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/filedesc.h>
71 #include <sys/proc_internal.h>
72 #include <sys/file_internal.h>
73 #include <sys/malloc.h>
75 #include <kern/lock.h>
76 #include <sys/domain.h>
77 #include <sys/protosw.h>
78 #include <sys/signalvar.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
82 #include <sys/ktrace.h>
84 #include <sys/kernel.h>
85 #include <sys/uio_internal.h>
87 #include <bsm/audit_kernel.h>
89 #include <sys/kdebug.h>
90 #include <sys/sysproto.h>
/*
 * Shorthand field names: each f_* macro below expands to the fg_* member
 * of the same name reached through the f_fglob pointer, so the code in
 * this file can write fp->f_flag, fp->f_type, fp->f_ops, fp->f_data, etc.
 */
92 #define f_flag f_fglob->fg_flag
93 #define f_type f_fglob->fg_type
94 #define f_msgcount f_fglob->fg_msgcount
95 #define f_cred f_fglob->fg_cred
96 #define f_ops f_fglob->fg_ops
97 #define f_offset f_fglob->fg_offset
98 #define f_data f_fglob->fg_data
/*
 * Trace codes for the KERNEL_DEBUG() calls in this file, all built with
 * NETDBG_CODE() under DBG_NETSOCK.  The DBG_FNC_* codes place a
 * per-function number in bits 8 and up; the three send-side codes also
 * set bit 0.  Callers OR the code with DBG_FUNC_START or DBG_FUNC_END.
 * The DBG_LAYER_* codes are not referenced in the visible part of the file.
 */
101 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
102 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
103 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
104 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
105 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
106 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
107 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
108 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
109 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
110 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
/*
 * HACK_FOR_4056224 switches on the workaround in sendto() and recvfrom()
 * that lets send()/recv() operate on a pipe: when the socket path fails
 * with ENOTSOCK and the descriptor is a DTYPE_PIPE, the call is forwarded
 * to write()/read().
 *
 * last_pid_4056224 is compared against p->p_pid in that workaround and
 * raised to it when the caller's pid is larger; the comparison sits next
 * to the printf that reports "uses send/recv on a pipe".
 */
115 #define HACK_FOR_4056224 1
117 static pid_t last_pid_4056224
= 0;
118 #endif /* HACK_FOR_4056224 */
/*
 * Forward declarations and file-scope state for the sf_buf helpers.
 * sf_buf_init is registered through SYSINIT under the name sock_sf with
 * SI_SUB_MBUF / SI_ORDER_ANY and a NULL argument.
 *
 * NOTE(review): presumably this is the buffer pool behind the sendfile(2)
 * support credited in the file header; none of these names is used in the
 * visible portion of the file, so confirm against the definitions.
 */
122 static void sf_buf_init(void *arg
);
123 SYSINIT(sock_sf
, SI_SUB_MBUF
, SI_ORDER_ANY
, sf_buf_init
, NULL
)
124 static struct sf_buf
*sf_buf_alloc(void);
125 static void sf_buf_ref(caddr_t addr
, u_int size
);
126 static void sf_buf_free(caddr_t addr
, u_int size
);
128 static SLIST_HEAD(, sf_buf
) sf_freelist
;
129 static vm_offset_t sf_base
;
130 static struct sf_buf
*sf_bufs
;
131 static int sf_buf_alloc_want
;
/*
 * Prototypes for the file-local workers behind the system-call entry points.
 *
 *   sendit()       - common send path; called by sendto() and sendmsg()
 *                    with a user_msghdr and an already-built uio.
 *   recvit()       - common receive path; called by recvfrom() (passing the
 *                    user address of the name length) and recvmsg()
 *                    (passing 0 for namelenp).
 *   accept1(), getsockname1(), getpeername1()
 *                  - shared bodies whose final `compat` argument is 0 when
 *                    called from accept()/getsockname()/getpeername() and 1
 *                    when called from the o* COMPAT_43_SOCKET variants.
 */
134 static int sendit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
135 int flags
, register_t
*retval
);
136 static int recvit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
137 user_addr_t namelenp
, register_t
*retval
);
139 static int accept1(struct proc
*p
, struct accept_args
*uap
, register_t
*retval
, int compat
);
140 static int getsockname1(struct proc
*p
, struct getsockname_args
*uap
,
141 register_t
*retval
, int compat
);
142 static int getpeername1(struct proc
*p
, struct getpeername_args
*uap
,
143 register_t
*retval
, int compat
);
147 struct orecvmsg_args
{
152 struct osendmsg_args
{
/*
 * Prototypes for the old-style (COMPAT_43_SOCKET) entry points.
 *
 * In the visible code oaccept(), ogetsockname() and ogetpeername() call the
 * shared accept1()/getsockname1()/getpeername1() worker with compat = 1, and
 * orecvfrom() sets MSG_COMPAT in uap->flags before calling recvfrom().
 * osend(), osendmsg(), orecv() and orecvmsg() declare all of their
 * parameters __unused and are commented as no longer supported.
 */
170 int oaccept(struct proc
*p
, struct accept_args
*uap
, register_t
*retval
);
171 int ogetpeername(struct proc
*p
, struct getpeername_args
*uap
, register_t
*retval
);
172 int ogetsockname(struct proc
*p
, struct getsockname_args
*uap
, register_t
*retval
);
173 int orecv(struct proc
*p
, struct orecv_args
*uap
, register_t
*retval
);
174 int orecvfrom(struct proc
*p
, struct recvfrom_args
*uap
, register_t
*retval
);
175 int orecvmsg(struct proc
*p
, struct orecvmsg_args
*uap
, register_t
*retval
);
176 int osend(struct proc
*p
, struct osend_args
*uap
, register_t
*retval
);
177 int osendmsg(struct proc
*p
, struct osendmsg_args
*uap
, register_t
*retval
);
178 #endif // COMPAT_43_SOCKET
181 * System call interface to the socket abstraction.
184 extern struct fileops socketops
;
187 socket(p
, uap
, retval
)
189 register struct socket_args
*uap
;
196 AUDIT_ARG(socket
, uap
->domain
, uap
->type
, uap
->protocol
);
198 error
= falloc(p
, &fp
, &fd
);
202 fp
->f_flag
= FREAD
|FWRITE
;
203 fp
->f_type
= DTYPE_SOCKET
;
204 fp
->f_ops
= &socketops
;
206 error
= socreate(uap
->domain
, &so
, uap
->type
, uap
->protocol
);
210 fp
->f_data
= (caddr_t
)so
;
213 *fdflags(p
, fd
) &= ~UF_RESERVED
;
215 fp_drop(p
, fd
, fp
, 1);
225 bind(struct proc
*p
, struct bind_args
*uap
, __unused register_t
*retval
)
231 AUDIT_ARG(fd
, uap
->s
);
232 error
= file_socket(uap
->s
, &so
);
235 error
= getsockaddr(&sa
, uap
->name
, uap
->namelen
);
238 AUDIT_ARG(sockaddr
, p
, sa
);
240 error
= sobind(so
, sa
);
251 listen(__unused
struct proc
*p
, register struct listen_args
*uap
,
252 __unused register_t
*retval
)
257 AUDIT_ARG(fd
, uap
->s
);
258 error
= file_socket(uap
->s
, &so
);
262 error
= solisten(so
, uap
->backlog
);
269 #if !COMPAT_43_SOCKET
270 #define accept1 accept
276 accept1(struct proc
*p
, struct accept_args
*uap
, register_t
*retval
, int compat
)
282 struct socket
*head
, *so
= NULL
;
283 lck_mtx_t
*mutex_held
;
286 short fflag
; /* type must match fp->f_flag */
289 AUDIT_ARG(fd
, uap
->s
);
291 error
= copyin(uap
->anamelen
, (caddr_t
)&namelen
,
296 error
= fp_getfsock(p
, fd
, &fp
, &head
);
298 if (error
== EOPNOTSUPP
)
307 socket_lock(head
, 1);
309 if (head
->so_proto
->pr_getlock
!= NULL
) {
310 mutex_held
= (*head
->so_proto
->pr_getlock
)(head
, 0);
314 mutex_held
= head
->so_proto
->pr_domain
->dom_mtx
;
319 if ((head
->so_options
& SO_ACCEPTCONN
) == 0) {
320 socket_unlock(head
, 1);
324 if ((head
->so_state
& SS_NBIO
) && head
->so_comp
.tqh_first
== NULL
) {
325 socket_unlock(head
, 1);
329 while (TAILQ_EMPTY(&head
->so_comp
) && head
->so_error
== 0) {
330 if (head
->so_state
& SS_CANTRCVMORE
) {
331 head
->so_error
= ECONNABORTED
;
334 if (head
->so_usecount
< 1)
335 panic("accept1: head=%x refcount=%d\n", head
, head
->so_usecount
);
336 error
= msleep((caddr_t
)&head
->so_timeo
, mutex_held
, PSOCK
| PCATCH
,
338 if (head
->so_usecount
< 1)
339 panic("accept1: 2 head=%x refcount=%d\n", head
, head
->so_usecount
);
340 if ((head
->so_state
& SS_DRAINING
)) {
341 error
= ECONNABORTED
;
344 socket_unlock(head
, 1);
348 if (head
->so_error
) {
349 error
= head
->so_error
;
351 socket_unlock(head
, 1);
357 * At this point we know that there is at least one connection
358 * ready to be accepted. Remove it from the queue prior to
359 * allocating the file descriptor for it since falloc() may
360 * block allowing another process to accept the connection
363 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
364 so
= TAILQ_FIRST(&head
->so_comp
);
365 TAILQ_REMOVE(&head
->so_comp
, so
, so_list
);
367 socket_unlock(head
, 0); /* unlock head to avoid deadlock with select, keep a ref on head */
370 error
= falloc_locked(p
, &fp
, &newfd
, 1);
373 * Probably ran out of file descriptors. Put the
374 * unaccepted connection back onto the queue and
375 * do another wakeup so some other process might
376 * have a chance at it.
379 socket_lock(head
, 0);
380 TAILQ_INSERT_HEAD(&head
->so_comp
, so
, so_list
);
382 wakeup_one((caddr_t
)&head
->so_timeo
);
383 socket_unlock(head
, 1);
386 *fdflags(p
, newfd
) &= ~UF_RESERVED
;
388 fp
->f_type
= DTYPE_SOCKET
;
390 fp
->f_ops
= &socketops
;
391 fp
->f_data
= (caddr_t
)so
;
392 fp_drop(p
, newfd
, fp
, 1);
394 socket_lock(head
, 0);
397 so
->so_state
&= ~SS_COMP
;
400 (void) soacceptlock(so
, &sa
, 0);
401 socket_unlock(head
, 1);
407 socket_unlock(so
, 1);
411 AUDIT_ARG(sockaddr
, p
, sa
);
413 /* check sa_len before it is destroyed */
414 if (namelen
> sa
->sa_len
)
415 namelen
= sa
->sa_len
;
418 ((struct osockaddr
*)sa
)->sa_family
=
421 error
= copyout(sa
, uap
->name
, namelen
);
424 error
= copyout((caddr_t
)&namelen
, uap
->anamelen
,
429 socket_unlock(so
, 1);
436 accept(struct proc
*p
, struct accept_args
*uap
, register_t
*retval
)
439 return (accept1(p
, uap
, retval
, 0));
444 oaccept(struct proc
*p
, struct accept_args
*uap
, register_t
*retval
)
447 return (accept1(p
, uap
, retval
, 1));
449 #endif /* COMPAT_43_SOCKET */
453 connect(struct proc
*p
, struct connect_args
*uap
, __unused register_t
*retval
)
457 lck_mtx_t
*mutex_held
;
461 AUDIT_ARG(fd
, uap
->s
);
462 error
= file_socket( fd
, &so
);
472 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
473 socket_unlock(so
, 1);
477 error
= getsockaddr(&sa
, uap
->name
, uap
->namelen
);
479 socket_unlock(so
, 1);
482 AUDIT_ARG(sockaddr
, p
, sa
);
483 error
= soconnectlock(so
, sa
, 0);
486 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
488 socket_unlock(so
, 1);
492 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
493 if (so
->so_proto
->pr_getlock
!= NULL
)
494 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
496 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
497 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
, PSOCK
| PCATCH
,
499 if ((so
->so_state
& SS_DRAINING
)) {
500 error
= ECONNABORTED
;
506 error
= so
->so_error
;
510 so
->so_state
&= ~SS_ISCONNECTING
;
511 socket_unlock(so
, 1);
513 if (error
== ERESTART
)
521 socketpair(struct proc
*p
, struct socketpair_args
*uap
, __unused register_t
*retval
)
523 struct fileproc
*fp1
, *fp2
;
524 struct socket
*so1
, *so2
;
525 int fd
, error
, sv
[2];
527 AUDIT_ARG(socket
, uap
->domain
, uap
->type
, uap
->protocol
);
528 error
= socreate(uap
->domain
, &so1
, uap
->type
, uap
->protocol
);
531 error
= socreate(uap
->domain
, &so2
, uap
->type
, uap
->protocol
);
535 error
= falloc(p
, &fp1
, &fd
);
539 fp1
->f_flag
= FREAD
|FWRITE
;
540 fp1
->f_type
= DTYPE_SOCKET
;
541 fp1
->f_ops
= &socketops
;
542 fp1
->f_data
= (caddr_t
)so1
;
545 error
= falloc(p
, &fp2
, &fd
);
549 fp2
->f_flag
= FREAD
|FWRITE
;
550 fp2
->f_type
= DTYPE_SOCKET
;
551 fp2
->f_ops
= &socketops
;
552 fp2
->f_data
= (caddr_t
)so2
;
555 error
= soconnect2(so1
, so2
);
559 if (uap
->type
== SOCK_DGRAM
) {
561 * Datagram socket connection is asymmetric.
563 error
= soconnect2(so2
, so1
);
570 *fdflags(p
, sv
[0]) &= ~UF_RESERVED
;
571 *fdflags(p
, sv
[1]) &= ~UF_RESERVED
;
572 fp_drop(p
, sv
[0], fp1
, 1);
573 fp_drop(p
, sv
[1], fp2
, 1);
576 error
= copyout((caddr_t
)sv
, uap
->rsv
, 2 * sizeof(int));
577 #if 0 /* old pipe(2) syscall compatibility, unused these days */
578 retval
[0] = sv
[0]; /* XXX ??? */
579 retval
[1] = sv
[1]; /* XXX ??? */
583 fp_free(p
, sv
[1], fp2
);
585 fp_free(p
, sv
[0], fp1
);
594 sendit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
595 int flags
, register_t
*retval
)
597 struct mbuf
*control
;
606 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_START
, 0,0,0,0,0);
608 error
= file_socket(s
, &so
);
611 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
,0,0,0,0);
616 error
= getsockaddr(&to
, mp
->msg_name
, mp
->msg_namelen
);
618 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
,0,0,0,0);
621 AUDIT_ARG(sockaddr
, p
, to
);
625 if (mp
->msg_control
) {
626 if (mp
->msg_controllen
< ((socklen_t
)sizeof(struct cmsghdr
))
628 && !(mp
->msg_flags
& MSG_COMPAT
)
634 error
= sockargs(&control
, mp
->msg_control
,
635 mp
->msg_controllen
, MT_CONTROL
);
639 if (mp
->msg_flags
& MSG_COMPAT
) {
640 register struct cmsghdr
*cm
;
642 M_PREPEND(control
, sizeof(*cm
), M_WAIT
);
647 cm
= mtod(control
, struct cmsghdr
*);
648 cm
->cmsg_len
= control
->m_len
;
649 cm
->cmsg_level
= SOL_SOCKET
;
650 cm
->cmsg_type
= SCM_RIGHTS
;
659 if (KTRPOINT(p
, KTR_GENIO
)) {
660 ktruio
= uio_duplicate(uiop
);
664 len
= uio_resid(uiop
);
668 error
= so
->so_proto
->pr_usrreqs
->pru_sosend(so
, to
, uiop
, 0, control
,
671 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
672 error
== EINTR
|| error
== EWOULDBLOCK
))
674 /* Generation of SIGPIPE can be controlled per socket */
675 if (error
== EPIPE
&& !(so
->so_flags
& SOF_NOSIGPIPE
))
679 *retval
= (int)(len
- uio_resid(uiop
));
682 if (ktruio
!= NULL
) {
684 uio_setresid(ktruio
, retval
[0]);
685 ktrgenio(p
->p_tracep
, s
, UIO_WRITE
, ktruio
, error
);
692 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
,0,0,0,0);
700 sendto(struct proc
*p
, struct sendto_args
*uap
, register_t
*retval
)
702 struct user_msghdr msg
;
706 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_START
, 0,0,0,0,0);
707 AUDIT_ARG(fd
, uap
->s
);
709 auio
= uio_create(1, 0,
710 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
715 uio_addiov(auio
, uap
->buf
, uap
->len
);
717 msg
.msg_name
= uap
->to
;
718 msg
.msg_namelen
= uap
->tolen
;
719 /* no need to set up msg_iov. sendit uses uio_t we send it */
725 error
= sendit(p
, uap
->s
, &msg
, auio
, uap
->flags
, retval
);
734 * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
735 * This will be removed in the release following Tiger
737 if (error
== ENOTSOCK
) {
740 if (fp_lookup(p
, uap
->s
, &fp
, 0) == 0) {
741 (void) fp_drop(p
, uap
->s
, fp
,0);
743 if (fp
->f_type
== DTYPE_PIPE
) {
744 struct write_args write_uap
;
745 user_ssize_t write_retval
;
747 if (p
->p_pid
> last_pid_4056224
) {
748 last_pid_4056224
= p
->p_pid
;
750 printf("%s[%d] uses send/recv on a pipe\n",
751 p
->p_comm
, p
->p_pid
);
754 bzero(&write_uap
, sizeof(struct write_args
));
755 write_uap
.fd
= uap
->s
;
756 write_uap
.cbuf
= uap
->buf
;
757 write_uap
.nbyte
= uap
->len
;
759 error
= write(p
, &write_uap
, &write_retval
);
760 *retval
= (int)write_retval
;
764 #endif /* HACK_FOR_4056224 */
766 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_END
, error
, *retval
,0,0,0);
773 osend(__unused
struct proc
*p
,
774 __unused
struct osend_args
*uap
,
775 __unused register_t
*retval
)
777 /* these are no longer supported and in fact
778 * there is no way to call it directly.
779 * LP64todo - remove this once we're sure there are no clients
785 osendmsg(__unused
struct proc
*p
,
786 __unused
struct osendmsg_args
*uap
,
787 __unused register_t
*retval
)
789 /* these are no longer supported and in fact
790 * there is no way to call it directly.
791 * LP64todo - remove this once we're sure there are no clients
799 sendmsg(struct proc
*p
, register struct sendmsg_args
*uap
, register_t
*retval
)
802 struct user_msghdr user_msg
;
808 struct user_iovec
*iovp
;
810 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_START
, 0,0,0,0,0);
811 AUDIT_ARG(fd
, uap
->s
);
812 if (IS_64BIT_PROCESS(p
)) {
813 msghdrp
= (caddr_t
) &user_msg
;
814 size_of_msghdr
= sizeof(user_msg
);
815 size_of_iovec
= sizeof(struct user_iovec
);
818 msghdrp
= (caddr_t
) &msg
;
819 size_of_msghdr
= sizeof(msg
);
820 size_of_iovec
= sizeof(struct iovec
);
822 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
825 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
,0,0,0,0);
829 /* only need to copy if user process is not 64-bit */
830 if (!IS_64BIT_PROCESS(p
)) {
831 user_msg
.msg_flags
= msg
.msg_flags
;
832 user_msg
.msg_controllen
= msg
.msg_controllen
;
833 user_msg
.msg_control
= CAST_USER_ADDR_T(msg
.msg_control
);
834 user_msg
.msg_iovlen
= msg
.msg_iovlen
;
835 user_msg
.msg_iov
= CAST_USER_ADDR_T(msg
.msg_iov
);
836 user_msg
.msg_namelen
= msg
.msg_namelen
;
837 user_msg
.msg_name
= CAST_USER_ADDR_T(msg
.msg_name
);
840 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
841 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, EMSGSIZE
,0,0,0,0);
845 /* allocate a uio large enough to hold the number of iovecs passed */
846 auio
= uio_create(user_msg
.msg_iovlen
, 0,
847 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
854 if (user_msg
.msg_iovlen
) {
855 /* get location of iovecs within the uio. then copyin the iovecs from
858 iovp
= uio_iovsaddr(auio
);
863 error
= copyin(user_msg
.msg_iov
, (caddr_t
)iovp
, (user_msg
.msg_iovlen
* size_of_iovec
));
866 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
868 /* finish setup of uio_t */
869 uio_calculateresid(auio
);
872 user_msg
.msg_iov
= 0;
876 user_msg
.msg_flags
= 0;
878 error
= sendit(p
, uap
->s
, &user_msg
, auio
, uap
->flags
, retval
);
883 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
,0,0,0,0);
889 recvit(p
, s
, mp
, uiop
, namelenp
, retval
)
890 register struct proc
*p
;
892 register struct user_msghdr
*mp
;
894 user_addr_t namelenp
;
898 struct mbuf
*m
, *control
= 0;
901 struct sockaddr
*fromsa
= 0;
907 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_START
, 0,0,0,0,0);
909 if ( (error
= fp_lookup(p
, s
, &fp
, 1)) ) {
910 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
,0,0,0,0);
914 if (fp
->f_type
!= DTYPE_SOCKET
) {
920 so
= (struct socket
*)fp
->f_data
;
923 if (uio_resid(uiop
) < 0) {
924 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, EINVAL
,0,0,0,0);
929 if (KTRPOINT(p
, KTR_GENIO
)) {
930 ktruio
= uio_duplicate(uiop
);
934 len
= uio_resid(uiop
);
938 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive(so
, &fromsa
, uiop
,
939 (struct mbuf
**)0, mp
->msg_control
? &control
: (struct mbuf
**)0,
942 AUDIT_ARG(sockaddr
, p
, fromsa
);
944 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
945 error
== EINTR
|| error
== EWOULDBLOCK
))
949 if (ktruio
!= NULL
) {
951 uio_setresid(ktruio
, len
- uio_resid(uiop
));
952 ktrgenio(p
->p_tracep
, s
, UIO_WRITE
, ktruio
, error
);
959 *retval
= len
- uio_resid(uiop
);
961 len
= mp
->msg_namelen
;
962 if (len
<= 0 || fromsa
== 0)
966 #define MIN(a,b) ((a)>(b)?(b):(a))
968 /* save sa_len before it is destroyed by MSG_COMPAT */
969 len
= MIN(len
, fromsa
->sa_len
);
971 if (mp
->msg_flags
& MSG_COMPAT
)
972 ((struct osockaddr
*)fromsa
)->sa_family
=
975 error
= copyout(fromsa
, mp
->msg_name
, (unsigned)len
);
979 mp
->msg_namelen
= len
;
981 (error
= copyout((caddr_t
)&len
, namelenp
, sizeof (int)))) {
983 if (mp
->msg_flags
& MSG_COMPAT
)
984 error
= 0; /* old recvfrom didn't check */
990 if (mp
->msg_control
) {
993 * We assume that old recvmsg calls won't receive access
994 * rights and other control info, esp. as control info
995 * is always optional and those options didn't exist in 4.3.
996 * If we receive rights, trim the cmsghdr; anything else
999 if (control
&& mp
->msg_flags
& MSG_COMPAT
) {
1000 if (mtod(control
, struct cmsghdr
*)->cmsg_level
!=
1002 mtod(control
, struct cmsghdr
*)->cmsg_type
!=
1004 mp
->msg_controllen
= 0;
1007 control
->m_len
-= sizeof (struct cmsghdr
);
1008 control
->m_data
+= sizeof (struct cmsghdr
);
1011 len
= mp
->msg_controllen
;
1013 mp
->msg_controllen
= 0;
1014 ctlbuf
= mp
->msg_control
;
1016 while (m
&& len
> 0) {
1017 unsigned int tocopy
;
1019 if (len
>= m
->m_len
)
1022 mp
->msg_flags
|= MSG_CTRUNC
;
1026 error
= copyout((caddr_t
)mtod(m
, caddr_t
), ctlbuf
, tocopy
);
1034 mp
->msg_controllen
= ctlbuf
- mp
->msg_control
;
1038 FREE(fromsa
, M_SONAME
);
1041 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
,0,0,0,0);
1043 fp_drop(p
, s
, fp
, 0);
1049 recvfrom(p
, uap
, retval
)
1051 register struct recvfrom_args
/* {
1061 struct user_msghdr msg
;
1065 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_START
, 0,0,0,0,0);
1066 AUDIT_ARG(fd
, uap
->s
);
1068 if (uap
->fromlenaddr
) {
1069 error
= copyin(uap
->fromlenaddr
,
1070 (caddr_t
)&msg
.msg_namelen
, sizeof (msg
.msg_namelen
));
1074 msg
.msg_namelen
= 0;
1075 msg
.msg_name
= uap
->from
;
1076 auio
= uio_create(1, 0,
1077 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1083 uio_addiov(auio
, uap
->buf
, uap
->len
);
1084 /* no need to set up msg_iov. recvit uses uio_t we send it */
1087 msg
.msg_control
= 0;
1088 msg
.msg_controllen
= 0;
1089 msg
.msg_flags
= uap
->flags
;
1090 error
= recvit(p
, uap
->s
, &msg
, auio
, uap
->fromlenaddr
, retval
);
1095 #if HACK_FOR_4056224
1098 * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
1099 * This will be removed in the release following Tiger
1101 if (error
== ENOTSOCK
&& proc_is64bit(p
) == 0) {
1102 struct fileproc
*fp
;
1104 if (fp_lookup(p
, uap
->s
, &fp
, 0) == 0) {
1105 (void) fp_drop(p
, uap
->s
, fp
,0);
1107 if (fp
->f_type
== DTYPE_PIPE
) {
1108 struct read_args read_uap
;
1109 user_ssize_t read_retval
;
1111 if (p
->p_pid
> last_pid_4056224
) {
1112 last_pid_4056224
= p
->p_pid
;
1114 printf("%s[%d] uses send/recv on a pipe\n",
1115 p
->p_comm
, p
->p_pid
);
1118 bzero(&read_uap
, sizeof(struct read_args
));
1119 read_uap
.fd
= uap
->s
;
1120 read_uap
.cbuf
= uap
->buf
;
1121 read_uap
.nbyte
= uap
->len
;
1123 error
= read(p
, &read_uap
, &read_retval
);
1124 *retval
= (int)read_retval
;
1128 #endif /* HACK_FOR_4056224 */
1130 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_END
, error
,0,0,0,0);
1135 #if COMPAT_43_SOCKET
1137 orecvfrom(struct proc
*p
, struct recvfrom_args
*uap
, register_t
*retval
)
1140 uap
->flags
|= MSG_COMPAT
;
1141 return (recvfrom(p
, uap
, retval
));
1146 #if COMPAT_43_SOCKET
1148 orecv(__unused
struct proc
*p
, __unused
struct orecv_args
*uap
,
1149 __unused register_t
*retval
)
1151 /* these are no longer supported and in fact
1152 * there is no way to call it directly.
1153 * LP64todo - remove this once we're sure there are no clients
1160 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1161 * overlays the new one, missing only the flags, and with the (old) access
1162 * rights where the control fields are now.
1165 orecvmsg(__unused
struct proc
*p
, __unused
struct orecvmsg_args
*uap
,
1166 __unused register_t
*retval
)
1168 /* these are no longer supported and in fact
1169 * there is no way to call it directly.
1170 * LP64todo - remove this once we're sure there are no clients
1179 recvmsg(p
, uap
, retval
)
1181 struct recvmsg_args
*uap
;
1185 struct user_msghdr user_msg
;
1192 struct user_iovec
*iovp
;
1194 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_START
, 0,0,0,0,0);
1195 AUDIT_ARG(fd
, uap
->s
);
1196 if (IS_64BIT_PROCESS(p
)) {
1197 msghdrp
= (caddr_t
) &user_msg
;
1198 size_of_msghdr
= sizeof(user_msg
);
1199 size_of_iovec
= sizeof(struct user_iovec
);
1202 msghdrp
= (caddr_t
) &msg
;
1203 size_of_msghdr
= sizeof(msg
);
1204 size_of_iovec
= sizeof(struct iovec
);
1206 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
1209 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
,0,0,0,0);
1213 /* only need to copy if user process is not 64-bit */
1214 if (!IS_64BIT_PROCESS(p
)) {
1215 user_msg
.msg_flags
= msg
.msg_flags
;
1216 user_msg
.msg_controllen
= msg
.msg_controllen
;
1217 user_msg
.msg_control
= CAST_USER_ADDR_T(msg
.msg_control
);
1218 user_msg
.msg_iovlen
= msg
.msg_iovlen
;
1219 user_msg
.msg_iov
= CAST_USER_ADDR_T(msg
.msg_iov
);
1220 user_msg
.msg_namelen
= msg
.msg_namelen
;
1221 user_msg
.msg_name
= CAST_USER_ADDR_T(msg
.msg_name
);
1224 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
1225 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, EMSGSIZE
,0,0,0,0);
1229 #if COMPAT_43_SOCKET
1230 user_msg
.msg_flags
= uap
->flags
&~ MSG_COMPAT
;
1232 user_msg
.msg_flags
= uap
->flags
;
1235 /* allocate a uio large enough to hold the number of iovecs passed */
1236 auio
= uio_create(user_msg
.msg_iovlen
, 0,
1237 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1244 /* get location of iovecs within the uio. then copyin the iovecs from
1247 iovp
= uio_iovsaddr(auio
);
1252 uiov
= user_msg
.msg_iov
;
1253 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
1254 error
= copyin(uiov
, (caddr_t
)iovp
, (user_msg
.msg_iovlen
* size_of_iovec
));
1258 /* finish setup of uio_t */
1259 uio_calculateresid(auio
);
1261 error
= recvit(p
, uap
->s
, &user_msg
, auio
, 0, retval
);
1263 user_msg
.msg_iov
= uiov
;
1264 /* only need to copy if user process is not 64-bit */
1265 if (!IS_64BIT_PROCESS(p
)) {
1266 // LP64todo - do all these change? if not, then no need to copy all of them!
1267 msg
.msg_flags
= user_msg
.msg_flags
;
1268 msg
.msg_controllen
= user_msg
.msg_controllen
;
1269 msg
.msg_control
= CAST_DOWN(caddr_t
, user_msg
.msg_control
);
1270 msg
.msg_iovlen
= user_msg
.msg_iovlen
;
1271 msg
.msg_iov
= (struct iovec
*) CAST_DOWN(caddr_t
, user_msg
.msg_iov
);
1272 msg
.msg_namelen
= user_msg
.msg_namelen
;
1273 msg
.msg_name
= CAST_DOWN(caddr_t
, user_msg
.msg_name
);
1275 error
= copyout(msghdrp
, uap
->msg
, size_of_msghdr
);
1281 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
,0,0,0,0);
1287 shutdown(__unused
struct proc
*p
, struct shutdown_args
*uap
, __unused register_t
*retval
)
1292 AUDIT_ARG(fd
, uap
->s
);
1293 error
= file_socket(uap
->s
, &so
);
1300 error
= soshutdown((struct socket
*)so
, uap
->how
);
1312 setsockopt(struct proc
*p
, struct setsockopt_args
*uap
, __unused register_t
*retval
)
1315 struct sockopt sopt
;
1318 AUDIT_ARG(fd
, uap
->s
);
1319 if (uap
->val
== 0 && uap
->valsize
!= 0)
1321 if (uap
->valsize
< 0)
1324 error
= file_socket(uap
->s
, &so
);
1328 sopt
.sopt_dir
= SOPT_SET
;
1329 sopt
.sopt_level
= uap
->level
;
1330 sopt
.sopt_name
= uap
->name
;
1331 sopt
.sopt_val
= uap
->val
;
1332 sopt
.sopt_valsize
= uap
->valsize
;
1339 error
= sosetopt(so
, &sopt
);
1348 getsockopt(struct proc
*p
, struct getsockopt_args
*uap
, __unused register_t
*retval
)
1352 struct sockopt sopt
;
1355 error
= file_socket(uap
->s
, &so
);
1359 error
= copyin(uap
->avalsize
, (caddr_t
)&valsize
, sizeof (valsize
));
1369 sopt
.sopt_dir
= SOPT_GET
;
1370 sopt
.sopt_level
= uap
->level
;
1371 sopt
.sopt_name
= uap
->name
;
1372 sopt
.sopt_val
= uap
->val
;
1373 sopt
.sopt_valsize
= (size_t)valsize
; /* checked non-negative above */
1380 error
= sogetopt((struct socket
*)so
, &sopt
);
1382 valsize
= sopt
.sopt_valsize
;
1383 error
= copyout((caddr_t
)&valsize
, uap
->avalsize
, sizeof (valsize
));
1396 getsockname1(__unused
struct proc
*p
, struct getsockname_args
*uap
, __unused register_t
*retval
,
1400 struct sockaddr
*sa
;
1404 error
= file_socket(uap
->fdes
, &so
);
1407 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof(socklen_t
));
1416 error
= (*so
->so_proto
->pr_usrreqs
->pru_sockaddr
)(so
, &sa
);
1419 struct socket_filter_entry
*filter
;
1421 for (filter
= so
->so_filt
; filter
&& error
== 0;
1422 filter
= filter
->sfe_next_onsocket
) {
1423 if (filter
->sfe_filter
->sf_filter
.sf_getsockname
) {
1427 socket_unlock(so
, 0);
1429 error
= filter
->sfe_filter
->sf_filter
.sf_getsockname(filter
->sfe_cookie
,
1434 if (error
== EJUSTRETURN
)
1442 socket_unlock(so
, 1);
1450 len
= MIN(len
, sa
->sa_len
);
1451 #if COMPAT_43_SOCKET
1453 ((struct osockaddr
*)sa
)->sa_family
= sa
->sa_family
;
1455 error
= copyout((caddr_t
)sa
, uap
->asa
, len
);
1458 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof(socklen_t
));
1463 file_drop(uap
->fdes
);
1468 getsockname(struct proc
*p
, struct getsockname_args
*uap
, register_t
*retval
)
1470 return (getsockname1(p
, uap
, retval
, 0));
1473 #if COMPAT_43_SOCKET
1475 ogetsockname(struct proc
*p
, struct getsockname_args
*uap
, register_t
*retval
)
1477 return (getsockname1(p
, uap
, retval
, 1));
1479 #endif /* COMPAT_43_SOCKET */
1482 * Get name of peer for connected socket.
/*
 * getpeername1: shared worker behind getpeername() and ogetpeername().
 *
 * Visible steps, in listing order: file_socket() on uap->fdes, a test that
 * so_state has SS_ISCONNECTED or SS_ISCONFIRMING set, copyin() of the
 * caller's length from uap->alen, the protocol's pru_peeraddr handler, a
 * walk over the socket's filter list calling each sf_getpeername hook,
 * clamping len to sa->sa_len, copyout() of the address and the length,
 * FREE() of sa when it is non-NULL, and file_drop(uap->fdes).
 *
 * NOTE(review): the last parameter of the signature and a number of body
 * lines (braces, error codes, labels) are missing from this listing; only
 * the visible statements are described.
 */
1486 getpeername1(__unused
struct proc
*p
, struct getpeername_args
*uap
, __unused register_t
*retval
,
1490 struct sockaddr
*sa
;
/* file_socket() receives the descriptor uap->fdes and fills in so. */
1494 error
= file_socket(uap
->fdes
, &so
);
/*
 * Branch taken when neither SS_ISCONNECTED nor SS_ISCONFIRMING is set; the
 * visible part of the branch unlocks the socket.
 */
1504 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONFIRMING
)) == 0) {
1505 socket_unlock(so
, 1);
/* Read the caller's buffer length from user address uap->alen. */
1509 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof(socklen_t
));
1511 socket_unlock(so
, 1);
/* Call the protocol's pru_peeraddr handler; it is passed &sa. */
1515 error
= (*so
->so_proto
->pr_usrreqs
->pru_peeraddr
)(so
, &sa
);
1518 struct socket_filter_entry
*filter
;
/*
 * Walk so->so_filt via sfe_next_onsocket while error stays 0, calling the
 * sf_getpeername hook of every filter that provides one.
 */
1520 for (filter
= so
->so_filt
; filter
&& error
== 0;
1521 filter
= filter
->sfe_next_onsocket
) {
1522 if (filter
->sfe_filter
->sf_filter
.sf_getpeername
) {
/* socket_unlock(so, 0) is issued before the hook is invoked. */
1526 socket_unlock(so
, 0);
1528 error
= filter
->sfe_filter
->sf_filter
.sf_getpeername(filter
->sfe_cookie
,
/* EJUSTRETURN from a filter is tested for; the action taken is not visible. */
1533 if (error
== EJUSTRETURN
)
1541 socket_unlock(so
, 1);
/*
 * Clamp len to the address length, then (below) copy the address to
 * uap->asa and the length to uap->alen.
 * NOTE(review): the right-hand side of the COMPAT_43_SOCKET sa_family
 * assignment is missing from this listing.
 */
1548 len
= MIN(len
, sa
->sa_len
);
1549 #if COMPAT_43_SOCKET
1551 ((struct osockaddr
*)sa
)->sa_family
=
1554 error
= copyout(sa
, uap
->asa
, len
);
1558 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof(socklen_t
));
/* Release the address with FREE(..., M_SONAME) when one was returned. */
1560 if (sa
) FREE(sa
, M_SONAME
);
/* Pairs with the file_socket() call on the same descriptor. */
1562 file_drop(uap
->fdes
);
/*
 * getpeername: thin wrapper that forwards p, uap and retval to
 * getpeername1() with a final argument of 0 and returns its result.
 */
1567 getpeername(struct proc
*p
, struct getpeername_args
*uap
, register_t
*retval
)
1570 return (getpeername1(p
, uap
, retval
, 0));
/*
 * ogetpeername: compiled only when COMPAT_43_SOCKET is set. Forwards p, uap
 * and retval to getpeername1() with a final argument of 1 and returns its
 * result.
 * NOTE(review): presumably the 1 selects 4.3BSD-compatible behaviour; the
 * parameter it binds to is not visible in this listing, so confirm there.
 */
1573 #if COMPAT_43_SOCKET
1575 ogetpeername(struct proc
*p
, struct getpeername_args
*uap
, register_t
*retval
)
1578 return (getpeername1(p
, uap
, retval
, 1));
1580 #endif /* COMPAT_43_SOCKET */
/*
 * sockargs: copy buflen bytes of user data at `data` into a newly obtained
 * mbuf of the given type (K&R-style definition).
 *
 * Visible steps: size checks of buflen against MLEN and MCLBYTES, m_get()
 * with M_WAIT, a second MLEN check that tests for M_EXT on the mbuf,
 * copyin() into the mbuf's data area, and, for MT_SONAME, fixing up the
 * leading struct sockaddr (sa_len is set to buflen).
 *
 * NOTE(review): parameter declarations, error returns and the use of `mp`
 * are not visible in this listing; only the visible statements are
 * described.
 */
1583 sockargs(mp
, data
, buflen
, type
)
1588 register struct sockaddr
*sa
;
1589 register struct mbuf
*m
;
/*
 * Oversized request. With COMPAT_43_SOCKET, an MT_SONAME of at most 112
 * bytes is cut down to MLEN instead of being handled as oversized.
 */
1592 if ((u_int
)buflen
> MLEN
) {
1593 #if COMPAT_43_SOCKET
1594 if (type
== MT_SONAME
&& (u_int
)buflen
<= 112)
1595 buflen
= MLEN
; /* unix domain compat. hack */
1598 if ((u_int
)buflen
> MCLBYTES
)
/* Obtain an mbuf of the requested type; M_WAIT is passed to m_get(). */
1601 m
= m_get(M_WAIT
, type
);
/*
 * For data larger than MLEN the mbuf is expected to carry M_EXT; the
 * handling when it does not is not visible here.
 */
1604 if ((u_int
)buflen
> MLEN
) {
1606 if ((m
->m_flags
& M_EXT
) == 0) {
/* Copy the user data into the mbuf's data area. */
1612 error
= copyin(data
, mtod(m
, caddr_t
), (u_int
)buflen
);
/* Socket names get their sockaddr header normalised. */
1617 if (type
== MT_SONAME
) {
1618 sa
= mtod(m
, struct sockaddr
*);
/*
 * Compat builds on non-big-endian hosts: when sa_family is 0 and sa_len is
 * below AF_MAX, the sa_len byte is taken as the family.
 */
1620 #if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
1621 if (sa
->sa_family
== 0 && sa
->sa_len
< AF_MAX
)
1622 sa
->sa_family
= sa
->sa_len
;
1624 sa
->sa_len
= buflen
;
1631 * Given a user_addr_t of length len, allocate and fill out a *sa.
/*
 * getsockaddr: allocate a struct sockaddr of `len` bytes and fill it from
 * the user address `uaddr`.
 *
 * Returns ENAMETOOLONG when len exceeds SOCK_MAXADDRLEN. Otherwise the
 * buffer is obtained with MALLOC(..., M_SONAME, M_WAITOK) and filled with
 * copyin().
 *
 * NOTE(review): the store through namp, the handling of a copyin() failure
 * and the final return are not visible in this listing.
 */
1634 getsockaddr(struct sockaddr
**namp
, user_addr_t uaddr
, size_t len
)
1636 struct sockaddr
*sa
;
/* Reject addresses longer than SOCK_MAXADDRLEN. */
1639 if (len
> SOCK_MAXADDRLEN
)
1640 return ENAMETOOLONG
;
/* Allocate len bytes for the address. */
1645 MALLOC(sa
, struct sockaddr
*, len
, M_SONAME
, M_WAITOK
);
/* Fill the new buffer from user space. */
1649 error
= copyin(uaddr
, (caddr_t
)sa
, len
);
/*
 * Compat builds on non-big-endian hosts: when sa_family is 0 and sa_len is
 * below AF_MAX, the sa_len byte is taken as the family.
 */
1653 #if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
1654 if (sa
->sa_family
== 0 && sa
->sa_len
< AF_MAX
)
1655 sa
->sa_family
= sa
->sa_len
;
1666 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1667 * XXX - The sf_buf functions are currently private to sendfile(2), so have
1668 * been made static, but may be useful in the future for doing zero-copy in
1669 * other parts of the networking code.
/*
 * sf_buf_init: set up the sf_buf pool.
 *
 * Initialises sf_freelist, asks kmem_alloc_pageable() for
 * nsfbufs * PAGE_SIZE bytes at sf_base, allocates the zeroed sf_bufs array
 * (M_NOWAIT), then gives entry i the address sf_base + i * PAGE_SIZE and
 * pushes it onto sf_freelist. Returns early, leaving sf_bufs NULL, when
 * the array allocation fails.
 *
 * NOTE(review): this listing omits some original lines (return type, local
 * declarations, braces).
 */
1672 sf_buf_init(void *arg
)
1676 SLIST_INIT(&sf_freelist
);
/* NOTE(review): the result of kmem_alloc_pageable() is not used in the visible call. */
1677 kmem_alloc_pageable(kernel_map
, &sf_base
, nsfbufs
* PAGE_SIZE
);
/* M_NOWAIT allocation: may yield NULL, which is tested for just below. */
1678 MALLOC(sf_bufs
, struct sf_buf
*, nsfbufs
* sizeof(struct sf_buf
), M_TEMP
, M_NOWAIT
|M_ZERO
);
1679 if (sf_bufs
== NULL
)
1680 return; /* XXX silently fail leaving sf_bufs NULL */
/* One page-sized slot of the sf_base region per sf_buf, all initially free. */
1682 for (i
= 0; i
< nsfbufs
; i
++) {
1683 sf_bufs
[i
].kva
= sf_base
+ i
* PAGE_SIZE
;
1684 SLIST_INSERT_HEAD(&sf_freelist
, &sf_bufs
[i
], free_list
);
1689 * Get an sf_buf from the freelist. Will block if none are available.
/*
 * Take the first sf_buf off sf_freelist. While the list is empty, set
 * sf_buf_alloc_want and sleep on &sf_freelist (wait message "sfbufa").
 *
 * NOTE(review): the line carrying the function name is missing from this
 * listing; presumably this is sf_buf_alloc() - confirm against the full
 * file. Initialisation of the returned sf_buf and the return statement are
 * also not visible.
 */
1691 static struct sf_buf
*
/* Sleep until an entry is available. */
1696 while ((sf
= SLIST_FIRST(&sf_freelist
)) == NULL
) {
1697 sf_buf_alloc_want
= 1;
1698 tsleep(&sf_freelist
, PVM
, "sfbufa", 0);
/* Unlink the entry that was just read from the head of the list. */
1700 SLIST_REMOVE_HEAD(&sf_freelist
, free_list
);
/*
 * dtosf(x): address of the sf_bufs[] entry for address x, using
 * (x - sf_base) >> PAGE_SHIFT as the array index.
 */
1705 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
/*
 * sf_buf_ref: reference hook for an sf_buf; panics when the sf_buf's
 * refcnt is 0.
 *
 * NOTE(review): the lookup of sf from addr and the reference-count update
 * itself are not visible in this listing.
 */
1707 sf_buf_ref(caddr_t addr
, u_int size
)
/* A refcnt of 0 means the sf_buf is free, so referencing it is fatal. */
1712 if (sf
->refcnt
== 0)
1713 panic("sf_buf_ref: referencing a free sf_buf");
1718 * Lose a reference to an sf_buf. When none left, detach mapped page
1719 * and release resources back to the system.
1721 * Must be called at splimp.
/*
 * sf_buf_free: release hook for an sf_buf.
 *
 * Panics when refcnt is already 0. When refcnt is 0 at the second test,
 * the mapping at addr is removed with pmap_qremove(), the page is unwired,
 * the sf_buf is pushed back onto sf_freelist and, if sf_buf_alloc_want is
 * set, it is cleared and sleepers on &sf_freelist are woken.
 *
 * NOTE(review): the lookup of sf and m, and presumably a refcnt decrement
 * between the two refcnt tests, are on lines missing from this listing -
 * confirm against the full file.
 */
1724 sf_buf_free(caddr_t addr
, u_int size
)
/* Freeing an sf_buf whose refcnt is already 0 is fatal. */
1730 if (sf
->refcnt
== 0)
1731 panic("sf_buf_free: freeing free sf_buf");
/* No references remain: tear down the mapping and unwire the page. */
1733 if (sf
->refcnt
== 0) {
1734 pmap_qremove((vm_offset_t
)addr
, 1);
1736 vm_page_unwire(m
, 0);
1738 * Check for the object going away on us. This can
1739 * happen since we don't hold a reference to it.
1740 * If so, we're responsible for freeing the page.
1742 if (m
->wire_count
== 0 && m
->object
== NULL
)
1743 vm_page_lock_queues();
1745 vm_page_unlock_queues();
/* Put the sf_buf back on the free list and wake any waiter that asked. */
1747 SLIST_INSERT_HEAD(&sf_freelist
, sf
, free_list
);
1748 if (sf_buf_alloc_want
) {
1749 sf_buf_alloc_want
= 0;
1750 wakeup(&sf_freelist
);
1757 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1758 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1760 * Send a file specified by 'fd' and starting at 'offset' to a socket
1761 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1762 * nbytes == 0. Optionally add a header and/or trailer to the socket
1763 * output. If specified, write the total number of bytes sent into *sbytes.
1766 sendfile(struct proc
*p
, struct sendfile_args
*uap
)
1768 struct fileproc
*fp
;
1770 struct vm_object
*obj
;
1775 struct writev_args nuap
;
1776 struct sf_hdtr hdtr
;
1777 off_t off
, xfsize
, sbytes
= 0;
1779 kauth_cred_t safecred
;
1781 if (sf_bufs
== NULL
) {
1782 /* Fail if initialization failed */
1787 * Do argument checking. Must be a regular file in, stream
1788 * type and connected socket out, positive offset.
1790 if (error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))
1792 if (fp
->f_flag
& FREAD
) == 0) {
1797 if (vp
->v_type
!= VREG
|| obj
== NULL
) {
1801 error
= file_socket(uap
->s
, &so
);
1811 if (so
->so_type
!= SOCK_STREAM
) {
1815 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
1819 if (uap
->offset
< 0) {
1825 * If specified, get the pointer to the sf_hdtr struct for
1826 * any headers/trailers.
1828 if (uap
->hdtr
!= NULL
) {
1829 error
= copyin(CAST_USER_ADDR_T(uap
->hdtr
), &hdtr
, sizeof(hdtr
));
1833 * Send any headers. Wimp out and use writev(2).
1835 if (hdtr
.headers
!= NULL
) {
1837 nuap
.iovp
= hdtr
.headers
;
1838 nuap
.iovcnt
= hdtr
.hdr_cnt
;
1839 error
= writev(p
, &nuap
);
1842 sbytes
+= p
->p_retval
[0];
1847 * Protect against multiple writers to the socket.
1849 (void) sblock(&so
->so_snd
, M_WAIT
);
1852 * Loop through the pages in the file, starting with the requested
1853 * offset. Get a file page (do I/O if necessary), map the file page
1854 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1857 for (off
= uap
->offset
; ; off
+= xfsize
, sbytes
+= xfsize
) {
1858 vm_object_offset_t pindex
;
1859 vm_object_offset_t pgoff
;
1861 pindex
= OFF_TO_IDX(off
);
1864 * Calculate the amount to transfer. Not to exceed a page,
1865 * the EOF, or the passed in nbytes.
1867 xfsize
= obj
->un_pager
.vnp
.vnp_size
- off
;
1868 if (xfsize
> PAGE_SIZE_64
)
1870 pgoff
= (vm_object_offset_t
)(off
& PAGE_MASK_64
);
1871 if (PAGE_SIZE
- pgoff
< xfsize
)
1872 xfsize
= PAGE_SIZE_64
- pgoff
;
1873 if (uap
->nbytes
&& xfsize
> (uap
->nbytes
- sbytes
))
1874 xfsize
= uap
->nbytes
- sbytes
;
1878 * Optimize the non-blocking case by looking at the socket space
1879 * before going to the extra work of constituting the sf_buf.
1881 if ((so
->so_state
& SS_NBIO
) && sbspace(&so
->so_snd
) <= 0) {
1882 if (so
->so_state
& SS_CANTSENDMORE
)
1886 sbunlock(&so
->so_snd
, 0); /* will release lock */
1890 * Attempt to look up the page. If the page doesn't exist or the
1891 * part we're interested in isn't valid, then read it from disk.
1892 * If some other part of the kernel has this page (i.e. it's busy),
1893 * then disk I/O may be occurring on it, so wait and retry.
1895 pg
= vm_page_lookup(obj
, pindex
);
1896 if (pg
== NULL
|| (!(pg
->flags
& PG_BUSY
) && !pg
->busy
&&
1897 !vm_page_is_valid(pg
, pgoff
, xfsize
))) {
1903 pg
= vm_page_alloc(obj
, pindex
, VM_ALLOC_NORMAL
);
1909 * don't just clear PG_BUSY manually -
1910 * vm_page_alloc() should be considered opaque,
1911 * use the VM routine provided to clear
1918 * Ensure that our page is still around when the I/O completes.
1920 vm_page_io_start(pg
);
1923 * Get the page from backing store.
1925 bsize
= vp
->v_mount
->mnt_vfsstat
.f_iosize
;
1926 auio
.uio_iov
= &aiov
;
1927 auio
.uio_iovcnt
= 1;
1929 aiov
.iov_len
= MAXBSIZE
;
1930 auio
.uio_offset
= trunc_page(off
);
1931 auio
.uio_segflg
= UIO_NOCOPY
;
1932 auio
.uio_rw
= UIO_READ
;
1933 uio_setresid(&auio
, MAXBSIZE
);
1934 safecred
= kauth_cred_proc_ref(p
);
1935 error
= VOP_READ(vp
, &auio
, IO_VMIO
| ((MAXBSIZE
/ bsize
) << 16),
1937 kauth_cred_unref(&safecred
);
1938 vm_page_flag_clear(pg
, PG_ZERO
);
1939 vm_page_io_finish(pg
);
1941 vm_page_unwire(pg
, 0);
1943 * See if anyone else might know about this page.
1944 * If not and it is not valid, then free it.
1946 if (pg
->wire_count
== 0 && pg
->valid
== 0 &&
1947 pg
->busy
== 0 && !(pg
->flags
& PG_BUSY
) &&
1948 pg
->hold_count
== 0)
1949 vm_page_lock_queues();
1951 vm_page_unlock_queues();
1952 sbunlock(&so
->so_snd
, 0); /* will release socket lock */
1956 if ((pg
->flags
& PG_BUSY
) || pg
->busy
) {
1958 if ((pg
->flags
& PG_BUSY
) || pg
->busy
) {
1960 * Page is busy. Wait and retry.
1962 vm_page_flag_set(pg
, PG_WANTED
);
1963 tsleep(pg
, PVM
, "sfpbsy", 0);
1968 * Protect from having the page ripped out from beneath us.
1973 * Allocate a kernel virtual page and insert the physical page
1976 sf
= sf_buf_alloc();
1978 pmap_qenter(sf
->kva
, &pg
, 1);
1980 * Get an mbuf header and set it up as having external storage.
1982 MGETHDR(m
, M_WAIT
, MT_DATA
);
1985 sbunlock(&so
->so_snd
, 0); /* will release socket lock */
1988 m
->m_ext
.ext_free
= sf_buf_free
;
1989 m
->m_ext
.ext_ref
= sf_buf_ref
;
1990 m
->m_ext
.ext_buf
= (void *)sf
->kva
;
1991 m
->m_ext
.ext_size
= PAGE_SIZE
;
1992 m
->m_data
= (char *) sf
->kva
+ pgoff
;
1993 m
->m_flags
|= M_EXT
;
1994 m
->m_pkthdr
.len
= m
->m_len
= xfsize
;
1996 * Add the buffer to the socket buffer chain.
2000 * Make sure that the socket is still able to take more data.
2001 * CANTSENDMORE being true usually means that the connection
2002 * was closed. so_error is true when an error was sensed after
2004 * The state is checked after the page mapping and buffer
2005 * allocation above since those operations may block and make
2006 * any socket checks stale. From this point forward, nothing
2007 * blocks before the pru_send (or more accurately, any blocking
2008 * results in a loop back to here to re-check).
2010 if ((so
->so_state
& SS_CANTSENDMORE
) || so
->so_error
) {
2011 if (so
->so_state
& SS_CANTSENDMORE
) {
2014 error
= so
->so_error
;
2018 sbunlock(&so
->so_snd
, 0); /* will release socket lock */
2022 * Wait for socket space to become available. We do this just
2023 * after checking the connection state above in order to avoid
2024 * a race condition with sbwait().
2026 if (sbspace(&so
->so_snd
) < so
->so_snd
.sb_lowat
) {
2027 if (so
->so_state
& SS_NBIO
) {
2029 sbunlock(&so
->so_snd
, 0); /* will release socket lock */
2033 error
= sbwait(&so
->so_snd
);
2035 * An error from sbwait usually indicates that we've
2036 * been interrupted by a signal. If we've sent anything
2037 * then return bytes sent, otherwise return the error.
2041 sbunlock(&so
->so_snd
, 0);
2046 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)(so
, 0, m
, 0, 0, p
);
2049 sbunlock(&so
->so_snd
, 0); /* will release socket lock */
2053 sbunlock(&so
->so_snd
, 0); /* will release socket lock */
2056 * Send trailers. Wimp out and use writev(2).
2058 if (uap
->hdtr
!= NULL
&& hdtr
.trailers
!= NULL
) {
2060 nuap
.iovp
= hdtr
.trailers
;
2061 nuap
.iovcnt
= hdtr
.trl_cnt
;
2062 error
= writev(p
, &nuap
);
2065 sbytes
+= p
->p_retval
[0];
2072 if (uap
->sbytes
!= NULL
) {
2073 /* XXX this appears bogus for some early failure conditions */
2074 copyout(&sbytes
, CAST_USER_ADDR_T(uap
->sbytes
), sizeof(off_t
));
2078 socket_unlock(so
, 1);