/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * sendfile(2) and related extensions:
 * Copyright (c) 1998, David Greenman. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/file_internal.h>
#include <sys/vnode_internal.h>
#include <sys/malloc.h>
#include <sys/mcache.h>
#include <kern/locks.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>
#include <kern/task.h>
#include <sys/sysctl.h>
#include <sys/sys_domain.h>

#include <security/audit/audit.h>

#include <sys/kdebug.h>
#include <sys/sysproto.h>
#include <netinet/in.h>
#include <net/route.h>
#include <netinet/in_pcb.h>

#include <os/ptrtools.h>

#if CONFIG_MACF_SOCKET_SUBSET
#include <security/mac_framework.h>
#endif /* MAC_SOCKET_SUBSET */

#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_ops->fo_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

#define DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE	NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X	NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X	NETDBG_CODE(DBG_NETSOCK, (12 << 8))

#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif /* DEBUG || DEVELOPMENT */
/* TODO: should be in header file */
int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);

static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
    int, int32_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t,
    user_addr_t, int32_t *);
static int connectit(struct socket *, struct sockaddr *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
    user_addr_t, size_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
    boolean_t);
#endif /* SENDFILE */
static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
static int connectitx(struct socket *, struct sockaddr *,
    struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
    sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
    int *);
static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);

static int internalize_user_msghdr_array(const void *, int, int, u_int,
    struct user_msghdr_x *, struct uio **);
static u_int externalize_user_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct uio **);

static void free_uio_array(struct uio **, u_int);
static int uio_array_is_valid(struct uio **, u_int);
static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
static int internalize_recv_msghdr_array(const void *, int, int,
    u_int, struct user_msghdr_x *, struct recv_msg_elem *);
static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct recv_msg_elem *);
static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
static void free_recv_msg_array(struct recv_msg_elem *, u_int);

SYSCTL_DECL(_kern_ipc);

static u_int somaxsendmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
static u_int somaxrecvmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
/*
 * System call interface to the socket abstraction.
 */

extern const struct fileops socketops;

/*
 * Returns:	0		Success
 *		EACCES		Mandatory Access Control failure
 *	socreate:EAFNOSUPPORT
 *	socreate:EPROTOTYPE
 *	socreate:EPROTONOSUPPORT
 *	socreate:??? [other protocol families, IPSEC]
 */
int
socket(struct proc *p,
    struct socket_args *uap,
    int32_t *retval)
{
    return socket_common(p, uap->domain, uap->type, uap->protocol,
        proc_selfpid(), retval, 0);
}

int
socket_delegate(struct proc *p,
    struct socket_delegate_args *uap,
    int32_t *retval)
{
    return socket_common(p, uap->domain, uap->type, uap->protocol,
        uap->epid, retval, 1);
}

static int
socket_common(struct proc *p,
    int domain, int type, int protocol, pid_t epid, int32_t *retval,
    int delegate)
{
    struct socket *so;
    struct fileproc *fp;
    int fd, error;

    AUDIT_ARG(socket, domain, type, protocol);
#if CONFIG_MACF_SOCKET_SUBSET
    if ((error = mac_socket_check_create(kauth_cred_get(), domain,
        type, protocol)) != 0) {
        return error;
    }
#endif /* MAC_SOCKET_SUBSET */

    if (delegate) {
        error = priv_check_cred(kauth_cred_get(),
            PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
        if (error) {
            return EACCES;
        }
    }

    error = falloc(p, &fp, &fd, vfs_context_current());
    if (error) {
        return error;
    }
    fp->f_flag = FREAD | FWRITE;
    fp->f_ops = &socketops;

    if (delegate) {
        error = socreate_delegate(domain, &so, type, protocol, epid);
    } else {
        error = socreate(domain, &so, type, protocol);
    }

    if (error) {
        fp_free(p, fd, fp);
    } else {
        fp->f_data = (caddr_t)so;

        proc_fdlock(p);
        procfdtbl_releasefd(p, fd, NULL);

        fp_drop(p, fd, fp, 1);
        proc_fdunlock(p);

        *retval = fd;
        if (ENTR_SHOULDTRACE) {
            KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
                fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
        }
    }
    return error;
}
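
/*
 * Illustrative userspace sketch (not part of xnu): the descriptor returned
 * by this path is an ordinary fd backed by socketops.  Standard POSIX calls
 * only; error handling is minimal.
 *
 *	int s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
 *	if (s == -1)
 *		perror("socket");
 */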
/*
 * Returns:	0		Success
 *		EDESTADDRREQ	Destination address required
 *		EBADF		Bad file descriptor
 *		EACCES		Mandatory Access Control failure
 *	file_socket:ENOTSOCK
 *	getsockaddr:ENAMETOOLONG	Filename too long
 *	getsockaddr:EINVAL		Invalid argument
 *	getsockaddr:ENOMEM		Not enough space
 *	getsockaddr:EFAULT		Bad address
 */
int
bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
{
    struct sockaddr_storage ss;
    struct sockaddr *sa = NULL;
    struct socket *so;
    boolean_t want_free = TRUE;
    int error;

    AUDIT_ARG(fd, uap->s);
    error = file_socket(uap->s, &so);
    if (error != 0) {
        return error;
    }
    if (so == NULL) {
        error = EBADF;
        goto out;
    }
    if (uap->name == USER_ADDR_NULL) {
        error = EDESTADDRREQ;
        goto out;
    }
    if (uap->namelen > sizeof(ss)) {
        error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
    } else {
        error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
        if (error == 0) {
            sa = (struct sockaddr *)&ss;
            want_free = FALSE;
        }
    }
    if (error != 0) {
        goto out;
    }
    AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
#if CONFIG_MACF_SOCKET_SUBSET
    if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
        (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
        error = sobindlock(so, sa, 1);  /* will lock socket */
    }
#else
    error = sobindlock(so, sa, 1);      /* will lock socket */
#endif /* MAC_SOCKET_SUBSET */
    if (want_free) {
        FREE(sa, M_SONAME);
    }
out:
    file_drop(uap->s);
    return error;
}
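
/*
 * Illustrative userspace sketch (not part of xnu): binding an IPv4 socket.
 * On Darwin struct sockaddr carries sa_len, which bind() copies in through
 * getsockaddr{_s}() above.
 *
 *	struct sockaddr_in sin = { 0 };
 *	sin.sin_len = sizeof(sin);
 *	sin.sin_family = AF_INET;
 *	sin.sin_port = htons(8080);
 *	sin.sin_addr.s_addr = htonl(INADDR_ANY);
 *	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
 *		perror("bind");
 */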
/*
 * Returns:	0		Success
 *		EACCES		Mandatory Access Control failure
 *	file_socket:ENOTSOCK
 *	solisten:EOPNOTSUPP
 */
int
listen(__unused struct proc *p, struct listen_args *uap,
    __unused int32_t *retval)
{
    struct socket *so;
    int error;

    AUDIT_ARG(fd, uap->s);
    error = file_socket(uap->s, &so);
    if (error != 0) {
        return error;
    }
    if (so != NULL) {
#if CONFIG_MACF_SOCKET_SUBSET
        error = mac_socket_check_listen(kauth_cred_get(), so);
        if (error == 0) {
            error = solisten(so, uap->backlog);
        }
#else
        { error = solisten(so, uap->backlog); }
#endif /* MAC_SOCKET_SUBSET */
    } else {
        error = EBADF;
    }
    file_drop(uap->s);
    return error;
}
/*
 * Returns:	fp_getfsock:EBADF	Bad file descriptor
 *		fp_getfsock:EOPNOTSUPP	...
 *		xlate => :ENOTSOCK	Socket operation on non-socket
 *		:EFAULT			Bad address on copyin/copyout
 *		:EBADF			Bad file descriptor
 *		:EOPNOTSUPP		Operation not supported on socket
 *		:EINVAL			Invalid argument
 *		:EWOULDBLOCK		Operation would block
 *		:ECONNABORTED		Connection aborted
 *		:EINTR			Interrupted function
 *		:EACCES			Mandatory Access Control failure
 *	falloc_locked:ENFILE		Too many files open in system
 *	falloc_locked::EMFILE		Too many open files
 *	falloc_locked::ENOMEM		Not enough space
 */
int
accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
    int32_t *retval)
{
    struct fileproc *fp;
    struct sockaddr *sa = NULL;
    socklen_t namelen;
    int error;
    struct socket *head, *so = NULL;
    lck_mtx_t *mutex_held;
    int fd = uap->s;
    int newfd;
    short fflag;                /* type must match fp->f_flag */
    int dosocklock = 0;

    *retval = -1;

    AUDIT_ARG(fd, uap->s);

    if (uap->name) {
        error = copyin(uap->anamelen, (caddr_t)&namelen,
            sizeof(socklen_t));
        if (error) {
            return error;
        }
    }
    error = fp_getfsock(p, fd, &fp, &head);
    if (error) {
        if (error == EOPNOTSUPP) {
            error = ENOTSOCK;
        }
        return error;
    }
    if (head == NULL) {
        error = EBADF;
        goto out;
    }
#if CONFIG_MACF_SOCKET_SUBSET
    if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
        goto out;
    }
#endif /* MAC_SOCKET_SUBSET */

    socket_lock(head, 1);

    if (head->so_proto->pr_getlock != NULL) {
        mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
        dosocklock = 1;
    } else {
        mutex_held = head->so_proto->pr_domain->dom_mtx;
        dosocklock = 0;
    }

    if ((head->so_options & SO_ACCEPTCONN) == 0) {
        if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
            error = EOPNOTSUPP;
        } else {
            /* POSIX: The socket is not accepting connections */
            error = EINVAL;
        }
        socket_unlock(head, 1);
        goto out;
    }
    if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
        socket_unlock(head, 1);
        error = EWOULDBLOCK;
        goto out;
    }
    while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
        if (head->so_state & SS_CANTRCVMORE) {
            head->so_error = ECONNABORTED;
            break;
        }
        if (head->so_usecount < 1) {
            panic("accept: head=%p refcount=%d\n", head,
                head->so_usecount);
        }
        error = msleep((caddr_t)&head->so_timeo, mutex_held,
            PSOCK | PCATCH, "accept", 0);
        if (head->so_usecount < 1) {
            panic("accept: 2 head=%p refcount=%d\n", head,
                head->so_usecount);
        }
        if ((head->so_state & SS_DRAINING)) {
            error = ECONNABORTED;
        }
        if (error) {
            socket_unlock(head, 1);
            goto out;
        }
    }
    if (head->so_error) {
        error = head->so_error;
        head->so_error = 0;
        socket_unlock(head, 1);
        goto out;
    }

    /*
     * At this point we know that there is at least one connection
     * ready to be accepted. Remove it from the queue prior to
     * allocating the file descriptor for it since falloc() may
     * block allowing another process to accept the connection
     * instead.
     */
    lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

    so_acquire_accept_list(head, NULL);
    if (TAILQ_EMPTY(&head->so_comp)) {
        so_release_accept_list(head);
        socket_unlock(head, 1);
        error = EWOULDBLOCK;
        goto out;
    }
    so = TAILQ_FIRST(&head->so_comp);
    TAILQ_REMOVE(&head->so_comp, so, so_list);

    so->so_state &= ~SS_COMP;
    head->so_qlen--;
    so_release_accept_list(head);

    /* unlock head to avoid deadlock with select, keep a ref on head */
    socket_unlock(head, 0);

#if CONFIG_MACF_SOCKET_SUBSET
    /*
     * Pass the pre-accepted socket to the MAC framework. This is
     * cheaper than allocating a file descriptor for the socket,
     * calling the protocol accept callback, and possibly freeing
     * the file descriptor should the MAC check fails.
     */
    if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
        socket_lock(so, 1);
        so->so_state &= ~SS_NOFDREF;
        socket_unlock(so, 1);
        soclose(so);
        /* Drop reference on listening socket */
        sodereference(head);
        goto out;
    }
#endif /* MAC_SOCKET_SUBSET */

    /*
     * Pass the pre-accepted socket to any interested socket filter(s).
     * Upon failure, the socket would have been closed by the callee.
     */
    if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
        /* Drop reference on listening socket */
        sodereference(head);
        /* Propagate socket filter's error code to the caller */
        goto out;
    }

    fflag = fp->f_flag;
    error = falloc(p, &fp, &newfd, vfs_context_current());
    if (error) {
        /*
         * Probably ran out of file descriptors.
         *
         * <rdar://problem/8554930>
         * Don't put this back on the socket like we used to, that
         * just causes the client to spin. Drop the socket.
         */
        socket_lock(so, 1);
        so->so_state &= ~SS_NOFDREF;
        socket_unlock(so, 1);
        soclose(so);
        sodereference(head);
        goto out;
    }
    *retval = newfd;
    fp->f_flag = fflag;
    fp->f_ops = &socketops;
    fp->f_data = (caddr_t)so;

    socket_lock(head, 0);
    if (dosocklock) {
        socket_lock(so, 1);
    }

    /* Sync socket non-blocking/async state with file flags */
    if (fp->f_flag & FNONBLOCK) {
        so->so_state |= SS_NBIO;
    } else {
        so->so_state &= ~SS_NBIO;
    }

    if (fp->f_flag & FASYNC) {
        so->so_state |= SS_ASYNC;
        so->so_rcv.sb_flags |= SB_ASYNC;
        so->so_snd.sb_flags |= SB_ASYNC;
    } else {
        so->so_state &= ~SS_ASYNC;
        so->so_rcv.sb_flags &= ~SB_ASYNC;
        so->so_snd.sb_flags &= ~SB_ASYNC;
    }

    (void) soacceptlock(so, &sa, 0);
    socket_unlock(head, 1);

    if (sa == NULL) {
        namelen = 0;
        if (uap->name) {
            goto gotnoname;
        }
        error = 0;
        goto releasefd;
    }
    AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);

    if (uap->name) {
        socklen_t sa_len;

        /* save sa_len before it is destroyed */
        sa_len = sa->sa_len;
        namelen = MIN(namelen, sa_len);
        error = copyout(sa, uap->name, namelen);
        if (!error) {
            /* return the actual, untruncated address length */
            namelen = sa_len;
        }
gotnoname:
        error = copyout((caddr_t)&namelen, uap->anamelen,
            sizeof(socklen_t));
    }
    FREE(sa, M_SONAME);

    /*
     * If the socket has been marked as inactive by sosetdefunct(),
     * disallow further operations on it.
     */
    if (so->so_flags & SOF_DEFUNCT) {
        sodefunct(current_proc(), so,
            SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
    }

    if (dosocklock) {
        socket_unlock(so, 1);
    }

releasefd:
    proc_fdlock(p);
    procfdtbl_releasefd(p, newfd, NULL);
    fp_drop(p, newfd, fp, 1);
    proc_fdunlock(p);

out:
    file_drop(fd);

    if (error == 0 && ENTR_SHOULDTRACE) {
        KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
            newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
    }
    return error;
}

int
accept(struct proc *p, struct accept_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return accept_nocancel(p, (struct accept_nocancel_args *)uap,
        retval);
}
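
/*
 * Illustrative userspace sketch (not part of xnu): a typical accept loop.
 * The peer address is truncated to the caller's namelen on copyout above,
 * while the full length is reported back through *anamelen.
 *
 *	struct sockaddr_storage peer;
 *	socklen_t plen = sizeof(peer);
 *	int c = accept(s, (struct sockaddr *)&peer, &plen);
 *	if (c == -1 && errno == EWOULDBLOCK)
 *		;	// non-blocking listener, retry later
 */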
/*
 * Returns:	0		Success
 *		EBADF		Bad file descriptor
 *		EALREADY	Connection already in progress
 *		EINPROGRESS	Operation in progress
 *		ECONNABORTED	Connection aborted
 *		EINTR		Interrupted function
 *		EACCES		Mandatory Access Control failure
 *	file_socket:ENOTSOCK
 *	getsockaddr:ENAMETOOLONG	Filename too long
 *	getsockaddr:EINVAL		Invalid argument
 *	getsockaddr:ENOMEM		Not enough space
 *	getsockaddr:EFAULT		Bad address
 *	soconnectlock:EOPNOTSUPP
 *	soconnectlock:EISCONN
 *	soconnectlock:??? [depends on protocol, filters]
 *
 * Imputed:	so_error	error may be set from so_error, which
 *				may have been set by soconnectlock.
 */
int
connect(struct proc *p, struct connect_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return connect_nocancel(p, (struct connect_nocancel_args *)uap,
        retval);
}

int
connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
{
#pragma unused(p, retval)
    struct socket *so;
    struct sockaddr_storage ss;
    struct sockaddr *sa = NULL;
    int error;
    int fd = uap->s;
    boolean_t dgram;

    AUDIT_ARG(fd, uap->s);
    error = file_socket(fd, &so);
    if (error != 0) {
        return error;
    }
    if (so == NULL) {
        error = EBADF;
        goto out;
    }

    /*
     * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
     * if this is a datagram socket; translate for other types.
     */
    dgram = (so->so_type == SOCK_DGRAM);

    /* Get socket address now before we obtain socket lock */
    if (uap->namelen > sizeof(ss)) {
        error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
    } else {
        error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
        if (error == 0) {
            sa = (struct sockaddr *)&ss;
        }
    }
    if (error != 0) {
        goto out;
    }

    error = connectit(so, sa);

    if (sa != NULL && sa != SA(&ss)) {
        FREE(sa, M_SONAME);
    }
    if (error == ERESTART) {
        error = EINTR;
    }
out:
    file_drop(fd);
    return error;
}
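
/*
 * Illustrative userspace sketch (not part of xnu): non-blocking connect.
 * EINPROGRESS corresponds to the SS_NBIO && SS_ISCONNECTING case handled
 * by connectit(); completion is usually detected with select/poll/kqueue
 * plus getsockopt(SO_ERROR).
 *
 *	fcntl(s, F_SETFL, O_NONBLOCK);
 *	if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1 &&
 *	    errno == EINPROGRESS) {
 *		// wait for writability, then check SO_ERROR
 *	}
 */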
static int
connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
{
#pragma unused(p, retval)
    struct sockaddr_storage ss, sd;
    struct sockaddr *src = NULL, *dst = NULL;
    struct socket *so;
    int error, error1, fd = uap->socket;
    boolean_t dgram;
    sae_connid_t cid = SAE_CONNID_ANY;
    struct user32_sa_endpoints ep32;
    struct user64_sa_endpoints ep64;
    struct user_sa_endpoints ep;
    user_ssize_t bytes_written = 0;
    struct user_iovec *iovp;
    uio_t auio = NULL;

    AUDIT_ARG(fd, uap->socket);
    error = file_socket(fd, &so);
    if (error != 0) {
        return error;
    }
    if (so == NULL) {
        error = EBADF;
        goto out;
    }

    if (uap->endpoints == USER_ADDR_NULL) {
        error = EINVAL;
        goto out;
    }

    if (IS_64BIT_PROCESS(p)) {
        error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
        if (error != 0) {
            goto out;
        }

        ep.sae_srcif = ep64.sae_srcif;
        ep.sae_srcaddr = ep64.sae_srcaddr;
        ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
        ep.sae_dstaddr = ep64.sae_dstaddr;
        ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
    } else {
        error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
        if (error != 0) {
            goto out;
        }

        ep.sae_srcif = ep32.sae_srcif;
        ep.sae_srcaddr = ep32.sae_srcaddr;
        ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
        ep.sae_dstaddr = ep32.sae_dstaddr;
        ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
    }

    /*
     * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
     * if this is a datagram socket; translate for other types.
     */
    dgram = (so->so_type == SOCK_DGRAM);

    /* Get socket address now before we obtain socket lock */
    if (ep.sae_srcaddr != USER_ADDR_NULL) {
        if (ep.sae_srcaddrlen > sizeof(ss)) {
            error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
        } else {
            error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram);
            if (error == 0) {
                src = (struct sockaddr *)&ss;
            }
        }
        if (error != 0) {
            goto out;
        }
    }

    if (ep.sae_dstaddr == USER_ADDR_NULL) {
        error = EINVAL;
        goto out;
    }

    /* Get socket address now before we obtain socket lock */
    if (ep.sae_dstaddrlen > sizeof(sd)) {
        error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
    } else {
        error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram);
        if (error == 0) {
            dst = (struct sockaddr *)&sd;
        }
    }
    if (error != 0) {
        goto out;
    }

    if (uap->iov != USER_ADDR_NULL) {
        /* Verify range before calling uio_create() */
        if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
            error = EINVAL;
            goto out;
        }

        if (uap->len == USER_ADDR_NULL) {
            error = EINVAL;
            goto out;
        }

        /* allocate a uio to hold the number of iovecs passed */
        auio = uio_create(uap->iovcnt, 0,
            (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
            UIO_WRITE);

        /*
         * get location of iovecs within the uio.
         * then copyin the iovecs from user space.
         */
        iovp = uio_iovsaddr(auio);
        if (iovp == NULL) {
            error = ENOMEM;
            goto out;
        }
        error = copyin_user_iovec_array(uap->iov,
            IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
            uap->iovcnt, iovp);
        if (error != 0) {
            goto out;
        }

        /* finish setup of uio_t */
        error = uio_calculateresid(auio);
        if (error != 0) {
            goto out;
        }
    }

    error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd,
        &cid, auio, uap->flags, &bytes_written);
    if (error == ERESTART) {
        error = EINTR;
    }

    if (uap->len != USER_ADDR_NULL) {
        error1 = copyout(&bytes_written, uap->len, sizeof(uap->len));
        /* give precedence to connectitx errors */
        if ((error1 != 0) && (error == 0)) {
            error = error1;
        }
    }

    if (uap->connid != USER_ADDR_NULL) {
        error1 = copyout(&cid, uap->connid, sizeof(cid));
        /* give precedence to connectitx errors */
        if ((error1 != 0) && (error == 0)) {
            error = error1;
        }
    }
out:
    file_drop(fd);
    if (auio != NULL) {
        uio_free(auio);
    }
    if (src != NULL && src != SA(&ss)) {
        FREE(src, M_SONAME);
    }
    if (dst != NULL && dst != SA(&sd)) {
        FREE(dst, M_SONAME);
    }
    return error;
}

int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
    /*
     * Due to similarity with a POSIX interface, define as
     * an unofficial cancellation point.
     */
    __pthread_testcancel(1);
    return connectx_nocancel(p, uap, retval);
}
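
/*
 * Illustrative userspace sketch (not part of xnu), based on the public
 * connectx(2) wrapper as documented on Darwin; the exact prototype and
 * flag set may vary by SDK.  Passing data with CONNECT_DATA_IDEMPOTENT is
 * the TFO-style path handled by connectitx() above.
 *
 *	sa_endpoints_t ep = { 0 };
 *	ep.sae_dstaddr = (struct sockaddr *)&sin;
 *	ep.sae_dstaddrlen = sizeof(sin);
 *	struct iovec iov = { .iov_base = req, .iov_len = reqlen };
 *	size_t sent = 0;
 *	connectx(s, &ep, SAE_ASSOCID_ANY, CONNECT_DATA_IDEMPOTENT,
 *	    &iov, 1, &sent, NULL);
 */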
static int
connectit(struct socket *so, struct sockaddr *sa)
{
    int error;

    AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
#if CONFIG_MACF_SOCKET_SUBSET
    if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
        return error;
    }
#endif /* MAC_SOCKET_SUBSET */

    socket_lock(so, 1);
    if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
        error = EALREADY;
        goto out;
    }
    error = soconnectlock(so, sa, 0);
    if (error != 0) {
        so->so_state &= ~SS_ISCONNECTING;
        goto out;
    }
    if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
        error = EINPROGRESS;
        goto out;
    }
    while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
        lck_mtx_t *mutex_held;

        if (so->so_proto->pr_getlock != NULL) {
            mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
        } else {
            mutex_held = so->so_proto->pr_domain->dom_mtx;
        }
        error = msleep((caddr_t)&so->so_timeo, mutex_held,
            PSOCK | PCATCH, __func__, 0);
        if (so->so_state & SS_DRAINING) {
            error = ECONNABORTED;
        }
        if (error != 0) {
            break;
        }
    }
    if (error == 0) {
        error = so->so_error;
        so->so_error = 0;
    }
out:
    socket_unlock(so, 1);
    return error;
}
static int
connectitx(struct socket *so, struct sockaddr *src,
    struct sockaddr *dst, struct proc *p, uint32_t ifscope,
    sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
    user_ssize_t *bytes_written)
{
    int error;
#pragma unused (flags)

    VERIFY(dst != NULL);

    AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst);
#if CONFIG_MACF_SOCKET_SUBSET
    if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) {
        return error;
    }

    if (auio != NULL) {
        if ((error = mac_socket_check_send(kauth_cred_get(), so, dst)) != 0) {
            return error;
        }
    }
#endif /* MAC_SOCKET_SUBSET */

    socket_lock(so, 1);
    if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
        error = EALREADY;
        goto out;
    }

    if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
        (flags & CONNECT_DATA_IDEMPOTENT)) {
        so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
    }

    if (flags & CONNECT_DATA_AUTHENTICATED) {
        so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
    }

    /*
     * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
     * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
     * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
     * Case 3 allows user to combine write with connect even if they have
     * no use for TFO (such as regular TCP, and UDP).
     * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
     */
    if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
        ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
        so->so_flags1 |= SOF1_PRECONNECT_DATA;
    }

    /*
     * If a user sets data idempotent and does not pass an uio, or
     * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
     * SOF1_DATA_IDEMPOTENT.
     */
    if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
        (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
        /* We should return EINVAL instead perhaps. */
        so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
    }

    error = soconnectxlocked(so, src, dst, p, ifscope,
        aid, pcid, 0, NULL, 0, auio, bytes_written);
    if (error != 0) {
        so->so_state &= ~SS_ISCONNECTING;
        goto out;
    }
    /*
     * If, after the call to soconnectxlocked the flag is still set (in case
     * data has been queued and the connect() has actually been triggered,
     * it will have been unset by the transport), we exit immediately. There
     * is no reason to wait on any event.
     */
    if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
        error = 0;
        goto out;
    }
    if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
        error = EINPROGRESS;
        goto out;
    }
    while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
        lck_mtx_t *mutex_held;

        if (so->so_proto->pr_getlock != NULL) {
            mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
        } else {
            mutex_held = so->so_proto->pr_domain->dom_mtx;
        }
        error = msleep((caddr_t)&so->so_timeo, mutex_held,
            PSOCK | PCATCH, __func__, 0);
        if (so->so_state & SS_DRAINING) {
            error = ECONNABORTED;
        }
        if (error != 0) {
            break;
        }
    }
    if (error == 0) {
        error = so->so_error;
        so->so_error = 0;
    }
out:
    socket_unlock(so, 1);
    return error;
}
int
peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
{
#pragma unused(p, uap, retval)
    /*
     * Due to similarity with a POSIX interface, define as
     * an unofficial cancellation point.
     */
    __pthread_testcancel(1);
    return 0;
}

int
disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
{
    /*
     * Due to similarity with a POSIX interface, define as
     * an unofficial cancellation point.
     */
    __pthread_testcancel(1);
    return disconnectx_nocancel(p, uap, retval);
}

static int
disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
{
#pragma unused(p, retval)
    struct socket *so;
    int fd = uap->s;
    int error;

    error = file_socket(fd, &so);
    if (error != 0) {
        return error;
    }
    if (so == NULL) {
        error = EBADF;
        goto out;
    }

    error = sodisconnectx(so, uap->aid, uap->cid);
out:
    file_drop(fd);
    return error;
}
/*
 * Returns:	0		Success
 *	socreate:EAFNOSUPPORT
 *	socreate:EPROTOTYPE
 *	socreate:EPROTONOSUPPORT
 *	socreate:??? [other protocol families, IPSEC]
 *	soconnect2:EPROTOTYPE
 *	soconnect2:??? [other protocol families]
 */
int
socketpair(struct proc *p, struct socketpair_args *uap,
    __unused int32_t *retval)
{
    struct fileproc *fp1, *fp2;
    struct socket *so1, *so2;
    int fd, error, sv[2];

    AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
    error = socreate(uap->domain, &so1, uap->type, uap->protocol);
    if (error) {
        return error;
    }
    error = socreate(uap->domain, &so2, uap->type, uap->protocol);
    if (error) {
        goto free1;
    }

    error = falloc(p, &fp1, &fd, vfs_context_current());
    if (error) {
        goto free2;
    }
    fp1->f_flag = FREAD | FWRITE;
    fp1->f_ops = &socketops;
    fp1->f_data = (caddr_t)so1;
    sv[0] = fd;

    error = falloc(p, &fp2, &fd, vfs_context_current());
    if (error) {
        goto free3;
    }
    fp2->f_flag = FREAD | FWRITE;
    fp2->f_ops = &socketops;
    fp2->f_data = (caddr_t)so2;
    sv[1] = fd;

    error = soconnect2(so1, so2);
    if (error) {
        goto free4;
    }
    if (uap->type == SOCK_DGRAM) {
        /*
         * Datagram socket connection is asymmetric.
         */
        error = soconnect2(so2, so1);
        if (error) {
            goto free4;
        }
    }

    if ((error = copyout(sv, uap->rsv, 2 * sizeof(int))) != 0) {
        goto free4;
    }

    proc_fdlock(p);
    procfdtbl_releasefd(p, sv[0], NULL);
    procfdtbl_releasefd(p, sv[1], NULL);
    fp_drop(p, sv[0], fp1, 1);
    fp_drop(p, sv[1], fp2, 1);
    proc_fdunlock(p);

    return 0;
free4:
    fp_free(p, sv[1], fp2);
free3:
    fp_free(p, sv[0], fp1);
free2:
    (void) soclose(so2);
free1:
    (void) soclose(so1);
    return error;
}
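
/*
 * Illustrative userspace sketch (not part of xnu): socketpair() returns two
 * connected descriptors; for SOCK_DGRAM the kernel performs the second,
 * asymmetric soconnect2() seen above.
 *
 *	int sv[2];
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1)
 *		perror("socketpair");
 *	write(sv[0], "ping", 4);	// readable on sv[1]
 */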
/*
 * Returns:	0		Success
 *		EACCES		Mandatory Access Control failure
 *	file_socket:ENOTSOCK
 *	getsockaddr:ENAMETOOLONG	Filename too long
 *	getsockaddr:EINVAL		Invalid argument
 *	getsockaddr:ENOMEM		Not enough space
 *	getsockaddr:EFAULT		Bad address
 *	<pru_sosend>:EACCES[TCP]
 *	<pru_sosend>:EADDRINUSE[TCP]
 *	<pru_sosend>:EADDRNOTAVAIL[TCP]
 *	<pru_sosend>:EAFNOSUPPORT[TCP]
 *	<pru_sosend>:EAGAIN[TCP]
 *	<pru_sosend>:EBADF
 *	<pru_sosend>:ECONNRESET[TCP]
 *	<pru_sosend>:EFAULT
 *	<pru_sosend>:EHOSTUNREACH[TCP]
 *	<pru_sosend>:EINTR
 *	<pru_sosend>:EINVAL
 *	<pru_sosend>:EISCONN[AF_INET]
 *	<pru_sosend>:EMSGSIZE[TCP]
 *	<pru_sosend>:ENETDOWN[TCP]
 *	<pru_sosend>:ENETUNREACH[TCP]
 *	<pru_sosend>:ENOBUFS
 *	<pru_sosend>:ENOMEM[TCP]
 *	<pru_sosend>:ENOTCONN[AF_INET]
 *	<pru_sosend>:EOPNOTSUPP
 *	<pru_sosend>:EPERM[TCP]
 *	<pru_sosend>:EPIPE
 *	<pru_sosend>:EWOULDBLOCK
 *	<pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
 *	<pru_sosend>:???[AF_INET] [whatever a filter author chooses]
 *	<pru_sosend>:??? [value from so_error]
 */
static int
sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
    int flags, int32_t *retval)
{
    struct mbuf *control = NULL;
    struct sockaddr_storage ss;
    struct sockaddr *to = NULL;
    boolean_t want_free = TRUE;
    int error;
    user_ssize_t len;

    KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);

    if (mp->msg_name != USER_ADDR_NULL) {
        if (mp->msg_namelen > sizeof(ss)) {
            error = getsockaddr(so, &to, mp->msg_name,
                mp->msg_namelen, TRUE);
        } else {
            error = getsockaddr_s(so, &ss, mp->msg_name,
                mp->msg_namelen, TRUE);
            if (error == 0) {
                to = (struct sockaddr *)&ss;
                want_free = FALSE;
            }
        }
        if (error != 0) {
            goto out;
        }
        AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
    }
    if (mp->msg_control != USER_ADDR_NULL) {
        if (mp->msg_controllen < sizeof(struct cmsghdr)) {
            error = EINVAL;
            goto bad;
        }
        error = sockargs(&control, mp->msg_control,
            mp->msg_controllen, MT_CONTROL);
        if (error != 0) {
            goto bad;
        }
    }

#if CONFIG_MACF_SOCKET_SUBSET
    /*
     * We check the state without holding the socket lock;
     * if a race condition occurs, it would simply result
     * in an extra call to the MAC check function.
     */
    if (to != NULL &&
        !(so->so_state & SS_DEFUNCT) &&
        (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) {
        goto bad;
    }
#endif /* MAC_SOCKET_SUBSET */

    len = uio_resid(uiop);
    error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
        control, flags);
    if (error != 0) {
        if (uio_resid(uiop) != len && (error == ERESTART ||
            error == EINTR || error == EWOULDBLOCK)) {
            error = 0;
        }
        /* Generation of SIGPIPE can be controlled per socket */
        if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
            psignal(p, SIGPIPE);
        }
    }
    if (error == 0) {
        *retval = (int)(len - uio_resid(uiop));
    }
bad:
    if (to != NULL && want_free) {
        FREE(to, M_SONAME);
    }
out:
    KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);

    return error;
}
/*
 * Returns:	0		Success
 *	sendit:??? [see sendit definition in this file]
 *	write:??? [4056224: applicable for pipes]
 */
int
sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval);
}

int
sendto_nocancel(struct proc *p,
    struct sendto_nocancel_args *uap,
    int32_t *retval)
{
    struct user_msghdr msg;
    int error;
    uio_t auio = NULL;
    struct socket *so;

    KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
    AUDIT_ARG(fd, uap->s);

    if (uap->flags & MSG_SKIPCFIL) {
        error = EPERM;
        goto done;
    }

    auio = uio_create(1, 0,
        (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
        UIO_WRITE);
    if (auio == NULL) {
        error = ENOMEM;
        goto done;
    }
    uio_addiov(auio, uap->buf, uap->len);

    msg.msg_name = uap->to;
    msg.msg_namelen = uap->tolen;
    /* no need to set up msg_iov.  sendit uses uio_t we send it */
    msg.msg_iov = 0;
    msg.msg_iovlen = 0;
    msg.msg_control = 0;
    msg.msg_controllen = 0;
    msg.msg_flags = 0;

    error = file_socket(uap->s, &so);
    if (error) {
        goto done;
    }
    if (so == NULL) {
        error = EBADF;
    } else {
        error = sendit(p, so, &msg, auio, uap->flags, retval);
    }
    file_drop(uap->s);
done:
    if (auio != NULL) {
        uio_free(auio);
    }

    KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);

    return error;
}
/*
 * Returns:	0		Success
 *	sendit:??? [see sendit definition in this file]
 */
int
sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap,
        retval);
}

int
sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
    int32_t *retval)
{
    struct user32_msghdr msg32;
    struct user64_msghdr msg64;
    struct user_msghdr user_msg;
    caddr_t msghdrp;
    int size_of_msghdr;
    int error;
    uio_t auio = NULL;
    struct user_iovec *iovp;
    struct socket *so;

    KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
    AUDIT_ARG(fd, uap->s);

    if (uap->flags & MSG_SKIPCFIL) {
        error = EPERM;
        goto done;
    }

    if (IS_64BIT_PROCESS(p)) {
        msghdrp = (caddr_t)&msg64;
        size_of_msghdr = sizeof(msg64);
    } else {
        msghdrp = (caddr_t)&msg32;
        size_of_msghdr = sizeof(msg32);
    }
    error = copyin(uap->msg, msghdrp, size_of_msghdr);
    if (error) {
        KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
        return error;
    }

    if (IS_64BIT_PROCESS(p)) {
        user_msg.msg_flags = msg64.msg_flags;
        user_msg.msg_controllen = msg64.msg_controllen;
        user_msg.msg_control = msg64.msg_control;
        user_msg.msg_iovlen = msg64.msg_iovlen;
        user_msg.msg_iov = msg64.msg_iov;
        user_msg.msg_namelen = msg64.msg_namelen;
        user_msg.msg_name = msg64.msg_name;
    } else {
        user_msg.msg_flags = msg32.msg_flags;
        user_msg.msg_controllen = msg32.msg_controllen;
        user_msg.msg_control = msg32.msg_control;
        user_msg.msg_iovlen = msg32.msg_iovlen;
        user_msg.msg_iov = msg32.msg_iov;
        user_msg.msg_namelen = msg32.msg_namelen;
        user_msg.msg_name = msg32.msg_name;
    }

    if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
        KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
            0, 0, 0, 0);
        return EMSGSIZE;
    }

    /* allocate a uio large enough to hold the number of iovecs passed */
    auio = uio_create(user_msg.msg_iovlen, 0,
        (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
        UIO_WRITE);
    if (auio == NULL) {
        error = ENOBUFS;
        goto done;
    }

    if (user_msg.msg_iovlen) {
        /*
         * get location of iovecs within the uio.
         * then copyin the iovecs from user space.
         */
        iovp = uio_iovsaddr(auio);
        if (iovp == NULL) {
            error = ENOBUFS;
            goto done;
        }
        error = copyin_user_iovec_array(user_msg.msg_iov,
            IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
            user_msg.msg_iovlen, iovp);
        if (error) {
            goto done;
        }
        user_msg.msg_iov = CAST_USER_ADDR_T(iovp);

        /* finish setup of uio_t */
        error = uio_calculateresid(auio);
        if (error) {
            goto done;
        }
    } else {
        user_msg.msg_iov = 0;
    }

    /* msg_flags is ignored for send */
    user_msg.msg_flags = 0;

    error = file_socket(uap->s, &so);
    if (error) {
        goto done;
    }
    if (so == NULL) {
        error = EBADF;
    } else {
        error = sendit(p, so, &user_msg, auio, uap->flags, retval);
    }
    file_drop(uap->s);
done:
    if (auio != NULL) {
        uio_free(auio);
    }
    KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);

    return error;
}
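
/*
 * Illustrative userspace sketch (not part of xnu): gathering writes with
 * sendmsg().  The msg_iov array is copied in above via
 * copyin_user_iovec_array() and carried in a uio_t.
 *
 *	struct iovec iov[2] = {
 *		{ .iov_base = hdr, .iov_len = hdrlen },
 *		{ .iov_base = body, .iov_len = bodylen },
 *	};
 *	struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2 };
 *	ssize_t n = sendmsg(s, &msg, 0);
 */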
int
sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
{
    int error = 0;
    struct user_msghdr_x *user_msg_x = NULL;
    struct uio **uiop = NULL;
    struct socket *so;
    u_int i;
    struct sockaddr *to = NULL;
    user_ssize_t len_before = 0, len_after;
    int need_drop = 0;
    size_t size_of_msghdr;
    void *umsgp = NULL;
    u_int uiocnt;
    int has_addr_or_ctl = 0;

    KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);

    if (uap->flags & MSG_SKIPCFIL) {
        error = EPERM;
        goto out;
    }

    error = file_socket(uap->s, &so);
    if (error) {
        goto out;
    }
    need_drop = 1;
    if (so == NULL) {
        error = EBADF;
        goto out;
    }

    /*
     * Input parameter range check
     */
    if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
        error = EINVAL;
        goto out;
    }
    /*
     * Clip to max currently allowed
     */
    if (uap->cnt > somaxsendmsgx) {
        uap->cnt = somaxsendmsgx;
    }

    user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
        M_TEMP, M_WAITOK | M_ZERO);
    if (user_msg_x == NULL) {
        DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
        error = ENOMEM;
        goto out;
    }
    uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
        M_TEMP, M_WAITOK | M_ZERO);
    if (uiop == NULL) {
        DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
        error = ENOMEM;
        goto out;
    }

    size_of_msghdr = IS_64BIT_PROCESS(p) ?
        sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);

    umsgp = _MALLOC(uap->cnt * size_of_msghdr,
        M_TEMP, M_WAITOK | M_ZERO);
    if (umsgp == NULL) {
        printf("%s _MALLOC() user_msg_x failed\n", __func__);
        error = ENOMEM;
        goto out;
    }
    error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
    if (error) {
        DBG_PRINTF("%s copyin() failed\n", __func__);
        goto out;
    }
    error = internalize_user_msghdr_array(umsgp,
        IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
        UIO_WRITE, uap->cnt, user_msg_x, uiop);
    if (error) {
        DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
        goto out;
    }
    /*
     * Make sure the size of each message iovec and
     * the aggregate size of all the iovec is valid
     */
    if (uio_array_is_valid(uiop, uap->cnt) == 0) {
        error = EINVAL;
        goto out;
    }

    /*
     * Sanity check on passed arguments
     */
    for (i = 0; i < uap->cnt; i++) {
        struct user_msghdr_x *mp = user_msg_x + i;

        /*
         * No flags on send message
         */
        if (mp->msg_flags != 0) {
            error = EINVAL;
            goto out;
        }
        /*
         * No support for address or ancillary data (yet)
         */
        if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) {
            has_addr_or_ctl = 1;
        }

        if (mp->msg_control != USER_ADDR_NULL ||
            mp->msg_controllen != 0) {
            has_addr_or_ctl = 1;
        }
    }
#if CONFIG_MACF_SOCKET_SUBSET
    /*
     * We check the state without holding the socket lock;
     * if a race condition occurs, it would simply result
     * in an extra call to the MAC check function.
     *
     * Note: The following check is never true taken with the
     * current limitation that we do not accept to pass an address,
     * this is effectively placeholder code. If we add support for
     * addresses, we will have to check every address.
     */
    if (to != NULL &&
        !(so->so_state & SS_DEFUNCT) &&
        (error = mac_socket_check_send(kauth_cred_get(), so, to))
        != 0) {
        goto out;
    }
#endif /* MAC_SOCKET_SUBSET */

    len_before = uio_array_resid(uiop, uap->cnt);

    /*
     * Feed list of packets at once only for connected socket without
     * address or ancillary data
     */
    if (so->so_proto->pr_usrreqs->pru_sosend_list !=
        pru_sosend_list_notsupp &&
        has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
        error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
            uap->cnt, uap->flags);
    } else {
        for (i = 0; i < uap->cnt; i++) {
            struct user_msghdr_x *mp = user_msg_x + i;
            struct user_msghdr user_msg;
            uio_t auio = uiop[i];
            int32_t tmpval;

            user_msg.msg_flags = mp->msg_flags;
            user_msg.msg_controllen = mp->msg_controllen;
            user_msg.msg_control = mp->msg_control;
            user_msg.msg_iovlen = mp->msg_iovlen;
            user_msg.msg_iov = mp->msg_iov;
            user_msg.msg_namelen = mp->msg_namelen;
            user_msg.msg_name = mp->msg_name;

            error = sendit(p, so, &user_msg, auio, uap->flags,
                &tmpval);
            if (error != 0) {
                break;
            }
        }
    }
    len_after = uio_array_resid(uiop, uap->cnt);

    VERIFY(len_after <= len_before);

    if (error != 0) {
        if (len_after != len_before && (error == ERESTART ||
            error == EINTR || error == EWOULDBLOCK ||
            error == ENOBUFS)) {
            error = 0;
        }
        /* Generation of SIGPIPE can be controlled per socket */
        if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) {
            psignal(p, SIGPIPE);
        }
    }
    if (error == 0) {
        uiocnt = externalize_user_msghdr_array(umsgp,
            IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
            UIO_WRITE, uap->cnt, user_msg_x, uiop);

        *retval = (int)(uiocnt);
    }
out:
    if (need_drop) {
        file_drop(uap->s);
    }
    if (umsgp != NULL) {
        _FREE(umsgp, M_TEMP);
    }
    if (uiop != NULL) {
        free_uio_array(uiop, uap->cnt);
        _FREE(uiop, M_TEMP);
    }
    if (user_msg_x != NULL) {
        _FREE(user_msg_x, M_TEMP);
    }

    KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);

    return error;
}
static int
copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
{
    int error = 0;
    socklen_t sa_len = 0;
    ssize_t len;

    len = *namelen;
    if (len <= 0 || fromsa == 0) {
        len = 0;
    } else {
#ifndef MIN
#define MIN(a, b) ((a) > (b) ? (b) : (a))
#endif
        sa_len = fromsa->sa_len;
        len = MIN((unsigned int)len, sa_len);
        error = copyout(fromsa, name, (unsigned)len);
        if (error) {
            goto out;
        }
    }
    *namelen = sa_len;
out:
    return error;
}
static int
copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
    socklen_t *controllen, int *flags)
{
    int error = 0;
    socklen_t len;
    user_addr_t ctlbuf;

    len = *controllen;
    *controllen = 0;
    ctlbuf = control;

    while (m && len > 0) {
        unsigned int tocopy;
        struct cmsghdr *cp = mtod(m, struct cmsghdr *);
        int cp_size = CMSG_ALIGN(cp->cmsg_len);
        int buflen = m->m_len;

        while (buflen > 0 && len > 0) {
            /*
             * SCM_TIMESTAMP hack because struct timeval has a
             * different size for 32 bits and 64 bits processes
             */
            if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
                unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = {};
                struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
                int tmp_space;
                struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);

                tmp_cp->cmsg_level = SOL_SOCKET;
                tmp_cp->cmsg_type = SCM_TIMESTAMP;

                if (proc_is64bit(p)) {
                    struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);

                    os_unaligned_deref(&tv64->tv_sec) = tv->tv_sec;
                    os_unaligned_deref(&tv64->tv_usec) = tv->tv_usec;

                    tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
                    tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
                } else {
                    struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);

                    tv32->tv_sec = tv->tv_sec;
                    tv32->tv_usec = tv->tv_usec;

                    tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
                    tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
                }
                if (len >= tmp_space) {
                    tocopy = tmp_space;
                } else {
                    *flags |= MSG_CTRUNC;
                    tocopy = len;
                }
                error = copyout(tmp_buffer, ctlbuf, tocopy);
                if (error) {
                    goto out;
                }
            } else {
                if (cp_size > buflen) {
                    panic("cp_size > buflen, something"
                        "wrong with alignment!");
                }
                if (len >= cp_size) {
                    tocopy = cp_size;
                } else {
                    *flags |= MSG_CTRUNC;
                    tocopy = len;
                }
                error = copyout((caddr_t) cp, ctlbuf, tocopy);
                if (error) {
                    goto out;
                }
            }

            ctlbuf += tocopy;
            len -= tocopy;

            buflen -= cp_size;
            cp = (struct cmsghdr *)(void *)
                ((unsigned char *) cp + cp_size);
            cp_size = CMSG_ALIGN(cp->cmsg_len);
        }

        m = m->m_next;
    }
    *controllen = ctlbuf - control;
out:
    return error;
}
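
/*
 * Illustrative userspace sketch (not part of xnu): walking the control
 * buffer produced above, e.g. for SCM_TIMESTAMP after enabling
 * SO_TIMESTAMP with setsockopt().
 *
 *	for (struct cmsghdr *c = CMSG_FIRSTHDR(&msg); c != NULL;
 *	    c = CMSG_NXTHDR(&msg, c)) {
 *		if (c->cmsg_level == SOL_SOCKET &&
 *		    c->cmsg_type == SCM_TIMESTAMP) {
 *			struct timeval tv;
 *			memcpy(&tv, CMSG_DATA(c), sizeof(tv));
 *		}
 *	}
 */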
/*
 * Returns:	0		Success
 *		EACCES		Mandatory Access Control failure
 *	<pru_soreceive>:ENOBUFS
 *	<pru_soreceive>:ENOTCONN
 *	<pru_soreceive>:EWOULDBLOCK
 *	<pru_soreceive>:EFAULT
 *	<pru_soreceive>:EINTR
 *	<pru_soreceive>:EBADF
 *	<pru_soreceive>:EINVAL
 *	<pru_soreceive>:EMSGSIZE
 *	<pru_soreceive>:???
 *
 * Notes:	Additional return values from calls through <pru_soreceive>
 *		depend on protocols other than TCP or AF_UNIX, which are
 *		documented above.
 */
static int
recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
    user_addr_t namelenp, int32_t *retval)
{
    ssize_t len;
    int error;
    struct mbuf *control = 0;
    struct socket *so;
    struct sockaddr *fromsa = 0;
    struct fileproc *fp;

    KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
    if ((error = fp_lookup(p, s, &fp, 1))) {
        KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
        return error;
    }
    if (fp->f_type != DTYPE_SOCKET) {
        fp_drop(p, s, fp, 1);
        return ENOTSOCK;
    }

    so = (struct socket *)fp->f_data;
    if (so == NULL) {
        fp_drop(p, s, fp, 1);
        return EBADF;
    }

#if CONFIG_MACF_SOCKET_SUBSET
    /*
     * We check the state without holding the socket lock;
     * if a race condition occurs, it would simply result
     * in an extra call to the MAC check function.
     */
    if (!(so->so_state & SS_DEFUNCT) &&
        !(so->so_state & SS_ISCONNECTED) &&
        !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
        (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
        goto out1;
    }
#endif /* MAC_SOCKET_SUBSET */
    if (uio_resid(uiop) < 0) {
        KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
        error = EINVAL;
        goto out1;
    }

    len = uio_resid(uiop);
    error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
        (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
        &mp->msg_flags);
    if (fromsa) {
        AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
            fromsa);
    }
    if (error) {
        if (uio_resid(uiop) != len && (error == ERESTART ||
            error == EINTR || error == EWOULDBLOCK)) {
            error = 0;
        }
    }
    if (error) {
        goto out;
    }

    *retval = len - uio_resid(uiop);

    if (mp->msg_name) {
        error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
        if (error) {
            goto out;
        }
        /* return the actual, untruncated address length */
        if (namelenp &&
            (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
            sizeof(int))) != 0) {
            goto out;
        }
    }

    if (mp->msg_control) {
        error = copyout_control(p, control, mp->msg_control,
            &mp->msg_controllen, &mp->msg_flags);
    }
out:
    if (fromsa) {
        FREE(fromsa, M_SONAME);
    }
    if (control) {
        m_freem(control);
    }
    KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
out1:
    fp_drop(p, s, fp, 0);
    return error;
}
/*
 * Returns:	0		Success
 *	read:??? [4056224: applicable for pipes]
 *
 * Notes:	The read entry point is only called as part of support for
 *		binary backward compatibility; new code should use read
 *		instead of recv or recvfrom when attempting to read data
 *		from pipes.
 *
 *		For full documentation of the return codes from recvit, see
 *		the block header for the recvit function.
 */
int
recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
        retval);
}

int
recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
    int32_t *retval)
{
    struct user_msghdr msg;
    int error;
    uio_t auio = NULL;

    KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
    AUDIT_ARG(fd, uap->s);

    if (uap->fromlenaddr) {
        error = copyin(uap->fromlenaddr,
            (caddr_t)&msg.msg_namelen, sizeof(msg.msg_namelen));
        if (error) {
            return error;
        }
    } else {
        msg.msg_namelen = 0;
    }
    msg.msg_name = uap->from;
    auio = uio_create(1, 0,
        (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
        UIO_READ);
    if (auio == NULL) {
        return ENOMEM;
    }

    uio_addiov(auio, uap->buf, uap->len);
    /* no need to set up msg_iov.  recvit uses uio_t we send it */
    msg.msg_iov = 0;
    msg.msg_iovlen = 0;
    msg.msg_control = 0;
    msg.msg_controllen = 0;
    msg.msg_flags = uap->flags;
    error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval);
    if (auio != NULL) {
        uio_free(auio);
    }

    KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);

    return error;
}
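
/*
 * Illustrative userspace sketch (not part of xnu): a datagram receive.
 * *fromlenaddr is copied in above as the initial msg_namelen and the
 * untruncated address length is written back by recvit().
 *
 *	struct sockaddr_storage from;
 *	socklen_t flen = sizeof(from);
 *	char buf[2048];
 *	ssize_t n = recvfrom(s, buf, sizeof(buf), 0,
 *	    (struct sockaddr *)&from, &flen);
 */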
/*
 * Returns:	0		Success
 *
 * Notes:	For full documentation of the return codes from recvit, see
 *		the block header for the recvit function.
 */
int
recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
        retval);
}

int
recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
    int32_t *retval)
{
    struct user32_msghdr msg32;
    struct user64_msghdr msg64;
    struct user_msghdr user_msg;
    caddr_t msghdrp;
    int size_of_msghdr;
    user_addr_t uiov;
    int error;
    uio_t auio = NULL;
    struct user_iovec *iovp;

    KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
    AUDIT_ARG(fd, uap->s);
    if (IS_64BIT_PROCESS(p)) {
        msghdrp = (caddr_t)&msg64;
        size_of_msghdr = sizeof(msg64);
    } else {
        msghdrp = (caddr_t)&msg32;
        size_of_msghdr = sizeof(msg32);
    }
    error = copyin(uap->msg, msghdrp, size_of_msghdr);
    if (error) {
        KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
        return error;
    }

    /* only need to copy if user process is not 64-bit */
    if (IS_64BIT_PROCESS(p)) {
        user_msg.msg_flags = msg64.msg_flags;
        user_msg.msg_controllen = msg64.msg_controllen;
        user_msg.msg_control = msg64.msg_control;
        user_msg.msg_iovlen = msg64.msg_iovlen;
        user_msg.msg_iov = msg64.msg_iov;
        user_msg.msg_namelen = msg64.msg_namelen;
        user_msg.msg_name = msg64.msg_name;
    } else {
        user_msg.msg_flags = msg32.msg_flags;
        user_msg.msg_controllen = msg32.msg_controllen;
        user_msg.msg_control = msg32.msg_control;
        user_msg.msg_iovlen = msg32.msg_iovlen;
        user_msg.msg_iov = msg32.msg_iov;
        user_msg.msg_namelen = msg32.msg_namelen;
        user_msg.msg_name = msg32.msg_name;
    }

    if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
        KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
            0, 0, 0, 0);
        return EMSGSIZE;
    }

    user_msg.msg_flags = uap->flags;

    /* allocate a uio large enough to hold the number of iovecs passed */
    auio = uio_create(user_msg.msg_iovlen, 0,
        (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
        UIO_READ);
    if (auio == NULL) {
        error = ENOMEM;
        goto done;
    }

    /*
     * get location of iovecs within the uio. then copyin the iovecs from
     * user space.
     */
    iovp = uio_iovsaddr(auio);
    if (iovp == NULL) {
        error = ENOMEM;
        goto done;
    }
    uiov = user_msg.msg_iov;
    user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
    error = copyin_user_iovec_array(uiov,
        IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
        user_msg.msg_iovlen, iovp);
    if (error) {
        goto done;
    }

    /* finish setup of uio_t */
    error = uio_calculateresid(auio);
    if (error) {
        goto done;
    }

    error = recvit(p, uap->s, &user_msg, auio, 0, retval);
    if (!error) {
        user_msg.msg_iov = uiov;
        if (IS_64BIT_PROCESS(p)) {
            msg64.msg_flags = user_msg.msg_flags;
            msg64.msg_controllen = user_msg.msg_controllen;
            msg64.msg_control = user_msg.msg_control;
            msg64.msg_iovlen = user_msg.msg_iovlen;
            msg64.msg_iov = user_msg.msg_iov;
            msg64.msg_namelen = user_msg.msg_namelen;
            msg64.msg_name = user_msg.msg_name;
        } else {
            msg32.msg_flags = user_msg.msg_flags;
            msg32.msg_controllen = user_msg.msg_controllen;
            msg32.msg_control = user_msg.msg_control;
            msg32.msg_iovlen = user_msg.msg_iovlen;
            msg32.msg_iov = user_msg.msg_iov;
            msg32.msg_namelen = user_msg.msg_namelen;
            msg32.msg_name = user_msg.msg_name;
        }
        error = copyout(msghdrp, uap->msg, size_of_msghdr);
    }
done:
    if (auio != NULL) {
        uio_free(auio);
    }
    KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
    return error;
}
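
/*
 * Illustrative userspace sketch (not part of xnu): receiving with ancillary
 * data and checking for truncation, which copyout_control() signals through
 * MSG_CTRUNC in msg_flags.
 *
 *	char cbuf[CMSG_SPACE(sizeof(struct timeval))];
 *	struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
 *	    .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
 *	ssize_t n = recvmsg(s, &msg, 0);
 *	if (n >= 0 && (msg.msg_flags & MSG_CTRUNC))
 *		;	// control data was truncated
 */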
int
recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
{
    int error = EOPNOTSUPP;
    struct user_msghdr_x *user_msg_x = NULL;
    struct recv_msg_elem *recv_msg_array = NULL;
    struct socket *so;
    user_ssize_t len_before = 0, len_after;
    int need_drop = 0;
    size_t size_of_msghdr;
    void *umsgp = NULL;
    u_int i;
    u_int uiocnt;

    KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);

    error = file_socket(uap->s, &so);
    if (error) {
        goto out;
    }
    need_drop = 1;
    if (so == NULL) {
        error = EBADF;
        goto out;
    }
    /*
     * Input parameter range check
     */
    if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
        error = EINVAL;
        goto out;
    }
    if (uap->cnt > somaxrecvmsgx) {
        uap->cnt = somaxrecvmsgx;
    }

    user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
        M_TEMP, M_WAITOK | M_ZERO);
    if (user_msg_x == NULL) {
        DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
        error = ENOMEM;
        goto out;
    }
    recv_msg_array = alloc_recv_msg_array(uap->cnt);
    if (recv_msg_array == NULL) {
        DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
        error = ENOMEM;
        goto out;
    }

    size_of_msghdr = IS_64BIT_PROCESS(p) ?
        sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);

    umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
    if (umsgp == NULL) {
        DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
        error = ENOMEM;
        goto out;
    }
    error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
    if (error) {
        DBG_PRINTF("%s copyin() failed\n", __func__);
        goto out;
    }
    error = internalize_recv_msghdr_array(umsgp,
        IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
        UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
    if (error) {
        DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
        goto out;
    }
    /*
     * Make sure the size of each message iovec and
     * the aggregate size of all the iovec is valid
     */
    if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
        error = EINVAL;
        goto out;
    }
    /*
     * Sanity check on passed arguments
     */
    for (i = 0; i < uap->cnt; i++) {
        struct user_msghdr_x *mp = user_msg_x + i;

        if (mp->msg_flags != 0) {
            error = EINVAL;
            goto out;
        }
    }
#if CONFIG_MACF_SOCKET_SUBSET
    /*
     * We check the state without holding the socket lock;
     * if a race condition occurs, it would simply result
     * in an extra call to the MAC check function.
     */
    if (!(so->so_state & SS_DEFUNCT) &&
        !(so->so_state & SS_ISCONNECTED) &&
        !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
        (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) {
        goto out;
    }
#endif /* MAC_SOCKET_SUBSET */

    len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);

    if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
        pru_soreceive_list_notsupp &&
        somaxrecvmsgx == 0) {
        error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
            recv_msg_array, uap->cnt, &uap->flags);
    } else {
        int flags = uap->flags;

        for (i = 0; i < uap->cnt; i++) {
            struct recv_msg_elem *recv_msg_elem;
            uio_t auio;
            struct sockaddr **psa;
            struct mbuf **controlp;

            recv_msg_elem = recv_msg_array + i;
            auio = recv_msg_elem->uio;

            /*
             * Do not block if we got at least one packet
             */
            if (i > 0) {
                flags |= MSG_DONTWAIT;
            }

            psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
                &recv_msg_elem->psa : NULL;
            controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
                &recv_msg_elem->controlp : NULL;

            error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
                auio, (struct mbuf **)0, controlp, &flags);
            if (error) {
                break;
            }
            /*
             * We have some data
             */
            recv_msg_elem->which |= SOCK_MSG_DATA;
            /*
             * Stop on partial copy
             */
            if (flags & (MSG_RCVMORE | MSG_TRUNC)) {
                break;
            }
        }
        if ((uap->flags & MSG_DONTWAIT) == 0) {
            flags &= ~MSG_DONTWAIT;
        }
        uap->flags = flags;
    }

    len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);

    if (error) {
        if (len_after != len_before && (error == ERESTART ||
            error == EINTR || error == EWOULDBLOCK)) {
            error = 0;
        } else {
            goto out;
        }
    }

    uiocnt = externalize_recv_msghdr_array(umsgp,
        IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
        UIO_READ, uap->cnt, user_msg_x, recv_msg_array);

    error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
    if (error) {
        DBG_PRINTF("%s copyout() failed\n", __func__);
        goto out;
    }
    *retval = (int)(uiocnt);

    for (i = 0; i < uap->cnt; i++) {
        struct user_msghdr_x *mp = user_msg_x + i;
        struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
        struct sockaddr *fromsa = recv_msg_elem->psa;

        if (mp->msg_name) {
            error = copyout_sa(fromsa, mp->msg_name,
                &mp->msg_namelen);
            if (error) {
                goto out;
            }
        }
        if (mp->msg_control) {
            error = copyout_control(p, recv_msg_elem->controlp,
                mp->msg_control, &mp->msg_controllen,
                &mp->msg_flags);
            if (error) {
                goto out;
            }
        }
    }
out:
    if (need_drop) {
        file_drop(uap->s);
    }
    if (umsgp != NULL) {
        _FREE(umsgp, M_TEMP);
    }
    if (recv_msg_array != NULL) {
        free_recv_msg_array(recv_msg_array, uap->cnt);
    }
    if (user_msg_x != NULL) {
        _FREE(user_msg_x, M_TEMP);
    }

    KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);

    return error;
}
/*
 * Returns:	0		Success
 *	file_socket:ENOTSOCK
 *	soshutdown:ENOTCONN
 *	soshutdown:EADDRNOTAVAIL[TCP]
 *	soshutdown:ENOBUFS[TCP]
 *	soshutdown:EMSGSIZE[TCP]
 *	soshutdown:EHOSTUNREACH[TCP]
 *	soshutdown:ENETUNREACH[TCP]
 *	soshutdown:ENETDOWN[TCP]
 *	soshutdown:ENOMEM[TCP]
 *	soshutdown:EACCES[TCP]
 *	soshutdown:EMSGSIZE[TCP]
 *	soshutdown:ENOBUFS[TCP]
 *	soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
 *	soshutdown:??? [other protocol families]
 */
int
shutdown(__unused struct proc *p, struct shutdown_args *uap,
    __unused int32_t *retval)
{
    struct socket *so;
    int error;

    AUDIT_ARG(fd, uap->s);
    error = file_socket(uap->s, &so);
    if (error) {
        return error;
    }
    if (so == NULL) {
        error = EBADF;
        goto out;
    }
    error = soshutdown((struct socket *)so, uap->how);
out:
    file_drop(uap->s);
    return error;
}
/*
 * Returns:	0		Success
 *		EACCES		Mandatory Access Control failure
 *	file_socket:ENOTSOCK
 *	sosetopt:ENOPROTOOPT
 *	sosetopt:EOPNOTSUPP[AF_UNIX]
 */
int
setsockopt(struct proc *p, struct setsockopt_args *uap,
    __unused int32_t *retval)
{
    struct socket *so;
    struct sockopt sopt;
    int error;

    AUDIT_ARG(fd, uap->s);
    if (uap->val == 0 && uap->valsize != 0) {
        return EFAULT;
    }
    /* No bounds checking on size (it's unsigned) */

    error = file_socket(uap->s, &so);
    if (error) {
        return error;
    }

    sopt.sopt_dir = SOPT_SET;
    sopt.sopt_level = uap->level;
    sopt.sopt_name = uap->name;
    sopt.sopt_val = uap->val;
    sopt.sopt_valsize = uap->valsize;
    sopt.sopt_p = p;

    if (so == NULL) {
        error = EINVAL;
        goto out;
    }
#if CONFIG_MACF_SOCKET_SUBSET
    if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
        &sopt)) != 0) {
        goto out;
    }
#endif /* MAC_SOCKET_SUBSET */
    error = sosetoptlock(so, &sopt, 1);	/* will lock socket */
out:
    file_drop(uap->s);
    return error;
}
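
/*
 * Illustrative userspace sketch (not part of xnu): SO_NOSIGPIPE maps to the
 * SOF_NOSIGPIPE flag consulted by sendit() before raising SIGPIPE.
 *
 *	int one = 1;
 *	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(one)) == -1)
 *		perror("setsockopt");
 */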
/*
 * Returns:	0		Success
 *		EACCES		Mandatory Access Control failure
 *	file_socket:ENOTSOCK
 */
int
getsockopt(struct proc *p, struct getsockopt_args *uap,
    __unused int32_t *retval)
{
    int error;
    socklen_t valsize;
    struct sockopt sopt;
    struct socket *so;

    error = file_socket(uap->s, &so);
    if (error) {
        return error;
    }
    if (uap->val) {
        error = copyin(uap->avalsize, (caddr_t)&valsize,
            sizeof(valsize));
        if (error) {
            goto out;
        }
        /* No bounds checking on size (it's unsigned) */
    } else {
        valsize = 0;
    }
    sopt.sopt_dir = SOPT_GET;
    sopt.sopt_level = uap->level;
    sopt.sopt_name = uap->name;
    sopt.sopt_val = uap->val;
    sopt.sopt_valsize = (size_t)valsize;	/* checked non-negative above */
    sopt.sopt_p = p;

    if (so == NULL) {
        error = EBADF;
        goto out;
    }
#if CONFIG_MACF_SOCKET_SUBSET
    if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
        &sopt)) != 0) {
        goto out;
    }
#endif /* MAC_SOCKET_SUBSET */
    error = sogetoptlock((struct socket *)so, &sopt, 1);	/* will lock */
    if (error == 0) {
        valsize = sopt.sopt_valsize;
        error = copyout((caddr_t)&valsize, uap->avalsize,
            sizeof(valsize));
    }
out:
    file_drop(uap->s);
    return error;
}
/*
 * Returns:	0			Success
 *		file_socket:ENOTSOCK
 *		<pru_sockaddr>:ENOBUFS[TCP]
 *		<pru_sockaddr>:ECONNRESET[TCP]
 *		<pru_sockaddr>:EINVAL[AF_UNIX]
 *		<sf_getsockname>:???
 */
getsockname(__unused struct proc *p, struct getsockname_args *uap,
    __unused int32_t *retval)
    struct sockaddr *sa;

    error = file_socket(uap->fdes, &so);
    error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
    error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
    error = sflt_getsockname(so, &sa);
    if (error == EJUSTRETURN) {
    socket_unlock(so, 1);
    sa_len = sa->sa_len;
    len = MIN(len, sa_len);
    error = copyout((caddr_t)sa, uap->asa, len);
    /* return the actual, untruncated address length */
    error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
    file_drop(uap->fdes);
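/*
 * Illustrative userspace sketch (not part of this file): the copied-out
 * address is truncated to the caller's buffer, but *alen reports the full,
 * untruncated sockaddr length, matching the copyout logic above.
 *
 *	#include <sys/socket.h>
 *
 *	struct sockaddr_storage ss;
 *	socklen_t len = sizeof(ss);
 *	if (getsockname(s, (struct sockaddr *)&ss, &len) == 0) {
 *		// 'len' now holds the actual address length
 *	}
 */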
/*
 * Get name of peer for connected socket.
 *
 * Returns:	0			Success
 *		file_socket:ENOTSOCK
 *		<pru_peeraddr>:???
 *		<sf_getpeername>:???
 */
getpeername(__unused struct proc *p, struct getpeername_args *uap,
    __unused int32_t *retval)
    struct sockaddr *sa;

    error = file_socket(uap->fdes, &so);
    if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
        (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
        /* the socket has been shut down; no more getpeername calls */
        socket_unlock(so, 1);
    if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
        socket_unlock(so, 1);
    error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
        socket_unlock(so, 1);
    error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
    error = sflt_getpeername(so, &sa);
    if (error == EJUSTRETURN) {
    socket_unlock(so, 1);
    sa_len = sa->sa_len;
    len = MIN(len, sa_len);
    error = copyout(sa, uap->asa, len);
    /* return the actual, untruncated address length */
    error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
    file_drop(uap->fdes);
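/*
 * Illustrative userspace sketch (not part of this file): getpeername(2)
 * fails on an unconnected socket and is rejected once both directions have
 * been shut down, matching the state checks above.
 *
 *	#include <sys/socket.h>
 *	#include <stdio.h>
 *
 *	struct sockaddr_storage peer;
 *	socklen_t plen = sizeof(peer);
 *	if (getpeername(s, (struct sockaddr *)&peer, &plen) == -1)
 *		perror("getpeername");
 */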
sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
    struct sockaddr *sa;

    size_t alloc_buflen = (size_t)buflen;

    if (alloc_buflen > INT_MAX / 2) {
     * The fd's in the buffer must expand to be pointers, thus we need twice
    if (type == MT_CONTROL) {
        alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
            sizeof(struct cmsghdr);
    if (alloc_buflen > MLEN) {
        if (type == MT_SONAME && alloc_buflen <= 112) {
            alloc_buflen = MLEN;	/* unix domain compat. hack */
        } else if (alloc_buflen > MCLBYTES) {
    m = m_get(M_WAIT, type);
    if (alloc_buflen > MLEN) {
        if ((m->m_flags & M_EXT) == 0) {
     * K64: We still copyin the original buflen because it gets expanded
     * later and we lie about the size of the mbuf because it only affects
    error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
    if (type == MT_SONAME) {
        sa = mtod(m, struct sockaddr *);
        sa->sa_len = buflen;
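    /*
     * Worked example of the MT_CONTROL sizing above (illustrative only):
     * a sender passing n descriptors supplies
     *
     *	buflen       = sizeof(struct cmsghdr) + n * sizeof(int)
     *
     * and each 4-byte fd is later expanded into an 8-byte kernel pointer, so
     *
     *	alloc_buflen = sizeof(struct cmsghdr) + 2 * n * sizeof(int)
     *
     * leaves enough room for the expanded control message.
     */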
/*
 * Given a user_addr_t of length len, allocate and fill out a *sa.
 *
 * Returns:	0			Success
 *		ENAMETOOLONG		Filename too long
 *		EINVAL			Invalid argument
 *		ENOMEM			Not enough space
 *		copyin:EFAULT		Bad address
 */
getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
    size_t len, boolean_t translate_unspec)
    struct sockaddr *sa;

    if (len > SOCK_MAXADDRLEN) {
        return ENAMETOOLONG;
    }

    if (len < offsetof(struct sockaddr, sa_data[0])) {
    MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
    error = copyin(uaddr, (caddr_t)sa, len);
     * Force sa_family to AF_INET on AF_INET sockets to handle
     * legacy applications that use AF_UNSPEC (0).  On all other
     * sockets we leave it unchanged and let the lower layer
    if (translate_unspec && sa->sa_family == AF_UNSPEC &&
        SOCK_CHECK_DOM(so, PF_INET) &&
        len == sizeof(struct sockaddr_in)) {
        sa->sa_family = AF_INET;
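/*
 * Illustrative sketch of how address-taking syscalls elsewhere in this file
 * typically use these helpers (not the exact caller code; 'uname' and
 * 'namelen' are hypothetical names): small addresses land in a caller-owned
 * sockaddr_storage via getsockaddr_s(), larger ones are heap-allocated by
 * getsockaddr().
 *
 *	struct sockaddr_storage ss;
 *	struct sockaddr *sa = NULL;
 *
 *	if (namelen > sizeof(ss)) {
 *		error = getsockaddr(so, &sa, uname, namelen, TRUE);
 *	} else {
 *		error = getsockaddr_s(so, &ss, uname, namelen, TRUE);
 *		if (error == 0)
 *			sa = (struct sockaddr *)&ss;
 *	}
 *	// ... use 'sa'; FREE(sa, M_SONAME) only in the heap-allocated case ...
 */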
getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
    user_addr_t uaddr, size_t len, boolean_t translate_unspec)
    if (ss == NULL || uaddr == USER_ADDR_NULL ||
        len < offsetof(struct sockaddr, sa_data[0])) {
    /*
     * sockaddr_storage size is less than SOCK_MAXADDRLEN,
     * so the check here is inclusive.
     */
    if (len > sizeof(*ss)) {
        return ENAMETOOLONG;
    }

    bzero(ss, sizeof(*ss));
    error = copyin(uaddr, (caddr_t)ss, len);
     * Force sa_family to AF_INET on AF_INET sockets to handle
     * legacy applications that use AF_UNSPEC (0).  On all other
     * sockets we leave it unchanged and let the lower layer
    if (translate_unspec && ss->ss_family == AF_UNSPEC &&
        SOCK_CHECK_DOM(so, PF_INET) &&
        len == sizeof(struct sockaddr_in)) {
        ss->ss_family = AF_INET;
internalize_user_msghdr_array(const void *src, int spacetype, int direction,
    u_int count, struct user_msghdr_x *dst, struct uio **uiop)
    for (i = 0; i < count; i++) {
        struct user_iovec *iovp;
        struct user_msghdr_x *user_msg = dst + i;

        if (spacetype == UIO_USERSPACE64) {
            const struct user64_msghdr_x *msghdr64;

            msghdr64 = ((const struct user64_msghdr_x *)src) + i;

            user_msg->msg_name = msghdr64->msg_name;
            user_msg->msg_namelen = msghdr64->msg_namelen;
            user_msg->msg_iov = msghdr64->msg_iov;
            user_msg->msg_iovlen = msghdr64->msg_iovlen;
            user_msg->msg_control = msghdr64->msg_control;
            user_msg->msg_controllen = msghdr64->msg_controllen;
            user_msg->msg_flags = msghdr64->msg_flags;
            user_msg->msg_datalen = msghdr64->msg_datalen;
        } else {
            const struct user32_msghdr_x *msghdr32;

            msghdr32 = ((const struct user32_msghdr_x *)src) + i;

            user_msg->msg_name = msghdr32->msg_name;
            user_msg->msg_namelen = msghdr32->msg_namelen;
            user_msg->msg_iov = msghdr32->msg_iov;
            user_msg->msg_iovlen = msghdr32->msg_iovlen;
            user_msg->msg_control = msghdr32->msg_control;
            user_msg->msg_controllen = msghdr32->msg_controllen;
            user_msg->msg_flags = msghdr32->msg_flags;
            user_msg->msg_datalen = msghdr32->msg_datalen;
        }

        if (user_msg->msg_iovlen <= 0 ||
            user_msg->msg_iovlen > UIO_MAXIOV) {
        auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
        iovp = uio_iovsaddr(auio);
        error = copyin_user_iovec_array(user_msg->msg_iov,
            spacetype, user_msg->msg_iovlen, iovp);
        user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
        error = uio_calculateresid(auio);
        user_msg->msg_datalen = uio_resid(auio);
        if (user_msg->msg_name && user_msg->msg_namelen) {
        if (user_msg->msg_control && user_msg->msg_controllen) {
internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
    u_int count, struct user_msghdr_x *dst,
    struct recv_msg_elem *recv_msg_array)
    for (i = 0; i < count; i++) {
        struct user_iovec *iovp;
        struct user_msghdr_x *user_msg = dst + i;
        struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;

        if (spacetype == UIO_USERSPACE64) {
            const struct user64_msghdr_x *msghdr64;

            msghdr64 = ((const struct user64_msghdr_x *)src) + i;

            user_msg->msg_name = msghdr64->msg_name;
            user_msg->msg_namelen = msghdr64->msg_namelen;
            user_msg->msg_iov = msghdr64->msg_iov;
            user_msg->msg_iovlen = msghdr64->msg_iovlen;
            user_msg->msg_control = msghdr64->msg_control;
            user_msg->msg_controllen = msghdr64->msg_controllen;
            user_msg->msg_flags = msghdr64->msg_flags;
            user_msg->msg_datalen = msghdr64->msg_datalen;
        } else {
            const struct user32_msghdr_x *msghdr32;

            msghdr32 = ((const struct user32_msghdr_x *)src) + i;

            user_msg->msg_name = msghdr32->msg_name;
            user_msg->msg_namelen = msghdr32->msg_namelen;
            user_msg->msg_iov = msghdr32->msg_iov;
            user_msg->msg_iovlen = msghdr32->msg_iovlen;
            user_msg->msg_control = msghdr32->msg_control;
            user_msg->msg_controllen = msghdr32->msg_controllen;
            user_msg->msg_flags = msghdr32->msg_flags;
            user_msg->msg_datalen = msghdr32->msg_datalen;
        }

        if (user_msg->msg_iovlen <= 0 ||
            user_msg->msg_iovlen > UIO_MAXIOV) {
        recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
            spacetype, direction);
        if (recv_msg_elem->uio == NULL) {
        iovp = uio_iovsaddr(recv_msg_elem->uio);
        error = copyin_user_iovec_array(user_msg->msg_iov,
            spacetype, user_msg->msg_iovlen, iovp);
        user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
        error = uio_calculateresid(recv_msg_elem->uio);
        user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
        if (user_msg->msg_name && user_msg->msg_namelen) {
            recv_msg_elem->which |= SOCK_MSG_SA;
        }
        if (user_msg->msg_control && user_msg->msg_controllen) {
            recv_msg_elem->which |= SOCK_MSG_CONTROL;
externalize_user_msghdr_array(void *dst, int spacetype, int direction,
    u_int count, const struct user_msghdr_x *src, struct uio **uiop)
#pragma unused(direction)
    for (i = 0; i < count; i++) {
        const struct user_msghdr_x *user_msg = src + i;
        uio_t auio = uiop[i];
        user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);

        if (user_msg->msg_datalen != 0 && len == 0) {
        if (seenlast == 0) {
        if (spacetype == UIO_USERSPACE64) {
            struct user64_msghdr_x *msghdr64;

            msghdr64 = ((struct user64_msghdr_x *)dst) + i;

            msghdr64->msg_flags = user_msg->msg_flags;
            msghdr64->msg_datalen = len;
        } else {
            struct user32_msghdr_x *msghdr32;

            msghdr32 = ((struct user32_msghdr_x *)dst) + i;

            msghdr32->msg_flags = user_msg->msg_flags;
            msghdr32->msg_datalen = len;
externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
    u_int count, const struct user_msghdr_x *src,
    struct recv_msg_elem *recv_msg_array)
    for (i = 0; i < count; i++) {
        const struct user_msghdr_x *user_msg = src + i;
        struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;

        len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);

        if (direction == UIO_READ) {
            if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) {
        if (user_msg->msg_datalen != 0 && len == 0) {
        if (seenlast == 0) {
        if (spacetype == UIO_USERSPACE64) {
            struct user64_msghdr_x *msghdr64;

            msghdr64 = ((struct user64_msghdr_x *)dst) + i;

            msghdr64->msg_flags = user_msg->msg_flags;
            msghdr64->msg_datalen = len;
        } else {
            struct user32_msghdr_x *msghdr32;

            msghdr32 = ((struct user32_msghdr_x *)dst) + i;

            msghdr32->msg_flags = user_msg->msg_flags;
            msghdr32->msg_datalen = len;
free_uio_array(struct uio **uiop, u_int count)
    for (i = 0; i < count; i++) {
        if (uiop[i] != NULL) {

__private_extern__ user_ssize_t
uio_array_resid(struct uio **uiop, u_int count)
    user_ssize_t len = 0;

    for (i = 0; i < count; i++) {
        struct uio *auio = uiop[i];

            len += uio_resid(auio);

uio_array_is_valid(struct uio **uiop, u_int count)
    user_ssize_t len = 0;

    for (i = 0; i < count; i++) {
        struct uio *auio = uiop[i];

            user_ssize_t resid = uio_resid(auio);

            /*
             * Sanity check on the validity of the iovec:
             * no point in going over sb_max
             */
            if (resid < 0 || (u_int32_t)resid > sb_max) {

    if (len < 0 || (u_int32_t)len > sb_max) {
struct recv_msg_elem *
alloc_recv_msg_array(u_int count)
    struct recv_msg_elem *recv_msg_array;

    recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
        M_TEMP, M_WAITOK | M_ZERO);

    return recv_msg_array;

free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
    for (i = 0; i < count; i++) {
        struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;

        if (recv_msg_elem->uio != NULL) {
            uio_free(recv_msg_elem->uio);
        }
        if (recv_msg_elem->psa != NULL) {
            _FREE(recv_msg_elem->psa, M_TEMP);
        }
        if (recv_msg_elem->controlp != NULL) {
            m_freem(recv_msg_elem->controlp);
        }
    }
    _FREE(recv_msg_array, M_TEMP);

__private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
    user_ssize_t len = 0;

    for (i = 0; i < count; i++) {
        struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;

        if (recv_msg_elem->uio != NULL) {
            len += uio_resid(recv_msg_elem->uio);
recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
    user_ssize_t len = 0;

    for (i = 0; i < count; i++) {
        struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;

        if (recv_msg_elem->uio != NULL) {
            user_ssize_t resid = uio_resid(recv_msg_elem->uio);

            /*
             * Sanity check on the validity of the iovec:
             * no point in going over sb_max
             */
            if (resid < 0 || (u_int32_t)resid > sb_max) {

    if (len < 0 || (u_int32_t)len > sb_max) {
#define SFUIOBUFS 64

/* Macros to compute the number of mbufs needed depending on cluster size */
#define HOWMANY_16K(n)	((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n)	((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES	(SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K	HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K		HOWMANY_4K(SENDFILE_MAX_BYTES)
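/*
 * Worked example (illustrative only; PGSHIFT and the cluster shifts are
 * platform-dependent): with 4 KB pages (PGSHIFT == 12),
 *
 *	SENDFILE_MAX_BYTES = 64 << 12            = 256 KB per pass
 *	SENDFILE_MAX_4K    = HOWMANY_4K(256 KB)  = 64 4 KB clusters
 *	SENDFILE_MAX_16K   = HOWMANY_16K(256 KB) = 16 16 KB clusters
 */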
alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
    struct mbuf **m, boolean_t jumbocl)
    unsigned int needed;

        panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);

    /*
     * Try to allocate for the whole thing.  Since we want full control
     * over the buffer size and be able to accept partial result, we can't
     * use mbuf_allocpacket().  The logic below is similar to sosend().
     */
    if (pktlen > MBIGCLBYTES && jumbocl) {
        needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
        *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
        needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
        *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);

    /*
     * Our previous attempt(s) at allocation had failed; the system
     * may be short on mbufs, and we want to block until they are
     * available.  This time, ask just for 1 mbuf and don't return
    *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
        panic("%s: blocking allocation returned NULL\n", __func__);
    *maxchunks = needed;
/*
 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
 *	struct sf_hdtr *hdtr, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'.  Send only '*nbytes' of the file or until EOF if
 * *nbytes == 0.  Optionally add a header and/or trailer to the socket
 * output.  If specified, write the total number of bytes sent into *nbytes.
 */
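/*
 * Illustrative userspace sketch (not part of this file): stream the whole
 * file out of 'fd' over the connected stream socket 's'; 'len' is both an
 * input limit (0 means "until EOF") and, on return, the byte count sent.
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *
 *	off_t off = 0, len = 0;
 *	if (sendfile(fd, s, off, &len, NULL, 0) == -1) {
 *		// on EINTR/EAGAIN, 'len' holds the bytes already sent
 *	}
 */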
sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
    struct fileproc *fp;
    struct writev_nocancel_args nuap;
    user_ssize_t writev_retval;
    struct user_sf_hdtr user_hdtr;
    struct user32_sf_hdtr user32_hdtr;
    struct user64_sf_hdtr user64_hdtr;
    off_t nbytes = 0, sbytes = 0;
    struct vfs_context context = *vfs_context_current();

    KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,

    AUDIT_ARG(fd, uap->fd);
    AUDIT_ARG(value32, uap->s);

    /*
     * Do argument checking. Must be a regular file in, stream
     * type and connected socket out, positive offset.
     */
    if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
    if ((fp->f_flag & FREAD) == 0) {
    if (vnode_isreg(vp) == 0) {
    error = file_socket(uap->s, &so);
    if (so->so_type != SOCK_STREAM) {
    if ((so->so_state & SS_ISCONNECTED) == 0) {
    if (uap->offset < 0) {
    if (uap->nbytes == USER_ADDR_NULL) {
    if (uap->flags != 0) {
    context.vc_ucred = fp->f_fglob->fg_cred;

#if CONFIG_MACF_SOCKET_SUBSET
    /* JMM - fetch connected sockaddr? */
    error = mac_socket_check_send(context.vc_ucred, so, NULL);
#endif /* MAC_SOCKET_SUBSET */
    /*
     * Get number of bytes to send.
     * Should it apply to the size of the header and trailer?
     */
    error = copyin(uap->nbytes, &nbytes, sizeof(off_t));

    /*
     * If specified, get the pointer to the sf_hdtr struct for
     * any headers/trailers.
     */
    if (uap->hdtr != USER_ADDR_NULL) {
        bzero(&user_hdtr, sizeof(user_hdtr));
        if (IS_64BIT_PROCESS(p)) {
            hdtrp = (caddr_t)&user64_hdtr;
            sizeof_hdtr = sizeof(user64_hdtr);
        } else {
            hdtrp = (caddr_t)&user32_hdtr;
            sizeof_hdtr = sizeof(user32_hdtr);
        }
        error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
        if (IS_64BIT_PROCESS(p)) {
            user_hdtr.headers = user64_hdtr.headers;
            user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
            user_hdtr.trailers = user64_hdtr.trailers;
            user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
        } else {
            user_hdtr.headers = user32_hdtr.headers;
            user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
            user_hdtr.trailers = user32_hdtr.trailers;
            user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
        }

        /*
         * Send any headers. Wimp out and use writev(2).
         */
        if (user_hdtr.headers != USER_ADDR_NULL) {
            bzero(&nuap, sizeof(struct writev_args));
            nuap.iovp = user_hdtr.headers;
            nuap.iovcnt = user_hdtr.hdr_cnt;
            error = writev_nocancel(p, &nuap, &writev_retval);
            sbytes += writev_retval;

    /*
     * Get the file size for 2 reasons:
     *  1. We don't want to allocate more mbufs than necessary
     *  2. We don't want to read past the end of file
     */
    if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {

    /*
     * Simply read file data into a chain of mbufs that are used with
     * scatter/gather reads.  We're not (yet?) set up to use zero-copy
     * external mbufs that point to the file pages.
     */
    error = sblock(&so->so_snd, SBL_WAIT);
        socket_unlock(so, 1);

    for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
        mbuf_t m0 = NULL, m;
        unsigned int nbufs = SFUIOBUFS, i;
        char uio_buf[UIO_SIZEOF(SFUIOBUFS)];	/* 1 KB !!! */

        /*
         * Calculate the amount to transfer.
         * Align to round number of pages.
         * Not to exceed send socket buffer,
         * the EOF, or the passed in nbytes.
         */
        xfsize = sbspace(&so->so_snd);
            if (so->so_state & SS_CANTSENDMORE) {
            } else if ((so->so_state & SS_NBIO)) {
        if (xfsize > SENDFILE_MAX_BYTES) {
            xfsize = SENDFILE_MAX_BYTES;
        } else if (xfsize > PAGE_SIZE) {
            xfsize = trunc_page(xfsize);
        }
        pgoff = off & PAGE_MASK_64;
        if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
            xfsize = PAGE_SIZE_64 - pgoff;
        }
        if (nbytes && xfsize > (nbytes - sbytes)) {
            xfsize = nbytes - sbytes;
        if (off + xfsize > file_size) {
            xfsize = file_size - off;

        /*
         * Attempt to use larger than system page-size clusters for
         * large writes only if there is a jumbo cluster pool and
         * if the socket is marked accordingly.
         */
        jumbocl = sosendjcl && njcl > 0 &&
            ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);

        socket_unlock(so, 0);
        alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
        pktlen = mbuf_pkthdr_maxlen(m0);
        if (pktlen < (size_t)xfsize) {

        auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
            UIO_READ, &uio_buf[0], sizeof(uio_buf));
        if (auio == NULL) {
            printf("sendfile failed. nbufs = %d. %s", nbufs,
                "File a radar related to rdar://10146739.\n");

        for (i = 0, m = m0, uiolen = 0;
            i < nbufs && m != NULL && uiolen < (size_t)xfsize;
            i++, m = mbuf_next(m)) {
            size_t mlen = mbuf_maxlen(m);

            if (mlen + uiolen > (size_t)xfsize) {
                mlen = xfsize - uiolen;
            }
            mbuf_setlen(m, mlen);
            uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),

        if (xfsize != uio_resid(auio)) {
            printf("sendfile: xfsize: %lld != uio_resid(auio): "
                "%lld\n", xfsize, (long long)uio_resid(auio));

        KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
            uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
            (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
        error = fo_read(fp, auio, FOF_OFFSET, &context);
            if (uio_resid(auio) != xfsize && (error == ERESTART ||
                error == EINTR || error == EWOULDBLOCK)) {
        xfsize -= uio_resid(auio);
        KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
            uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
            (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
            // printf("sendfile: fo_read 0 bytes, EOF\n");
        if (xfsize + off > file_size) {
            printf("sendfile: xfsize: %lld + off: %lld > file_size:"
                "%lld\n", xfsize, off, file_size);
        }
        for (i = 0, m = m0, rlen = 0;
            i < nbufs && m != NULL && rlen < xfsize;
            i++, m = mbuf_next(m)) {
            size_t mlen = mbuf_maxlen(m);

            if (rlen + mlen > (size_t)xfsize) {
                mlen = xfsize - rlen;
            }
            mbuf_setlen(m, mlen);
        mbuf_pkthdr_setlen(m0, xfsize);

        /*
         * Make sure that the socket is still able to take more data.
         * CANTSENDMORE being true usually means that the connection
         * was closed. so_error is true when an error was sensed after
         * The state is checked after the page mapping and buffer
         * allocation above since those operations may block and make
         * any socket checks stale. From this point forward, nothing
         * blocks before the pru_send (or more accurately, any blocking
         * results in a loop back to here to re-check).
         */
        if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
            if (so->so_state & SS_CANTSENDMORE) {
                error = so->so_error;

        /*
         * Wait for socket space to become available. We do this just
         * after checking the connection state above in order to avoid
         * a race condition with sbwait().
         */
        if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
            if (so->so_state & SS_NBIO) {
            KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
                DBG_FUNC_START), uap->s, 0, 0, 0, 0);
            error = sbwait(&so->so_snd);
            KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
                DBG_FUNC_END), uap->s, 0, 0, 0, 0);
            /*
             * An error from sbwait usually indicates that we've
             * been interrupted by a signal. If we've sent anything
             * then return bytes sent, otherwise return the error.
             */

            struct mbuf *control = NULL;

            /*
             * Socket filter processing
             */
            error = sflt_data_out(so, NULL, &m0, &control, 0);
            if (error) {
                if (error == EJUSTRETURN) {
            /*
             * End Socket filter processing
             */

        KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
            uap->s, 0, 0, 0, 0);
        error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
        KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_END),
            uap->s, 0, 0, 0, 0);
    sbunlock(&so->so_snd, FALSE);	/* will unlock socket */
    /*
     * Send trailers. Wimp out and use writev(2).
     */
    if (uap->hdtr != USER_ADDR_NULL &&
        user_hdtr.trailers != USER_ADDR_NULL) {
        bzero(&nuap, sizeof(struct writev_args));
        nuap.iovp = user_hdtr.trailers;
        nuap.iovcnt = user_hdtr.trl_cnt;
        error = writev_nocancel(p, &nuap, &writev_retval);
        sbytes += writev_retval;

    if (uap->nbytes != USER_ADDR_NULL) {
        /* XXX this appears bogus for some early failure conditions */
        copyout(&sbytes, uap->nbytes, sizeof(off_t));
    }
    KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
        (unsigned int)((sbytes >> 32) & 0x0ffffffff),
        (unsigned int)(sbytes & 0x0ffffffff), error, 0);
    sbunlock(&so->so_snd, FALSE);	/* will unlock socket */
#endif /* SENDFILE */