/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * sendfile(2) and related extensions:
 * Copyright (c) 1998, David Greenman. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/file_internal.h>
#include <sys/vnode_internal.h>
#include <sys/malloc.h>
#include <sys/mcache.h>

#include <kern/locks.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>
#include <kern/task.h>

#include <sys/sysctl.h>
#include <sys/sys_domain.h>

#include <security/audit/audit.h>

#include <sys/kdebug.h>
#include <sys/sysproto.h>
#include <netinet/in.h>
#include <net/route.h>
#include <netinet/in_pcb.h>

#include <os/ptrtools.h>

#if CONFIG_MACF_SOCKET_SUBSET
#include <security/mac_framework.h>
#endif /* MAC_SOCKET_SUBSET */
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops
#define f_data fp_glob->fg_data

#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SENDMSG         NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
#define DBG_FNC_SENDTO          NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
#define DBG_FNC_SENDIT          NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
#define DBG_FNC_RECVFROM        NETDBG_CODE(DBG_NETSOCK, (5 << 8))
#define DBG_FNC_RECVMSG         NETDBG_CODE(DBG_NETSOCK, (6 << 8))
#define DBG_FNC_RECVIT          NETDBG_CODE(DBG_NETSOCK, (7 << 8))
#define DBG_FNC_SENDFILE        NETDBG_CODE(DBG_NETSOCK, (10 << 8))
#define DBG_FNC_SENDFILE_WAIT   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
#define DBG_FNC_SENDFILE_READ   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
#define DBG_FNC_SENDFILE_SEND   NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
#define DBG_FNC_SENDMSG_X       NETDBG_CODE(DBG_NETSOCK, (11 << 8))
#define DBG_FNC_RECVMSG_X       NETDBG_CODE(DBG_NETSOCK, (12 << 8))

#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#define DBG_PRINTF(...) printf(__VA_ARGS__)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#define DBG_PRINTF(...) do { } while (0)
#endif /* DEBUG || DEVELOPMENT */
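/*
 * Illustrative sketch (not part of the original file, compiled out): how the
 * two macros above are meant to be used together.  DBG_PRINTF() emits text
 * only on DEBUG/DEVELOPMENT kernels, and DEBUG_KERNEL_ADDRPERM() passes the
 * raw pointer through on those kernels while obfuscating it with
 * VM_KERNEL_ADDRPERM() on RELEASE kernels, so log lines never leak slid
 * kernel addresses.  The function name below is hypothetical.
 */
#if 0
static void
debug_log_socket_example(struct socket *so)
{
    /* prints only on DEBUG/DEVELOPMENT; pointer is scrubbed on RELEASE */
    DBG_PRINTF("%s: so 0x%llx\n", __func__,
        (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
}
#endif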
static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
    int, int32_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
    int32_t *);
static int connectit(struct socket *, struct sockaddr *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
    size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
    user_addr_t, size_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
    boolean_t);
#endif /* SENDFILE */
static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
static int connectitx(struct socket *, struct sockaddr *,
    struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
    sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
    int *);
static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);

static int internalize_user_msghdr_array(const void *, int, int, u_int,
    struct user_msghdr_x *, struct uio **);
static u_int externalize_user_msghdr_array(void *, int, int, u_int,
    const struct user_msghdr_x *, struct uio **);

static void free_uio_array(struct uio **, u_int);
static boolean_t uio_array_is_valid(struct uio **, u_int);
static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
static int internalize_recv_msghdr_array(const void *, int, int,
    u_int, struct user_msghdr_x *, struct recv_msg_elem *);
static u_int externalize_recv_msghdr_array(struct proc *, struct socket *, void *, u_int,
    struct user_msghdr_x *, struct recv_msg_elem *, int *);
static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
static void free_recv_msg_array(struct recv_msg_elem *, u_int);
SYSCTL_DECL(_kern_ipc);

static u_int somaxsendmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
static u_int somaxrecvmsgx = 100;
SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
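/*
 * Illustrative sketch (not in the original source, compiled out): the two
 * knobs above surface as kern.ipc.maxsendmsgx and kern.ipc.maxrecvmsgx and
 * cap how many user_msghdr_x entries a single sendmsg_x()/recvmsg_x() call
 * will process.  A userland tool could read them roughly like this.
 */
#if 0
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
    unsigned int maxsend = 0, maxrecv = 0;
    size_t len = sizeof(maxsend);

    /* both default to 100 per the declarations above */
    sysctlbyname("kern.ipc.maxsendmsgx", &maxsend, &len, NULL, 0);
    len = sizeof(maxrecv);
    sysctlbyname("kern.ipc.maxrecvmsgx", &maxrecv, &len, NULL, 0);
    printf("maxsendmsgx=%u maxrecvmsgx=%u\n", maxsend, maxrecv);
    return 0;
}
#endif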
/*
 * System call interface to the socket abstraction.
 */

extern const struct fileops socketops;
/*
 * Returns:	0			Success
 *		EACCES			Mandatory Access Control failure
 *	socreate:EAFNOSUPPORT
 *	socreate:EPROTOTYPE
 *	socreate:EPROTONOSUPPORT
 *	socreate:???			[other protocol families, IPSEC]
 */
int
socket(struct proc *p,
    struct socket_args *uap,
    int32_t *retval)
{
	return socket_common(p, uap->domain, uap->type, uap->protocol,
	           proc_selfpid(), retval, 0);
}
int
socket_delegate(struct proc *p,
    struct socket_delegate_args *uap,
    int32_t *retval)
{
	return socket_common(p, uap->domain, uap->type, uap->protocol,
	           uap->epid, retval, 1);
}
static int
socket_common(struct proc *p,
    int domain, int type, int protocol,
    pid_t epid, int32_t *retval, int delegate)
{
	struct socket *so;
	struct fileproc *fp;
	int fd, error;

	AUDIT_ARG(socket, domain, type, protocol);
#if CONFIG_MACF_SOCKET_SUBSET
	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
	    type, protocol)) != 0) {
		return error;
	}
#endif /* MAC_SOCKET_SUBSET */

	if (delegate) {
		error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
		if (error) {
			return EACCES;
		}
	}

	error = falloc(p, &fp, &fd, vfs_context_current());
	if (error) {
		return error;
	}
	fp->f_flag = FREAD | FWRITE;
	fp->f_ops = &socketops;

	if (delegate) {
		error = socreate_delegate(domain, &so, type, protocol, epid);
	} else {
		error = socreate(domain, &so, type, protocol);
	}

	if (error) {
		fp_free(p, fd, fp);
	} else {
		fp->f_data = (caddr_t)so;

		proc_fdlock(p);
		procfdtbl_releasefd(p, fd, NULL);

		fp_drop(p, fd, fp, 1);
		proc_fdunlock(p);

		*retval = fd;
		if (ENTR_SHOULDTRACE) {
			KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
			    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
		}
	}
	return error;
}
/*
 * Returns:	0			Success
 *		EDESTADDRREQ		Destination address required
 *		EBADF			Bad file descriptor
 *		EACCES			Mandatory Access Control failure
 *	file_socket:ENOTSOCK
 *	getsockaddr:ENAMETOOLONG	Filename too long
 *	getsockaddr:EINVAL		Invalid argument
 *	getsockaddr:ENOMEM		Not enough space
 *	getsockaddr:EFAULT		Bad address
 */
/* ARGSUSED */
int
bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
{
	struct sockaddr_storage ss;
	struct sockaddr *sa = NULL;
	struct socket *so;
	boolean_t want_free = TRUE;
	int error;

	AUDIT_ARG(fd, uap->s);
	error = file_socket(uap->s, &so);
	if (error != 0) {
		return error;
	}
	if (uap->name == USER_ADDR_NULL) {
		error = EDESTADDRREQ;
		goto out;
	}
	if (uap->namelen > sizeof(ss)) {
		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
	} else {
		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
		if (error == 0) {
			sa = (struct sockaddr *)&ss;
			want_free = FALSE;
		}
	}
	if (error != 0) {
		goto out;
	}
	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
#if CONFIG_MACF_SOCKET_SUBSET
	if ((sa != NULL && sa->sa_family == AF_SYSTEM) ||
	    (error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) {
		error = sobindlock(so, sa, 1);  /* will lock socket */
	}
#else
	error = sobindlock(so, sa, 1);  /* will lock socket */
#endif /* MAC_SOCKET_SUBSET */
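/*
 * Illustrative userland sketch (not part of this file, compiled out): a
 * typical bind(2) call as handled by the code above.  Because a
 * struct sockaddr_in fits within sizeof(struct sockaddr_storage), the
 * kernel takes the getsockaddr_s() stack path and never allocates a
 * temporary sockaddr for it.  The function name is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>

static int
bind_example(void)
{
    int s = socket(AF_INET, SOCK_STREAM, 0);
    struct sockaddr_in sin;

    memset(&sin, 0, sizeof(sin));
    sin.sin_len = sizeof(sin);          /* BSD sockets carry an explicit length */
    sin.sin_family = AF_INET;
    sin.sin_port = htons(8080);
    sin.sin_addr.s_addr = htonl(INADDR_ANY);

    /* namelen <= sizeof(struct sockaddr_storage), so no kernel allocation */
    return bind(s, (struct sockaddr *)&sin, sizeof(sin));
}
#endif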
/*
 * Returns:	0			Success
 *		EACCES			Mandatory Access Control failure
 *	file_socket:ENOTSOCK
 *	solisten:EOPNOTSUPP
 */
int
listen(__unused struct proc *p, struct listen_args *uap,
    __unused int32_t *retval)
{
	int error;
	struct socket *so;

	AUDIT_ARG(fd, uap->s);
	error = file_socket(uap->s, &so);
	if (error != 0) {
		return error;
	}
	if (so != NULL)
#if CONFIG_MACF_SOCKET_SUBSET
	{
		error = mac_socket_check_listen(kauth_cred_get(), so);
		if (error == 0) {
			error = solisten(so, uap->backlog);
		}
	}
#else
	{ error = solisten(so, uap->backlog);}
#endif /* MAC_SOCKET_SUBSET */
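/*
 * Illustrative userland sketch (not in the original source, compiled out):
 * listen(2) as dispatched above.  The backlog argument is only a hint; the
 * socket layer bounds it (see solisten() and the kern.ipc.somaxconn limit).
 * The function name is hypothetical.
 */
#if 0
#include <sys/socket.h>

static int
listen_example(int s)
{
    /* SOMAXCONN (or any larger value) is bounded by the kernel */
    return listen(s, SOMAXCONN);
}
#endif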
/*
 * Returns:	fp_get_ftype:EBADF	Bad file descriptor
 *		fp_get_ftype:ENOTSOCK	Socket operation on non-socket
 *		:EFAULT			Bad address on copyin/copyout
 *		:EBADF			Bad file descriptor
 *		:EOPNOTSUPP		Operation not supported on socket
 *		:EINVAL			Invalid argument
 *		:EWOULDBLOCK		Operation would block
 *		:ECONNABORTED		Connection aborted
 *		:EINTR			Interrupted function
 *		:EACCES			Mandatory Access Control failure
 *	falloc:ENFILE			Too many files open in system
 *	falloc:EMFILE			Too many open files
 *	falloc:ENOMEM			Not enough space
 */
int
accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
    int32_t *retval)
{
	struct fileproc *fp;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so = NULL;
	lck_mtx_t *mutex_held;
	int fd = uap->s;
	int newfd;
	unsigned int sa_len;

	AUDIT_ARG(fd, uap->s);
	if (uap->name) {
		error = copyin(uap->anamelen, (caddr_t)&namelen,
		    sizeof(socklen_t));
		if (error) {
			return error;
		}
	}
	error = fp_get_ftype(p, fd, DTYPE_SOCKET, ENOTSOCK, &fp);
	if (error) {
		return error;
	}
	head = fp->f_data;

#if CONFIG_MACF_SOCKET_SUBSET
	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) {
		goto out;
	}
#endif /* MAC_SOCKET_SUBSET */

	socket_lock(head, 1);

	if (head->so_proto->pr_getlock != NULL) {
		mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
	} else {
		mutex_held = head->so_proto->pr_domain->dom_mtx;
	}

	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
			error = EOPNOTSUPP;
		} else {
			/* POSIX: The socket is not accepting connections */
			error = EINVAL;
		}
		socket_unlock(head, 1);
		goto out;
	}
	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
		socket_unlock(head, 1);
		error = EWOULDBLOCK;
		goto out;
	}
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		if (head->so_usecount < 1) {
			panic("accept: head=%p refcount=%d\n", head,
			    head->so_usecount);
		}
		error = msleep((caddr_t)&head->so_timeo, mutex_held,
		    PSOCK | PCATCH, "accept", 0);
		if (head->so_usecount < 1) {
			panic("accept: 2 head=%p refcount=%d\n", head,
			    head->so_usecount);
		}
		if ((head->so_state & SS_DRAINING)) {
			error = ECONNABORTED;
		}
		if (error) {
			socket_unlock(head, 1);
			goto out;
		}
	}
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		socket_unlock(head, 1);
		goto out;
	}

	/*
	 * At this point we know that there is at least one connection
	 * ready to be accepted. Remove it from the queue prior to
	 * allocating the file descriptor for it since falloc() may
	 * block allowing another process to accept the connection
	 * instead.
	 */
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	so_acquire_accept_list(head, NULL);
	if (TAILQ_EMPTY(&head->so_comp)) {
		so_release_accept_list(head);
	}

	so = TAILQ_FIRST(&head->so_comp);
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	so->so_state &= ~SS_COMP;
	so_release_accept_list(head);

	/* unlock head to avoid deadlock with select, keep a ref on head */
	socket_unlock(head, 0);

#if CONFIG_MACF_SOCKET_SUBSET
	/*
	 * Pass the pre-accepted socket to the MAC framework. This is
	 * cheaper than allocating a file descriptor for the socket,
	 * calling the protocol accept callback, and possibly freeing
	 * the file descriptor should the MAC check fails.
	 */
	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		/* Drop reference on listening socket */
		goto out;
	}
#endif /* MAC_SOCKET_SUBSET */

	/*
	 * Pass the pre-accepted socket to any interested socket filter(s).
	 * Upon failure, the socket would have been closed by the callee.
	 */
	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
		/* Drop reference on listening socket */
		/* Propagate socket filter's error code to the caller */
		goto out;
	}

	error = falloc(p, &fp, &newfd, vfs_context_current());
	if (error != 0) {
		/*
		 * Probably ran out of file descriptors.
		 *
		 * <rdar://problem/8554930>
		 * Don't put this back on the socket like we used to, that
		 * just causes the client to spin. Drop the socket.
		 */
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		goto out;
	}
	fp->f_ops = &socketops;
	fp->f_data = (caddr_t)so;

	socket_lock(head, 0);

	/* Sync socket non-blocking/async state with file flags */
	if (fp->f_flag & FNONBLOCK) {
		so->so_state |= SS_NBIO;
	} else {
		so->so_state &= ~SS_NBIO;
	}

	if (fp->f_flag & FASYNC) {
		so->so_state |= SS_ASYNC;
		so->so_rcv.sb_flags |= SB_ASYNC;
		so->so_snd.sb_flags |= SB_ASYNC;
	} else {
		so->so_state &= ~SS_ASYNC;
		so->so_rcv.sb_flags &= ~SB_ASYNC;
		so->so_snd.sb_flags &= ~SB_ASYNC;
	}

	(void) soacceptlock(so, &sa, 0);
	socket_unlock(head, 1);

	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);

	if (uap->name) {
		/* save sa_len before it is destroyed */
		sa_len = sa->sa_len;
		namelen = MIN(namelen, sa_len);
		error = copyout(sa, uap->name, namelen);
		if (error == 0) {
			/* return the actual, untruncated address length */
			namelen = sa_len;
		}
		error = copyout((caddr_t)&namelen, uap->anamelen,
		    sizeof(socklen_t));
	}

	/*
	 * If the socket has been marked as inactive by sosetdefunct(),
	 * disallow further operations on it.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		sodefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
	}
	socket_unlock(so, 1);

	procfdtbl_releasefd(p, newfd, NULL);
	fp_drop(p, newfd, fp, 1);

out:
	file_drop(fd);

	if (error == 0 && ENTR_SHOULDTRACE) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
	}
	return error;
}

int
accept(struct proc *p, struct accept_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return accept_nocancel(p, (struct accept_nocancel_args *)uap,
	           retval);
}
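/*
 * Illustrative userland sketch (not part of the original file, compiled out):
 * a non-blocking accept loop that reacts to the errors accept_nocancel()
 * produces above -- EWOULDBLOCK when SS_NBIO is set and the completed-
 * connection queue is empty, and ECONNABORTED when the listening socket is
 * shut down or draining while we wait.  The function name is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <errno.h>

static int
accept_example(int listen_fd)
{
    for (;;) {
        struct sockaddr_storage ss;
        socklen_t sslen = sizeof(ss);
        int fd = accept(listen_fd, (struct sockaddr *)&ss, &sslen);

        if (fd >= 0) {
            return fd;              /* got a connection */
        }
        if (errno == EWOULDBLOCK || errno == EINTR) {
            continue;               /* nothing queued yet; poll/kqueue here */
        }
        if (errno == ECONNABORTED) {
            continue;               /* connection died before we accepted it */
        }
        return -1;                  /* hard error (EBADF, EINVAL, ...) */
    }
}
#endif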
/*
 * Returns:	0			Success
 *		EBADF			Bad file descriptor
 *		EALREADY		Connection already in progress
 *		EINPROGRESS		Operation in progress
 *		ECONNABORTED		Connection aborted
 *		EINTR			Interrupted function
 *		EACCES			Mandatory Access Control failure
 *	file_socket:ENOTSOCK
 *	getsockaddr:ENAMETOOLONG	Filename too long
 *	getsockaddr:EINVAL		Invalid argument
 *	getsockaddr:ENOMEM		Not enough space
 *	getsockaddr:EFAULT		Bad address
 *	soconnectlock:EOPNOTSUPP
 *	soconnectlock:EISCONN
 *	soconnectlock:???		[depends on protocol, filters]
 *
 * Imputed:	so_error		error may be set from so_error, which
 *					may have been set by soconnectlock.
 */
int
connect(struct proc *p, struct connect_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return connect_nocancel(p, (struct connect_nocancel_args *)uap,
	           retval);
}

int
connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
{
#pragma unused(p, retval)
	struct socket *so;
	struct sockaddr_storage ss;
	struct sockaddr *sa = NULL;
	int error;
	int fd = uap->s;
	boolean_t dgram;

	AUDIT_ARG(fd, uap->s);
	error = file_socket(fd, &so);
	if (error != 0) {
		return error;
	}

	/*
	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
	 * if this is a datagram socket; translate for other types.
	 */
	dgram = (so->so_type == SOCK_DGRAM);

	/* Get socket address now before we obtain socket lock */
	if (uap->namelen > sizeof(ss)) {
		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
	} else {
		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
		if (error == 0) {
			sa = (struct sockaddr *)&ss;
		}
	}
	if (error != 0) {
		goto out;
	}

	error = connectit(so, sa);

	if (sa != NULL && sa != SA(&ss)) {
		FREE(sa, M_SONAME);
	}
	if (error == ERESTART) {
		error = EINTR;
	}
out:
	file_drop(fd);
	return error;
}
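/*
 * Illustrative userland sketch (not in the original source, compiled out):
 * non-blocking connect(2) against the behavior of connectit() -- the first
 * attempt returns EINPROGRESS while SS_ISCONNECTING is set, a repeated call
 * returns EALREADY, and the deferred status is read back via SO_ERROR.
 * The function name is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <sys/select.h>
#include <errno.h>

static int
connect_nonblocking_example(int s, const struct sockaddr *sa, socklen_t salen)
{
    if (connect(s, sa, salen) == 0) {
        return 0;                       /* connected immediately (e.g. local) */
    }
    if (errno != EINPROGRESS) {
        return -1;                      /* immediate failure */
    }

    /* wait for writability, then fetch the deferred result */
    fd_set wfds;
    FD_ZERO(&wfds);
    FD_SET(s, &wfds);
    if (select(s + 1, NULL, &wfds, NULL, NULL) <= 0) {
        return -1;
    }

    int soerr = 0;
    socklen_t len = sizeof(soerr);
    if (getsockopt(s, SOL_SOCKET, SO_ERROR, &soerr, &len) != 0 || soerr != 0) {
        return -1;
    }
    return 0;
}
#endif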
747 connectx_nocancel(struct proc
*p
, struct connectx_args
*uap
, int *retval
)
749 #pragma unused(p, retval)
750 struct sockaddr_storage ss
, sd
;
751 struct sockaddr
*src
= NULL
, *dst
= NULL
;
753 int error
, error1
, fd
= uap
->socket
;
755 sae_connid_t cid
= SAE_CONNID_ANY
;
756 struct user32_sa_endpoints ep32
;
757 struct user64_sa_endpoints ep64
;
758 struct user_sa_endpoints ep
;
759 user_ssize_t bytes_written
= 0;
760 struct user_iovec
*iovp
;
763 AUDIT_ARG(fd
, uap
->socket
);
764 error
= file_socket(fd
, &so
);
773 if (uap
->endpoints
== USER_ADDR_NULL
) {
778 if (IS_64BIT_PROCESS(p
)) {
779 error
= copyin(uap
->endpoints
, (caddr_t
)&ep64
, sizeof(ep64
));
784 ep
.sae_srcif
= ep64
.sae_srcif
;
785 ep
.sae_srcaddr
= (user_addr_t
)ep64
.sae_srcaddr
;
786 ep
.sae_srcaddrlen
= ep64
.sae_srcaddrlen
;
787 ep
.sae_dstaddr
= (user_addr_t
)ep64
.sae_dstaddr
;
788 ep
.sae_dstaddrlen
= ep64
.sae_dstaddrlen
;
790 error
= copyin(uap
->endpoints
, (caddr_t
)&ep32
, sizeof(ep32
));
795 ep
.sae_srcif
= ep32
.sae_srcif
;
796 ep
.sae_srcaddr
= ep32
.sae_srcaddr
;
797 ep
.sae_srcaddrlen
= ep32
.sae_srcaddrlen
;
798 ep
.sae_dstaddr
= ep32
.sae_dstaddr
;
799 ep
.sae_dstaddrlen
= ep32
.sae_dstaddrlen
;
803 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
804 * if this is a datagram socket; translate for other types.
806 dgram
= (so
->so_type
== SOCK_DGRAM
);
808 /* Get socket address now before we obtain socket lock */
809 if (ep
.sae_srcaddr
!= USER_ADDR_NULL
) {
810 if (ep
.sae_srcaddrlen
> sizeof(ss
)) {
811 error
= getsockaddr(so
, &src
, ep
.sae_srcaddr
, ep
.sae_srcaddrlen
, dgram
);
813 error
= getsockaddr_s(so
, &ss
, ep
.sae_srcaddr
, ep
.sae_srcaddrlen
, dgram
);
815 src
= (struct sockaddr
*)&ss
;
824 if (ep
.sae_dstaddr
== USER_ADDR_NULL
) {
829 /* Get socket address now before we obtain socket lock */
830 if (ep
.sae_dstaddrlen
> sizeof(sd
)) {
831 error
= getsockaddr(so
, &dst
, ep
.sae_dstaddr
, ep
.sae_dstaddrlen
, dgram
);
833 error
= getsockaddr_s(so
, &sd
, ep
.sae_dstaddr
, ep
.sae_dstaddrlen
, dgram
);
835 dst
= (struct sockaddr
*)&sd
;
845 if (uap
->iov
!= USER_ADDR_NULL
) {
846 /* Verify range before calling uio_create() */
847 if (uap
->iovcnt
<= 0 || uap
->iovcnt
> UIO_MAXIOV
) {
852 if (uap
->len
== USER_ADDR_NULL
) {
857 /* allocate a uio to hold the number of iovecs passed */
858 auio
= uio_create(uap
->iovcnt
, 0,
859 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
868 * get location of iovecs within the uio.
869 * then copyin the iovecs from user space.
871 iovp
= uio_iovsaddr(auio
);
876 error
= copyin_user_iovec_array(uap
->iov
,
877 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
883 /* finish setup of uio_t */
884 error
= uio_calculateresid(auio
);
890 error
= connectitx(so
, src
, dst
, p
, ep
.sae_srcif
, uap
->associd
,
891 &cid
, auio
, uap
->flags
, &bytes_written
);
892 if (error
== ERESTART
) {
896 if (uap
->len
!= USER_ADDR_NULL
) {
897 error1
= copyout(&bytes_written
, uap
->len
, sizeof(uap
->len
));
898 /* give precedence to connectitx errors */
899 if ((error1
!= 0) && (error
== 0)) {
904 if (uap
->connid
!= USER_ADDR_NULL
) {
905 error1
= copyout(&cid
, uap
->connid
, sizeof(cid
));
906 /* give precedence to connectitx errors */
907 if ((error1
!= 0) && (error
== 0)) {
916 if (src
!= NULL
&& src
!= SA(&ss
)) {
919 if (dst
!= NULL
&& dst
!= SA(&sd
)) {
int
connectx(struct proc *p, struct connectx_args *uap, int *retval)
{
	/*
	 * Due to similarity with a POSIX interface, define as
	 * an unofficial cancellation point.
	 */
	__pthread_testcancel(1);
	return connectx_nocancel(p, uap, retval);
}
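/*
 * Illustrative userland sketch (not part of the original file, compiled out):
 * how the connectx(2) entry points above are typically driven.  The
 * sa_endpoints_t layout mirrors the user{32,64}_sa_endpoints structures that
 * connectx_nocancel() copies in, and the optional iovec carries data to be
 * sent along with the connection attempt (e.g. TCP Fast Open).  The function
 * name is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
connectx_example(int s, const struct sockaddr *dst, socklen_t dstlen,
    const void *buf, size_t buflen)
{
    sa_endpoints_t ep;
    struct iovec iov;
    size_t sent = 0;

    memset(&ep, 0, sizeof(ep));
    ep.sae_dstaddr = dst;                   /* destination is required */
    ep.sae_dstaddrlen = dstlen;

    iov.iov_base = (void *)(uintptr_t)buf;  /* optional preconnect data */
    iov.iov_len = buflen;

    /* a len pointer must be supplied whenever an iovec is passed */
    return connectx(s, &ep, SAE_ASSOCID_ANY, CONNECT_DATA_IDEMPOTENT,
               &iov, 1, &sent, NULL);
}
#endif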
937 connectit(struct socket
*so
, struct sockaddr
*sa
)
941 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
942 #if CONFIG_MACF_SOCKET_SUBSET
943 if ((error
= mac_socket_check_connect(kauth_cred_get(), so
, sa
)) != 0) {
946 #endif /* MAC_SOCKET_SUBSET */
949 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
953 error
= soconnectlock(so
, sa
, 0);
957 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
961 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
962 lck_mtx_t
*mutex_held
;
964 if (so
->so_proto
->pr_getlock
!= NULL
) {
965 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
967 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
969 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
,
970 PSOCK
| PCATCH
, __func__
, 0);
971 if (so
->so_state
& SS_DRAINING
) {
972 error
= ECONNABORTED
;
979 error
= so
->so_error
;
983 socket_unlock(so
, 1);
988 connectitx(struct socket
*so
, struct sockaddr
*src
,
989 struct sockaddr
*dst
, struct proc
*p
, uint32_t ifscope
,
990 sae_associd_t aid
, sae_connid_t
*pcid
, uio_t auio
, unsigned int flags
,
991 user_ssize_t
*bytes_written
)
997 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), dst
);
998 #if CONFIG_MACF_SOCKET_SUBSET
999 if ((error
= mac_socket_check_connect(kauth_cred_get(), so
, dst
)) != 0) {
1004 if ((error
= mac_socket_check_send(kauth_cred_get(), so
, dst
)) != 0) {
1008 #endif /* MAC_SOCKET_SUBSET */
1011 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
1016 error
= soconnectxlocked(so
, src
, dst
, p
, ifscope
,
1017 aid
, pcid
, flags
, NULL
, 0, auio
, bytes_written
);
1022 * If, after the call to soconnectxlocked the flag is still set (in case
1023 * data has been queued and the connect() has actually been triggered,
1024 * it will have been unset by the transport), we exit immediately. There
1025 * is no reason to wait on any event.
1027 if (so
->so_flags1
& SOF1_PRECONNECT_DATA
) {
1031 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
1032 error
= EINPROGRESS
;
1035 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
1036 lck_mtx_t
*mutex_held
;
1038 if (so
->so_proto
->pr_getlock
!= NULL
) {
1039 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
1041 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1043 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
,
1044 PSOCK
| PCATCH
, __func__
, 0);
1045 if (so
->so_state
& SS_DRAINING
) {
1046 error
= ECONNABORTED
;
1053 error
= so
->so_error
;
1057 socket_unlock(so
, 1);
1062 peeloff(struct proc
*p
, struct peeloff_args
*uap
, int *retval
)
1064 #pragma unused(p, uap, retval)
1066 * Due to similiarity with a POSIX interface, define as
1067 * an unofficial cancellation point.
1069 __pthread_testcancel(1);
1074 disconnectx(struct proc
*p
, struct disconnectx_args
*uap
, int *retval
)
1077 * Due to similiarity with a POSIX interface, define as
1078 * an unofficial cancellation point.
1080 __pthread_testcancel(1);
1081 return disconnectx_nocancel(p
, uap
, retval
);
1085 disconnectx_nocancel(struct proc
*p
, struct disconnectx_args
*uap
, int *retval
)
1087 #pragma unused(p, retval)
1092 error
= file_socket(fd
, &so
);
1101 error
= sodisconnectx(so
, uap
->aid
, uap
->cid
);
1108 * Returns: 0 Success
1109 * socreate:EAFNOSUPPORT
1110 * socreate:EPROTOTYPE
1111 * socreate:EPROTONOSUPPORT
1115 * socreate:??? [other protocol families, IPSEC]
1121 * soconnect2:EPROTOTYPE
1122 * soconnect2:??? [other protocol families[
1125 socketpair(struct proc
*p
, struct socketpair_args
*uap
,
1126 __unused
int32_t *retval
)
1128 struct fileproc
*fp1
, *fp2
;
1129 struct socket
*so1
, *so2
;
1130 int fd
, error
, sv
[2];
1132 AUDIT_ARG(socket
, uap
->domain
, uap
->type
, uap
->protocol
);
1133 error
= socreate(uap
->domain
, &so1
, uap
->type
, uap
->protocol
);
1137 error
= socreate(uap
->domain
, &so2
, uap
->type
, uap
->protocol
);
1142 error
= falloc(p
, &fp1
, &fd
, vfs_context_current());
1146 fp1
->f_flag
= FREAD
| FWRITE
;
1147 fp1
->f_ops
= &socketops
;
1148 fp1
->f_data
= (caddr_t
)so1
;
1151 error
= falloc(p
, &fp2
, &fd
, vfs_context_current());
1155 fp2
->f_flag
= FREAD
| FWRITE
;
1156 fp2
->f_ops
= &socketops
;
1157 fp2
->f_data
= (caddr_t
)so2
;
1160 error
= soconnect2(so1
, so2
);
1164 if (uap
->type
== SOCK_DGRAM
) {
1166 * Datagram socket connection is asymmetric.
1168 error
= soconnect2(so2
, so1
);
1174 if ((error
= copyout(sv
, uap
->rsv
, 2 * sizeof(int))) != 0) {
1179 procfdtbl_releasefd(p
, sv
[0], NULL
);
1180 procfdtbl_releasefd(p
, sv
[1], NULL
);
1181 fp_drop(p
, sv
[0], fp1
, 1);
1182 fp_drop(p
, sv
[1], fp2
, 1);
1187 fp_free(p
, sv
[1], fp2
);
1189 fp_free(p
, sv
[0], fp1
);
1191 (void) soclose(so2
);
1193 (void) soclose(so1
);
1198 * Returns: 0 Success
1203 * EACCES Mandatory Access Control failure
1204 * file_socket:ENOTSOCK
1206 * getsockaddr:ENAMETOOLONG Filename too long
1207 * getsockaddr:EINVAL Invalid argument
1208 * getsockaddr:ENOMEM Not enough space
1209 * getsockaddr:EFAULT Bad address
1210 * <pru_sosend>:EACCES[TCP]
1211 * <pru_sosend>:EADDRINUSE[TCP]
1212 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1213 * <pru_sosend>:EAFNOSUPPORT[TCP]
1214 * <pru_sosend>:EAGAIN[TCP]
1215 * <pru_sosend>:EBADF
1216 * <pru_sosend>:ECONNRESET[TCP]
1217 * <pru_sosend>:EFAULT
1218 * <pru_sosend>:EHOSTUNREACH[TCP]
1219 * <pru_sosend>:EINTR
1220 * <pru_sosend>:EINVAL
1221 * <pru_sosend>:EISCONN[AF_INET]
1222 * <pru_sosend>:EMSGSIZE[TCP]
1223 * <pru_sosend>:ENETDOWN[TCP]
1224 * <pru_sosend>:ENETUNREACH[TCP]
1225 * <pru_sosend>:ENOBUFS
1226 * <pru_sosend>:ENOMEM[TCP]
1227 * <pru_sosend>:ENOTCONN[AF_INET]
1228 * <pru_sosend>:EOPNOTSUPP
1229 * <pru_sosend>:EPERM[TCP]
1230 * <pru_sosend>:EPIPE
1231 * <pru_sosend>:EWOULDBLOCK
1232 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1233 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1234 * <pru_sosend>:??? [value from so_error]
1238 sendit(struct proc
*p
, struct socket
*so
, struct user_msghdr
*mp
, uio_t uiop
,
1239 int flags
, int32_t *retval
)
1241 struct mbuf
*control
= NULL
;
1242 struct sockaddr_storage ss
;
1243 struct sockaddr
*to
= NULL
;
1244 boolean_t want_free
= TRUE
;
1248 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1250 if (mp
->msg_name
!= USER_ADDR_NULL
) {
1251 if (mp
->msg_namelen
> sizeof(ss
)) {
1252 error
= getsockaddr(so
, &to
, mp
->msg_name
,
1253 mp
->msg_namelen
, TRUE
);
1255 error
= getsockaddr_s(so
, &ss
, mp
->msg_name
,
1256 mp
->msg_namelen
, TRUE
);
1258 to
= (struct sockaddr
*)&ss
;
1265 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), to
);
1267 if (mp
->msg_control
!= USER_ADDR_NULL
) {
1268 if (mp
->msg_controllen
< sizeof(struct cmsghdr
)) {
1272 error
= sockargs(&control
, mp
->msg_control
,
1273 mp
->msg_controllen
, MT_CONTROL
);
1279 #if CONFIG_MACF_SOCKET_SUBSET
1281 * We check the state without holding the socket lock;
1282 * if a race condition occurs, it would simply result
1283 * in an extra call to the MAC check function.
1286 !(so
->so_state
& SS_DEFUNCT
) &&
1287 (error
= mac_socket_check_send(kauth_cred_get(), so
, to
)) != 0) {
1290 #endif /* MAC_SOCKET_SUBSET */
1292 len
= uio_resid(uiop
);
1293 error
= so
->so_proto
->pr_usrreqs
->pru_sosend(so
, to
, uiop
, 0,
1296 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
1297 error
== EINTR
|| error
== EWOULDBLOCK
)) {
1300 /* Generation of SIGPIPE can be controlled per socket */
1301 if (error
== EPIPE
&& !(so
->so_flags
& SOF_NOSIGPIPE
) &&
1302 !(flags
& MSG_NOSIGNAL
)) {
1303 psignal(p
, SIGPIPE
);
1307 *retval
= (int)(len
- uio_resid(uiop
));
1314 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1320 * Returns: 0 Success
1322 * sendit:??? [see sendit definition in this file]
1323 * write:??? [4056224: applicable for pipes]
1326 sendto(struct proc
*p
, struct sendto_args
*uap
, int32_t *retval
)
1328 __pthread_testcancel(1);
1329 return sendto_nocancel(p
, (struct sendto_nocancel_args
*)uap
, retval
);
1333 sendto_nocancel(struct proc
*p
,
1334 struct sendto_nocancel_args
*uap
,
1337 struct user_msghdr msg
;
1342 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1343 AUDIT_ARG(fd
, uap
->s
);
1345 if (uap
->flags
& MSG_SKIPCFIL
) {
1350 if (uap
->len
> LONG_MAX
) {
1355 auio
= uio_create(1, 0,
1356 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1362 uio_addiov(auio
, uap
->buf
, uap
->len
);
1364 msg
.msg_name
= uap
->to
;
1365 msg
.msg_namelen
= uap
->tolen
;
1366 /* no need to set up msg_iov. sendit uses uio_t we send it */
1369 msg
.msg_control
= 0;
1372 error
= file_socket(uap
->s
, &so
);
1380 error
= sendit(p
, so
, &msg
, auio
, uap
->flags
, retval
);
1389 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_END
, error
, *retval
, 0, 0, 0);
1395 * Returns: 0 Success
1398 * sendit:??? [see sendit definition in this file]
1401 sendmsg(struct proc
*p
, struct sendmsg_args
*uap
, int32_t *retval
)
1403 __pthread_testcancel(1);
1404 return sendmsg_nocancel(p
, (struct sendmsg_nocancel_args
*)uap
,
1409 sendmsg_nocancel(struct proc
*p
, struct sendmsg_nocancel_args
*uap
,
1412 struct user32_msghdr msg32
;
1413 struct user64_msghdr msg64
;
1414 struct user_msghdr user_msg
;
1419 struct user_iovec
*iovp
;
1422 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1423 AUDIT_ARG(fd
, uap
->s
);
1425 if (uap
->flags
& MSG_SKIPCFIL
) {
1430 if (IS_64BIT_PROCESS(p
)) {
1431 msghdrp
= (caddr_t
)&msg64
;
1432 size_of_msghdr
= sizeof(msg64
);
1434 msghdrp
= (caddr_t
)&msg32
;
1435 size_of_msghdr
= sizeof(msg32
);
1437 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
1439 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1443 if (IS_64BIT_PROCESS(p
)) {
1444 user_msg
.msg_flags
= msg64
.msg_flags
;
1445 user_msg
.msg_controllen
= msg64
.msg_controllen
;
1446 user_msg
.msg_control
= (user_addr_t
)msg64
.msg_control
;
1447 user_msg
.msg_iovlen
= msg64
.msg_iovlen
;
1448 user_msg
.msg_iov
= (user_addr_t
)msg64
.msg_iov
;
1449 user_msg
.msg_namelen
= msg64
.msg_namelen
;
1450 user_msg
.msg_name
= (user_addr_t
)msg64
.msg_name
;
1452 user_msg
.msg_flags
= msg32
.msg_flags
;
1453 user_msg
.msg_controllen
= msg32
.msg_controllen
;
1454 user_msg
.msg_control
= msg32
.msg_control
;
1455 user_msg
.msg_iovlen
= msg32
.msg_iovlen
;
1456 user_msg
.msg_iov
= msg32
.msg_iov
;
1457 user_msg
.msg_namelen
= msg32
.msg_namelen
;
1458 user_msg
.msg_name
= msg32
.msg_name
;
1461 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
1462 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, EMSGSIZE
,
1467 /* allocate a uio large enough to hold the number of iovecs passed */
1468 auio
= uio_create(user_msg
.msg_iovlen
, 0,
1469 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1476 if (user_msg
.msg_iovlen
) {
1478 * get location of iovecs within the uio.
1479 * then copyin the iovecs from user space.
1481 iovp
= uio_iovsaddr(auio
);
1486 error
= copyin_user_iovec_array(user_msg
.msg_iov
,
1487 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1488 user_msg
.msg_iovlen
, iovp
);
1492 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
1494 /* finish setup of uio_t */
1495 error
= uio_calculateresid(auio
);
1500 user_msg
.msg_iov
= 0;
1503 /* msg_flags is ignored for send */
1504 user_msg
.msg_flags
= 0;
1506 error
= file_socket(uap
->s
, &so
);
1513 error
= sendit(p
, so
, &user_msg
, auio
, uap
->flags
, retval
);
1520 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1526 sendmsg_x(struct proc
*p
, struct sendmsg_x_args
*uap
, user_ssize_t
*retval
)
1529 struct user_msghdr_x
*user_msg_x
= NULL
;
1530 struct uio
**uiop
= NULL
;
1533 struct sockaddr
*to
= NULL
;
1534 user_ssize_t len_before
= 0, len_after
;
1536 size_t size_of_msghdr
;
1539 int has_addr_or_ctl
= 0;
1541 KERNEL_DEBUG(DBG_FNC_SENDMSG_X
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1543 size_of_msghdr
= IS_64BIT_PROCESS(p
) ?
1544 sizeof(struct user64_msghdr_x
) : sizeof(struct user32_msghdr_x
);
1546 if (uap
->flags
& MSG_SKIPCFIL
) {
1551 error
= file_socket(uap
->s
, &so
);
1562 * Input parameter range check
1564 if (uap
->cnt
== 0 || uap
->cnt
> UIO_MAXIOV
) {
1569 * Clip to max currently allowed
1571 if (uap
->cnt
> somaxsendmsgx
) {
1572 uap
->cnt
= somaxsendmsgx
;
1575 user_msg_x
= kheap_alloc(KHEAP_TEMP
,
1576 uap
->cnt
* sizeof(struct user_msghdr_x
), Z_WAITOK
| Z_ZERO
);
1577 if (user_msg_x
== NULL
) {
1578 DBG_PRINTF("%s kheap_alloc user_msg_x failed\n", __func__
);
1582 uiop
= kheap_alloc(KHEAP_TEMP
,
1583 uap
->cnt
* sizeof(struct uio
*), Z_WAITOK
| Z_ZERO
);
1585 DBG_PRINTF("%s kheap_alloc uiop failed\n", __func__
);
1590 umsgp
= kheap_alloc(KHEAP_TEMP
,
1591 uap
->cnt
* size_of_msghdr
, Z_WAITOK
| Z_ZERO
);
1592 if (umsgp
== NULL
) {
1593 printf("%s kheap_alloc user_msg_x failed\n", __func__
);
1597 error
= copyin(uap
->msgp
, umsgp
, uap
->cnt
* size_of_msghdr
);
1599 DBG_PRINTF("%s copyin() failed\n", __func__
);
1602 error
= internalize_user_msghdr_array(umsgp
,
1603 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1604 UIO_WRITE
, uap
->cnt
, user_msg_x
, uiop
);
1606 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__
);
1610 * Make sure the size of each message iovec and
1611 * the aggregate size of all the iovec is valid
1613 if (uio_array_is_valid(uiop
, uap
->cnt
) == false) {
1619 * Sanity check on passed arguments
1621 for (i
= 0; i
< uap
->cnt
; i
++) {
1622 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
1625 * No flags on send message
1627 if (mp
->msg_flags
!= 0) {
1632 * No support for address or ancillary data (yet)
1634 if (mp
->msg_name
!= USER_ADDR_NULL
|| mp
->msg_namelen
!= 0) {
1635 has_addr_or_ctl
= 1;
1638 if (mp
->msg_control
!= USER_ADDR_NULL
||
1639 mp
->msg_controllen
!= 0) {
1640 has_addr_or_ctl
= 1;
1643 #if CONFIG_MACF_SOCKET_SUBSET
1645 * We check the state without holding the socket lock;
1646 * if a race condition occurs, it would simply result
1647 * in an extra call to the MAC check function.
1649 * Note: The following check is never true taken with the
1650 * current limitation that we do not accept to pass an address,
1651 * this is effectively placeholder code. If we add support for
1652 * addresses, we will have to check every address.
1655 !(so
->so_state
& SS_DEFUNCT
) &&
1656 (error
= mac_socket_check_send(kauth_cred_get(), so
, to
))
1660 #endif /* MAC_SOCKET_SUBSET */
1663 len_before
= uio_array_resid(uiop
, uap
->cnt
);
1666 * Feed list of packets at once only for connected socket without
1669 if (so
->so_proto
->pr_usrreqs
->pru_sosend_list
!=
1670 pru_sosend_list_notsupp
&&
1671 has_addr_or_ctl
== 0 && somaxsendmsgx
== 0) {
1672 error
= so
->so_proto
->pr_usrreqs
->pru_sosend_list(so
, uiop
,
1673 uap
->cnt
, uap
->flags
);
1675 for (i
= 0; i
< uap
->cnt
; i
++) {
1676 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
1677 struct user_msghdr user_msg
;
1678 uio_t auio
= uiop
[i
];
1681 user_msg
.msg_flags
= mp
->msg_flags
;
1682 user_msg
.msg_controllen
= mp
->msg_controllen
;
1683 user_msg
.msg_control
= mp
->msg_control
;
1684 user_msg
.msg_iovlen
= mp
->msg_iovlen
;
1685 user_msg
.msg_iov
= mp
->msg_iov
;
1686 user_msg
.msg_namelen
= mp
->msg_namelen
;
1687 user_msg
.msg_name
= mp
->msg_name
;
1689 error
= sendit(p
, so
, &user_msg
, auio
, uap
->flags
,
1696 len_after
= uio_array_resid(uiop
, uap
->cnt
);
1698 VERIFY(len_after
<= len_before
);
1701 if (len_after
!= len_before
&& (error
== ERESTART
||
1702 error
== EINTR
|| error
== EWOULDBLOCK
||
1703 error
== ENOBUFS
)) {
1706 /* Generation of SIGPIPE can be controlled per socket */
1707 if (error
== EPIPE
&& !(so
->so_flags
& SOF_NOSIGPIPE
) &&
1708 !(uap
->flags
& MSG_NOSIGNAL
)) {
1709 psignal(p
, SIGPIPE
);
1713 uiocnt
= externalize_user_msghdr_array(umsgp
,
1714 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1715 UIO_WRITE
, uap
->cnt
, user_msg_x
, uiop
);
1717 *retval
= (int)(uiocnt
);
1723 kheap_free(KHEAP_TEMP
, umsgp
, uap
->cnt
* size_of_msghdr
);
1725 free_uio_array(uiop
, uap
->cnt
);
1726 kheap_free(KHEAP_TEMP
, uiop
,
1727 uap
->cnt
* sizeof(struct uio
*));
1729 kheap_free(KHEAP_TEMP
, user_msg_x
,
1730 uap
->cnt
* sizeof(struct user_msghdr_x
));
1732 KERNEL_DEBUG(DBG_FNC_SENDMSG_X
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1739 copyout_sa(struct sockaddr
*fromsa
, user_addr_t name
, socklen_t
*namelen
)
1742 socklen_t sa_len
= 0;
1746 if (len
<= 0 || fromsa
== 0) {
1750 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1752 sa_len
= fromsa
->sa_len
;
1753 len
= MIN((unsigned int)len
, sa_len
);
1754 error
= copyout(fromsa
, name
, (unsigned)len
);
1765 copyout_control(struct proc
*p
, struct mbuf
*m
, user_addr_t control
,
1766 socklen_t
*controllen
, int *flags
, struct socket
*so
)
1771 struct inpcb
*inp
= so
? sotoinpcb(so
) : NULL
;
1777 while (m
&& len
> 0) {
1779 struct cmsghdr
*cp
= mtod(m
, struct cmsghdr
*);
1780 socklen_t cp_size
= CMSG_ALIGN(cp
->cmsg_len
);
1781 socklen_t buflen
= m
->m_len
;
1783 while (buflen
> 0 && len
> 0) {
1785 * SCM_TIMESTAMP hack because struct timeval has a
1786 * different size for 32 bits and 64 bits processes
1788 if (cp
->cmsg_level
== SOL_SOCKET
&& cp
->cmsg_type
== SCM_TIMESTAMP
) {
1789 unsigned char tmp_buffer
[CMSG_SPACE(sizeof(struct user64_timeval
))] = {};
1790 struct cmsghdr
*tmp_cp
= (struct cmsghdr
*)(void *)tmp_buffer
;
1791 socklen_t tmp_space
;
1792 struct timeval
*tv
= (struct timeval
*)(void *)CMSG_DATA(cp
);
1794 tmp_cp
->cmsg_level
= SOL_SOCKET
;
1795 tmp_cp
->cmsg_type
= SCM_TIMESTAMP
;
1797 if (proc_is64bit(p
)) {
1798 struct user64_timeval
*tv64
= (struct user64_timeval
*)(void *)CMSG_DATA(tmp_cp
);
1800 os_unaligned_deref(&tv64
->tv_sec
) = tv
->tv_sec
;
1801 os_unaligned_deref(&tv64
->tv_usec
) = tv
->tv_usec
;
1803 tmp_cp
->cmsg_len
= CMSG_LEN(sizeof(struct user64_timeval
));
1804 tmp_space
= CMSG_SPACE(sizeof(struct user64_timeval
));
1806 struct user32_timeval
*tv32
= (struct user32_timeval
*)(void *)CMSG_DATA(tmp_cp
);
1808 tv32
->tv_sec
= (user32_time_t
)tv
->tv_sec
;
1809 tv32
->tv_usec
= tv
->tv_usec
;
1811 tmp_cp
->cmsg_len
= CMSG_LEN(sizeof(struct user32_timeval
));
1812 tmp_space
= CMSG_SPACE(sizeof(struct user32_timeval
));
1814 if (len
>= tmp_space
) {
1817 *flags
|= MSG_CTRUNC
;
1820 error
= copyout(tmp_buffer
, ctlbuf
, tocopy
);
1826 /* If socket is attached to Content Filter and socket did not request address, ignore it */
1827 if ((so
!= NULL
) && (so
->so_cfil_db
!= NULL
) &&
1828 ((cp
->cmsg_level
== IPPROTO_IP
&& cp
->cmsg_type
== IP_RECVDSTADDR
&& inp
&&
1829 !(inp
->inp_flags
& INP_RECVDSTADDR
)) ||
1830 (cp
->cmsg_level
== IPPROTO_IPV6
&& (cp
->cmsg_type
== IPV6_PKTINFO
|| cp
->cmsg_type
== IPV6_2292PKTINFO
) && inp
&&
1831 !(inp
->inp_flags
& IN6P_PKTINFO
)))) {
1836 if (cp_size
> buflen
) {
1837 panic("cp_size > buflen, something"
1838 "wrong with alignment!");
1840 if (len
>= cp_size
) {
1843 *flags
|= MSG_CTRUNC
;
1846 error
= copyout((caddr_t
) cp
, ctlbuf
, tocopy
);
1857 cp
= (struct cmsghdr
*)(void *)
1858 ((unsigned char *) cp
+ cp_size
);
1859 cp_size
= CMSG_ALIGN(cp
->cmsg_len
);
1864 *controllen
= (socklen_t
)(ctlbuf
- control
);
1870 * Returns: 0 Success
1874 * EACCES Mandatory Access Control failure
1877 * <pru_soreceive>:ENOBUFS
1878 * <pru_soreceive>:ENOTCONN
1879 * <pru_soreceive>:EWOULDBLOCK
1880 * <pru_soreceive>:EFAULT
1881 * <pru_soreceive>:EINTR
1882 * <pru_soreceive>:EBADF
1883 * <pru_soreceive>:EINVAL
1884 * <pru_soreceive>:EMSGSIZE
1885 * <pru_soreceive>:???
1887 * Notes: Additional return values from calls through <pru_soreceive>
1888 * depend on protocols other than TCP or AF_UNIX, which are
1892 recvit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
1893 user_addr_t namelenp
, int32_t *retval
)
1897 struct mbuf
*control
= 0;
1899 struct sockaddr
*fromsa
= 0;
1900 struct fileproc
*fp
;
1902 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1903 if ((error
= fp_get_ftype(p
, s
, DTYPE_SOCKET
, ENOTSOCK
, &fp
))) {
1904 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1909 #if CONFIG_MACF_SOCKET_SUBSET
1911 * We check the state without holding the socket lock;
1912 * if a race condition occurs, it would simply result
1913 * in an extra call to the MAC check function.
1915 if (!(so
->so_state
& SS_DEFUNCT
) &&
1916 !(so
->so_state
& SS_ISCONNECTED
) &&
1917 !(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) &&
1918 (error
= mac_socket_check_receive(kauth_cred_get(), so
)) != 0) {
1921 #endif /* MAC_SOCKET_SUBSET */
1922 if (uio_resid(uiop
) < 0 || uio_resid(uiop
) > INT_MAX
) {
1923 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, EINVAL
, 0, 0, 0, 0);
1928 len
= uio_resid(uiop
);
1929 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive(so
, &fromsa
, uiop
,
1930 (struct mbuf
**)0, mp
->msg_control
? &control
: (struct mbuf
**)0,
1933 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()),
1937 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
1938 error
== EINTR
|| error
== EWOULDBLOCK
)) {
1946 *retval
= (int32_t)(len
- uio_resid(uiop
));
1949 error
= copyout_sa(fromsa
, mp
->msg_name
, &mp
->msg_namelen
);
1953 /* return the actual, untruncated address length */
1955 (error
= copyout((caddr_t
)&mp
->msg_namelen
, namelenp
,
1961 if (mp
->msg_control
) {
1962 error
= copyout_control(p
, control
, mp
->msg_control
,
1963 &mp
->msg_controllen
, &mp
->msg_flags
, so
);
1966 FREE(fromsa
, M_SONAME
);
1970 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1972 fp_drop(p
, s
, fp
, 0);
1977 * Returns: 0 Success
1981 * read:??? [4056224: applicable for pipes]
1983 * Notes: The read entry point is only called as part of support for
1984 * binary backward compatability; new code should use read
1985 * instead of recv or recvfrom when attempting to read data
1988 * For full documentation of the return codes from recvit, see
1989 * the block header for the recvit function.
1992 recvfrom(struct proc
*p
, struct recvfrom_args
*uap
, int32_t *retval
)
1994 __pthread_testcancel(1);
1995 return recvfrom_nocancel(p
, (struct recvfrom_nocancel_args
*)uap
,
2000 recvfrom_nocancel(struct proc
*p
, struct recvfrom_nocancel_args
*uap
,
2003 struct user_msghdr msg
;
2007 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
2008 AUDIT_ARG(fd
, uap
->s
);
2010 if (uap
->fromlenaddr
) {
2011 error
= copyin(uap
->fromlenaddr
,
2012 (caddr_t
)&msg
.msg_namelen
, sizeof(msg
.msg_namelen
));
2017 msg
.msg_namelen
= 0;
2019 msg
.msg_name
= uap
->from
;
2020 auio
= uio_create(1, 0,
2021 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
2027 uio_addiov(auio
, uap
->buf
, uap
->len
);
2028 /* no need to set up msg_iov. recvit uses uio_t we send it */
2031 msg
.msg_control
= 0;
2032 msg
.msg_controllen
= 0;
2033 msg
.msg_flags
= uap
->flags
;
2034 error
= recvit(p
, uap
->s
, &msg
, auio
, uap
->fromlenaddr
, retval
);
2039 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2045 * Returns: 0 Success
2052 * Notes: For full documentation of the return codes from recvit, see
2053 * the block header for the recvit function.
2056 recvmsg(struct proc
*p
, struct recvmsg_args
*uap
, int32_t *retval
)
2058 __pthread_testcancel(1);
2059 return recvmsg_nocancel(p
, (struct recvmsg_nocancel_args
*)uap
,
2064 recvmsg_nocancel(struct proc
*p
, struct recvmsg_nocancel_args
*uap
,
2067 struct user32_msghdr msg32
;
2068 struct user64_msghdr msg64
;
2069 struct user_msghdr user_msg
;
2075 struct user_iovec
*iovp
;
2077 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
2078 AUDIT_ARG(fd
, uap
->s
);
2079 if (IS_64BIT_PROCESS(p
)) {
2080 msghdrp
= (caddr_t
)&msg64
;
2081 size_of_msghdr
= sizeof(msg64
);
2083 msghdrp
= (caddr_t
)&msg32
;
2084 size_of_msghdr
= sizeof(msg32
);
2086 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
2088 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2092 /* only need to copy if user process is not 64-bit */
2093 if (IS_64BIT_PROCESS(p
)) {
2094 user_msg
.msg_flags
= msg64
.msg_flags
;
2095 user_msg
.msg_controllen
= msg64
.msg_controllen
;
2096 user_msg
.msg_control
= (user_addr_t
)msg64
.msg_control
;
2097 user_msg
.msg_iovlen
= msg64
.msg_iovlen
;
2098 user_msg
.msg_iov
= (user_addr_t
)msg64
.msg_iov
;
2099 user_msg
.msg_namelen
= msg64
.msg_namelen
;
2100 user_msg
.msg_name
= (user_addr_t
)msg64
.msg_name
;
2102 user_msg
.msg_flags
= msg32
.msg_flags
;
2103 user_msg
.msg_controllen
= msg32
.msg_controllen
;
2104 user_msg
.msg_control
= msg32
.msg_control
;
2105 user_msg
.msg_iovlen
= msg32
.msg_iovlen
;
2106 user_msg
.msg_iov
= msg32
.msg_iov
;
2107 user_msg
.msg_namelen
= msg32
.msg_namelen
;
2108 user_msg
.msg_name
= msg32
.msg_name
;
2111 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
2112 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, EMSGSIZE
,
2117 user_msg
.msg_flags
= uap
->flags
;
2119 /* allocate a uio large enough to hold the number of iovecs passed */
2120 auio
= uio_create(user_msg
.msg_iovlen
, 0,
2121 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
2129 * get location of iovecs within the uio. then copyin the iovecs from
2132 iovp
= uio_iovsaddr(auio
);
2137 uiov
= user_msg
.msg_iov
;
2138 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
2139 error
= copyin_user_iovec_array(uiov
,
2140 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
2141 user_msg
.msg_iovlen
, iovp
);
2146 /* finish setup of uio_t */
2147 error
= uio_calculateresid(auio
);
2152 error
= recvit(p
, uap
->s
, &user_msg
, auio
, 0, retval
);
2154 user_msg
.msg_iov
= uiov
;
2155 if (IS_64BIT_PROCESS(p
)) {
2156 msg64
.msg_flags
= user_msg
.msg_flags
;
2157 msg64
.msg_controllen
= user_msg
.msg_controllen
;
2158 msg64
.msg_control
= user_msg
.msg_control
;
2159 msg64
.msg_iovlen
= user_msg
.msg_iovlen
;
2160 msg64
.msg_iov
= user_msg
.msg_iov
;
2161 msg64
.msg_namelen
= user_msg
.msg_namelen
;
2162 msg64
.msg_name
= user_msg
.msg_name
;
2164 msg32
.msg_flags
= user_msg
.msg_flags
;
2165 msg32
.msg_controllen
= user_msg
.msg_controllen
;
2166 msg32
.msg_control
= (user32_addr_t
)user_msg
.msg_control
;
2167 msg32
.msg_iovlen
= user_msg
.msg_iovlen
;
2168 msg32
.msg_iov
= (user32_addr_t
)user_msg
.msg_iov
;
2169 msg32
.msg_namelen
= user_msg
.msg_namelen
;
2170 msg32
.msg_name
= (user32_addr_t
)user_msg
.msg_name
;
2172 error
= copyout(msghdrp
, uap
->msg
, size_of_msghdr
);
2178 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2183 recvmsg_x(struct proc
*p
, struct recvmsg_x_args
*uap
, user_ssize_t
*retval
)
2185 int error
= EOPNOTSUPP
;
2186 struct user_msghdr_x
*user_msg_x
= NULL
;
2187 struct recv_msg_elem
*recv_msg_array
= NULL
;
2189 user_ssize_t len_before
= 0, len_after
;
2191 size_t size_of_msghdr
;
2196 KERNEL_DEBUG(DBG_FNC_RECVMSG_X
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
2198 size_of_msghdr
= IS_64BIT_PROCESS(p
) ?
2199 sizeof(struct user64_msghdr_x
) : sizeof(struct user32_msghdr_x
);
2201 error
= file_socket(uap
->s
, &so
);
2211 * Support only a subset of message flags
2213 if (uap
->flags
& ~(MSG_PEEK
| MSG_WAITALL
| MSG_DONTWAIT
| MSG_NEEDSA
| MSG_NBIO
)) {
2217 * Input parameter range check
2219 if (uap
->cnt
== 0 || uap
->cnt
> UIO_MAXIOV
) {
2223 if (uap
->cnt
> somaxrecvmsgx
) {
2224 uap
->cnt
= somaxrecvmsgx
;
2227 user_msg_x
= kheap_alloc(KHEAP_TEMP
,
2228 uap
->cnt
* sizeof(struct user_msghdr_x
), Z_WAITOK
| Z_ZERO
);
2229 if (user_msg_x
== NULL
) {
2230 DBG_PRINTF("%s kheap_alloc user_msg_x failed\n", __func__
);
2234 recv_msg_array
= alloc_recv_msg_array(uap
->cnt
);
2235 if (recv_msg_array
== NULL
) {
2236 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__
);
2241 umsgp
= kheap_alloc(KHEAP_TEMP
,
2242 uap
->cnt
* size_of_msghdr
, Z_WAITOK
| Z_ZERO
);
2243 if (umsgp
== NULL
) {
2244 DBG_PRINTF("%s kheap_alloc umsgp failed\n", __func__
);
2248 error
= copyin(uap
->msgp
, umsgp
, uap
->cnt
* size_of_msghdr
);
2250 DBG_PRINTF("%s copyin() failed\n", __func__
);
2253 error
= internalize_recv_msghdr_array(umsgp
,
2254 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
2255 UIO_READ
, uap
->cnt
, user_msg_x
, recv_msg_array
);
2257 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__
);
2261 * Make sure the size of each message iovec and
2262 * the aggregate size of all the iovec is valid
2264 if (recv_msg_array_is_valid(recv_msg_array
, uap
->cnt
) == 0) {
2269 * Sanity check on passed arguments
2271 for (i
= 0; i
< uap
->cnt
; i
++) {
2272 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
2274 if (mp
->msg_flags
!= 0) {
2279 #if CONFIG_MACF_SOCKET_SUBSET
2281 * We check the state without holding the socket lock;
2282 * if a race condition occurs, it would simply result
2283 * in an extra call to the MAC check function.
2285 if (!(so
->so_state
& SS_DEFUNCT
) &&
2286 !(so
->so_state
& SS_ISCONNECTED
) &&
2287 !(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) &&
2288 (error
= mac_socket_check_receive(kauth_cred_get(), so
)) != 0) {
2291 #endif /* MAC_SOCKET_SUBSET */
2293 len_before
= recv_msg_array_resid(recv_msg_array
, uap
->cnt
);
2295 if (so
->so_proto
->pr_usrreqs
->pru_soreceive_list
!=
2296 pru_soreceive_list_notsupp
&&
2297 somaxrecvmsgx
== 0) {
2298 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive_list(so
,
2299 recv_msg_array
, uap
->cnt
, &uap
->flags
);
2301 int flags
= uap
->flags
;
2303 for (i
= 0; i
< uap
->cnt
; i
++) {
2304 struct recv_msg_elem
*recv_msg_elem
;
2306 struct sockaddr
**psa
;
2307 struct mbuf
**controlp
;
2309 recv_msg_elem
= recv_msg_array
+ i
;
2310 auio
= recv_msg_elem
->uio
;
2313 * Do not block if we got at least one packet
2316 flags
|= MSG_DONTWAIT
;
2319 psa
= (recv_msg_elem
->which
& SOCK_MSG_SA
) ?
2320 &recv_msg_elem
->psa
: NULL
;
2321 controlp
= (recv_msg_elem
->which
& SOCK_MSG_CONTROL
) ?
2322 &recv_msg_elem
->controlp
: NULL
;
2324 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive(so
, psa
,
2325 auio
, (struct mbuf
**)NULL
, controlp
, &flags
);
2332 recv_msg_elem
->which
|= SOCK_MSG_DATA
;
2334 * Set the messages flags for this packet
2336 flags
&= ~MSG_DONTWAIT
;
2337 recv_msg_elem
->flags
= flags
;
2339 * Stop on partial copy
2341 if (recv_msg_elem
->flags
& (MSG_RCVMORE
| MSG_TRUNC
)) {
2347 len_after
= recv_msg_array_resid(recv_msg_array
, uap
->cnt
);
2350 if (len_after
!= len_before
&& (error
== ERESTART
||
2351 error
== EINTR
|| error
== EWOULDBLOCK
)) {
2358 uiocnt
= externalize_recv_msghdr_array(p
, so
, umsgp
,
2359 uap
->cnt
, user_msg_x
, recv_msg_array
, &error
);
2364 error
= copyout(umsgp
, uap
->msgp
, uap
->cnt
* size_of_msghdr
);
2366 DBG_PRINTF("%s copyout() failed\n", __func__
);
2369 *retval
= (int)(uiocnt
);
2375 kheap_free(KHEAP_TEMP
, umsgp
, uap
->cnt
* size_of_msghdr
);
2376 free_recv_msg_array(recv_msg_array
, uap
->cnt
);
2377 kheap_free(KHEAP_TEMP
, user_msg_x
,
2378 uap
->cnt
* sizeof(struct user_msghdr_x
));
2380 KERNEL_DEBUG(DBG_FNC_RECVMSG_X
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2386 * Returns: 0 Success
2388 * file_socket:ENOTSOCK
2391 * soshutdown:ENOTCONN
2392 * soshutdown:EADDRNOTAVAIL[TCP]
2393 * soshutdown:ENOBUFS[TCP]
2394 * soshutdown:EMSGSIZE[TCP]
2395 * soshutdown:EHOSTUNREACH[TCP]
2396 * soshutdown:ENETUNREACH[TCP]
2397 * soshutdown:ENETDOWN[TCP]
2398 * soshutdown:ENOMEM[TCP]
2399 * soshutdown:EACCES[TCP]
2400 * soshutdown:EMSGSIZE[TCP]
2401 * soshutdown:ENOBUFS[TCP]
2402 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2403 * soshutdown:??? [other protocol families]
2407 shutdown(__unused
struct proc
*p
, struct shutdown_args
*uap
,
2408 __unused
int32_t *retval
)
2413 AUDIT_ARG(fd
, uap
->s
);
2414 error
= file_socket(uap
->s
, &so
);
2422 error
= soshutdown((struct socket
*)so
, uap
->how
);
2429 * Returns: 0 Success
2432 * EACCES Mandatory Access Control failure
2433 * file_socket:ENOTSOCK
2436 * sosetopt:ENOPROTOOPT
2440 * sosetopt:EOPNOTSUPP[AF_UNIX]
2445 setsockopt(struct proc
*p
, struct setsockopt_args
*uap
,
2446 __unused
int32_t *retval
)
2449 struct sockopt sopt
;
2452 AUDIT_ARG(fd
, uap
->s
);
2453 if (uap
->val
== 0 && uap
->valsize
!= 0) {
2456 /* No bounds checking on size (it's unsigned) */
2458 error
= file_socket(uap
->s
, &so
);
2463 sopt
.sopt_dir
= SOPT_SET
;
2464 sopt
.sopt_level
= uap
->level
;
2465 sopt
.sopt_name
= uap
->name
;
2466 sopt
.sopt_val
= uap
->val
;
2467 sopt
.sopt_valsize
= uap
->valsize
;
2474 #if CONFIG_MACF_SOCKET_SUBSET
2475 if ((error
= mac_socket_check_setsockopt(kauth_cred_get(), so
,
2479 #endif /* MAC_SOCKET_SUBSET */
2480 error
= sosetoptlock(so
, &sopt
, 1); /* will lock socket */
2489 * Returns: 0 Success
2492 * EACCES Mandatory Access Control failure
2495 * file_socket:ENOTSOCK
2500 getsockopt(struct proc
*p
, struct getsockopt_args
*uap
,
2501 __unused
int32_t *retval
)
2505 struct sockopt sopt
;
2508 error
= file_socket(uap
->s
, &so
);
2513 error
= copyin(uap
->avalsize
, (caddr_t
)&valsize
,
2518 /* No bounds checking on size (it's unsigned) */
2522 sopt
.sopt_dir
= SOPT_GET
;
2523 sopt
.sopt_level
= uap
->level
;
2524 sopt
.sopt_name
= uap
->name
;
2525 sopt
.sopt_val
= uap
->val
;
2526 sopt
.sopt_valsize
= (size_t)valsize
; /* checked non-negative above */
2533 #if CONFIG_MACF_SOCKET_SUBSET
2534 if ((error
= mac_socket_check_getsockopt(kauth_cred_get(), so
,
2538 #endif /* MAC_SOCKET_SUBSET */
2539 error
= sogetoptlock((struct socket
*)so
, &sopt
, 1); /* will lock */
2541 valsize
= (socklen_t
)sopt
.sopt_valsize
;
2542 error
= copyout((caddr_t
)&valsize
, uap
->avalsize
,
2554 * Returns: 0 Success
2556 * file_socket:ENOTSOCK
2560 * <pru_sockaddr>:ENOBUFS[TCP]
2561 * <pru_sockaddr>:ECONNRESET[TCP]
2562 * <pru_sockaddr>:EINVAL[AF_UNIX]
2563 * <sf_getsockname>:???
2567 getsockname(__unused
struct proc
*p
, struct getsockname_args
*uap
,
2568 __unused
int32_t *retval
)
2571 struct sockaddr
*sa
;
2576 error
= file_socket(uap
->fdes
, &so
);
2580 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof(socklen_t
));
2590 error
= (*so
->so_proto
->pr_usrreqs
->pru_sockaddr
)(so
, &sa
);
2592 error
= sflt_getsockname(so
, &sa
);
2593 if (error
== EJUSTRETURN
) {
2597 socket_unlock(so
, 1);
2606 sa_len
= sa
->sa_len
;
2607 len
= MIN(len
, sa_len
);
2608 error
= copyout((caddr_t
)sa
, uap
->asa
, len
);
2612 /* return the actual, untruncated address length */
2615 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof(socklen_t
));
2619 file_drop(uap
->fdes
);
/*
 * Get name of peer for connected socket.
 *
 * Returns:	0			Success
 *	file_socket:ENOTSOCK
 *	<pru_peeraddr>:???
 *	<sf_getpeername>:???
 */
int
getpeername(__unused struct proc *p, struct getpeername_args *uap,
    __unused int32_t *retval)
{
	struct sockaddr *sa;
	/* ... */

	error = file_socket(uap->fdes, &so);
	/* ... */
	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
		/* the socket has been shutdown, no more getpeername's */
		socket_unlock(so, 1);
		/* ... */
	}

	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) == 0) {
		socket_unlock(so, 1);
		/* ... */
	}
	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
	/* ... */
	socket_unlock(so, 1);
	/* ... */
	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
	/* ... */
	error = sflt_getpeername(so, &sa);
	if (error == EJUSTRETURN) {
		/* ... */
	}
	/* ... */
	socket_unlock(so, 1);
	/* ... */
	sa_len = sa->sa_len;
	len = MIN(len, sa_len);
	error = copyout(sa, uap->asa, len);
	/* ... */
	/* return the actual, untruncated address length */
	len = sa_len;
	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
	/* ... */
	file_drop(uap->fdes);
	/* ... */
}
int
sockargs(struct mbuf **mp, user_addr_t data, socklen_t buflen, int type)
{
	struct sockaddr *sa;
	/* ... */
	socklen_t alloc_buflen = buflen;

	if (buflen > INT_MAX / 2) {
		/* ... */
	}
	if (type == MT_SONAME && buflen > SOCK_MAXADDRLEN) {
		/* ... */
	}
	/*
	 * The fd's in the buffer must expand to be pointers, thus we need twice
	 * as much space.
	 */
	if (type == MT_CONTROL) {
		alloc_buflen = ((buflen - sizeof(struct cmsghdr)) * 2) +
		    sizeof(struct cmsghdr);
	}
	if (alloc_buflen > MLEN) {
		if (type == MT_SONAME && alloc_buflen <= 112) {
			alloc_buflen = MLEN;	/* unix domain compat. hack */
		} else if (alloc_buflen > MCLBYTES) {
			/* ... */
		}
	}
	m = m_get(M_WAIT, type);
	/* ... */
	if (alloc_buflen > MLEN) {
		/* ... */
		if ((m->m_flags & M_EXT) == 0) {
			/* ... */
		}
	}
	/*
	 * K64: We still copyin the original buflen because it gets expanded
	 * later and we lie about the size of the mbuf because it only affects
	 * ...
	 */
	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
	/* ... */
	if (type == MT_SONAME) {
		sa = mtod(m, struct sockaddr *);
		VERIFY(buflen <= SOCK_MAXADDRLEN);
		sa->sa_len = (__uint8_t)buflen;
	}
	/* ... */
}
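/*
 * Illustrative arithmetic (assumption: 64-bit kernel, 12-byte struct
 * cmsghdr): for an MT_CONTROL request the payload portion of the buffer is
 * doubled so that 4-byte file descriptors can be rewritten in place as
 * 8-byte pointers, while the header is counted only once.  For example, a
 * 32-byte SCM_RIGHTS buffer would be sized as
 *
 *	alloc_buflen = (32 - 12) * 2 + 12 = 52 bytes
 *
 * before the mbuf is chosen.
 */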
/*
 * Given a user_addr_t of length len, allocate and fill out a *sa.
 *
 * Returns:	0			Success
 *		ENAMETOOLONG		Filename too long
 *		EINVAL			Invalid argument
 *		ENOMEM			Not enough space
 *		copyin:EFAULT		Bad address
 */
static int
getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
    size_t len, boolean_t translate_unspec)
{
	struct sockaddr *sa;
	/* ... */

	if (len > SOCK_MAXADDRLEN) {
		return ENAMETOOLONG;
	}
	if (len < offsetof(struct sockaddr, sa_data[0])) {
		/* ... */
	}
	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
	/* ... */
	error = copyin(uaddr, (caddr_t)sa, len);
	/* ... */
	/*
	 * Force sa_family to AF_INET on AF_INET sockets to handle
	 * legacy applications that use AF_UNSPEC (0).  On all other
	 * sockets we leave it unchanged and let the lower layer
	 * handle it.
	 */
	if (translate_unspec && sa->sa_family == AF_UNSPEC &&
	    SOCK_CHECK_DOM(so, PF_INET) &&
	    len == sizeof(struct sockaddr_in)) {
		sa->sa_family = AF_INET;
	}
	/* ... */
	VERIFY(len <= SOCK_MAXADDRLEN);
	sa->sa_len = (__uint8_t)len;
	/* ... */
}
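/*
 * Illustrative sketch (not part of this file's build): the legacy behavior
 * the AF_UNSPEC translation above preserves.  An old AF_INET client that
 * never sets sin_family still gets a working connect(2), because the kernel
 * rewrites family 0 to AF_INET for addresses of exactly
 * sizeof(struct sockaddr_in) on PF_INET sockets.
 *
 *	#include <netinet/in.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	int
 *	legacy_connect(int s, in_addr_t ip, in_port_t port)
 *	{
 *		struct sockaddr_in sin;
 *
 *		memset(&sin, 0, sizeof(sin));	// sin_family stays 0 (AF_UNSPEC)
 *		sin.sin_len = sizeof(sin);
 *		sin.sin_port = port;
 *		sin.sin_addr.s_addr = ip;
 *		return connect(s, (struct sockaddr *)&sin, sizeof(sin));
 *	}
 */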
static int
getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
    user_addr_t uaddr, size_t len, boolean_t translate_unspec)
{
	/* ... */

	if (ss == NULL || uaddr == USER_ADDR_NULL ||
	    len < offsetof(struct sockaddr, sa_data[0])) {
		/* ... */
	}
	/*
	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
	 * so the check here is inclusive.
	 */
	if (len > sizeof(*ss)) {
		return ENAMETOOLONG;
	}
	bzero(ss, sizeof(*ss));
	error = copyin(uaddr, (caddr_t)ss, len);
	/* ... */
	/*
	 * Force sa_family to AF_INET on AF_INET sockets to handle
	 * legacy applications that use AF_UNSPEC (0).  On all other
	 * sockets we leave it unchanged and let the lower layer
	 * handle it.
	 */
	if (translate_unspec && ss->ss_family == AF_UNSPEC &&
	    SOCK_CHECK_DOM(so, PF_INET) &&
	    len == sizeof(struct sockaddr_in)) {
		ss->ss_family = AF_INET;
	}
	/* ... */
	ss->ss_len = (__uint8_t)len;
	/* ... */
}
static int
internalize_user_msghdr_array(const void *src, int spacetype, int direction,
    u_int count, struct user_msghdr_x *dst, struct uio **uiop)
{
	/* ... */
	for (i = 0; i < count; i++) {
		uio_t auio = NULL;
		struct user_iovec *iovp;
		struct user_msghdr_x *user_msg = dst + i;

		if (spacetype == UIO_USERSPACE64) {
			const struct user64_msghdr_x *msghdr64;

			msghdr64 = ((const struct user64_msghdr_x *)src) + i;

			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
			user_msg->msg_namelen = msghdr64->msg_namelen;
			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
			user_msg->msg_iovlen = msghdr64->msg_iovlen;
			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
			user_msg->msg_controllen = msghdr64->msg_controllen;
			user_msg->msg_flags = msghdr64->msg_flags;
			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
		} else {
			const struct user32_msghdr_x *msghdr32;

			msghdr32 = ((const struct user32_msghdr_x *)src) + i;

			user_msg->msg_name = msghdr32->msg_name;
			user_msg->msg_namelen = msghdr32->msg_namelen;
			user_msg->msg_iov = msghdr32->msg_iov;
			user_msg->msg_iovlen = msghdr32->msg_iovlen;
			user_msg->msg_control = msghdr32->msg_control;
			user_msg->msg_controllen = msghdr32->msg_controllen;
			user_msg->msg_flags = msghdr32->msg_flags;
			user_msg->msg_datalen = msghdr32->msg_datalen;
		}

		if (user_msg->msg_iovlen <= 0 ||
		    user_msg->msg_iovlen > UIO_MAXIOV) {
			/* ... */
		}
		auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
		    direction);
		/* ... */
		iovp = uio_iovsaddr(auio);
		/* ... */
		error = copyin_user_iovec_array(user_msg->msg_iov,
		    spacetype, user_msg->msg_iovlen, iovp);
		/* ... */
		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);

		error = uio_calculateresid(auio);
		/* ... */
		user_msg->msg_datalen = uio_resid(auio);
		/* ... */
		if (user_msg->msg_name && user_msg->msg_namelen) {
			/* ... */
		}
		if (user_msg->msg_control && user_msg->msg_controllen) {
			/* ... */
		}
	}
	/* ... */
}
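/*
 * Illustrative sketch (not part of this file's build; all types here are
 * hypothetical): the ABI-widening pattern used above.  A 32-bit process lays
 * out pointers as 4-byte values, so each per-ABI msghdr image is translated
 * into one kernel-internal record whose address fields are wide enough for
 * either ABI before any iovec copyin happens.
 *
 *	#include <stdint.h>
 *
 *	struct example_msg32 { uint32_t buf; uint32_t buflen; };
 *	struct example_msg64 { uint64_t buf; uint64_t buflen; };
 *	struct example_kmsg  { uint64_t buf; uint64_t buflen; };
 *
 *	static void
 *	widen(const void *src, int is64, unsigned i, struct example_kmsg *dst)
 *	{
 *		if (is64) {
 *			const struct example_msg64 *m =
 *			    (const struct example_msg64 *)src + i;
 *			dst->buf = m->buf;
 *			dst->buflen = m->buflen;
 *		} else {
 *			const struct example_msg32 *m =
 *			    (const struct example_msg32 *)src + i;
 *			dst->buf = m->buf;	// zero-extends the 32-bit pointer
 *			dst->buflen = m->buflen;
 *		}
 *	}
 */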
static int
internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
    u_int count, struct user_msghdr_x *dst,
    struct recv_msg_elem *recv_msg_array)
{
	/* ... */
	for (i = 0; i < count; i++) {
		struct user_iovec *iovp;
		struct user_msghdr_x *user_msg = dst + i;
		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;

		if (spacetype == UIO_USERSPACE64) {
			const struct user64_msghdr_x *msghdr64;

			msghdr64 = ((const struct user64_msghdr_x *)src) + i;

			user_msg->msg_name = (user_addr_t)msghdr64->msg_name;
			user_msg->msg_namelen = msghdr64->msg_namelen;
			user_msg->msg_iov = (user_addr_t)msghdr64->msg_iov;
			user_msg->msg_iovlen = msghdr64->msg_iovlen;
			user_msg->msg_control = (user_addr_t)msghdr64->msg_control;
			user_msg->msg_controllen = msghdr64->msg_controllen;
			user_msg->msg_flags = msghdr64->msg_flags;
			user_msg->msg_datalen = (size_t)msghdr64->msg_datalen;
		} else {
			const struct user32_msghdr_x *msghdr32;

			msghdr32 = ((const struct user32_msghdr_x *)src) + i;

			user_msg->msg_name = msghdr32->msg_name;
			user_msg->msg_namelen = msghdr32->msg_namelen;
			user_msg->msg_iov = msghdr32->msg_iov;
			user_msg->msg_iovlen = msghdr32->msg_iovlen;
			user_msg->msg_control = msghdr32->msg_control;
			user_msg->msg_controllen = msghdr32->msg_controllen;
			user_msg->msg_flags = msghdr32->msg_flags;
			user_msg->msg_datalen = msghdr32->msg_datalen;
		}

		if (user_msg->msg_iovlen <= 0 ||
		    user_msg->msg_iovlen > UIO_MAXIOV) {
			/* ... */
		}
		recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
		    spacetype, direction);
		if (recv_msg_elem->uio == NULL) {
			/* ... */
		}
		iovp = uio_iovsaddr(recv_msg_elem->uio);
		/* ... */
		error = copyin_user_iovec_array(user_msg->msg_iov,
		    spacetype, user_msg->msg_iovlen, iovp);
		/* ... */
		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);

		error = uio_calculateresid(recv_msg_elem->uio);
		/* ... */
		user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
		/* ... */
		if (user_msg->msg_name && user_msg->msg_namelen) {
			recv_msg_elem->which |= SOCK_MSG_SA;
		}
		if (user_msg->msg_control && user_msg->msg_controllen) {
			recv_msg_elem->which |= SOCK_MSG_CONTROL;
		}
	}
	/* ... */
}
/* ... */
externalize_user_msghdr_array(void *dst, int spacetype, int direction,
    u_int count, const struct user_msghdr_x *src, struct uio **uiop)
{
#pragma unused(direction)
	/* ... */
	for (i = 0; i < count; i++) {
		const struct user_msghdr_x *user_msg = src + i;
		uio_t auio = uiop[i];
		user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);

		if (user_msg->msg_datalen != 0 && len == 0) {
			/* ... */
		}
		if (seenlast == 0) {
			/* ... */
		}
		if (spacetype == UIO_USERSPACE64) {
			struct user64_msghdr_x *msghdr64;

			msghdr64 = ((struct user64_msghdr_x *)dst) + i;

			msghdr64->msg_flags = user_msg->msg_flags;
			msghdr64->msg_datalen = len;
		} else {
			struct user32_msghdr_x *msghdr32;

			msghdr32 = ((struct user32_msghdr_x *)dst) + i;

			msghdr32->msg_flags = user_msg->msg_flags;
			msghdr32->msg_datalen = (user32_size_t)len;
		}
	}
	/* ... */
}
/* ... */
externalize_recv_msghdr_array(struct proc *p, struct socket *so, void *dst,
    u_int count, struct user_msghdr_x *src,
    struct recv_msg_elem *recv_msg_array, int *ret_error)
{
	/* ... */
	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;

	/* ... */
	for (i = 0; i < count; i++) {
		struct user_msghdr_x *user_msg = src + i;
		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
		user_ssize_t len = 0;
		/* ... */

		len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);

		if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
			/* ... */
		}

		if (recv_msg_elem->which & SOCK_MSG_SA) {
			error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
			    &user_msg->msg_namelen);
			/* ... */
		}
		if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
			error = copyout_control(p, recv_msg_elem->controlp,
			    user_msg->msg_control, &user_msg->msg_controllen,
			    &recv_msg_elem->flags, so);
			/* ... */
		}

		if (spacetype == UIO_USERSPACE64) {
			struct user64_msghdr_x *msghdr64 =
			    ((struct user64_msghdr_x *)dst) + i;

			msghdr64->msg_namelen = user_msg->msg_namelen;
			msghdr64->msg_controllen = user_msg->msg_controllen;
			msghdr64->msg_flags = recv_msg_elem->flags;
			msghdr64->msg_datalen = len;
		} else {
			struct user32_msghdr_x *msghdr32 =
			    ((struct user32_msghdr_x *)dst) + i;

			msghdr32->msg_namelen = user_msg->msg_namelen;
			msghdr32->msg_controllen = user_msg->msg_controllen;
			msghdr32->msg_flags = recv_msg_elem->flags;
			msghdr32->msg_datalen = (user32_size_t)len;
		}
	}
	/* ... */
}
void
free_uio_array(struct uio **uiop, u_int count)
{
	/* ... */
	for (i = 0; i < count; i++) {
		if (uiop[i] != NULL) {
			uio_free(uiop[i]);
		}
	}
}
__private_extern__ user_ssize_t
uio_array_resid(struct uio **uiop, u_int count)
{
	user_ssize_t len = 0;
	/* ... */

	for (i = 0; i < count; i++) {
		struct uio *auio = uiop[i];

		/* ... */
		len += uio_resid(auio);
	}
	return len;
}
/* ... */
uio_array_is_valid(struct uio **uiop, u_int count)
{
	user_ssize_t len = 0;
	/* ... */

	for (i = 0; i < count; i++) {
		struct uio *auio = uiop[i];
		/* ... */
		user_ssize_t resid = uio_resid(auio);

		/*
		 * Sanity check on the validity of the iovec:
		 * no point of going over sb_max
		 */
		if (resid < 0 || resid > (user_ssize_t)sb_max) {
			/* ... */
		}

		len += resid;
		if (len < 0 || len > (user_ssize_t)sb_max) {
			/* ... */
		}
	}
	/* ... */
}
struct recv_msg_elem *
alloc_recv_msg_array(u_int count)
{
	return kheap_alloc(KHEAP_TEMP,
	    count * sizeof(struct recv_msg_elem), Z_WAITOK | Z_ZERO);
}
void
free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
{
	if (recv_msg_array == NULL) {
		return;
	}
	for (uint32_t i = 0; i < count; i++) {
		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;

		if (recv_msg_elem->uio != NULL) {
			uio_free(recv_msg_elem->uio);
		}
		_FREE(recv_msg_elem->psa, M_TEMP);
		if (recv_msg_elem->controlp != NULL) {
			m_freem(recv_msg_elem->controlp);
		}
	}
	kheap_free(KHEAP_TEMP, recv_msg_array,
	    count * sizeof(struct recv_msg_elem));
}
__private_extern__ user_ssize_t
recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
{
	user_ssize_t len = 0;
	/* ... */

	for (i = 0; i < count; i++) {
		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;

		if (recv_msg_elem->uio != NULL) {
			len += uio_resid(recv_msg_elem->uio);
		}
	}
	return len;
}
/* ... */
recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
{
	user_ssize_t len = 0;
	/* ... */

	for (i = 0; i < count; i++) {
		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;

		if (recv_msg_elem->uio != NULL) {
			user_ssize_t resid = uio_resid(recv_msg_elem->uio);

			/*
			 * Sanity check on the validity of the iovec:
			 * no point of going over sb_max
			 */
			if (resid < 0 || (u_int32_t)resid > sb_max) {
				/* ... */
			}

			len += resid;
			if (len < 0 || (u_int32_t)len > sb_max) {
				/* ... */
			}
		}
	}
	/* ... */
}
#define SFUIOBUFS 64

/* Macros to compute the number of mbufs needed depending on cluster size */
#define HOWMANY_16K(n)	((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
#define HOWMANY_4K(n)	((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)

/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
#define SENDFILE_MAX_BYTES	(SFUIOBUFS << PGSHIFT)

/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K	HOWMANY_16K(SENDFILE_MAX_BYTES)
#define SENDFILE_MAX_4K		HOWMANY_4K(SENDFILE_MAX_BYTES)
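/*
 * Illustrative arithmetic (assumptions: 4 KB pages, so PGSHIFT == 12,
 * M16KCLSHIFT == 14 and MBIGCLSHIFT == 12): the HOWMANY_* macros are a
 * shift-based ceiling division, e.g.
 *
 *	HOWMANY_16K(20000) = ((20000 - 1) >> 14) + 1 = 2 clusters
 *	HOWMANY_4K(20000)  = ((20000 - 1) >> 12) + 1 = 5 clusters
 *
 * and SENDFILE_MAX_BYTES = 64 << 12 = 256 KB per pass under those
 * assumptions.
 */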
static void
alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
    struct mbuf **m, boolean_t jumbocl)
{
	unsigned int needed;

	if (pktlen == 0) {
		panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
	}

	/*
	 * Try to allocate for the whole thing.  Since we want full control
	 * over the buffer size and be able to accept partial result, we can't
	 * use mbuf_allocpacket().  The logic below is similar to sosend().
	 */
	*m = NULL;
	if (pktlen > MBIGCLBYTES && jumbocl) {
		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
	}
	if (*m == NULL) {
		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
	}

	/*
	 * Our previous attempt(s) at allocation had failed; the system
	 * may be short on mbufs, and we want to block until they are
	 * available.  This time, ask just for 1 mbuf and don't return
	 * until we get it.
	 */
	if (*m == NULL) {
		needed = 1;
		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
	}
	if (*m == NULL) {
		panic("%s: blocking allocation returned NULL\n", __func__);
	}

	*maxchunks = needed;
}
 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
 *	struct sf_hdtr *hdtr, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'.  Send only '*nbytes' of the file or until EOF if
 * *nbytes == 0.  Optionally add a header and/or trailer to the socket
 * output.  If specified, write the total number of bytes sent into *nbytes.
 */
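/*
 * Illustrative sketch (not part of this file's build): a minimal userspace
 * caller matching the prototype above.  `filefd` and `sock` are hypothetical
 * descriptors for a regular file and a connected stream socket.
 *
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *
 *	int
 *	send_whole_file(int filefd, int sock, off_t *sent)
 *	{
 *		off_t nbytes = 0;	// 0 means "until EOF"
 *
 *		// No header/trailer, no flags; nbytes returns the count sent.
 *		if (sendfile(filefd, sock, 0, &nbytes, NULL, 0) == -1) {
 *			*sent = nbytes;	// may be non-zero on a partial send
 *			return -1;
 *		}
 *		*sent = nbytes;
 *		return 0;
 *	}
 */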
int
sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
{
	struct fileproc *fp;
	/* ... */
	struct writev_nocancel_args nuap;
	user_ssize_t writev_retval;
	struct user_sf_hdtr user_hdtr;
	struct user32_sf_hdtr user32_hdtr;
	struct user64_sf_hdtr user64_hdtr;
	/* ... */
	off_t nbytes = 0, sbytes = 0;
	/* ... */
	struct vfs_context context = *vfs_context_current();

	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
	    0, 0, 0, 0);

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(value32, uap->s);

	/*
	 * Do argument checking. Must be a regular file in, stream
	 * type and connected socket out, positive offset.
	 */
	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
		/* ... */
	}
	if ((fp->f_flag & FREAD) == 0) {
		/* ... */
	}
	if (vnode_isreg(vp) == 0) {
		/* ... */
	}
	error = file_socket(uap->s, &so);
	/* ... */
	if (so->so_type != SOCK_STREAM) {
		/* ... */
	}
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		/* ... */
	}
	if (uap->offset < 0) {
		/* ... */
	}
	if (uap->nbytes == USER_ADDR_NULL) {
		/* ... */
	}
	if (uap->flags != 0) {
		/* ... */
	}

	context.vc_ucred = fp->fp_glob->fg_cred;

#if CONFIG_MACF_SOCKET_SUBSET
	/* JMM - fetch connected sockaddr? */
	error = mac_socket_check_send(context.vc_ucred, so, NULL);
	/* ... */
#endif /* MAC_SOCKET_SUBSET */

	/*
	 * Get number of bytes to send.
	 * Should it apply to the size of the header and trailer?
	 */
	error = copyin(uap->nbytes, &nbytes, sizeof(off_t));
	/* ... */

	/*
	 * If specified, get the pointer to the sf_hdtr struct for
	 * any headers/trailers.
	 */
	if (uap->hdtr != USER_ADDR_NULL) {
		/* ... */
		bzero(&user_hdtr, sizeof(user_hdtr));
		if (IS_64BIT_PROCESS(p)) {
			hdtrp = (caddr_t)&user64_hdtr;
			sizeof_hdtr = sizeof(user64_hdtr);
		} else {
			hdtrp = (caddr_t)&user32_hdtr;
			sizeof_hdtr = sizeof(user32_hdtr);
		}
		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
		/* ... */
		if (IS_64BIT_PROCESS(p)) {
			user_hdtr.headers = user64_hdtr.headers;
			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
			user_hdtr.trailers = user64_hdtr.trailers;
			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
		} else {
			user_hdtr.headers = user32_hdtr.headers;
			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
			user_hdtr.trailers = user32_hdtr.trailers;
			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
		}
	}
	/*
	 * Send any headers. Wimp out and use writev(2).
	 */
	if (user_hdtr.headers != USER_ADDR_NULL) {
		bzero(&nuap, sizeof(struct writev_args));
		/* ... */
		nuap.iovp = user_hdtr.headers;
		nuap.iovcnt = user_hdtr.hdr_cnt;
		error = writev_nocancel(p, &nuap, &writev_retval);
		/* ... */
		sbytes += writev_retval;
	}

	/*
	 * Get the file size for 2 reasons:
	 *  1. We don't want to allocate more mbufs than necessary
	 *  2. We don't want to read past the end of file
	 */
	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
		/* ... */
	}

	/*
	 * Simply read file data into a chain of mbufs that are used with
	 * scatter/gather reads.  We're not (yet?) set up to use zero-copy
	 * external mbufs that point to the file pages.
	 */
	socket_lock(so, 1);
	error = sblock(&so->so_snd, SBL_WAIT);
	if (error) {
		socket_unlock(so, 1);
		/* ... */
	}
	for (off = uap->offset;; off += xfsize, sbytes += xfsize) {
		mbuf_t m0 = NULL, m;
		unsigned int nbufs = SFUIOBUFS, i;
		/* ... */
		char uio_buf[UIO_SIZEOF(SFUIOBUFS)];	/* 1 KB !!! */
		/* ... */

		/*
		 * Calculate the amount to transfer.
		 * Align to round number of pages.
		 * Not to exceed send socket buffer,
		 * the EOF, or the passed in nbytes.
		 */
		xfsize = sbspace(&so->so_snd);

		if (xfsize <= 0) {
			if (so->so_state & SS_CANTSENDMORE) {
				/* ... */
			} else if ((so->so_state & SS_NBIO)) {
				/* ... */
			}
			/* ... */
		}

		if (xfsize > SENDFILE_MAX_BYTES) {
			xfsize = SENDFILE_MAX_BYTES;
		} else if (xfsize > PAGE_SIZE) {
			xfsize = trunc_page(xfsize);
		}
		pgoff = off & PAGE_MASK_64;
		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) {
			xfsize = PAGE_SIZE_64 - pgoff;
		}
		if (nbytes && xfsize > (nbytes - sbytes)) {
			xfsize = nbytes - sbytes;
		}
		/* ... */
		if (off + xfsize > file_size) {
			xfsize = file_size - off;
		}
		/* ... */

		/*
		 * Attempt to use larger than system page-size clusters for
		 * large writes only if there is a jumbo cluster pool and
		 * if the socket is marked accordingly.
		 */
		jumbocl = sosendjcl && njcl > 0 &&
		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);

		socket_unlock(so, 0);
		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
		pktlen = mbuf_pkthdr_maxlen(m0);
		if (pktlen < (size_t)xfsize) {
			xfsize = pktlen;
		}

		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
		    UIO_READ, &uio_buf[0], sizeof(uio_buf));
		if (auio == NULL) {
			printf("sendfile failed. nbufs = %d. %s", nbufs,
			    "File a radar related to rdar://10146739.\n");
			/* ... */
		}

		for (i = 0, m = m0, uiolen = 0;
		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
		    i++, m = mbuf_next(m)) {
			size_t mlen = mbuf_maxlen(m);

			if (mlen + uiolen > (size_t)xfsize) {
				mlen = xfsize - uiolen;
			}
			mbuf_setlen(m, mlen);
			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
			    mlen);
			uiolen += mlen;
		}

		if (xfsize != uio_resid(auio)) {
			printf("sendfile: xfsize: %lld != uio_resid(auio): "
			    "%lld\n", xfsize, (long long)uio_resid(auio));
			/* ... */
		}

		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
		error = fo_read(fp, auio, FOF_OFFSET, &context);
		socket_lock(so, 0);
		if (error != 0) {
			if (uio_resid(auio) != xfsize && (error == ERESTART ||
			    error == EINTR || error == EWOULDBLOCK)) {
				error = 0;
			}
			/* ... */
		}
		xfsize -= uio_resid(auio);
		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);

		if (xfsize == 0) {
			// printf("sendfile: fo_read 0 bytes, EOF\n");
			break;
		}
		if (xfsize + off > file_size) {
			printf("sendfile: xfsize: %lld + off: %lld > file_size:"
			    "%lld\n", xfsize, off, file_size);
			/* ... */
		}
		for (i = 0, m = m0, rlen = 0;
		    i < nbufs && m != NULL && rlen < xfsize;
		    i++, m = mbuf_next(m)) {
			size_t mlen = mbuf_maxlen(m);

			if (rlen + mlen > (size_t)xfsize) {
				mlen = xfsize - rlen;
			}
			mbuf_setlen(m, mlen);
			rlen += mlen;
		}
		mbuf_pkthdr_setlen(m0, xfsize);
		/*
		 * Make sure that the socket is still able to take more data.
		 * CANTSENDMORE being true usually means that the connection
		 * was closed. so_error is true when an error was sensed after
		 * the connection was established.
		 * The state is checked after the page mapping and buffer
		 * allocation above since those operations may block and make
		 * any socket checks stale. From this point forward, nothing
		 * blocks before the pru_send (or more accurately, any blocking
		 * results in a loop back to here to re-check).
		 */
		if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
			} else {
				error = so->so_error;
				/* ... */
			}
			/* ... */
		}
		/*
		 * Wait for socket space to become available. We do this just
		 * after checking the connection state above in order to avoid
		 * a race condition with sbwait().
		 */
		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
			if (so->so_state & SS_NBIO) {
				/* ... */
			}
			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
			error = sbwait(&so->so_snd);
			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
			/*
			 * An error from sbwait usually indicates that we've
			 * been interrupted by a signal. If we've sent anything
			 * then return bytes sent, otherwise return the error.
			 */
			/* ... */
		}
		{
			struct mbuf *control = NULL;

			/*
			 * Socket filter processing
			 */
			error = sflt_data_out(so, NULL, &m0, &control, 0);
			if (error) {
				if (error == EJUSTRETURN) {
					/* ... */
				}
				/* ... */
			}
			/*
			 * End Socket filter processing
			 */
			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
			    uap->s, 0, 0, 0, 0);
			error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
			    /* ... */);
			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
			    uap->s, 0, 0, 0, 0);
		}
		/* ... */
	}

	sbunlock(&so->so_snd, FALSE);	/* will unlock socket */
	/*
	 * Send trailers. Wimp out and use writev(2).
	 */
	if (uap->hdtr != USER_ADDR_NULL &&
	    user_hdtr.trailers != USER_ADDR_NULL) {
		bzero(&nuap, sizeof(struct writev_args));
		/* ... */
		nuap.iovp = user_hdtr.trailers;
		nuap.iovcnt = user_hdtr.trl_cnt;
		error = writev_nocancel(p, &nuap, &writev_retval);
		/* ... */
		sbytes += writev_retval;
	}
	/* ... */
	if (uap->nbytes != USER_ADDR_NULL) {
		/* XXX this appears bogus for some early failure conditions */
		copyout(&sbytes, uap->nbytes, sizeof(off_t));
	}
	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
	/* ... */

	sbunlock(&so->so_snd, FALSE);	/* will unlock socket */
	/* ... */
}
#endif /* SENDFILE */