2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
92 #include <sys/sysctl.h>
94 #include <security/audit/audit.h>
96 #include <sys/kdebug.h>
97 #include <sys/sysproto.h>
98 #include <netinet/in.h>
99 #include <net/route.h>
100 #include <netinet/in_pcb.h>
102 #if CONFIG_MACF_SOCKET_SUBSET
103 #include <security/mac_framework.h>
104 #endif /* CONFIG_MACF_SOCKET_SUBSET */
106 #define f_flag f_fglob->fg_flag
107 #define f_type f_fglob->fg_ops->fo_type
108 #define f_msgcount f_fglob->fg_msgcount
109 #define f_cred f_fglob->fg_cred
110 #define f_ops f_fglob->fg_ops
111 #define f_offset f_fglob->fg_offset
112 #define f_data f_fglob->fg_data
114 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
115 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
116 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
117 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
118 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
119 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
120 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
121 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
122 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
123 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
124 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
125 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
126 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
127 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
128 #define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
129 #define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
131 #if DEBUG || DEVELOPMENT
132 #define DEBUG_KERNEL_ADDRPERM(_v) (_v)
133 #define DBG_PRINTF(...) printf(__VA_ARGS__)
135 #define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
136 #define DBG_PRINTF(...) do { } while (0)
139 /* TODO: should be in header file */
140 int falloc_locked(proc_t
, struct fileproc
**, int *, vfs_context_t
, int);
142 static int sendit(struct proc
*, struct socket
*, struct user_msghdr
*, uio_t
,
144 static int recvit(struct proc
*, int, struct user_msghdr
*, uio_t
, user_addr_t
,
146 static int connectit(struct socket
*, struct sockaddr
*);
147 static int getsockaddr(struct socket
*, struct sockaddr
**, user_addr_t
,
149 static int getsockaddr_s(struct socket
*, struct sockaddr_storage
*,
150 user_addr_t
, size_t, boolean_t
);
151 static int getsockaddrlist(struct socket
*, struct sockaddr_list
**,
152 user_addr_t
, socklen_t
, boolean_t
);
154 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf
**,
156 #endif /* SENDFILE */
157 static int connectx_nocancel(struct proc
*, struct connectx_args
*, int *);
158 static int connectitx(struct socket
*, struct sockaddr_list
**,
159 struct sockaddr_list
**, struct proc
*, uint32_t, sae_associd_t
,
160 sae_connid_t
*, uio_t
, unsigned int, user_ssize_t
*);
161 static int peeloff_nocancel(struct proc
*, struct peeloff_args
*, int *);
162 static int disconnectx_nocancel(struct proc
*, struct disconnectx_args
*,
164 static int socket_common(struct proc
*, int, int, int, pid_t
, int32_t *, int);
166 static int internalize_user_msghdr_array(const void *, int, int, u_int
,
167 struct user_msghdr_x
*, struct uio
**);
168 static u_int
externalize_user_msghdr_array(void *, int, int, u_int
,
169 const struct user_msghdr_x
*, struct uio
**);
171 static void free_uio_array(struct uio
**, u_int
);
172 static int uio_array_is_valid(struct uio
**, u_int
);
173 static int recv_msg_array_is_valid(struct recv_msg_elem
*, u_int
);
174 static int internalize_recv_msghdr_array(const void *, int, int,
175 u_int
, struct user_msghdr_x
*, struct recv_msg_elem
*);
176 static u_int
externalize_recv_msghdr_array(void *, int, int, u_int
,
177 const struct user_msghdr_x
*, struct recv_msg_elem
*);
178 static struct recv_msg_elem
*alloc_recv_msg_array(u_int count
);
179 static void free_recv_msg_array(struct recv_msg_elem
*, u_int
);
181 SYSCTL_DECL(_kern_ipc
);
183 static u_int somaxsendmsgx
= 100;
184 SYSCTL_UINT(_kern_ipc
, OID_AUTO
, maxsendmsgx
,
185 CTLFLAG_RW
| CTLFLAG_LOCKED
, &somaxsendmsgx
, 0, "");
186 static u_int somaxrecvmsgx
= 100;
187 SYSCTL_UINT(_kern_ipc
, OID_AUTO
, maxrecvmsgx
,
188 CTLFLAG_RW
| CTLFLAG_LOCKED
, &somaxrecvmsgx
, 0, "");
191 * System call interface to the socket abstraction.
194 extern const struct fileops socketops
;
198 * EACCES Mandatory Access Control failure
202 * socreate:EAFNOSUPPORT
203 * socreate:EPROTOTYPE
204 * socreate:EPROTONOSUPPORT
207 * socreate:??? [other protocol families, IPSEC]
210 socket(struct proc
*p
,
211 struct socket_args
*uap
,
214 return (socket_common(p
, uap
->domain
, uap
->type
, uap
->protocol
,
215 proc_selfpid(), retval
, 0));
219 socket_delegate(struct proc
*p
,
220 struct socket_delegate_args
*uap
,
223 return socket_common(p
, uap
->domain
, uap
->type
, uap
->protocol
,
224 uap
->epid
, retval
, 1);
228 socket_common(struct proc
*p
,
240 AUDIT_ARG(socket
, domain
, type
, protocol
);
241 #if CONFIG_MACF_SOCKET_SUBSET
242 if ((error
= mac_socket_check_create(kauth_cred_get(), domain
,
243 type
, protocol
)) != 0)
245 #endif /* CONFIG_MACF_SOCKET_SUBSET */
248 error
= priv_check_cred(kauth_cred_get(),
249 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE
, 0);
254 error
= falloc(p
, &fp
, &fd
, vfs_context_current());
258 fp
->f_flag
= FREAD
|FWRITE
;
259 fp
->f_ops
= &socketops
;
262 error
= socreate_delegate(domain
, &so
, type
, protocol
, epid
);
264 error
= socreate(domain
, &so
, type
, protocol
);
269 fp
->f_data
= (caddr_t
)so
;
272 procfdtbl_releasefd(p
, fd
, NULL
);
274 fp_drop(p
, fd
, fp
, 1);
278 if (ENTR_SHOULDTRACE
) {
279 KERNEL_ENERGYTRACE(kEnTrActKernSocket
, DBG_FUNC_START
,
280 fd
, 0, (int64_t)VM_KERNEL_ADDRPERM(so
));
288 * EDESTADDRREQ Destination address required
289 * EBADF Bad file descriptor
290 * EACCES Mandatory Access Control failure
291 * file_socket:ENOTSOCK
293 * getsockaddr:ENAMETOOLONG Filename too long
294 * getsockaddr:EINVAL Invalid argument
295 * getsockaddr:ENOMEM Not enough space
296 * getsockaddr:EFAULT Bad address
301 bind(__unused proc_t p
, struct bind_args
*uap
, __unused
int32_t *retval
)
303 struct sockaddr_storage ss
;
304 struct sockaddr
*sa
= NULL
;
306 boolean_t want_free
= TRUE
;
309 AUDIT_ARG(fd
, uap
->s
);
310 error
= file_socket(uap
->s
, &so
);
317 if (uap
->name
== USER_ADDR_NULL
) {
318 error
= EDESTADDRREQ
;
321 if (uap
->namelen
> sizeof (ss
)) {
322 error
= getsockaddr(so
, &sa
, uap
->name
, uap
->namelen
, TRUE
);
324 error
= getsockaddr_s(so
, &ss
, uap
->name
, uap
->namelen
, TRUE
);
326 sa
= (struct sockaddr
*)&ss
;
332 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
333 #if CONFIG_MACF_SOCKET_SUBSET
334 if ((error
= mac_socket_check_bind(kauth_cred_get(), so
, sa
)) == 0)
335 error
= sobindlock(so
, sa
, 1); /* will lock socket */
337 error
= sobindlock(so
, sa
, 1); /* will lock socket */
338 #endif /* CONFIG_MACF_SOCKET_SUBSET */
349 * EACCES Mandatory Access Control failure
350 * file_socket:ENOTSOCK
353 * solisten:EOPNOTSUPP
357 listen(__unused
struct proc
*p
, struct listen_args
*uap
,
358 __unused
int32_t *retval
)
363 AUDIT_ARG(fd
, uap
->s
);
364 error
= file_socket(uap
->s
, &so
);
368 #if CONFIG_MACF_SOCKET_SUBSET
370 error
= mac_socket_check_listen(kauth_cred_get(), so
);
372 error
= solisten(so
, uap
->backlog
);
375 error
= solisten(so
, uap
->backlog
);
376 #endif /* CONFIG_MACF_SOCKET_SUBSET */
385 * Returns: fp_getfsock:EBADF Bad file descriptor
386 * fp_getfsock:EOPNOTSUPP ...
387 * xlate => :ENOTSOCK Socket operation on non-socket
388 * :EFAULT Bad address on copyin/copyout
389 * :EBADF Bad file descriptor
390 * :EOPNOTSUPP Operation not supported on socket
391 * :EINVAL Invalid argument
392 * :EWOULDBLOCK Operation would block
393 * :ECONNABORTED Connection aborted
394 * :EINTR Interrupted function
395 * :EACCES Mandatory Access Control failure
396 * falloc_locked:ENFILE Too many files open in system
397 * falloc_locked:EMFILE Too many open files
398 * falloc_locked:ENOMEM Not enough space
402 accept_nocancel(struct proc
*p
, struct accept_nocancel_args
*uap
,
406 struct sockaddr
*sa
= NULL
;
409 struct socket
*head
, *so
= NULL
;
410 lck_mtx_t
*mutex_held
;
413 short fflag
; /* type must match fp->f_flag */
418 AUDIT_ARG(fd
, uap
->s
);
421 error
= copyin(uap
->anamelen
, (caddr_t
)&namelen
,
426 error
= fp_getfsock(p
, fd
, &fp
, &head
);
428 if (error
== EOPNOTSUPP
)
436 #if CONFIG_MACF_SOCKET_SUBSET
437 if ((error
= mac_socket_check_accept(kauth_cred_get(), head
)) != 0)
439 #endif /* CONFIG_MACF_SOCKET_SUBSET */
441 socket_lock(head
, 1);
443 if (head
->so_proto
->pr_getlock
!= NULL
) {
444 mutex_held
= (*head
->so_proto
->pr_getlock
)(head
, 0);
447 mutex_held
= head
->so_proto
->pr_domain
->dom_mtx
;
451 if ((head
->so_options
& SO_ACCEPTCONN
) == 0) {
452 if ((head
->so_proto
->pr_flags
& PR_CONNREQUIRED
) == 0) {
455 /* POSIX: The socket is not accepting connections */
458 socket_unlock(head
, 1);
461 if ((head
->so_state
& SS_NBIO
) && head
->so_comp
.tqh_first
== NULL
) {
462 socket_unlock(head
, 1);
466 while (TAILQ_EMPTY(&head
->so_comp
) && head
->so_error
== 0) {
467 if (head
->so_state
& SS_CANTRCVMORE
) {
468 head
->so_error
= ECONNABORTED
;
471 if (head
->so_usecount
< 1)
472 panic("accept: head=%p refcount=%d\n", head
,
474 error
= msleep((caddr_t
)&head
->so_timeo
, mutex_held
,
475 PSOCK
| PCATCH
, "accept", 0);
476 if (head
->so_usecount
< 1)
477 panic("accept: 2 head=%p refcount=%d\n", head
,
479 if ((head
->so_state
& SS_DRAINING
)) {
480 error
= ECONNABORTED
;
483 socket_unlock(head
, 1);
487 if (head
->so_error
) {
488 error
= head
->so_error
;
490 socket_unlock(head
, 1);
496 * At this point we know that there is at least one connection
497 * ready to be accepted. Remove it from the queue prior to
498 * allocating the file descriptor for it since falloc() may
499 * block allowing another process to accept the connection
502 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
503 so
= TAILQ_FIRST(&head
->so_comp
);
504 TAILQ_REMOVE(&head
->so_comp
, so
, so_list
);
506 /* unlock head to avoid deadlock with select, keep a ref on head */
507 socket_unlock(head
, 0);
509 #if CONFIG_MACF_SOCKET_SUBSET
511 * Pass the pre-accepted socket to the MAC framework. This is
512 * cheaper than allocating a file descriptor for the socket,
513 * calling the protocol accept callback, and possibly freeing
514 * the file descriptor should the MAC check fail.
516 if ((error
= mac_socket_check_accepted(kauth_cred_get(), so
)) != 0) {
518 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
520 socket_unlock(so
, 1);
522 /* Drop reference on listening socket */
526 #endif /* CONFIG_MACF_SOCKET_SUBSET */
529 * Pass the pre-accepted socket to any interested socket filter(s).
530 * Upon failure, the socket would have been closed by the callee.
532 if (so
->so_filt
!= NULL
&& (error
= soacceptfilter(so
)) != 0) {
533 /* Drop reference on listening socket */
535 /* Propagate socket filter's error code to the caller */
540 error
= falloc(p
, &fp
, &newfd
, vfs_context_current());
543 * Probably ran out of file descriptors.
545 * <rdar://problem/8554930>
546 * Don't put this back on the socket like we used to, that
547 * just causes the client to spin. Drop the socket.
550 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
552 socket_unlock(so
, 1);
559 fp
->f_ops
= &socketops
;
560 fp
->f_data
= (caddr_t
)so
;
562 socket_lock(head
, 0);
566 so
->so_state
&= ~SS_COMP
;
569 /* Sync socket non-blocking/async state with file flags */
570 if (fp
->f_flag
& FNONBLOCK
) {
571 so
->so_state
|= SS_NBIO
;
573 so
->so_state
&= ~SS_NBIO
;
576 if (fp
->f_flag
& FASYNC
) {
577 so
->so_state
|= SS_ASYNC
;
578 so
->so_rcv
.sb_flags
|= SB_ASYNC
;
579 so
->so_snd
.sb_flags
|= SB_ASYNC
;
581 so
->so_state
&= ~SS_ASYNC
;
582 so
->so_rcv
.sb_flags
&= ~SB_ASYNC
;
583 so
->so_snd
.sb_flags
&= ~SB_ASYNC
;
586 (void) soacceptlock(so
, &sa
, 0);
587 socket_unlock(head
, 1);
595 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
600 /* save sa_len before it is destroyed */
602 namelen
= MIN(namelen
, sa_len
);
603 error
= copyout(sa
, uap
->name
, namelen
);
605 /* return the actual, untruncated address length */
608 error
= copyout((caddr_t
)&namelen
, uap
->anamelen
,
615 * If the socket has been marked as inactive by sosetdefunct(),
616 * disallow further operations on it.
618 if (so
->so_flags
& SOF_DEFUNCT
) {
619 sodefunct(current_proc(), so
,
620 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL
);
624 socket_unlock(so
, 1);
627 procfdtbl_releasefd(p
, newfd
, NULL
);
628 fp_drop(p
, newfd
, fp
, 1);
634 if (error
== 0 && ENTR_SHOULDTRACE
) {
635 KERNEL_ENERGYTRACE(kEnTrActKernSocket
, DBG_FUNC_START
,
636 newfd
, 0, (int64_t)VM_KERNEL_ADDRPERM(so
));
642 accept(struct proc
*p
, struct accept_args
*uap
, int32_t *retval
)
644 __pthread_testcancel(1);
645 return (accept_nocancel(p
, (struct accept_nocancel_args
*)uap
,
651 * EBADF Bad file descriptor
652 * EALREADY Connection already in progress
653 * EINPROGRESS Operation in progress
654 * ECONNABORTED Connection aborted
655 * EINTR Interrupted function
656 * EACCES Mandatory Access Control failure
657 * file_socket:ENOTSOCK
659 * getsockaddr:ENAMETOOLONG Filename too long
660 * getsockaddr:EINVAL Invalid argument
661 * getsockaddr:ENOMEM Not enough space
662 * getsockaddr:EFAULT Bad address
663 * soconnectlock:EOPNOTSUPP
664 * soconnectlock:EISCONN
665 * soconnectlock:??? [depends on protocol, filters]
668 * Imputed: so_error error may be set from so_error, which
669 * may have been set by soconnectlock.
673 connect(struct proc
*p
, struct connect_args
*uap
, int32_t *retval
)
675 __pthread_testcancel(1);
676 return (connect_nocancel(p
, (struct connect_nocancel_args
*)uap
,
681 connect_nocancel(proc_t p
, struct connect_nocancel_args
*uap
, int32_t *retval
)
683 #pragma unused(p, retval)
685 struct sockaddr_storage ss
;
686 struct sockaddr
*sa
= NULL
;
691 AUDIT_ARG(fd
, uap
->s
);
692 error
= file_socket(fd
, &so
);
701 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
702 * if this is a datagram socket; translate for other types.
704 dgram
= (so
->so_type
== SOCK_DGRAM
);
706 /* Get socket address now before we obtain socket lock */
707 if (uap
->namelen
> sizeof (ss
)) {
708 error
= getsockaddr(so
, &sa
, uap
->name
, uap
->namelen
, !dgram
);
710 error
= getsockaddr_s(so
, &ss
, uap
->name
, uap
->namelen
, !dgram
);
712 sa
= (struct sockaddr
*)&ss
;
717 error
= connectit(so
, sa
);
719 if (sa
!= NULL
&& sa
!= SA(&ss
))
721 if (error
== ERESTART
)
729 connectx_nocancel(struct proc
*p
, struct connectx_args
*uap
, int *retval
)
731 #pragma unused(p, retval)
732 struct sockaddr_list
*src_sl
= NULL
, *dst_sl
= NULL
;
734 int error
, error1
, fd
= uap
->socket
;
736 sae_connid_t cid
= SAE_CONNID_ANY
;
737 struct user32_sa_endpoints ep32
;
738 struct user64_sa_endpoints ep64
;
739 struct user_sa_endpoints ep
;
740 user_ssize_t bytes_written
= 0;
741 struct user_iovec
*iovp
;
744 AUDIT_ARG(fd
, uap
->socket
);
745 error
= file_socket(fd
, &so
);
753 if (uap
->endpoints
== USER_ADDR_NULL
) {
758 if (IS_64BIT_PROCESS(p
)) {
759 error
= copyin(uap
->endpoints
, (caddr_t
)&ep64
, sizeof(ep64
));
763 ep
.sae_srcif
= ep64
.sae_srcif
;
764 ep
.sae_srcaddr
= ep64
.sae_srcaddr
;
765 ep
.sae_srcaddrlen
= ep64
.sae_srcaddrlen
;
766 ep
.sae_dstaddr
= ep64
.sae_dstaddr
;
767 ep
.sae_dstaddrlen
= ep64
.sae_dstaddrlen
;
769 error
= copyin(uap
->endpoints
, (caddr_t
)&ep32
, sizeof(ep32
));
773 ep
.sae_srcif
= ep32
.sae_srcif
;
774 ep
.sae_srcaddr
= ep32
.sae_srcaddr
;
775 ep
.sae_srcaddrlen
= ep32
.sae_srcaddrlen
;
776 ep
.sae_dstaddr
= ep32
.sae_dstaddr
;
777 ep
.sae_dstaddrlen
= ep32
.sae_dstaddrlen
;
781 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
782 * if this is a datagram socket; translate for other types.
784 dgram
= (so
->so_type
== SOCK_DGRAM
);
787 * Get socket address(es) now before we obtain socket lock; use
788 * sockaddr_list for src address for convenience, if present,
789 * even though it won't hold more than one.
791 if (ep
.sae_srcaddr
!= USER_ADDR_NULL
&& (error
= getsockaddrlist(so
,
792 &src_sl
, (user_addr_t
)(caddr_t
)ep
.sae_srcaddr
, ep
.sae_srcaddrlen
,
796 if (ep
.sae_dstaddr
== USER_ADDR_NULL
) {
801 error
= getsockaddrlist(so
, &dst_sl
, (user_addr_t
)(caddr_t
)ep
.sae_dstaddr
,
802 ep
.sae_dstaddrlen
, dgram
);
806 VERIFY(dst_sl
!= NULL
&&
807 !TAILQ_EMPTY(&dst_sl
->sl_head
) && dst_sl
->sl_cnt
> 0);
809 if (uap
->iov
!= USER_ADDR_NULL
) {
810 /* Verify range before calling uio_create() */
811 if (uap
->iovcnt
<= 0 || uap
->iovcnt
> UIO_MAXIOV
)
814 if (uap
->len
== USER_ADDR_NULL
)
817 /* allocate a uio to hold the number of iovecs passed */
818 auio
= uio_create(uap
->iovcnt
, 0,
819 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
828 * get location of iovecs within the uio.
829 * then copyin the iovecs from user space.
831 iovp
= uio_iovsaddr(auio
);
836 error
= copyin_user_iovec_array(uap
->iov
,
837 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
842 /* finish setup of uio_t */
843 error
= uio_calculateresid(auio
);
849 error
= connectitx(so
, &src_sl
, &dst_sl
, p
, ep
.sae_srcif
, uap
->associd
,
850 &cid
, auio
, uap
->flags
, &bytes_written
);
851 if (error
== ERESTART
)
854 if (uap
->len
!= USER_ADDR_NULL
) {
855 error1
= copyout(&bytes_written
, uap
->len
, sizeof (uap
->len
));
856 /* give precedence to connectitx errors */
857 if ((error1
!= 0) && (error
== 0))
861 if (uap
->connid
!= USER_ADDR_NULL
) {
862 error1
= copyout(&cid
, uap
->connid
, sizeof (cid
));
863 /* give precedence to connectitx errors */
864 if ((error1
!= 0) && (error
== 0))
873 sockaddrlist_free(src_sl
);
875 sockaddrlist_free(dst_sl
);
880 connectx(struct proc
*p
, struct connectx_args
*uap
, int *retval
)
883 * Due to similarity with a POSIX interface, define as
884 * an unofficial cancellation point.
886 __pthread_testcancel(1);
887 return (connectx_nocancel(p
, uap
, retval
));
891 connectit(struct socket
*so
, struct sockaddr
*sa
)
895 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
896 #if CONFIG_MACF_SOCKET_SUBSET
897 if ((error
= mac_socket_check_connect(kauth_cred_get(), so
, sa
)) != 0)
899 #endif /* CONFIG_MACF_SOCKET_SUBSET */
902 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
906 error
= soconnectlock(so
, sa
, 0);
908 so
->so_state
&= ~SS_ISCONNECTING
;
911 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
915 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
916 lck_mtx_t
*mutex_held
;
918 if (so
->so_proto
->pr_getlock
!= NULL
)
919 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
921 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
922 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
,
923 PSOCK
| PCATCH
, __func__
, 0);
924 if (so
->so_state
& SS_DRAINING
) {
925 error
= ECONNABORTED
;
931 error
= so
->so_error
;
935 socket_unlock(so
, 1);
940 connectitx(struct socket
*so
, struct sockaddr_list
**src_sl
,
941 struct sockaddr_list
**dst_sl
, struct proc
*p
, uint32_t ifscope
,
942 sae_associd_t aid
, sae_connid_t
*pcid
, uio_t auio
, unsigned int flags
,
943 user_ssize_t
*bytes_written
)
945 struct sockaddr_entry
*se
;
947 #pragma unused (flags)
949 VERIFY(dst_sl
!= NULL
&& *dst_sl
!= NULL
);
951 TAILQ_FOREACH(se
, &(*dst_sl
)->sl_head
, se_link
) {
952 VERIFY(se
->se_addr
!= NULL
);
953 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()),
955 #if CONFIG_MACF_SOCKET_SUBSET
956 if ((error
= mac_socket_check_connect(kauth_cred_get(),
957 so
, se
->se_addr
)) != 0)
959 #endif /* CONFIG_MACF_SOCKET_SUBSET */
963 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
968 if ((so
->so_proto
->pr_flags
& PR_DATA_IDEMPOTENT
) &&
969 (flags
& CONNECT_DATA_IDEMPOTENT
))
970 so
->so_flags1
|= SOF1_DATA_IDEMPOTENT
;
973 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
974 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
975 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
976 * Case 3 allows user to combine write with connect even if they have
977 * no use for TFO (such as regular TCP, and UDP).
978 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
980 if ((so
->so_proto
->pr_flags
& PR_PRECONN_WRITE
) &&
981 ((flags
& CONNECT_RESUME_ON_READ_WRITE
) || auio
))
982 so
->so_flags1
|= SOF1_PRECONNECT_DATA
;
985 * If a user sets data idempotent and does not pass an uio, or
986 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
987 * SOF1_DATA_IDEMPOTENT.
989 if (!(so
->so_flags1
& SOF1_PRECONNECT_DATA
) &&
990 (so
->so_flags1
& SOF1_DATA_IDEMPOTENT
)) {
991 /* We should return EINVAL instead perhaps. */
992 so
->so_flags1
&= ~SOF1_DATA_IDEMPOTENT
;
995 error
= soconnectxlocked(so
, src_sl
, dst_sl
, p
, ifscope
,
996 aid
, pcid
, 0, NULL
, 0, auio
, bytes_written
);
998 so
->so_state
&= ~SS_ISCONNECTING
;
1002 * If, after the call to soconnectxlocked the flag is still set (in case
1003 * data has been queued and the connect() has actually been triggered,
1004 * it will have been unset by the transport), we exit immediately. There
1005 * is no reason to wait on any event.
1007 if (so
->so_flags1
& SOF1_PRECONNECT_DATA
) {
1011 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
1012 error
= EINPROGRESS
;
1015 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
1016 lck_mtx_t
*mutex_held
;
1018 if (so
->so_proto
->pr_getlock
!= NULL
)
1019 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1021 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1022 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
,
1023 PSOCK
| PCATCH
, __func__
, 0);
1024 if (so
->so_state
& SS_DRAINING
) {
1025 error
= ECONNABORTED
;
1031 error
= so
->so_error
;
1035 socket_unlock(so
, 1);
1040 peeloff(struct proc
*p
, struct peeloff_args
*uap
, int *retval
)
1043 * Due to similarity with a POSIX interface, define as
1044 * an unofficial cancellation point.
1046 __pthread_testcancel(1);
1047 return (peeloff_nocancel(p
, uap
, retval
));
1051 peeloff_nocancel(struct proc
*p
, struct peeloff_args
*uap
, int *retval
)
1053 struct fileproc
*fp
;
1054 struct socket
*mp_so
, *so
= NULL
;
1055 int newfd
, fd
= uap
->s
;
1056 short fflag
; /* type must match fp->f_flag */
1061 error
= fp_getfsock(p
, fd
, &fp
, &mp_so
);
1063 if (error
== EOPNOTSUPP
)
1067 if (mp_so
== NULL
) {
1072 socket_lock(mp_so
, 1);
1073 error
= sopeelofflocked(mp_so
, uap
->aid
, &so
);
1075 socket_unlock(mp_so
, 1);
1079 socket_unlock(mp_so
, 0); /* keep ref on mp_so for us */
1082 error
= falloc(p
, &fp
, &newfd
, vfs_context_current());
1084 /* drop this socket (probably ran out of file descriptors) */
1086 sodereference(mp_so
); /* our mp_so ref */
1091 fp
->f_ops
= &socketops
;
1092 fp
->f_data
= (caddr_t
)so
;
1095 * If the socket has been marked as inactive by sosetdefunct(),
1096 * disallow further operations on it.
1098 if (so
->so_flags
& SOF_DEFUNCT
) {
1099 sodefunct(current_proc(), so
,
1100 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL
);
1104 procfdtbl_releasefd(p
, newfd
, NULL
);
1105 fp_drop(p
, newfd
, fp
, 1);
1108 sodereference(mp_so
); /* our mp_so ref */
1119 disconnectx(struct proc
*p
, struct disconnectx_args
*uap
, int *retval
)
1122 * Due to similarity with a POSIX interface, define as
1123 * an unofficial cancellation point.
1125 __pthread_testcancel(1);
1126 return (disconnectx_nocancel(p
, uap
, retval
));
1130 disconnectx_nocancel(struct proc
*p
, struct disconnectx_args
*uap
, int *retval
)
1132 #pragma unused(p, retval)
1137 error
= file_socket(fd
, &so
);
1145 error
= sodisconnectx(so
, uap
->aid
, uap
->cid
);
1152 * Returns: 0 Success
1153 * socreate:EAFNOSUPPORT
1154 * socreate:EPROTOTYPE
1155 * socreate:EPROTONOSUPPORT
1159 * socreate:??? [other protocol families, IPSEC]
1165 * soconnect2:EPROTOTYPE
1166 * soconnect2:??? [other protocol families]
1169 socketpair(struct proc
*p
, struct socketpair_args
*uap
,
1170 __unused
int32_t *retval
)
1172 struct fileproc
*fp1
, *fp2
;
1173 struct socket
*so1
, *so2
;
1174 int fd
, error
, sv
[2];
1176 AUDIT_ARG(socket
, uap
->domain
, uap
->type
, uap
->protocol
);
1177 error
= socreate(uap
->domain
, &so1
, uap
->type
, uap
->protocol
);
1180 error
= socreate(uap
->domain
, &so2
, uap
->type
, uap
->protocol
);
1184 error
= falloc(p
, &fp1
, &fd
, vfs_context_current());
1188 fp1
->f_flag
= FREAD
|FWRITE
;
1189 fp1
->f_ops
= &socketops
;
1190 fp1
->f_data
= (caddr_t
)so1
;
1193 error
= falloc(p
, &fp2
, &fd
, vfs_context_current());
1197 fp2
->f_flag
= FREAD
|FWRITE
;
1198 fp2
->f_ops
= &socketops
;
1199 fp2
->f_data
= (caddr_t
)so2
;
1202 error
= soconnect2(so1
, so2
);
1206 if (uap
->type
== SOCK_DGRAM
) {
1208 * Datagram socket connection is asymmetric.
1210 error
= soconnect2(so2
, so1
);
1216 if ((error
= copyout(sv
, uap
->rsv
, 2 * sizeof (int))) != 0)
1220 procfdtbl_releasefd(p
, sv
[0], NULL
);
1221 procfdtbl_releasefd(p
, sv
[1], NULL
);
1222 fp_drop(p
, sv
[0], fp1
, 1);
1223 fp_drop(p
, sv
[1], fp2
, 1);
1228 fp_free(p
, sv
[1], fp2
);
1230 fp_free(p
, sv
[0], fp1
);
1232 (void) soclose(so2
);
1234 (void) soclose(so1
);
1239 * Returns: 0 Success
1244 * EACCES Mandatory Access Control failure
1245 * file_socket:ENOTSOCK
1247 * getsockaddr:ENAMETOOLONG Filename too long
1248 * getsockaddr:EINVAL Invalid argument
1249 * getsockaddr:ENOMEM Not enough space
1250 * getsockaddr:EFAULT Bad address
1251 * <pru_sosend>:EACCES[TCP]
1252 * <pru_sosend>:EADDRINUSE[TCP]
1253 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1254 * <pru_sosend>:EAFNOSUPPORT[TCP]
1255 * <pru_sosend>:EAGAIN[TCP]
1256 * <pru_sosend>:EBADF
1257 * <pru_sosend>:ECONNRESET[TCP]
1258 * <pru_sosend>:EFAULT
1259 * <pru_sosend>:EHOSTUNREACH[TCP]
1260 * <pru_sosend>:EINTR
1261 * <pru_sosend>:EINVAL
1262 * <pru_sosend>:EISCONN[AF_INET]
1263 * <pru_sosend>:EMSGSIZE[TCP]
1264 * <pru_sosend>:ENETDOWN[TCP]
1265 * <pru_sosend>:ENETUNREACH[TCP]
1266 * <pru_sosend>:ENOBUFS
1267 * <pru_sosend>:ENOMEM[TCP]
1268 * <pru_sosend>:ENOTCONN[AF_INET]
1269 * <pru_sosend>:EOPNOTSUPP
1270 * <pru_sosend>:EPERM[TCP]
1271 * <pru_sosend>:EPIPE
1272 * <pru_sosend>:EWOULDBLOCK
1273 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1274 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1275 * <pru_sosend>:??? [value from so_error]
1279 sendit(struct proc
*p
, struct socket
*so
, struct user_msghdr
*mp
, uio_t uiop
,
1280 int flags
, int32_t *retval
)
1282 struct mbuf
*control
= NULL
;
1283 struct sockaddr_storage ss
;
1284 struct sockaddr
*to
= NULL
;
1285 boolean_t want_free
= TRUE
;
1289 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1291 if (mp
->msg_name
!= USER_ADDR_NULL
) {
1292 if (mp
->msg_namelen
> sizeof (ss
)) {
1293 error
= getsockaddr(so
, &to
, mp
->msg_name
,
1294 mp
->msg_namelen
, TRUE
);
1296 error
= getsockaddr_s(so
, &ss
, mp
->msg_name
,
1297 mp
->msg_namelen
, TRUE
);
1299 to
= (struct sockaddr
*)&ss
;
1305 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), to
);
1307 if (mp
->msg_control
!= USER_ADDR_NULL
) {
1308 if (mp
->msg_controllen
< sizeof (struct cmsghdr
)) {
1312 error
= sockargs(&control
, mp
->msg_control
,
1313 mp
->msg_controllen
, MT_CONTROL
);
1318 #if CONFIG_MACF_SOCKET_SUBSET
1320 * We check the state without holding the socket lock;
1321 * if a race condition occurs, it would simply result
1322 * in an extra call to the MAC check function.
1325 !(so
->so_state
& SS_DEFUNCT
) &&
1326 (error
= mac_socket_check_send(kauth_cred_get(), so
, to
)) != 0)
1328 #endif /* CONFIG_MACF_SOCKET_SUBSET */
1330 len
= uio_resid(uiop
);
1331 error
= so
->so_proto
->pr_usrreqs
->pru_sosend(so
, to
, uiop
, 0,
1334 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
1335 error
== EINTR
|| error
== EWOULDBLOCK
))
1337 /* Generation of SIGPIPE can be controlled per socket */
1338 if (error
== EPIPE
&& !(so
->so_flags
& SOF_NOSIGPIPE
))
1339 psignal(p
, SIGPIPE
);
1342 *retval
= (int)(len
- uio_resid(uiop
));
1344 if (to
!= NULL
&& want_free
)
1347 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1353 * Returns: 0 Success
1355 * sendit:??? [see sendit definition in this file]
1356 * write:??? [4056224: applicable for pipes]
1359 sendto(struct proc
*p
, struct sendto_args
*uap
, int32_t *retval
)
1361 __pthread_testcancel(1);
1362 return (sendto_nocancel(p
, (struct sendto_nocancel_args
*)uap
, retval
));
1366 sendto_nocancel(struct proc
*p
,
1367 struct sendto_nocancel_args
*uap
,
1370 struct user_msghdr msg
;
1375 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1376 AUDIT_ARG(fd
, uap
->s
);
1378 auio
= uio_create(1, 0,
1379 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1385 uio_addiov(auio
, uap
->buf
, uap
->len
);
1387 msg
.msg_name
= uap
->to
;
1388 msg
.msg_namelen
= uap
->tolen
;
1389 /* no need to set up msg_iov. sendit uses uio_t we send it */
1392 msg
.msg_control
= 0;
1395 error
= file_socket(uap
->s
, &so
);
1402 error
= sendit(p
, so
, &msg
, auio
, uap
->flags
, retval
);
1410 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_END
, error
, *retval
, 0, 0, 0);
1416 * Returns: 0 Success
1419 * sendit:??? [see sendit definition in this file]
1422 sendmsg(struct proc
*p
, struct sendmsg_args
*uap
, int32_t *retval
)
1424 __pthread_testcancel(1);
1425 return (sendmsg_nocancel(p
, (struct sendmsg_nocancel_args
*)uap
,
1430 sendmsg_nocancel(struct proc
*p
, struct sendmsg_nocancel_args
*uap
,
1433 struct user32_msghdr msg32
;
1434 struct user64_msghdr msg64
;
1435 struct user_msghdr user_msg
;
1440 struct user_iovec
*iovp
;
1443 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1444 AUDIT_ARG(fd
, uap
->s
);
1445 if (IS_64BIT_PROCESS(p
)) {
1446 msghdrp
= (caddr_t
)&msg64
;
1447 size_of_msghdr
= sizeof (msg64
);
1449 msghdrp
= (caddr_t
)&msg32
;
1450 size_of_msghdr
= sizeof (msg32
);
1452 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
1454 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1458 if (IS_64BIT_PROCESS(p
)) {
1459 user_msg
.msg_flags
= msg64
.msg_flags
;
1460 user_msg
.msg_controllen
= msg64
.msg_controllen
;
1461 user_msg
.msg_control
= msg64
.msg_control
;
1462 user_msg
.msg_iovlen
= msg64
.msg_iovlen
;
1463 user_msg
.msg_iov
= msg64
.msg_iov
;
1464 user_msg
.msg_namelen
= msg64
.msg_namelen
;
1465 user_msg
.msg_name
= msg64
.msg_name
;
1467 user_msg
.msg_flags
= msg32
.msg_flags
;
1468 user_msg
.msg_controllen
= msg32
.msg_controllen
;
1469 user_msg
.msg_control
= msg32
.msg_control
;
1470 user_msg
.msg_iovlen
= msg32
.msg_iovlen
;
1471 user_msg
.msg_iov
= msg32
.msg_iov
;
1472 user_msg
.msg_namelen
= msg32
.msg_namelen
;
1473 user_msg
.msg_name
= msg32
.msg_name
;
1476 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
1477 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, EMSGSIZE
,
1482 /* allocate a uio large enough to hold the number of iovecs passed */
1483 auio
= uio_create(user_msg
.msg_iovlen
, 0,
1484 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1491 if (user_msg
.msg_iovlen
) {
1493 * get location of iovecs within the uio.
1494 * then copyin the iovecs from user space.
1496 iovp
= uio_iovsaddr(auio
);
1501 error
= copyin_user_iovec_array(user_msg
.msg_iov
,
1502 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1503 user_msg
.msg_iovlen
, iovp
);
1506 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
1508 /* finish setup of uio_t */
1509 error
= uio_calculateresid(auio
);
1514 user_msg
.msg_iov
= 0;
1517 /* msg_flags is ignored for send */
1518 user_msg
.msg_flags
= 0;
1520 error
= file_socket(uap
->s
, &so
);
1527 error
= sendit(p
, so
, &user_msg
, auio
, uap
->flags
, retval
);
1534 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1540 sendmsg_x(struct proc
*p
, struct sendmsg_x_args
*uap
, user_ssize_t
*retval
)
1543 struct user_msghdr_x
*user_msg_x
= NULL
;
1544 struct uio
**uiop
= NULL
;
1547 struct sockaddr
*to
= NULL
;
1548 user_ssize_t len_before
= 0, len_after
;
1550 size_t size_of_msghdr
;
1553 int has_addr_or_ctl
= 0;
1555 KERNEL_DEBUG(DBG_FNC_SENDMSG_X
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1557 error
= file_socket(uap
->s
, &so
);
1568 * Input parameter range check
1570 if (uap
->cnt
== 0 || uap
->cnt
> UIO_MAXIOV
) {
1575 * Clip to max currently allowed
1577 if (uap
->cnt
> somaxsendmsgx
)
1578 uap
->cnt
= somaxsendmsgx
;
1580 user_msg_x
= _MALLOC(uap
->cnt
* sizeof(struct user_msghdr_x
),
1581 M_TEMP
, M_WAITOK
| M_ZERO
);
1582 if (user_msg_x
== NULL
) {
1583 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__
);
1587 uiop
= _MALLOC(uap
->cnt
* sizeof(struct uio
*),
1588 M_TEMP
, M_WAITOK
| M_ZERO
);
1590 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__
);
1595 size_of_msghdr
= IS_64BIT_PROCESS(p
) ?
1596 sizeof(struct user64_msghdr_x
) : sizeof(struct user32_msghdr_x
);
1598 umsgp
= _MALLOC(uap
->cnt
* size_of_msghdr
,
1599 M_TEMP
, M_WAITOK
| M_ZERO
);
1600 if (umsgp
== NULL
) {
1601 printf("%s _MALLOC() user_msg_x failed\n", __func__
);
1605 error
= copyin(uap
->msgp
, umsgp
, uap
->cnt
* size_of_msghdr
);
1607 DBG_PRINTF("%s copyin() failed\n", __func__
);
1610 error
= internalize_user_msghdr_array(umsgp
,
1611 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1612 UIO_WRITE
, uap
->cnt
, user_msg_x
, uiop
);
1614 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__
);
1618 * Make sure the size of each message iovec and
1619 * the aggregate size of all the iovec is valid
1621 if (uio_array_is_valid(uiop
, uap
->cnt
) == 0) {
1627 * Sanity check on passed arguments
1629 for (i
= 0; i
< uap
->cnt
; i
++) {
1630 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
1633 * No flags on send message
1635 if (mp
->msg_flags
!= 0) {
1640 * No support for address or ancillary data (yet)
1642 if (mp
->msg_name
!= USER_ADDR_NULL
|| mp
->msg_namelen
!= 0)
1643 has_addr_or_ctl
= 1;
1645 if (mp
->msg_control
!= USER_ADDR_NULL
||
1646 mp
->msg_controllen
!= 0)
1647 has_addr_or_ctl
= 1;
1649 #if CONFIG_MACF_SOCKET_SUBSET
1651 * We check the state without holding the socket lock;
1652 * if a race condition occurs, it would simply result
1653 * in an extra call to the MAC check function.
1655 * Note: The following check is never true taken with the
1656 * current limitation that we do not accept to pass an address,
1657 * this is effectively placeholder code. If we add support for
1658 * addresses, we will have to check every address.
1661 !(so
->so_state
& SS_DEFUNCT
) &&
1662 (error
= mac_socket_check_send(kauth_cred_get(), so
, to
))
1665 #endif /* MAC_SOCKET_SUBSET */
1668 len_before
= uio_array_resid(uiop
, uap
->cnt
);
1671 * Feed list of packets at once only for connected socket without
1674 if (so
->so_proto
->pr_usrreqs
->pru_sosend_list
!=
1675 pru_sosend_list_notsupp
&&
1676 has_addr_or_ctl
== 0 && somaxsendmsgx
== 0) {
1677 error
= so
->so_proto
->pr_usrreqs
->pru_sosend_list(so
, uiop
,
1678 uap
->cnt
, uap
->flags
);
1680 for (i
= 0; i
< uap
->cnt
; i
++) {
1681 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
1682 struct user_msghdr user_msg
;
1683 uio_t auio
= uiop
[i
];
1686 user_msg
.msg_flags
= mp
->msg_flags
;
1687 user_msg
.msg_controllen
= mp
->msg_controllen
;
1688 user_msg
.msg_control
= mp
->msg_control
;
1689 user_msg
.msg_iovlen
= mp
->msg_iovlen
;
1690 user_msg
.msg_iov
= mp
->msg_iov
;
1691 user_msg
.msg_namelen
= mp
->msg_namelen
;
1692 user_msg
.msg_name
= mp
->msg_name
;
1694 error
= sendit(p
, so
, &user_msg
, auio
, uap
->flags
,
1700 len_after
= uio_array_resid(uiop
, uap
->cnt
);
1702 VERIFY(len_after
<= len_before
);
1705 if (len_after
!= len_before
&& (error
== ERESTART
||
1706 error
== EINTR
|| error
== EWOULDBLOCK
||
1709 /* Generation of SIGPIPE can be controlled per socket */
1710 if (error
== EPIPE
&& !(so
->so_flags
& SOF_NOSIGPIPE
))
1711 psignal(p
, SIGPIPE
);
1714 uiocnt
= externalize_user_msghdr_array(umsgp
,
1715 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1716 UIO_WRITE
, uap
->cnt
, user_msg_x
, uiop
);
1718 *retval
= (int)(uiocnt
);
1724 _FREE(umsgp
, M_TEMP
);
1726 free_uio_array(uiop
, uap
->cnt
);
1727 _FREE(uiop
, M_TEMP
);
1729 if (user_msg_x
!= NULL
)
1730 _FREE(user_msg_x
, M_TEMP
);
1732 KERNEL_DEBUG(DBG_FNC_SENDMSG_X
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1739 copyout_sa(struct sockaddr
*fromsa
, user_addr_t name
, socklen_t
*namelen
)
1742 socklen_t sa_len
= 0;
1746 if (len
<= 0 || fromsa
== 0) {
1750 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1752 sa_len
= fromsa
->sa_len
;
1753 len
= MIN((unsigned int)len
, sa_len
);
1754 error
= copyout(fromsa
, name
, (unsigned)len
);
1764 copyout_control(struct proc
*p
, struct mbuf
*m
, user_addr_t control
,
1765 socklen_t
*controllen
, int *flags
)
1775 while (m
&& len
> 0) {
1776 unsigned int tocopy
;
1777 struct cmsghdr
*cp
= mtod(m
, struct cmsghdr
*);
1778 int cp_size
= CMSG_ALIGN(cp
->cmsg_len
);
1779 int buflen
= m
->m_len
;
1781 while (buflen
> 0 && len
> 0) {
1783 * SCM_TIMESTAMP hack because struct timeval has a
1784 * different size for 32 bits and 64 bits processes
1786 if (cp
->cmsg_level
== SOL_SOCKET
&& cp
->cmsg_type
== SCM_TIMESTAMP
) {
1787 unsigned char tmp_buffer
[CMSG_SPACE(sizeof(struct user64_timeval
))];
1788 struct cmsghdr
*tmp_cp
= (struct cmsghdr
*)(void *)tmp_buffer
;
1790 struct timeval
*tv
= (struct timeval
*)(void *)CMSG_DATA(cp
);
1792 tmp_cp
->cmsg_level
= SOL_SOCKET
;
1793 tmp_cp
->cmsg_type
= SCM_TIMESTAMP
;
1795 if (proc_is64bit(p
)) {
1796 struct user64_timeval
*tv64
= (struct user64_timeval
*)(void *)CMSG_DATA(tmp_cp
);
1798 tv64
->tv_sec
= tv
->tv_sec
;
1799 tv64
->tv_usec
= tv
->tv_usec
;
1801 tmp_cp
->cmsg_len
= CMSG_LEN(sizeof(struct user64_timeval
));
1802 tmp_space
= CMSG_SPACE(sizeof(struct user64_timeval
));
1804 struct user32_timeval
*tv32
= (struct user32_timeval
*)(void *)CMSG_DATA(tmp_cp
);
1806 tv32
->tv_sec
= tv
->tv_sec
;
1807 tv32
->tv_usec
= tv
->tv_usec
;
1809 tmp_cp
->cmsg_len
= CMSG_LEN(sizeof(struct user32_timeval
));
1810 tmp_space
= CMSG_SPACE(sizeof(struct user32_timeval
));
1812 if (len
>= tmp_space
) {
1815 *flags
|= MSG_CTRUNC
;
1818 error
= copyout(tmp_buffer
, ctlbuf
, tocopy
);
1822 if (cp_size
> buflen
) {
1823 panic("cp_size > buflen, something"
1824 "wrong with alignment!");
1826 if (len
>= cp_size
) {
1829 *flags
|= MSG_CTRUNC
;
1832 error
= copyout((caddr_t
) cp
, ctlbuf
, tocopy
);
1841 cp
= (struct cmsghdr
*)(void *)
1842 ((unsigned char *) cp
+ cp_size
);
1843 cp_size
= CMSG_ALIGN(cp
->cmsg_len
);
1848 *controllen
= ctlbuf
- control
;
1854 * Returns: 0 Success
1858 * EACCES Mandatory Access Control failure
1861 * <pru_soreceive>:ENOBUFS
1862 * <pru_soreceive>:ENOTCONN
1863 * <pru_soreceive>:EWOULDBLOCK
1864 * <pru_soreceive>:EFAULT
1865 * <pru_soreceive>:EINTR
1866 * <pru_soreceive>:EBADF
1867 * <pru_soreceive>:EINVAL
1868 * <pru_soreceive>:EMSGSIZE
1869 * <pru_soreceive>:???
1871 * Notes: Additional return values from calls through <pru_soreceive>
1872 * depend on protocols other than TCP or AF_UNIX, which are
1876 recvit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
1877 user_addr_t namelenp
, int32_t *retval
)
1881 struct mbuf
*control
= 0;
1883 struct sockaddr
*fromsa
= 0;
1884 struct fileproc
*fp
;
1886 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1888 if ((error
= fp_lookup(p
, s
, &fp
, 1))) {
1889 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1893 if (fp
->f_type
!= DTYPE_SOCKET
) {
1894 fp_drop(p
, s
, fp
, 1);
1899 so
= (struct socket
*)fp
->f_data
;
1901 fp_drop(p
, s
, fp
, 1);
1908 #if CONFIG_MACF_SOCKET_SUBSET
1910 * We check the state without holding the socket lock;
1911 * if a race condition occurs, it would simply result
1912 * in an extra call to the MAC check function.
1914 if (!(so
->so_state
& SS_DEFUNCT
) &&
1915 !(so
->so_state
& SS_ISCONNECTED
) &&
1916 !(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) &&
1917 (error
= mac_socket_check_receive(kauth_cred_get(), so
)) != 0)
1919 #endif /* MAC_SOCKET_SUBSET */
1920 if (uio_resid(uiop
) < 0) {
1921 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, EINVAL
, 0, 0, 0, 0);
1926 len
= uio_resid(uiop
);
1927 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive(so
, &fromsa
, uiop
,
1928 (struct mbuf
**)0, mp
->msg_control
? &control
: (struct mbuf
**)0,
1931 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()),
1934 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
1935 error
== EINTR
|| error
== EWOULDBLOCK
))
1941 *retval
= len
- uio_resid(uiop
);
1944 error
= copyout_sa(fromsa
, mp
->msg_name
, &mp
->msg_namelen
);
1947 /* return the actual, untruncated address length */
1949 (error
= copyout((caddr_t
)&mp
->msg_namelen
, namelenp
,
1955 if (mp
->msg_control
) {
1956 error
= copyout_control(p
, control
, mp
->msg_control
,
1957 &mp
->msg_controllen
, &mp
->msg_flags
);
1961 FREE(fromsa
, M_SONAME
);
1964 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1966 fp_drop(p
, s
, fp
, 0);
1971 * Returns: 0 Success
1975 * read:??? [4056224: applicable for pipes]
1977 * Notes: The read entry point is only called as part of support for
1978 * binary backward compatability; new code should use read
1979 * instead of recv or recvfrom when attempting to read data
1982 * For full documentation of the return codes from recvit, see
1983 * the block header for the recvit function.
1986 recvfrom(struct proc
*p
, struct recvfrom_args
*uap
, int32_t *retval
)
1988 __pthread_testcancel(1);
1989 return (recvfrom_nocancel(p
, (struct recvfrom_nocancel_args
*)uap
,
1994 recvfrom_nocancel(struct proc
*p
, struct recvfrom_nocancel_args
*uap
,
1997 struct user_msghdr msg
;
2001 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
2002 AUDIT_ARG(fd
, uap
->s
);
2004 if (uap
->fromlenaddr
) {
2005 error
= copyin(uap
->fromlenaddr
,
2006 (caddr_t
)&msg
.msg_namelen
, sizeof (msg
.msg_namelen
));
2010 msg
.msg_namelen
= 0;
2012 msg
.msg_name
= uap
->from
;
2013 auio
= uio_create(1, 0,
2014 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
2020 uio_addiov(auio
, uap
->buf
, uap
->len
);
2021 /* no need to set up msg_iov. recvit uses uio_t we send it */
2024 msg
.msg_control
= 0;
2025 msg
.msg_controllen
= 0;
2026 msg
.msg_flags
= uap
->flags
;
2027 error
= recvit(p
, uap
->s
, &msg
, auio
, uap
->fromlenaddr
, retval
);
2032 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2038 * Returns: 0 Success
2045 * Notes: For full documentation of the return codes from recvit, see
2046 * the block header for the recvit function.
2049 recvmsg(struct proc
*p
, struct recvmsg_args
*uap
, int32_t *retval
)
2051 __pthread_testcancel(1);
2052 return (recvmsg_nocancel(p
, (struct recvmsg_nocancel_args
*)uap
,
2057 recvmsg_nocancel(struct proc
*p
, struct recvmsg_nocancel_args
*uap
,
2060 struct user32_msghdr msg32
;
2061 struct user64_msghdr msg64
;
2062 struct user_msghdr user_msg
;
2068 struct user_iovec
*iovp
;
2070 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
2071 AUDIT_ARG(fd
, uap
->s
);
2072 if (IS_64BIT_PROCESS(p
)) {
2073 msghdrp
= (caddr_t
)&msg64
;
2074 size_of_msghdr
= sizeof (msg64
);
2076 msghdrp
= (caddr_t
)&msg32
;
2077 size_of_msghdr
= sizeof (msg32
);
2079 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
2081 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2085 /* only need to copy if user process is not 64-bit */
2086 if (IS_64BIT_PROCESS(p
)) {
2087 user_msg
.msg_flags
= msg64
.msg_flags
;
2088 user_msg
.msg_controllen
= msg64
.msg_controllen
;
2089 user_msg
.msg_control
= msg64
.msg_control
;
2090 user_msg
.msg_iovlen
= msg64
.msg_iovlen
;
2091 user_msg
.msg_iov
= msg64
.msg_iov
;
2092 user_msg
.msg_namelen
= msg64
.msg_namelen
;
2093 user_msg
.msg_name
= msg64
.msg_name
;
2095 user_msg
.msg_flags
= msg32
.msg_flags
;
2096 user_msg
.msg_controllen
= msg32
.msg_controllen
;
2097 user_msg
.msg_control
= msg32
.msg_control
;
2098 user_msg
.msg_iovlen
= msg32
.msg_iovlen
;
2099 user_msg
.msg_iov
= msg32
.msg_iov
;
2100 user_msg
.msg_namelen
= msg32
.msg_namelen
;
2101 user_msg
.msg_name
= msg32
.msg_name
;
2104 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
2105 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, EMSGSIZE
,
2110 user_msg
.msg_flags
= uap
->flags
;
2112 /* allocate a uio large enough to hold the number of iovecs passed */
2113 auio
= uio_create(user_msg
.msg_iovlen
, 0,
2114 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
2122 * get location of iovecs within the uio. then copyin the iovecs from
2125 iovp
= uio_iovsaddr(auio
);
2130 uiov
= user_msg
.msg_iov
;
2131 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
2132 error
= copyin_user_iovec_array(uiov
,
2133 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
2134 user_msg
.msg_iovlen
, iovp
);
2138 /* finish setup of uio_t */
2139 error
= uio_calculateresid(auio
);
2144 error
= recvit(p
, uap
->s
, &user_msg
, auio
, 0, retval
);
2146 user_msg
.msg_iov
= uiov
;
2147 if (IS_64BIT_PROCESS(p
)) {
2148 msg64
.msg_flags
= user_msg
.msg_flags
;
2149 msg64
.msg_controllen
= user_msg
.msg_controllen
;
2150 msg64
.msg_control
= user_msg
.msg_control
;
2151 msg64
.msg_iovlen
= user_msg
.msg_iovlen
;
2152 msg64
.msg_iov
= user_msg
.msg_iov
;
2153 msg64
.msg_namelen
= user_msg
.msg_namelen
;
2154 msg64
.msg_name
= user_msg
.msg_name
;
2156 msg32
.msg_flags
= user_msg
.msg_flags
;
2157 msg32
.msg_controllen
= user_msg
.msg_controllen
;
2158 msg32
.msg_control
= user_msg
.msg_control
;
2159 msg32
.msg_iovlen
= user_msg
.msg_iovlen
;
2160 msg32
.msg_iov
= user_msg
.msg_iov
;
2161 msg32
.msg_namelen
= user_msg
.msg_namelen
;
2162 msg32
.msg_name
= user_msg
.msg_name
;
2164 error
= copyout(msghdrp
, uap
->msg
, size_of_msghdr
);
2170 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2175 recvmsg_x(struct proc
*p
, struct recvmsg_x_args
*uap
, user_ssize_t
*retval
)
2177 int error
= EOPNOTSUPP
;
2178 struct user_msghdr_x
*user_msg_x
= NULL
;
2179 struct recv_msg_elem
*recv_msg_array
= NULL
;
2181 user_ssize_t len_before
= 0, len_after
;
2183 size_t size_of_msghdr
;
2188 KERNEL_DEBUG(DBG_FNC_RECVMSG_X
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
2190 error
= file_socket(uap
->s
, &so
);
2200 * Input parameter range check
2202 if (uap
->cnt
== 0 || uap
->cnt
> UIO_MAXIOV
) {
2206 if (uap
->cnt
> somaxrecvmsgx
)
2207 uap
->cnt
= somaxrecvmsgx
;
2209 user_msg_x
= _MALLOC(uap
->cnt
* sizeof(struct user_msghdr_x
),
2210 M_TEMP
, M_WAITOK
| M_ZERO
);
2211 if (user_msg_x
== NULL
) {
2212 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__
);
2216 recv_msg_array
= alloc_recv_msg_array(uap
->cnt
);
2217 if (recv_msg_array
== NULL
) {
2218 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__
);
2222 size_of_msghdr
= IS_64BIT_PROCESS(p
) ?
2223 sizeof(struct user64_msghdr_x
) : sizeof(struct user32_msghdr_x
);
2225 umsgp
= _MALLOC(uap
->cnt
* size_of_msghdr
, M_TEMP
, M_WAITOK
| M_ZERO
);
2226 if (umsgp
== NULL
) {
2227 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__
);
2231 error
= copyin(uap
->msgp
, umsgp
, uap
->cnt
* size_of_msghdr
);
2233 DBG_PRINTF("%s copyin() failed\n", __func__
);
2236 error
= internalize_recv_msghdr_array(umsgp
,
2237 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
2238 UIO_READ
, uap
->cnt
, user_msg_x
, recv_msg_array
);
2240 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__
);
2244 * Make sure the size of each message iovec and
2245 * the aggregate size of all the iovec is valid
2247 if (recv_msg_array_is_valid(recv_msg_array
, uap
->cnt
) == 0) {
2252 * Sanity check on passed arguments
2254 for (i
= 0; i
< uap
->cnt
; i
++) {
2255 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
2257 if (mp
->msg_flags
!= 0) {
2262 #if CONFIG_MACF_SOCKET_SUBSET
2264 * We check the state without holding the socket lock;
2265 * if a race condition occurs, it would simply result
2266 * in an extra call to the MAC check function.
2268 if (!(so
->so_state
& SS_DEFUNCT
) &&
2269 !(so
->so_state
& SS_ISCONNECTED
) &&
2270 !(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) &&
2271 (error
= mac_socket_check_receive(kauth_cred_get(), so
)) != 0)
2273 #endif /* MAC_SOCKET_SUBSET */
2275 len_before
= recv_msg_array_resid(recv_msg_array
, uap
->cnt
);
2277 if (so
->so_proto
->pr_usrreqs
->pru_soreceive_list
!=
2278 pru_soreceive_list_notsupp
&&
2279 somaxrecvmsgx
== 0) {
2280 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive_list(so
,
2281 recv_msg_array
, uap
->cnt
, &uap
->flags
);
2283 int flags
= uap
->flags
;
2285 for (i
= 0; i
< uap
->cnt
; i
++) {
2286 struct recv_msg_elem
*recv_msg_elem
;
2288 struct sockaddr
**psa
;
2289 struct mbuf
**controlp
;
2291 recv_msg_elem
= recv_msg_array
+ i
;
2292 auio
= recv_msg_elem
->uio
;
2295 * Do not block if we got at least one packet
2298 flags
|= MSG_DONTWAIT
;
2300 psa
= (recv_msg_elem
->which
& SOCK_MSG_SA
) ?
2301 &recv_msg_elem
->psa
: NULL
;
2302 controlp
= (recv_msg_elem
->which
& SOCK_MSG_CONTROL
) ?
2303 &recv_msg_elem
->controlp
: NULL
;
2305 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive(so
, psa
,
2306 auio
, (struct mbuf
**)0, controlp
, &flags
);
2312 recv_msg_elem
->which
|= SOCK_MSG_DATA
;
2314 * Stop on partial copy
2316 if (flags
& (MSG_RCVMORE
| MSG_TRUNC
))
2319 if ((uap
->flags
& MSG_DONTWAIT
) == 0)
2320 flags
&= ~MSG_DONTWAIT
;
2324 len_after
= recv_msg_array_resid(recv_msg_array
, uap
->cnt
);
2327 if (len_after
!= len_before
&& (error
== ERESTART
||
2328 error
== EINTR
|| error
== EWOULDBLOCK
))
2334 uiocnt
= externalize_recv_msghdr_array(umsgp
,
2335 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
2336 UIO_READ
, uap
->cnt
, user_msg_x
, recv_msg_array
);
2338 error
= copyout(umsgp
, uap
->msgp
, uap
->cnt
* size_of_msghdr
);
2340 DBG_PRINTF("%s copyout() failed\n", __func__
);
2343 *retval
= (int)(uiocnt
);
2345 for (i
= 0; i
< uap
->cnt
; i
++) {
2346 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
2347 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
2348 struct sockaddr
*fromsa
= recv_msg_elem
->psa
;
2351 error
= copyout_sa(fromsa
, mp
->msg_name
,
2356 if (mp
->msg_control
) {
2357 error
= copyout_control(p
, recv_msg_elem
->controlp
,
2358 mp
->msg_control
, &mp
->msg_controllen
,
2368 _FREE(umsgp
, M_TEMP
);
2369 if (recv_msg_array
!= NULL
)
2370 free_recv_msg_array(recv_msg_array
, uap
->cnt
);
2371 if (user_msg_x
!= NULL
)
2372 _FREE(user_msg_x
, M_TEMP
);
2374 KERNEL_DEBUG(DBG_FNC_RECVMSG_X
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2380 * Returns: 0 Success
2382 * file_socket:ENOTSOCK
2385 * soshutdown:ENOTCONN
2386 * soshutdown:EADDRNOTAVAIL[TCP]
2387 * soshutdown:ENOBUFS[TCP]
2388 * soshutdown:EMSGSIZE[TCP]
2389 * soshutdown:EHOSTUNREACH[TCP]
2390 * soshutdown:ENETUNREACH[TCP]
2391 * soshutdown:ENETDOWN[TCP]
2392 * soshutdown:ENOMEM[TCP]
2393 * soshutdown:EACCES[TCP]
2394 * soshutdown:EMSGSIZE[TCP]
2395 * soshutdown:ENOBUFS[TCP]
2396 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2397 * soshutdown:??? [other protocol families]
2401 shutdown(__unused
struct proc
*p
, struct shutdown_args
*uap
,
2402 __unused
int32_t *retval
)
2407 AUDIT_ARG(fd
, uap
->s
);
2408 error
= file_socket(uap
->s
, &so
);
2415 error
= soshutdown((struct socket
*)so
, uap
->how
);
2422 * Returns: 0 Success
2425 * EACCES Mandatory Access Control failure
2426 * file_socket:ENOTSOCK
2429 * sosetopt:ENOPROTOOPT
2433 * sosetopt:EOPNOTSUPP[AF_UNIX]
2438 setsockopt(struct proc
*p
, struct setsockopt_args
*uap
,
2439 __unused
int32_t *retval
)
2442 struct sockopt sopt
;
2445 AUDIT_ARG(fd
, uap
->s
);
2446 if (uap
->val
== 0 && uap
->valsize
!= 0)
2448 /* No bounds checking on size (it's unsigned) */
2450 error
= file_socket(uap
->s
, &so
);
2454 sopt
.sopt_dir
= SOPT_SET
;
2455 sopt
.sopt_level
= uap
->level
;
2456 sopt
.sopt_name
= uap
->name
;
2457 sopt
.sopt_val
= uap
->val
;
2458 sopt
.sopt_valsize
= uap
->valsize
;
2465 #if CONFIG_MACF_SOCKET_SUBSET
2466 if ((error
= mac_socket_check_setsockopt(kauth_cred_get(), so
,
2469 #endif /* MAC_SOCKET_SUBSET */
2470 error
= sosetoptlock(so
, &sopt
, 1); /* will lock socket */
2479 * Returns: 0 Success
2482 * EACCES Mandatory Access Control failure
2485 * file_socket:ENOTSOCK
2490 getsockopt(struct proc
*p
, struct getsockopt_args
*uap
,
2491 __unused
int32_t *retval
)
2495 struct sockopt sopt
;
2498 error
= file_socket(uap
->s
, &so
);
2502 error
= copyin(uap
->avalsize
, (caddr_t
)&valsize
,
2506 /* No bounds checking on size (it's unsigned) */
2510 sopt
.sopt_dir
= SOPT_GET
;
2511 sopt
.sopt_level
= uap
->level
;
2512 sopt
.sopt_name
= uap
->name
;
2513 sopt
.sopt_val
= uap
->val
;
2514 sopt
.sopt_valsize
= (size_t)valsize
; /* checked non-negative above */
2521 #if CONFIG_MACF_SOCKET_SUBSET
2522 if ((error
= mac_socket_check_getsockopt(kauth_cred_get(), so
,
2525 #endif /* MAC_SOCKET_SUBSET */
2526 error
= sogetoptlock((struct socket
*)so
, &sopt
, 1); /* will lock */
2528 valsize
= sopt
.sopt_valsize
;
2529 error
= copyout((caddr_t
)&valsize
, uap
->avalsize
,
2541 * Returns: 0 Success
2543 * file_socket:ENOTSOCK
2547 * <pru_sockaddr>:ENOBUFS[TCP]
2548 * <pru_sockaddr>:ECONNRESET[TCP]
2549 * <pru_sockaddr>:EINVAL[AF_UNIX]
2550 * <sf_getsockname>:???
2554 getsockname(__unused
struct proc
*p
, struct getsockname_args
*uap
,
2555 __unused
int32_t *retval
)
2558 struct sockaddr
*sa
;
2563 error
= file_socket(uap
->fdes
, &so
);
2566 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof (socklen_t
));
2575 error
= (*so
->so_proto
->pr_usrreqs
->pru_sockaddr
)(so
, &sa
);
2577 error
= sflt_getsockname(so
, &sa
);
2578 if (error
== EJUSTRETURN
)
2581 socket_unlock(so
, 1);
2589 sa_len
= sa
->sa_len
;
2590 len
= MIN(len
, sa_len
);
2591 error
= copyout((caddr_t
)sa
, uap
->asa
, len
);
2594 /* return the actual, untruncated address length */
2597 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof (socklen_t
));
2602 file_drop(uap
->fdes
);
2607 * Get name of peer for connected socket.
2609 * Returns: 0 Success
2613 * file_socket:ENOTSOCK
2617 * <pru_peeraddr>:???
2618 * <sf_getpeername>:???
/*
 * getpeername: copy the address of the connected peer out to user space.
 *
 * Steps visible in this listing, in order:
 *   - file_socket() resolves uap->fdes into so;
 *   - if both SS_CANTRCVMORE and SS_CANTSENDMORE are set in so_state the
 *     socket is treated as shut down and socket_unlock() is called;
 *   - if neither SS_ISCONNECTED nor SS_ISCONFIRMING is set,
 *     socket_unlock() is called;
 *   - the length word at uap->alen is copied in to len;
 *   - the protocol pru_peeraddr hook fills in sa;
 *   - sflt_getpeername() is called and its result is compared with
 *     EJUSTRETURN;
 *   - MIN(len, sa->sa_len) bytes of sa are copied out to uap->asa;
 *   - a length word is copied back out to uap->alen;
 *   - sa, when non-NULL, is released with FREE(sa, M_SONAME);
 *   - file_drop() is called on uap->fdes.
 *
 * NOTE(review): the error codes chosen on the two state checks and the
 * jumps between steps are on lines missing from this listing.
 */
2622 getpeername(__unused
struct proc
*p
, struct getpeername_args
*uap
,
2623 __unused
int32_t *retval
)
2626 struct sockaddr
*sa
;
2631 error
= file_socket(uap
->fdes
, &so
);
2641 if ((so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
2642 (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) {
2643 /* the socket has been shutdown, no more getpeername's */
2644 socket_unlock(so
, 1);
2649 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONFIRMING
)) == 0) {
2650 socket_unlock(so
, 1);
2654 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof (socklen_t
));
2656 socket_unlock(so
, 1);
2660 error
= (*so
->so_proto
->pr_usrreqs
->pru_peeraddr
)(so
, &sa
);
2662 error
= sflt_getpeername(so
, &sa
);
2663 if (error
== EJUSTRETURN
)
2666 socket_unlock(so
, 1);
2673 sa_len
= sa
->sa_len
;
2674 len
= MIN(len
, sa_len
);
2675 error
= copyout(sa
, uap
->asa
, len
);
2678 /* return the actual, untruncated address length */
2681 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof (socklen_t
));
2683 if (sa
) FREE(sa
, M_SONAME
);
2685 file_drop(uap
->fdes
);
/*
 * sockargs: copy buflen bytes of user memory at data into an mbuf of the
 * requested type.
 *
 * Steps visible in this listing, in order:
 *   - alloc_buflen starts as buflen and is tested against INT_MAX/2;
 *   - for MT_CONTROL, alloc_buflen becomes twice the payload that follows
 *     the cmsghdr, plus one cmsghdr;
 *   - when alloc_buflen exceeds MLEN: an MT_SONAME request of at most 112
 *     bytes is clamped to MLEN, and anything above MCLBYTES is tested;
 *   - m_get(M_WAIT, type) obtains the mbuf, and the M_EXT flag is checked
 *     when alloc_buflen is still larger than MLEN;
 *   - copyin() transfers the original buflen bytes into the mbuf data;
 *   - for MT_SONAME the sa_len field of the copied sockaddr is overwritten
 *     with buflen.
 *
 * NOTE(review): buflen - sizeof(struct cmsghdr) is evaluated in an
 * unsigned type, so a buflen smaller than the header would wrap around.
 * No guard is visible in this listing; confirm that callers or the
 * missing lines enforce a minimum length.
 */
2690 sockargs(struct mbuf
**mp
, user_addr_t data
, int buflen
, int type
)
2692 struct sockaddr
*sa
;
2696 size_t alloc_buflen
= (size_t)buflen
;
2698 if (alloc_buflen
> INT_MAX
/2)
2702 * The fd's in the buffer must expand to be pointers, thus we need twice
2705 if (type
== MT_CONTROL
)
2706 alloc_buflen
= ((buflen
- sizeof(struct cmsghdr
))*2) +
2707 sizeof(struct cmsghdr
);
2709 if (alloc_buflen
> MLEN
) {
2710 if (type
== MT_SONAME
&& alloc_buflen
<= 112)
2711 alloc_buflen
= MLEN
; /* unix domain compat. hack */
2712 else if (alloc_buflen
> MCLBYTES
)
2715 m
= m_get(M_WAIT
, type
);
2718 if (alloc_buflen
> MLEN
) {
2720 if ((m
->m_flags
& M_EXT
) == 0) {
2726 * K64: We still copyin the original buflen because it gets expanded
2727 * later and we lie about the size of the mbuf because it only affects
2731 error
= copyin(data
, mtod(m
, caddr_t
), (u_int
)buflen
);
2736 if (type
== MT_SONAME
) {
2737 sa
= mtod(m
, struct sockaddr
*);
2738 sa
->sa_len
= buflen
;
2745 * Given a user_addr_t of length len, allocate and fill out a *sa.
2747 * Returns: 0 Success
2748 * ENAMETOOLONG Filename too long
2749 * EINVAL Invalid argument
2750 * ENOMEM Not enough space
2751 * copyin:EFAULT Bad address
/*
 * getsockaddr (entry and length validation): allocate a sockaddr and
 * fill it from len bytes of user memory at uaddr.
 *
 * Visible here: a len above SOCK_MAXADDRLEN returns ENAMETOOLONG, and a
 * len too short to reach sa_data (the fixed sockaddr header) is tested.
 * NOTE(review): the action taken on the short-length test is on a line
 * missing from this listing.
 */
2754 getsockaddr(struct socket
*so
, struct sockaddr
**namp
, user_addr_t uaddr
,
2755 size_t len
, boolean_t translate_unspec
)
2757 struct sockaddr
*sa
;
2761 if (len
> SOCK_MAXADDRLEN
)
2762 return (ENAMETOOLONG
);
2764 if (len
< offsetof(struct sockaddr
, sa_data
[0]))
2768 * Workaround for rdar://23362120
2769 * Always allocate a buffer that can hold an IPv6 socket address
/*
 * getsockaddr (allocation and copy-in): the buffer is sized to the larger
 * of len and sizeof(struct sockaddr_in6), allocated zero-filled with
 * MALLOC(..., M_SONAME, M_WAITOK | M_ZERO), and then only len bytes are
 * copied in from uaddr.  Afterwards, when translate_unspec is set, the
 * family read from user space is AF_UNSPEC, the socket passes
 * SOCK_CHECK_DOM(so, PF_INET) and len equals sizeof(struct sockaddr_in),
 * sa_family is rewritten to AF_INET.
 */
2771 alloclen
= MAX(len
, sizeof(struct sockaddr_in6
));
2772 MALLOC(sa
, struct sockaddr
*, alloclen
, M_SONAME
, M_WAITOK
| M_ZERO
);
2776 error
= copyin(uaddr
, (caddr_t
)sa
, len
);
2781 * Force sa_family to AF_INET on AF_INET sockets to handle
2782 * legacy applications that use AF_UNSPEC (0). On all other
2783 * sockets we leave it unchanged and let the lower layer
2786 if (translate_unspec
&& sa
->sa_family
== AF_UNSPEC
&&
2787 SOCK_CHECK_DOM(so
, PF_INET
) &&
2788 len
== sizeof (struct sockaddr_in
))
2789 sa
->sa_family
= AF_INET
;
/*
 * getsockaddr_s: variant of getsockaddr that fills a sockaddr_storage
 * supplied by the caller instead of allocating one.
 *
 * Steps visible in this listing, in order:
 *   - a NULL ss, a null uaddr, or a len too short to reach sa_data is
 *     tested;
 *   - a len larger than sizeof(*ss) returns ENAMETOOLONG;
 *   - *ss is zeroed with bzero() and len bytes are copied in from uaddr;
 *   - when translate_unspec is set, the family is AF_UNSPEC, the socket
 *     passes SOCK_CHECK_DOM(so, PF_INET) and len equals
 *     sizeof(struct sockaddr_in), ss_family is rewritten to AF_INET.
 *
 * NOTE(review): the result of the first test and the handling of a
 * copyin failure are on lines missing from this listing.
 */
2798 getsockaddr_s(struct socket
*so
, struct sockaddr_storage
*ss
,
2799 user_addr_t uaddr
, size_t len
, boolean_t translate_unspec
)
2803 if (ss
== NULL
|| uaddr
== USER_ADDR_NULL
||
2804 len
< offsetof(struct sockaddr
, sa_data
[0]))
2808 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2809 * so the check here is inclusive.
2811 if (len
> sizeof (*ss
))
2812 return (ENAMETOOLONG
);
2814 bzero(ss
, sizeof (*ss
));
2815 error
= copyin(uaddr
, (caddr_t
)ss
, len
);
2818 * Force sa_family to AF_INET on AF_INET sockets to handle
2819 * legacy applications that use AF_UNSPEC (0). On all other
2820 * sockets we leave it unchanged and let the lower layer
2823 if (translate_unspec
&& ss
->ss_family
== AF_UNSPEC
&&
2824 SOCK_CHECK_DOM(so
, PF_INET
) &&
2825 len
== sizeof (struct sockaddr_in
))
2826 ss
->ss_family
= AF_INET
;
2834 * Hard limit on the number of source and/or destination addresses
2835 * that can be specified by an application.
/* Bounds the parsing loop in getsockaddrlist (compared against sl->sl_cnt). */
2837 #define SOCKADDRLIST_MAX_ENTRIES 64
/*
 * getsockaddrlist (entry and per-address header parsing): build a
 * sockaddr_list from a packed run of socket addresses in user memory.
 *
 * Visible here: a null uaddr or zero uaddrlen is tested; the list comes
 * from sockaddrlist_alloc(M_WAITOK) and must start empty (VERIFY); the
 * loop runs while bytes remain and fewer than SOCKADDRLIST_MAX_ENTRIES
 * entries have been collected.  Each pass requires at least
 * sizeof(struct sockaddr) bytes to remain, copies that fixed-size header
 * into a zeroed sockaddr_storage, and then checks the ss_len it read:
 * it must fit in the remaining bytes, must reach sa_data, and must not
 * exceed sizeof(ss).
 *
 * NOTE(review): the error values assigned on the failing checks (other
 * than the ENAMETOOLONG that follows) are on lines missing from this
 * listing.
 */
2840 getsockaddrlist(struct socket
*so
, struct sockaddr_list
**slp
,
2841 user_addr_t uaddr
, socklen_t uaddrlen
, boolean_t xlate_unspec
)
2843 struct sockaddr_list
*sl
;
2848 if (uaddr
== USER_ADDR_NULL
|| uaddrlen
== 0)
2851 sl
= sockaddrlist_alloc(M_WAITOK
);
2855 VERIFY(sl
->sl_cnt
== 0);
2856 while (uaddrlen
> 0 && sl
->sl_cnt
< SOCKADDRLIST_MAX_ENTRIES
) {
2857 struct sockaddr_storage ss
;
2858 struct sockaddr_entry
*se
;
2859 struct sockaddr
*sa
;
2861 if (uaddrlen
< sizeof (struct sockaddr
)) {
2866 bzero(&ss
, sizeof (ss
));
2867 error
= copyin(uaddr
, (caddr_t
)&ss
, sizeof (struct sockaddr
));
2871 /* getsockaddr does the same but we need them now */
2872 if (uaddrlen
< ss
.ss_len
||
2873 ss
.ss_len
< offsetof(struct sockaddr
, sa_data
[0])) {
2876 } else if (ss
.ss_len
> sizeof (ss
)) {
2878 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2879 * so the check here is inclusive. We could use the
2880 * latter instead, but that seems like overkill for now.
/*
 * getsockaddrlist (entry construction and cleanup): an over-long ss_len
 * yields ENAMETOOLONG.  Otherwise a sockaddr_entry is allocated with
 * sockaddrentry_alloc(M_WAITOK) and inserted into the list, the full
 * address of ss.ss_len bytes is fetched with getsockaddr(), and the
 * result is verified to be non-NULL with a matching sa_len.  The
 * remaining byte count is then reduced by ss.ss_len after a VERIFY that
 * the subtraction cannot go negative.  sockaddrlist_free(sl) appears at
 * the end.
 *
 * NOTE(review): the condition guarding sockaddrlist_free, the advance of
 * uaddr, and the store of sa into the entry are on lines missing from
 * this listing.
 */
2882 error
= ENAMETOOLONG
;
2886 se
= sockaddrentry_alloc(M_WAITOK
);
2890 sockaddrlist_insert(sl
, se
);
2892 error
= getsockaddr(so
, &sa
, uaddr
, ss
.ss_len
, xlate_unspec
);
2896 VERIFY(sa
!= NULL
&& sa
->sa_len
== ss
.ss_len
);
2900 VERIFY(((signed)uaddrlen
- ss
.ss_len
) >= 0);
2901 uaddrlen
-= ss
.ss_len
;
2905 sockaddrlist_free(sl
);
/*
 * internalize_user_msghdr_array: convert an array of count user-format
 * msghdr_x records at src into kernel-format struct user_msghdr_x
 * records at dst.
 *
 * For each index i, as visible in this listing:
 *   - the source element is read as struct user64_msghdr_x when spacetype
 *     is UIO_USERSPACE64 and as struct user32_msghdr_x otherwise, and all
 *     eight fields are copied one by one into dst[i];
 *   - msg_iovlen must be positive and no larger than UIO_MAXIOV;
 *   - a uio is created with uio_create() for msg_iovlen vectors, and
 *     copyin_user_iovec_array() fills the iovec storage of that uio from
 *     the user msg_iov pointer;
 *   - msg_iov is then overwritten with the kernel iovec address;
 *   - uio_calculateresid() is run and msg_datalen is replaced with
 *     uio_resid(auio);
 *   - the name and control pointer/length pairs are tested.
 *
 * NOTE(review): where the uio is stored in uiop, and what the final two
 * tests set, are on lines missing from this listing.
 */
2913 internalize_user_msghdr_array(const void *src
, int spacetype
, int direction
,
2914 u_int count
, struct user_msghdr_x
*dst
, struct uio
**uiop
)
2921 for (i
= 0; i
< count
; i
++) {
2923 struct user_iovec
*iovp
;
2924 struct user_msghdr_x
*user_msg
= dst
+ i
;
2926 if (spacetype
== UIO_USERSPACE64
) {
2927 const struct user64_msghdr_x
*msghdr64
;
2929 msghdr64
= ((const struct user64_msghdr_x
*)src
) + i
;
2931 user_msg
->msg_name
= msghdr64
->msg_name
;
2932 user_msg
->msg_namelen
= msghdr64
->msg_namelen
;
2933 user_msg
->msg_iov
= msghdr64
->msg_iov
;
2934 user_msg
->msg_iovlen
= msghdr64
->msg_iovlen
;
2935 user_msg
->msg_control
= msghdr64
->msg_control
;
2936 user_msg
->msg_controllen
= msghdr64
->msg_controllen
;
2937 user_msg
->msg_flags
= msghdr64
->msg_flags
;
2938 user_msg
->msg_datalen
= msghdr64
->msg_datalen
;
2940 const struct user32_msghdr_x
*msghdr32
;
2942 msghdr32
= ((const struct user32_msghdr_x
*)src
) + i
;
2944 user_msg
->msg_name
= msghdr32
->msg_name
;
2945 user_msg
->msg_namelen
= msghdr32
->msg_namelen
;
2946 user_msg
->msg_iov
= msghdr32
->msg_iov
;
2947 user_msg
->msg_iovlen
= msghdr32
->msg_iovlen
;
2948 user_msg
->msg_control
= msghdr32
->msg_control
;
2949 user_msg
->msg_controllen
= msghdr32
->msg_controllen
;
2950 user_msg
->msg_flags
= msghdr32
->msg_flags
;
2951 user_msg
->msg_datalen
= msghdr32
->msg_datalen
;
2954 if (user_msg
->msg_iovlen
<= 0 ||
2955 user_msg
->msg_iovlen
> UIO_MAXIOV
) {
2959 auio
= uio_create(user_msg
->msg_iovlen
, 0, spacetype
,
2967 iovp
= uio_iovsaddr(auio
);
2972 error
= copyin_user_iovec_array(user_msg
->msg_iov
,
2973 spacetype
, user_msg
->msg_iovlen
, iovp
);
2976 user_msg
->msg_iov
= CAST_USER_ADDR_T(iovp
);
2978 error
= uio_calculateresid(auio
);
2981 user_msg
->msg_datalen
= uio_resid(auio
);
2983 if (user_msg
->msg_name
&& user_msg
->msg_namelen
)
2985 if (user_msg
->msg_control
&& user_msg
->msg_controllen
)
/*
 * internalize_recv_msghdr_array: receive-side counterpart of
 * internalize_user_msghdr_array.  Converts count user-format msghdr_x
 * records at src into dst and prepares the matching recv_msg_array
 * elements.
 *
 * For each index i, as visible in this listing:
 *   - the source element is read as struct user64_msghdr_x when spacetype
 *     is UIO_USERSPACE64 and as struct user32_msghdr_x otherwise, and all
 *     eight fields are copied one by one into dst[i];
 *   - msg_iovlen must be positive and no larger than UIO_MAXIOV;
 *   - recv_msg_array[i].uio is created with uio_create() and checked for
 *     NULL, then filled by copyin_user_iovec_array();
 *   - msg_iov is overwritten with the kernel iovec address;
 *   - uio_calculateresid() is run and msg_datalen is replaced with the
 *     uio residual;
 *   - SOCK_MSG_SA is set in which when both a name pointer and a name
 *     length are present, and SOCK_MSG_CONTROL likewise for control data.
 *
 * NOTE(review): error assignments and loop exits are on lines missing
 * from this listing.
 */
2994 internalize_recv_msghdr_array(const void *src
, int spacetype
, int direction
,
2995 u_int count
, struct user_msghdr_x
*dst
,
2996 struct recv_msg_elem
*recv_msg_array
)
3001 for (i
= 0; i
< count
; i
++) {
3002 struct user_iovec
*iovp
;
3003 struct user_msghdr_x
*user_msg
= dst
+ i
;
3004 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
3006 if (spacetype
== UIO_USERSPACE64
) {
3007 const struct user64_msghdr_x
*msghdr64
;
3009 msghdr64
= ((const struct user64_msghdr_x
*)src
) + i
;
3011 user_msg
->msg_name
= msghdr64
->msg_name
;
3012 user_msg
->msg_namelen
= msghdr64
->msg_namelen
;
3013 user_msg
->msg_iov
= msghdr64
->msg_iov
;
3014 user_msg
->msg_iovlen
= msghdr64
->msg_iovlen
;
3015 user_msg
->msg_control
= msghdr64
->msg_control
;
3016 user_msg
->msg_controllen
= msghdr64
->msg_controllen
;
3017 user_msg
->msg_flags
= msghdr64
->msg_flags
;
3018 user_msg
->msg_datalen
= msghdr64
->msg_datalen
;
3020 const struct user32_msghdr_x
*msghdr32
;
3022 msghdr32
= ((const struct user32_msghdr_x
*)src
) + i
;
3024 user_msg
->msg_name
= msghdr32
->msg_name
;
3025 user_msg
->msg_namelen
= msghdr32
->msg_namelen
;
3026 user_msg
->msg_iov
= msghdr32
->msg_iov
;
3027 user_msg
->msg_iovlen
= msghdr32
->msg_iovlen
;
3028 user_msg
->msg_control
= msghdr32
->msg_control
;
3029 user_msg
->msg_controllen
= msghdr32
->msg_controllen
;
3030 user_msg
->msg_flags
= msghdr32
->msg_flags
;
3031 user_msg
->msg_datalen
= msghdr32
->msg_datalen
;
3034 if (user_msg
->msg_iovlen
<= 0 ||
3035 user_msg
->msg_iovlen
> UIO_MAXIOV
) {
3039 recv_msg_elem
->uio
= uio_create(user_msg
->msg_iovlen
, 0,
3040 spacetype
, direction
);
3041 if (recv_msg_elem
->uio
== NULL
) {
3046 iovp
= uio_iovsaddr(recv_msg_elem
->uio
);
3051 error
= copyin_user_iovec_array(user_msg
->msg_iov
,
3052 spacetype
, user_msg
->msg_iovlen
, iovp
);
3055 user_msg
->msg_iov
= CAST_USER_ADDR_T(iovp
);
3057 error
= uio_calculateresid(recv_msg_elem
->uio
);
3060 user_msg
->msg_datalen
= uio_resid(recv_msg_elem
->uio
);
3062 if (user_msg
->msg_name
&& user_msg
->msg_namelen
)
3063 recv_msg_elem
->which
|= SOCK_MSG_SA
;
3064 if (user_msg
->msg_control
&& user_msg
->msg_controllen
)
3065 recv_msg_elem
->which
|= SOCK_MSG_CONTROL
;
/*
 * externalize_user_msghdr_array: write per-message results back into an
 * array of user-format msghdr_x records at dst.
 *
 * For each index i, as visible in this listing:
 *   - len is the original msg_datalen minus the residual left in uiop[i],
 *     i.e. the number of bytes consumed from that uio;
 *   - a message that asked for data but consumed none is tested;
 *   - msg_flags and msg_datalen (set to len) are stored into the
 *     user64 or user32 element, chosen by spacetype.
 *
 * The direction parameter is declared unused.
 * NOTE(review): the action taken for a message that moved no data is on
 * a line missing from this listing.
 */
3073 externalize_user_msghdr_array(void *dst
, int spacetype
, int direction
,
3074 u_int count
, const struct user_msghdr_x
*src
, struct uio
**uiop
)
3076 #pragma unused(direction)
3081 for (i
= 0; i
< count
; i
++) {
3082 const struct user_msghdr_x
*user_msg
= src
+ i
;
3083 uio_t auio
= uiop
[i
];
3084 user_ssize_t len
= user_msg
->msg_datalen
- uio_resid(auio
);
3086 if (user_msg
->msg_datalen
!= 0 && len
== 0)
3092 if (spacetype
== UIO_USERSPACE64
) {
3093 struct user64_msghdr_x
*msghdr64
;
3095 msghdr64
= ((struct user64_msghdr_x
*)dst
) + i
;
3097 msghdr64
->msg_flags
= user_msg
->msg_flags
;
3098 msghdr64
->msg_datalen
= len
;
3101 struct user32_msghdr_x
*msghdr32
;
3103 msghdr32
= ((struct user32_msghdr_x
*)dst
) + i
;
3105 msghdr32
->msg_flags
= user_msg
->msg_flags
;
3106 msghdr32
->msg_datalen
= len
;
/*
 * externalize_recv_msghdr_array: receive-side counterpart of
 * externalize_user_msghdr_array, driven by recv_msg_array instead of a
 * bare uio array.
 *
 * For each index i, as visible in this listing:
 *   - len is the original msg_datalen minus the residual of the uio held
 *     by the element;
 *   - when direction is UIO_READ, elements without SOCK_MSG_DATA in
 *     which are tested; otherwise a message that asked for data but
 *     consumed none is tested;
 *   - msg_flags and msg_datalen (set to len) are stored into the
 *     user64 or user32 element, chosen by spacetype.
 *
 * NOTE(review): the actions taken on the two tests are on lines missing
 * from this listing.
 */
3113 externalize_recv_msghdr_array(void *dst
, int spacetype
, int direction
,
3114 u_int count
, const struct user_msghdr_x
*src
,
3115 struct recv_msg_elem
*recv_msg_array
)
3121 for (i
= 0; i
< count
; i
++) {
3122 const struct user_msghdr_x
*user_msg
= src
+ i
;
3123 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
3126 len
= user_msg
->msg_datalen
- uio_resid(recv_msg_elem
->uio
);
3128 if (direction
== UIO_READ
) {
3129 if ((recv_msg_elem
->which
& SOCK_MSG_DATA
) == 0)
3132 if (user_msg
->msg_datalen
!= 0 && len
== 0)
3139 if (spacetype
== UIO_USERSPACE64
) {
3140 struct user64_msghdr_x
*msghdr64
;
3142 msghdr64
= ((struct user64_msghdr_x
*)dst
) + i
;
3144 msghdr64
->msg_flags
= user_msg
->msg_flags
;
3145 msghdr64
->msg_datalen
= len
;
3148 struct user32_msghdr_x
*msghdr32
;
3150 msghdr32
= ((struct user32_msghdr_x
*)dst
) + i
;
3152 msghdr32
->msg_flags
= user_msg
->msg_flags
;
3153 msghdr32
->msg_datalen
= len
;
/*
 * free_uio_array: walk the count entries of uiop and act on every
 * non-NULL one.
 * NOTE(review): the statement executed for a non-NULL entry is missing
 * from this listing; presumably it releases the uio - confirm against the
 * full source.
 */
3160 free_uio_array(struct uio
**uiop
, u_int count
)
3164 for (i
= 0; i
< count
; i
++) {
3165 if (uiop
[i
] != NULL
)
/*
 * uio_array_resid: accumulate uio_resid() over the count entries of uiop
 * into a user_ssize_t total that starts at zero.
 * NOTE(review): any NULL check around the accumulation and the return
 * statement are on lines missing from this listing.
 */
3170 __private_extern__ user_ssize_t
3171 uio_array_resid(struct uio
**uiop
, u_int count
)
3173 user_ssize_t len
= 0;
3176 for (i
= 0; i
< count
; i
++) {
3177 struct uio
*auio
= uiop
[i
];
3180 len
+= uio_resid(auio
);
/*
 * uio_array_is_valid: sanity-check the residual sizes of the count
 * entries of uiop.  Each residual must be non-negative and, once
 * narrowed to u_int32_t, no larger than sb_max; the same pair of checks
 * is then applied to the running total len.
 * NOTE(review): the accumulation into len and the values returned are on
 * lines missing from this listing.
 */
3186 uio_array_is_valid(struct uio
**uiop
, u_int count
)
3188 user_ssize_t len
= 0;
3191 for (i
= 0; i
< count
; i
++) {
3192 struct uio
*auio
= uiop
[i
];
3195 user_ssize_t resid
= uio_resid(auio
);
3198 * Sanity check on the validity of the iovec:
3199 * no point of going over sb_max
3201 if (resid
< 0 || (u_int32_t
)resid
> sb_max
)
3205 if (len
< 0 || (u_int32_t
)len
> sb_max
)
/*
 * alloc_recv_msg_array: allocate an array of count struct recv_msg_elem
 * with _MALLOC from M_TEMP.  M_ZERO is requested, so every element
 * starts zero-filled.  The pointer obtained from _MALLOC is returned
 * as is.
 */
3213 struct recv_msg_elem
*
3214 alloc_recv_msg_array(u_int count
)
3216 struct recv_msg_elem
*recv_msg_array
;
3218 recv_msg_array
= _MALLOC(count
* sizeof(struct recv_msg_elem
),
3219 M_TEMP
, M_WAITOK
| M_ZERO
);
3221 return (recv_msg_array
);
/*
 * free_recv_msg_array: release everything owned by an array of count
 * recv_msg_elem records and then the array itself.  For each element the
 * uio is released with uio_free(), the psa buffer with
 * _FREE(..., M_TEMP) and the control mbuf chain with m_freem(), each
 * only when the pointer is non-NULL.
 */
3225 free_recv_msg_array(struct recv_msg_elem
*recv_msg_array
, u_int count
)
3229 for (i
= 0; i
< count
; i
++) {
3230 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
3232 if (recv_msg_elem
->uio
!= NULL
)
3233 uio_free(recv_msg_elem
->uio
);
3234 if (recv_msg_elem
->psa
!= NULL
)
3235 _FREE(recv_msg_elem
->psa
, M_TEMP
);
3236 if (recv_msg_elem
->controlp
!= NULL
)
3237 m_freem(recv_msg_elem
->controlp
);
3239 _FREE(recv_msg_array
, M_TEMP
);
/*
 * recv_msg_array_resid: accumulate uio_resid() over every element of
 * recv_msg_array whose uio pointer is non-NULL, into a user_ssize_t
 * total that starts at zero.
 * NOTE(review): the return statement is on a line missing from this
 * listing.
 */
3243 __private_extern__ user_ssize_t
3244 recv_msg_array_resid(struct recv_msg_elem
*recv_msg_array
, u_int count
)
3246 user_ssize_t len
= 0;
3249 for (i
= 0; i
< count
; i
++) {
3250 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
3252 if (recv_msg_elem
->uio
!= NULL
)
3253 len
+= uio_resid(recv_msg_elem
->uio
);
/*
 * recv_msg_array_is_valid: sanity-check the residual sizes of the uios
 * held by recv_msg_array.  Elements with a NULL uio are not examined.
 * Each residual must be non-negative and, once narrowed to u_int32_t, no
 * larger than sb_max; the same pair of checks is then applied to the
 * running total len.
 * NOTE(review): the accumulation into len and the values returned are on
 * lines missing from this listing.
 */
3259 recv_msg_array_is_valid(struct recv_msg_elem
*recv_msg_array
, u_int count
)
3261 user_ssize_t len
= 0;
3264 for (i
= 0; i
< count
; i
++) {
3265 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
3267 if (recv_msg_elem
->uio
!= NULL
) {
3268 user_ssize_t resid
= uio_resid(recv_msg_elem
->uio
);
3271 * Sanity check on the validity of the iovec:
3272 * no point of going over sb_max
3274 if (resid
< 0 || (u_int32_t
)resid
> sb_max
)
3278 if (len
< 0 || (u_int32_t
)len
> sb_max
)
/*
 * SFUIOBUFS is the initial nbufs value in sendfile and also sizes the
 * on-stack uio buffer there via UIO_SIZEOF(SFUIOBUFS).
 */
3287 #define SFUIOBUFS 64
3289 /*
 * Macros to compute the number of mbufs needed depending on cluster size.
 * Both round up: ((n - 1) >> shift) + 1.  The subtraction is done in
 * unsigned int, so n must be at least 1; n == 0 would wrap around.
 */
3290 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
3291 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)
3293 /* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
3294 #define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
3296 /* Upper send limit in the number of mbuf clusters */
3297 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
3298 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
/*
 * alloc_sendpkt: obtain an mbuf packet for sendfile that can hold up to
 * pktlen bytes, and report through *maxchunks how many buffers were
 * actually obtained.
 *
 * As visible in this listing:
 *   - a zero pktlen is a programming error (panic);
 *   - when jumbocl is set and pktlen exceeds MBIGCLBYTES, 16K clusters
 *     are requested, capped at SENDFILE_MAX_16K;
 *   - 4K clusters (MBIGCLBYTES) are requested, capped at SENDFILE_MAX_4K;
 *   - a final attempt uses M_WAIT regardless of how, and a NULL result
 *     from it panics;
 *   - *maxchunks receives needed, which m_getpackets_internal is given
 *     by address on each call.
 *
 * NOTE(review): the conditions that chain the three attempts together
 * are on lines missing from this listing.
 */
3301 alloc_sendpkt(int how
, size_t pktlen
, unsigned int *maxchunks
,
3302 struct mbuf
**m
, boolean_t jumbocl
)
3304 unsigned int needed
;
3307 panic("%s: pktlen (%ld) must be non-zero\n", __func__
, pktlen
);
3310 * Try to allocate for the whole thing. Since we want full control
3311 * over the buffer size and be able to accept partial result, we can't
3312 * use mbuf_allocpacket(). The logic below is similar to sosend().
3315 if (pktlen
> MBIGCLBYTES
&& jumbocl
) {
3316 needed
= MIN(SENDFILE_MAX_16K
, HOWMANY_16K(pktlen
));
3317 *m
= m_getpackets_internal(&needed
, 1, how
, 0, M16KCLBYTES
);
3320 needed
= MIN(SENDFILE_MAX_4K
, HOWMANY_4K(pktlen
));
3321 *m
= m_getpackets_internal(&needed
, 1, how
, 0, MBIGCLBYTES
);
3325 * Our previous attempt(s) at allocation had failed; the system
3326 * may be short on mbufs, and we want to block until they are
3327 * available. This time, ask just for 1 mbuf and don't return
3332 *m
= m_getpackets_internal(&needed
, 1, M_WAIT
, 1, MBIGCLBYTES
);
3335 panic("%s: blocking allocation returned NULL\n", __func__
);
3337 *maxchunks
= needed
;
3342 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3343 * struct sf_hdtr *hdtr, int flags)
3345 * Send a file specified by 'fd' and starting at 'offset' to a socket
3346 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3347 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3348 * output. If specified, write the total number of bytes sent into *nbytes.
/*
 * sendfile (entry, locals and argument validation).
 *
 * As visible in this listing, the function:
 *   - takes a private copy of the current vfs context;
 *   - emits the DBG_FNC_SENDFILE start trace point and audits both
 *     descriptors;
 *   - resolves uap->fd to a fileproc and vnode with fp_getfvp(), then
 *     requires FREAD on the file and a regular-file vnode;
 *   - resolves uap->s with file_socket(), then requires a SOCK_STREAM
 *     socket in the SS_ISCONNECTED state;
 *   - tests for a negative offset, a null nbytes pointer and non-zero
 *     flags;
 *   - switches the private context over to the credential the file was
 *     opened with (fp->f_fglob->fg_cred) and, when
 *     CONFIG_MACF_SOCKET_SUBSET is defined, runs mac_socket_check_send()
 *     with that credential.
 *
 * NOTE(review): the error code chosen on each failed test and the jump
 * targets are on lines missing from this listing.
 */
3351 sendfile(struct proc
*p
, struct sendfile_args
*uap
, __unused
int *retval
)
3353 struct fileproc
*fp
;
3356 struct writev_nocancel_args nuap
;
3357 user_ssize_t writev_retval
;
3358 struct user_sf_hdtr user_hdtr
;
3359 struct user32_sf_hdtr user32_hdtr
;
3360 struct user64_sf_hdtr user64_hdtr
;
3362 off_t nbytes
= 0, sbytes
= 0;
3366 struct vfs_context context
= *vfs_context_current();
3368 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE
| DBG_FUNC_START
), uap
->s
,
3371 AUDIT_ARG(fd
, uap
->fd
);
3372 AUDIT_ARG(value32
, uap
->s
);
3375 * Do argument checking. Must be a regular file in, stream
3376 * type and connected socket out, positive offset.
3378 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
3381 if ((fp
->f_flag
& FREAD
) == 0) {
3385 if (vnode_isreg(vp
) == 0) {
3389 error
= file_socket(uap
->s
, &so
);
3397 if (so
->so_type
!= SOCK_STREAM
) {
3401 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
3405 if (uap
->offset
< 0) {
3409 if (uap
->nbytes
== USER_ADDR_NULL
) {
3413 if (uap
->flags
!= 0) {
3418 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3420 #if CONFIG_MACF_SOCKET_SUBSET
3421 /* JMM - fetch connected sockaddr? */
3422 error
= mac_socket_check_send(context
.vc_ucred
, so
, NULL
);
3428 * Get number of bytes to send
3429 * Should it apply to the size of the header and trailer?
3430 * JMM - error handling?
/*
 * sendfile (byte count, header/trailer descriptor, headers, file size).
 *
 * As visible in this listing:
 *   - the requested byte count is copied in from uap->nbytes; the return
 *     value of this copyin is not captured;
 *   - when uap->hdtr is non-null, the sf_hdtr structure is copied in
 *     using the 64-bit or 32-bit user layout according to
 *     IS_64BIT_PROCESS(p) and then widened field by field into user_hdtr;
 *   - when headers are present they are sent with writev_nocancel() and
 *     the byte count it reports is added to sbytes;
 *   - the file size is fetched with vnode_size().
 *
 * NOTE(review): nuap is declared as struct writev_nocancel_args but is
 * cleared with sizeof (struct writev_args); confirm the two layouts have
 * the same size.
 * NOTE(review): the assignment of nuap.fd and the error branches are on
 * lines missing from this listing.
 */
3432 copyin(uap
->nbytes
, &nbytes
, sizeof (off_t
));
3435 * If specified, get the pointer to the sf_hdtr struct for
3436 * any headers/trailers.
3438 if (uap
->hdtr
!= USER_ADDR_NULL
) {
3441 bzero(&user_hdtr
, sizeof (user_hdtr
));
3442 if (IS_64BIT_PROCESS(p
)) {
3443 hdtrp
= (caddr_t
)&user64_hdtr
;
3444 sizeof_hdtr
= sizeof (user64_hdtr
);
3446 hdtrp
= (caddr_t
)&user32_hdtr
;
3447 sizeof_hdtr
= sizeof (user32_hdtr
);
3449 error
= copyin(uap
->hdtr
, hdtrp
, sizeof_hdtr
);
3452 if (IS_64BIT_PROCESS(p
)) {
3453 user_hdtr
.headers
= user64_hdtr
.headers
;
3454 user_hdtr
.hdr_cnt
= user64_hdtr
.hdr_cnt
;
3455 user_hdtr
.trailers
= user64_hdtr
.trailers
;
3456 user_hdtr
.trl_cnt
= user64_hdtr
.trl_cnt
;
3458 user_hdtr
.headers
= user32_hdtr
.headers
;
3459 user_hdtr
.hdr_cnt
= user32_hdtr
.hdr_cnt
;
3460 user_hdtr
.trailers
= user32_hdtr
.trailers
;
3461 user_hdtr
.trl_cnt
= user32_hdtr
.trl_cnt
;
3465 * Send any headers. Wimp out and use writev(2).
3467 if (user_hdtr
.headers
!= USER_ADDR_NULL
) {
3468 bzero(&nuap
, sizeof (struct writev_args
));
3470 nuap
.iovp
= user_hdtr
.headers
;
3471 nuap
.iovcnt
= user_hdtr
.hdr_cnt
;
3472 error
= writev_nocancel(p
, &nuap
, &writev_retval
);
3476 sbytes
+= writev_retval
;
3481 * Get the file size for 2 reasons:
3482 * 1. We don't want to allocate more mbufs than necessary
3483 * 2. We don't want to read past the end of file
3485 if ((error
= vnode_size(vp
, &file_size
, vfs_context_current())) != 0) {
3490 * Simply read file data into a chain of mbufs that is used with scatter/
3491 * gather reads. We're not (yet?) set up to use zero-copy external
3492 * mbufs that point to the file pages.
/*
 * sendfile (main transfer loop, up to the end of socket-filter
 * processing).
 *
 * As visible in this listing, the send buffer is locked with sblock()
 * and the loop then advances off and sbytes by xfsize on every pass.
 * Each pass:
 *   - starts xfsize at the free space in the send buffer and then narrows
 *     it: capped at SENDFILE_MAX_BYTES, truncated to whole pages when
 *     above one page, limited to the rest of the current page when off is
 *     not page aligned, limited to the bytes still requested when nbytes
 *     is non-zero, and limited to the bytes left before file_size;
 *   - decides whether jumbo clusters may be used;
 *   - unlocks the socket, obtains mbufs with alloc_sendpkt(), and builds
 *     a UIO_SYSSPACE read uio over the mbuf data areas in the on-stack
 *     uio_buf;
 *   - reads the file with fo_read() using the private context, then
 *     subtracts the unread residual from xfsize;
 *   - trims the mbuf lengths and the packet header length to the bytes
 *     actually read;
 *   - rechecks SS_CANTSENDMORE and so_error;
 *   - waits with sbwait() when the free space is below sb_lowat, unless
 *     the socket is SS_NBIO;
 *   - hands the chain to sflt_data_out() and compares the result with
 *     EJUSTRETURN.
 *
 * NOTE(review): loop exits, error codes, the socket re-lock after the
 * allocation, and mbuf cleanup are on lines missing from this listing.
 */
3495 error
= sblock(&so
->so_snd
, SBL_WAIT
);
3497 socket_unlock(so
, 1);
3500 for (off
= uap
->offset
; ; off
+= xfsize
, sbytes
+= xfsize
) {
3501 mbuf_t m0
= NULL
, m
;
3502 unsigned int nbufs
= SFUIOBUFS
, i
;
3504 char uio_buf
[UIO_SIZEOF(SFUIOBUFS
)]; /* 1 KB !!! */
3512 * Calculate the amount to transfer.
3513 * Align to round number of pages.
3514 * Not to exceed send socket buffer,
3515 * the EOF, or the passed in nbytes.
3517 xfsize
= sbspace(&so
->so_snd
);
3520 if (so
->so_state
& SS_CANTSENDMORE
) {
3523 } else if ((so
->so_state
& SS_NBIO
)) {
3531 if (xfsize
> SENDFILE_MAX_BYTES
)
3532 xfsize
= SENDFILE_MAX_BYTES
;
3533 else if (xfsize
> PAGE_SIZE
)
3534 xfsize
= trunc_page(xfsize
);
3535 pgoff
= off
& PAGE_MASK_64
;
3536 if (pgoff
> 0 && PAGE_SIZE
- pgoff
< xfsize
)
3537 xfsize
= PAGE_SIZE_64
- pgoff
;
3538 if (nbytes
&& xfsize
> (nbytes
- sbytes
))
3539 xfsize
= nbytes
- sbytes
;
3542 if (off
+ xfsize
> file_size
)
3543 xfsize
= file_size
- off
;
3548 * Attempt to use larger than system page-size clusters for
3549 * large writes only if there is a jumbo cluster pool and
3550 * if the socket is marked accordingly.
3552 jumbocl
= sosendjcl
&& njcl
> 0 &&
3553 ((so
->so_flags
& SOF_MULTIPAGES
) || sosendjcl_ignore_capab
);
3555 socket_unlock(so
, 0);
3556 alloc_sendpkt(M_WAIT
, xfsize
, &nbufs
, &m0
, jumbocl
);
3557 pktlen
= mbuf_pkthdr_maxlen(m0
);
3558 if (pktlen
< (size_t)xfsize
)
3561 auio
= uio_createwithbuffer(nbufs
, off
, UIO_SYSSPACE
,
3562 UIO_READ
, &uio_buf
[0], sizeof (uio_buf
));
3564 printf("sendfile failed. nbufs = %d. %s", nbufs
,
3565 "File a radar related to rdar://10146739.\n");
3572 for (i
= 0, m
= m0
, uiolen
= 0;
3573 i
< nbufs
&& m
!= NULL
&& uiolen
< (size_t)xfsize
;
3574 i
++, m
= mbuf_next(m
)) {
3575 size_t mlen
= mbuf_maxlen(m
);
3577 if (mlen
+ uiolen
> (size_t)xfsize
)
3578 mlen
= xfsize
- uiolen
;
3579 mbuf_setlen(m
, mlen
);
3580 uio_addiov(auio
, CAST_USER_ADDR_T(mbuf_datastart(m
)),
3585 if (xfsize
!= uio_resid(auio
))
3586 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3587 "%lld\n", xfsize
, (long long)uio_resid(auio
));
3589 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ
| DBG_FUNC_START
),
3590 uap
->s
, (unsigned int)((xfsize
>> 32) & 0x0ffffffff),
3591 (unsigned int)(xfsize
& 0x0ffffffff), 0, 0);
3592 error
= fo_read(fp
, auio
, FOF_OFFSET
, &context
);
3595 if (uio_resid(auio
) != xfsize
&& (error
== ERESTART
||
3596 error
== EINTR
|| error
== EWOULDBLOCK
)) {
3603 xfsize
-= uio_resid(auio
);
3604 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ
| DBG_FUNC_END
),
3605 uap
->s
, (unsigned int)((xfsize
>> 32) & 0x0ffffffff),
3606 (unsigned int)(xfsize
& 0x0ffffffff), 0, 0);
3609 // printf("sendfile: fo_read 0 bytes, EOF\n");
3612 if (xfsize
+ off
> file_size
)
3613 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3614 "%lld\n", xfsize
, off
, file_size
);
3615 for (i
= 0, m
= m0
, rlen
= 0;
3616 i
< nbufs
&& m
!= NULL
&& rlen
< xfsize
;
3617 i
++, m
= mbuf_next(m
)) {
3618 size_t mlen
= mbuf_maxlen(m
);
3620 if (rlen
+ mlen
> (size_t)xfsize
)
3621 mlen
= xfsize
- rlen
;
3622 mbuf_setlen(m
, mlen
);
3626 mbuf_pkthdr_setlen(m0
, xfsize
);
3630 * Make sure that the socket is still able to take more data.
3631 * CANTSENDMORE being true usually means that the connection
3632 * was closed. so_error is true when an error was sensed after
3634 * The state is checked after the page mapping and buffer
3635 * allocation above since those operations may block and make
3636 * any socket checks stale. From this point forward, nothing
3637 * blocks before the pru_send (or more accurately, any blocking
3638 * results in a loop back to here to re-check).
3640 if ((so
->so_state
& SS_CANTSENDMORE
) || so
->so_error
) {
3641 if (so
->so_state
& SS_CANTSENDMORE
) {
3644 error
= so
->so_error
;
3651 * Wait for socket space to become available. We do this just
3652 * after checking the connection state above in order to avoid
3653 * a race condition with sbwait().
3655 if (sbspace(&so
->so_snd
) < (long)so
->so_snd
.sb_lowat
) {
3656 if (so
->so_state
& SS_NBIO
) {
3661 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT
|
3662 DBG_FUNC_START
), uap
->s
, 0, 0, 0, 0);
3663 error
= sbwait(&so
->so_snd
);
3664 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT
|
3665 DBG_FUNC_END
), uap
->s
, 0, 0, 0, 0);
3667 * An error from sbwait usually indicates that we've
3668 * been interrupted by a signal. If we've sent anything
3669 * then return bytes sent, otherwise return the error.
3678 struct mbuf
*control
= NULL
;
3681 * Socket filter processing
3684 error
= sflt_data_out(so
, NULL
, &m0
, &control
, 0);
3686 if (error
== EJUSTRETURN
) {
3693 * End Socket filter processing
/*
 * Trace bracket around the protocol send.  The interval is opened with
 * DBG_FUNC_START before pru_send and closed with DBG_FUNC_END after it,
 * matching the READ and WAIT brackets elsewhere in sendfile.  The closing
 * point previously used DBG_FUNC_START as well, which left the SEND
 * interval unterminated in the trace.
 */
3696 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND
| DBG_FUNC_START
),
3697 uap
->s
, 0, 0, 0, 0);
3698 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)(so
, 0, m0
,
3700 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND
| DBG_FUNC_END
),
3701 uap
->s
, 0, 0, 0, 0);
/*
 * sendfile (completion): the send buffer lock is dropped with sbunlock(),
 * trailers (when an sf_hdtr with a trailers pointer was supplied) are
 * sent with writev_nocancel() and the count it reports is added to
 * sbytes, the running total sbytes is copied out to uap->nbytes, and the
 * DBG_FNC_SENDFILE end trace point records sbytes and error.  A second
 * sbunlock() appears at the end of the listing.
 *
 * NOTE(review): nuap is declared as struct writev_nocancel_args but is
 * cleared with sizeof (struct writev_args); confirm the two layouts have
 * the same size.
 * NOTE(review): the return value of the copyout is not captured.  The
 * labels, the descriptor releases and the path that reaches the second
 * sbunlock are on lines missing from this listing.
 */
3706 sbunlock(&so
->so_snd
, FALSE
); /* will unlock socket */
3708 * Send trailers. Wimp out and use writev(2).
3710 if (uap
->hdtr
!= USER_ADDR_NULL
&&
3711 user_hdtr
.trailers
!= USER_ADDR_NULL
) {
3712 bzero(&nuap
, sizeof (struct writev_args
));
3714 nuap
.iovp
= user_hdtr
.trailers
;
3715 nuap
.iovcnt
= user_hdtr
.trl_cnt
;
3716 error
= writev_nocancel(p
, &nuap
, &writev_retval
);
3720 sbytes
+= writev_retval
;
3727 if (uap
->nbytes
!= USER_ADDR_NULL
) {
3728 /* XXX this appears bogus for some early failure conditions */
3729 copyout(&sbytes
, uap
->nbytes
, sizeof (off_t
));
3731 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE
| DBG_FUNC_END
), uap
->s
,
3732 (unsigned int)((sbytes
>> 32) & 0x0ffffffff),
3733 (unsigned int)(sbytes
& 0x0ffffffff), error
, 0);
3736 sbunlock(&so
->so_snd
, FALSE
); /* will unlock socket */
3741 #endif /* SENDFILE */