2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
81 #include <kern/locks.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
92 #include <sys/sysctl.h>
93 #include <sys/sys_domain.h>
95 #include <security/audit/audit.h>
97 #include <sys/kdebug.h>
98 #include <sys/sysproto.h>
99 #include <netinet/in.h>
100 #include <net/route.h>
101 #include <netinet/in_pcb.h>
103 #if CONFIG_MACF_SOCKET_SUBSET
104 #include <security/mac_framework.h>
105 #endif /* CONFIG_MACF_SOCKET_SUBSET */
107 #define f_flag f_fglob->fg_flag
108 #define f_type f_fglob->fg_ops->fo_type
109 #define f_msgcount f_fglob->fg_msgcount
110 #define f_cred f_fglob->fg_cred
111 #define f_ops f_fglob->fg_ops
112 #define f_offset f_fglob->fg_offset
113 #define f_data f_fglob->fg_data
115 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
116 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
117 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
118 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
119 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
120 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
121 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
122 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
123 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
124 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
125 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
126 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
127 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
128 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
129 #define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8))
130 #define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8))
132 #if DEBUG || DEVELOPMENT
133 #define DEBUG_KERNEL_ADDRPERM(_v) (_v)
134 #define DBG_PRINTF(...) printf(__VA_ARGS__)
136 #define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
137 #define DBG_PRINTF(...) do { } while (0)
140 /* TODO: should be in header file */
141 int falloc_locked(proc_t
, struct fileproc
**, int *, vfs_context_t
, int);
143 static int sendit(struct proc
*, struct socket
*, struct user_msghdr
*, uio_t
,
145 static int recvit(struct proc
*, int, struct user_msghdr
*, uio_t
, user_addr_t
,
147 static int connectit(struct socket
*, struct sockaddr
*);
148 static int getsockaddr(struct socket
*, struct sockaddr
**, user_addr_t
,
150 static int getsockaddr_s(struct socket
*, struct sockaddr_storage
*,
151 user_addr_t
, size_t, boolean_t
);
153 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf
**,
155 #endif /* SENDFILE */
156 static int connectx_nocancel(struct proc
*, struct connectx_args
*, int *);
157 static int connectitx(struct socket
*, struct sockaddr
*,
158 struct sockaddr
*, struct proc
*, uint32_t, sae_associd_t
,
159 sae_connid_t
*, uio_t
, unsigned int, user_ssize_t
*);
160 static int disconnectx_nocancel(struct proc
*, struct disconnectx_args
*,
162 static int socket_common(struct proc
*, int, int, int, pid_t
, int32_t *, int);
164 static int internalize_user_msghdr_array(const void *, int, int, u_int
,
165 struct user_msghdr_x
*, struct uio
**);
166 static u_int
externalize_user_msghdr_array(void *, int, int, u_int
,
167 const struct user_msghdr_x
*, struct uio
**);
169 static void free_uio_array(struct uio
**, u_int
);
170 static int uio_array_is_valid(struct uio
**, u_int
);
171 static int recv_msg_array_is_valid(struct recv_msg_elem
*, u_int
);
172 static int internalize_recv_msghdr_array(const void *, int, int,
173 u_int
, struct user_msghdr_x
*, struct recv_msg_elem
*);
174 static u_int
externalize_recv_msghdr_array(void *, int, int, u_int
,
175 const struct user_msghdr_x
*, struct recv_msg_elem
*);
176 static struct recv_msg_elem
*alloc_recv_msg_array(u_int count
);
177 static void free_recv_msg_array(struct recv_msg_elem
*, u_int
);
179 SYSCTL_DECL(_kern_ipc
);
181 static u_int somaxsendmsgx
= 100;
182 SYSCTL_UINT(_kern_ipc
, OID_AUTO
, maxsendmsgx
,
183 CTLFLAG_RW
| CTLFLAG_LOCKED
, &somaxsendmsgx
, 0, "");
184 static u_int somaxrecvmsgx
= 100;
185 SYSCTL_UINT(_kern_ipc
, OID_AUTO
, maxrecvmsgx
,
186 CTLFLAG_RW
| CTLFLAG_LOCKED
, &somaxrecvmsgx
, 0, "");
189 * System call interface to the socket abstraction.
192 extern const struct fileops socketops
;
196 * EACCES Mandatory Access Control failure
200 * socreate:EAFNOSUPPORT
201 * socreate:EPROTOTYPE
202 * socreate:EPROTONOSUPPORT
205 * socreate:??? [other protocol families, IPSEC]
208 socket(struct proc
*p
,
209 struct socket_args
*uap
,
212 return (socket_common(p
, uap
->domain
, uap
->type
, uap
->protocol
,
213 proc_selfpid(), retval
, 0));
217 socket_delegate(struct proc
*p
,
218 struct socket_delegate_args
*uap
,
221 return socket_common(p
, uap
->domain
, uap
->type
, uap
->protocol
,
222 uap
->epid
, retval
, 1);
226 socket_common(struct proc
*p
,
238 AUDIT_ARG(socket
, domain
, type
, protocol
);
239 #if CONFIG_MACF_SOCKET_SUBSET
240 if ((error
= mac_socket_check_create(kauth_cred_get(), domain
,
241 type
, protocol
)) != 0)
243 #endif /* CONFIG_MACF_SOCKET_SUBSET */
246 error
= priv_check_cred(kauth_cred_get(),
247 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE
, 0);
252 error
= falloc(p
, &fp
, &fd
, vfs_context_current());
256 fp
->f_flag
= FREAD
|FWRITE
;
257 fp
->f_ops
= &socketops
;
260 error
= socreate_delegate(domain
, &so
, type
, protocol
, epid
);
262 error
= socreate(domain
, &so
, type
, protocol
);
267 fp
->f_data
= (caddr_t
)so
;
270 procfdtbl_releasefd(p
, fd
, NULL
);
272 fp_drop(p
, fd
, fp
, 1);
276 if (ENTR_SHOULDTRACE
) {
277 KERNEL_ENERGYTRACE(kEnTrActKernSocket
, DBG_FUNC_START
,
278 fd
, 0, (int64_t)VM_KERNEL_ADDRPERM(so
));
286 * EDESTADDRREQ Destination address required
287 * EBADF Bad file descriptor
288 * EACCES Mandatory Access Control failure
289 * file_socket:ENOTSOCK
291 * getsockaddr:ENAMETOOLONG Filename too long
292 * getsockaddr:EINVAL Invalid argument
293 * getsockaddr:ENOMEM Not enough space
294 * getsockaddr:EFAULT Bad address
299 bind(__unused proc_t p
, struct bind_args
*uap
, __unused
int32_t *retval
)
301 struct sockaddr_storage ss
;
302 struct sockaddr
*sa
= NULL
;
304 boolean_t want_free
= TRUE
;
307 AUDIT_ARG(fd
, uap
->s
);
308 error
= file_socket(uap
->s
, &so
);
315 if (uap
->name
== USER_ADDR_NULL
) {
316 error
= EDESTADDRREQ
;
319 if (uap
->namelen
> sizeof (ss
)) {
320 error
= getsockaddr(so
, &sa
, uap
->name
, uap
->namelen
, TRUE
);
322 error
= getsockaddr_s(so
, &ss
, uap
->name
, uap
->namelen
, TRUE
);
324 sa
= (struct sockaddr
*)&ss
;
330 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
331 #if CONFIG_MACF_SOCKET_SUBSET
332 if ((sa
!= NULL
&& sa
->sa_family
== AF_SYSTEM
) ||
333 (error
= mac_socket_check_bind(kauth_cred_get(), so
, sa
)) == 0)
334 error
= sobindlock(so
, sa
, 1); /* will lock socket */
336 error
= sobindlock(so
, sa
, 1); /* will lock socket */
337 #endif /* CONFIG_MACF_SOCKET_SUBSET */
348 * EACCES Mandatory Access Control failure
349 * file_socket:ENOTSOCK
352 * solisten:EOPNOTSUPP
356 listen(__unused
struct proc
*p
, struct listen_args
*uap
,
357 __unused
int32_t *retval
)
362 AUDIT_ARG(fd
, uap
->s
);
363 error
= file_socket(uap
->s
, &so
);
367 #if CONFIG_MACF_SOCKET_SUBSET
369 error
= mac_socket_check_listen(kauth_cred_get(), so
);
371 error
= solisten(so
, uap
->backlog
);
374 error
= solisten(so
, uap
->backlog
);
375 #endif /* CONFIG_MACF_SOCKET_SUBSET */
384 * Returns: fp_getfsock:EBADF Bad file descriptor
385 * fp_getfsock:EOPNOTSUPP ...
386 * xlate => :ENOTSOCK Socket operation on non-socket
387 * :EFAULT Bad address on copyin/copyout
388 * :EBADF Bad file descriptor
389 * :EOPNOTSUPP Operation not supported on socket
390 * :EINVAL Invalid argument
391 * :EWOULDBLOCK Operation would block
392 * :ECONNABORTED Connection aborted
393 * :EINTR Interrupted function
394 * :EACCES Mandatory Access Control failure
395 * falloc_locked:ENFILE Too many files open in system
396 * falloc_locked:EMFILE Too many open files
397 * falloc_locked:ENOMEM Not enough space
401 accept_nocancel(struct proc
*p
, struct accept_nocancel_args
*uap
,
405 struct sockaddr
*sa
= NULL
;
408 struct socket
*head
, *so
= NULL
;
409 lck_mtx_t
*mutex_held
;
412 short fflag
; /* type must match fp->f_flag */
417 AUDIT_ARG(fd
, uap
->s
);
420 error
= copyin(uap
->anamelen
, (caddr_t
)&namelen
,
425 error
= fp_getfsock(p
, fd
, &fp
, &head
);
427 if (error
== EOPNOTSUPP
)
435 #if CONFIG_MACF_SOCKET_SUBSET
436 if ((error
= mac_socket_check_accept(kauth_cred_get(), head
)) != 0)
438 #endif /* CONFIG_MACF_SOCKET_SUBSET */
440 socket_lock(head
, 1);
442 if (head
->so_proto
->pr_getlock
!= NULL
) {
443 mutex_held
= (*head
->so_proto
->pr_getlock
)(head
, PR_F_WILLUNLOCK
);
446 mutex_held
= head
->so_proto
->pr_domain
->dom_mtx
;
450 if ((head
->so_options
& SO_ACCEPTCONN
) == 0) {
451 if ((head
->so_proto
->pr_flags
& PR_CONNREQUIRED
) == 0) {
454 /* POSIX: The socket is not accepting connections */
457 socket_unlock(head
, 1);
461 if ((head
->so_state
& SS_NBIO
) && head
->so_comp
.tqh_first
== NULL
) {
462 socket_unlock(head
, 1);
466 while (TAILQ_EMPTY(&head
->so_comp
) && head
->so_error
== 0) {
467 if (head
->so_state
& SS_CANTRCVMORE
) {
468 head
->so_error
= ECONNABORTED
;
471 if (head
->so_usecount
< 1)
472 panic("accept: head=%p refcount=%d\n", head
,
474 error
= msleep((caddr_t
)&head
->so_timeo
, mutex_held
,
475 PSOCK
| PCATCH
, "accept", 0);
476 if (head
->so_usecount
< 1)
477 panic("accept: 2 head=%p refcount=%d\n", head
,
479 if ((head
->so_state
& SS_DRAINING
)) {
480 error
= ECONNABORTED
;
483 socket_unlock(head
, 1);
487 if (head
->so_error
) {
488 error
= head
->so_error
;
490 socket_unlock(head
, 1);
495 * At this point we know that there is at least one connection
496 * ready to be accepted. Remove it from the queue prior to
497 * allocating the file descriptor for it since falloc() may
498 * block allowing another process to accept the connection
501 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
503 so_acquire_accept_list(head
, NULL
);
504 if (TAILQ_EMPTY(&head
->so_comp
)) {
505 so_release_accept_list(head
);
509 so
= TAILQ_FIRST(&head
->so_comp
);
510 TAILQ_REMOVE(&head
->so_comp
, so
, so_list
);
512 so
->so_state
&= ~SS_COMP
;
514 so_release_accept_list(head
);
516 /* unlock head to avoid deadlock with select, keep a ref on head */
517 socket_unlock(head
, 0);
519 #if CONFIG_MACF_SOCKET_SUBSET
521 * Pass the pre-accepted socket to the MAC framework. This is
522 * cheaper than allocating a file descriptor for the socket,
523 * calling the protocol accept callback, and possibly freeing
524 * the file descriptor should the MAC check fail.
526 if ((error
= mac_socket_check_accepted(kauth_cred_get(), so
)) != 0) {
528 so
->so_state
&= ~SS_NOFDREF
;
529 socket_unlock(so
, 1);
531 /* Drop reference on listening socket */
535 #endif /* CONFIG_MACF_SOCKET_SUBSET */
538 * Pass the pre-accepted socket to any interested socket filter(s).
539 * Upon failure, the socket would have been closed by the callee.
541 if (so
->so_filt
!= NULL
&& (error
= soacceptfilter(so
, head
)) != 0) {
542 /* Drop reference on listening socket */
544 /* Propagate socket filter's error code to the caller */
549 error
= falloc(p
, &fp
, &newfd
, vfs_context_current());
552 * Probably ran out of file descriptors.
554 * <rdar://problem/8554930>
555 * Don't put this back on the socket like we used to, that
556 * just causes the client to spin. Drop the socket.
559 so
->so_state
&= ~SS_NOFDREF
;
560 socket_unlock(so
, 1);
567 fp
->f_ops
= &socketops
;
568 fp
->f_data
= (caddr_t
)so
;
570 socket_lock(head
, 0);
574 /* Sync socket non-blocking/async state with file flags */
575 if (fp
->f_flag
& FNONBLOCK
) {
576 so
->so_state
|= SS_NBIO
;
578 so
->so_state
&= ~SS_NBIO
;
581 if (fp
->f_flag
& FASYNC
) {
582 so
->so_state
|= SS_ASYNC
;
583 so
->so_rcv
.sb_flags
|= SB_ASYNC
;
584 so
->so_snd
.sb_flags
|= SB_ASYNC
;
586 so
->so_state
&= ~SS_ASYNC
;
587 so
->so_rcv
.sb_flags
&= ~SB_ASYNC
;
588 so
->so_snd
.sb_flags
&= ~SB_ASYNC
;
591 (void) soacceptlock(so
, &sa
, 0);
592 socket_unlock(head
, 1);
600 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
605 /* save sa_len before it is destroyed */
607 namelen
= MIN(namelen
, sa_len
);
608 error
= copyout(sa
, uap
->name
, namelen
);
610 /* return the actual, untruncated address length */
613 error
= copyout((caddr_t
)&namelen
, uap
->anamelen
,
620 * If the socket has been marked as inactive by sosetdefunct(),
621 * disallow further operations on it.
623 if (so
->so_flags
& SOF_DEFUNCT
) {
624 sodefunct(current_proc(), so
,
625 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL
);
629 socket_unlock(so
, 1);
632 procfdtbl_releasefd(p
, newfd
, NULL
);
633 fp_drop(p
, newfd
, fp
, 1);
639 if (error
== 0 && ENTR_SHOULDTRACE
) {
640 KERNEL_ENERGYTRACE(kEnTrActKernSocket
, DBG_FUNC_START
,
641 newfd
, 0, (int64_t)VM_KERNEL_ADDRPERM(so
));
647 accept(struct proc
*p
, struct accept_args
*uap
, int32_t *retval
)
649 __pthread_testcancel(1);
650 return (accept_nocancel(p
, (struct accept_nocancel_args
*)uap
,
656 * EBADF Bad file descriptor
657 * EALREADY Connection already in progress
658 * EINPROGRESS Operation in progress
659 * ECONNABORTED Connection aborted
660 * EINTR Interrupted function
661 * EACCES Mandatory Access Control failure
662 * file_socket:ENOTSOCK
664 * getsockaddr:ENAMETOOLONG Filename too long
665 * getsockaddr:EINVAL Invalid argument
666 * getsockaddr:ENOMEM Not enough space
667 * getsockaddr:EFAULT Bad address
668 * soconnectlock:EOPNOTSUPP
669 * soconnectlock:EISCONN
670 * soconnectlock:??? [depends on protocol, filters]
673 * Imputed: so_error error may be set from so_error, which
674 * may have been set by soconnectlock.
678 connect(struct proc
*p
, struct connect_args
*uap
, int32_t *retval
)
680 __pthread_testcancel(1);
681 return (connect_nocancel(p
, (struct connect_nocancel_args
*)uap
,
686 connect_nocancel(proc_t p
, struct connect_nocancel_args
*uap
, int32_t *retval
)
688 #pragma unused(p, retval)
690 struct sockaddr_storage ss
;
691 struct sockaddr
*sa
= NULL
;
696 AUDIT_ARG(fd
, uap
->s
);
697 error
= file_socket(fd
, &so
);
706 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
707 * if this is a datagram socket; translate for other types.
709 dgram
= (so
->so_type
== SOCK_DGRAM
);
711 /* Get socket address now before we obtain socket lock */
712 if (uap
->namelen
> sizeof (ss
)) {
713 error
= getsockaddr(so
, &sa
, uap
->name
, uap
->namelen
, !dgram
);
715 error
= getsockaddr_s(so
, &ss
, uap
->name
, uap
->namelen
, !dgram
);
717 sa
= (struct sockaddr
*)&ss
;
722 error
= connectit(so
, sa
);
724 if (sa
!= NULL
&& sa
!= SA(&ss
))
726 if (error
== ERESTART
)
734 connectx_nocancel(struct proc
*p
, struct connectx_args
*uap
, int *retval
)
736 #pragma unused(p, retval)
737 struct sockaddr_storage ss
, sd
;
738 struct sockaddr
*src
= NULL
, *dst
= NULL
;
740 int error
, error1
, fd
= uap
->socket
;
742 sae_connid_t cid
= SAE_CONNID_ANY
;
743 struct user32_sa_endpoints ep32
;
744 struct user64_sa_endpoints ep64
;
745 struct user_sa_endpoints ep
;
746 user_ssize_t bytes_written
= 0;
747 struct user_iovec
*iovp
;
750 AUDIT_ARG(fd
, uap
->socket
);
751 error
= file_socket(fd
, &so
);
759 if (uap
->endpoints
== USER_ADDR_NULL
) {
764 if (IS_64BIT_PROCESS(p
)) {
765 error
= copyin(uap
->endpoints
, (caddr_t
)&ep64
, sizeof(ep64
));
769 ep
.sae_srcif
= ep64
.sae_srcif
;
770 ep
.sae_srcaddr
= ep64
.sae_srcaddr
;
771 ep
.sae_srcaddrlen
= ep64
.sae_srcaddrlen
;
772 ep
.sae_dstaddr
= ep64
.sae_dstaddr
;
773 ep
.sae_dstaddrlen
= ep64
.sae_dstaddrlen
;
775 error
= copyin(uap
->endpoints
, (caddr_t
)&ep32
, sizeof(ep32
));
779 ep
.sae_srcif
= ep32
.sae_srcif
;
780 ep
.sae_srcaddr
= ep32
.sae_srcaddr
;
781 ep
.sae_srcaddrlen
= ep32
.sae_srcaddrlen
;
782 ep
.sae_dstaddr
= ep32
.sae_dstaddr
;
783 ep
.sae_dstaddrlen
= ep32
.sae_dstaddrlen
;
787 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
788 * if this is a datagram socket; translate for other types.
790 dgram
= (so
->so_type
== SOCK_DGRAM
);
792 /* Get socket address now before we obtain socket lock */
793 if (ep
.sae_srcaddr
!= USER_ADDR_NULL
) {
794 if (ep
.sae_srcaddrlen
> sizeof (ss
)) {
795 error
= getsockaddr(so
, &src
, ep
.sae_srcaddr
, ep
.sae_srcaddrlen
, dgram
);
797 error
= getsockaddr_s(so
, &ss
, ep
.sae_srcaddr
, ep
.sae_srcaddrlen
, dgram
);
799 src
= (struct sockaddr
*)&ss
;
806 if (ep
.sae_dstaddr
== USER_ADDR_NULL
) {
811 /* Get socket address now before we obtain socket lock */
812 if (ep
.sae_dstaddrlen
> sizeof (sd
)) {
813 error
= getsockaddr(so
, &dst
, ep
.sae_dstaddr
, ep
.sae_dstaddrlen
, dgram
);
815 error
= getsockaddr_s(so
, &sd
, ep
.sae_dstaddr
, ep
.sae_dstaddrlen
, dgram
);
817 dst
= (struct sockaddr
*)&sd
;
825 if (uap
->iov
!= USER_ADDR_NULL
) {
826 /* Verify range before calling uio_create() */
827 if (uap
->iovcnt
<= 0 || uap
->iovcnt
> UIO_MAXIOV
){
832 if (uap
->len
== USER_ADDR_NULL
){
837 /* allocate a uio to hold the number of iovecs passed */
838 auio
= uio_create(uap
->iovcnt
, 0,
839 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
848 * get location of iovecs within the uio.
849 * then copyin the iovecs from user space.
851 iovp
= uio_iovsaddr(auio
);
856 error
= copyin_user_iovec_array(uap
->iov
,
857 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
862 /* finish setup of uio_t */
863 error
= uio_calculateresid(auio
);
869 error
= connectitx(so
, src
, dst
, p
, ep
.sae_srcif
, uap
->associd
,
870 &cid
, auio
, uap
->flags
, &bytes_written
);
871 if (error
== ERESTART
)
874 if (uap
->len
!= USER_ADDR_NULL
) {
875 error1
= copyout(&bytes_written
, uap
->len
, sizeof (uap
->len
));
876 /* give precedence to connectitx errors */
877 if ((error1
!= 0) && (error
== 0))
881 if (uap
->connid
!= USER_ADDR_NULL
) {
882 error1
= copyout(&cid
, uap
->connid
, sizeof (cid
));
883 /* give precedence to connectitx errors */
884 if ((error1
!= 0) && (error
== 0))
892 if (src
!= NULL
&& src
!= SA(&ss
))
894 if (dst
!= NULL
&& dst
!= SA(&sd
))
900 connectx(struct proc
*p
, struct connectx_args
*uap
, int *retval
)
903 * Due to similarity with a POSIX interface, define as
904 * an unofficial cancellation point.
906 __pthread_testcancel(1);
907 return (connectx_nocancel(p
, uap
, retval
));
911 connectit(struct socket
*so
, struct sockaddr
*sa
)
915 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
916 #if CONFIG_MACF_SOCKET_SUBSET
917 if ((error
= mac_socket_check_connect(kauth_cred_get(), so
, sa
)) != 0)
919 #endif /* CONFIG_MACF_SOCKET_SUBSET */
922 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
926 error
= soconnectlock(so
, sa
, 0);
928 so
->so_state
&= ~SS_ISCONNECTING
;
931 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
935 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
936 lck_mtx_t
*mutex_held
;
938 if (so
->so_proto
->pr_getlock
!= NULL
)
939 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
941 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
942 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
,
943 PSOCK
| PCATCH
, __func__
, 0);
944 if (so
->so_state
& SS_DRAINING
) {
945 error
= ECONNABORTED
;
951 error
= so
->so_error
;
955 socket_unlock(so
, 1);
960 connectitx(struct socket
*so
, struct sockaddr
*src
,
961 struct sockaddr
*dst
, struct proc
*p
, uint32_t ifscope
,
962 sae_associd_t aid
, sae_connid_t
*pcid
, uio_t auio
, unsigned int flags
,
963 user_ssize_t
*bytes_written
)
966 #pragma unused (flags)
970 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), dst
);
971 #if CONFIG_MACF_SOCKET_SUBSET
972 if ((error
= mac_socket_check_connect(kauth_cred_get(), so
, dst
)) != 0)
974 #endif /* CONFIG_MACF_SOCKET_SUBSET */
977 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
982 if ((so
->so_proto
->pr_flags
& PR_DATA_IDEMPOTENT
) &&
983 (flags
& CONNECT_DATA_IDEMPOTENT
)) {
984 so
->so_flags1
|= SOF1_DATA_IDEMPOTENT
;
986 if (flags
& CONNECT_DATA_AUTHENTICATED
)
987 so
->so_flags1
|= SOF1_DATA_AUTHENTICATED
;
991 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
992 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
993 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
994 * Case 3 allows user to combine write with connect even if they have
995 * no use for TFO (such as regular TCP, and UDP).
996 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
998 if ((so
->so_proto
->pr_flags
& PR_PRECONN_WRITE
) &&
999 ((flags
& CONNECT_RESUME_ON_READ_WRITE
) || auio
))
1000 so
->so_flags1
|= SOF1_PRECONNECT_DATA
;
1003 * If a user sets data idempotent and does not pass an uio, or
1004 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
1005 * SOF1_DATA_IDEMPOTENT.
1007 if (!(so
->so_flags1
& SOF1_PRECONNECT_DATA
) &&
1008 (so
->so_flags1
& SOF1_DATA_IDEMPOTENT
)) {
1009 /* We should return EINVAL instead perhaps. */
1010 so
->so_flags1
&= ~SOF1_DATA_IDEMPOTENT
;
1013 error
= soconnectxlocked(so
, src
, dst
, p
, ifscope
,
1014 aid
, pcid
, 0, NULL
, 0, auio
, bytes_written
);
1016 so
->so_state
&= ~SS_ISCONNECTING
;
1020 * If, after the call to soconnectxlocked the flag is still set (in case
1021 * data has been queued and the connect() has actually been triggered,
1022 * it will have been unset by the transport), we exit immediately. There
1023 * is no reason to wait on any event.
1025 if (so
->so_flags1
& SOF1_PRECONNECT_DATA
) {
1029 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
1030 error
= EINPROGRESS
;
1033 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
1034 lck_mtx_t
*mutex_held
;
1036 if (so
->so_proto
->pr_getlock
!= NULL
)
1037 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
1039 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1040 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
,
1041 PSOCK
| PCATCH
, __func__
, 0);
1042 if (so
->so_state
& SS_DRAINING
) {
1043 error
= ECONNABORTED
;
1049 error
= so
->so_error
;
1053 socket_unlock(so
, 1);
1058 peeloff(struct proc
*p
, struct peeloff_args
*uap
, int *retval
)
1060 #pragma unused(p, uap, retval)
1062 * Due to similarity with a POSIX interface, define as
1063 * an unofficial cancellation point.
1065 __pthread_testcancel(1);
1070 disconnectx(struct proc
*p
, struct disconnectx_args
*uap
, int *retval
)
1073 * Due to similarity with a POSIX interface, define as
1074 * an unofficial cancellation point.
1076 __pthread_testcancel(1);
1077 return (disconnectx_nocancel(p
, uap
, retval
));
1081 disconnectx_nocancel(struct proc
*p
, struct disconnectx_args
*uap
, int *retval
)
1083 #pragma unused(p, retval)
1088 error
= file_socket(fd
, &so
);
1096 error
= sodisconnectx(so
, uap
->aid
, uap
->cid
);
1103 * Returns: 0 Success
1104 * socreate:EAFNOSUPPORT
1105 * socreate:EPROTOTYPE
1106 * socreate:EPROTONOSUPPORT
1110 * socreate:??? [other protocol families, IPSEC]
1116 * soconnect2:EPROTOTYPE
1117 * soconnect2:??? [other protocol families]
1120 socketpair(struct proc
*p
, struct socketpair_args
*uap
,
1121 __unused
int32_t *retval
)
1123 struct fileproc
*fp1
, *fp2
;
1124 struct socket
*so1
, *so2
;
1125 int fd
, error
, sv
[2];
1127 AUDIT_ARG(socket
, uap
->domain
, uap
->type
, uap
->protocol
);
1128 error
= socreate(uap
->domain
, &so1
, uap
->type
, uap
->protocol
);
1131 error
= socreate(uap
->domain
, &so2
, uap
->type
, uap
->protocol
);
1135 error
= falloc(p
, &fp1
, &fd
, vfs_context_current());
1139 fp1
->f_flag
= FREAD
|FWRITE
;
1140 fp1
->f_ops
= &socketops
;
1141 fp1
->f_data
= (caddr_t
)so1
;
1144 error
= falloc(p
, &fp2
, &fd
, vfs_context_current());
1148 fp2
->f_flag
= FREAD
|FWRITE
;
1149 fp2
->f_ops
= &socketops
;
1150 fp2
->f_data
= (caddr_t
)so2
;
1153 error
= soconnect2(so1
, so2
);
1157 if (uap
->type
== SOCK_DGRAM
) {
1159 * Datagram socket connection is asymmetric.
1161 error
= soconnect2(so2
, so1
);
1167 if ((error
= copyout(sv
, uap
->rsv
, 2 * sizeof (int))) != 0)
1171 procfdtbl_releasefd(p
, sv
[0], NULL
);
1172 procfdtbl_releasefd(p
, sv
[1], NULL
);
1173 fp_drop(p
, sv
[0], fp1
, 1);
1174 fp_drop(p
, sv
[1], fp2
, 1);
1179 fp_free(p
, sv
[1], fp2
);
1181 fp_free(p
, sv
[0], fp1
);
1183 (void) soclose(so2
);
1185 (void) soclose(so1
);
1190 * Returns: 0 Success
1195 * EACCES Mandatory Access Control failure
1196 * file_socket:ENOTSOCK
1198 * getsockaddr:ENAMETOOLONG Filename too long
1199 * getsockaddr:EINVAL Invalid argument
1200 * getsockaddr:ENOMEM Not enough space
1201 * getsockaddr:EFAULT Bad address
1202 * <pru_sosend>:EACCES[TCP]
1203 * <pru_sosend>:EADDRINUSE[TCP]
1204 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1205 * <pru_sosend>:EAFNOSUPPORT[TCP]
1206 * <pru_sosend>:EAGAIN[TCP]
1207 * <pru_sosend>:EBADF
1208 * <pru_sosend>:ECONNRESET[TCP]
1209 * <pru_sosend>:EFAULT
1210 * <pru_sosend>:EHOSTUNREACH[TCP]
1211 * <pru_sosend>:EINTR
1212 * <pru_sosend>:EINVAL
1213 * <pru_sosend>:EISCONN[AF_INET]
1214 * <pru_sosend>:EMSGSIZE[TCP]
1215 * <pru_sosend>:ENETDOWN[TCP]
1216 * <pru_sosend>:ENETUNREACH[TCP]
1217 * <pru_sosend>:ENOBUFS
1218 * <pru_sosend>:ENOMEM[TCP]
1219 * <pru_sosend>:ENOTCONN[AF_INET]
1220 * <pru_sosend>:EOPNOTSUPP
1221 * <pru_sosend>:EPERM[TCP]
1222 * <pru_sosend>:EPIPE
1223 * <pru_sosend>:EWOULDBLOCK
1224 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1225 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1226 * <pru_sosend>:??? [value from so_error]
1230 sendit(struct proc
*p
, struct socket
*so
, struct user_msghdr
*mp
, uio_t uiop
,
1231 int flags
, int32_t *retval
)
1233 struct mbuf
*control
= NULL
;
1234 struct sockaddr_storage ss
;
1235 struct sockaddr
*to
= NULL
;
1236 boolean_t want_free
= TRUE
;
1240 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1242 if (mp
->msg_name
!= USER_ADDR_NULL
) {
1243 if (mp
->msg_namelen
> sizeof (ss
)) {
1244 error
= getsockaddr(so
, &to
, mp
->msg_name
,
1245 mp
->msg_namelen
, TRUE
);
1247 error
= getsockaddr_s(so
, &ss
, mp
->msg_name
,
1248 mp
->msg_namelen
, TRUE
);
1250 to
= (struct sockaddr
*)&ss
;
1256 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), to
);
1258 if (mp
->msg_control
!= USER_ADDR_NULL
) {
1259 if (mp
->msg_controllen
< sizeof (struct cmsghdr
)) {
1263 error
= sockargs(&control
, mp
->msg_control
,
1264 mp
->msg_controllen
, MT_CONTROL
);
1269 #if CONFIG_MACF_SOCKET_SUBSET
1271 * We check the state without holding the socket lock;
1272 * if a race condition occurs, it would simply result
1273 * in an extra call to the MAC check function.
1276 !(so
->so_state
& SS_DEFUNCT
) &&
1277 (error
= mac_socket_check_send(kauth_cred_get(), so
, to
)) != 0)
1279 #endif /* CONFIG_MACF_SOCKET_SUBSET */
1281 len
= uio_resid(uiop
);
1282 error
= so
->so_proto
->pr_usrreqs
->pru_sosend(so
, to
, uiop
, 0,
1285 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
1286 error
== EINTR
|| error
== EWOULDBLOCK
))
1288 /* Generation of SIGPIPE can be controlled per socket */
1289 if (error
== EPIPE
&& !(so
->so_flags
& SOF_NOSIGPIPE
))
1290 psignal(p
, SIGPIPE
);
1293 *retval
= (int)(len
- uio_resid(uiop
));
1295 if (to
!= NULL
&& want_free
)
1298 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1304 * Returns: 0 Success
1306 * sendit:??? [see sendit definition in this file]
1307 * write:??? [4056224: applicable for pipes]
1310 sendto(struct proc
*p
, struct sendto_args
*uap
, int32_t *retval
)
1312 __pthread_testcancel(1);
1313 return (sendto_nocancel(p
, (struct sendto_nocancel_args
*)uap
, retval
));
/*
 * sendto_nocancel: send a single user buffer on socket descriptor uap->s.
 *
 * Visible steps: trace start, audit the descriptor, create a one-iovec
 * uio (user address space chosen by IS_64BIT_PROCESS(p)), add
 * uap->buf/uap->len to it, fill a struct user_msghdr with the destination
 * (uap->to / uap->tolen) and no control data, resolve the descriptor with
 * file_socket(), and hand everything to sendit().  The end trace point
 * records both error and *retval.
 */
1317 sendto_nocancel(struct proc
*p
,
1318 struct sendto_nocancel_args
*uap
,
1321 struct user_msghdr msg
;
1326 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1327 AUDIT_ARG(fd
, uap
->s
);
/* one iovec is enough: the payload is the single buf/len pair */
1329 auio
= uio_create(1, 0,
1330 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1336 uio_addiov(auio
, uap
->buf
, uap
->len
);
1338 msg
.msg_name
= uap
->to
;
1339 msg
.msg_namelen
= uap
->tolen
;
1340 /* no need to set up msg_iov. sendit uses uio_t we send it */
1343 msg
.msg_control
= 0;
1346 error
= file_socket(uap
->s
, &so
);
1353 error
= sendit(p
, so
, &msg
, auio
, uap
->flags
, retval
);
1361 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_END
, error
, *retval
, 0, 0, 0);
1367 * Returns: 0 Success
1370 * sendit:??? [see sendit definition in this file]
/*
 * sendmsg: thin entry point in front of sendmsg_nocancel().
 * Runs __pthread_testcancel(1) first, then forwards p and uap (cast to
 * struct sendmsg_nocancel_args pointer) to sendmsg_nocancel() and
 * returns its result.
 */
1373 sendmsg(struct proc
*p
, struct sendmsg_args
*uap
, int32_t *retval
)
1375 __pthread_testcancel(1);
1376 return (sendmsg_nocancel(p
, (struct sendmsg_nocancel_args
*)uap
,
/*
 * sendmsg_nocancel: send a scatter/gather message described by a user
 * msghdr on socket descriptor uap->s.
 *
 * Visible steps:
 *  - copy in either a user64_msghdr or a user32_msghdr, selected by
 *    IS_64BIT_PROCESS(p), and widen it field by field into user_msg;
 *  - range-check msg_iovlen against (0, UIO_MAXIOV]; the failing case
 *    traces EMSGSIZE;
 *  - create a uio with msg_iovlen slots, copy the user iovec array into
 *    the uio's own iovec storage, repoint user_msg.msg_iov at that
 *    storage, and compute the residual count;
 *  - clear msg_flags (ignored on send), resolve the descriptor with
 *    file_socket(), and call sendit().
 */
1381 sendmsg_nocancel(struct proc
*p
, struct sendmsg_nocancel_args
*uap
,
1384 struct user32_msghdr msg32
;
1385 struct user64_msghdr msg64
;
1386 struct user_msghdr user_msg
;
1391 struct user_iovec
*iovp
;
1394 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1395 AUDIT_ARG(fd
, uap
->s
);
/* pick the copyin target/size matching the caller's ABI */
1396 if (IS_64BIT_PROCESS(p
)) {
1397 msghdrp
= (caddr_t
)&msg64
;
1398 size_of_msghdr
= sizeof (msg64
);
1400 msghdrp
= (caddr_t
)&msg32
;
1401 size_of_msghdr
= sizeof (msg32
);
1403 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
1405 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
/* widen the ABI-specific header into the kernel-internal user_msghdr */
1409 if (IS_64BIT_PROCESS(p
)) {
1410 user_msg
.msg_flags
= msg64
.msg_flags
;
1411 user_msg
.msg_controllen
= msg64
.msg_controllen
;
1412 user_msg
.msg_control
= msg64
.msg_control
;
1413 user_msg
.msg_iovlen
= msg64
.msg_iovlen
;
1414 user_msg
.msg_iov
= msg64
.msg_iov
;
1415 user_msg
.msg_namelen
= msg64
.msg_namelen
;
1416 user_msg
.msg_name
= msg64
.msg_name
;
1418 user_msg
.msg_flags
= msg32
.msg_flags
;
1419 user_msg
.msg_controllen
= msg32
.msg_controllen
;
1420 user_msg
.msg_control
= msg32
.msg_control
;
1421 user_msg
.msg_iovlen
= msg32
.msg_iovlen
;
1422 user_msg
.msg_iov
= msg32
.msg_iov
;
1423 user_msg
.msg_namelen
= msg32
.msg_namelen
;
1424 user_msg
.msg_name
= msg32
.msg_name
;
/* reject an iovec count outside (0, UIO_MAXIOV] before allocating */
1427 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
1428 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, EMSGSIZE
,
1433 /* allocate a uio large enough to hold the number of iovecs passed */
1434 auio
= uio_create(user_msg
.msg_iovlen
, 0,
1435 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1442 if (user_msg
.msg_iovlen
) {
1444 * get location of iovecs within the uio.
1445 * then copyin the iovecs from user space.
1447 iovp
= uio_iovsaddr(auio
);
1452 error
= copyin_user_iovec_array(user_msg
.msg_iov
,
1453 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1454 user_msg
.msg_iovlen
, iovp
);
/* from here on msg_iov refers to the kernel copy held inside the uio */
1457 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
1459 /* finish setup of uio_t */
1460 error
= uio_calculateresid(auio
);
1465 user_msg
.msg_iov
= 0;
1468 /* msg_flags is ignored for send */
1469 user_msg
.msg_flags
= 0;
1471 error
= file_socket(uap
->s
, &so
);
1478 error
= sendit(p
, so
, &user_msg
, auio
, uap
->flags
, retval
);
1485 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
/*
 * sendmsg_x: send an array of uap->cnt messages on socket descriptor
 * uap->s in one call.
 *
 * Visible steps:
 *  - resolve the descriptor with file_socket();
 *  - range-check uap->cnt against (0, UIO_MAXIOV] and clip it to
 *    somaxsendmsgx;
 *  - allocate (zeroed, M_TEMP) the internal user_msghdr_x array, the
 *    uio pointer array, and a bounce buffer umsgp for the ABI-specific
 *    headers, then copy the headers in and internalize them;
 *  - validate the uio array and every header (no flags; address or
 *    control data is only noted in has_addr_or_ctl);
 *  - send either through pru_sosend_list() in one shot or message by
 *    message through sendit();
 *  - apply the partial-transfer / SIGPIPE handling, externalize the
 *    per-message results, store the message count in *retval, and free
 *    everything.
 */
1491 sendmsg_x(struct proc
*p
, struct sendmsg_x_args
*uap
, user_ssize_t
*retval
)
1494 struct user_msghdr_x
*user_msg_x
= NULL
;
1495 struct uio
**uiop
= NULL
;
1498 struct sockaddr
*to
= NULL
;
1499 user_ssize_t len_before
= 0, len_after
;
1501 size_t size_of_msghdr
;
1504 int has_addr_or_ctl
= 0;
1506 KERNEL_DEBUG(DBG_FNC_SENDMSG_X
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1508 error
= file_socket(uap
->s
, &so
);
1519 * Input parameter range check
1521 if (uap
->cnt
== 0 || uap
->cnt
> UIO_MAXIOV
) {
1526 * Clip to max currently allowed
1528 if (uap
->cnt
> somaxsendmsgx
)
1529 uap
->cnt
= somaxsendmsgx
;
1531 user_msg_x
= _MALLOC(uap
->cnt
* sizeof(struct user_msghdr_x
),
1532 M_TEMP
, M_WAITOK
| M_ZERO
);
1533 if (user_msg_x
== NULL
) {
1534 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__
);
1538 uiop
= _MALLOC(uap
->cnt
* sizeof(struct uio
*),
1539 M_TEMP
, M_WAITOK
| M_ZERO
);
1541 DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__
);
/* header size depends on the caller's ABI (32- vs 64-bit layout) */
1546 size_of_msghdr
= IS_64BIT_PROCESS(p
) ?
1547 sizeof(struct user64_msghdr_x
) : sizeof(struct user32_msghdr_x
);
1549 umsgp
= _MALLOC(uap
->cnt
* size_of_msghdr
,
1550 M_TEMP
, M_WAITOK
| M_ZERO
);
1551 if (umsgp
== NULL
) {
/*
 * NOTE(review): this is the only failure path here that uses printf
 * instead of DBG_PRINTF, and the text names user_msg_x although the
 * allocation being checked is umsgp -- confirm and align.
 */
1552 printf("%s _MALLOC() user_msg_x failed\n", __func__
);
1556 error
= copyin(uap
->msgp
, umsgp
, uap
->cnt
* size_of_msghdr
);
1558 DBG_PRINTF("%s copyin() failed\n", __func__
);
1561 error
= internalize_user_msghdr_array(umsgp
,
1562 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1563 UIO_WRITE
, uap
->cnt
, user_msg_x
, uiop
);
/*
 * NOTE(review): the message below names copyin_user_msghdr_array() but
 * the call above is internalize_user_msghdr_array() -- confirm.
 */
1565 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__
);
1569 * Make sure the size of each message iovec and
1570 * the aggregate size of all the iovec is valid
1572 if (uio_array_is_valid(uiop
, uap
->cnt
) == 0) {
1578 * Sanity check on passed arguments
1580 for (i
= 0; i
< uap
->cnt
; i
++) {
1581 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
1584 * No flags on send message
1586 if (mp
->msg_flags
!= 0) {
1591 * No support for address or ancillary data (yet)
1593 if (mp
->msg_name
!= USER_ADDR_NULL
|| mp
->msg_namelen
!= 0)
1594 has_addr_or_ctl
= 1;
1596 if (mp
->msg_control
!= USER_ADDR_NULL
||
1597 mp
->msg_controllen
!= 0)
1598 has_addr_or_ctl
= 1;
1600 #if CONFIG_MACF_SOCKET_SUBSET
1602 * We check the state without holding the socket lock;
1603 * if a race condition occurs, it would simply result
1604 * in an extra call to the MAC check function.
1606 * Note: The following check is never true taken with the
1607 * current limitation that we do not accept to pass an address,
1608 * this is effectively placeholder code. If we add support for
1609 * addresses, we will have to check every address.
1612 !(so
->so_state
& SS_DEFUNCT
) &&
1613 (error
= mac_socket_check_send(kauth_cred_get(), so
, to
))
1616 #endif /* MAC_SOCKET_SUBSET */
/* total bytes still to send across all messages, before sending */
1619 len_before
= uio_array_resid(uiop
, uap
->cnt
);
1622 * Feed list of packets at once only for connected socket without
/*
 * NOTE(review): uap->cnt was clipped to somaxsendmsgx above, so when
 * somaxsendmsgx == 0 the count reaching this test would be 0 --
 * confirm whether the batched path is meant to be reachable.
 */
1625 if (so
->so_proto
->pr_usrreqs
->pru_sosend_list
!=
1626 pru_sosend_list_notsupp
&&
1627 has_addr_or_ctl
== 0 && somaxsendmsgx
== 0) {
1628 error
= so
->so_proto
->pr_usrreqs
->pru_sosend_list(so
, uiop
,
1629 uap
->cnt
, uap
->flags
);
/* per-message path: narrow each user_msghdr_x and send it via sendit() */
1631 for (i
= 0; i
< uap
->cnt
; i
++) {
1632 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
1633 struct user_msghdr user_msg
;
1634 uio_t auio
= uiop
[i
];
1637 user_msg
.msg_flags
= mp
->msg_flags
;
1638 user_msg
.msg_controllen
= mp
->msg_controllen
;
1639 user_msg
.msg_control
= mp
->msg_control
;
1640 user_msg
.msg_iovlen
= mp
->msg_iovlen
;
1641 user_msg
.msg_iov
= mp
->msg_iov
;
1642 user_msg
.msg_namelen
= mp
->msg_namelen
;
1643 user_msg
.msg_name
= mp
->msg_name
;
1645 error
= sendit(p
, so
, &user_msg
, auio
, uap
->flags
,
1651 len_after
= uio_array_resid(uiop
, uap
->cnt
);
/* sending can only consume data, never add to the residual */
1653 VERIFY(len_after
<= len_before
);
/* some data moved and the error is an interruption/would-block one */
1656 if (len_after
!= len_before
&& (error
== ERESTART
||
1657 error
== EINTR
|| error
== EWOULDBLOCK
||
1660 /* Generation of SIGPIPE can be controlled per socket */
1661 if (error
== EPIPE
&& !(so
->so_flags
& SOF_NOSIGPIPE
))
1662 psignal(p
, SIGPIPE
);
1665 uiocnt
= externalize_user_msghdr_array(umsgp
,
1666 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1667 UIO_WRITE
, uap
->cnt
, user_msg_x
, uiop
);
1669 *retval
= (int)(uiocnt
);
1675 _FREE(umsgp
, M_TEMP
);
1677 free_uio_array(uiop
, uap
->cnt
);
1678 _FREE(uiop
, M_TEMP
);
1680 if (user_msg_x
!= NULL
)
1681 _FREE(user_msg_x
, M_TEMP
);
1683 KERNEL_DEBUG(DBG_FNC_SENDMSG_X
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
/*
 * copyout_sa: copy a kernel socket address out to user space.
 *
 * fromsa   address to copy (may be null)
 * name     user destination
 * namelen  length in/out parameter (its reads/writes are not among the
 *          visible statements -- presumably seeds len and receives
 *          sa_len; confirm)
 *
 * When len is positive and fromsa is non-null, at most
 * min(len, fromsa->sa_len) bytes are copied with copyout().
 */
1690 copyout_sa(struct sockaddr
*fromsa
, user_addr_t name
, socklen_t
*namelen
)
1693 socklen_t sa_len
= 0;
1697 if (len
<= 0 || fromsa
== 0) {
1701 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1703 sa_len
= fromsa
->sa_len
;
/* never copy more than the caller's buffer or the address itself */
1704 len
= MIN((unsigned int)len
, sa_len
);
1705 error
= copyout(fromsa
, name
, (unsigned)len
);
/*
 * copyout_control: copy a chain of control-message mbufs out to the user
 * buffer at `control`.
 *
 * Walks the mbuf chain m while there is room left (len > 0) and, inside
 * each mbuf, walks the cmsghdr records.  SOL_SOCKET/SCM_TIMESTAMP records
 * are rebuilt in a temporary buffer using user64_timeval or
 * user32_timeval, chosen by proc_is64bit(p); all other records are
 * copied as they are.  MSG_CTRUNC is OR-ed into *flags on the paths
 * paired with the "fits" tests (presumably the does-not-fit case --
 * confirm).  On the way out *controllen is set to the number of bytes
 * written, i.e. ctlbuf - control.
 */
1715 copyout_control(struct proc
*p
, struct mbuf
*m
, user_addr_t control
,
1716 socklen_t
*controllen
, int *flags
)
1726 while (m
&& len
> 0) {
1727 unsigned int tocopy
;
1728 struct cmsghdr
*cp
= mtod(m
, struct cmsghdr
*);
1729 int cp_size
= CMSG_ALIGN(cp
->cmsg_len
);
1730 int buflen
= m
->m_len
;
1732 while (buflen
> 0 && len
> 0) {
1734 * SCM_TIMESTAMP hack because struct timeval has a
1735 * different size for 32 bits and 64 bits processes
1737 if (cp
->cmsg_level
== SOL_SOCKET
&& cp
->cmsg_type
== SCM_TIMESTAMP
) {
/* scratch record sized for the larger (64-bit) timeval layout */
1738 unsigned char tmp_buffer
[CMSG_SPACE(sizeof(struct user64_timeval
))] = {};
1739 struct cmsghdr
*tmp_cp
= (struct cmsghdr
*)(void *)tmp_buffer
;
1741 struct timeval
*tv
= (struct timeval
*)(void *)CMSG_DATA(cp
);
1743 tmp_cp
->cmsg_level
= SOL_SOCKET
;
1744 tmp_cp
->cmsg_type
= SCM_TIMESTAMP
;
1746 if (proc_is64bit(p
)) {
1747 struct user64_timeval
*tv64
= (struct user64_timeval
*)(void *)CMSG_DATA(tmp_cp
);
1749 tv64
->tv_sec
= tv
->tv_sec
;
1750 tv64
->tv_usec
= tv
->tv_usec
;
1752 tmp_cp
->cmsg_len
= CMSG_LEN(sizeof(struct user64_timeval
));
1753 tmp_space
= CMSG_SPACE(sizeof(struct user64_timeval
));
1755 struct user32_timeval
*tv32
= (struct user32_timeval
*)(void *)CMSG_DATA(tmp_cp
);
1757 tv32
->tv_sec
= tv
->tv_sec
;
1758 tv32
->tv_usec
= tv
->tv_usec
;
1760 tmp_cp
->cmsg_len
= CMSG_LEN(sizeof(struct user32_timeval
));
1761 tmp_space
= CMSG_SPACE(sizeof(struct user32_timeval
));
1763 if (len
>= tmp_space
) {
1766 *flags
|= MSG_CTRUNC
;
1769 error
= copyout(tmp_buffer
, ctlbuf
, tocopy
);
1773 if (cp_size
> buflen
) {
/*
 * NOTE(review): the two adjacent literals below concatenate without a
 * separating space, so the message reads "...somethingwrong with
 * alignment!" -- confirm and fix the text.
 */
1774 panic("cp_size > buflen, something"
1775 "wrong with alignment!");
1777 if (len
>= cp_size
) {
1780 *flags
|= MSG_CTRUNC
;
1783 error
= copyout((caddr_t
) cp
, ctlbuf
, tocopy
);
/* step to the next record using the aligned size of the current one */
1792 cp
= (struct cmsghdr
*)(void *)
1793 ((unsigned char *) cp
+ cp_size
);
1794 cp_size
= CMSG_ALIGN(cp
->cmsg_len
);
1799 *controllen
= ctlbuf
- control
;
1805 * Returns: 0 Success
1809 * EACCES Mandatory Access Control failure
1812 * <pru_soreceive>:ENOBUFS
1813 * <pru_soreceive>:ENOTCONN
1814 * <pru_soreceive>:EWOULDBLOCK
1815 * <pru_soreceive>:EFAULT
1816 * <pru_soreceive>:EINTR
1817 * <pru_soreceive>:EBADF
1818 * <pru_soreceive>:EINVAL
1819 * <pru_soreceive>:EMSGSIZE
1820 * <pru_soreceive>:???
1822 * Notes: Additional return values from calls through <pru_soreceive>
1823 * depend on protocols other than TCP or AF_UNIX, which are
/*
 * recvit: common receive path behind recvfrom()/recvmsg().
 *
 * p         calling process
 * s         socket descriptor
 * mp        kernel-internal message header (name/control buffers, flags)
 * uiop      uio describing the user data buffers
 * namelenp  user address that receives the address length (copied out
 *           from mp->msg_namelen)
 * retval    receives the number of bytes transferred
 *
 * Visible steps: look the descriptor up with fp_lookup(), require
 * DTYPE_SOCKET, run the MAC receive check for sockets that are neither
 * defunct nor connected on protocols without PR_CONNREQUIRED, reject a
 * negative residual (EINVAL is traced), call the protocol's
 * pru_soreceive() (requesting control mbufs only if mp->msg_control is
 * set), audit the source address, compute *retval, copy out the address,
 * its length and the control data, free fromsa and drop the descriptor.
 */
1827 recvit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
1828 user_addr_t namelenp
, int32_t *retval
)
1832 struct mbuf
*control
= 0;
1834 struct sockaddr
*fromsa
= 0;
1835 struct fileproc
*fp
;
1837 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1839 if ((error
= fp_lookup(p
, s
, &fp
, 1))) {
1840 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
/* the descriptor must refer to a socket */
1844 if (fp
->f_type
!= DTYPE_SOCKET
) {
1845 fp_drop(p
, s
, fp
, 1);
1850 so
= (struct socket
*)fp
->f_data
;
1852 fp_drop(p
, s
, fp
, 1);
1859 #if CONFIG_MACF_SOCKET_SUBSET
1861 * We check the state without holding the socket lock;
1862 * if a race condition occurs, it would simply result
1863 * in an extra call to the MAC check function.
1865 if (!(so
->so_state
& SS_DEFUNCT
) &&
1866 !(so
->so_state
& SS_ISCONNECTED
) &&
1867 !(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) &&
1868 (error
= mac_socket_check_receive(kauth_cred_get(), so
)) != 0)
1870 #endif /* MAC_SOCKET_SUBSET */
1871 if (uio_resid(uiop
) < 0) {
1872 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, EINVAL
, 0, 0, 0, 0);
/* remember the requested size so the transferred count can be derived */
1877 len
= uio_resid(uiop
);
1878 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive(so
, &fromsa
, uiop
,
1879 (struct mbuf
**)0, mp
->msg_control
? &control
: (struct mbuf
**)0,
1882 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()),
/* some data moved and the error is an interruption/would-block one */
1885 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
1886 error
== EINTR
|| error
== EWOULDBLOCK
))
1892 *retval
= len
- uio_resid(uiop
);
1895 error
= copyout_sa(fromsa
, mp
->msg_name
, &mp
->msg_namelen
);
1898 /* return the actual, untruncated address length */
1900 (error
= copyout((caddr_t
)&mp
->msg_namelen
, namelenp
,
1906 if (mp
->msg_control
) {
1907 error
= copyout_control(p
, control
, mp
->msg_control
,
1908 &mp
->msg_controllen
, &mp
->msg_flags
);
1912 FREE(fromsa
, M_SONAME
);
1915 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1917 fp_drop(p
, s
, fp
, 0);
1922 * Returns: 0 Success
1926 * read:??? [4056224: applicable for pipes]
1928 * Notes: The read entry point is only called as part of support for
1929 * binary backward compatibility; new code should use read
1930 * instead of recv or recvfrom when attempting to read data
1933 * For full documentation of the return codes from recvit, see
1934 * the block header for the recvit function.
/*
 * recvfrom: thin entry point in front of recvfrom_nocancel().
 * Runs __pthread_testcancel(1) first, then forwards p and uap (cast to
 * struct recvfrom_nocancel_args pointer) to recvfrom_nocancel() and
 * returns its result.
 */
1937 recvfrom(struct proc
*p
, struct recvfrom_args
*uap
, int32_t *retval
)
1939 __pthread_testcancel(1);
1940 return (recvfrom_nocancel(p
, (struct recvfrom_nocancel_args
*)uap
,
/*
 * recvfrom_nocancel: receive into a single user buffer from socket
 * descriptor uap->s, optionally returning the sender's address.
 *
 * Visible steps: trace start, audit the descriptor, read the caller's
 * address-buffer length from uap->fromlenaddr when that pointer is set
 * (msg_namelen is otherwise set to 0), build a one-iovec uio over
 * uap->buf/uap->len, fill a struct user_msghdr with no control data and
 * uap->flags, and call recvit() with uap->fromlenaddr as the place to
 * report the address length.
 */
1945 recvfrom_nocancel(struct proc
*p
, struct recvfrom_nocancel_args
*uap
,
1948 struct user_msghdr msg
;
1952 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1953 AUDIT_ARG(fd
, uap
->s
);
1955 if (uap
->fromlenaddr
) {
1956 error
= copyin(uap
->fromlenaddr
,
1957 (caddr_t
)&msg
.msg_namelen
, sizeof (msg
.msg_namelen
));
1961 msg
.msg_namelen
= 0;
1963 msg
.msg_name
= uap
->from
;
/* one iovec is enough: the destination is the single buf/len pair */
1964 auio
= uio_create(1, 0,
1965 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1971 uio_addiov(auio
, uap
->buf
, uap
->len
);
1972 /* no need to set up msg_iov. recvit uses uio_t we send it */
1975 msg
.msg_control
= 0;
1976 msg
.msg_controllen
= 0;
1977 msg
.msg_flags
= uap
->flags
;
1978 error
= recvit(p
, uap
->s
, &msg
, auio
, uap
->fromlenaddr
, retval
);
1983 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1989 * Returns: 0 Success
1996 * Notes: For full documentation of the return codes from recvit, see
1997 * the block header for the recvit function.
/*
 * recvmsg: thin entry point in front of recvmsg_nocancel().
 * Runs __pthread_testcancel(1) first, then forwards p and uap (cast to
 * struct recvmsg_nocancel_args pointer) to recvmsg_nocancel() and
 * returns its result.
 */
2000 recvmsg(struct proc
*p
, struct recvmsg_args
*uap
, int32_t *retval
)
2002 __pthread_testcancel(1);
2003 return (recvmsg_nocancel(p
, (struct recvmsg_nocancel_args
*)uap
,
/*
 * recvmsg_nocancel: receive a scatter/gather message described by a user
 * msghdr from socket descriptor uap->s and write the updated header back.
 *
 * Visible steps:
 *  - copy in a user64_msghdr or user32_msghdr (by IS_64BIT_PROCESS(p))
 *    and widen it into user_msg;
 *  - range-check msg_iovlen against (0, UIO_MAXIOV]; the failing case
 *    traces EMSGSIZE;
 *  - take the receive flags from uap->flags;
 *  - create a uio, remember the user's iovec pointer in uiov, copy the
 *    iovecs into the uio's storage and compute the residual;
 *  - call recvit();
 *  - restore the user's iovec pointer, narrow user_msg back into
 *    msg64/msg32 and copy the header out to uap->msg.
 */
2008 recvmsg_nocancel(struct proc
*p
, struct recvmsg_nocancel_args
*uap
,
2011 struct user32_msghdr msg32
;
2012 struct user64_msghdr msg64
;
2013 struct user_msghdr user_msg
;
2019 struct user_iovec
*iovp
;
2021 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
2022 AUDIT_ARG(fd
, uap
->s
);
/* pick the copyin/copyout buffer and size matching the caller's ABI */
2023 if (IS_64BIT_PROCESS(p
)) {
2024 msghdrp
= (caddr_t
)&msg64
;
2025 size_of_msghdr
= sizeof (msg64
);
2027 msghdrp
= (caddr_t
)&msg32
;
2028 size_of_msghdr
= sizeof (msg32
);
2030 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
2032 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2036 /* only need to copy if user process is not 64-bit */
2037 if (IS_64BIT_PROCESS(p
)) {
2038 user_msg
.msg_flags
= msg64
.msg_flags
;
2039 user_msg
.msg_controllen
= msg64
.msg_controllen
;
2040 user_msg
.msg_control
= msg64
.msg_control
;
2041 user_msg
.msg_iovlen
= msg64
.msg_iovlen
;
2042 user_msg
.msg_iov
= msg64
.msg_iov
;
2043 user_msg
.msg_namelen
= msg64
.msg_namelen
;
2044 user_msg
.msg_name
= msg64
.msg_name
;
2046 user_msg
.msg_flags
= msg32
.msg_flags
;
2047 user_msg
.msg_controllen
= msg32
.msg_controllen
;
2048 user_msg
.msg_control
= msg32
.msg_control
;
2049 user_msg
.msg_iovlen
= msg32
.msg_iovlen
;
2050 user_msg
.msg_iov
= msg32
.msg_iov
;
2051 user_msg
.msg_namelen
= msg32
.msg_namelen
;
2052 user_msg
.msg_name
= msg32
.msg_name
;
/* reject an iovec count outside (0, UIO_MAXIOV] before allocating */
2055 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
2056 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, EMSGSIZE
,
/* the flags argument of the call replaces whatever the header carried */
2061 user_msg
.msg_flags
= uap
->flags
;
2063 /* allocate a uio large enough to hold the number of iovecs passed */
2064 auio
= uio_create(user_msg
.msg_iovlen
, 0,
2065 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
2073 * get location of iovecs within the uio. then copyin the iovecs from
2076 iovp
= uio_iovsaddr(auio
);
/* keep the user's iovec pointer so it can be restored before copyout */
2081 uiov
= user_msg
.msg_iov
;
2082 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
2083 error
= copyin_user_iovec_array(uiov
,
2084 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
2085 user_msg
.msg_iovlen
, iovp
);
2089 /* finish setup of uio_t */
2090 error
= uio_calculateresid(auio
);
2095 error
= recvit(p
, uap
->s
, &user_msg
, auio
, 0, retval
);
2097 user_msg
.msg_iov
= uiov
;
/* narrow the (possibly updated) header back into the caller's layout */
2098 if (IS_64BIT_PROCESS(p
)) {
2099 msg64
.msg_flags
= user_msg
.msg_flags
;
2100 msg64
.msg_controllen
= user_msg
.msg_controllen
;
2101 msg64
.msg_control
= user_msg
.msg_control
;
2102 msg64
.msg_iovlen
= user_msg
.msg_iovlen
;
2103 msg64
.msg_iov
= user_msg
.msg_iov
;
2104 msg64
.msg_namelen
= user_msg
.msg_namelen
;
2105 msg64
.msg_name
= user_msg
.msg_name
;
2107 msg32
.msg_flags
= user_msg
.msg_flags
;
2108 msg32
.msg_controllen
= user_msg
.msg_controllen
;
2109 msg32
.msg_control
= user_msg
.msg_control
;
2110 msg32
.msg_iovlen
= user_msg
.msg_iovlen
;
2111 msg32
.msg_iov
= user_msg
.msg_iov
;
2112 msg32
.msg_namelen
= user_msg
.msg_namelen
;
2113 msg32
.msg_name
= user_msg
.msg_name
;
2115 error
= copyout(msghdrp
, uap
->msg
, size_of_msghdr
);
2121 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
/*
 * recvmsg_x: receive up to uap->cnt messages from socket descriptor
 * uap->s in one call.
 *
 * Visible steps:
 *  - resolve the descriptor with file_socket();
 *  - range-check uap->cnt against (0, UIO_MAXIOV] and clip it to
 *    somaxrecvmsgx;
 *  - allocate the internal user_msghdr_x array, the recv_msg_elem array
 *    and a bounce buffer umsgp for the ABI-specific headers, then copy
 *    the headers in and internalize them;
 *  - validate the receive array and require msg_flags == 0 per message;
 *  - run the MAC receive check (same conditions as recvit());
 *  - receive either through pru_soreceive_list() or message by message
 *    through pru_soreceive();
 *  - externalize the headers, copy them back to uap->msgp, store the
 *    message count in *retval, copy out each message's address and
 *    control data, and free everything.
 */
2126 recvmsg_x(struct proc
*p
, struct recvmsg_x_args
*uap
, user_ssize_t
*retval
)
2128 int error
= EOPNOTSUPP
;
2129 struct user_msghdr_x
*user_msg_x
= NULL
;
2130 struct recv_msg_elem
*recv_msg_array
= NULL
;
2132 user_ssize_t len_before
= 0, len_after
;
2134 size_t size_of_msghdr
;
2139 KERNEL_DEBUG(DBG_FNC_RECVMSG_X
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
2141 error
= file_socket(uap
->s
, &so
);
2151 * Input parameter range check
2153 if (uap
->cnt
== 0 || uap
->cnt
> UIO_MAXIOV
) {
2157 if (uap
->cnt
> somaxrecvmsgx
)
2158 uap
->cnt
= somaxrecvmsgx
;
2160 user_msg_x
= _MALLOC(uap
->cnt
* sizeof(struct user_msghdr_x
),
2161 M_TEMP
, M_WAITOK
| M_ZERO
);
2162 if (user_msg_x
== NULL
) {
2163 DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__
);
2167 recv_msg_array
= alloc_recv_msg_array(uap
->cnt
);
2168 if (recv_msg_array
== NULL
) {
2169 DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__
);
/* header size depends on the caller's ABI (32- vs 64-bit layout) */
2173 size_of_msghdr
= IS_64BIT_PROCESS(p
) ?
2174 sizeof(struct user64_msghdr_x
) : sizeof(struct user32_msghdr_x
);
2176 umsgp
= _MALLOC(uap
->cnt
* size_of_msghdr
, M_TEMP
, M_WAITOK
| M_ZERO
);
2177 if (umsgp
== NULL
) {
2178 DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__
);
2182 error
= copyin(uap
->msgp
, umsgp
, uap
->cnt
* size_of_msghdr
);
2184 DBG_PRINTF("%s copyin() failed\n", __func__
);
2187 error
= internalize_recv_msghdr_array(umsgp
,
2188 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
2189 UIO_READ
, uap
->cnt
, user_msg_x
, recv_msg_array
);
/*
 * NOTE(review): the message below names copyin_user_msghdr_array() but
 * the call above is internalize_recv_msghdr_array() -- confirm.
 */
2191 DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__
);
2195 * Make sure the size of each message iovec and
2196 * the aggregate size of all the iovec is valid
2198 if (recv_msg_array_is_valid(recv_msg_array
, uap
->cnt
) == 0) {
2203 * Sanity check on passed arguments
2205 for (i
= 0; i
< uap
->cnt
; i
++) {
2206 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
2208 if (mp
->msg_flags
!= 0) {
2213 #if CONFIG_MACF_SOCKET_SUBSET
2215 * We check the state without holding the socket lock;
2216 * if a race condition occurs, it would simply result
2217 * in an extra call to the MAC check function.
2219 if (!(so
->so_state
& SS_DEFUNCT
) &&
2220 !(so
->so_state
& SS_ISCONNECTED
) &&
2221 !(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) &&
2222 (error
= mac_socket_check_receive(kauth_cred_get(), so
)) != 0)
2224 #endif /* MAC_SOCKET_SUBSET */
/* total room available across all messages, before receiving */
2226 len_before
= recv_msg_array_resid(recv_msg_array
, uap
->cnt
);
/*
 * NOTE(review): uap->cnt was clipped to somaxrecvmsgx above, so when
 * somaxrecvmsgx == 0 the count reaching this test would be 0 --
 * confirm whether the batched path is meant to be reachable.
 */
2228 if (so
->so_proto
->pr_usrreqs
->pru_soreceive_list
!=
2229 pru_soreceive_list_notsupp
&&
2230 somaxrecvmsgx
== 0) {
2231 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive_list(so
,
2232 recv_msg_array
, uap
->cnt
, &uap
->flags
);
/* per-message path: flags is a working copy that the loop may modify */
2234 int flags
= uap
->flags
;
2236 for (i
= 0; i
< uap
->cnt
; i
++) {
2237 struct recv_msg_elem
*recv_msg_elem
;
2239 struct sockaddr
**psa
;
2240 struct mbuf
**controlp
;
2242 recv_msg_elem
= recv_msg_array
+ i
;
2243 auio
= recv_msg_elem
->uio
;
2246 * Do not block if we got at least one packet
2249 flags
|= MSG_DONTWAIT
;
/* only ask for address/control when the caller supplied buffers for them */
2251 psa
= (recv_msg_elem
->which
& SOCK_MSG_SA
) ?
2252 &recv_msg_elem
->psa
: NULL
;
2253 controlp
= (recv_msg_elem
->which
& SOCK_MSG_CONTROL
) ?
2254 &recv_msg_elem
->controlp
: NULL
;
2256 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive(so
, psa
,
2257 auio
, (struct mbuf
**)0, controlp
, &flags
);
2263 recv_msg_elem
->which
|= SOCK_MSG_DATA
;
2265 * Stop on partial copy
2267 if (flags
& (MSG_RCVMORE
| MSG_TRUNC
))
/* undo the forced MSG_DONTWAIT unless the caller asked for it */
2270 if ((uap
->flags
& MSG_DONTWAIT
) == 0)
2271 flags
&= ~MSG_DONTWAIT
;
2275 len_after
= recv_msg_array_resid(recv_msg_array
, uap
->cnt
);
/* some data moved and the error is an interruption/would-block one */
2278 if (len_after
!= len_before
&& (error
== ERESTART
||
2279 error
== EINTR
|| error
== EWOULDBLOCK
))
2285 uiocnt
= externalize_recv_msghdr_array(umsgp
,
2286 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
2287 UIO_READ
, uap
->cnt
, user_msg_x
, recv_msg_array
);
2289 error
= copyout(umsgp
, uap
->msgp
, uap
->cnt
* size_of_msghdr
);
2291 DBG_PRINTF("%s copyout() failed\n", __func__
);
2294 *retval
= (int)(uiocnt
);
/* per-message copyout of the source address and control data */
2296 for (i
= 0; i
< uap
->cnt
; i
++) {
2297 struct user_msghdr_x
*mp
= user_msg_x
+ i
;
2298 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
2299 struct sockaddr
*fromsa
= recv_msg_elem
->psa
;
2302 error
= copyout_sa(fromsa
, mp
->msg_name
,
2307 if (mp
->msg_control
) {
2308 error
= copyout_control(p
, recv_msg_elem
->controlp
,
2309 mp
->msg_control
, &mp
->msg_controllen
,
2319 _FREE(umsgp
, M_TEMP
);
2320 if (recv_msg_array
!= NULL
)
2321 free_recv_msg_array(recv_msg_array
, uap
->cnt
);
2322 if (user_msg_x
!= NULL
)
2323 _FREE(user_msg_x
, M_TEMP
);
2325 KERNEL_DEBUG(DBG_FNC_RECVMSG_X
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
2331 * Returns: 0 Success
2333 * file_socket:ENOTSOCK
2336 * soshutdown:ENOTCONN
2337 * soshutdown:EADDRNOTAVAIL[TCP]
2338 * soshutdown:ENOBUFS[TCP]
2339 * soshutdown:EMSGSIZE[TCP]
2340 * soshutdown:EHOSTUNREACH[TCP]
2341 * soshutdown:ENETUNREACH[TCP]
2342 * soshutdown:ENETDOWN[TCP]
2343 * soshutdown:ENOMEM[TCP]
2344 * soshutdown:EACCES[TCP]
2345 * soshutdown:EMSGSIZE[TCP]
2346 * soshutdown:ENOBUFS[TCP]
2347 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2348 * soshutdown:??? [other protocol families]
/*
 * shutdown: shut down part or all of the connection on socket
 * descriptor uap->s.
 * Audits the descriptor, resolves it with file_socket(), and calls
 * soshutdown() with uap->how.  p and retval are marked __unused.
 */
2352 shutdown(__unused
struct proc
*p
, struct shutdown_args
*uap
,
2353 __unused
int32_t *retval
)
2358 AUDIT_ARG(fd
, uap
->s
);
2359 error
= file_socket(uap
->s
, &so
);
2366 error
= soshutdown((struct socket
*)so
, uap
->how
);
2373 * Returns: 0 Success
2376 * EACCES Mandatory Access Control failure
2377 * file_socket:ENOTSOCK
2380 * sosetopt:ENOPROTOOPT
2384 * sosetopt:EOPNOTSUPP[AF_UNIX]
/*
 * setsockopt: set a socket option on descriptor uap->s.
 *
 * Visible steps: audit the descriptor, test for the inconsistent
 * combination "null value pointer with non-zero size", resolve the
 * descriptor with file_socket(), describe the request in a struct
 * sockopt (direction SOPT_SET, level, name, user value pointer and
 * size), run the MAC setsockopt check when CONFIG_MACF_SOCKET_SUBSET is
 * enabled, and apply the option with sosetoptlock(), which takes the
 * socket lock itself.  retval is marked __unused.
 */
2389 setsockopt(struct proc
*p
, struct setsockopt_args
*uap
,
2390 __unused
int32_t *retval
)
2393 struct sockopt sopt
;
2396 AUDIT_ARG(fd
, uap
->s
);
2397 if (uap
->val
== 0 && uap
->valsize
!= 0)
2399 /* No bounds checking on size (it's unsigned) */
2401 error
= file_socket(uap
->s
, &so
);
2405 sopt
.sopt_dir
= SOPT_SET
;
2406 sopt
.sopt_level
= uap
->level
;
2407 sopt
.sopt_name
= uap
->name
;
2408 sopt
.sopt_val
= uap
->val
;
2409 sopt
.sopt_valsize
= uap
->valsize
;
2416 #if CONFIG_MACF_SOCKET_SUBSET
2417 if ((error
= mac_socket_check_setsockopt(kauth_cred_get(), so
,
2420 #endif /* MAC_SOCKET_SUBSET */
2421 error
= sosetoptlock(so
, &sopt
, 1); /* will lock socket */
2430 * Returns: 0 Success
2433 * EACCES Mandatory Access Control failure
2436 * file_socket:ENOTSOCK
/*
 * getsockopt: read a socket option from descriptor uap->s.
 *
 * Visible steps: resolve the descriptor with file_socket(), copy in the
 * caller's buffer size from uap->avalsize, describe the request in a
 * struct sockopt (direction SOPT_GET, level, name, user value pointer
 * and size), run the MAC getsockopt check when
 * CONFIG_MACF_SOCKET_SUBSET is enabled, fetch the option with
 * sogetoptlock(), and copy the resulting size back to uap->avalsize.
 * retval is marked __unused.
 */
2441 getsockopt(struct proc
*p
, struct getsockopt_args
*uap
,
2442 __unused
int32_t *retval
)
2446 struct sockopt sopt
;
2449 error
= file_socket(uap
->s
, &so
);
2453 error
= copyin(uap
->avalsize
, (caddr_t
)&valsize
,
2457 /* No bounds checking on size (it's unsigned) */
2461 sopt
.sopt_dir
= SOPT_GET
;
2462 sopt
.sopt_level
= uap
->level
;
2463 sopt
.sopt_name
= uap
->name
;
2464 sopt
.sopt_val
= uap
->val
;
2465 sopt
.sopt_valsize
= (size_t)valsize
; /* checked non-negative above */
2472 #if CONFIG_MACF_SOCKET_SUBSET
2473 if ((error
= mac_socket_check_getsockopt(kauth_cred_get(), so
,
2476 #endif /* MAC_SOCKET_SUBSET */
2477 error
= sogetoptlock((struct socket
*)so
, &sopt
, 1); /* will lock */
/* report back the size recorded in sopt after the call */
2479 valsize
= sopt
.sopt_valsize
;
2480 error
= copyout((caddr_t
)&valsize
, uap
->avalsize
,
2492 * Returns: 0 Success
2494 * file_socket:ENOTSOCK
2498 * <pru_sockaddr>:ENOBUFS[TCP]
2499 * <pru_sockaddr>:ECONNRESET[TCP]
2500 * <pru_sockaddr>:EINVAL[AF_UNIX]
2501 * <sf_getsockname>:???
/*
 * getsockname: return the local address of socket descriptor uap->fdes.
 *
 * Visible steps: resolve the descriptor with file_socket(), copy in the
 * caller's buffer length from uap->alen, ask the protocol for the
 * address via pru_sockaddr(), give socket filters a chance through
 * sflt_getsockname() (EJUSTRETURN is special-cased), unlock the socket,
 * copy out at most min(len, sa->sa_len) bytes of the address to
 * uap->asa, write a length back to uap->alen, and drop the descriptor
 * with file_drop().  p and retval are marked __unused.
 */
2505 getsockname(__unused
struct proc
*p
, struct getsockname_args
*uap
,
2506 __unused
int32_t *retval
)
2509 struct sockaddr
*sa
;
2514 error
= file_socket(uap
->fdes
, &so
);
2517 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof (socklen_t
));
2526 error
= (*so
->so_proto
->pr_usrreqs
->pru_sockaddr
)(so
, &sa
);
2528 error
= sflt_getsockname(so
, &sa
);
2529 if (error
== EJUSTRETURN
)
2532 socket_unlock(so
, 1);
2540 sa_len
= sa
->sa_len
;
/* never copy more than the caller's buffer or the address itself */
2541 len
= MIN(len
, sa_len
);
2542 error
= copyout((caddr_t
)sa
, uap
->asa
, len
);
2545 /* return the actual, untruncated address length */
2548 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof (socklen_t
));
2553 file_drop(uap
->fdes
);
2558 * Get name of peer for connected socket.
2560 * Returns: 0 Success
2564 * file_socket:ENOTSOCK
2568 * <pru_peeraddr>:???
2569 * <sf_getpeername>:???
/*
 * getpeername: return the peer address of socket descriptor uap->fdes.
 *
 * Visible steps: resolve the descriptor with file_socket(); bail out
 * (after unlocking) when the socket has been shut down in both
 * directions or is neither connected nor confirming; copy in the
 * caller's buffer length from uap->alen; ask the protocol for the peer
 * address via pru_peeraddr(); give socket filters a chance through
 * sflt_getpeername() (EJUSTRETURN is special-cased); unlock; copy out at
 * most min(len, sa->sa_len) bytes to uap->asa; write a length back to
 * uap->alen; free sa if set and drop the descriptor.  p and retval are
 * marked __unused.
 */
2573 getpeername(__unused
struct proc
*p
, struct getpeername_args
*uap
,
2574 __unused
int32_t *retval
)
2577 struct sockaddr
*sa
;
2582 error
= file_socket(uap
->fdes
, &so
);
2592 if ((so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
2593 (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) {
2594 /* the socket has been shutdown, no more getpeername's */
2595 socket_unlock(so
, 1);
/* a peer only exists while connected or confirming a connection */
2600 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONFIRMING
)) == 0) {
2601 socket_unlock(so
, 1);
2605 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof (socklen_t
));
2607 socket_unlock(so
, 1);
2611 error
= (*so
->so_proto
->pr_usrreqs
->pru_peeraddr
)(so
, &sa
);
2613 error
= sflt_getpeername(so
, &sa
);
2614 if (error
== EJUSTRETURN
)
2617 socket_unlock(so
, 1);
2624 sa_len
= sa
->sa_len
;
/* never copy more than the caller's buffer or the address itself */
2625 len
= MIN(len
, sa_len
);
2626 error
= copyout(sa
, uap
->asa
, len
);
2629 /* return the actual, untruncated address length */
2632 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof (socklen_t
));
2634 if (sa
) FREE(sa
, M_SONAME
);
2636 file_drop(uap
->fdes
);
/*
 * sockargs: copy buflen bytes of user data at `data` into a newly
 * obtained mbuf of the given type.
 *
 * mp      mbuf pointer slot supplied by the caller (its store is not
 *         among the visible statements -- presumably receives m;
 *         confirm)
 * data    user source address
 * buflen  number of bytes to copy in
 * type    mbuf type; MT_CONTROL and MT_SONAME get special treatment
 *
 * The allocation size starts as buflen, is rejected above INT_MAX/2,
 * and for MT_CONTROL is recomputed as twice the payload after the
 * cmsghdr plus one cmsghdr.  Sizes above MLEN are special-cased:
 * MT_SONAME requests of up to 112 bytes are treated as MLEN, and sizes
 * above MCLBYTES are tested separately.  The copyin always uses the
 * original buflen; for MT_SONAME the sockaddr's sa_len is then
 * overwritten with buflen.
 */
2641 sockargs(struct mbuf
**mp
, user_addr_t data
, int buflen
, int type
)
2643 struct sockaddr
*sa
;
2647 size_t alloc_buflen
= (size_t)buflen
;
2649 if (alloc_buflen
> INT_MAX
/2)
2653 * The fd's in the buffer must expand to be pointers, thus we need twice
2656 if (type
== MT_CONTROL
)
2657 alloc_buflen
= ((buflen
- sizeof(struct cmsghdr
))*2) +
2658 sizeof(struct cmsghdr
);
2660 if (alloc_buflen
> MLEN
) {
2661 if (type
== MT_SONAME
&& alloc_buflen
<= 112)
2662 alloc_buflen
= MLEN
; /* unix domain compat. hack */
2663 else if (alloc_buflen
> MCLBYTES
)
2666 m
= m_get(M_WAIT
, type
);
2669 if (alloc_buflen
> MLEN
) {
/* M_EXT not set on the mbuf at this point */
2671 if ((m
->m_flags
& M_EXT
) == 0) {
2677 * K64: We still copyin the original buflen because it gets expanded
2678 * later and we lie about the size of the mbuf because it only affects
2682 error
= copyin(data
, mtod(m
, caddr_t
), (u_int
)buflen
);
2687 if (type
== MT_SONAME
) {
2688 sa
= mtod(m
, struct sockaddr
*);
/* the kernel-side length is authoritative, not what user space wrote */
2689 sa
->sa_len
= buflen
;
2696 * Given a user_addr_t of length len, allocate and fill out a *sa.
2698 * Returns: 0 Success
2699 * ENAMETOOLONG Filename too long
2700 * EINVAL Invalid argument
2701 * ENOMEM Not enough space
2702 * copyin:EFAULT Bad address
/*
 * getsockaddr: allocate a sockaddr of len bytes and fill it from the
 * user address uaddr.
 *
 * so                socket the address is for (used for the PF_INET
 *                   domain test)
 * namp              output slot (its store is not among the visible
 *                   statements -- presumably receives sa; confirm)
 * uaddr, len        user buffer holding the address
 * translate_unspec  when true, an AF_UNSPEC family on a PF_INET socket
 *                   with len == sizeof (struct sockaddr_in) is rewritten
 *                   to AF_INET
 *
 * len above SOCK_MAXADDRLEN yields ENAMETOOLONG; len shorter than the
 * offset of sa_data is rejected as well.  The buffer is allocated zeroed
 * from M_SONAME.
 */
2705 getsockaddr(struct socket
*so
, struct sockaddr
**namp
, user_addr_t uaddr
,
2706 size_t len
, boolean_t translate_unspec
)
2708 struct sockaddr
*sa
;
2711 if (len
> SOCK_MAXADDRLEN
)
2712 return (ENAMETOOLONG
);
/* too short to even hold the fixed sockaddr header fields */
2714 if (len
< offsetof(struct sockaddr
, sa_data
[0]))
2717 MALLOC(sa
, struct sockaddr
*, len
, M_SONAME
, M_WAITOK
| M_ZERO
);
2721 error
= copyin(uaddr
, (caddr_t
)sa
, len
);
2726 * Force sa_family to AF_INET on AF_INET sockets to handle
2727 * legacy applications that use AF_UNSPEC (0). On all other
2728 * sockets we leave it unchanged and let the lower layer
2731 if (translate_unspec
&& sa
->sa_family
== AF_UNSPEC
&&
2732 SOCK_CHECK_DOM(so
, PF_INET
) &&
2733 len
== sizeof (struct sockaddr_in
))
2734 sa
->sa_family
= AF_INET
;
/*
 * getsockaddr_s: like getsockaddr(), but fills a caller-provided
 * struct sockaddr_storage instead of allocating.
 *
 * A null ss, a null uaddr, or a len shorter than the offset of sa_data
 * is rejected; len larger than sizeof (*ss) yields ENAMETOOLONG.  The
 * storage is zeroed before the copyin.  With translate_unspec set, an
 * AF_UNSPEC family on a PF_INET socket with
 * len == sizeof (struct sockaddr_in) is rewritten to AF_INET.
 */
2743 getsockaddr_s(struct socket
*so
, struct sockaddr_storage
*ss
,
2744 user_addr_t uaddr
, size_t len
, boolean_t translate_unspec
)
2748 if (ss
== NULL
|| uaddr
== USER_ADDR_NULL
||
2749 len
< offsetof(struct sockaddr
, sa_data
[0]))
2753 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2754 * so the check here is inclusive.
2756 if (len
> sizeof (*ss
))
2757 return (ENAMETOOLONG
);
/* clear first so bytes beyond len are deterministic */
2759 bzero(ss
, sizeof (*ss
));
2760 error
= copyin(uaddr
, (caddr_t
)ss
, len
);
2763 * Force sa_family to AF_INET on AF_INET sockets to handle
2764 * legacy applications that use AF_UNSPEC (0). On all other
2765 * sockets we leave it unchanged and let the lower layer
2768 if (translate_unspec
&& ss
->ss_family
== AF_UNSPEC
&&
2769 SOCK_CHECK_DOM(so
, PF_INET
) &&
2770 len
== sizeof (struct sockaddr_in
))
2771 ss
->ss_family
= AF_INET
;
/*
 * internalize_user_msghdr_array: convert `count` ABI-specific
 * user64_msghdr_x / user32_msghdr_x records at src into kernel-internal
 * user_msghdr_x records at dst, building one uio per message.
 *
 * spacetype selects the source layout (UIO_USERSPACE64 vs. the 32-bit
 * layout) and is also passed to uio_create() and
 * copyin_user_iovec_array().  For every message the iovec count is
 * range-checked against (0, UIO_MAXIOV], the user iovecs are copied
 * into the uio's own storage, msg_iov is repointed at that storage, and
 * msg_datalen is set to the uio's residual count.
 */
2779 internalize_user_msghdr_array(const void *src
, int spacetype
, int direction
,
2780 u_int count
, struct user_msghdr_x
*dst
, struct uio
**uiop
)
2787 for (i
= 0; i
< count
; i
++) {
2789 struct user_iovec
*iovp
;
2790 struct user_msghdr_x
*user_msg
= dst
+ i
;
2792 if (spacetype
== UIO_USERSPACE64
) {
2793 const struct user64_msghdr_x
*msghdr64
;
2795 msghdr64
= ((const struct user64_msghdr_x
*)src
) + i
;
2797 user_msg
->msg_name
= msghdr64
->msg_name
;
2798 user_msg
->msg_namelen
= msghdr64
->msg_namelen
;
2799 user_msg
->msg_iov
= msghdr64
->msg_iov
;
2800 user_msg
->msg_iovlen
= msghdr64
->msg_iovlen
;
2801 user_msg
->msg_control
= msghdr64
->msg_control
;
2802 user_msg
->msg_controllen
= msghdr64
->msg_controllen
;
2803 user_msg
->msg_flags
= msghdr64
->msg_flags
;
2804 user_msg
->msg_datalen
= msghdr64
->msg_datalen
;
2806 const struct user32_msghdr_x
*msghdr32
;
2808 msghdr32
= ((const struct user32_msghdr_x
*)src
) + i
;
2810 user_msg
->msg_name
= msghdr32
->msg_name
;
2811 user_msg
->msg_namelen
= msghdr32
->msg_namelen
;
2812 user_msg
->msg_iov
= msghdr32
->msg_iov
;
2813 user_msg
->msg_iovlen
= msghdr32
->msg_iovlen
;
2814 user_msg
->msg_control
= msghdr32
->msg_control
;
2815 user_msg
->msg_controllen
= msghdr32
->msg_controllen
;
2816 user_msg
->msg_flags
= msghdr32
->msg_flags
;
2817 user_msg
->msg_datalen
= msghdr32
->msg_datalen
;
/* reject an iovec count outside (0, UIO_MAXIOV] before allocating */
2820 if (user_msg
->msg_iovlen
<= 0 ||
2821 user_msg
->msg_iovlen
> UIO_MAXIOV
) {
2825 auio
= uio_create(user_msg
->msg_iovlen
, 0, spacetype
,
2833 iovp
= uio_iovsaddr(auio
);
2838 error
= copyin_user_iovec_array(user_msg
->msg_iov
,
2839 spacetype
, user_msg
->msg_iovlen
, iovp
);
/* from here on msg_iov refers to the kernel copy held inside the uio */
2842 user_msg
->msg_iov
= CAST_USER_ADDR_T(iovp
);
2844 error
= uio_calculateresid(auio
);
/* the user-supplied msg_datalen is replaced by the computed total */
2847 user_msg
->msg_datalen
= uio_resid(auio
);
2849 if (user_msg
->msg_name
&& user_msg
->msg_namelen
)
2851 if (user_msg
->msg_control
&& user_msg
->msg_controllen
)
/*
 * internalize_recv_msghdr_array: receive-side counterpart of
 * internalize_user_msghdr_array().
 *
 * Converts `count` ABI-specific msghdr_x records at src into
 * kernel-internal records at dst and prepares the matching
 * recv_msg_elem entries: each gets a uio created with the given
 * spacetype/direction and loaded with the user's iovecs, and its
 * `which` mask gains SOCK_MSG_SA when a name buffer with non-zero
 * length was supplied and SOCK_MSG_CONTROL when a control buffer with
 * non-zero length was supplied.  msg_datalen is set to the uio's
 * residual count.
 */
2860 internalize_recv_msghdr_array(const void *src
, int spacetype
, int direction
,
2861 u_int count
, struct user_msghdr_x
*dst
,
2862 struct recv_msg_elem
*recv_msg_array
)
2867 for (i
= 0; i
< count
; i
++) {
2868 struct user_iovec
*iovp
;
2869 struct user_msghdr_x
*user_msg
= dst
+ i
;
2870 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
2872 if (spacetype
== UIO_USERSPACE64
) {
2873 const struct user64_msghdr_x
*msghdr64
;
2875 msghdr64
= ((const struct user64_msghdr_x
*)src
) + i
;
2877 user_msg
->msg_name
= msghdr64
->msg_name
;
2878 user_msg
->msg_namelen
= msghdr64
->msg_namelen
;
2879 user_msg
->msg_iov
= msghdr64
->msg_iov
;
2880 user_msg
->msg_iovlen
= msghdr64
->msg_iovlen
;
2881 user_msg
->msg_control
= msghdr64
->msg_control
;
2882 user_msg
->msg_controllen
= msghdr64
->msg_controllen
;
2883 user_msg
->msg_flags
= msghdr64
->msg_flags
;
2884 user_msg
->msg_datalen
= msghdr64
->msg_datalen
;
2886 const struct user32_msghdr_x
*msghdr32
;
2888 msghdr32
= ((const struct user32_msghdr_x
*)src
) + i
;
2890 user_msg
->msg_name
= msghdr32
->msg_name
;
2891 user_msg
->msg_namelen
= msghdr32
->msg_namelen
;
2892 user_msg
->msg_iov
= msghdr32
->msg_iov
;
2893 user_msg
->msg_iovlen
= msghdr32
->msg_iovlen
;
2894 user_msg
->msg_control
= msghdr32
->msg_control
;
2895 user_msg
->msg_controllen
= msghdr32
->msg_controllen
;
2896 user_msg
->msg_flags
= msghdr32
->msg_flags
;
2897 user_msg
->msg_datalen
= msghdr32
->msg_datalen
;
/* reject an iovec count outside (0, UIO_MAXIOV] before allocating */
2900 if (user_msg
->msg_iovlen
<= 0 ||
2901 user_msg
->msg_iovlen
> UIO_MAXIOV
) {
2905 recv_msg_elem
->uio
= uio_create(user_msg
->msg_iovlen
, 0,
2906 spacetype
, direction
);
2907 if (recv_msg_elem
->uio
== NULL
) {
2912 iovp
= uio_iovsaddr(recv_msg_elem
->uio
);
2917 error
= copyin_user_iovec_array(user_msg
->msg_iov
,
2918 spacetype
, user_msg
->msg_iovlen
, iovp
);
/* from here on msg_iov refers to the kernel copy held inside the uio */
2921 user_msg
->msg_iov
= CAST_USER_ADDR_T(iovp
);
2923 error
= uio_calculateresid(recv_msg_elem
->uio
);
/* the user-supplied msg_datalen is replaced by the computed total */
2926 user_msg
->msg_datalen
= uio_resid(recv_msg_elem
->uio
);
/* record which optional outputs the caller provided buffers for */
2928 if (user_msg
->msg_name
&& user_msg
->msg_namelen
)
2929 recv_msg_elem
->which
|= SOCK_MSG_SA
;
2930 if (user_msg
->msg_control
&& user_msg
->msg_controllen
)
2931 recv_msg_elem
->which
|= SOCK_MSG_CONTROL
;
/*
 * externalize_user_msghdr_array: for each of `count` entries, store the
 * msg_flags of src[i] and a byte count into entry i of `dst`.
 *
 *	dst		output array, written as struct user64_msghdr_x when
 *			spacetype is UIO_USERSPACE64 and as struct
 *			user32_msghdr_x in the other visible path
 *	spacetype	selects the output layout
 *	direction	unused (see the pragma below)
 *	count		number of entries processed
 *	src		kernel-side headers supplying msg_flags and msg_datalen
 *	uiop		uiop[i] is passed to uio_resid() with no NULL test
 *			visible here
 *
 * The byte count written is src[i].msg_datalen minus uio_resid(uiop[i]).
 *
 * NOTE(review): the statement controlled by the "msg_datalen != 0 &&
 * len == 0" test is not visible in this excerpt -- confirm against the
 * full source.
 */
2939 externalize_user_msghdr_array(void *dst
, int spacetype
, int direction
,
2940 u_int count
, const struct user_msghdr_x
*src
, struct uio
**uiop
)
2942 #pragma unused(direction)
2947 for (i
= 0; i
< count
; i
++) {
2948 const struct user_msghdr_x
*user_msg
= src
+ i
;
2949 uio_t auio
= uiop
[i
];
/* len = requested length minus what is still outstanding in the uio. */
2950 user_ssize_t len
= user_msg
->msg_datalen
- uio_resid(auio
);
/* True when data was requested but the computed length is zero. */
2952 if (user_msg
->msg_datalen
!= 0 && len
== 0)
/* Write flags and length back using the 64-bit header layout. */
2958 if (spacetype
== UIO_USERSPACE64
) {
2959 struct user64_msghdr_x
*msghdr64
;
2961 msghdr64
= ((struct user64_msghdr_x
*)dst
) + i
;
2963 msghdr64
->msg_flags
= user_msg
->msg_flags
;
2964 msghdr64
->msg_datalen
= len
;
/* Same two fields, written through the 32-bit header layout. */
2967 struct user32_msghdr_x
*msghdr32
;
2969 msghdr32
= ((struct user32_msghdr_x
*)dst
) + i
;
2971 msghdr32
->msg_flags
= user_msg
->msg_flags
;
2972 msghdr32
->msg_datalen
= len
;
/*
 * externalize_recv_msghdr_array: for each of `count` entries, store the
 * msg_flags of src[i] and a byte count into entry i of `dst`, using the
 * uio held in recv_msg_array[i].
 *
 *	dst		output array, written as struct user64_msghdr_x when
 *			spacetype is UIO_USERSPACE64 and as struct
 *			user32_msghdr_x in the other visible path
 *	spacetype	selects the output layout
 *	direction	compared against UIO_READ before the SOCK_MSG_DATA test
 *	count		number of entries processed
 *	src		kernel-side headers supplying msg_flags and msg_datalen
 *	recv_msg_array	per-message state; ->uio and ->which are read
 *
 * The byte count written is src[i].msg_datalen minus
 * uio_resid(recv_msg_array[i].uio).
 *
 * NOTE(review): the statements controlled by the SOCK_MSG_DATA test and by
 * the "msg_datalen != 0 && len == 0" test are not visible in this excerpt
 * -- confirm against the full source.
 */
2979 externalize_recv_msghdr_array(void *dst
, int spacetype
, int direction
,
2980 u_int count
, const struct user_msghdr_x
*src
,
2981 struct recv_msg_elem
*recv_msg_array
)
2987 for (i
= 0; i
< count
; i
++) {
2988 const struct user_msghdr_x
*user_msg
= src
+ i
;
2989 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
/* len = requested length minus what is still outstanding in the uio. */
2992 len
= user_msg
->msg_datalen
- uio_resid(recv_msg_elem
->uio
);
/* On the read direction, test whether this element has its SOCK_MSG_DATA bit set. */
2994 if (direction
== UIO_READ
) {
2995 if ((recv_msg_elem
->which
& SOCK_MSG_DATA
) == 0)
/* True when data was requested but the computed length is zero. */
2998 if (user_msg
->msg_datalen
!= 0 && len
== 0)
/* Write flags and length back using the 64-bit header layout. */
3005 if (spacetype
== UIO_USERSPACE64
) {
3006 struct user64_msghdr_x
*msghdr64
;
3008 msghdr64
= ((struct user64_msghdr_x
*)dst
) + i
;
3010 msghdr64
->msg_flags
= user_msg
->msg_flags
;
3011 msghdr64
->msg_datalen
= len
;
/* Same two fields, written through the 32-bit header layout. */
3014 struct user32_msghdr_x
*msghdr32
;
3016 msghdr32
= ((struct user32_msghdr_x
*)dst
) + i
;
3018 msghdr32
->msg_flags
= user_msg
->msg_flags
;
3019 msghdr32
->msg_datalen
= len
;
/*
 * free_uio_array: visit each of the `count` slots of `uiop` and test it
 * for NULL.
 *
 * NOTE(review): the statement executed for a non-NULL slot is not visible
 * in this excerpt; the name suggests the uio is released -- confirm
 * against the full source.
 */
3026 free_uio_array(struct uio
**uiop
, u_int count
)
3030 for (i
= 0; i
< count
; i
++) {
/* Only populated slots are acted on. */
3031 if (uiop
[i
] != NULL
)
/*
 * uio_array_resid: add up uio_resid() over the `count` entries of `uiop`
 * into a user_ssize_t total that starts at 0.
 *
 * NOTE(review): any NULL guard around the accumulation and the return
 * statement are not visible in this excerpt -- confirm against the full
 * source.
 */
3036 __private_extern__ user_ssize_t
3037 uio_array_resid(struct uio
**uiop
, u_int count
)
3039 user_ssize_t len
= 0;
3042 for (i
= 0; i
< count
; i
++) {
3043 struct uio
*auio
= uiop
[i
];
/* Fold this entry's residual count into the running total. */
3046 len
+= uio_resid(auio
);
/*
 * uio_array_is_valid: sanity-check the residual byte counts of the `count`
 * uio entries in `uiop`.
 *
 * Each entry's uio_resid() and the running total `len` are tested against
 * the range [0, sb_max].  Both comparisons are made in user_ssize_t:
 * narrowing the value to 32 bits first would drop its upper bits, so a
 * count such as 2^32 + 1 would be seen as 1 and slip past the limit.
 *
 * NOTE(review): the statements run when a test fails, the accumulation
 * into `len` and the return value are not visible in this excerpt --
 * confirm against the full source.
 * NOTE(review): assumes sb_max is an unsigned 32-bit quantity that widens
 * to user_ssize_t without loss -- confirm its declaration.
 */
3052 uio_array_is_valid(struct uio
**uiop
, u_int count
)
3054 user_ssize_t len
= 0;
3057 for (i
= 0; i
< count
; i
++) {
3058 struct uio
*auio
= uiop
[i
];
3061 user_ssize_t resid
= uio_resid(auio
);
3064 * Sanity check on the validity of the iovec:
3065 * no point of going over sb_max
3067 if (resid
< 0 || resid
> (user_ssize_t)sb_max
)
3071 if (len
< 0 || len
> (user_ssize_t)sb_max
)
/*
 * alloc_recv_msg_array: allocate an array of `count` struct recv_msg_elem
 * with _MALLOC() (type M_TEMP, flags M_WAITOK | M_ZERO) and return the
 * pointer that _MALLOC() produced.
 *
 * NOTE(review): `count * sizeof(struct recv_msg_elem)` is not checked for
 * wrap-around here; where size_t is 32 bits wide a large `count` could
 * yield an undersized allocation.  Confirm that every caller bounds
 * `count` before calling.
 */
3079 struct recv_msg_elem
*
3080 alloc_recv_msg_array(u_int count
)
3082 struct recv_msg_elem
*recv_msg_array
;
/* M_ZERO: every element starts out zero-filled. */
3084 recv_msg_array
= _MALLOC(count
* sizeof(struct recv_msg_elem
),
3085 M_TEMP
, M_WAITOK
| M_ZERO
);
3087 return (recv_msg_array
);
/*
 * free_recv_msg_array: release what each of the `count` elements of
 * `recv_msg_array` holds, then release the array itself.
 *
 * Per element, when the member is non-NULL:
 *	uio		released with uio_free()
 *	psa		released with _FREE(..., M_TEMP)
 *	controlp	released with m_freem()
 * The array is then released with _FREE(..., M_TEMP).
 */
3091 free_recv_msg_array(struct recv_msg_elem
*recv_msg_array
, u_int count
)
3095 for (i
= 0; i
< count
; i
++) {
3096 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
/* Each member uses its own release routine; skip the ones never set. */
3098 if (recv_msg_elem
->uio
!= NULL
)
3099 uio_free(recv_msg_elem
->uio
);
3100 if (recv_msg_elem
->psa
!= NULL
)
3101 _FREE(recv_msg_elem
->psa
, M_TEMP
);
3102 if (recv_msg_elem
->controlp
!= NULL
)
3103 m_freem(recv_msg_elem
->controlp
);
/* Finally drop the element array. */
3105 _FREE(recv_msg_array
, M_TEMP
);
/*
 * recv_msg_array_resid: add up uio_resid() over the elements of
 * `recv_msg_array` whose uio is non-NULL, into a user_ssize_t total that
 * starts at 0.
 *
 * NOTE(review): the return statement is not visible in this excerpt --
 * confirm against the full source.
 */
3109 __private_extern__ user_ssize_t
3110 recv_msg_array_resid(struct recv_msg_elem
*recv_msg_array
, u_int count
)
3112 user_ssize_t len
= 0;
3115 for (i
= 0; i
< count
; i
++) {
3116 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
/* Elements without a uio contribute nothing. */
3118 if (recv_msg_elem
->uio
!= NULL
)
3119 len
+= uio_resid(recv_msg_elem
->uio
);
/*
 * recv_msg_array_is_valid: sanity-check the residual byte counts of the
 * `count` elements of `recv_msg_array`; elements whose uio is NULL are
 * skipped.
 *
 * Each uio_resid() and the running total `len` are tested against the
 * range [0, sb_max].  Both comparisons are made in user_ssize_t:
 * narrowing the value to 32 bits first would drop its upper bits, so a
 * count such as 2^32 + 1 would be seen as 1 and slip past the limit.
 *
 * NOTE(review): the statements run when a test fails, the accumulation
 * into `len` and the return value are not visible in this excerpt --
 * confirm against the full source.
 * NOTE(review): assumes sb_max is an unsigned 32-bit quantity that widens
 * to user_ssize_t without loss -- confirm its declaration.
 */
3125 recv_msg_array_is_valid(struct recv_msg_elem
*recv_msg_array
, u_int count
)
3127 user_ssize_t len
= 0;
3130 for (i
= 0; i
< count
; i
++) {
3131 struct recv_msg_elem
*recv_msg_elem
= recv_msg_array
+ i
;
3133 if (recv_msg_elem
->uio
!= NULL
) {
3134 user_ssize_t resid
= uio_resid(recv_msg_elem
->uio
);
3137 * Sanity check on the validity of the iovec:
3138 * no point of going over sb_max
3140 if (resid
< 0 || resid
> (user_ssize_t)sb_max
)
3144 if (len
< 0 || len
> (user_ssize_t)sb_max
)
/*
 * Sizing constants for sendfile().  SFUIOBUFS is both the iovec capacity
 * of the on-stack uio buffer used per pass and, through
 * SENDFILE_MAX_BYTES, the factor that bounds one pass.
 */
3153 #define SFUIOBUFS 64
/*
 * HOWMANY_*(n) rounds n up to a whole number of units of size
 * (1 << shift): ((n - 1) >> shift) + 1.  n is converted to unsigned int
 * first, so n == 0 wraps to UINT_MAX before the shift and values wider
 * than unsigned int are truncated; pass a non-zero length that fits.
 */
3155 /* Macros to compute the number of mbufs needed depending on cluster size */
3156 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
3157 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)
3159 /* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
3160 #define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
3162 /* Upper send limit in the number of mbuf clusters */
3163 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
3164 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
/*
 * alloc_sendpkt: obtain an mbuf packet for up to `pktlen` bytes through
 * m_getpackets_internal() and report how many buffers were obtained.
 *
 *	how		wait flag passed to the first allocation attempts
 *	pktlen		number of bytes the caller wants to hold
 *	maxchunks	out: set to `needed` as left by the last allocation
 *	m		out: head of the mbuf chain that was allocated
 *	jumbocl		when true and pktlen > MBIGCLBYTES, M16KCLBYTES
 *			clusters are requested
 *
 * The requested buffer count is capped at SENDFILE_MAX_16K or
 * SENDFILE_MAX_4K.  A final attempt uses M_WAIT with MBIGCLBYTES clusters,
 * and the function panics if that attempt still yields NULL.
 *
 * NOTE(review): the conditions guarding the first panic and separating the
 * allocation attempts are not visible in this excerpt -- confirm against
 * the full source.
 * NOTE(review): the first panic prints the size_t `pktlen` with "%ld";
 * confirm the format matches the kernel printf conventions in use.
 */
3167 alloc_sendpkt(int how
, size_t pktlen
, unsigned int *maxchunks
,
3168 struct mbuf
**m
, boolean_t jumbocl
)
3170 unsigned int needed
;
3173 panic("%s: pktlen (%ld) must be non-zero\n", __func__
, pktlen
);
3176 * Try to allocate for the whole thing. Since we want full control
3177 * over the buffer size and be able to accept partial result, we can't
3178 * use mbuf_allocpacket(). The logic below is similar to sosend().
/* Large request and jumbo clusters allowed: ask for 16K clusters, capped at SENDFILE_MAX_16K. */
3181 if (pktlen
> MBIGCLBYTES
&& jumbocl
) {
3182 needed
= MIN(SENDFILE_MAX_16K
, HOWMANY_16K(pktlen
));
3183 *m
= m_getpackets_internal(&needed
, 1, how
, 0, M16KCLBYTES
);
/* Request sized in MBIGCLBYTES clusters, capped at SENDFILE_MAX_4K. */
3186 needed
= MIN(SENDFILE_MAX_4K
, HOWMANY_4K(pktlen
));
3187 *m
= m_getpackets_internal(&needed
, 1, how
, 0, MBIGCLBYTES
);
3191 * Our previous attempt(s) at allocation had failed; the system
3192 * may be short on mbufs, and we want to block until they are
3193 * available. This time, ask just for 1 mbuf and don't return
3198 *m
= m_getpackets_internal(&needed
, 1, M_WAIT
, 1, MBIGCLBYTES
);
3201 panic("%s: blocking allocation returned NULL\n", __func__
);
/* Tell the caller how many buffers the chain actually contains. */
3203 *maxchunks
= needed
;
3208 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
3209 * struct sf_hdtr *hdtr, int flags)
3211 * Send a file specified by 'fd' and starting at 'offset' to a socket
3212 * specified by 's'. Send only '*nbytes' of the file or until EOF if
3213 * *nbytes == 0. Optionally add a header and/or trailer to the socket
3214 * output. If specified, write the total number of bytes sent into *nbytes.
/*
 * sendfile: read data from the regular file open on uap->fd, starting at
 * uap->offset, into mbuf chains and hand each chain to the protocol send
 * routine of the socket uap->s.  Optional header and trailer iovecs
 * described by uap->hdtr are sent with writev_nocancel() before and after
 * the file data.
 *
 *	p	calling process (IS_64BIT_PROCESS, fp_getfvp, writev_nocancel)
 *	uap	system call arguments: fd, s, offset, nbytes (user address of
 *		an off_t), hdtr (user address of an sf_hdtr), flags
 *	retval	unused
 *
 * The running byte count `sbytes` is copied out to uap->nbytes before the
 * closing trace point is emitted.
 *
 * NOTE(review): the error codes assigned when a check fails, the jump
 * targets and the final return are not visible in this excerpt -- confirm
 * them against the full source before relying on this summary.
 * NOTE(review): the copyin() that fetches the caller's byte count discards
 * its result, so a fault leaves `nbytes` at its initial value of 0; the
 * existing "JMM - error handling?" remark points at the same gap.
 */
3217 sendfile(struct proc
*p
, struct sendfile_args
*uap
, __unused
int *retval
)
3219 struct fileproc
*fp
;
3222 struct writev_nocancel_args nuap
;
3223 user_ssize_t writev_retval
;
3224 struct user_sf_hdtr user_hdtr
;
3225 struct user32_sf_hdtr user32_hdtr
;
3226 struct user64_sf_hdtr user64_hdtr
;
3228 off_t nbytes
= 0, sbytes
= 0;
/* Private copy of the current VFS context; its credential is replaced further down. */
3232 struct vfs_context context
= *vfs_context_current();
3234 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE
| DBG_FUNC_START
), uap
->s
,
3237 AUDIT_ARG(fd
, uap
->fd
);
3238 AUDIT_ARG(value32
, uap
->s
);
3241 * Do argument checking. Must be a regular file in, stream
3242 * type and connected socket out, positive offset.
/* Resolve uap->fd to its fileproc and vnode. */
3244 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
/* The file must have been opened for reading. */
3247 if ((fp
->f_flag
& FREAD
) == 0) {
3251 if (vnode_isreg(vp
) == 0) {
/* Resolve uap->s to its socket. */
3255 error
= file_socket(uap
->s
, &so
);
3263 if (so
->so_type
!= SOCK_STREAM
) {
3267 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
3271 if (uap
->offset
< 0) {
3275 if (uap
->nbytes
== USER_ADDR_NULL
) {
/* No flag bits are accepted. */
3279 if (uap
->flags
!= 0) {
/* File reads below run with the credential recorded on the open file. */
3284 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
3286 #if CONFIG_MACF_SOCKET_SUBSET
3287 /* JMM - fetch connected sockaddr? */
3288 error
= mac_socket_check_send(context
.vc_ucred
, so
, NULL
);
3294 * Get number of bytes to send
3295 * Should it applies to size of header and trailer?
3296 * JMM - error handling?
3298 copyin(uap
->nbytes
, &nbytes
, sizeof (off_t
));
3301 * If specified, get the pointer to the sf_hdtr struct for
3302 * any headers/trailers.
3304 if (uap
->hdtr
!= USER_ADDR_NULL
) {
3307 bzero(&user_hdtr
, sizeof (user_hdtr
));
/* Copy in the layout that matches the caller's word size ... */
3308 if (IS_64BIT_PROCESS(p
)) {
3309 hdtrp
= (caddr_t
)&user64_hdtr
;
3310 sizeof_hdtr
= sizeof (user64_hdtr
);
3312 hdtrp
= (caddr_t
)&user32_hdtr
;
3313 sizeof_hdtr
= sizeof (user32_hdtr
);
3315 error
= copyin(uap
->hdtr
, hdtrp
, sizeof_hdtr
);
/* ... then widen it into the common user_hdtr form. */
3318 if (IS_64BIT_PROCESS(p
)) {
3319 user_hdtr
.headers
= user64_hdtr
.headers
;
3320 user_hdtr
.hdr_cnt
= user64_hdtr
.hdr_cnt
;
3321 user_hdtr
.trailers
= user64_hdtr
.trailers
;
3322 user_hdtr
.trl_cnt
= user64_hdtr
.trl_cnt
;
3324 user_hdtr
.headers
= user32_hdtr
.headers
;
3325 user_hdtr
.hdr_cnt
= user32_hdtr
.hdr_cnt
;
3326 user_hdtr
.trailers
= user32_hdtr
.trailers
;
3327 user_hdtr
.trl_cnt
= user32_hdtr
.trl_cnt
;
3331 * Send any headers. Wimp out and use writev(2).
3333 if (user_hdtr
.headers
!= USER_ADDR_NULL
) {
/* Sized from nuap itself: it is a struct writev_nocancel_args. */
3334 bzero(&nuap
, sizeof (nuap
));
3336 nuap
.iovp
= user_hdtr
.headers
;
3337 nuap
.iovcnt
= user_hdtr
.hdr_cnt
;
3338 error
= writev_nocancel(p
, &nuap
, &writev_retval
);
/* Header bytes count toward the total reported to the caller. */
3342 sbytes
+= writev_retval
;
3347 * Get the file size for 2 reasons:
3348 * 1. We don't want to allocate more mbufs than necessary
3349 * 2. We don't want to read past the end of file
3351 if ((error
= vnode_size(vp
, &file_size
, vfs_context_current())) != 0) {
3356 * Simply read file data into a chain of mbufs that used with scatter
3357 * gather reads. We're not (yet?) setup to use zero copy external
3358 * mbufs that point to the file pages.
/* Take the send-buffer lock for the duration of the transfer loop. */
3361 error
= sblock(&so
->so_snd
, SBL_WAIT
);
3363 socket_unlock(so
, 1);
/* One pass per chunk: both the file offset and the byte total advance by the chunk size. */
3366 for (off
= uap
->offset
; ; off
+= xfsize
, sbytes
+= xfsize
) {
3367 mbuf_t m0
= NULL
, m
;
3368 unsigned int nbufs
= SFUIOBUFS
, i
;
3370 char uio_buf
[UIO_SIZEOF(SFUIOBUFS
)]; /* 1 KB !!! */
3378 * Calculate the amount to transfer.
3379 * Align to round number of pages.
3380 * Not to exceed send socket buffer,
3381 * the EOF, or the passed in nbytes.
/* Start from the space currently free in the send buffer. */
3383 xfsize
= sbspace(&so
->so_snd
);
3386 if (so
->so_state
& SS_CANTSENDMORE
) {
3389 } else if ((so
->so_state
& SS_NBIO
)) {
/* Cap at the per-pass maximum, otherwise round down to whole pages. */
3397 if (xfsize
> SENDFILE_MAX_BYTES
)
3398 xfsize
= SENDFILE_MAX_BYTES
;
3399 else if (xfsize
> PAGE_SIZE
)
3400 xfsize
= trunc_page(xfsize
);
/* A start inside a page is limited to the rest of that page. */
3401 pgoff
= off
& PAGE_MASK_64
;
3402 if (pgoff
> 0 && PAGE_SIZE
- pgoff
< xfsize
)
3403 xfsize
= PAGE_SIZE_64
- pgoff
;
/* A non-zero nbytes bounds the total; zero means no caller-imposed bound. */
3404 if (nbytes
&& xfsize
> (nbytes
- sbytes
))
3405 xfsize
= nbytes
- sbytes
;
/* Never read past the end of the file. */
3408 if (off
+ xfsize
> file_size
)
3409 xfsize
= file_size
- off
;
3414 * Attempt to use larger than system page-size clusters for
3415 * large writes only if there is a jumbo cluster pool and
3416 * if the socket is marked accordingly.
3418 jumbocl
= sosendjcl
&& njcl
> 0 &&
3419 ((so
->so_flags
& SOF_MULTIPAGES
) || sosendjcl_ignore_capab
);
/* The socket is unlocked around the mbuf allocation. */
3421 socket_unlock(so
, 0);
3422 alloc_sendpkt(M_WAIT
, xfsize
, &nbufs
, &m0
, jumbocl
);
/* The chain may hold less than was asked for. */
3423 pktlen
= mbuf_pkthdr_maxlen(m0
);
3424 if (pktlen
< (size_t)xfsize
)
/* Build a kernel-space read uio in the on-stack buffer, positioned at `off`. */
3427 auio
= uio_createwithbuffer(nbufs
, off
, UIO_SYSSPACE
,
3428 UIO_READ
, &uio_buf
[0], sizeof (uio_buf
));
3430 printf("sendfile failed. nbufs = %d. %s", nbufs
,
3431 "File a radar related to rdar://10146739.\n");
/* Point one iovec at each mbuf's data area until xfsize bytes are covered. */
3438 for (i
= 0, m
= m0
, uiolen
= 0;
3439 i
< nbufs
&& m
!= NULL
&& uiolen
< (size_t)xfsize
;
3440 i
++, m
= mbuf_next(m
)) {
3441 size_t mlen
= mbuf_maxlen(m
);
/* Trim the last mbuf so the total does not exceed xfsize. */
3443 if (mlen
+ uiolen
> (size_t)xfsize
)
3444 mlen
= xfsize
- uiolen
;
3445 mbuf_setlen(m
, mlen
);
3446 uio_addiov(auio
, CAST_USER_ADDR_T(mbuf_datastart(m
)),
/* Diagnostic only: the uio should now describe exactly xfsize bytes. */
3451 if (xfsize
!= uio_resid(auio
))
3452 printf("sendfile: xfsize: %lld != uio_resid(auio): "
3453 "%lld\n", xfsize
, (long long)uio_resid(auio
));
3455 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ
| DBG_FUNC_START
),
3456 uap
->s
, (unsigned int)((xfsize
>> 32) & 0x0ffffffff),
3457 (unsigned int)(xfsize
& 0x0ffffffff), 0, 0);
/* Read the file data straight into the mbufs, using the file's credential. */
3458 error
= fo_read(fp
, auio
, FOF_OFFSET
, &context
);
/* Partial read that ended with ERESTART, EINTR or EWOULDBLOCK. */
3461 if (uio_resid(auio
) != xfsize
&& (error
== ERESTART
||
3462 error
== EINTR
|| error
== EWOULDBLOCK
)) {
/* xfsize becomes the number of bytes actually read. */
3469 xfsize
-= uio_resid(auio
);
3470 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ
| DBG_FUNC_END
),
3471 uap
->s
, (unsigned int)((xfsize
>> 32) & 0x0ffffffff),
3472 (unsigned int)(xfsize
& 0x0ffffffff), 0, 0);
3475 // printf("sendfile: fo_read 0 bytes, EOF\n");
3478 if (xfsize
+ off
> file_size
)
3479 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
3480 "%lld\n", xfsize
, off
, file_size
);
/* Reset the mbuf lengths to match the byte count that was really read. */
3481 for (i
= 0, m
= m0
, rlen
= 0;
3482 i
< nbufs
&& m
!= NULL
&& rlen
< xfsize
;
3483 i
++, m
= mbuf_next(m
)) {
3484 size_t mlen
= mbuf_maxlen(m
);
3486 if (rlen
+ mlen
> (size_t)xfsize
)
3487 mlen
= xfsize
- rlen
;
3488 mbuf_setlen(m
, mlen
);
3492 mbuf_pkthdr_setlen(m0
, xfsize
);
3496 * Make sure that the socket is still able to take more data.
3497 * CANTSENDMORE being true usually means that the connection
3498 * was closed. so_error is true when an error was sensed after
3500 * The state is checked after the page mapping and buffer
3501 * allocation above since those operations may block and make
3502 * any socket checks stale. From this point forward, nothing
3503 * blocks before the pru_send (or more accurately, any blocking
3504 * results in a loop back to here to re-check).
3506 if ((so
->so_state
& SS_CANTSENDMORE
) || so
->so_error
) {
3507 if (so
->so_state
& SS_CANTSENDMORE
) {
3510 error
= so
->so_error
;
3517 * Wait for socket space to become available. We do this just
3518 * after checking the connection state above in order to avoid
3519 * a race condition with sbwait().
3521 if (sbspace(&so
->so_snd
) < (long)so
->so_snd
.sb_lowat
) {
3522 if (so
->so_state
& SS_NBIO
) {
3527 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT
|
3528 DBG_FUNC_START
), uap
->s
, 0, 0, 0, 0);
3529 error
= sbwait(&so
->so_snd
);
3530 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT
|
3531 DBG_FUNC_END
), uap
->s
, 0, 0, 0, 0);
3533 * An error from sbwait usually indicates that we've
3534 * been interrupted by a signal. If we've sent anything
3535 * then return bytes sent, otherwise return the error.
3544 struct mbuf
*control
= NULL
;
3547 * Socket filter processing
3550 error
= sflt_data_out(so
, NULL
, &m0
, &control
, 0);
3552 if (error
== EJUSTRETURN
) {
3559 * End Socket filter processing
3562 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND
| DBG_FUNC_START
),
3563 uap
->s
, 0, 0, 0, 0);
/* Hand the chain to the protocol's send routine. */
3564 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)(so
, 0, m0
,
/* Pair the START above with an END so the send interval is closed in the trace. */
3566 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND
| DBG_FUNC_END
),
3567 uap
->s
, 0, 0, 0, 0);
3572 sbunlock(&so
->so_snd
, FALSE
); /* will unlock socket */
3574 * Send trailers. Wimp out and use writev(2).
3576 if (uap
->hdtr
!= USER_ADDR_NULL
&&
3577 user_hdtr
.trailers
!= USER_ADDR_NULL
) {
/* Sized from nuap itself: it is a struct writev_nocancel_args. */
3578 bzero(&nuap
, sizeof (nuap
));
3580 nuap
.iovp
= user_hdtr
.trailers
;
3581 nuap
.iovcnt
= user_hdtr
.trl_cnt
;
3582 error
= writev_nocancel(p
, &nuap
, &writev_retval
);
/* Trailer bytes count toward the total reported to the caller. */
3586 sbytes
+= writev_retval
;
/* Report the number of bytes sent back through uap->nbytes. */
3593 if (uap
->nbytes
!= USER_ADDR_NULL
) {
3594 /* XXX this appears bogus for some early failure conditions */
3595 copyout(&sbytes
, uap
->nbytes
, sizeof (off_t
));
3597 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE
| DBG_FUNC_END
), uap
->s
,
3598 (unsigned int)((sbytes
>> 32) & 0x0ffffffff),
3599 (unsigned int)(sbytes
& 0x0ffffffff), error
, 0);
/* NOTE(review): second unlock site; presumably reached from a failure inside the loop -- the label is not visible here. */
3602 sbunlock(&so
->so_snd
, FALSE
); /* will unlock socket */
3607 #endif /* SENDFILE */