2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
79 #include <sys/mcache.h>
81 #include <kern/lock.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/signalvar.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/kernel.h>
88 #include <sys/uio_internal.h>
89 #include <sys/kauth.h>
90 #include <kern/task.h>
93 #include <security/audit/audit.h>
95 #include <sys/kdebug.h>
96 #include <sys/sysproto.h>
97 #include <netinet/in.h>
98 #include <net/route.h>
99 #include <netinet/in_pcb.h>
101 #if CONFIG_MACF_SOCKET_SUBSET
102 #include <security/mac_framework.h>
103 #endif /* MAC_SOCKET_SUBSET */
105 #define f_flag f_fglob->fg_flag
106 #define f_type f_fglob->fg_ops->fo_type
107 #define f_msgcount f_fglob->fg_msgcount
108 #define f_cred f_fglob->fg_cred
109 #define f_ops f_fglob->fg_ops
110 #define f_offset f_fglob->fg_offset
111 #define f_data f_fglob->fg_data
114 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
115 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
116 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
117 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
118 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
119 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
120 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
121 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
122 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
123 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
124 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
125 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
126 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
127 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
130 /* TODO: should be in header file */
131 int falloc_locked(proc_t
, struct fileproc
**, int *, vfs_context_t
, int);
133 static int sendit(struct proc
*, int, struct user_msghdr
*, uio_t
, int,
135 static int recvit(struct proc
*, int, struct user_msghdr
*, uio_t
, user_addr_t
,
137 static int connectit(struct socket
*, struct sockaddr
*);
138 static int getsockaddr(struct socket
*, struct sockaddr
**, user_addr_t
,
140 static int getsockaddr_s(struct socket
*, struct sockaddr_storage
*,
141 user_addr_t
, size_t, boolean_t
);
142 static int getsockaddrlist(struct socket
*, struct sockaddr_list
**,
143 user_addr_t
, socklen_t
, boolean_t
);
145 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf
**,
147 #endif /* SENDFILE */
148 static int connectx_nocancel(struct proc
*, struct connectx_args
*, int *);
149 static int connectitx(struct socket
*, struct sockaddr_list
**,
150 struct sockaddr_list
**, struct proc
*, uint32_t, associd_t
, connid_t
*);
151 static int peeloff_nocancel(struct proc
*, struct peeloff_args
*, int *);
152 static int disconnectx_nocancel(struct proc
*, struct disconnectx_args
*,
154 static int socket_common(struct proc
*, int, int, int, pid_t
, int32_t *, int);
157 * System call interface to the socket abstraction.
160 extern const struct fileops socketops
;
164 * EACCES Mandatory Access Control failure
168 * socreate:EAFNOSUPPORT
169 * socreate:EPROTOTYPE
170 * socreate:EPROTONOSUPPORT
173 * socreate:??? [other protocol families, IPSEC]
/*
 * socket: system-call entry point for creating a socket.
 *
 * Forwards uap->domain, uap->type and uap->protocol to socket_common(),
 * passing proc_selfpid() as the pid argument and 0 as the trailing
 * argument (socket_delegate() passes 1 there), and returns
 * socket_common()'s result.
 *
 * NOTE(review): the embedded source line numbers skip 178-179, so the end
 * of the signature is not visible in this listing.
 */
176 socket(struct proc
*p
,
177 struct socket_args
*uap
,
180 return (socket_common(p
, uap
->domain
, uap
->type
, uap
->protocol
,
181 proc_selfpid(), retval
, 0));
/*
 * socket_delegate: system-call entry point; same hand-off as socket()
 * except that the pid argument is the caller-supplied uap->epid and the
 * trailing argument to socket_common() is 1.
 *
 * Returns socket_common()'s result unchanged.
 *
 * NOTE(review): the embedded source line numbers skip 187-188, so the end
 * of the signature is not visible in this listing.
 */
185 socket_delegate(struct proc
*p
,
186 struct socket_delegate_args
*uap
,
189 return socket_common(p
, uap
->domain
, uap
->type
, uap
->protocol
,
190 uap
->epid
, retval
, 1);
/*
 * socket_common: shared implementation called by socket() and
 * socket_delegate().
 *
 * Steps visible in this listing, in order:
 *   - record domain/type/protocol with AUDIT_ARG();
 *   - when CONFIG_MACF_SOCKET_SUBSET is set, run mac_socket_check_create();
 *   - call priv_check_cred() for PRIV_NET_PRIVILEGED_SOCKET_DELEGATE;
 *   - allocate a descriptor and fileproc with falloc();
 *   - mark the fileproc FREAD|FWRITE and attach socketops;
 *   - create the socket with socreate_delegate() (which also receives
 *     epid) or with socreate();
 *   - store the socket in fp->f_data, then call procfdtbl_releasefd()
 *     and fp_drop().
 *
 * NOTE(review): the remaining parameters, the conditions that select
 * between these steps, and every error/return path sit on source lines
 * missing from this listing (the embedded line numbers have gaps) --
 * confirm against the full file.
 */
194 socket_common(struct proc
*p
,
206 AUDIT_ARG(socket
, domain
, type
, protocol
);
207 #if CONFIG_MACF_SOCKET_SUBSET
208 if ((error
= mac_socket_check_create(kauth_cred_get(), domain
,
209 type
, protocol
)) != 0)
211 #endif /* MAC_SOCKET_SUBSET */
/* Privilege check for the delegate case; its guard is not visible here. */
214 error
= priv_check_cred(kauth_cred_get(),
215 PRIV_NET_PRIVILEGED_SOCKET_DELEGATE
, 0);
220 error
= falloc(p
, &fp
, &fd
, vfs_context_current());
224 fp
->f_flag
= FREAD
|FWRITE
;
225 fp
->f_ops
= &socketops
;
/* Two creation calls are visible; only the first one is given epid. */
228 error
= socreate_delegate(domain
, &so
, type
, protocol
, epid
);
230 error
= socreate(domain
, &so
, type
, protocol
);
235 fp
->f_data
= (caddr_t
)so
;
238 procfdtbl_releasefd(p
, fd
, NULL
);
240 fp_drop(p
, fd
, fp
, 1);
250 * EDESTADDRREQ Destination address required
251 * EBADF Bad file descriptor
252 * EACCES Mandatory Access Control failure
253 * file_socket:ENOTSOCK
255 * getsockaddr:ENAMETOOLONG Filename too long
256 * getsockaddr:EINVAL Invalid argument
257 * getsockaddr:ENOMEM Not enough space
258 * getsockaddr:EFAULT Bad address
/*
 * bind: system-call entry point operating on the socket behind
 * descriptor uap->s; p and retval are marked __unused.
 *
 * Visible flow:
 *   - audit the descriptor and resolve it with file_socket();
 *   - set error to EDESTADDRREQ when uap->name is USER_ADDR_NULL;
 *   - obtain the address through getsockaddr() when uap->namelen is
 *     larger than sizeof (ss), or through getsockaddr_s() into the
 *     on-stack sockaddr_storage, in which case sa is pointed at ss;
 *   - audit the address;
 *   - call sobindlock(so, sa, 1); with CONFIG_MACF_SOCKET_SUBSET this
 *     happens only after mac_socket_check_bind() returns 0.
 *
 * NOTE(review): want_free is initialised to TRUE here, but the lines
 * that consume it, and the error/cleanup paths, are missing from this
 * listing.
 */
263 bind(__unused proc_t p
, struct bind_args
*uap
, __unused
int32_t *retval
)
265 struct sockaddr_storage ss
;
266 struct sockaddr
*sa
= NULL
;
268 boolean_t want_free
= TRUE
;
271 AUDIT_ARG(fd
, uap
->s
);
272 error
= file_socket(uap
->s
, &so
);
279 if (uap
->name
== USER_ADDR_NULL
) {
280 error
= EDESTADDRREQ
;
/* Oversized addresses take the getsockaddr() route; others fit in ss. */
283 if (uap
->namelen
> sizeof (ss
)) {
284 error
= getsockaddr(so
, &sa
, uap
->name
, uap
->namelen
, TRUE
);
286 error
= getsockaddr_s(so
, &ss
, uap
->name
, uap
->namelen
, TRUE
);
288 sa
= (struct sockaddr
*)&ss
;
294 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
295 #if CONFIG_MACF_SOCKET_SUBSET
296 if ((error
= mac_socket_check_bind(kauth_cred_get(), so
, sa
)) == 0)
297 error
= sobindlock(so
, sa
, 1); /* will lock socket */
299 error
= sobindlock(so
, sa
, 1); /* will lock socket */
300 #endif /* MAC_SOCKET_SUBSET */
311 * EACCES Mandatory Access Control failure
312 * file_socket:ENOTSOCK
315 * solisten:EOPNOTSUPP
/*
 * listen: system-call entry point operating on the socket behind
 * descriptor uap->s; p and retval are marked __unused.
 *
 * Visible flow: audit the descriptor, resolve it with file_socket(),
 * then call solisten(so, uap->backlog).  With CONFIG_MACF_SOCKET_SUBSET
 * a mac_socket_check_listen() call precedes solisten().
 *
 * NOTE(review): the tests on error between these calls are on source
 * lines missing from this listing.
 */
319 listen(__unused
struct proc
*p
, struct listen_args
*uap
,
320 __unused
int32_t *retval
)
325 AUDIT_ARG(fd
, uap
->s
);
326 error
= file_socket(uap
->s
, &so
);
330 #if CONFIG_MACF_SOCKET_SUBSET
332 error
= mac_socket_check_listen(kauth_cred_get(), so
);
334 error
= solisten(so
, uap
->backlog
);
337 error
= solisten(so
, uap
->backlog
);
338 #endif /* MAC_SOCKET_SUBSET */
347 * Returns: fp_getfsock:EBADF Bad file descriptor
348 * fp_getfsock:EOPNOTSUPP ...
349 * xlate => :ENOTSOCK Socket operation on non-socket
350 * :EFAULT Bad address on copyin/copyout
351 * :EBADF Bad file descriptor
352 * :EOPNOTSUPP Operation not supported on socket
353 * :EINVAL Invalid argument
354 * :EWOULDBLOCK Operation would block
355 * :ECONNABORTED Connection aborted
356 * :EINTR Interrupted function
357 * :EACCES Mandatory Access Control failure
358 * falloc_locked:ENFILE Too many files open in system
359 * falloc_locked:EMFILE Too many open files
360 * falloc_locked:ENOMEM Not enough space
/*
 * accept_nocancel: body of accept() without the cancellation test.
 *
 * Steps visible in this listing, in order:
 *   - audit the descriptor and copyin() the caller's length from
 *     uap->anamelen into namelen;
 *   - resolve the listening socket (head) with fp_getfsock(), with a
 *     special case for EOPNOTSUPP;
 *   - optional MAC check (mac_socket_check_accept), then lock head;
 *   - choose mutex_held: the protocol's pr_getlock() result when that
 *     callback exists, the domain's dom_mtx in the other assignment;
 *   - test SO_ACCEPTCONN / PR_CONNREQUIRED, and the non-blocking case
 *     (SS_NBIO with an empty so_comp queue);
 *   - msleep() on &head->so_timeo while so_comp is empty and so_error is
 *     0; SS_CANTRCVMORE sets so_error to ECONNABORTED and SS_DRAINING
 *     sets error to ECONNABORTED;
 *   - take the first entry of so_comp as so and unlink it, then unlock
 *     head with socket_unlock(head, 0);
 *   - optional mac_socket_check_accepted() and soacceptfilter() checks;
 *   - falloc() a descriptor for so; the failure handling clears
 *     SS_NOFDREF | SS_COMP on so;
 *   - attach socketops and so to the new fileproc, relock head, clear
 *     SS_COMP, call soacceptlock(), unlock head;
 *   - copyout() at most MIN(namelen, sa_len) bytes of the address to
 *     uap->name, then copyout() namelen to uap->anamelen;
 *   - call sodefunct() if so carries SOF_DEFUNCT, unlock so, then
 *     procfdtbl_releasefd() and fp_drop() on the new descriptor.
 *
 * NOTE(review): many source lines are missing from this listing (gaps in
 * the embedded line numbers), including the error values assigned in
 * several branches, the goto/return statements, and the comment
 * delimiters -- confirm details against the full file.
 */
364 accept_nocancel(struct proc
*p
, struct accept_nocancel_args
*uap
,
368 struct sockaddr
*sa
= NULL
;
371 struct socket
*head
, *so
= NULL
;
372 lck_mtx_t
*mutex_held
;
375 short fflag
; /* type must match fp->f_flag */
380 AUDIT_ARG(fd
, uap
->s
);
383 error
= copyin(uap
->anamelen
, (caddr_t
)&namelen
,
388 error
= fp_getfsock(p
, fd
, &fp
, &head
);
390 if (error
== EOPNOTSUPP
)
398 #if CONFIG_MACF_SOCKET_SUBSET
399 if ((error
= mac_socket_check_accept(kauth_cred_get(), head
)) != 0)
401 #endif /* MAC_SOCKET_SUBSET */
403 socket_lock(head
, 1);
/* Select the mutex that is later handed to msleep() and lck_mtx_assert(). */
405 if (head
->so_proto
->pr_getlock
!= NULL
) {
406 mutex_held
= (*head
->so_proto
->pr_getlock
)(head
, 0);
409 mutex_held
= head
->so_proto
->pr_domain
->dom_mtx
;
413 if ((head
->so_options
& SO_ACCEPTCONN
) == 0) {
414 if ((head
->so_proto
->pr_flags
& PR_CONNREQUIRED
) == 0) {
417 /* POSIX: The socket is not accepting connections */
420 socket_unlock(head
, 1);
423 if ((head
->so_state
& SS_NBIO
) && head
->so_comp
.tqh_first
== NULL
) {
424 socket_unlock(head
, 1);
/* Wait until a connection is queued on so_comp or so_error is set. */
428 while (TAILQ_EMPTY(&head
->so_comp
) && head
->so_error
== 0) {
429 if (head
->so_state
& SS_CANTRCVMORE
) {
430 head
->so_error
= ECONNABORTED
;
433 if (head
->so_usecount
< 1)
434 panic("accept: head=%p refcount=%d\n", head
,
436 error
= msleep((caddr_t
)&head
->so_timeo
, mutex_held
,
437 PSOCK
| PCATCH
, "accept", 0);
438 if (head
->so_usecount
< 1)
439 panic("accept: 2 head=%p refcount=%d\n", head
,
441 if ((head
->so_state
& SS_DRAINING
)) {
442 error
= ECONNABORTED
;
445 socket_unlock(head
, 1);
449 if (head
->so_error
) {
450 error
= head
->so_error
;
452 socket_unlock(head
, 1);
458 * At this point we know that there is at least one connection
459 * ready to be accepted. Remove it from the queue prior to
460 * allocating the file descriptor for it since falloc() may
461 * block allowing another process to accept the connection
464 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
465 so
= TAILQ_FIRST(&head
->so_comp
);
466 TAILQ_REMOVE(&head
->so_comp
, so
, so_list
);
468 /* unlock head to avoid deadlock with select, keep a ref on head */
469 socket_unlock(head
, 0);
471 #if CONFIG_MACF_SOCKET_SUBSET
473 * Pass the pre-accepted socket to the MAC framework. This is
474 * cheaper than allocating a file descriptor for the socket,
475 * calling the protocol accept callback, and possibly freeing
476 * the file descriptor should the MAC check fails.
478 if ((error
= mac_socket_check_accepted(kauth_cred_get(), so
)) != 0) {
480 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
482 socket_unlock(so
, 1);
484 /* Drop reference on listening socket */
488 #endif /* MAC_SOCKET_SUBSET */
491 * Pass the pre-accepted socket to any interested socket filter(s).
492 * Upon failure, the socket would have been closed by the callee.
494 if (so
->so_filt
!= NULL
&& (error
= soacceptfilter(so
)) != 0) {
495 /* Drop reference on listening socket */
497 /* Propagate socket filter's error code to the caller */
502 error
= falloc(p
, &fp
, &newfd
, vfs_context_current());
505 * Probably ran out of file descriptors.
507 * <rdar://problem/8554930>
508 * Don't put this back on the socket like we used to, that
509 * just causes the client to spin. Drop the socket.
512 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
514 socket_unlock(so
, 1);
/* Bind the accepted socket to the freshly allocated fileproc. */
521 fp
->f_ops
= &socketops
;
522 fp
->f_data
= (caddr_t
)so
;
523 socket_lock(head
, 0);
526 so
->so_state
&= ~SS_COMP
;
528 (void) soacceptlock(so
, &sa
, 0);
529 socket_unlock(head
, 1);
537 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
542 /* save sa_len before it is destroyed */
544 namelen
= MIN(namelen
, sa_len
);
545 error
= copyout(sa
, uap
->name
, namelen
);
547 /* return the actual, untruncated address length */
550 error
= copyout((caddr_t
)&namelen
, uap
->anamelen
,
557 * If the socket has been marked as inactive by sosetdefunct(),
558 * disallow further operations on it.
560 if (so
->so_flags
& SOF_DEFUNCT
) {
561 sodefunct(current_proc(), so
,
562 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL
);
566 socket_unlock(so
, 1);
569 procfdtbl_releasefd(p
, newfd
, NULL
);
570 fp_drop(p
, newfd
, fp
, 1);
/*
 * accept: cancellable wrapper around accept_nocancel().
 *
 * Calls __pthread_testcancel(1) first, then passes p, uap (cast to
 * struct accept_nocancel_args *) and retval to accept_nocancel() and
 * returns its result.
 */
579 accept(struct proc
*p
, struct accept_args
*uap
, int32_t *retval
)
581 __pthread_testcancel(1);
582 return(accept_nocancel(p
, (struct accept_nocancel_args
*)uap
, retval
));
587 * EBADF Bad file descriptor
588 * EALREADY Connection already in progress
589 * EINPROGRESS Operation in progress
590 * ECONNABORTED Connection aborted
591 * EINTR Interrupted function
592 * EACCES Mandatory Access Control failure
593 * file_socket:ENOTSOCK
595 * getsockaddr:ENAMETOOLONG Filename too long
596 * getsockaddr:EINVAL Invalid argument
597 * getsockaddr:ENOMEM Not enough space
598 * getsockaddr:EFAULT Bad address
599 * soconnectlock:EOPNOTSUPP
600 * soconnectlock:EISCONN
601 * soconnectlock:??? [depends on protocol, filters]
604 * Imputed: so_error error may be set from so_error, which
605 * may have been set by soconnectlock.
/*
 * connect: cancellable wrapper around connect_nocancel().
 *
 * Calls __pthread_testcancel(1) first, then passes p, uap (cast to
 * struct connect_nocancel_args *) and retval to connect_nocancel() and
 * returns its result.
 */
609 connect(struct proc
*p
, struct connect_args
*uap
, int32_t *retval
)
611 __pthread_testcancel(1);
612 return(connect_nocancel(p
, (struct connect_nocancel_args
*)uap
, retval
));
/*
 * connect_nocancel: body of connect() without the cancellation test.
 * p and retval are declared unused via #pragma.
 *
 * Visible flow:
 *   - audit the descriptor and resolve it with file_socket();
 *   - set dgram when so->so_type is SOCK_DGRAM;
 *   - obtain the address before any socket lock is taken: getsockaddr()
 *     when uap->namelen is larger than sizeof (ss), or getsockaddr_s()
 *     into the on-stack ss, in which case sa is pointed at ss; both
 *     receive !dgram as their last argument;
 *   - call connectit(so, sa);
 *   - afterwards test "sa != NULL && sa != SA(&ss)" (presumably to
 *     release a getsockaddr() allocation -- the guarded statement is not
 *     visible) and test error against ERESTART.
 *
 * NOTE(review): the statements guarded by the last two tests, and the
 * error/return paths, are on source lines missing from this listing.
 */
616 connect_nocancel(proc_t p
, struct connect_nocancel_args
*uap
, int32_t *retval
)
618 #pragma unused(p, retval)
620 struct sockaddr_storage ss
;
621 struct sockaddr
*sa
= NULL
;
626 AUDIT_ARG(fd
, uap
->s
);
627 error
= file_socket(fd
, &so
);
636 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
637 * if this is a datagram socket; translate for other types.
639 dgram
= (so
->so_type
== SOCK_DGRAM
);
641 /* Get socket address now before we obtain socket lock */
642 if (uap
->namelen
> sizeof (ss
)) {
643 error
= getsockaddr(so
, &sa
, uap
->name
, uap
->namelen
, !dgram
);
645 error
= getsockaddr_s(so
, &ss
, uap
->name
, uap
->namelen
, !dgram
);
647 sa
= (struct sockaddr
*)&ss
;
652 error
= connectit(so
, sa
);
654 if (sa
!= NULL
&& sa
!= SA(&ss
))
656 if (error
== ERESTART
)
/*
 * connectx_nocancel: body of connectx() without the cancellation test.
 *
 * Visible flow:
 *   - audit the descriptor and resolve it with file_socket();
 *   - set dgram when so->so_type is SOCK_DGRAM;
 *   - build src_sl with getsockaddrlist() when uap->src is not
 *     USER_ADDR_NULL, and build dst_sl from uap->dsts / uap->dstlen;
 *   - VERIFY that dst_sl is non-NULL and non-empty;
 *   - call connectitx() with both lists, p and uap->ifscope;
 *   - test error against ERESTART;
 *   - when uap->cid is not USER_ADDR_NULL, copyout() cid (initialised
 *     to CONNID_ANY) and ignore the copyout result;
 *   - release both lists with sockaddrlist_free().
 *
 * NOTE(review): the pragma below marks p as unused, yet p is passed to
 * connectitx() further down -- the pragma looks stale for p.
 * NOTE(review): the remaining connectitx() arguments and the error paths
 * are on source lines missing from this listing.
 */
664 connectx_nocancel(struct proc
*p
, struct connectx_args
*uap
, int *retval
)
666 #pragma unused(p, retval)
667 struct sockaddr_list
*src_sl
= NULL
, *dst_sl
= NULL
;
669 int error
, fd
= uap
->s
;
671 connid_t cid
= CONNID_ANY
;
673 AUDIT_ARG(fd
, uap
->s
);
674 error
= file_socket(fd
, &so
);
683 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
684 * if this is a datagram socket; translate for other types.
686 dgram
= (so
->so_type
== SOCK_DGRAM
);
689 * Get socket address(es) now before we obtain socket lock; use
690 * sockaddr_list for src address for convenience, if present,
691 * even though it won't hold more than one.
693 if (uap
->src
!= USER_ADDR_NULL
&& (error
= getsockaddrlist(so
,
694 &src_sl
, uap
->src
, uap
->srclen
, dgram
)) != 0)
697 error
= getsockaddrlist(so
, &dst_sl
, uap
->dsts
, uap
->dstlen
, dgram
);
701 VERIFY(dst_sl
!= NULL
&&
702 !TAILQ_EMPTY(&dst_sl
->sl_head
) && dst_sl
->sl_cnt
> 0);
704 error
= connectitx(so
, &src_sl
, &dst_sl
, p
, uap
->ifscope
,
706 if (error
== ERESTART
)
/* Best-effort report of the connection id; the copyout result is dropped. */
709 if (uap
->cid
!= USER_ADDR_NULL
)
710 (void) copyout(&cid
, uap
->cid
, sizeof (cid
));
715 sockaddrlist_free(src_sl
);
717 sockaddrlist_free(dst_sl
);
/*
 * connectx: cancellable wrapper around connectx_nocancel().
 *
 * Calls __pthread_testcancel(1) first, then passes p, uap and retval
 * straight through to connectx_nocancel() and returns its result.
 */
722 connectx(struct proc
*p
, struct connectx_args
*uap
, int *retval
)
725 * Due to similiarity with a POSIX interface, define as
726 * an unofficial cancellation point.
728 __pthread_testcancel(1);
729 return (connectx_nocancel(p
, uap
, retval
));
/*
 * connectit: connect socket so to address sa; called by
 * connect_nocancel().
 *
 * Visible flow:
 *   - audit the address and, with CONFIG_MACF_SOCKET_SUBSET, run
 *     mac_socket_check_connect();
 *   - test for a non-blocking socket that is already connecting
 *     (SS_NBIO and SS_ISCONNECTING both set);
 *   - call soconnectlock(so, sa, 0);
 *   - a later statement clears SS_ISCONNECTING, and the NBIO/connecting
 *     test is repeated;
 *   - while SS_ISCONNECTING is set and so_error is 0, msleep() on
 *     &so->so_timeo with PSOCK | PCATCH, using the protocol's
 *     pr_getlock() mutex when available and the domain's dom_mtx in the
 *     other assignment; SS_DRAINING sets error to ECONNABORTED;
 *   - copy so->so_error into error, then socket_unlock(so, 1).
 *
 * NOTE(review): the error values chosen in the two NBIO branches, the
 * socket_lock() call and the goto/return statements are on source lines
 * missing from this listing -- confirm against the full file.
 */
733 connectit(struct socket
*so
, struct sockaddr
*sa
)
737 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
738 #if CONFIG_MACF_SOCKET_SUBSET
739 if ((error
= mac_socket_check_connect(kauth_cred_get(), so
, sa
)) != 0)
741 #endif /* MAC_SOCKET_SUBSET */
744 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
748 error
= soconnectlock(so
, sa
, 0);
750 so
->so_state
&= ~SS_ISCONNECTING
;
753 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
/* Sleep until the connection attempt finishes or an error is posted. */
757 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
758 lck_mtx_t
*mutex_held
;
760 if (so
->so_proto
->pr_getlock
!= NULL
)
761 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
763 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
764 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
,
765 PSOCK
| PCATCH
, __func__
, 0);
766 if (so
->so_state
& SS_DRAINING
) {
767 error
= ECONNABORTED
;
773 error
= so
->so_error
;
777 socket_unlock(so
, 1);
/*
 * connectitx: list-based counterpart of connectit(); called by
 * connectx_nocancel().
 *
 * Visible flow:
 *   - VERIFY that dst_sl and *dst_sl are non-NULL;
 *   - for every entry of (*dst_sl)->sl_head: VERIFY se_addr is non-NULL,
 *     audit it, and with CONFIG_MACF_SOCKET_SUBSET run
 *     mac_socket_check_connect() on it;
 *   - test for a non-blocking socket that is already connecting
 *     (SS_NBIO and SS_ISCONNECTING both set);
 *   - call soconnectxlocked() with src_sl, dst_sl, p, ifscope, aid and
 *     pcid;
 *   - a later statement clears SS_ISCONNECTING, and the NBIO/connecting
 *     test is repeated;
 *   - while SS_ISCONNECTING is set and so_error is 0, msleep() on
 *     &so->so_timeo with PSOCK | PCATCH, using the protocol's
 *     pr_getlock() mutex when available and the domain's dom_mtx in the
 *     other assignment; SS_DRAINING sets error to ECONNABORTED;
 *   - copy so->so_error into error, then socket_unlock(so, 1).
 *
 * NOTE(review): the error values chosen in the two NBIO branches, the
 * socket_lock() call and the goto/return statements are on source lines
 * missing from this listing -- confirm against the full file.
 */
782 connectitx(struct socket
*so
, struct sockaddr_list
**src_sl
,
783 struct sockaddr_list
**dst_sl
, struct proc
*p
, uint32_t ifscope
,
784 associd_t aid
, connid_t
*pcid
)
786 struct sockaddr_entry
*se
;
789 VERIFY(dst_sl
!= NULL
&& *dst_sl
!= NULL
);
/* Audit (and optionally MAC-check) every destination address up front. */
791 TAILQ_FOREACH(se
, &(*dst_sl
)->sl_head
, se_link
) {
792 VERIFY(se
->se_addr
!= NULL
);
793 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()),
795 #if CONFIG_MACF_SOCKET_SUBSET
796 if ((error
= mac_socket_check_connect(kauth_cred_get(),
797 so
, se
->se_addr
)) != 0)
799 #endif /* MAC_SOCKET_SUBSET */
803 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
807 error
= soconnectxlocked(so
, src_sl
, dst_sl
, p
, ifscope
,
808 aid
, pcid
, 0, NULL
, 0);
810 so
->so_state
&= ~SS_ISCONNECTING
;
813 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
/* Sleep until the connection attempt finishes or an error is posted. */
817 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
818 lck_mtx_t
*mutex_held
;
820 if (so
->so_proto
->pr_getlock
!= NULL
)
821 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
823 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
824 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
,
825 PSOCK
| PCATCH
, __func__
, 0);
826 if (so
->so_state
& SS_DRAINING
) {
827 error
= ECONNABORTED
;
833 error
= so
->so_error
;
837 socket_unlock(so
, 1);
/*
 * peeloff: cancellable wrapper around peeloff_nocancel().
 *
 * Calls __pthread_testcancel(1) first, then passes p, uap and retval
 * straight through to peeloff_nocancel() and returns its result.
 */
842 peeloff(struct proc
*p
, struct peeloff_args
*uap
, int *retval
)
845 * Due to similiarity with a POSIX interface, define as
846 * an unofficial cancellation point.
848 __pthread_testcancel(1);
849 return (peeloff_nocancel(p
, uap
, retval
));
/*
 * peeloff_nocancel: body of peeloff() without the cancellation test.
 *
 * Visible flow:
 *   - resolve descriptor uap->s to mp_so with fp_getfsock(), with a
 *     special case for EOPNOTSUPP;
 *   - lock mp_so and call sopeelofflocked(mp_so, uap->aid, &so);
 *   - two unlock calls on mp_so are visible: socket_unlock(mp_so, 1),
 *     and socket_unlock(mp_so, 0), which keeps a reference on mp_so;
 *   - falloc() a new descriptor for so; the failure path calls
 *     sodereference(mp_so);
 *   - attach socketops and so to the new fileproc;
 *   - call sodefunct() if so carries SOF_DEFUNCT;
 *   - procfdtbl_releasefd() and fp_drop() on the new descriptor, then
 *     sodereference(mp_so) to drop the reference kept above.
 *
 * NOTE(review): the branch conditions, the remaining failure-path
 * statements and the return statements are on source lines missing from
 * this listing.
 */
853 peeloff_nocancel(struct proc
*p
, struct peeloff_args
*uap
, int *retval
)
856 struct socket
*mp_so
, *so
= NULL
;
857 int newfd
, fd
= uap
->s
;
858 short fflag
; /* type must match fp->f_flag */
863 error
= fp_getfsock(p
, fd
, &fp
, &mp_so
);
865 if (error
== EOPNOTSUPP
)
874 socket_lock(mp_so
, 1);
875 error
= sopeelofflocked(mp_so
, uap
->aid
, &so
);
877 socket_unlock(mp_so
, 1);
881 socket_unlock(mp_so
, 0); /* keep ref on mp_so for us */
884 error
= falloc(p
, &fp
, &newfd
, vfs_context_current());
886 /* drop this socket (probably ran out of file descriptors) */
888 sodereference(mp_so
); /* our mp_so ref */
/* Bind the peeled-off socket to the freshly allocated fileproc. */
893 fp
->f_ops
= &socketops
;
894 fp
->f_data
= (caddr_t
)so
;
897 * If the socket has been marked as inactive by sosetdefunct(),
898 * disallow further operations on it.
900 if (so
->so_flags
& SOF_DEFUNCT
) {
901 sodefunct(current_proc(), so
,
902 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL
);
906 procfdtbl_releasefd(p
, newfd
, NULL
);
907 fp_drop(p
, newfd
, fp
, 1);
910 sodereference(mp_so
); /* our mp_so ref */
/*
 * disconnectx: cancellable wrapper around disconnectx_nocancel().
 *
 * Calls __pthread_testcancel(1) first, then passes p, uap and retval
 * straight through to disconnectx_nocancel() and returns its result.
 */
921 disconnectx(struct proc
*p
, struct disconnectx_args
*uap
, int *retval
)
924 * Due to similiarity with a POSIX interface, define as
925 * an unofficial cancellation point.
927 __pthread_testcancel(1);
928 return (disconnectx_nocancel(p
, uap
, retval
));
/*
 * disconnectx_nocancel: body of disconnectx() without the cancellation
 * test.  p and retval are declared unused via #pragma.
 *
 * Visible flow: resolve the descriptor with file_socket(), then call
 * sodisconnectx(so, uap->aid, uap->cid).
 *
 * NOTE(review): the local declarations, the error test between the two
 * calls and the return path are on source lines missing from this
 * listing.
 */
932 disconnectx_nocancel(struct proc
*p
, struct disconnectx_args
*uap
, int *retval
)
934 #pragma unused(p, retval)
939 error
= file_socket(fd
, &so
);
947 error
= sodisconnectx(so
, uap
->aid
, uap
->cid
);
955 * socreate:EAFNOSUPPORT
956 * socreate:EPROTOTYPE
957 * socreate:EPROTONOSUPPORT
961 * socreate:??? [other protocol families, IPSEC]
967 * soconnect2:EPROTOTYPE
968 * soconnect2:??? [other protocol families]
/*
 * socketpair: system-call entry point that creates two sockets and
 * connects them to each other; retval is marked __unused.
 *
 * Visible flow:
 *   - audit domain/type/protocol;
 *   - socreate() so1 and so2 with the same domain, type and protocol;
 *   - falloc() a fileproc for each, mark it FREAD|FWRITE, attach
 *     socketops and the matching socket;
 *   - soconnect2(so1, so2); for SOCK_DGRAM also soconnect2(so2, so1),
 *     because datagram connection is asymmetric;
 *   - copyout() the two descriptors in sv[] to uap->rsv;
 *   - procfdtbl_releasefd() and fp_drop() on both descriptors.
 *
 * The trailing fp_free() / soclose() calls run in reverse order of
 * creation (fp2, fp1, so2, so1); presumably these are the unwind labels
 * for the failure paths -- the labels and gotos themselves are on source
 * lines missing from this listing.
 */
971 socketpair(struct proc
*p
, struct socketpair_args
*uap
,
972 __unused
int32_t *retval
)
974 struct fileproc
*fp1
, *fp2
;
975 struct socket
*so1
, *so2
;
976 int fd
, error
, sv
[2];
978 AUDIT_ARG(socket
, uap
->domain
, uap
->type
, uap
->protocol
);
979 error
= socreate(uap
->domain
, &so1
, uap
->type
, uap
->protocol
);
982 error
= socreate(uap
->domain
, &so2
, uap
->type
, uap
->protocol
);
986 error
= falloc(p
, &fp1
, &fd
, vfs_context_current());
990 fp1
->f_flag
= FREAD
|FWRITE
;
991 fp1
->f_ops
= &socketops
;
992 fp1
->f_data
= (caddr_t
)so1
;
995 error
= falloc(p
, &fp2
, &fd
, vfs_context_current());
999 fp2
->f_flag
= FREAD
|FWRITE
;
1000 fp2
->f_ops
= &socketops
;
1001 fp2
->f_data
= (caddr_t
)so2
;
1004 error
= soconnect2(so1
, so2
);
1008 if (uap
->type
== SOCK_DGRAM
) {
1010 * Datagram socket connection is asymmetric.
1012 error
= soconnect2(so2
, so1
);
1018 if ((error
= copyout(sv
, uap
->rsv
, 2 * sizeof (int))) != 0)
1022 procfdtbl_releasefd(p
, sv
[0], NULL
);
1023 procfdtbl_releasefd(p
, sv
[1], NULL
);
1024 fp_drop(p
, sv
[0], fp1
, 1);
1025 fp_drop(p
, sv
[1], fp2
, 1);
1030 fp_free(p
, sv
[1], fp2
);
1032 fp_free(p
, sv
[0], fp1
);
1034 (void) soclose(so2
);
1036 (void) soclose(so1
);
1041 * Returns: 0 Success
1046 * EACCES Mandatory Access Control failure
1047 * file_socket:ENOTSOCK
1049 * getsockaddr:ENAMETOOLONG Filename too long
1050 * getsockaddr:EINVAL Invalid argument
1051 * getsockaddr:ENOMEM Not enough space
1052 * getsockaddr:EFAULT Bad address
1053 * <pru_sosend>:EACCES[TCP]
1054 * <pru_sosend>:EADDRINUSE[TCP]
1055 * <pru_sosend>:EADDRNOTAVAIL[TCP]
1056 * <pru_sosend>:EAFNOSUPPORT[TCP]
1057 * <pru_sosend>:EAGAIN[TCP]
1058 * <pru_sosend>:EBADF
1059 * <pru_sosend>:ECONNRESET[TCP]
1060 * <pru_sosend>:EFAULT
1061 * <pru_sosend>:EHOSTUNREACH[TCP]
1062 * <pru_sosend>:EINTR
1063 * <pru_sosend>:EINVAL
1064 * <pru_sosend>:EISCONN[AF_INET]
1065 * <pru_sosend>:EMSGSIZE[TCP]
1066 * <pru_sosend>:ENETDOWN[TCP]
1067 * <pru_sosend>:ENETUNREACH[TCP]
1068 * <pru_sosend>:ENOBUFS
1069 * <pru_sosend>:ENOMEM[TCP]
1070 * <pru_sosend>:ENOTCONN[AF_INET]
1071 * <pru_sosend>:EOPNOTSUPP
1072 * <pru_sosend>:EPERM[TCP]
1073 * <pru_sosend>:EPIPE
1074 * <pru_sosend>:EWOULDBLOCK
1075 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1076 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
1077 * <pru_sosend>:??? [value from so_error]
/*
 * sendit: common send path called by sendto_nocancel() and
 * sendmsg_nocancel().
 *
 * Parameters (as used in the visible lines):
 *   p       - calling process; receives SIGPIPE on EPIPE
 *   s       - socket descriptor, resolved with file_socket()
 *   mp      - message header; msg_name/msg_namelen and
 *             msg_control/msg_controllen are read
 *   uiop    - uio describing the data; its residual count gives the
 *             transfer size
 *   flags   - send flags (their use is on lines not visible here)
 *   retval  - receives the number of bytes consumed from uiop
 *
 * Visible flow:
 *   - trace entry with KERNEL_DEBUG and resolve the descriptor;
 *   - when mp->msg_name is set, obtain the destination through
 *     getsockaddr() if mp->msg_namelen is larger than sizeof (ss),
 *     or through getsockaddr_s() into ss (then to points at ss);
 *   - audit the destination address;
 *   - when mp->msg_control is set, test msg_controllen against
 *     sizeof (struct cmsghdr) and load the control data with sockargs();
 *   - optional MAC check (mac_socket_check_send), skipped when
 *     SS_DEFUNCT is set;
 *   - record len = uio_resid(uiop) and call the protocol's pru_sosend;
 *   - test for a partial transfer interrupted by ERESTART, EINTR or
 *     EWOULDBLOCK;
 *   - on EPIPE, psignal(p, SIGPIPE) unless SOF_NOSIGPIPE is set;
 *   - store len - uio_resid(uiop) in *retval;
 *   - test "to != NULL && want_free" and trace exit.
 *
 * NOTE(review): the statements guarded by the partial-transfer and
 * want_free tests, the remaining pru_sosend arguments, and the error
 * paths are on source lines missing from this listing.
 */
1081 sendit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
1082 int flags
, int32_t *retval
)
1084 struct mbuf
*control
= NULL
;
1085 struct sockaddr_storage ss
;
1086 struct sockaddr
*to
= NULL
;
1087 boolean_t want_free
= TRUE
;
1092 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1094 error
= file_socket(s
, &so
);
1096 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
/* Destination address: oversized names take getsockaddr(), others fit in ss. */
1103 if (mp
->msg_name
!= USER_ADDR_NULL
) {
1104 if (mp
->msg_namelen
> sizeof (ss
)) {
1105 error
= getsockaddr(so
, &to
, mp
->msg_name
,
1106 mp
->msg_namelen
, TRUE
);
1108 error
= getsockaddr_s(so
, &ss
, mp
->msg_name
,
1109 mp
->msg_namelen
, TRUE
);
1111 to
= (struct sockaddr
*)&ss
;
1117 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), to
);
1119 if (mp
->msg_control
!= USER_ADDR_NULL
) {
1120 if (mp
->msg_controllen
< sizeof (struct cmsghdr
)) {
1124 error
= sockargs(&control
, mp
->msg_control
,
1125 mp
->msg_controllen
, MT_CONTROL
);
1130 #if CONFIG_MACF_SOCKET_SUBSET
1132 * We check the state without holding the socket lock;
1133 * if a race condition occurs, it would simply result
1134 * in an extra call to the MAC check function.
1137 !(so
->so_state
& SS_DEFUNCT
) &&
1138 (error
= mac_socket_check_send(kauth_cred_get(), so
, to
)) != 0)
1140 #endif /* MAC_SOCKET_SUBSET */
/* Remember the starting residual so the bytes sent can be computed below. */
1142 len
= uio_resid(uiop
);
1143 error
= so
->so_proto
->pr_usrreqs
->pru_sosend(so
, to
, uiop
, 0,
1146 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
1147 error
== EINTR
|| error
== EWOULDBLOCK
))
1149 /* Generation of SIGPIPE can be controlled per socket */
1150 if (error
== EPIPE
&& !(so
->so_flags
& SOF_NOSIGPIPE
))
1151 psignal(p
, SIGPIPE
);
1154 *retval
= (int)(len
- uio_resid(uiop
));
1156 if (to
!= NULL
&& want_free
)
1159 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1165 * Returns: 0 Success
1167 * sendit:??? [see sendit definition in this file]
1168 * write:??? [4056224: applicable for pipes]
/*
 * sendto: cancellable wrapper around sendto_nocancel().
 *
 * Calls __pthread_testcancel(1) first, then passes p, uap (cast to
 * struct sendto_nocancel_args *) and retval to sendto_nocancel() and
 * returns its result.
 */
1171 sendto(struct proc
*p
, struct sendto_args
*uap
, int32_t *retval
)
1173 __pthread_testcancel(1);
1174 return (sendto_nocancel(p
, (struct sendto_nocancel_args
*)uap
, retval
));
/*
 * sendto_nocancel: body of sendto() without the cancellation test.
 *
 * Visible flow:
 *   - trace entry with KERNEL_DEBUG and audit the descriptor;
 *   - create a one-iovec uio whose address space is UIO_USERSPACE64 or
 *     UIO_USERSPACE32 depending on IS_64BIT_PROCESS(p);
 *   - add the caller's buffer (uap->buf, uap->len) to the uio;
 *   - fill a local user_msghdr: msg_name = uap->to, msg_namelen =
 *     uap->tolen, msg_control = 0 (msg_iov is left unset because
 *     sendit() works from the uio);
 *   - call sendit() and trace exit with error and *retval.
 *
 * NOTE(review): the remaining uio_create() arguments, the other msghdr
 * field assignments, and the uio release are on source lines missing
 * from this listing.
 */
1178 sendto_nocancel(struct proc
*p
,
1179 struct sendto_nocancel_args
*uap
,
1182 struct user_msghdr msg
;
1186 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1187 AUDIT_ARG(fd
, uap
->s
);
1189 auio
= uio_create(1, 0,
1190 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1195 uio_addiov(auio
, uap
->buf
, uap
->len
);
1197 msg
.msg_name
= uap
->to
;
1198 msg
.msg_namelen
= uap
->tolen
;
1199 /* no need to set up msg_iov. sendit uses uio_t we send it */
1202 msg
.msg_control
= 0;
1205 error
= sendit(p
, uap
->s
, &msg
, auio
, uap
->flags
, retval
);
1211 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_END
, error
, *retval
, 0, 0, 0);
1217 * Returns: 0 Success
1220 * sendit:??? [see sendit definition in this file]
/*
 * sendmsg: cancellable wrapper around sendmsg_nocancel().
 *
 * Calls __pthread_testcancel(1) first, then passes p, uap (cast to
 * struct sendmsg_nocancel_args *) and retval to sendmsg_nocancel() and
 * returns its result.
 */
1223 sendmsg(struct proc
*p
, struct sendmsg_args
*uap
, int32_t *retval
)
1225 __pthread_testcancel(1);
1226 return (sendmsg_nocancel(p
, (struct sendmsg_nocancel_args
*)uap
, retval
));
/*
 * sendmsg_nocancel: body of sendmsg() without the cancellation test.
 *
 * Visible flow:
 *   - trace entry with KERNEL_DEBUG and audit the descriptor;
 *   - pick the user-space header layout by IS_64BIT_PROCESS(p)
 *     (user64_msghdr or user32_msghdr) and copyin() that many bytes
 *     from uap->msg;
 *   - copy every field of the 64- or 32-bit header into the
 *     layout-independent user_msg;
 *   - range-check msg_iovlen (<= 0 or > UIO_MAXIOV is traced with
 *     EMSGSIZE);
 *   - create a uio sized for msg_iovlen entries in the matching
 *     user address space;
 *   - when msg_iovlen is non-zero, copy the iovec array in with
 *     copyin_user_iovec_array(), point user_msg.msg_iov at the uio's
 *     iovec storage, and finish with uio_calculateresid(); another
 *     visible statement sets msg_iov to 0;
 *   - clear user_msg.msg_flags, since msg_flags is ignored for send;
 *   - call sendit() and trace exit.
 *
 * NOTE(review): the else keywords, error tests, remaining uio_create()
 * arguments and the uio release are on source lines missing from this
 * listing.
 */
1230 sendmsg_nocancel(struct proc
*p
, struct sendmsg_nocancel_args
*uap
, int32_t *retval
)
1232 struct user32_msghdr msg32
;
1233 struct user64_msghdr msg64
;
1234 struct user_msghdr user_msg
;
1239 struct user_iovec
*iovp
;
1241 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1242 AUDIT_ARG(fd
, uap
->s
);
/* Choose the copyin target and size that match the caller's ABI. */
1243 if (IS_64BIT_PROCESS(p
)) {
1244 msghdrp
= (caddr_t
)&msg64
;
1245 size_of_msghdr
= sizeof (msg64
);
1247 msghdrp
= (caddr_t
)&msg32
;
1248 size_of_msghdr
= sizeof (msg32
);
1250 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
1252 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
/* Normalise the ABI-specific header into user_msg, field by field. */
1256 if (IS_64BIT_PROCESS(p
)) {
1257 user_msg
.msg_flags
= msg64
.msg_flags
;
1258 user_msg
.msg_controllen
= msg64
.msg_controllen
;
1259 user_msg
.msg_control
= msg64
.msg_control
;
1260 user_msg
.msg_iovlen
= msg64
.msg_iovlen
;
1261 user_msg
.msg_iov
= msg64
.msg_iov
;
1262 user_msg
.msg_namelen
= msg64
.msg_namelen
;
1263 user_msg
.msg_name
= msg64
.msg_name
;
1265 user_msg
.msg_flags
= msg32
.msg_flags
;
1266 user_msg
.msg_controllen
= msg32
.msg_controllen
;
1267 user_msg
.msg_control
= msg32
.msg_control
;
1268 user_msg
.msg_iovlen
= msg32
.msg_iovlen
;
1269 user_msg
.msg_iov
= msg32
.msg_iov
;
1270 user_msg
.msg_namelen
= msg32
.msg_namelen
;
1271 user_msg
.msg_name
= msg32
.msg_name
;
1274 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
1275 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, EMSGSIZE
,
1280 /* allocate a uio large enough to hold the number of iovecs passed */
1281 auio
= uio_create(user_msg
.msg_iovlen
, 0,
1282 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1289 if (user_msg
.msg_iovlen
) {
1291 * get location of iovecs within the uio.
1292 * then copyin the iovecs from user space.
1294 iovp
= uio_iovsaddr(auio
);
1299 error
= copyin_user_iovec_array(user_msg
.msg_iov
,
1300 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1301 user_msg
.msg_iovlen
, iovp
);
1304 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
1306 /* finish setup of uio_t */
1307 error
= uio_calculateresid(auio
);
1312 user_msg
.msg_iov
= 0;
1315 /* msg_flags is ignored for send */
1316 user_msg
.msg_flags
= 0;
1318 error
= sendit(p
, uap
->s
, &user_msg
, auio
, uap
->flags
, retval
);
1323 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1329 * Returns: 0 Success
1333 * EACCES Mandatory Access Control failure
1336 * <pru_soreceive>:ENOBUFS
1337 * <pru_soreceive>:ENOTCONN
1338 * <pru_soreceive>:EWOULDBLOCK
1339 * <pru_soreceive>:EFAULT
1340 * <pru_soreceive>:EINTR
1341 * <pru_soreceive>:EBADF
1342 * <pru_soreceive>:EINVAL
1343 * <pru_soreceive>:EMSGSIZE
1344 * <pru_soreceive>:???
1346 * Notes: Additional return values from calls through <pru_soreceive>
1347 * depend on protocols other than TCP or AF_UNIX, which are
1351 recvit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
1352 user_addr_t namelenp
, int32_t *retval
)
1356 struct mbuf
*m
, *control
= 0;
1359 struct sockaddr
*fromsa
= 0;
1360 struct fileproc
*fp
;
1362 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1364 if ((error
= fp_lookup(p
, s
, &fp
, 1))) {
1365 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1369 if (fp
->f_type
!= DTYPE_SOCKET
) {
1370 fp_drop(p
, s
, fp
, 1);
1375 so
= (struct socket
*)fp
->f_data
;
1377 fp_drop(p
, s
, fp
, 1);
1384 #if CONFIG_MACF_SOCKET_SUBSET
1386 * We check the state without holding the socket lock;
1387 * if a race condition occurs, it would simply result
1388 * in an extra call to the MAC check function.
1390 if (!(so
->so_state
& SS_DEFUNCT
) &&
1391 !(so
->so_state
& SS_ISCONNECTED
) &&
1392 !(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) &&
1393 (error
= mac_socket_check_receive(kauth_cred_get(), so
)) != 0)
1395 #endif /* MAC_SOCKET_SUBSET */
1396 if (uio_resid(uiop
) < 0) {
1397 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, EINVAL
, 0, 0, 0, 0);
1402 len
= uio_resid(uiop
);
1403 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive(so
, &fromsa
, uiop
,
1404 (struct mbuf
**)0, mp
->msg_control
? &control
: (struct mbuf
**)0,
1407 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()),
1410 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
1411 error
== EINTR
|| error
== EWOULDBLOCK
))
1418 *retval
= len
- uio_resid(uiop
);
1420 socklen_t sa_len
= 0;
1422 len
= mp
->msg_namelen
;
1423 if (len
<= 0 || fromsa
== 0) {
1427 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1429 sa_len
= fromsa
->sa_len
;
1430 len
= MIN((unsigned int)len
, sa_len
);
1431 error
= copyout(fromsa
, mp
->msg_name
, (unsigned)len
);
1435 mp
->msg_namelen
= sa_len
;
1436 /* return the actual, untruncated address length */
1438 (error
= copyout((caddr_t
)&sa_len
, namelenp
,
1443 if (mp
->msg_control
) {
1444 len
= mp
->msg_controllen
;
1446 mp
->msg_controllen
= 0;
1447 ctlbuf
= mp
->msg_control
;
1449 while (m
&& len
> 0) {
1450 unsigned int tocopy
;
1451 struct cmsghdr
*cp
= mtod(m
, struct cmsghdr
*);
1452 int cp_size
= CMSG_ALIGN(cp
->cmsg_len
);
1453 int buflen
= m
->m_len
;
1455 while (buflen
> 0 && len
> 0) {
1458 SCM_TIMESTAMP hack because struct timeval has a
1459 * different size for 32 bits and 64 bits processes
1461 if (cp
->cmsg_level
== SOL_SOCKET
&& cp
->cmsg_type
== SCM_TIMESTAMP
) {
1462 unsigned char tmp_buffer
[CMSG_SPACE(sizeof(struct user64_timeval
))];
1463 struct cmsghdr
*tmp_cp
= (struct cmsghdr
*)(void *)tmp_buffer
;
1465 struct timeval
*tv
= (struct timeval
*)(void *)CMSG_DATA(cp
);
1467 tmp_cp
->cmsg_level
= SOL_SOCKET
;
1468 tmp_cp
->cmsg_type
= SCM_TIMESTAMP
;
1470 if (proc_is64bit(p
)) {
1471 struct user64_timeval
*tv64
= (struct user64_timeval
*)(void *)CMSG_DATA(tmp_cp
);
1473 tv64
->tv_sec
= tv
->tv_sec
;
1474 tv64
->tv_usec
= tv
->tv_usec
;
1476 tmp_cp
->cmsg_len
= CMSG_LEN(sizeof(struct user64_timeval
));
1477 tmp_space
= CMSG_SPACE(sizeof(struct user64_timeval
));
1479 struct user32_timeval
*tv32
= (struct user32_timeval
*)(void *)CMSG_DATA(tmp_cp
);
1481 tv32
->tv_sec
= tv
->tv_sec
;
1482 tv32
->tv_usec
= tv
->tv_usec
;
1484 tmp_cp
->cmsg_len
= CMSG_LEN(sizeof(struct user32_timeval
));
1485 tmp_space
= CMSG_SPACE(sizeof(struct user32_timeval
));
1487 if (len
>= tmp_space
) {
1490 mp
->msg_flags
|= MSG_CTRUNC
;
1493 error
= copyout(tmp_buffer
, ctlbuf
, tocopy
);
1499 if (cp_size
> buflen
) {
1500 panic("cp_size > buflen, something wrong with alignment!");
1503 if (len
>= cp_size
) {
1506 mp
->msg_flags
|= MSG_CTRUNC
;
1510 error
= copyout((caddr_t
) cp
, ctlbuf
,
1521 cp
= (struct cmsghdr
*)(void *)((unsigned char *) cp
+ cp_size
);
1522 cp_size
= CMSG_ALIGN(cp
->cmsg_len
);
1527 mp
->msg_controllen
= ctlbuf
- mp
->msg_control
;
1531 FREE(fromsa
, M_SONAME
);
1534 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1536 fp_drop(p
, s
, fp
, 0);
1541 * Returns: 0 Success
1545 * read:??? [4056224: applicable for pipes]
1547 * Notes: The read entry point is only called as part of support for
1548 * binary backward compatability; new code should use read
1549 * instead of recv or recvfrom when attempting to read data
1552 * For full documentation of the return codes from recvit, see
1553 * the block header for the recvit function.
1556 recvfrom(struct proc
*p
, struct recvfrom_args
*uap
, int32_t *retval
)
1558 __pthread_testcancel(1);
1559 return(recvfrom_nocancel(p
, (struct recvfrom_nocancel_args
*)uap
, retval
));
1563 recvfrom_nocancel(struct proc
*p
, struct recvfrom_nocancel_args
*uap
, int32_t *retval
)
1565 struct user_msghdr msg
;
1569 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1570 AUDIT_ARG(fd
, uap
->s
);
1572 if (uap
->fromlenaddr
) {
1573 error
= copyin(uap
->fromlenaddr
,
1574 (caddr_t
)&msg
.msg_namelen
, sizeof (msg
.msg_namelen
));
1578 msg
.msg_namelen
= 0;
1580 msg
.msg_name
= uap
->from
;
1581 auio
= uio_create(1, 0,
1582 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1588 uio_addiov(auio
, uap
->buf
, uap
->len
);
1589 /* no need to set up msg_iov. recvit uses uio_t we send it */
1592 msg
.msg_control
= 0;
1593 msg
.msg_controllen
= 0;
1594 msg
.msg_flags
= uap
->flags
;
1595 error
= recvit(p
, uap
->s
, &msg
, auio
, uap
->fromlenaddr
, retval
);
1600 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1606 * Returns: 0 Success
1613 * Notes: For full documentation of the return codes from recvit, see
1614 * the block header for the recvit function.
1617 recvmsg(struct proc
*p
, struct recvmsg_args
*uap
, int32_t *retval
)
1619 __pthread_testcancel(1);
1620 return(recvmsg_nocancel(p
, (struct recvmsg_nocancel_args
*)uap
, retval
));
1624 recvmsg_nocancel(struct proc
*p
, struct recvmsg_nocancel_args
*uap
, int32_t *retval
)
1626 struct user32_msghdr msg32
;
1627 struct user64_msghdr msg64
;
1628 struct user_msghdr user_msg
;
1634 struct user_iovec
*iovp
;
1636 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1637 AUDIT_ARG(fd
, uap
->s
);
1638 if (IS_64BIT_PROCESS(p
)) {
1639 msghdrp
= (caddr_t
)&msg64
;
1640 size_of_msghdr
= sizeof (msg64
);
1642 msghdrp
= (caddr_t
)&msg32
;
1643 size_of_msghdr
= sizeof (msg32
);
1645 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
1647 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1651 /* only need to copy if user process is not 64-bit */
1652 if (IS_64BIT_PROCESS(p
)) {
1653 user_msg
.msg_flags
= msg64
.msg_flags
;
1654 user_msg
.msg_controllen
= msg64
.msg_controllen
;
1655 user_msg
.msg_control
= msg64
.msg_control
;
1656 user_msg
.msg_iovlen
= msg64
.msg_iovlen
;
1657 user_msg
.msg_iov
= msg64
.msg_iov
;
1658 user_msg
.msg_namelen
= msg64
.msg_namelen
;
1659 user_msg
.msg_name
= msg64
.msg_name
;
1661 user_msg
.msg_flags
= msg32
.msg_flags
;
1662 user_msg
.msg_controllen
= msg32
.msg_controllen
;
1663 user_msg
.msg_control
= msg32
.msg_control
;
1664 user_msg
.msg_iovlen
= msg32
.msg_iovlen
;
1665 user_msg
.msg_iov
= msg32
.msg_iov
;
1666 user_msg
.msg_namelen
= msg32
.msg_namelen
;
1667 user_msg
.msg_name
= msg32
.msg_name
;
1670 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
1671 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, EMSGSIZE
,
1676 user_msg
.msg_flags
= uap
->flags
;
1678 /* allocate a uio large enough to hold the number of iovecs passed */
1679 auio
= uio_create(user_msg
.msg_iovlen
, 0,
1680 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1688 * get location of iovecs within the uio. then copyin the iovecs from
1691 iovp
= uio_iovsaddr(auio
);
1696 uiov
= user_msg
.msg_iov
;
1697 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
1698 error
= copyin_user_iovec_array(uiov
,
1699 IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
,
1700 user_msg
.msg_iovlen
, iovp
);
1704 /* finish setup of uio_t */
1705 error
= uio_calculateresid(auio
);
1710 error
= recvit(p
, uap
->s
, &user_msg
, auio
, 0, retval
);
1712 user_msg
.msg_iov
= uiov
;
1713 if (IS_64BIT_PROCESS(p
)) {
1714 msg64
.msg_flags
= user_msg
.msg_flags
;
1715 msg64
.msg_controllen
= user_msg
.msg_controllen
;
1716 msg64
.msg_control
= user_msg
.msg_control
;
1717 msg64
.msg_iovlen
= user_msg
.msg_iovlen
;
1718 msg64
.msg_iov
= user_msg
.msg_iov
;
1719 msg64
.msg_namelen
= user_msg
.msg_namelen
;
1720 msg64
.msg_name
= user_msg
.msg_name
;
1722 msg32
.msg_flags
= user_msg
.msg_flags
;
1723 msg32
.msg_controllen
= user_msg
.msg_controllen
;
1724 msg32
.msg_control
= user_msg
.msg_control
;
1725 msg32
.msg_iovlen
= user_msg
.msg_iovlen
;
1726 msg32
.msg_iov
= user_msg
.msg_iov
;
1727 msg32
.msg_namelen
= user_msg
.msg_namelen
;
1728 msg32
.msg_name
= user_msg
.msg_name
;
1730 error
= copyout(msghdrp
, uap
->msg
, size_of_msghdr
);
1736 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1741 * Returns: 0 Success
1743 * file_socket:ENOTSOCK
1746 * soshutdown:ENOTCONN
1747 * soshutdown:EADDRNOTAVAIL[TCP]
1748 * soshutdown:ENOBUFS[TCP]
1749 * soshutdown:EMSGSIZE[TCP]
1750 * soshutdown:EHOSTUNREACH[TCP]
1751 * soshutdown:ENETUNREACH[TCP]
1752 * soshutdown:ENETDOWN[TCP]
1753 * soshutdown:ENOMEM[TCP]
1754 * soshutdown:EACCES[TCP]
1755 * soshutdown:EMSGSIZE[TCP]
1756 * soshutdown:ENOBUFS[TCP]
1757 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1758 * soshutdown:??? [other protocol families]
1762 shutdown(__unused
struct proc
*p
, struct shutdown_args
*uap
,
1763 __unused
int32_t *retval
)
1768 AUDIT_ARG(fd
, uap
->s
);
1769 error
= file_socket(uap
->s
, &so
);
1776 error
= soshutdown((struct socket
*)so
, uap
->how
);
1783 * Returns: 0 Success
1786 * EACCES Mandatory Access Control failure
1787 * file_socket:ENOTSOCK
1790 * sosetopt:ENOPROTOOPT
1794 * sosetopt:EOPNOTSUPP[AF_UNIX]
1799 setsockopt(struct proc
*p
, struct setsockopt_args
*uap
,
1800 __unused
int32_t *retval
)
1803 struct sockopt sopt
;
1806 AUDIT_ARG(fd
, uap
->s
);
1807 if (uap
->val
== 0 && uap
->valsize
!= 0)
1809 /* No bounds checking on size (it's unsigned) */
1811 error
= file_socket(uap
->s
, &so
);
1815 sopt
.sopt_dir
= SOPT_SET
;
1816 sopt
.sopt_level
= uap
->level
;
1817 sopt
.sopt_name
= uap
->name
;
1818 sopt
.sopt_val
= uap
->val
;
1819 sopt
.sopt_valsize
= uap
->valsize
;
1826 #if CONFIG_MACF_SOCKET_SUBSET
1827 if ((error
= mac_socket_check_setsockopt(kauth_cred_get(), so
,
1830 #endif /* MAC_SOCKET_SUBSET */
1831 error
= sosetoptlock(so
, &sopt
, 1); /* will lock socket */
1840 * Returns: 0 Success
1843 * EACCES Mandatory Access Control failure
1846 * file_socket:ENOTSOCK
1851 getsockopt(struct proc
*p
, struct getsockopt_args
*uap
,
1852 __unused
int32_t *retval
)
1856 struct sockopt sopt
;
1859 error
= file_socket(uap
->s
, &so
);
1863 error
= copyin(uap
->avalsize
, (caddr_t
)&valsize
,
1867 /* No bounds checking on size (it's unsigned) */
1871 sopt
.sopt_dir
= SOPT_GET
;
1872 sopt
.sopt_level
= uap
->level
;
1873 sopt
.sopt_name
= uap
->name
;
1874 sopt
.sopt_val
= uap
->val
;
1875 sopt
.sopt_valsize
= (size_t)valsize
; /* checked non-negative above */
1882 #if CONFIG_MACF_SOCKET_SUBSET
1883 if ((error
= mac_socket_check_getsockopt(kauth_cred_get(), so
,
1886 #endif /* MAC_SOCKET_SUBSET */
1887 error
= sogetoptlock((struct socket
*)so
, &sopt
, 1); /* will lock */
1889 valsize
= sopt
.sopt_valsize
;
1890 error
= copyout((caddr_t
)&valsize
, uap
->avalsize
,
1902 * Returns: 0 Success
1904 * file_socket:ENOTSOCK
1908 * <pru_sockaddr>:ENOBUFS[TCP]
1909 * <pru_sockaddr>:ECONNRESET[TCP]
1910 * <pru_sockaddr>:EINVAL[AF_UNIX]
1911 * <sf_getsockname>:???
1915 getsockname(__unused
struct proc
*p
, struct getsockname_args
*uap
,
1916 __unused
int32_t *retval
)
1919 struct sockaddr
*sa
;
1924 error
= file_socket(uap
->fdes
, &so
);
1927 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof (socklen_t
));
1936 error
= (*so
->so_proto
->pr_usrreqs
->pru_sockaddr
)(so
, &sa
);
1938 error
= sflt_getsockname(so
, &sa
);
1939 if (error
== EJUSTRETURN
)
1942 socket_unlock(so
, 1);
1950 sa_len
= sa
->sa_len
;
1951 len
= MIN(len
, sa_len
);
1952 error
= copyout((caddr_t
)sa
, uap
->asa
, len
);
1955 /* return the actual, untruncated address length */
1958 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof (socklen_t
));
1963 file_drop(uap
->fdes
);
1968 * Get name of peer for connected socket.
1970 * Returns: 0 Success
1974 * file_socket:ENOTSOCK
1978 * <pru_peeraddr>:???
1979 * <sf_getpeername>:???
1983 getpeername(__unused
struct proc
*p
, struct getpeername_args
*uap
,
1984 __unused
int32_t *retval
)
1987 struct sockaddr
*sa
;
1992 error
= file_socket(uap
->fdes
, &so
);
2002 if ((so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
2003 (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) {
2004 /* the socket has been shutdown, no more getpeername's */
2005 socket_unlock(so
, 1);
2010 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONFIRMING
)) == 0) {
2011 socket_unlock(so
, 1);
2015 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof (socklen_t
));
2017 socket_unlock(so
, 1);
2021 error
= (*so
->so_proto
->pr_usrreqs
->pru_peeraddr
)(so
, &sa
);
2023 error
= sflt_getpeername(so
, &sa
);
2024 if (error
== EJUSTRETURN
)
2027 socket_unlock(so
, 1);
2034 sa_len
= sa
->sa_len
;
2035 len
= MIN(len
, sa_len
);
2036 error
= copyout(sa
, uap
->asa
, len
);
2039 /* return the actual, untruncated address length */
2042 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof (socklen_t
));
2044 if (sa
) FREE(sa
, M_SONAME
);
2046 file_drop(uap
->fdes
);
2051 sockargs(struct mbuf
**mp
, user_addr_t data
, int buflen
, int type
)
2053 struct sockaddr
*sa
;
2057 size_t alloc_buflen
= (size_t)buflen
;
2059 if(alloc_buflen
> INT_MAX
/2)
2062 /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */
2063 if(type
== MT_CONTROL
)
2064 alloc_buflen
= ((buflen
- sizeof(struct cmsghdr
))*2) + sizeof(struct cmsghdr
);
2066 if (alloc_buflen
> MLEN
) {
2067 if (type
== MT_SONAME
&& alloc_buflen
<= 112)
2068 alloc_buflen
= MLEN
; /* unix domain compat. hack */
2069 else if (alloc_buflen
> MCLBYTES
)
2072 m
= m_get(M_WAIT
, type
);
2075 if (alloc_buflen
> MLEN
) {
2077 if ((m
->m_flags
& M_EXT
) == 0) {
2082 /* K64: We still copyin the original buflen because it gets expanded later
2083 * and we lie about the size of the mbuf because it only affects unp_* functions
2086 error
= copyin(data
, mtod(m
, caddr_t
), (u_int
)buflen
);
2091 if (type
== MT_SONAME
) {
2092 sa
= mtod(m
, struct sockaddr
*);
2093 sa
->sa_len
= buflen
;
2100 * Given a user_addr_t of length len, allocate and fill out a *sa.
2102 * Returns: 0 Success
2103 * ENAMETOOLONG Filename too long
2104 * EINVAL Invalid argument
2105 * ENOMEM Not enough space
2106 * copyin:EFAULT Bad address
2109 getsockaddr(struct socket
*so
, struct sockaddr
**namp
, user_addr_t uaddr
,
2110 size_t len
, boolean_t translate_unspec
)
2112 struct sockaddr
*sa
;
2115 if (len
> SOCK_MAXADDRLEN
)
2116 return (ENAMETOOLONG
);
2118 if (len
< offsetof(struct sockaddr
, sa_data
[0]))
2121 MALLOC(sa
, struct sockaddr
*, len
, M_SONAME
, M_WAITOK
| M_ZERO
);
2125 error
= copyin(uaddr
, (caddr_t
)sa
, len
);
2130 * Force sa_family to AF_INET on AF_INET sockets to handle
2131 * legacy applications that use AF_UNSPEC (0). On all other
2132 * sockets we leave it unchanged and let the lower layer
2135 if (translate_unspec
&& sa
->sa_family
== AF_UNSPEC
&&
2136 SOCK_CHECK_DOM(so
, PF_INET
) &&
2137 len
== sizeof (struct sockaddr_in
))
2138 sa
->sa_family
= AF_INET
;
2147 getsockaddr_s(struct socket
*so
, struct sockaddr_storage
*ss
,
2148 user_addr_t uaddr
, size_t len
, boolean_t translate_unspec
)
2152 if (ss
== NULL
|| uaddr
== USER_ADDR_NULL
||
2153 len
< offsetof(struct sockaddr
, sa_data
[0]))
2157 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2158 * so the check here is inclusive.
2160 if (len
> sizeof (*ss
))
2161 return (ENAMETOOLONG
);
2163 bzero(ss
, sizeof (*ss
));
2164 error
= copyin(uaddr
, (caddr_t
)ss
, len
);
2167 * Force sa_family to AF_INET on AF_INET sockets to handle
2168 * legacy applications that use AF_UNSPEC (0). On all other
2169 * sockets we leave it unchanged and let the lower layer
2172 if (translate_unspec
&& ss
->ss_family
== AF_UNSPEC
&&
2173 SOCK_CHECK_DOM(so
, PF_INET
) &&
2174 len
== sizeof (struct sockaddr_in
))
2175 ss
->ss_family
= AF_INET
;
2183 * Hard limit on the number of source and/or destination addresses
2184 * that can be specified by an application.
2186 #define SOCKADDRLIST_MAX_ENTRIES 64
2189 getsockaddrlist(struct socket
*so
, struct sockaddr_list
**slp
,
2190 user_addr_t uaddr
, socklen_t uaddrlen
, boolean_t xlate_unspec
)
2192 struct sockaddr_list
*sl
;
2197 if (uaddr
== USER_ADDR_NULL
|| uaddrlen
== 0)
2200 sl
= sockaddrlist_alloc(M_WAITOK
);
2204 VERIFY(sl
->sl_cnt
== 0);
2205 while (uaddrlen
> 0 && sl
->sl_cnt
< SOCKADDRLIST_MAX_ENTRIES
) {
2206 struct sockaddr_storage ss
;
2207 struct sockaddr_entry
*se
;
2208 struct sockaddr
*sa
;
2210 if (uaddrlen
< sizeof (struct sockaddr
)) {
2215 bzero(&ss
, sizeof (ss
));
2216 error
= copyin(uaddr
, (caddr_t
)&ss
, sizeof (struct sockaddr
));
2220 /* getsockaddr does the same but we need them now */
2221 if (uaddrlen
< ss
.ss_len
||
2222 ss
.ss_len
< offsetof(struct sockaddr
, sa_data
[0])) {
2225 } else if (ss
.ss_len
> sizeof (ss
)) {
2227 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
2228 * so the check here is inclusive. We could user the
2229 * latter instead, but seems like an overkill for now.
2231 error
= ENAMETOOLONG
;
2235 se
= sockaddrentry_alloc(M_WAITOK
);
2239 sockaddrlist_insert(sl
, se
);
2241 error
= getsockaddr(so
, &sa
, uaddr
, ss
.ss_len
, xlate_unspec
);
2245 VERIFY(sa
!= NULL
&& sa
->sa_len
== ss
.ss_len
);
2249 VERIFY(((signed)uaddrlen
- ss
.ss_len
) >= 0);
2250 uaddrlen
-= ss
.ss_len
;
2254 sockaddrlist_free(sl
);
2263 #define SFUIOBUFS 64
2265 /* Macros to compute the number of mbufs needed depending on cluster size */
2266 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
2267 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
2269 /* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
2270 #define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
2272 /* Upper send limit in the number of mbuf clusters */
2273 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
2274 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
2276 size_t mbuf_pkt_maxlen(mbuf_t m
);
2278 __private_extern__
size_t
2279 mbuf_pkt_maxlen(mbuf_t m
)
2284 maxlen
+= mbuf_maxlen(m
);
2291 alloc_sendpkt(int how
, size_t pktlen
, unsigned int *maxchunks
,
2292 struct mbuf
**m
, boolean_t jumbocl
)
2294 unsigned int needed
;
2297 panic("%s: pktlen (%ld) must be non-zero\n", __func__
, pktlen
);
2300 * Try to allocate for the whole thing. Since we want full control
2301 * over the buffer size and be able to accept partial result, we can't
2302 * use mbuf_allocpacket(). The logic below is similar to sosend().
2305 if (pktlen
> MBIGCLBYTES
&& jumbocl
) {
2306 needed
= MIN(SENDFILE_MAX_16K
, HOWMANY_16K(pktlen
));
2307 *m
= m_getpackets_internal(&needed
, 1, how
, 0, M16KCLBYTES
);
2310 needed
= MIN(SENDFILE_MAX_4K
, HOWMANY_4K(pktlen
));
2311 *m
= m_getpackets_internal(&needed
, 1, how
, 0, MBIGCLBYTES
);
2315 * Our previous attempt(s) at allocation had failed; the system
2316 * may be short on mbufs, and we want to block until they are
2317 * available. This time, ask just for 1 mbuf and don't return
2322 *m
= m_getpackets_internal(&needed
, 1, M_WAIT
, 1, MBIGCLBYTES
);
2325 panic("%s: blocking allocation returned NULL\n", __func__
);
2327 *maxchunks
= needed
;
2332 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
2333 * struct sf_hdtr *hdtr, int flags)
2335 * Send a file specified by 'fd' and starting at 'offset' to a socket
2336 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2337 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2338 * output. If specified, write the total number of bytes sent into *nbytes.
2341 sendfile(struct proc
*p
, struct sendfile_args
*uap
, __unused
int *retval
)
2343 struct fileproc
*fp
;
2346 struct writev_nocancel_args nuap
;
2347 user_ssize_t writev_retval
;
2348 struct user_sf_hdtr user_hdtr
;
2349 struct user32_sf_hdtr user32_hdtr
;
2350 struct user64_sf_hdtr user64_hdtr
;
2352 off_t nbytes
= 0, sbytes
= 0;
2356 struct vfs_context context
= *vfs_context_current();
2357 #define ENXIO_10146739_DBG(err_str) { \
2358 if (error == ENXIO) { \
2361 "File a radar related to rdar://10146739 \n"); \
2364 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE
| DBG_FUNC_START
), uap
->s
,
2367 AUDIT_ARG(fd
, uap
->fd
);
2368 AUDIT_ARG(value32
, uap
->s
);
2371 * Do argument checking. Must be a regular file in, stream
2372 * type and connected socket out, positive offset.
2374 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
2375 ENXIO_10146739_DBG("%s: fp_getfvp error. %s");
2378 if ((fp
->f_flag
& FREAD
) == 0) {
2382 if (vnode_isreg(vp
) == 0) {
2386 error
= file_socket(uap
->s
, &so
);
2388 ENXIO_10146739_DBG("%s: file_socket error. %s");
2395 if (so
->so_type
!= SOCK_STREAM
) {
2399 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
2403 if (uap
->offset
< 0) {
2407 if (uap
->nbytes
== USER_ADDR_NULL
) {
2411 if (uap
->flags
!= 0) {
2416 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
2418 #if CONFIG_MACF_SOCKET_SUBSET
2419 /* JMM - fetch connected sockaddr? */
2420 error
= mac_socket_check_send(context
.vc_ucred
, so
, NULL
);
2426 * Get number of bytes to send
2427 * Should it applies to size of header and trailer?
2428 * JMM - error handling?
2430 copyin(uap
->nbytes
, &nbytes
, sizeof (off_t
));
2433 * If specified, get the pointer to the sf_hdtr struct for
2434 * any headers/trailers.
2436 if (uap
->hdtr
!= USER_ADDR_NULL
) {
2439 bzero(&user_hdtr
, sizeof (user_hdtr
));
2440 if (IS_64BIT_PROCESS(p
)) {
2441 hdtrp
= (caddr_t
)&user64_hdtr
;
2442 sizeof_hdtr
= sizeof (user64_hdtr
);
2444 hdtrp
= (caddr_t
)&user32_hdtr
;
2445 sizeof_hdtr
= sizeof (user32_hdtr
);
2447 error
= copyin(uap
->hdtr
, hdtrp
, sizeof_hdtr
);
2450 if (IS_64BIT_PROCESS(p
)) {
2451 user_hdtr
.headers
= user64_hdtr
.headers
;
2452 user_hdtr
.hdr_cnt
= user64_hdtr
.hdr_cnt
;
2453 user_hdtr
.trailers
= user64_hdtr
.trailers
;
2454 user_hdtr
.trl_cnt
= user64_hdtr
.trl_cnt
;
2456 user_hdtr
.headers
= user32_hdtr
.headers
;
2457 user_hdtr
.hdr_cnt
= user32_hdtr
.hdr_cnt
;
2458 user_hdtr
.trailers
= user32_hdtr
.trailers
;
2459 user_hdtr
.trl_cnt
= user32_hdtr
.trl_cnt
;
2463 * Send any headers. Wimp out and use writev(2).
2465 if (user_hdtr
.headers
!= USER_ADDR_NULL
) {
2466 bzero(&nuap
, sizeof (struct writev_args
));
2468 nuap
.iovp
= user_hdtr
.headers
;
2469 nuap
.iovcnt
= user_hdtr
.hdr_cnt
;
2470 error
= writev_nocancel(p
, &nuap
, &writev_retval
);
2472 ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
2475 sbytes
+= writev_retval
;
2480 * Get the file size for 2 reasons:
2481 * 1. We don't want to allocate more mbufs than necessary
2482 * 2. We don't want to read past the end of file
2484 if ((error
= vnode_size(vp
, &file_size
, vfs_context_current())) != 0) {
2485 ENXIO_10146739_DBG("%s: vnode_size error. %s");
2490 * Simply read file data into a chain of mbufs that used with scatter
2491 * gather reads. We're not (yet?) setup to use zero copy external
2492 * mbufs that point to the file pages.
2495 error
= sblock(&so
->so_snd
, SBL_WAIT
);
2497 socket_unlock(so
, 1);
2500 for (off
= uap
->offset
; ; off
+= xfsize
, sbytes
+= xfsize
) {
2501 mbuf_t m0
= NULL
, m
;
2502 unsigned int nbufs
= SFUIOBUFS
, i
;
2504 char uio_buf
[UIO_SIZEOF(SFUIOBUFS
)]; /* 1 KB !!! */
2512 * Calculate the amount to transfer.
2513 * Align to round number of pages.
2514 * Not to exceed send socket buffer,
2515 * the EOF, or the passed in nbytes.
2517 xfsize
= sbspace(&so
->so_snd
);
2520 if (so
->so_state
& SS_CANTSENDMORE
) {
2523 } else if ((so
->so_state
& SS_NBIO
)) {
2531 if (xfsize
> SENDFILE_MAX_BYTES
)
2532 xfsize
= SENDFILE_MAX_BYTES
;
2533 else if (xfsize
> PAGE_SIZE
)
2534 xfsize
= trunc_page(xfsize
);
2535 pgoff
= off
& PAGE_MASK_64
;
2536 if (pgoff
> 0 && PAGE_SIZE
- pgoff
< xfsize
)
2537 xfsize
= PAGE_SIZE_64
- pgoff
;
2538 if (nbytes
&& xfsize
> (nbytes
- sbytes
))
2539 xfsize
= nbytes
- sbytes
;
2542 if (off
+ xfsize
> file_size
)
2543 xfsize
= file_size
- off
;
2548 * Attempt to use larger than system page-size clusters for
2549 * large writes only if there is a jumbo cluster pool and
2550 * if the socket is marked accordingly.
2552 jumbocl
= sosendjcl
&& njcl
> 0 &&
2553 ((so
->so_flags
& SOF_MULTIPAGES
) || sosendjcl_ignore_capab
);
2555 socket_unlock(so
, 0);
2556 alloc_sendpkt(M_WAIT
, xfsize
, &nbufs
, &m0
, jumbocl
);
2557 pktlen
= mbuf_pkt_maxlen(m0
);
2558 if (pktlen
< (size_t)xfsize
)
2561 auio
= uio_createwithbuffer(nbufs
, off
, UIO_SYSSPACE
,
2562 UIO_READ
, &uio_buf
[0], sizeof (uio_buf
));
2564 printf("sendfile failed. nbufs = %d. %s", nbufs
,
2565 "File a radar related to rdar://10146739.\n");
2572 for (i
= 0, m
= m0
, uiolen
= 0;
2573 i
< nbufs
&& m
!= NULL
&& uiolen
< (size_t)xfsize
;
2574 i
++, m
= mbuf_next(m
)) {
2575 size_t mlen
= mbuf_maxlen(m
);
2577 if (mlen
+ uiolen
> (size_t)xfsize
)
2578 mlen
= xfsize
- uiolen
;
2579 mbuf_setlen(m
, mlen
);
2580 uio_addiov(auio
, CAST_USER_ADDR_T(mbuf_datastart(m
)),
2585 if (xfsize
!= uio_resid(auio
))
2586 printf("sendfile: xfsize: %lld != uio_resid(auio): "
2587 "%lld\n", xfsize
, (long long)uio_resid(auio
));
2589 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ
| DBG_FUNC_START
),
2590 uap
->s
, (unsigned int)((xfsize
>> 32) & 0x0ffffffff),
2591 (unsigned int)(xfsize
& 0x0ffffffff), 0, 0);
2592 error
= fo_read(fp
, auio
, FOF_OFFSET
, &context
);
2595 if (uio_resid(auio
) != xfsize
&& (error
== ERESTART
||
2596 error
== EINTR
|| error
== EWOULDBLOCK
)) {
2599 ENXIO_10146739_DBG("%s: fo_read error. %s");
2604 xfsize
-= uio_resid(auio
);
2605 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ
| DBG_FUNC_END
),
2606 uap
->s
, (unsigned int)((xfsize
>> 32) & 0x0ffffffff),
2607 (unsigned int)(xfsize
& 0x0ffffffff), 0, 0);
2610 //printf("sendfile: fo_read 0 bytes, EOF\n");
2613 if (xfsize
+ off
> file_size
)
2614 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2615 "%lld\n", xfsize
, off
, file_size
);
2616 for (i
= 0, m
= m0
, rlen
= 0;
2617 i
< nbufs
&& m
!= NULL
&& rlen
< xfsize
;
2618 i
++, m
= mbuf_next(m
)) {
2619 size_t mlen
= mbuf_maxlen(m
);
2621 if (rlen
+ mlen
> (size_t)xfsize
)
2622 mlen
= xfsize
- rlen
;
2623 mbuf_setlen(m
, mlen
);
2627 mbuf_pkthdr_setlen(m0
, xfsize
);
2631 * Make sure that the socket is still able to take more data.
2632 * CANTSENDMORE being true usually means that the connection
2633 * was closed. so_error is true when an error was sensed after
2635 * The state is checked after the page mapping and buffer
2636 * allocation above since those operations may block and make
2637 * any socket checks stale. From this point forward, nothing
2638 * blocks before the pru_send (or more accurately, any blocking
2639 * results in a loop back to here to re-check).
2641 if ((so
->so_state
& SS_CANTSENDMORE
) || so
->so_error
) {
2642 if (so
->so_state
& SS_CANTSENDMORE
) {
2645 error
= so
->so_error
;
2649 ENXIO_10146739_DBG("%s: Unexpected socket error. %s");
2653 * Wait for socket space to become available. We do this just
2654 * after checking the connection state above in order to avoid
2655 * a race condition with sbwait().
2657 if (sbspace(&so
->so_snd
) < (long)so
->so_snd
.sb_lowat
) {
2658 if (so
->so_state
& SS_NBIO
) {
2663 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT
|
2664 DBG_FUNC_START
), uap
->s
, 0, 0, 0, 0);
2665 error
= sbwait(&so
->so_snd
);
2666 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT
|
2667 DBG_FUNC_END
), uap
->s
, 0, 0, 0, 0);
2669 * An error from sbwait usually indicates that we've
2670 * been interrupted by a signal. If we've sent anything
2671 * then return bytes sent, otherwise return the error.
2680 struct mbuf
*control
= NULL
;
2683 * Socket filter processing
2686 error
= sflt_data_out(so
, NULL
, &m0
, &control
, 0);
2688 if (error
== EJUSTRETURN
) {
2692 ENXIO_10146739_DBG("%s: sflt_data_out error. %s");
2696 * End Socket filter processing
2699 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND
| DBG_FUNC_START
),
2700 uap
->s
, 0, 0, 0, 0);
2701 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)(so
, 0, m0
,
2703 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND
| DBG_FUNC_START
),
2704 uap
->s
, 0, 0, 0, 0);
2706 ENXIO_10146739_DBG("%s: pru_send error. %s");
2710 sbunlock(&so
->so_snd
, FALSE
); /* will unlock socket */
2712 * Send trailers. Wimp out and use writev(2).
2714 if (uap
->hdtr
!= USER_ADDR_NULL
&&
2715 user_hdtr
.trailers
!= USER_ADDR_NULL
) {
2716 bzero(&nuap
, sizeof (struct writev_args
));
2718 nuap
.iovp
= user_hdtr
.trailers
;
2719 nuap
.iovcnt
= user_hdtr
.trl_cnt
;
2720 error
= writev_nocancel(p
, &nuap
, &writev_retval
);
2722 ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
2725 sbytes
+= writev_retval
;
2732 if (uap
->nbytes
!= USER_ADDR_NULL
) {
2733 /* XXX this appears bogus for some early failure conditions */
2734 copyout(&sbytes
, uap
->nbytes
, sizeof (off_t
));
2736 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE
| DBG_FUNC_END
), uap
->s
,
2737 (unsigned int)((sbytes
>> 32) & 0x0ffffffff),
2738 (unsigned int)(sbytes
& 0x0ffffffff), error
, 0);
2741 sbunlock(&so
->so_snd
, FALSE
); /* will unlock socket */
2746 #endif /* SENDFILE */