2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1989, 1990, 1993
30 * The Regents of the University of California. All rights reserved.
32 * sendfile(2) and related extensions:
33 * Copyright (c) 1998, David Greenman. All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
67 * support for mandatory and extensible security protections. This notice
68 * is included in support of clause 2.2 (b) of the Apple Public License,
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/proc_internal.h>
76 #include <sys/file_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <sys/malloc.h>
80 #include <kern/lock.h>
81 #include <sys/domain.h>
82 #include <sys/protosw.h>
83 #include <sys/signalvar.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/kernel.h>
87 #include <sys/uio_internal.h>
88 #include <sys/kauth.h>
90 #include <bsm/audit_kernel.h>
92 #include <sys/kdebug.h>
93 #include <sys/sysproto.h>
94 #include <netinet/in.h>
95 #include <net/route.h>
96 #include <netinet/in_pcb.h>
98 #if CONFIG_MACF_SOCKET_SUBSET
99 #include <security/mac_framework.h>
100 #endif /* MAC_SOCKET_SUBSET */
102 #define f_flag f_fglob->fg_flag
103 #define f_type f_fglob->fg_type
104 #define f_msgcount f_fglob->fg_msgcount
105 #define f_cred f_fglob->fg_cred
106 #define f_ops f_fglob->fg_ops
107 #define f_offset f_fglob->fg_offset
108 #define f_data f_fglob->fg_data
111 #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
112 #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
113 #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
114 #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
115 #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
116 #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
117 #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
118 #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
119 #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
120 #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
121 #define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8))
122 #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
123 #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
124 #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
127 #define HACK_FOR_4056224 1
129 static pid_t last_pid_4056224
= 0;
130 #endif /* HACK_FOR_4056224 */
132 /* TODO: should be in header file */
133 int falloc_locked(proc_t
, struct fileproc
**, int *, vfs_context_t
, int);
135 static int sendit(struct proc
*, int, struct user_msghdr
*, uio_t
, int,
137 static int recvit(struct proc
*, int, struct user_msghdr
*, uio_t
, user_addr_t
,
139 static int getsockaddr(struct socket
*, struct sockaddr
**, user_addr_t
,
141 static int getsockaddr_s(struct socket
*, struct sockaddr_storage
*,
142 user_addr_t
, size_t, boolean_t
);
144 static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf
**,
146 #endif /* SENDFILE */
149 * System call interface to the socket abstraction.
152 extern struct fileops socketops
;
156 * EACCES Mandatory Access Control failure
160 * socreate:EAFNOSUPPORT
161 * socreate:EPROTOTYPE
162 * socreate:EPROTONOSUPPORT
166 * socreate:??? [other protocol families, IPSEC]
169 socket(struct proc
*p
, struct socket_args
*uap
, register_t
*retval
)
175 AUDIT_ARG(socket
, uap
->domain
, uap
->type
, uap
->protocol
);
176 #if CONFIG_MACF_SOCKET_SUBSET
177 if ((error
= mac_socket_check_create(kauth_cred_get(), uap
->domain
,
178 uap
->type
, uap
->protocol
)) != 0)
180 #endif /* MAC_SOCKET_SUBSET */
182 error
= falloc(p
, &fp
, &fd
, vfs_context_current());
186 fp
->f_flag
= FREAD
|FWRITE
;
187 fp
->f_type
= DTYPE_SOCKET
;
188 fp
->f_ops
= &socketops
;
190 error
= socreate(uap
->domain
, &so
, uap
->type
, uap
->protocol
);
197 thread
= current_thread();
198 ut
= get_bsdthread_info(thread
);
200 /* if this is a backgrounded thread then throttle all new sockets */
201 if ( (ut
->uu_flag
& UT_BACKGROUND
) != 0 ) {
202 so
->so_traffic_mgt_flags
|= TRAFFIC_MGT_SO_BACKGROUND
;
203 so
->so_background_thread
= thread
;
205 fp
->f_data
= (caddr_t
)so
;
208 procfdtbl_releasefd(p
, fd
, NULL
);
210 fp_drop(p
, fd
, fp
, 1);
220 * EDESTADDRREQ Destination address required
221 * EBADF Bad file descriptor
222 * EACCES Mandatory Access Control failure
223 * file_socket:ENOTSOCK
225 * getsockaddr:ENAMETOOLONG Filename too long
226 * getsockaddr:EINVAL Invalid argument
227 * getsockaddr:ENOMEM Not enough space
228 * getsockaddr:EFAULT Bad address
233 bind(__unused proc_t p
, struct bind_args
*uap
, __unused register_t
*retval
)
235 struct sockaddr_storage ss
;
236 struct sockaddr
*sa
= NULL
;
238 boolean_t want_free
= TRUE
;
241 AUDIT_ARG(fd
, uap
->s
);
242 error
= file_socket(uap
->s
, &so
);
249 if (uap
->name
== USER_ADDR_NULL
) {
250 error
= EDESTADDRREQ
;
253 if (uap
->namelen
> sizeof (ss
)) {
254 error
= getsockaddr(so
, &sa
, uap
->name
, uap
->namelen
, TRUE
);
256 error
= getsockaddr_s(so
, &ss
, uap
->name
, uap
->namelen
, TRUE
);
258 sa
= (struct sockaddr
*)&ss
;
264 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
265 #if CONFIG_MACF_SOCKET_SUBSET
266 if ((error
= mac_socket_check_bind(kauth_cred_get(), so
, sa
)) == 0)
267 error
= sobind(so
, sa
);
269 error
= sobind(so
, sa
);
270 #endif /* MAC_SOCKET_SUBSET */
281 * EACCES Mandatory Access Control failure
282 * file_socket:ENOTSOCK
285 * solisten:EOPNOTSUPP
289 listen(__unused
struct proc
*p
, struct listen_args
*uap
,
290 __unused register_t
*retval
)
295 AUDIT_ARG(fd
, uap
->s
);
296 error
= file_socket(uap
->s
, &so
);
300 #if CONFIG_MACF_SOCKET_SUBSET
302 error
= mac_socket_check_listen(kauth_cred_get(), so
);
304 error
= solisten(so
, uap
->backlog
);
307 error
= solisten(so
, uap
->backlog
);
308 #endif /* MAC_SOCKET_SUBSET */
317 * Returns: fp_getfsock:EBADF Bad file descriptor
318 * fp_getfsock:EOPNOTSUPP ...
319 * xlate => :ENOTSOCK Socket operation on non-socket
320 * :EFAULT Bad address on copyin/copyout
321 * :EBADF Bad file descriptor
322 * :EOPNOTSUPP Operation not supported on socket
323 * :EINVAL Invalid argument
324 * :EWOULDBLOCK Operation would block
325 * :ECONNABORTED Connection aborted
326 * :EINTR Interrupted function
327 * :EACCES Mandatory Access Control failure
328 * falloc_locked:ENFILE Too many files open in system
329 * falloc_locked:EMFILE Too many open files
330 * falloc_locked:ENOMEM Not enough space
334 accept_nocancel(struct proc
*p
, struct accept_nocancel_args
*uap
,
338 struct sockaddr
*sa
= NULL
;
341 struct socket
*head
, *so
= NULL
;
342 lck_mtx_t
*mutex_held
;
345 short fflag
; /* type must match fp->f_flag */
350 AUDIT_ARG(fd
, uap
->s
);
353 error
= copyin(uap
->anamelen
, (caddr_t
)&namelen
,
358 error
= fp_getfsock(p
, fd
, &fp
, &head
);
360 if (error
== EOPNOTSUPP
)
368 #if CONFIG_MACF_SOCKET_SUBSET
369 if ((error
= mac_socket_check_accept(kauth_cred_get(), head
)) != 0)
371 #endif /* MAC_SOCKET_SUBSET */
373 socket_lock(head
, 1);
375 if (head
->so_proto
->pr_getlock
!= NULL
) {
376 mutex_held
= (*head
->so_proto
->pr_getlock
)(head
, 0);
379 mutex_held
= head
->so_proto
->pr_domain
->dom_mtx
;
383 if ((head
->so_options
& SO_ACCEPTCONN
) == 0) {
384 if ((head
->so_proto
->pr_flags
& PR_CONNREQUIRED
) == 0) {
387 /* POSIX: The socket is not accepting connections */
390 socket_unlock(head
, 1);
393 if ((head
->so_state
& SS_NBIO
) && head
->so_comp
.tqh_first
== NULL
) {
394 socket_unlock(head
, 1);
398 while (TAILQ_EMPTY(&head
->so_comp
) && head
->so_error
== 0) {
399 if (head
->so_state
& SS_CANTRCVMORE
) {
400 head
->so_error
= ECONNABORTED
;
403 if (head
->so_usecount
< 1)
404 panic("accept: head=%p refcount=%d\n", head
,
406 error
= msleep((caddr_t
)&head
->so_timeo
, mutex_held
,
407 PSOCK
| PCATCH
, "accept", 0);
408 if (head
->so_usecount
< 1)
409 panic("accept: 2 head=%p refcount=%d\n", head
,
411 if ((head
->so_state
& SS_DRAINING
)) {
412 error
= ECONNABORTED
;
415 socket_unlock(head
, 1);
419 if (head
->so_error
) {
420 error
= head
->so_error
;
422 socket_unlock(head
, 1);
428 * At this point we know that there is at least one connection
429 * ready to be accepted. Remove it from the queue prior to
430 * allocating the file descriptor for it since falloc() may
431 * block allowing another process to accept the connection
434 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
435 so
= TAILQ_FIRST(&head
->so_comp
);
436 TAILQ_REMOVE(&head
->so_comp
, so
, so_list
);
438 /* unlock head to avoid deadlock with select, keep a ref on head */
439 socket_unlock(head
, 0);
441 #if CONFIG_MACF_SOCKET_SUBSET
443 * Pass the pre-accepted socket to the MAC framework. This is
444 * cheaper than allocating a file descriptor for the socket,
445 * calling the protocol accept callback, and possibly freeing
446 * the file descriptor should the MAC check fail.
448 if ((error
= mac_socket_check_accepted(kauth_cred_get(), so
)) != 0) {
449 so
->so_state
&= ~(SS_NOFDREF
| SS_COMP
);
452 /* Drop reference on listening socket */
456 #endif /* MAC_SOCKET_SUBSET */
459 * Pass the pre-accepted socket to any interested socket filter(s).
460 * Upon failure, the socket would have been closed by the callee.
462 if (so
->so_filt
!= NULL
&& (error
= soacceptfilter(so
)) != 0) {
463 /* Drop reference on listening socket */
465 /* Propagate socket filter's error code to the caller */
470 error
= falloc(p
, &fp
, &newfd
, vfs_context_current());
473 * Probably ran out of file descriptors. Put the
474 * unaccepted connection back onto the queue and
475 * do another wakeup so some other process might
476 * have a chance at it.
478 socket_lock(head
, 0);
479 TAILQ_INSERT_HEAD(&head
->so_comp
, so
, so_list
);
481 wakeup_one((caddr_t
)&head
->so_timeo
);
482 socket_unlock(head
, 1);
486 fp
->f_type
= DTYPE_SOCKET
;
488 fp
->f_ops
= &socketops
;
489 fp
->f_data
= (caddr_t
)so
;
490 socket_lock(head
, 0);
493 so
->so_state
&= ~SS_COMP
;
495 (void) soacceptlock(so
, &sa
, 0);
496 socket_unlock(head
, 1);
502 socket_unlock(so
, 1);
506 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
511 /* save sa_len before it is destroyed */
513 namelen
= MIN(namelen
, sa_len
);
514 error
= copyout(sa
, uap
->name
, namelen
);
516 /* return the actual, untruncated address length */
519 error
= copyout((caddr_t
)&namelen
, uap
->anamelen
,
525 * If the socket has been marked as inactive by soacceptfilter(),
526 * disallow further operations on it. We explicitly call shutdown
527 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
528 * states are set for the socket. This would also flush out data
529 * hanging off the receive list of this socket.
531 if (so
->so_flags
& SOF_DEFUNCT
) {
532 (void) soshutdownlock(so
, SHUT_RD
);
533 (void) soshutdownlock(so
, SHUT_WR
);
534 (void) sodisconnectlocked(so
);
538 socket_unlock(so
, 1);
542 procfdtbl_releasefd(p
, newfd
, NULL
);
543 fp_drop(p
, newfd
, fp
, 1);
552 accept(struct proc
*p
, struct accept_args
*uap
, register_t
*retval
)
/*
 * accept: thin wrapper around accept_nocancel().  It calls
 * __pthread_testcancel(1) and then forwards p, uap and retval unchanged,
 * returning whatever accept_nocancel() returns.
 * NOTE(review): presumably __pthread_testcancel() acts on a pending thread
 * cancellation before the call proceeds -- confirm against its definition.
 */
554 __pthread_testcancel(1);
/*
 * uap is reinterpreted as struct accept_nocancel_args; this assumes the two
 * argument structures share a layout -- TODO confirm in sysproto.h.
 */
555 return(accept_nocancel(p
, (struct accept_nocancel_args
*)uap
, retval
));
560 * EBADF Bad file descriptor
561 * EALREADY Connection already in progress
562 * EINPROGRESS Operation in progress
563 * ECONNABORTED Connection aborted
564 * EINTR Interrupted function
565 * EACCES Mandatory Access Control failure
566 * file_socket:ENOTSOCK
568 * getsockaddr:ENAMETOOLONG Filename too long
569 * getsockaddr:EINVAL Invalid argument
570 * getsockaddr:ENOMEM Not enough space
571 * getsockaddr:EFAULT Bad address
572 * soconnectlock:EOPNOTSUPP
573 * soconnectlock:EISCONN
574 * soconnectlock:??? [depends on protocol, filters]
577 * Imputed: so_error error may be set from so_error, which
578 * may have been set by soconnectlock.
582 connect(struct proc
*p
, struct connect_args
*uap
, register_t
*retval
)
/*
 * connect: thin wrapper around connect_nocancel().  It calls
 * __pthread_testcancel(1) and then forwards p, uap and retval unchanged,
 * returning whatever connect_nocancel() returns.
 * NOTE(review): presumably __pthread_testcancel() acts on a pending thread
 * cancellation before the call proceeds -- confirm against its definition.
 */
584 __pthread_testcancel(1);
/*
 * uap is reinterpreted as struct connect_nocancel_args; this assumes the two
 * argument structures share a layout -- TODO confirm in sysproto.h.
 */
585 return(connect_nocancel(p
, (struct connect_nocancel_args
*)uap
, retval
));
589 connect_nocancel(__unused proc_t p
, struct connect_nocancel_args
*uap
, __unused register_t
*retval
)
592 struct sockaddr_storage ss
;
593 struct sockaddr
*sa
= NULL
;
594 lck_mtx_t
*mutex_held
;
595 boolean_t want_free
= TRUE
;
600 AUDIT_ARG(fd
, uap
->s
);
601 error
= file_socket(fd
, &so
);
610 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
611 * if this is a datagram socket; translate for other types.
613 dgram
= (so
->so_type
== SOCK_DGRAM
);
615 /* Get socket address now before we obtain socket lock */
616 if (uap
->namelen
> sizeof (ss
)) {
617 error
= getsockaddr(so
, &sa
, uap
->name
, uap
->namelen
, !dgram
);
619 error
= getsockaddr_s(so
, &ss
, uap
->name
, uap
->namelen
, !dgram
);
621 sa
= (struct sockaddr
*)&ss
;
628 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), sa
);
629 #if CONFIG_MACF_SOCKET_SUBSET
630 if ((error
= mac_socket_check_connect(kauth_cred_get(), so
, sa
)) != 0) {
635 #endif /* MAC_SOCKET_SUBSET */
638 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
641 socket_unlock(so
, 1);
645 error
= soconnectlock(so
, sa
, 0);
648 if ((so
->so_state
& SS_NBIO
) && (so
->so_state
& SS_ISCONNECTING
)) {
651 socket_unlock(so
, 1);
655 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
656 if (so
->so_proto
->pr_getlock
!= NULL
)
657 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
659 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
660 error
= msleep((caddr_t
)&so
->so_timeo
, mutex_held
,
661 PSOCK
| PCATCH
, "connect", 0);
662 if ((so
->so_state
& SS_DRAINING
)) {
663 error
= ECONNABORTED
;
669 error
= so
->so_error
;
673 so
->so_state
&= ~SS_ISCONNECTING
;
674 socket_unlock(so
, 1);
677 if (error
== ERESTART
)
686 * socreate:EAFNOSUPPORT
687 * socreate:EPROTOTYPE
688 * socreate:EPROTONOSUPPORT
692 * socreate:??? [other protocol families, IPSEC]
698 * soconnect2:EPROTOTYPE
699 * soconnect2:??? [other protocol families]
702 socketpair(struct proc
*p
, struct socketpair_args
*uap
,
703 __unused register_t
*retval
)
705 struct fileproc
*fp1
, *fp2
;
706 struct socket
*so1
, *so2
;
707 int fd
, error
, sv
[2];
709 AUDIT_ARG(socket
, uap
->domain
, uap
->type
, uap
->protocol
);
710 error
= socreate(uap
->domain
, &so1
, uap
->type
, uap
->protocol
);
713 error
= socreate(uap
->domain
, &so2
, uap
->type
, uap
->protocol
);
717 error
= falloc(p
, &fp1
, &fd
, vfs_context_current());
721 fp1
->f_flag
= FREAD
|FWRITE
;
722 fp1
->f_type
= DTYPE_SOCKET
;
723 fp1
->f_ops
= &socketops
;
724 fp1
->f_data
= (caddr_t
)so1
;
727 error
= falloc(p
, &fp2
, &fd
, vfs_context_current());
731 fp2
->f_flag
= FREAD
|FWRITE
;
732 fp2
->f_type
= DTYPE_SOCKET
;
733 fp2
->f_ops
= &socketops
;
734 fp2
->f_data
= (caddr_t
)so2
;
737 error
= soconnect2(so1
, so2
);
741 if (uap
->type
== SOCK_DGRAM
) {
743 * Datagram socket connection is asymmetric.
745 error
= soconnect2(so2
, so1
);
752 procfdtbl_releasefd(p
, sv
[0], NULL
);
753 procfdtbl_releasefd(p
, sv
[1], NULL
);
754 fp_drop(p
, sv
[0], fp1
, 1);
755 fp_drop(p
, sv
[1], fp2
, 1);
758 error
= copyout((caddr_t
)sv
, uap
->rsv
, 2 * sizeof (int));
761 fp_free(p
, sv
[1], fp2
);
763 fp_free(p
, sv
[0], fp1
);
777 * EACCES Mandatory Access Control failure
778 * file_socket:ENOTSOCK
780 * getsockaddr:ENAMETOOLONG Filename too long
781 * getsockaddr:EINVAL Invalid argument
782 * getsockaddr:ENOMEM Not enough space
783 * getsockaddr:EFAULT Bad address
784 * <pru_sosend>:EACCES[TCP]
785 * <pru_sosend>:EADDRINUSE[TCP]
786 * <pru_sosend>:EADDRNOTAVAIL[TCP]
787 * <pru_sosend>:EAFNOSUPPORT[TCP]
788 * <pru_sosend>:EAGAIN[TCP]
790 * <pru_sosend>:ECONNRESET[TCP]
791 * <pru_sosend>:EFAULT
792 * <pru_sosend>:EHOSTUNREACH[TCP]
794 * <pru_sosend>:EINVAL
795 * <pru_sosend>:EISCONN[AF_INET]
796 * <pru_sosend>:EMSGSIZE[TCP]
797 * <pru_sosend>:ENETDOWN[TCP]
798 * <pru_sosend>:ENETUNREACH[TCP]
799 * <pru_sosend>:ENOBUFS
800 * <pru_sosend>:ENOMEM[TCP]
801 * <pru_sosend>:ENOTCONN[AF_INET]
802 * <pru_sosend>:EOPNOTSUPP
803 * <pru_sosend>:EPERM[TCP]
805 * <pru_sosend>:EWOULDBLOCK
806 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
807 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses]
808 * <pru_sosend>:??? [value from so_error]
812 sendit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
813 int flags
, register_t
*retval
)
815 struct mbuf
*control
= NULL
;
816 struct sockaddr_storage ss
;
817 struct sockaddr
*to
= NULL
;
818 boolean_t want_free
= TRUE
;
823 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
825 error
= file_socket(s
, &so
);
827 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
834 if (mp
->msg_name
!= USER_ADDR_NULL
) {
835 if (mp
->msg_namelen
> sizeof (ss
)) {
836 error
= getsockaddr(so
, &to
, mp
->msg_name
,
837 mp
->msg_namelen
, TRUE
);
839 error
= getsockaddr_s(so
, &ss
, mp
->msg_name
,
840 mp
->msg_namelen
, TRUE
);
842 to
= (struct sockaddr
*)&ss
;
848 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), to
);
850 if (mp
->msg_control
!= USER_ADDR_NULL
) {
851 if (mp
->msg_controllen
< sizeof (struct cmsghdr
)) {
855 error
= sockargs(&control
, mp
->msg_control
,
856 mp
->msg_controllen
, MT_CONTROL
);
861 #if CONFIG_MACF_SOCKET_SUBSET
863 * We check the state without holding the socket lock;
864 * if a race condition occurs, it would simply result
865 * in an extra call to the MAC check function.
867 if (!(so
->so_state
& SS_ISCONNECTED
) &&
868 (error
= mac_socket_check_send(kauth_cred_get(), so
, to
)) != 0)
870 #endif /* MAC_SOCKET_SUBSET */
872 len
= uio_resid(uiop
);
873 error
= so
->so_proto
->pr_usrreqs
->pru_sosend(so
, to
, uiop
, 0, control
,
876 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
877 error
== EINTR
|| error
== EWOULDBLOCK
))
879 /* Generation of SIGPIPE can be controlled per socket */
880 if (error
== EPIPE
&& !(so
->so_flags
& SOF_NOSIGPIPE
))
884 *retval
= (int)(len
- uio_resid(uiop
));
886 if (to
!= NULL
&& want_free
)
889 KERNEL_DEBUG(DBG_FNC_SENDIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
897 * sendit:??? [see sendit definition in this file]
898 * write:??? [4056224: applicable for pipes]
901 sendto(struct proc
*p
, struct sendto_args
*uap
, register_t
*retval
)
/*
 * sendto: thin wrapper around sendto_nocancel().  It calls
 * __pthread_testcancel(1) and then forwards p, uap and retval unchanged,
 * returning whatever sendto_nocancel() returns.
 * NOTE(review): presumably __pthread_testcancel() acts on a pending thread
 * cancellation before the call proceeds -- confirm against its definition.
 */
903 __pthread_testcancel(1);
/*
 * uap is reinterpreted as struct sendto_nocancel_args; this assumes the two
 * argument structures share a layout -- TODO confirm in sysproto.h.
 */
904 return(sendto_nocancel(p
, (struct sendto_nocancel_args
*)uap
, retval
));
908 sendto_nocancel(struct proc
*p
, struct sendto_nocancel_args
*uap
, register_t
*retval
)
910 struct user_msghdr msg
;
914 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
915 AUDIT_ARG(fd
, uap
->s
);
917 auio
= uio_create(1, 0,
918 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
923 uio_addiov(auio
, uap
->buf
, uap
->len
);
925 msg
.msg_name
= uap
->to
;
926 msg
.msg_namelen
= uap
->tolen
;
927 /* no need to set up msg_iov. sendit uses uio_t we send it */
933 error
= sendit(p
, uap
->s
, &msg
, auio
, uap
->flags
, retval
);
942 * Temporary workaround to let send() and recv() work over
943 * a pipe for binary compatibility
944 * This will be removed in the release following Tiger
946 if (error
== ENOTSOCK
) {
949 if (fp_lookup(p
, uap
->s
, &fp
, 0) == 0) {
950 (void) fp_drop(p
, uap
->s
, fp
, 0);
952 if (fp
->f_type
== DTYPE_PIPE
) {
953 struct write_args write_uap
;
954 user_ssize_t write_retval
;
956 if (p
->p_pid
> last_pid_4056224
) {
957 last_pid_4056224
= p
->p_pid
;
959 printf("%s[%d] uses send/recv "
960 "on a pipe\n", p
->p_comm
, p
->p_pid
);
963 bzero(&write_uap
, sizeof (struct write_args
));
964 write_uap
.fd
= uap
->s
;
965 write_uap
.cbuf
= uap
->buf
;
966 write_uap
.nbyte
= uap
->len
;
968 error
= write(p
, &write_uap
, &write_retval
);
969 *retval
= (int)write_retval
;
973 #endif /* HACK_FOR_4056224 */
975 KERNEL_DEBUG(DBG_FNC_SENDTO
| DBG_FUNC_END
, error
, *retval
, 0, 0, 0);
984 * sendit:??? [see sendit definition in this file]
987 sendmsg(struct proc
*p
, struct sendmsg_args
*uap
, register_t
*retval
)
/*
 * sendmsg: thin wrapper around sendmsg_nocancel().  It calls
 * __pthread_testcancel(1) and then forwards p, uap and retval unchanged,
 * returning whatever sendmsg_nocancel() returns.
 * NOTE(review): presumably __pthread_testcancel() acts on a pending thread
 * cancellation before the call proceeds -- confirm against its definition.
 */
989 __pthread_testcancel(1);
/*
 * uap is reinterpreted as struct sendmsg_nocancel_args; this assumes the two
 * argument structures share a layout -- TODO confirm in sysproto.h.
 */
990 return(sendmsg_nocancel(p
, (struct sendmsg_nocancel_args
*)uap
, retval
));
994 sendmsg_nocancel(struct proc
*p
, struct sendmsg_nocancel_args
*uap
, register_t
*retval
)
997 struct user_msghdr user_msg
;
1003 struct user_iovec
*iovp
;
1005 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1006 AUDIT_ARG(fd
, uap
->s
);
1007 if (IS_64BIT_PROCESS(p
)) {
1008 msghdrp
= (caddr_t
)&user_msg
;
1009 size_of_msghdr
= sizeof (user_msg
);
1010 size_of_iovec
= sizeof (struct user_iovec
);
1012 msghdrp
= (caddr_t
)&msg
;
1013 size_of_msghdr
= sizeof (msg
);
1014 size_of_iovec
= sizeof (struct iovec
);
1016 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
1018 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1022 /* only need to copy if user process is not 64-bit */
1023 if (!IS_64BIT_PROCESS(p
)) {
1024 user_msg
.msg_flags
= msg
.msg_flags
;
1025 user_msg
.msg_controllen
= msg
.msg_controllen
;
1026 user_msg
.msg_control
= CAST_USER_ADDR_T(msg
.msg_control
);
1027 user_msg
.msg_iovlen
= msg
.msg_iovlen
;
1028 user_msg
.msg_iov
= CAST_USER_ADDR_T(msg
.msg_iov
);
1029 user_msg
.msg_namelen
= msg
.msg_namelen
;
1030 user_msg
.msg_name
= CAST_USER_ADDR_T(msg
.msg_name
);
1033 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
1034 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, EMSGSIZE
,
1039 /* allocate a uio large enough to hold the number of iovecs passed */
1040 auio
= uio_create(user_msg
.msg_iovlen
, 0,
1041 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1048 if (user_msg
.msg_iovlen
) {
1050 * get location of iovecs within the uio.
1051 * then copyin the iovecs from user space.
1053 iovp
= uio_iovsaddr(auio
);
1058 error
= copyin(user_msg
.msg_iov
, (caddr_t
)iovp
,
1059 (user_msg
.msg_iovlen
* size_of_iovec
));
1062 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
1064 /* finish setup of uio_t */
1065 uio_calculateresid(auio
);
1067 user_msg
.msg_iov
= 0;
1070 /* msg_flags is ignored for send */
1071 user_msg
.msg_flags
= 0;
1073 error
= sendit(p
, uap
->s
, &user_msg
, auio
, uap
->flags
, retval
);
1078 KERNEL_DEBUG(DBG_FNC_SENDMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1084 * Returns: 0 Success
1088 * EACCES Mandatory Access Control failure
1091 * <pru_soreceive>:ENOBUFS
1092 * <pru_soreceive>:ENOTCONN
1093 * <pru_soreceive>:EWOULDBLOCK
1094 * <pru_soreceive>:EFAULT
1095 * <pru_soreceive>:EINTR
1096 * <pru_soreceive>:EBADF
1097 * <pru_soreceive>:EINVAL
1098 * <pru_soreceive>:EMSGSIZE
1099 * <pru_soreceive>:???
1101 * Notes: Additional return values from calls through <pru_soreceive>
1102 * depend on protocols other than TCP or AF_UNIX, which are
1106 recvit(struct proc
*p
, int s
, struct user_msghdr
*mp
, uio_t uiop
,
1107 user_addr_t namelenp
, register_t
*retval
)
1110 struct mbuf
*m
, *control
= 0;
1113 struct sockaddr
*fromsa
= 0;
1114 struct fileproc
*fp
;
1116 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1118 if ((error
= fp_lookup(p
, s
, &fp
, 1))) {
1119 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1123 if (fp
->f_type
!= DTYPE_SOCKET
) {
1124 fp_drop(p
, s
, fp
, 1);
1129 so
= (struct socket
*)fp
->f_data
;
1131 fp_drop(p
, s
, fp
, 1);
1138 #if CONFIG_MACF_SOCKET_SUBSET
1140 * We check the state without holding the socket lock;
1141 * if a race condition occurs, it would simply result
1142 * in an extra call to the MAC check function.
1144 if (!(so
->so_state
& SS_ISCONNECTED
) &&
1145 (error
= mac_socket_check_receive(kauth_cred_get(), so
)) != 0)
1147 #endif /* MAC_SOCKET_SUBSET */
1148 if (uio_resid(uiop
) < 0) {
1149 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, EINVAL
, 0, 0, 0, 0);
1154 len
= uio_resid(uiop
);
1155 error
= so
->so_proto
->pr_usrreqs
->pru_soreceive(so
, &fromsa
, uiop
,
1156 (struct mbuf
**)0, mp
->msg_control
? &control
: (struct mbuf
**)0,
1158 AUDIT_ARG(sockaddr
, vfs_context_cwd(vfs_context_current()), fromsa
);
1160 if (uio_resid(uiop
) != len
&& (error
== ERESTART
||
1161 error
== EINTR
|| error
== EWOULDBLOCK
))
1168 *retval
= len
- uio_resid(uiop
);
1170 socklen_t sa_len
= 0;
1172 len
= mp
->msg_namelen
;
1173 if (len
<= 0 || fromsa
== 0) {
1177 #define MIN(a, b) ((a) > (b) ? (b) : (a))
1179 sa_len
= fromsa
->sa_len
;
1180 len
= MIN((unsigned int)len
, sa_len
);
1181 error
= copyout(fromsa
, mp
->msg_name
, (unsigned)len
);
1185 mp
->msg_namelen
= sa_len
;
1186 /* return the actual, untruncated address length */
1188 (error
= copyout((caddr_t
)&sa_len
, namelenp
,
1193 if (mp
->msg_control
) {
1194 len
= mp
->msg_controllen
;
1196 mp
->msg_controllen
= 0;
1197 ctlbuf
= mp
->msg_control
;
1199 while (m
&& len
> 0) {
1200 unsigned int tocopy
;
1202 if (len
>= m
->m_len
) {
1205 mp
->msg_flags
|= MSG_CTRUNC
;
1209 error
= copyout((caddr_t
)mtod(m
, caddr_t
), ctlbuf
,
1218 mp
->msg_controllen
= ctlbuf
- mp
->msg_control
;
1222 FREE(fromsa
, M_SONAME
);
1225 KERNEL_DEBUG(DBG_FNC_RECVIT
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1227 fp_drop(p
, s
, fp
, 0);
1233 * Returns: 0 Success
1237 * read:??? [4056224: applicable for pipes]
1239 * Notes: The read entry point is only called as part of support for
1240 * binary backward compatibility; new code should use read
1241 * instead of recv or recvfrom when attempting to read data
1244 * For full documentation of the return codes from recvit, see
1245 * the block header for the recvit function.
1248 recvfrom(struct proc
*p
, struct recvfrom_args
*uap
, register_t
*retval
)
/*
 * recvfrom: thin wrapper around recvfrom_nocancel().  It calls
 * __pthread_testcancel(1) and then forwards p, uap and retval unchanged,
 * returning whatever recvfrom_nocancel() returns.
 * NOTE(review): presumably __pthread_testcancel() acts on a pending thread
 * cancellation before the call proceeds -- confirm against its definition.
 */
1250 __pthread_testcancel(1);
/*
 * uap is reinterpreted as struct recvfrom_nocancel_args; this assumes the two
 * argument structures share a layout -- TODO confirm in sysproto.h.
 */
1251 return(recvfrom_nocancel(p
, (struct recvfrom_nocancel_args
*)uap
, retval
));
1255 recvfrom_nocancel(struct proc
*p
, struct recvfrom_nocancel_args
*uap
, register_t
*retval
)
1257 struct user_msghdr msg
;
1261 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1262 AUDIT_ARG(fd
, uap
->s
);
1264 if (uap
->fromlenaddr
) {
1265 error
= copyin(uap
->fromlenaddr
,
1266 (caddr_t
)&msg
.msg_namelen
, sizeof (msg
.msg_namelen
));
1270 msg
.msg_namelen
= 0;
1272 msg
.msg_name
= uap
->from
;
1273 auio
= uio_create(1, 0,
1274 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1280 uio_addiov(auio
, uap
->buf
, uap
->len
);
1281 /* no need to set up msg_iov. recvit uses uio_t we send it */
1284 msg
.msg_control
= 0;
1285 msg
.msg_controllen
= 0;
1286 msg
.msg_flags
= uap
->flags
;
1287 error
= recvit(p
, uap
->s
, &msg
, auio
, uap
->fromlenaddr
, retval
);
1292 #if HACK_FOR_4056224
1295 * Temporary workaround to let send() and recv() work over
1296 * a pipe for binary compatibility
1297 * This will be removed in the release following Tiger
1299 if (error
== ENOTSOCK
&& proc_is64bit(p
) == 0) {
1300 struct fileproc
*fp
;
1302 if (fp_lookup(p
, uap
->s
, &fp
, 0) == 0) {
1303 (void) fp_drop(p
, uap
->s
, fp
, 0);
1305 if (fp
->f_type
== DTYPE_PIPE
) {
1306 struct read_args read_uap
;
1307 user_ssize_t read_retval
;
1309 if (p
->p_pid
> last_pid_4056224
) {
1310 last_pid_4056224
= p
->p_pid
;
1312 printf("%s[%d] uses send/recv on "
1313 "a pipe\n", p
->p_comm
, p
->p_pid
);
1316 bzero(&read_uap
, sizeof (struct read_args
));
1317 read_uap
.fd
= uap
->s
;
1318 read_uap
.cbuf
= uap
->buf
;
1319 read_uap
.nbyte
= uap
->len
;
1321 error
= read(p
, &read_uap
, &read_retval
);
1322 *retval
= (int)read_retval
;
1326 #endif /* HACK_FOR_4056224 */
1328 KERNEL_DEBUG(DBG_FNC_RECVFROM
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1334 * Returns: 0 Success
1341 * Notes: For full documentation of the return codes from recvit, see
1342 * the block header for the recvit function.
1345 recvmsg(struct proc
*p
, struct recvmsg_args
*uap
, register_t
*retval
)
/*
 * recvmsg: thin wrapper around recvmsg_nocancel().  It calls
 * __pthread_testcancel(1) and then forwards p, uap and retval unchanged,
 * returning whatever recvmsg_nocancel() returns.
 * NOTE(review): presumably __pthread_testcancel() acts on a pending thread
 * cancellation before the call proceeds -- confirm against its definition.
 */
1347 __pthread_testcancel(1);
/*
 * uap is reinterpreted as struct recvmsg_nocancel_args; this assumes the two
 * argument structures share a layout -- TODO confirm in sysproto.h.
 */
1348 return(recvmsg_nocancel(p
, (struct recvmsg_nocancel_args
*)uap
, retval
));
1352 recvmsg_nocancel(struct proc
*p
, struct recvmsg_nocancel_args
*uap
, register_t
*retval
)
1355 struct user_msghdr user_msg
;
1362 struct user_iovec
*iovp
;
1364 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_START
, 0, 0, 0, 0, 0);
1365 AUDIT_ARG(fd
, uap
->s
);
1366 if (IS_64BIT_PROCESS(p
)) {
1367 msghdrp
= (caddr_t
)&user_msg
;
1368 size_of_msghdr
= sizeof (user_msg
);
1369 size_of_iovec
= sizeof (struct user_iovec
);
1371 msghdrp
= (caddr_t
)&msg
;
1372 size_of_msghdr
= sizeof (msg
);
1373 size_of_iovec
= sizeof (struct iovec
);
1375 error
= copyin(uap
->msg
, msghdrp
, size_of_msghdr
);
1377 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1381 /* only need to copy if user process is not 64-bit */
1382 if (!IS_64BIT_PROCESS(p
)) {
1383 user_msg
.msg_flags
= msg
.msg_flags
;
1384 user_msg
.msg_controllen
= msg
.msg_controllen
;
1385 user_msg
.msg_control
= CAST_USER_ADDR_T(msg
.msg_control
);
1386 user_msg
.msg_iovlen
= msg
.msg_iovlen
;
1387 user_msg
.msg_iov
= CAST_USER_ADDR_T(msg
.msg_iov
);
1388 user_msg
.msg_namelen
= msg
.msg_namelen
;
1389 user_msg
.msg_name
= CAST_USER_ADDR_T(msg
.msg_name
);
1392 if (user_msg
.msg_iovlen
<= 0 || user_msg
.msg_iovlen
> UIO_MAXIOV
) {
1393 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, EMSGSIZE
,
1398 user_msg
.msg_flags
= uap
->flags
;
1400 /* allocate a uio large enough to hold the number of iovecs passed */
1401 auio
= uio_create(user_msg
.msg_iovlen
, 0,
1402 (IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64
: UIO_USERSPACE32
),
1410 * get location of iovecs within the uio. then copyin the iovecs from
1413 iovp
= uio_iovsaddr(auio
);
1418 uiov
= user_msg
.msg_iov
;
1419 user_msg
.msg_iov
= CAST_USER_ADDR_T(iovp
);
1420 error
= copyin(uiov
, (caddr_t
)iovp
,
1421 (user_msg
.msg_iovlen
* size_of_iovec
));
1425 /* finish setup of uio_t */
1426 uio_calculateresid(auio
);
1428 error
= recvit(p
, uap
->s
, &user_msg
, auio
, 0, retval
);
1430 user_msg
.msg_iov
= uiov
;
1431 /* only need to copy if user process is not 64-bit */
1432 if (!IS_64BIT_PROCESS(p
)) {
1433 // LP64todo - do all these change? if not, then no need to copy all of them!
1434 msg
.msg_flags
= user_msg
.msg_flags
;
1435 msg
.msg_controllen
= user_msg
.msg_controllen
;
1437 CAST_DOWN(caddr_t
, user_msg
.msg_control
);
1438 msg
.msg_iovlen
= user_msg
.msg_iovlen
;
1439 msg
.msg_iov
= (struct iovec
*)
1440 CAST_DOWN(caddr_t
, user_msg
.msg_iov
);
1441 msg
.msg_namelen
= user_msg
.msg_namelen
;
1442 msg
.msg_name
= CAST_DOWN(caddr_t
, user_msg
.msg_name
);
1444 error
= copyout(msghdrp
, uap
->msg
, size_of_msghdr
);
1450 KERNEL_DEBUG(DBG_FNC_RECVMSG
| DBG_FUNC_END
, error
, 0, 0, 0, 0);
1455 * Returns: 0 Success
1457 * file_socket:ENOTSOCK
1460 * soshutdown:ENOTCONN
1461 * soshutdown:EADDRNOTAVAIL[TCP]
1462 * soshutdown:ENOBUFS[TCP]
1463 * soshutdown:EMSGSIZE[TCP]
1464 * soshutdown:EHOSTUNREACH[TCP]
1465 * soshutdown:ENETUNREACH[TCP]
1466 * soshutdown:ENETDOWN[TCP]
1467 * soshutdown:ENOMEM[TCP]
1468 * soshutdown:EACCES[TCP]
1469 * soshutdown:EMSGSIZE[TCP]
1470 * soshutdown:ENOBUFS[TCP]
1471 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
1472 * soshutdown:??? [other protocol families]
/*
 * shutdown system call.
 *
 * Visible flow: the descriptor uap->s is recorded with AUDIT_ARG, resolved
 * to a socket through file_socket(), and the socket is handed to
 * soshutdown() together with uap->how.  Neither p nor retval is used
 * (both are marked __unused).
 *
 * NOTE(review): this listing omits lines (local declarations, the error
 * test between the two calls, and the return path); only the calls that
 * are visible are described here.
 */
1476 shutdown(__unused
struct proc
*p
, struct shutdown_args
*uap
,
1477 __unused register_t
*retval
)
1482 AUDIT_ARG(fd
, uap
->s
);
1483 error
= file_socket(uap
->s
, &so
);
1490 error
= soshutdown((struct socket
*)so
, uap
->how
);
1497 * Returns: 0 Success
1500 * EACCES Mandatory Access Control failure
1501 * file_socket:ENOTSOCK
1504 * sosetopt:ENOPROTOOPT
1508 * sosetopt:EOPNOTSUPP[AF_UNIX]
/*
 * setsockopt system call.
 *
 * Visible flow: audit the descriptor, test for a null option pointer
 * paired with a non-zero size, resolve uap->s with file_socket(), fill a
 * struct sockopt with direction SOPT_SET and the caller's level, name,
 * val and valsize, optionally consult mac_socket_check_setsockopt() when
 * CONFIG_MACF_SOCKET_SUBSET is enabled, then call sosetopt().
 *
 * NOTE(review): the statements guarded by the visible conditions (and the
 * exit path) are not part of this listing.
 */
1513 setsockopt(struct proc
*p
, struct setsockopt_args
*uap
,
1514 __unused register_t
*retval
)
1517 struct sockopt sopt
;
1520 AUDIT_ARG(fd
, uap
->s
);
/* A null value pointer is only a problem when a non-zero size is claimed. */
1521 if (uap
->val
== 0 && uap
->valsize
!= 0)
1523 /* No bounds checking on size (it's unsigned) */
1525 error
= file_socket(uap
->s
, &so
);
/* Describe the request: a "set" carrying the caller's arguments verbatim. */
1529 sopt
.sopt_dir
= SOPT_SET
;
1530 sopt
.sopt_level
= uap
->level
;
1531 sopt
.sopt_name
= uap
->name
;
1532 sopt
.sopt_val
= uap
->val
;
1533 sopt
.sopt_valsize
= uap
->valsize
;
1540 #if CONFIG_MACF_SOCKET_SUBSET
1541 if ((error
= mac_socket_check_setsockopt(kauth_cred_get(), so
,
1544 #endif /* CONFIG_MACF_SOCKET_SUBSET */
1545 error
= sosetopt(so
, &sopt
);
1554 * Returns: 0 Success
1557 * EACCES Mandatory Access Control failure
1560 * file_socket:ENOTSOCK
/*
 * getsockopt system call.
 *
 * Visible flow: resolve uap->s with file_socket(), copy the caller's
 * buffer size in from uap->avalsize, fill a struct sockopt with direction
 * SOPT_GET and the caller's level, name and val, optionally consult
 * mac_socket_check_getsockopt() when CONFIG_MACF_SOCKET_SUBSET is enabled,
 * call sogetopt(), and copy the resulting sopt_valsize back out to
 * uap->avalsize.
 *
 * NOTE(review): the conditions surrounding the copyin/copyout and the exit
 * path are not part of this listing.
 */
1565 getsockopt(struct proc
*p
, struct getsockopt_args
*uap
,
1566 __unused register_t
*retval
)
1570 struct sockopt sopt
;
1573 error
= file_socket(uap
->s
, &so
);
1577 error
= copyin(uap
->avalsize
, (caddr_t
)&valsize
,
1581 /* No bounds checking on size (it's unsigned) */
1585 sopt
.sopt_dir
= SOPT_GET
;
1586 sopt
.sopt_level
= uap
->level
;
1587 sopt
.sopt_name
= uap
->name
;
1588 sopt
.sopt_val
= uap
->val
;
/*
 * NOTE(review): the trailing comment on the next statement says the size
 * was "checked non-negative above", while the earlier comment says no
 * bounds check is done because the size is unsigned.  No such check is
 * visible in this listing -- confirm which comment is stale.
 */
1589 sopt
.sopt_valsize
= (size_t)valsize
; /* checked non-negative above */
1596 #if CONFIG_MACF_SOCKET_SUBSET
1597 if ((error
= mac_socket_check_getsockopt(kauth_cred_get(), so
,
1600 #endif /* CONFIG_MACF_SOCKET_SUBSET */
1601 error
= sogetopt((struct socket
*)so
, &sopt
);
/* Report back the size that sogetopt() left in the request. */
1603 valsize
= sopt
.sopt_valsize
;
1604 error
= copyout((caddr_t
)&valsize
, uap
->avalsize
,
1616 * Returns: 0 Success
1618 * file_socket:ENOTSOCK
1622 * <pru_sockaddr>:ENOBUFS[TCP]
1623 * <pru_sockaddr>:ECONNRESET[TCP]
1624 * <pru_sockaddr>:EINVAL[AF_UNIX]
1625 * <sf_getsockname>:???
/*
 * getsockname system call.
 *
 * Visible flow:
 *   1. Resolve uap->fdes with file_socket() and copy the caller's buffer
 *      length in from uap->alen.
 *   2. Obtain an address from the protocol's pru_sockaddr entry point.
 *   3. Walk the socket's filter list while error stays 0; each filter
 *      that provides sf_getsockname is called with the address.
 *      EJUSTRETURN from a filter is tested specially.
 *   4. Copy out at most MIN(len, sa->sa_len) bytes of the address to
 *      uap->asa, then write a length back to uap->alen.
 *   5. Release the descriptor reference with file_drop().
 *
 * NOTE(review): locking calls other than the socket_unlock() sites shown,
 * error branches, and the release of sa are not part of this listing.
 */
1629 getsockname(__unused
struct proc
*p
, struct getsockname_args
*uap
,
1630 __unused register_t
*retval
)
1633 struct sockaddr
*sa
;
1638 error
= file_socket(uap
->fdes
, &so
);
1641 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof (socklen_t
));
1650 error
= (*so
->so_proto
->pr_usrreqs
->pru_sockaddr
)(so
, &sa
);
1652 struct socket_filter_entry
*filter
;
/* The loop stops at the first filter that reports a non-zero error. */
1654 for (filter
= so
->so_filt
; filter
&& error
== 0;
1655 filter
= filter
->sfe_next_onsocket
) {
1656 if (filter
->sfe_filter
->sf_filter
.sf_getsockname
) {
/* socket_unlock(so, 0) is issued ahead of the filter callback. */
1660 socket_unlock(so
, 0);
1662 error
= filter
->sfe_filter
->sf_filter
.
1663 sf_getsockname(filter
->sfe_cookie
, so
, &sa
);
1667 if (error
== EJUSTRETURN
)
1675 socket_unlock(so
, 1);
/* Never copy more than the caller's buffer or the address itself holds. */
1683 sa_len
= sa
->sa_len
;
1684 len
= MIN(len
, sa_len
);
1685 error
= copyout((caddr_t
)sa
, uap
->asa
, len
);
/*
 * NOTE(review): the statement that restores the untruncated length is not
 * visible between the comment below and the copyout of len -- confirm that
 * len is reset from sa_len before it is written back.
 */
1688 /* return the actual, untruncated address length */
1691 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof (socklen_t
));
1696 file_drop(uap
->fdes
);
1701 * Get name of peer for connected socket.
1703 * Returns: 0 Success
1707 * file_socket:ENOTSOCK
1711 * <pru_peeraddr>:???
1712 * <sf_getpeername>:???
/*
 * getpeername system call.
 *
 * Visible flow:
 *   1. Resolve uap->fdes with file_socket().
 *   2. Bail out (after socket_unlock(so, 1)) when both SS_CANTRCVMORE and
 *      SS_CANTSENDMORE are set, or when neither SS_ISCONNECTED nor
 *      SS_ISCONFIRMING is set.
 *   3. Copy the caller's buffer length in from uap->alen.
 *   4. Obtain an address from the protocol's pru_peeraddr entry point and
 *      offer it to every socket filter that provides sf_getpeername, for
 *      as long as error stays 0.
 *   5. Copy out at most MIN(len, sa->sa_len) bytes to uap->asa, write a
 *      length back to uap->alen, free sa (M_SONAME) if it is non-NULL and
 *      drop the descriptor reference.
 *
 * NOTE(review): the error codes chosen on the early-exit paths and the
 * matching lock acquisition are not part of this listing.
 */
1716 getpeername(__unused
struct proc
*p
, struct getpeername_args
*uap
,
1717 __unused register_t
*retval
)
1720 struct sockaddr
*sa
;
1725 error
= file_socket(uap
->fdes
, &so
);
1735 if ((so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
1736 (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) {
1737 /* the socket has been shutdown, no more getpeername's */
1738 socket_unlock(so
, 1);
/* Neither connected nor confirming: unlock and leave. */
1743 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONFIRMING
)) == 0) {
1744 socket_unlock(so
, 1);
1748 error
= copyin(uap
->alen
, (caddr_t
)&len
, sizeof (socklen_t
));
1750 socket_unlock(so
, 1);
1754 error
= (*so
->so_proto
->pr_usrreqs
->pru_peeraddr
)(so
, &sa
);
1756 struct socket_filter_entry
*filter
;
/* The loop stops at the first filter that reports a non-zero error. */
1758 for (filter
= so
->so_filt
; filter
&& error
== 0;
1759 filter
= filter
->sfe_next_onsocket
) {
1760 if (filter
->sfe_filter
->sf_filter
.sf_getpeername
) {
/* socket_unlock(so, 0) is issued ahead of the filter callback. */
1764 socket_unlock(so
, 0);
1766 error
= filter
->sfe_filter
->sf_filter
.
1767 sf_getpeername(filter
->sfe_cookie
, so
, &sa
);
1771 if (error
== EJUSTRETURN
)
1779 socket_unlock(so
, 1);
/* Never copy more than the caller's buffer or the address itself holds. */
1786 sa_len
= sa
->sa_len
;
1787 len
= MIN(len
, sa_len
);
1788 error
= copyout(sa
, uap
->asa
, len
);
/*
 * NOTE(review): the statement that restores the untruncated length is not
 * visible between the comment below and the copyout of len -- confirm that
 * len is reset from sa_len before it is written back.
 */
1791 /* return the actual, untruncated address length */
1794 error
= copyout((caddr_t
)&len
, uap
->alen
, sizeof (socklen_t
));
1796 if (sa
) FREE(sa
, M_SONAME
);
1798 file_drop(uap
->fdes
);
/*
 * Copy buflen bytes of user data at `data` into a newly obtained mbuf of
 * the requested type.
 *
 * Visible flow:
 *   - Lengths above MLEN are examined first: for MT_SONAME requests of at
 *     most 112 bytes the length is clamped down to MLEN; otherwise a
 *     length above MCLBYTES is tested (outcome not visible here).
 *   - An mbuf is obtained with m_get(M_WAIT, type); for lengths still
 *     above MLEN the M_EXT flag of the mbuf is checked afterwards.
 *   - copyin() fills the mbuf data area with buflen bytes.
 *   - For MT_SONAME the sa_len field of the copied sockaddr is overwritten
 *     with buflen.
 *
 * NOTE(review): how the result reaches *mp, the error returns, and the
 * cluster attachment step are not part of this listing.
 */
1803 sockargs(struct mbuf
**mp
, user_addr_t data
, int buflen
, int type
)
1805 struct sockaddr
*sa
;
1809 if ((u_int
)buflen
> MLEN
) {
/* NOTE(review): 112 is an unnamed constant; its origin is not shown here. */
1810 if (type
== MT_SONAME
&& (u_int
)buflen
<= 112)
1811 buflen
= MLEN
; /* unix domain compat. hack */
1812 else if ((u_int
)buflen
> MCLBYTES
)
1815 m
= m_get(M_WAIT
, type
);
1818 if ((u_int
)buflen
> MLEN
) {
1820 if ((m
->m_flags
& M_EXT
) == 0) {
1826 error
= copyin(data
, mtod(m
, caddr_t
), (u_int
)buflen
);
/* The user-supplied sa_len is not trusted; it is replaced by buflen. */
1831 if (type
== MT_SONAME
) {
1832 sa
= mtod(m
, struct sockaddr
*);
1833 sa
->sa_len
= buflen
;
1840 * Given a user_addr_t of length len, allocate and fill out a *sa.
1842 * Returns: 0 Success
1843 * ENAMETOOLONG Filename too long
1844 * EINVAL Invalid argument
1845 * ENOMEM Not enough space
1846 * copyin:EFAULT Bad address
/*
 * Allocate a sockaddr of `len` bytes and fill it from user address uaddr.
 *
 * Visible flow:
 *   - len above SOCK_MAXADDRLEN returns ENAMETOOLONG.
 *   - len below offsetof(struct sockaddr, sa_data[0]) is rejected (the
 *     returned code is not visible in this listing).
 *   - The buffer comes from MALLOC(..., M_SONAME, M_WAITOK | M_ZERO) and is
 *     filled with copyin().
 *   - When translate_unspec is set, the copied family is AF_UNSPEC,
 *     INP_CHECK_SOCKAF(so, AF_INET) holds and len equals
 *     sizeof (struct sockaddr_in), the family is rewritten to AF_INET.
 *
 * NOTE(review): the store through namp, the failure cleanup and the return
 * statements are not part of this listing.
 */
1849 getsockaddr(struct socket
*so
, struct sockaddr
**namp
, user_addr_t uaddr
,
1850 size_t len
, boolean_t translate_unspec
)
1852 struct sockaddr
*sa
;
1855 if (len
> SOCK_MAXADDRLEN
)
1856 return (ENAMETOOLONG
);
/* Must at least cover the fixed header that precedes sa_data. */
1858 if (len
< offsetof(struct sockaddr
, sa_data
[0]))
1861 MALLOC(sa
, struct sockaddr
*, len
, M_SONAME
, M_WAITOK
| M_ZERO
);
1865 error
= copyin(uaddr
, (caddr_t
)sa
, len
);
1870 * Force sa_family to AF_INET on AF_INET sockets to handle
1871 * legacy applications that use AF_UNSPEC (0). On all other
1872 * sockets we leave it unchanged and let the lower layer
1875 if (translate_unspec
&& sa
->sa_family
== AF_UNSPEC
&&
1876 INP_CHECK_SOCKAF(so
, AF_INET
) &&
1877 len
== sizeof (struct sockaddr_in
))
1878 sa
->sa_family
= AF_INET
;
/*
 * Variant of the sockaddr copy-in helper that fills a caller-provided
 * struct sockaddr_storage instead of allocating.
 *
 * Visible flow:
 *   - A NULL ss, a USER_ADDR_NULL uaddr, or a len below
 *     offsetof(struct sockaddr, sa_data[0]) is rejected (the returned code
 *     is not visible in this listing).
 *   - len above sizeof (*ss) returns ENAMETOOLONG.
 *   - *ss is zeroed in full, then len bytes are copied in from uaddr.
 *   - When translate_unspec is set, the copied family is AF_UNSPEC,
 *     INP_CHECK_SOCKAF(so, AF_INET) holds and len equals
 *     sizeof (struct sockaddr_in), the family is rewritten to AF_INET.
 *
 * NOTE(review): the handling of the copyin result and the return
 * statements are not part of this listing.
 */
1887 getsockaddr_s(struct socket
*so
, struct sockaddr_storage
*ss
,
1888 user_addr_t uaddr
, size_t len
, boolean_t translate_unspec
)
1892 if (ss
== NULL
|| uaddr
== USER_ADDR_NULL
||
1893 len
< offsetof(struct sockaddr
, sa_data
[0]))
1897 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
1898 * so the check here is inclusive.
1900 if (len
> sizeof (*ss
))
1901 return (ENAMETOOLONG
);
/* Zero first so bytes beyond len are not left uninitialized. */
1903 bzero(ss
, sizeof (*ss
));
1904 error
= copyin(uaddr
, (caddr_t
)ss
, len
);
1907 * Force sa_family to AF_INET on AF_INET sockets to handle
1908 * legacy applications that use AF_UNSPEC (0). On all other
1909 * sockets we leave it unchanged and let the lower layer
1912 if (translate_unspec
&& ss
->ss_family
== AF_UNSPEC
&&
1913 INP_CHECK_SOCKAF(so
, AF_INET
) &&
1914 len
== sizeof (struct sockaddr_in
))
1915 ss
->ss_family
= AF_INET
;
/*
 * Tunables and sizing macros used by sendfile().
 *
 * sendfileuiobufs starts at SFUIOBUFS (64) and is registered as a
 * read-write (CTLFLAG_RW) integer under the _kern_ipc sysctl node.
 */
1924 SYSCTL_DECL(_kern_ipc
);
1926 #define SFUIOBUFS 64
1927 static int sendfileuiobufs
= SFUIOBUFS
;
1928 SYSCTL_INT(_kern_ipc
, OID_AUTO
, sendfileuiobufs
, CTLFLAG_RW
, &sendfileuiobufs
,
/*
 * HOWMANY_16K / HOWMANY_4K: number of chunks of (1 << (PGSHIFT + 2)) or
 * (1 << PGSHIFT) bytes needed to hold n bytes, rounding up.  n is cast to
 * unsigned int and decremented before the shift, so n == 0 wraps around
 * instead of producing 0; callers must pass a non-zero n.
 */
1931 /* Macros to compute the number of mbufs needed depending on cluster size */
1932 #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
1933 #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
/*
 * NOTE(review): sendfileuiobufs is a signed int that can be rewritten at
 * run time through the sysctl above, and this macro shifts whatever value
 * is current; no range validation is visible in this listing.
 */
1935 /* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */
1936 #define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT)
1938 /* Upper send limit in the number of mbuf clusters */
1939 #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
1940 #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES)
/*
 * mbuf_pkt_maxlen: accumulates mbuf_maxlen() of the mbuf `m` into a
 * running size_t total.  The prototype is declared immediately before the
 * __private_extern__ definition.
 *
 * NOTE(review): the loop that advances m and the return statement are not
 * part of this listing; presumably the total covers the whole chain --
 * confirm against the full source.
 */
1942 size_t mbuf_pkt_maxlen(mbuf_t m
);
1944 __private_extern__
size_t
1945 mbuf_pkt_maxlen(mbuf_t m
)
1950 maxlen
+= mbuf_maxlen(m
);
/*
 * Allocate an mbuf chain able to hold (part of) a pktlen-byte packet.
 *
 * how       - wait flag forwarded to the first allocation attempts.
 * pktlen    - requested size; a panic message exists for the zero case.
 * maxchunks - out: receives `needed` as left by the allocator.
 * m         - out: receives the allocated chain.
 * jumbocl   - when true and pktlen > NBPG, M16KCLBYTES clusters are tried.
 *
 * Visible flow: the jumbo attempt asks for MIN(SENDFILE_MAX_16K,
 * HOWMANY_16K(pktlen)) clusters of M16KCLBYTES; the regular attempt asks
 * for MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen)) clusters of NBPG; a final
 * attempt uses M_WAIT with NBPG clusters, and a NULL result from it is
 * treated as fatal (panic).
 *
 * NOTE(review): the conditions that chain the three attempts together are
 * not part of this listing.
 */
1957 alloc_sendpkt(int how
, size_t pktlen
, unsigned int *maxchunks
,
1958 struct mbuf
**m
, boolean_t jumbocl
)
1960 unsigned int needed
;
/* NOTE(review): pktlen is size_t but is formatted with %ld. */
1963 panic("%s: pktlen (%ld) must be non-zero\n", __func__
, pktlen
);
1966 * Try to allocate for the whole thing. Since we want full control
1967 * over the buffer size and be able to accept partial result, we can't
1968 * use mbuf_allocpacket(). The logic below is similar to sosend().
1971 if (pktlen
> NBPG
&& jumbocl
) {
1972 needed
= MIN(SENDFILE_MAX_16K
, HOWMANY_16K(pktlen
));
1973 *m
= m_getpackets_internal(&needed
, 1, how
, 0, M16KCLBYTES
);
1976 needed
= MIN(SENDFILE_MAX_4K
, HOWMANY_4K(pktlen
));
1977 *m
= m_getpackets_internal(&needed
, 1, how
, 0, NBPG
);
1981 * Our previous attempt(s) at allocation had failed; the system
1982 * may be short on mbufs, and we want to block until they are
1983 * available. This time, ask just for 1 mbuf and don't return
1988 *m
= m_getpackets_internal(&needed
, 1, M_WAIT
, 1, NBPG
);
1991 panic("%s: blocking allocation returned NULL\n", __func__
);
/* Tell the caller how many chunks the allocator reported. */
1993 *maxchunks
= needed
;
1998 * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
1999 * struct sf_hdtr *hdtr, int flags)
2001 * Send a file specified by 'fd' and starting at 'offset' to a socket
2002 * specified by 's'. Send only '*nbytes' of the file or until EOF if
2003 * *nbytes == 0. Optionally add a header and/or trailer to the socket
2004 * output. If specified, write the total number of bytes sent into *nbytes.
/*
 * sendfile system call: transmit the contents of the file uap->fd,
 * starting at uap->offset, on the socket uap->s, optionally preceded and
 * followed by header/trailer iovecs described by uap->hdtr.
 *
 * Visible flow:
 *   1. Argument checks: fp_getfvp() on uap->fd, FREAD set on the file,
 *      vnode_isreg(), file_socket() on uap->s, SOCK_STREAM type,
 *      SS_ISCONNECTED, non-negative offset, non-null uap->nbytes pointer,
 *      and uap->flags == 0.
 *   2. The local vfs context takes the credential of the open file
 *      (fp->f_fglob->fg_cred); with CONFIG_MACF_SOCKET_SUBSET the send is
 *      checked by mac_socket_check_send().
 *   3. nbytes is copied in from uap->nbytes; the sf_hdtr structure is
 *      copied in with a layout chosen by IS_64BIT_PROCESS() and widened
 *      into user_hdtr for 32-bit callers.
 *   4. Headers are sent through writev_nocancel(); its byte count is added
 *      to sbytes.
 *   5. The file size is fetched with vnode_size() and the send buffer is
 *      locked with sblock().
 *   6. Main loop: compute xfsize from the socket buffer space and the
 *      limits listed in the in-loop comment, allocate mbufs with
 *      alloc_sendpkt(), describe them in a uio, read the file with
 *      fo_read(), trim the mbuf lengths to what was read, re-check the
 *      socket state, wait for buffer space with sbwait() if needed, run
 *      sf_data_out socket filters, and hand the chain to pru_send.
 *   7. After the loop: sbunlock(), trailers via writev_nocancel(), and
 *      sbytes is copied out to uap->nbytes.
 *
 * NOTE(review): this listing omits many lines (gotos, labels, error
 * branches, lock re-acquisition); the summary covers only what is visible.
 */
2007 sendfile(struct proc
*p
, struct sendfile_args
*uap
, __unused
int *retval
)
2009 struct fileproc
*fp
;
2012 struct writev_nocancel_args nuap
;
2013 user_ssize_t writev_retval
;
2014 struct sf_hdtr hdtr
;
2015 struct user_sf_hdtr user_hdtr
;
2017 off_t nbytes
= 0, sbytes
= 0;
2020 size_t size_of_iovec
;
/* By-value copy of the current context; its credential is replaced below. */
2022 struct vfs_context context
= *vfs_context_current();
2024 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE
| DBG_FUNC_START
), uap
->s
,
2027 * Do argument checking. Must be a regular file in, stream
2028 * type and connected socket out, positive offset.
2030 if ((error
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) {
2033 if ((fp
->f_flag
& FREAD
) == 0) {
2037 if (vnode_isreg(vp
) == 0) {
2041 error
= file_socket(uap
->s
, &so
);
2049 if (so
->so_type
!= SOCK_STREAM
) {
2053 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
2057 if (uap
->offset
< 0) {
2061 if (uap
->nbytes
== USER_ADDR_NULL
) {
2065 if (uap
->flags
!= 0) {
/* File reads below run with the credential the file was opened with. */
2070 context
.vc_ucred
= fp
->f_fglob
->fg_cred
;
2072 #if CONFIG_MACF_SOCKET_SUBSET
2073 /* JMM - fetch connected sockaddr? */
2074 error
= mac_socket_check_send(context
.vc_ucred
, so
, NULL
);
2080 * Get number of bytes to send
2081 * Should it applies to size of header and trailer?
2082 * JMM - error handling?
/*
 * NOTE(review): the result of this copyin is discarded, so a faulting
 * uap->nbytes leaves nbytes at its initial 0, which the loop below treats
 * as "no byte limit" (see the `nbytes &&` test).  Consider propagating
 * the error.
 */
2084 copyin(uap
->nbytes
, &nbytes
, sizeof (off_t
));
2087 * If specified, get the pointer to the sf_hdtr struct for
2088 * any headers/trailers.
2090 if (uap
->hdtr
!= USER_ADDR_NULL
) {
2093 bzero(&user_hdtr
, sizeof (user_hdtr
));
/* Pick the copy-in layout that matches the caller's pointer width. */
2094 if (IS_64BIT_PROCESS(p
)) {
2095 hdtrp
= (caddr_t
)&user_hdtr
;
2096 sizeof_hdtr
= sizeof (user_hdtr
);
2097 size_of_iovec
= sizeof (struct user_iovec
);
2099 hdtrp
= (caddr_t
)&hdtr
;
2100 sizeof_hdtr
= sizeof (hdtr
);
2101 size_of_iovec
= sizeof (struct iovec
);
2103 error
= copyin(uap
->hdtr
, hdtrp
, sizeof_hdtr
);
2106 /* need to copy if user process is not 64-bit */
2107 if (!IS_64BIT_PROCESS(p
)) {
2108 user_hdtr
.headers
= CAST_USER_ADDR_T(hdtr
.headers
);
2109 user_hdtr
.hdr_cnt
= hdtr
.hdr_cnt
;
2110 user_hdtr
.trailers
= CAST_USER_ADDR_T(hdtr
.trailers
);
2111 user_hdtr
.trl_cnt
= hdtr
.trl_cnt
;
2115 * Send any headers. Wimp out and use writev(2).
2117 if (user_hdtr
.headers
!= USER_ADDR_NULL
) {
/*
 * NOTE(review): nuap is declared as struct writev_nocancel_args but is
 * cleared with sizeof (struct writev_args); confirm the two structures
 * have the same size or use sizeof (nuap).
 */
2118 bzero(&nuap
, sizeof (struct writev_args
));
2120 nuap
.iovp
= user_hdtr
.headers
;
2121 nuap
.iovcnt
= user_hdtr
.hdr_cnt
;
2122 error
= writev_nocancel(p
, &nuap
, &writev_retval
);
/* Header bytes count toward the total reported back to the caller. */
2125 sbytes
+= writev_retval
;
2130 * Get the file size for 2 reasons:
2131 * 1. We don't want to allocate more mbufs than necessary
2132 * 2. We don't want to read past the end of file
2134 if ((error
= vnode_size(vp
, &file_size
, vfs_context_current())) != 0)
2138 * Simply read file data into a chain of mbufs that used with scatter
2139 * gather reads. We're not (yet?) setup to use zero copy external
2140 * mbufs that point to the file pages.
2143 error
= sblock(&so
->so_snd
, M_WAIT
);
2145 socket_unlock(so
, 1);
/* Each iteration advances both the file offset and the byte total. */
2148 for (off
= uap
->offset
; ; off
+= xfsize
, sbytes
+= xfsize
) {
2149 mbuf_t m0
= NULL
, m
;
2150 unsigned int nbufs
= sendfileuiobufs
, i
;
/*
 * NOTE(review): the array bound below is computed from sendfileuiobufs,
 * a run-time variable, so the "1 KB" remark only holds for a particular
 * value of it -- confirm the stack usage is bounded.
 */
2152 char uio_buf
[UIO_SIZEOF(sendfileuiobufs
)]; /* 1 KB !!! */
2160 * Calculate the amount to transfer.
2161 * Align to round number of pages.
2162 * Not to exceed send socket buffer,
2163 * the EOF, or the passed in nbytes.
2165 xfsize
= sbspace(&so
->so_snd
);
2168 if (so
->so_state
& SS_CANTSENDMORE
) {
2171 } else if ((so
->so_state
& SS_NBIO
)) {
/*
 * Clamp order: sendfile limit, whole pages, remainder of the current
 * page when off is not page aligned, caller's byte budget, end of file.
 */
2179 if (xfsize
> SENDFILE_MAX_BYTES
)
2180 xfsize
= SENDFILE_MAX_BYTES
;
2181 else if (xfsize
> PAGE_SIZE
)
2182 xfsize
= trunc_page(xfsize
);
2183 pgoff
= off
& PAGE_MASK_64
;
2184 if (pgoff
> 0 && PAGE_SIZE
- pgoff
< xfsize
)
2185 xfsize
= PAGE_SIZE_64
- pgoff
;
2186 if (nbytes
&& xfsize
> (nbytes
- sbytes
))
2187 xfsize
= nbytes
- sbytes
;
2190 if (off
+ xfsize
> file_size
)
2191 xfsize
= file_size
- off
;
2196 * Attempt to use larger than system page-size clusters for
2197 * large writes only if there is a jumbo cluster pool and
2198 * if the socket is marked accordingly.
2200 jumbocl
= sosendjcl
&& njcl
> 0 &&
2201 ((so
->so_flags
& SOF_MULTIPAGES
) || sosendjcl_ignore_capab
);
/* socket_unlock(so, 0) is issued ahead of the allocation and file read. */
2203 socket_unlock(so
, 0);
2204 alloc_sendpkt(M_WAIT
, xfsize
, &nbufs
, &m0
, jumbocl
);
2205 pktlen
= mbuf_pkt_maxlen(m0
);
2206 if (pktlen
< xfsize
)
2209 auio
= uio_createwithbuffer(nbufs
, off
, UIO_SYSSPACE
,
2210 UIO_READ
, &uio_buf
[0], sizeof (uio_buf
));
2212 //printf("sendfile: uio_createwithbuffer failed\n");
/* First pass over the chain: size each mbuf and add it to the uio. */
2219 for (i
= 0, m
= m0
, uiolen
= 0;
2220 i
< nbufs
&& m
!= NULL
&& uiolen
< xfsize
;
2221 i
++, m
= mbuf_next(m
)) {
2222 size_t mlen
= mbuf_maxlen(m
);
2224 if (mlen
+ uiolen
> xfsize
)
2225 mlen
= xfsize
- uiolen
;
2226 mbuf_setlen(m
, mlen
);
2227 uio_addiov(auio
, CAST_USER_ADDR_T(mbuf_datastart(m
)),
2232 if (xfsize
!= uio_resid(auio
))
2233 printf("sendfile: xfsize: %lld != uio_resid(auio): "
2234 "%lld\n", xfsize
, uio_resid(auio
));
2236 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ
| DBG_FUNC_START
),
2237 uap
->s
, (unsigned int)((xfsize
>> 32) & 0x0ffffffff),
2238 (unsigned int)(xfsize
& 0x0ffffffff), 0, 0);
2239 error
= fo_read(fp
, auio
, FOF_OFFSET
, &context
);
/* A partial read cut short by ERESTART/EINTR/EWOULDBLOCK is special-cased. */
2242 if (uio_resid(auio
) != xfsize
&& (error
== ERESTART
||
2243 error
== EINTR
|| error
== EWOULDBLOCK
)) {
/* From here on xfsize is the number of bytes actually read. */
2250 xfsize
-= uio_resid(auio
);
2251 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ
| DBG_FUNC_END
),
2252 uap
->s
, (unsigned int)((xfsize
>> 32) & 0x0ffffffff),
2253 (unsigned int)(xfsize
& 0x0ffffffff), 0, 0);
2256 //printf("sendfile: fo_read 0 bytes, EOF\n");
2259 if (xfsize
+ off
> file_size
)
2260 printf("sendfile: xfsize: %lld + off: %lld > file_size:"
2261 "%lld\n", xfsize
, off
, file_size
);
/* Second pass over the chain: trim mbuf lengths to the bytes read. */
2262 for (i
= 0, m
= m0
, rlen
= 0;
2263 i
< nbufs
&& m
!= NULL
&& rlen
< xfsize
;
2264 i
++, m
= mbuf_next(m
)) {
2265 size_t mlen
= mbuf_maxlen(m
);
2267 if (rlen
+ mlen
> xfsize
)
2268 mlen
= xfsize
- rlen
;
2269 mbuf_setlen(m
, mlen
);
2273 mbuf_pkthdr_setlen(m0
, xfsize
);
2277 * Make sure that the socket is still able to take more data.
2278 * CANTSENDMORE being true usually means that the connection
2279 * was closed. so_error is true when an error was sensed after
2281 * The state is checked after the page mapping and buffer
2282 * allocation above since those operations may block and make
2283 * any socket checks stale. From this point forward, nothing
2284 * blocks before the pru_send (or more accurately, any blocking
2285 * results in a loop back to here to re-check).
2287 if ((so
->so_state
& SS_CANTSENDMORE
) || so
->so_error
) {
2288 if (so
->so_state
& SS_CANTSENDMORE
) {
2291 error
= so
->so_error
;
2298 * Wait for socket space to become available. We do this just
2299 * after checking the connection state above in order to avoid
2300 * a race condition with sbwait().
2302 if (sbspace(&so
->so_snd
) < (long)so
->so_snd
.sb_lowat
) {
2303 if (so
->so_state
& SS_NBIO
) {
2308 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT
|
2309 DBG_FUNC_START
), uap
->s
, 0, 0, 0, 0);
2310 error
= sbwait(&so
->so_snd
);
2311 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT
|
2312 DBG_FUNC_END
), uap
->s
, 0, 0, 0, 0);
2314 * An error from sbwait usually indicates that we've
2315 * been interrupted by a signal. If we've sent anything
2316 * then return bytes sent, otherwise return the error.
2326 * Socket filter processing
2328 struct socket_filter_entry
*filter
;
2330 struct mbuf
*control
= NULL
;
/* Remember whether a filter thread was already recorded on entry. */
2331 boolean_t recursive
= (so
->so_send_filt_thread
!= NULL
);
2334 for (filter
= so
->so_filt
; filter
&& (error
== 0);
2335 filter
= filter
->sfe_next_onsocket
) {
2336 if (filter
->sfe_filter
->sf_filter
.sf_data_out
) {
2337 if (filtered
== 0) {
2339 so
->so_send_filt_thread
=
2342 socket_unlock(so
, 0);
2344 error
= filter
->sfe_filter
->sf_filter
.
2345 sf_data_out(filter
->sfe_cookie
, so
,
2346 NULL
, &m0
, &control
, 0);
2352 * At this point, we've run at least one filter.
2353 * The socket is unlocked as is the socket
2354 * buffer. Clear the recorded filter thread
2355 * only when we are outside of a filter's
2356 * context. This allows for a filter to issue
2357 * multiple inject calls from its sf_data_out
2363 so
->so_send_filt_thread
= 0;
2365 if (error
== EJUSTRETURN
) {
2373 * End Socket filter processing
2376 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND
| DBG_FUNC_START
),
2377 uap
->s
, 0, 0, 0, 0);
2378 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)(so
, 0, m0
,
/*
 * NOTE(review): this trace point follows the pru_send call but is tagged
 * DBG_FUNC_START, identical to the one before the call.  The READ and
 * WAIT trace pairs in this function close with DBG_FUNC_END; this one
 * likely should as well.
 */
2380 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND
| DBG_FUNC_START
),
2381 uap
->s
, 0, 0, 0, 0);
2386 sbunlock(&so
->so_snd
, 0); /* will unlock socket */
2388 * Send trailers. Wimp out and use writev(2).
2390 if (uap
->hdtr
!= USER_ADDR_NULL
&&
2391 user_hdtr
.trailers
!= USER_ADDR_NULL
) {
/* NOTE(review): same sizeof mismatch as in the header path above. */
2392 bzero(&nuap
, sizeof (struct writev_args
));
2394 nuap
.iovp
= user_hdtr
.trailers
;
2395 nuap
.iovcnt
= user_hdtr
.trl_cnt
;
2396 error
= writev_nocancel(p
, &nuap
, &writev_retval
);
/* Trailer bytes count toward the total reported back to the caller. */
2399 sbytes
+= writev_retval
;
2406 if (uap
->nbytes
!= USER_ADDR_NULL
) {
2407 /* XXX this appears bogus for some early failure conditions */
/* NOTE(review): the result of this copyout is discarded. */
2408 copyout(&sbytes
, uap
->nbytes
, sizeof (off_t
));
2410 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE
| DBG_FUNC_END
), uap
->s
,
2411 (unsigned int)((sbytes
>> 32) & 0x0ffffffff),
2412 (unsigned int)(sbytes
& 0x0ffffffff), error
, 0);
2415 sbunlock(&so
->so_snd
, 0); /* will unlock socket */
2420 #endif /* SENDFILE */