X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/21362eb3e66fd2c787aee132bce100a44d71a99c..d190cdc3f5544636abb56dc1874be391d3e1b148:/bsd/kern/uipc_syscalls.c?ds=sidebyside

diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c
index cff67dd97..603f8b34d 100644
--- a/bsd/kern/uipc_syscalls.c
+++ b/bsd/kern/uipc_syscalls.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
@@ -30,7 +30,7 @@
  *	The Regents of the University of California.  All rights reserved.
  *
  * sendfile(2) and related extensions:
- * Copyright (c) 1998, David Greenman. All rights reserved. 
+ * Copyright (c) 1998, David Greenman. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -62,235 +62,365 @@
  *
  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
  */
-
-
+/*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
+ * support for mandatory and extensible security protections.  This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
+ */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/filedesc.h>
 #include <sys/proc_internal.h>
 #include <sys/file_internal.h>
+#include <sys/vnode_internal.h>
 #include <sys/malloc.h>
+#include <sys/mcache.h>
 #include <sys/mbuf.h>
-#include <kern/lock.h>
+#include <kern/locks.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
-#if KTRACE
-#include <sys/ktrace.h>
-#endif
 #include <sys/kernel.h>
 #include <sys/uio_internal.h>
+#include <sys/kauth.h>
+#include <kern/task.h>
+#include <sys/priv.h>
+#include <sys/sysctl.h>
 
-#include <bsm/audit_kernel.h>
+#include <security/audit/audit.h>
 
 #include <sys/kdebug.h>
 #include <sys/sysproto.h>
-
-#define f_flag f_fglob->fg_flag
-#define f_type f_fglob->fg_type
-#define f_msgcount f_fglob->fg_msgcount
-#define f_cred f_fglob->fg_cred
-#define f_ops f_fglob->fg_ops
-#define f_offset f_fglob->fg_offset
-#define f_data f_fglob->fg_data
-#if KDEBUG
-
-#define DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
-#define DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
-#define DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
-#define DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
-#define DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
-#define DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
-#define DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
-#define DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
-#define DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
-#define DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))
-
+#include <netinet/in.h>
+#include <net/route.h>
+#include <netinet/in_pcb.h>
+
+#if CONFIG_MACF_SOCKET_SUBSET
+#include <security/mac_framework.h>
+#endif /* CONFIG_MACF_SOCKET_SUBSET */
+
+#define	f_flag f_fglob->fg_flag
+#define	f_type f_fglob->fg_ops->fo_type
+#define	f_msgcount f_fglob->fg_msgcount
+#define	f_cred f_fglob->fg_cred
+#define	f_ops f_fglob->fg_ops
+#define	f_offset f_fglob->fg_offset
+#define	f_data f_fglob->fg_data
+
+#define	DBG_LAYER_IN_BEG	NETDBG_CODE(DBG_NETSOCK, 0)
+#define	DBG_LAYER_IN_END	NETDBG_CODE(DBG_NETSOCK, 2)
+#define	DBG_LAYER_OUT_BEG	NETDBG_CODE(DBG_NETSOCK, 1)
+#define	DBG_LAYER_OUT_END	NETDBG_CODE(DBG_NETSOCK, 3)
+#define	DBG_FNC_SENDMSG		NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
+#define	DBG_FNC_SENDTO		NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
+#define	DBG_FNC_SENDIT		NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
+#define	DBG_FNC_RECVFROM	NETDBG_CODE(DBG_NETSOCK, (5 << 8))
+#define	DBG_FNC_RECVMSG		NETDBG_CODE(DBG_NETSOCK, (6 << 8))
+#define	DBG_FNC_RECVIT		NETDBG_CODE(DBG_NETSOCK, (7 << 8))
+#define	DBG_FNC_SENDFILE	NETDBG_CODE(DBG_NETSOCK, (10 << 8))
+#define	DBG_FNC_SENDFILE_WAIT	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1))
+#define	DBG_FNC_SENDFILE_READ	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2))
+#define	DBG_FNC_SENDFILE_SEND	NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
+#define	DBG_FNC_SENDMSG_X	NETDBG_CODE(DBG_NETSOCK, (11 << 8))
+#define	DBG_FNC_RECVMSG_X	NETDBG_CODE(DBG_NETSOCK, (12 << 8))
+
+#if DEBUG || DEVELOPMENT
+#define	DEBUG_KERNEL_ADDRPERM(_v) (_v)
+#define	DBG_PRINTF(...) printf(__VA_ARGS__)
+#else
+#define	DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
+#define	DBG_PRINTF(...) do { } while (0)
 #endif
 
-
-#define HACK_FOR_4056224 1
-#if HACK_FOR_4056224
-static pid_t last_pid_4056224 = 0;
-#endif /* HACK_FOR_4056224 */
-
-
+/* TODO: should be in header file */
+int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
+
+static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t,
+    int, int32_t *);
+static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
+    int32_t *);
+static int connectit(struct socket *, struct sockaddr *);
+static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
+    size_t, boolean_t);
+static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
+    user_addr_t, size_t, boolean_t);
+static int getsockaddrlist(struct socket *, struct sockaddr_list **,
+    user_addr_t, socklen_t, boolean_t);
 #if SENDFILE
-static void sf_buf_init(void *arg);
-SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
-static struct sf_buf *sf_buf_alloc(void);
-static void sf_buf_ref(caddr_t addr, u_int size);
-static void sf_buf_free(caddr_t addr, u_int size);
-
-static SLIST_HEAD(, sf_buf) sf_freelist;
-static vm_offset_t sf_base;
-static struct sf_buf *sf_bufs;
-static int sf_buf_alloc_want;
-#endif
-
-static int sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, 
-					int flags, register_t *retval);
-static int recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
-					user_addr_t namelenp, register_t *retval);
-  
-static int accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat);
-static int getsockname1(struct proc *p, struct getsockname_args *uap,
-			     register_t *retval, int compat);
-static int getpeername1(struct proc *p, struct getpeername_args *uap,
-			     register_t *retval, int compat);
-
-
-#if COMPAT_43_SOCKET
-struct orecvmsg_args  {
-	int	s;
-	struct	omsghdr *msg;
-	int	flags;
-};
-struct osendmsg_args {
-	int s;
-	caddr_t msg;
-	int flags;
-};
-struct osend_args {
-	int s;
-	caddr_t buf;
-	int len;
-	int flags;
-};
-struct	orecv_args {
-	int	s;
-	caddr_t	buf;
-	int	len;
-	int	flags;
-};
-
-int oaccept(struct proc *p, struct accept_args *uap, register_t *retval);
-int ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval);
-int ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval);
-int orecv(struct proc *p, struct orecv_args	*uap, register_t *retval);
-int orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval);
-int orecvmsg(struct proc *p, struct orecvmsg_args *uap, register_t *retval);
-int	osend(struct proc *p, struct osend_args *uap, register_t *retval);
-int osendmsg(struct proc *p, struct osendmsg_args *uap, register_t *retval);
-#endif // COMPAT_43_SOCKET
+static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
+    boolean_t);
+#endif /* SENDFILE */
+static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
+static int connectitx(struct socket *, struct sockaddr_list **,
+    struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
+    sae_connid_t *, uio_t, unsigned int, user_ssize_t *);
+static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *);
+static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
+    int *);
+static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
+
+static int internalize_user_msghdr_array(const void *, int, int, u_int,
+    struct user_msghdr_x *, struct uio **);
+static u_int externalize_user_msghdr_array(void *, int, int, u_int,
+    const struct user_msghdr_x *, struct uio **);
+
+static void free_uio_array(struct uio **, u_int);
+static int uio_array_is_valid(struct uio **, u_int);
+static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
+static int internalize_recv_msghdr_array(const void *, int, int,
+    u_int, struct user_msghdr_x *, struct recv_msg_elem *);
+static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
+    const struct user_msghdr_x *, struct recv_msg_elem *);
+static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
+static void free_recv_msg_array(struct recv_msg_elem *, u_int);
+
+SYSCTL_DECL(_kern_ipc);
+
+static u_int somaxsendmsgx = 100;
+SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx,
+	CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, "");
+static u_int somaxrecvmsgx = 100;
+SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx,
+	CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, "");
 
 /*
  * System call interface to the socket abstraction.
  */
 
-extern	struct fileops socketops;
+extern const struct fileops socketops;
+
+/*
+ * Returns:	0			Success
+ *		EACCES			Mandatory Access Control failure
+ *	falloc:ENFILE
+ *	falloc:EMFILE
+ *	falloc:ENOMEM
+ *	socreate:EAFNOSUPPORT
+ *	socreate:EPROTOTYPE
+ *	socreate:EPROTONOSUPPORT
+ *	socreate:ENOBUFS
+ *	socreate:ENOMEM
+ *	socreate:???			[other protocol families, IPSEC]
+ */
+int
+socket(struct proc *p,
+	struct socket_args *uap,
+	int32_t *retval)
+{
+	return (socket_common(p, uap->domain, uap->type, uap->protocol,
+	    proc_selfpid(), retval, 0));
+}
 
 int
-socket(p, uap, retval)
-	struct proc *p;
-	register struct socket_args *uap;
-	register_t *retval;
+socket_delegate(struct proc *p,
+		struct socket_delegate_args *uap,
+		int32_t *retval)
+{
+	return socket_common(p, uap->domain, uap->type, uap->protocol,
+	    uap->epid, retval, 1);
+}
+
+static int
+socket_common(struct proc *p,
+		int domain,
+		int type,
+		int protocol,
+		pid_t epid,
+		int32_t *retval,
+		int delegate)
 {
 	struct socket *so;
 	struct fileproc *fp;
 	int fd, error;
 
-	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
+	AUDIT_ARG(socket, domain, type, protocol);
+#if CONFIG_MACF_SOCKET_SUBSET
+	if ((error = mac_socket_check_create(kauth_cred_get(), domain,
+	    type, protocol)) != 0)
+		return (error);
+#endif /* CONFIG_MACF_SOCKET_SUBSET */
+
+	if (delegate) {
+		error = priv_check_cred(kauth_cred_get(),
+		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
+		if (error)
+			return (EACCES);
+	}
 
-	error = falloc(p, &fp, &fd);
+	error = falloc(p, &fp, &fd, vfs_context_current());
 	if (error) {
 		return (error);
 	}
 	fp->f_flag = FREAD|FWRITE;
-	fp->f_type = DTYPE_SOCKET;
 	fp->f_ops = &socketops;
 
-	error = socreate(uap->domain, &so, uap->type, uap->protocol);
+	if (delegate)
+		error = socreate_delegate(domain, &so, type, protocol, epid);
+	else
+		error = socreate(domain, &so, type, protocol);
+
 	if (error) {
 		fp_free(p, fd, fp);
 	} else {
 		fp->f_data = (caddr_t)so;
 
 		proc_fdlock(p);
-		*fdflags(p, fd) &= ~UF_RESERVED;
-		
+		procfdtbl_releasefd(p, fd, NULL);
+
 		fp_drop(p, fd, fp, 1);
 		proc_fdunlock(p);
 
 		*retval = fd;
+		if (ENTR_SHOULDTRACE) {
+			KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
+			    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
+		}
 	}
 	return (error);
 }
 
+/*
+ * Returns:	0			Success
+ *		EDESTADDRREQ		Destination address required
+ *		EBADF			Bad file descriptor
+ *		EACCES			Mandatory Access Control failure
+ *	file_socket:ENOTSOCK
+ *	file_socket:EBADF
+ *	getsockaddr:ENAMETOOLONG	Filename too long
+ *	getsockaddr:EINVAL		Invalid argument
+ *	getsockaddr:ENOMEM		Not enough space
+ *	getsockaddr:EFAULT		Bad address
+ *	sobindlock:???
+ */
 /* ARGSUSED */
 int
-bind(struct proc *p, struct bind_args *uap, __unused register_t *retval)
+bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval)
 {
-	struct sockaddr *sa;
+	struct sockaddr_storage ss;
+	struct sockaddr *sa = NULL;
 	struct socket *so;
+	boolean_t want_free = TRUE;
 	int error;
 
 	AUDIT_ARG(fd, uap->s);
 	error = file_socket(uap->s, &so);
-	if (error)
+	if (error != 0)
 		return (error);
-	error = getsockaddr(&sa, uap->name, uap->namelen);
-	if (error) 
-		goto out;
-	AUDIT_ARG(sockaddr, p, sa);
-	if (so != NULL)	
-		error = sobind(so, sa);
-	else
+	if (so == NULL) {
 		error = EBADF;
-	FREE(sa, M_SONAME);
+		goto out;
+	}
+	if (uap->name == USER_ADDR_NULL) {
+		error = EDESTADDRREQ;
+		goto out;
+	}
+	if (uap->namelen > sizeof (ss)) {
+		error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE);
+	} else {
+		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE);
+		if (error == 0) {
+			sa = (struct sockaddr *)&ss;
+			want_free = FALSE;
+		}
+	}
+	if (error != 0)
+		goto out;
+	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
+#if CONFIG_MACF_SOCKET_SUBSET
+	if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
+		error = sobindlock(so, sa, 1);	/* will lock socket */
+#else
+		error = sobindlock(so, sa, 1);	/* will lock socket */
+#endif /* CONFIG_MACF_SOCKET_SUBSET */
+	if (want_free)
+		FREE(sa, M_SONAME);
 out:
 	file_drop(uap->s);
 	return (error);
 }
 
-
+/*
+ * Returns:	0			Success
+ *		EBADF
+ *		EACCES			Mandatory Access Control failure
+ *	file_socket:ENOTSOCK
+ *	file_socket:EBADF
+ *	solisten:EINVAL
+ *	solisten:EOPNOTSUPP
+ *	solisten:???
+ */
 int
-listen(__unused struct proc *p, register struct listen_args *uap, 
-		__unused register_t *retval)
+listen(__unused struct proc *p, struct listen_args *uap,
+    __unused int32_t *retval)
 {
 	int error;
-	struct socket * so;
+	struct socket *so;
 
 	AUDIT_ARG(fd, uap->s);
 	error = file_socket(uap->s, &so);
 	if (error)
 		return (error);
 	if (so != NULL)
+#if CONFIG_MACF_SOCKET_SUBSET
+	{
+		error = mac_socket_check_listen(kauth_cred_get(), so);
+		if (error == 0)
+			error = solisten(so, uap->backlog);
+	}
+#else
 		error =  solisten(so, uap->backlog);
+#endif /* CONFIG_MACF_SOCKET_SUBSET */
 	else
 		error = EBADF;
+
 	file_drop(uap->s);
 	return (error);
 }
 
-#if !COMPAT_43_SOCKET
-#define	accept1	accept
-#endif
-
-
-
+/*
+ * Returns:	fp_getfsock:EBADF	Bad file descriptor
+ *		fp_getfsock:EOPNOTSUPP	...
+ *		xlate => :ENOTSOCK	Socket operation on non-socket
+ *		:EFAULT			Bad address on copyin/copyout
+ *		:EBADF			Bad file descriptor
+ *		:EOPNOTSUPP		Operation not supported on socket
+ *		:EINVAL			Invalid argument
+ *		:EWOULDBLOCK		Operation would block
+ *		:ECONNABORTED		Connection aborted
+ *		:EINTR			Interrupted function
+ *		:EACCES			Mandatory Access Control failure
+ *		falloc:ENFILE		Too many files open in system
+ *		falloc:EMFILE		Too many open files
+ *		falloc:ENOMEM		Not enough space
+ *		0			Success
+ */
 int
-accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat)
+accept_nocancel(struct proc *p, struct accept_nocancel_args *uap,
+    int32_t *retval)
 {
 	struct fileproc *fp;
-	struct sockaddr *sa;
+	struct sockaddr *sa = NULL;
 	socklen_t namelen;
 	int error;
 	struct socket *head, *so = NULL;
 	lck_mtx_t *mutex_held;
 	int fd = uap->s;
-	int newfd;;
+	int newfd;
 	short fflag;		/* type must match fp->f_flag */
 	int dosocklock = 0;
 
+	*retval = -1;
+
 	AUDIT_ARG(fd, uap->s);
+
 	if (uap->name) {
 		error = copyin(uap->anamelen, (caddr_t)&namelen,
-			sizeof(socklen_t));
-		if(error)
+		    sizeof (socklen_t));
+		if (error)
 			return (error);
 	}
 	error = fp_getfsock(p, fd, &fp, &head);
@@ -303,22 +433,29 @@ accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat)
 		error = EBADF;
 		goto out;
 	}
+#if CONFIG_MACF_SOCKET_SUBSET
+	if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0)
+		goto out;
+#endif /* CONFIG_MACF_SOCKET_SUBSET */
 
 	socket_lock(head, 1);
 
 	if (head->so_proto->pr_getlock != NULL)  {
 		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
 		dosocklock = 1;
-	}
-	else {
+	} else {
 		mutex_held = head->so_proto->pr_domain->dom_mtx;
 		dosocklock = 0;
 	}
 
-
 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
+		if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
+			error = EOPNOTSUPP;
+		} else {
+			/* POSIX: The socket is not accepting connections */
+			error = EINVAL;
+		}
 		socket_unlock(head, 1);
-		error = EINVAL;
 		goto out;
 	}
 	if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
@@ -326,17 +463,19 @@ accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat)
 		error = EWOULDBLOCK;
 		goto out;
 	}
-        while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
+	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
 		if (head->so_state & SS_CANTRCVMORE) {
 			head->so_error = ECONNABORTED;
 			break;
 		}
 		if (head->so_usecount < 1)
-			panic("accept1: head=%x refcount=%d\n", head, head->so_usecount);
-		error = msleep((caddr_t)&head->so_timeo, mutex_held, PSOCK | PCATCH,
-		    "accept", 0);
+			panic("accept: head=%p refcount=%d\n", head,
+			    head->so_usecount);
+		error = msleep((caddr_t)&head->so_timeo, mutex_held,
+		    PSOCK | PCATCH, "accept", 0);
 		if (head->so_usecount < 1)
-			panic("accept1: 2 head=%x refcount=%d\n", head, head->so_usecount);
+			panic("accept: 2 head=%p refcount=%d\n", head,
+			    head->so_usecount);
 		if ((head->so_state & SS_DRAINING)) {
 			error = ECONNABORTED;
 		}
@@ -352,7 +491,6 @@ accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat)
 		goto out;
 	}
 
-
 	/*
 	 * At this point we know that there is at least one connection
 	 * ready to be accepted. Remove it from the queue prior to
@@ -363,153 +501,219 @@ accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat)
 	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
 	so = TAILQ_FIRST(&head->so_comp);
 	TAILQ_REMOVE(&head->so_comp, so, so_list);
+	so->so_head = NULL;
+	so->so_state &= ~SS_COMP;
 	head->so_qlen--;
-	socket_unlock(head, 0); /* unlock head to avoid deadlock with select, keep a ref on head */
+	/* unlock head to avoid deadlock with select, keep a ref on head */
+	socket_unlock(head, 0);
+
+#if CONFIG_MACF_SOCKET_SUBSET
+	/*
+	 * Pass the pre-accepted socket to the MAC framework. This is
+	 * cheaper than allocating a file descriptor for the socket,
+	 * calling the protocol accept callback, and possibly freeing
+	 * the file descriptor should the MAC check fail.
+	 */
+	if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
+		socket_lock(so, 1);
+		so->so_state &= ~SS_NOFDREF;
+		socket_unlock(so, 1);
+		soclose(so);
+		/* Drop reference on listening socket */
+		sodereference(head);
+		goto out;
+	}
+#endif /* CONFIG_MACF_SOCKET_SUBSET */
+
+	/*
+	 * Pass the pre-accepted socket to any interested socket filter(s).
+	 * Upon failure, the socket would have been closed by the callee.
+	 */
+	if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) {
+		/* Drop reference on listening socket */
+		sodereference(head);
+		/* Propagate socket filter's error code to the caller */
+		goto out;
+	}
+
 	fflag = fp->f_flag;
-	proc_fdlock(p);
-	error = falloc_locked(p, &fp, &newfd, 1);
+	error = falloc(p, &fp, &newfd, vfs_context_current());
 	if (error) {
 		/*
-		 * Probably ran out of file descriptors. Put the
-		 * unaccepted connection back onto the queue and
-		 * do another wakeup so some other process might
-		 * have a chance at it.
+		 * Probably ran out of file descriptors.
+		 *
+		 * <rdar://problem/8554930>
+		 * Don't put this back on the socket like we used to, that
+		 * just causes the client to spin. Drop the socket.
 		 */
-		proc_fdunlock(p);
-		socket_lock(head, 0);
-		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
-		head->so_qlen++;
-		wakeup_one((caddr_t)&head->so_timeo);
-		socket_unlock(head, 1);
+		socket_lock(so, 1);
+		so->so_state &= ~SS_NOFDREF;
+		socket_unlock(so, 1);
+		soclose(so);
+		sodereference(head);
 		goto out;
-	} 
-	*fdflags(p, newfd) &= ~UF_RESERVED;
+	}
 	*retval = newfd;
-	fp->f_type = DTYPE_SOCKET;
 	fp->f_flag = fflag;
 	fp->f_ops = &socketops;
 	fp->f_data = (caddr_t)so;
-	fp_drop(p, newfd, fp, 1);
-	proc_fdunlock(p);
+
 	socket_lock(head, 0);
 	if (dosocklock)
 		socket_lock(so, 1);
-	so->so_state &= ~SS_COMP;
-	so->so_head = NULL;
-	sa = 0;
+
+	/* Sync socket non-blocking/async state with file flags */
+	if (fp->f_flag & FNONBLOCK) {
+		so->so_state |= SS_NBIO;
+	} else {
+		so->so_state &= ~SS_NBIO;
+	}
+
+	if (fp->f_flag & FASYNC) {
+		so->so_state |= SS_ASYNC;
+		so->so_rcv.sb_flags |= SB_ASYNC;
+		so->so_snd.sb_flags |= SB_ASYNC;
+	} else {
+		so->so_state &= ~SS_ASYNC;
+		so->so_rcv.sb_flags &= ~SB_ASYNC;
+		so->so_snd.sb_flags &= ~SB_ASYNC;
+	}
+
 	(void) soacceptlock(so, &sa, 0);
 	socket_unlock(head, 1);
-	if (sa == 0) {
+	if (sa == NULL) {
 		namelen = 0;
 		if (uap->name)
 			goto gotnoname;
-		if (dosocklock)
-			socket_unlock(so, 1);
 		error = 0;
-		goto out;
+		goto releasefd;
 	}
-	AUDIT_ARG(sockaddr, p, sa);
+	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
+
 	if (uap->name) {
-		/* check sa_len before it is destroyed */
-		if (namelen > sa->sa_len)
-			namelen = sa->sa_len;
-#if COMPAT_43_SOCKET
-		if (compat)
-			((struct osockaddr *)sa)->sa_family =
-			    sa->sa_family;
-#endif
+		socklen_t	sa_len;
+
+		/* save sa_len before it is destroyed */
+		sa_len = sa->sa_len;
+		namelen = MIN(namelen, sa_len);
 		error = copyout(sa, uap->name, namelen);
 		if (!error)
+			/* return the actual, untruncated address length */
+			namelen = sa_len;
 gotnoname:
-			error = copyout((caddr_t)&namelen, uap->anamelen, 
-			    			sizeof(socklen_t));
+		error = copyout((caddr_t)&namelen, uap->anamelen,
+		    sizeof (socklen_t));
 	}
 	FREE(sa, M_SONAME);
+
+releasefd:
+	/*
+	 * If the socket has been marked as inactive by sosetdefunct(),
+	 * disallow further operations on it.
+	 */
+	if (so->so_flags & SOF_DEFUNCT) {
+		sodefunct(current_proc(), so,
+		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
+	}
+
 	if (dosocklock)
 		socket_unlock(so, 1);
+
+	proc_fdlock(p);
+	procfdtbl_releasefd(p, newfd, NULL);
+	fp_drop(p, newfd, fp, 1);
+	proc_fdunlock(p);
+
 out:
 	file_drop(fd);
+
+	if (error == 0 && ENTR_SHOULDTRACE) {
+		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
+		    newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so));
+	}
 	return (error);
 }
 
 int
-accept(struct proc *p, struct accept_args *uap, register_t *retval)
+accept(struct proc *p, struct accept_args *uap, int32_t *retval)
 {
-
-	return (accept1(p, uap, retval, 0));
+	__pthread_testcancel(1);
+	return (accept_nocancel(p, (struct accept_nocancel_args *)uap,
+	    retval));
 }
 
-#if COMPAT_43_SOCKET
+/*
+ * Returns:	0			Success
+ *		EBADF			Bad file descriptor
+ *		EALREADY		Connection already in progress
+ *		EINPROGRESS		Operation in progress
+ *		ECONNABORTED		Connection aborted
+ *		EINTR			Interrupted function
+ *		EACCES			Mandatory Access Control failure
+ *	file_socket:ENOTSOCK
+ *	file_socket:EBADF
+ *	getsockaddr:ENAMETOOLONG	Filename too long
+ *	getsockaddr:EINVAL		Invalid argument
+ *	getsockaddr:ENOMEM		Not enough space
+ *	getsockaddr:EFAULT		Bad address
+ *	soconnectlock:EOPNOTSUPP
+ *	soconnectlock:EISCONN
+ *	soconnectlock:???		[depends on protocol, filters]
+ *	msleep:EINTR
+ *
+ * Imputed:	so_error		error may be set from so_error, which
+ *					may have been set by soconnectlock.
+ */
+/* ARGSUSED */
 int
-oaccept(struct proc *p, struct accept_args *uap, register_t *retval)
+connect(struct proc *p, struct connect_args *uap, int32_t *retval)
 {
-
-	return (accept1(p, uap, retval, 1));
+	__pthread_testcancel(1);
+	return (connect_nocancel(p, (struct connect_nocancel_args *)uap,
+	    retval));
 }
-#endif /* COMPAT_43_SOCKET */
 
-/* ARGSUSED */
 int
-connect(struct proc *p, struct connect_args *uap, __unused register_t *retval)
+connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
 {
+#pragma unused(p, retval)
 	struct socket *so;
-	struct sockaddr *sa;
-	lck_mtx_t *mutex_held;
+	struct sockaddr_storage ss;
+	struct sockaddr *sa = NULL;
 	int error;
 	int fd = uap->s;
+	boolean_t dgram;
 
 	AUDIT_ARG(fd, uap->s);
-	error = file_socket( fd, &so);
-	if (error)
+	error = file_socket(fd, &so);
+	if (error != 0)
 		return (error);
 	if (so == NULL) {
 		error = EBADF;
 		goto out;
 	}
 
-	socket_lock(so, 1);
+	/*
+	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
+	 * if this is a datagram socket; translate for other types.
+	 */
+	dgram = (so->so_type == SOCK_DGRAM);
 
-	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
-		socket_unlock(so, 1);
-		error = EALREADY;
-		goto out;
+	/* Get socket address now before we obtain socket lock */
+	if (uap->namelen > sizeof (ss)) {
+		error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
+	} else {
+		error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
+		if (error == 0)
+			sa = (struct sockaddr *)&ss;
 	}
-	error = getsockaddr(&sa, uap->name, uap->namelen);
-	if (error)  {
-		socket_unlock(so, 1);
+	if (error != 0)
 		goto out;
-	}
-	AUDIT_ARG(sockaddr, p, sa);
-	error = soconnectlock(so, sa, 0);
-	if (error)
-		goto bad;
-	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
+
+	error = connectit(so, sa);
+
+	if (sa != NULL && sa != SA(&ss))
 		FREE(sa, M_SONAME);
-		socket_unlock(so, 1);
-		error = EINPROGRESS;
-		goto out;
-	}
-	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-		if (so->so_proto->pr_getlock != NULL) 
-			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
-		else 
-			mutex_held = so->so_proto->pr_domain->dom_mtx;
-		error = msleep((caddr_t)&so->so_timeo, mutex_held, PSOCK | PCATCH,
-		    "connec", 0);
-		if ((so->so_state & SS_DRAINING)) {
-			error = ECONNABORTED;
-		}
-		if (error)
-			break;
-	}
-	if (error == 0) {
-		error = so->so_error;
-		so->so_error = 0;
-	}
-bad:
-	so->so_state &= ~SS_ISCONNECTING;
-	socket_unlock(so, 1);
-	FREE(sa, M_SONAME);
 	if (error == ERESTART)
 		error = EINTR;
 out:
@@ -517,200 +721,662 @@ out:
 	return (error);
 }
 
-int
-socketpair(struct proc *p, struct socketpair_args *uap, __unused register_t *retval)
+/*
+ * connectx_nocancel: back end of connectx().
+ *
+ * Copies in the caller's sa_endpoints structure (32- or 64-bit layout
+ * depending on the calling process), builds the optional source and the
+ * mandatory destination address lists, optionally builds a uio from
+ * uap->iov/uap->iovcnt, and hands everything to connectitx().
+ *
+ * On the way out, the byte count reported by connectitx() is copied to
+ * uap->len and the connection ID to uap->connid when those user pointers
+ * are non-NULL; a connectitx() error takes precedence over a copyout error.
+ *
+ * Once file_socket() has succeeded, every exit must go through the "out"
+ * label: that is where the file reference, the uio and both address lists
+ * are released.
+ *
+ * Returns:	0			Success
+ *		EBADF			No socket behind the descriptor
+ *		EINVAL			Missing endpoints/destination, bad
+ *					iovcnt, or iov given without len
+ *		ENOMEM			uio could not be set up
+ *		EINTR			connectitx() returned ERESTART
+ *	file_socket:???
+ *	copyin:???
+ *	getsockaddrlist:???
+ *	copyin_user_iovec_array:???
+ *	uio_calculateresid:???
+ *	connectitx:???
+ *	copyout:???
+ */
+static int
+connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
 {
-	struct fileproc *fp1, *fp2;
-	struct socket *so1, *so2;
-	int fd, error, sv[2];
+#pragma unused(p, retval)
+	struct sockaddr_list *src_sl = NULL, *dst_sl = NULL;
+	struct socket *so;
+	int error, error1, fd = uap->socket;
+	boolean_t dgram;
+	sae_connid_t cid = SAE_CONNID_ANY;
+	struct user32_sa_endpoints ep32;
+	struct user64_sa_endpoints ep64;
+	struct user_sa_endpoints ep;
+	user_ssize_t bytes_written = 0;
+	struct user_iovec *iovp;
+	uio_t auio = NULL;
 
-	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
-	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
-	if (error)
+	AUDIT_ARG(fd, uap->socket);
+	error = file_socket(fd, &so);
+	if (error != 0)
 		return (error);
-	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
-	if (error)
-		goto free1;
-
-	error = falloc(p, &fp1, &fd);
-	if (error) {
-		goto free2;
+	if (so == NULL) {
+		error = EBADF;
+		goto out;
 	}
-	fp1->f_flag = FREAD|FWRITE;
-	fp1->f_type = DTYPE_SOCKET;
-	fp1->f_ops = &socketops;
-	fp1->f_data = (caddr_t)so1;
-	sv[0] = fd;
 
-	error = falloc(p, &fp2, &fd);
-	if (error) {
-		goto free3;
+	if (uap->endpoints == USER_ADDR_NULL) {
+		error = EINVAL;
+		goto out;
 	}
-	fp2->f_flag = FREAD|FWRITE;
-	fp2->f_type = DTYPE_SOCKET;
-	fp2->f_ops = &socketops;
-	fp2->f_data = (caddr_t)so2;
-	sv[1] = fd;
 
-	error = soconnect2(so1, so2);
-	if (error) {
-		goto free4;
+	if (IS_64BIT_PROCESS(p)) {
+		error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64));
+		if (error != 0)
+			goto out;
+
+		ep.sae_srcif = ep64.sae_srcif;
+		ep.sae_srcaddr = ep64.sae_srcaddr;
+		ep.sae_srcaddrlen = ep64.sae_srcaddrlen;
+		ep.sae_dstaddr = ep64.sae_dstaddr;
+		ep.sae_dstaddrlen = ep64.sae_dstaddrlen;
+	} else {
+		error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32));
+		if (error != 0)
+			goto out;
+
+		ep.sae_srcif = ep32.sae_srcif;
+		ep.sae_srcaddr = ep32.sae_srcaddr;
+		ep.sae_srcaddrlen = ep32.sae_srcaddrlen;
+		ep.sae_dstaddr = ep32.sae_dstaddr;
+		ep.sae_dstaddrlen = ep32.sae_dstaddrlen;
 	}
-	if (uap->type == SOCK_DGRAM) {
-		/*
-		 * Datagram socket connection is asymmetric.
-		 */
-		 error = soconnect2(so2, so1);
-		 if (error) {
-			 goto free4;
-		 }
+
+	/*
+	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
+	 * if this is a datagram socket; translate for other types.
+	 */
+	dgram = (so->so_type == SOCK_DGRAM);
+
+	/*
+	 * Get socket address(es) now before we obtain socket lock; use
+	 * sockaddr_list for src address for convenience, if present,
+	 * even though it won't hold more than one.
+	 */
+	if (ep.sae_srcaddr != USER_ADDR_NULL && (error = getsockaddrlist(so,
+	    &src_sl, (user_addr_t)(caddr_t)ep.sae_srcaddr, ep.sae_srcaddrlen,
+	    dgram)) != 0)
+		goto out;
+
+	if (ep.sae_dstaddr == USER_ADDR_NULL) {
+		error = EINVAL;
+		goto out;
 	}
 
-	proc_fdlock(p);
-	*fdflags(p, sv[0]) &= ~UF_RESERVED;
-	*fdflags(p, sv[1]) &= ~UF_RESERVED;
-	fp_drop(p, sv[0], fp1, 1);
-	fp_drop(p, sv[1], fp2, 1);
-	proc_fdunlock(p);
+	error = getsockaddrlist(so, &dst_sl, (user_addr_t)(caddr_t)ep.sae_dstaddr,
+	    ep.sae_dstaddrlen, dgram);
+	if (error != 0)
+		goto out;
 
-	error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof(int));
-#if 0   /* old pipe(2) syscall compatability, unused these days */
-	retval[0] = sv[0];		/* XXX ??? */
-	retval[1] = sv[1];		/* XXX ??? */
-#endif /* 0 */
-	return (error);
-free4:
-	fp_free(p, sv[1], fp2);
-free3:
-	fp_free(p, sv[0], fp1);
-free2:
-	(void)soclose(so2);
-free1:
-	(void)soclose(so1);
-	return (error);
-}
+	VERIFY(dst_sl != NULL &&
+	    !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0);
 
-static int
-sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, 
-		int flags, register_t *retval)
-{
-	struct mbuf *control;
-	struct sockaddr *to;
-	int error;
-	struct socket *so;
-	user_ssize_t len;
-#if KTRACE
-	uio_t ktruio = NULL;
-#endif
-	
-	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0,0,0,0,0);
+	if (uap->iov != USER_ADDR_NULL) {
+		/* Verify range before calling uio_create() */
+		if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) {
+			/*
+			 * Must leave through out: the file reference and
+			 * the address lists built above are released there.
+			 */
+			error = EINVAL;
+			goto out;
+		}
 
-	error = file_socket(s, &so);
-	if (error )
-	{
-	    KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
-	    return (error);
+		/* data was supplied, so a place to report the count is required */
+		if (uap->len == USER_ADDR_NULL) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/* allocate a uio to hold the number of iovecs passed */
+		auio = uio_create(uap->iovcnt, 0,
+		    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
+		    UIO_WRITE);
+
+		if (auio == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+
+		/*
+		 * get location of iovecs within the uio.
+		 * then copyin the iovecs from user space.
+		 */
+		iovp = uio_iovsaddr(auio);
+		if (iovp == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		error = copyin_user_iovec_array(uap->iov,
+			IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+			uap->iovcnt, iovp);
+		if (error != 0)
+			goto out;
+
+		/* finish setup of uio_t */
+		error = uio_calculateresid(auio);
+		if (error != 0) {
+			goto out;
+		}
 	}
-	
-	if (mp->msg_name) {
-		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
+
+	error = connectitx(so, &src_sl, &dst_sl, p, ep.sae_srcif, uap->associd,
+	    &cid, auio, uap->flags, &bytes_written);
+	if (error == ERESTART)
+		error = EINTR;
+
+	if (uap->len != USER_ADDR_NULL) {
+		/*
+		 * Size the copy by the object being copied, not by the
+		 * user pointer that names the destination.
+		 * NOTE(review): this writes sizeof (user_ssize_t) bytes
+		 * regardless of the caller's word size; confirm that a
+		 * 32-bit process does not expect a narrower store.
+		 */
+		error1 = copyout(&bytes_written, uap->len,
+		    sizeof (bytes_written));
+		/* give precedence to connectitx errors */
+		if ((error1 != 0) && (error == 0))
+			error = error1;
+	}
+
+	if (uap->connid != USER_ADDR_NULL) {
+		error1 = copyout(&cid, uap->connid, sizeof (cid));
+		/* give precedence to connectitx errors */
+		if ((error1 != 0) && (error == 0))
+			error = error1;
+	}
+out:
+	file_drop(fd);
+	if (auio != NULL) {
+		uio_free(auio);
+	}
+	if (src_sl != NULL)
+		sockaddrlist_free(src_sl);
+	if (dst_sl != NULL)
+		sockaddrlist_free(dst_sl);
+	return (error);
+}
+
+/*
+ * connectx: system call entry point.  Acts as an (unofficial)
+ * cancellation point because of its similarity to a POSIX interface,
+ * then defers all real work to connectx_nocancel().
+ */
+int
+connectx(struct proc *p, struct connectx_args *uap, int *retval)
+{
+	int error;
+
+	__pthread_testcancel(1);
+	error = connectx_nocancel(p, uap, retval);
+
+	return (error);
+}
+
+/*
+ * connectit: connect socket `so' to the single address `sa'.
+ *
+ * The MAC connect check (when CONFIG_MACF_SOCKET_SUBSET is configured)
+ * runs before the socket lock is taken.  The rest of the function runs
+ * with the socket locked and always unlocks it at the "out" label.
+ *
+ * Returns:	0			Success
+ *		EALREADY		Non-blocking socket is already
+ *					connecting
+ *		EINPROGRESS		Non-blocking socket; connect started
+ *		ECONNABORTED		SS_DRAINING was set after sleeping
+ *	mac_socket_check_connect:???
+ *	soconnectlock:???
+ *	msleep:???			[connect_nocancel maps ERESTART
+ *					to EINTR]
+ *		<so_error>		Pending socket error; it is cleared
+ *					once it has been returned
+ */
+static int
+connectit(struct socket *so, struct sockaddr *sa)
+{
+	int error;
+
+	AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
+#if CONFIG_MACF_SOCKET_SUBSET
+	if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
+		return (error);
+#endif /* MAC_SOCKET_SUBSET */
+
+	socket_lock(so, 1);
+	/* a non-blocking connect is already under way on this socket */
+	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
+		error = EALREADY;
+		goto out;
+	}
+	error = soconnectlock(so, sa, 0);
+	if (error != 0) {
+		/* the attempt failed outright; do not leave it marked pending */
+		so->so_state &= ~SS_ISCONNECTING;
+		goto out;
+	}
+	/* non-blocking socket: report that the connect was started, no wait */
+	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
+		error = EINPROGRESS;
+		goto out;
+	}
+	/* blocking socket: sleep until connecting ends or an error is posted */
+	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+		lck_mtx_t *mutex_held;
+
+		/*
+		 * msleep() is handed the mutex the protocol reports through
+		 * pr_getlock when it has one, else the domain's dom_mtx.
+		 */
+		if (so->so_proto->pr_getlock != NULL)
+			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
+		else
+			mutex_held = so->so_proto->pr_domain->dom_mtx;
+		error = msleep((caddr_t)&so->so_timeo, mutex_held,
+		    PSOCK | PCATCH, __func__, 0);
+		/* a draining socket overrides whatever msleep() returned */
+		if (so->so_state & SS_DRAINING) {
+			error = ECONNABORTED;
+		}
+		if (error != 0)
+			break;
+	}
+	/* the wait itself went fine: hand back (and consume) the socket error */
+	if (error == 0) {
+		error = so->so_error;
+		so->so_error = 0;
+	}
+out:
+	socket_unlock(so, 1);
+	return (error);
+}
+
+/*
+ * connectitx: connectx(2) counterpart of connectit().
+ *
+ * Every destination address in *dst_sl is audited and (when
+ * CONFIG_MACF_SOCKET_SUBSET is configured) MAC-checked before the socket
+ * lock is taken; a failed check returns without touching the socket.
+ * With the socket locked, the idempotent/preconnect-data flags are set up
+ * in so_flags1 and the request is passed to soconnectxlocked() together
+ * with the optional uio and the bytes_written out-parameter.  Unless
+ * preconnect data is still pending or the socket is non-blocking, the
+ * function then sleeps until the connect attempt finishes.
+ *
+ * Returns:	0			Success
+ *		EALREADY		Non-blocking socket is already
+ *					connecting
+ *		EINPROGRESS		Non-blocking socket; connect started
+ *		ECONNABORTED		SS_DRAINING was set after sleeping
+ *	mac_socket_check_connect:???
+ *	soconnectxlocked:???
+ *	msleep:???
+ *		<so_error>		Pending socket error; it is cleared
+ *					once it has been returned
+ */
+static int
+connectitx(struct socket *so, struct sockaddr_list **src_sl,
+    struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
+    sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags,
+    user_ssize_t *bytes_written)
+{
+	struct sockaddr_entry *se;
+	int error;
+	/* NOTE(review): flags is in fact used below; this pragma looks stale */
+#pragma unused (flags)
+
+	VERIFY(dst_sl != NULL && *dst_sl != NULL);
+
+	TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) {
+		VERIFY(se->se_addr != NULL);
+		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
+		    se->se_addr);
+#if CONFIG_MACF_SOCKET_SUBSET
+		if ((error = mac_socket_check_connect(kauth_cred_get(),
+		    so, se->se_addr)) != 0)
+			return (error);
+#endif /* MAC_SOCKET_SUBSET */
+	}
+
+	socket_lock(so, 1);
+	/* a non-blocking connect is already under way on this socket */
+	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
+		error = EALREADY;
+		goto out;
+	}
+
+	/* honour CONNECT_DATA_IDEMPOTENT only if the protocol advertises it */
+	if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
+	    (flags & CONNECT_DATA_IDEMPOTENT))
+		so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
+
+	/*
+	 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
+	 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
+	 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
+	 * Case 3 allows user to combine write with connect even if they have
+	 * no use for TFO (such as regular TCP, and UDP).
+	 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
+	 */
+	if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
+	    ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio))
+		so->so_flags1 |= SOF1_PRECONNECT_DATA;
+
+	/*
+	 * If a user sets data idempotent and does not pass an uio, or
+	 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset
+	 * SOF1_DATA_IDEMPOTENT.
+	 */
+	if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
+	    (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
+		/* We should return EINVAL instead perhaps. */
+		so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
+	}
+
+	error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope,
+	    aid, pcid, 0, NULL, 0, auio, bytes_written);
+	if (error != 0) {
+		/* the attempt failed outright; do not leave it marked pending */
+		so->so_state &= ~SS_ISCONNECTING;
+		goto out;
+	}
+	/*
+	 * If, after the call to soconnectxlocked the flag is still set (in case
+	 * data has been queued and the connect() has actually been triggered,
+	 * it will have been unset by the transport), we exit immediately. There
+	 * is no reason to wait on any event.
+	 */
+	if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
+		error = 0;
+		goto out;
+	}
+	/* non-blocking socket: report that the connect was started, no wait */
+	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
+		error = EINPROGRESS;
+		goto out;
+	}
+	/* blocking socket: sleep until connecting ends or an error is posted */
+	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+		lck_mtx_t *mutex_held;
+
+		/*
+		 * msleep() is handed the mutex the protocol reports through
+		 * pr_getlock when it has one, else the domain's dom_mtx.
+		 */
+		if (so->so_proto->pr_getlock != NULL)
+			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
+		else
+			mutex_held = so->so_proto->pr_domain->dom_mtx;
+		error = msleep((caddr_t)&so->so_timeo, mutex_held,
+		    PSOCK | PCATCH, __func__, 0);
+		/* a draining socket overrides whatever msleep() returned */
+		if (so->so_state & SS_DRAINING) {
+			error = ECONNABORTED;
+		}
+		if (error != 0)
+			break;
+	}
+	/* the wait itself went fine: hand back (and consume) the socket error */
+	if (error == 0) {
+		error = so->so_error;
+		so->so_error = 0;
+	}
+out:
+	socket_unlock(so, 1);
+	return (error);
+}
+
+/*
+ * peeloff: system call entry point.  Acts as an (unofficial)
+ * cancellation point because of its similarity to a POSIX interface,
+ * then defers all real work to peeloff_nocancel().
+ */
+int
+peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
+{
+	int error;
+
+	__pthread_testcancel(1);
+	error = peeloff_nocancel(p, uap, retval);
+
+	return (error);
+}
+
+/*
+ * peeloff_nocancel: back end of peeloff().
+ *
+ * Looks up the socket behind descriptor uap->s, asks sopeelofflocked()
+ * for the socket that corresponds to association uap->aid, and installs
+ * that socket behind a newly allocated descriptor.  The new descriptor is
+ * returned through *retval; *retval is -1 on any failure.
+ *
+ * Returns:	0			Success
+ *		ENOTSOCK		fp_getfsock() reported EOPNOTSUPP
+ *		EBADF			No socket behind the descriptor
+ *	fp_getfsock:???
+ *	sopeelofflocked:???
+ *	falloc:???
+ */
+static int
+peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval)
+{
+	struct fileproc *fp;
+	struct socket *mp_so, *so = NULL;
+	int newfd, fd = uap->s;
+	short fflag;		/* type must match fp->f_flag */
+	int error;
+
+	*retval = -1;
+
+	error = fp_getfsock(p, fd, &fp, &mp_so);
+	if (error != 0) {
+		if (error == EOPNOTSUPP)
+			error = ENOTSOCK;
+		/* nothing was looked up, so there is no file to drop */
+		goto out_nofile;
+	}
+	if (mp_so == NULL) {
+		error = EBADF;
+		goto out;
+	}
+
+	socket_lock(mp_so, 1);
+	error = sopeelofflocked(mp_so, uap->aid, &so);
+	if (error != 0) {
+		socket_unlock(mp_so, 1);
+		goto out;
+	}
+	VERIFY(so != NULL);
+	socket_unlock(mp_so, 0);		/* keep ref on mp_so for us */
+
+	/* save the original file's flags: falloc() below overwrites fp */
+	fflag = fp->f_flag;
+	error = falloc(p, &fp, &newfd, vfs_context_current());
+	if (error != 0) {
+		/* drop this socket (probably ran out of file descriptors) */
+		soclose(so);
+		sodereference(mp_so);		/* our mp_so ref */
+		goto out;
+	}
+
+	/* from here on fp is the NEW file; give it the peeled-off socket */
+	fp->f_flag = fflag;
+	fp->f_ops = &socketops;
+	fp->f_data = (caddr_t)so;
+
+	/*
+	 * If the socket has been marked as inactive by sosetdefunct(),
+	 * disallow further operations on it.
+	 */
+	if (so->so_flags & SOF_DEFUNCT) {
+		sodefunct(current_proc(), so,
+		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
+	}
+
+	proc_fdlock(p);
+	procfdtbl_releasefd(p, newfd, NULL);
+	fp_drop(p, newfd, fp, 1);
+	proc_fdunlock(p);
+
+	sodereference(mp_so);			/* our mp_so ref */
+	*retval = newfd;
+
+out:
+	/* releases the reference on the ORIGINAL descriptor (fd, not newfd) */
+	file_drop(fd);
+
+out_nofile:
+	return (error);
+}
+
+/*
+ * disconnectx: system call entry point.  Acts as an (unofficial)
+ * cancellation point because of its similarity to a POSIX interface,
+ * then defers all real work to disconnectx_nocancel().
+ */
+int
+disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
+{
+	int error;
+
+	__pthread_testcancel(1);
+	error = disconnectx_nocancel(p, uap, retval);
+
+	return (error);
+}
+
+/*
+ * disconnectx_nocancel: back end of disconnectx().
+ *
+ * Resolves descriptor uap->s to its socket and passes the association
+ * and connection IDs (uap->aid, uap->cid) to sodisconnectx().  The file
+ * reference obtained from file_socket() is dropped before returning.
+ *
+ * Returns:	0			Success
+ *		EBADF			No socket behind the descriptor
+ *	file_socket:???
+ *	sodisconnectx:???
+ */
+static int
+disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
+{
+#pragma unused(p, retval)
+	struct socket *so;
+	int fd = uap->s;
+	int error;
+
+	if ((error = file_socket(fd, &so)) != 0)
+		return (error);
+
+	if (so != NULL)
+		error = sodisconnectx(so, uap->aid, uap->cid);
+	else
+		error = EBADF;
+
+	file_drop(fd);
+	return (error);
+}
+
+/*
+ * socketpair: create two sockets of the requested domain, type and
+ * protocol, wrap each in a new file descriptor, connect them to each
+ * other, and copy the two descriptors out to uap->rsv.
+ *
+ * Returns:	0			Success
+ *	socreate:EAFNOSUPPORT
+ *	socreate:EPROTOTYPE
+ *	socreate:EPROTONOSUPPORT
+ *	socreate:ENOBUFS
+ *	socreate:ENOMEM
+ *	socreate:EISCONN
+ *	socreate:???			[other protocol families, IPSEC]
+ *	falloc:ENFILE
+ *	falloc:EMFILE
+ *	falloc:ENOMEM
+ *	copyout:EFAULT
+ *	soconnect2:EINVAL
+ *	soconnect2:EPROTOTYPE
+ *	soconnect2:???			[other protocol families]
+ */
+int
+socketpair(struct proc *p, struct socketpair_args *uap,
+    __unused int32_t *retval)
+{
+	struct fileproc *fp1, *fp2;
+	struct socket *so1, *so2;
+	int fd, error, sv[2];
+
+	AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
+	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
+	if (error)
+		return (error);
+	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
+	if (error)
+		goto free1;
+
+	error = falloc(p, &fp1, &fd, vfs_context_current());
+	if (error) {
+		goto free2;
+	}
+	fp1->f_flag = FREAD|FWRITE;
+	fp1->f_ops = &socketops;
+	fp1->f_data = (caddr_t)so1;
+	sv[0] = fd;
+
+	error = falloc(p, &fp2, &fd, vfs_context_current());
+	if (error) {
+		goto free3;
+	}
+	fp2->f_flag = FREAD|FWRITE;
+	fp2->f_ops = &socketops;
+	fp2->f_data = (caddr_t)so2;
+	sv[1] = fd;
+
+	error = soconnect2(so1, so2);
+	if (error) {
+		goto free4;
+	}
+	if (uap->type == SOCK_DGRAM) {
+		/*
+		 * Datagram socket connection is asymmetric.
+		 */
+		error = soconnect2(so2, so1);
 		if (error) {
-		    KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
-			goto out;
+			goto free4;
 		}
-		AUDIT_ARG(sockaddr, p, to);
-	} else {
-		to = 0;
 	}
-	if (mp->msg_control) {
-		if (mp->msg_controllen < ((socklen_t)sizeof(struct cmsghdr))
-#if COMPAT_43_SOCKET
-		    && !(mp->msg_flags & MSG_COMPAT)
-#endif
-		) {
+
+	/*
+	 * Copy the descriptors out before the fds are released below, so
+	 * that a failed copyout can still unwind both of them via free4.
+	 */
+	if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
+		goto free4;
+
+	proc_fdlock(p);
+	procfdtbl_releasefd(p, sv[0], NULL);
+	procfdtbl_releasefd(p, sv[1], NULL);
+	fp_drop(p, sv[0], fp1, 1);
+	fp_drop(p, sv[1], fp2, 1);
+	proc_fdunlock(p);
+
+	return (0);
+	/* error unwind: each label falls through to undo the earlier steps */
+free4:
+	fp_free(p, sv[1], fp2);
+free3:
+	fp_free(p, sv[0], fp1);
+free2:
+	(void) soclose(so2);
+free1:
+	(void) soclose(so1);
+	return (error);
+}
+
+/*
+ * sendit: common send path.  Copies in the optional destination address
+ * and control data named by the user msghdr `mp', runs the MAC send
+ * check when a destination address is present, and passes the uio to the
+ * protocol's pru_sosend.  On success *retval is set to the number of
+ * bytes consumed from the uio.
+ *
+ * The socket is supplied by the caller; this function does not drop any
+ * file reference.
+ *
+ * Returns:	0			Success
+ *		EINVAL
+ *		ENOBUFS
+ *		EBADF
+ *		EPIPE
+ *		EACCES			Mandatory Access Control failure
+ *	file_socket:ENOTSOCK
+ *	file_socket:EBADF
+ *	getsockaddr:ENAMETOOLONG	Filename too long
+ *	getsockaddr:EINVAL		Invalid argument
+ *	getsockaddr:ENOMEM		Not enough space
+ *	getsockaddr:EFAULT		Bad address
+ *	<pru_sosend>:EACCES[TCP]
+ *	<pru_sosend>:EADDRINUSE[TCP]
+ *	<pru_sosend>:EADDRNOTAVAIL[TCP]
+ *	<pru_sosend>:EAFNOSUPPORT[TCP]
+ *	<pru_sosend>:EAGAIN[TCP]
+ *	<pru_sosend>:EBADF
+ *	<pru_sosend>:ECONNRESET[TCP]
+ *	<pru_sosend>:EFAULT
+ *	<pru_sosend>:EHOSTUNREACH[TCP]
+ *	<pru_sosend>:EINTR
+ *	<pru_sosend>:EINVAL
+ *	<pru_sosend>:EISCONN[AF_INET]
+ *	<pru_sosend>:EMSGSIZE[TCP]
+ *	<pru_sosend>:ENETDOWN[TCP]
+ *	<pru_sosend>:ENETUNREACH[TCP]
+ *	<pru_sosend>:ENOBUFS
+ *	<pru_sosend>:ENOMEM[TCP]
+ *	<pru_sosend>:ENOTCONN[AF_INET]
+ *	<pru_sosend>:EOPNOTSUPP
+ *	<pru_sosend>:EPERM[TCP]
+ *	<pru_sosend>:EPIPE
+ *	<pru_sosend>:EWOULDBLOCK
+ *	<pru_sosend>:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
+ *	<pru_sosend>:???[AF_INET]	[whatever a filter author chooses]
+ *	<pru_sosend>:???		[value from so_error]
+ *	sockargs:???
+ */
+static int
+sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop,
+    int flags, int32_t *retval)
+{
+	struct mbuf *control = NULL;
+	struct sockaddr_storage ss;
+	struct sockaddr *to = NULL;
+	boolean_t want_free = TRUE;
+	int error;
+	user_ssize_t len;
+
+	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+	if (mp->msg_name != USER_ADDR_NULL) {
+		/*
+		 * Addresses that fit are copied into the on-stack `ss';
+		 * only larger ones go through the allocating getsockaddr().
+		 */
+		if (mp->msg_namelen > sizeof (ss)) {
+			error = getsockaddr(so, &to, mp->msg_name,
+			    mp->msg_namelen, TRUE);
+		} else {
+			error = getsockaddr_s(so, &ss, mp->msg_name,
+			    mp->msg_namelen, TRUE);
+			if (error == 0) {
+				to = (struct sockaddr *)&ss;
+				/* `to' points at the stack; it must not be FREE'd */
+				want_free = FALSE;
+			}
+		}
+		if (error != 0)
+			goto out;
+		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to);
+	}
+	if (mp->msg_control != USER_ADDR_NULL) {
+		if (mp->msg_controllen < sizeof (struct cmsghdr)) {
 			error = EINVAL;
 			goto bad;
 		}
 		error = sockargs(&control, mp->msg_control,
 		    mp->msg_controllen, MT_CONTROL);
-		if (error)
+		if (error != 0)
 			goto bad;
-#if COMPAT_43_SOCKET
-		if (mp->msg_flags & MSG_COMPAT) {
-			register struct cmsghdr *cm;
-
-			M_PREPEND(control, sizeof(*cm), M_WAIT);
-			if (control == 0) {
-				error = ENOBUFS;
-				goto bad;
-			} else {
-				cm = mtod(control, struct cmsghdr *);
-				cm->cmsg_len = control->m_len;
-				cm->cmsg_level = SOL_SOCKET;
-				cm->cmsg_type = SCM_RIGHTS;
-			}
-		}
-#endif
-	} else {
-		control = 0;
 	}
 
-#if KTRACE    
-	if (KTRPOINT(p, KTR_GENIO)) {
-		ktruio = uio_duplicate(uiop);
-	}
-#endif
+#if CONFIG_MACF_SOCKET_SUBSET
+	/*
+	 * We check the state without holding the socket lock;
+	 * if a race condition occurs, it would simply result
+	 * in an extra call to the MAC check function.
+	 *
+	 * NOTE(review): if this check fails after sockargs() succeeded,
+	 * nothing on the bad: path frees `control' - confirm whether the
+	 * mbuf is leaked here.
+	 */
+	if (to != NULL &&
+	    !(so->so_state & SS_DEFUNCT) &&
+	    (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
+		goto bad;
+#endif /* MAC_SOCKET_SUBSET */
 
 	len = uio_resid(uiop);
-	if (so == NULL)
-		error = EBADF;
-	else
-		error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
-							     flags);
-	if (error) {
+	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
+		control, flags);
+	if (error != 0) {
+		/* some data went out before the interruption: report success */
 		if (uio_resid(uiop) != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
-                /* Generation of SIGPIPE can be controlled per socket */
+		/* Generation of SIGPIPE can be controlled per socket */
 		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
 			psignal(p, SIGPIPE);
 	}
 	if (error == 0)
 		*retval = (int)(len - uio_resid(uiop));
 bad:
-#if KTRACE
-	if (ktruio != NULL) {
-		if (error == 0) {
-			uio_setresid(ktruio, retval[0]);
-			ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error);
-		}
-		uio_free(ktruio);
-	}
-#endif
-	if (to)
+	if (to != NULL && want_free)
 		FREE(to, M_SONAME);
-	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
 out:
-	file_drop(s);
+	KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0);
+
 	return (error);
 }
 
+/*
+ * sendto: cancellable system call entry point.  Calls
+ * __pthread_testcancel() and then hands the (re-typed) argument block
+ * to sendto_nocancel().
+ *
+ * Returns:	0			Success
+ *		ENOMEM
+ *	sendit:???			[see sendit definition in this file]
+ *	write:???			[4056224: applicable for pipes]
+ */
+int
+sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
+{
+	struct sendto_nocancel_args *nc_uap;
+
+	__pthread_testcancel(1);
+	nc_uap = (struct sendto_nocancel_args *)uap;
+
+	return (sendto_nocancel(p, nc_uap, retval));
+}
 
 int
-sendto(struct proc *p, struct sendto_args *uap, register_t *retval)
+sendto_nocancel(struct proc *p,
+		struct sendto_nocancel_args *uap,
+		int32_t *retval)
 {
 	struct user_msghdr msg;
 	int error;
 	uio_t auio = NULL;
+	struct socket *so;
 
-	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0,0,0,0,0);
+	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
 	AUDIT_ARG(fd, uap->s);
 
 	auio = uio_create(1, 0,
-				  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
-				  UIO_WRITE);
+	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
+	    UIO_WRITE);
 	if (auio == NULL) {
-		return (ENOMEM);
+		error = ENOMEM;
+		goto done;
 	}
 	uio_addiov(auio, uap->buf, uap->len);
 
@@ -722,347 +1388,613 @@ sendto(struct proc *p, struct sendto_args *uap, register_t *retval)
 	msg.msg_control = 0;
 	msg.msg_flags = 0;
 
-	error = sendit(p, uap->s, &msg, auio, uap->flags, retval);
-	
-	if (auio != NULL) {
+	error = file_socket(uap->s, &so);
+	if (error)
+		goto done;
+
+	if (so == NULL) {
+		error = EBADF;
+	} else {
+		error = sendit(p, so, &msg, auio, uap->flags, retval);
+	}
+
+	file_drop(uap->s);
+done:
+	if (auio != NULL)
 		uio_free(auio);
+
+	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
+
+	return (error);
+}
+
+/*
+ * sendmsg: cancellable system call entry point.  Calls
+ * __pthread_testcancel() and then hands the (re-typed) argument block
+ * to sendmsg_nocancel().
+ *
+ * Returns:	0			Success
+ *		ENOBUFS
+ *	copyin:EFAULT
+ *	sendit:???			[see sendit definition in this file]
+ */
+int
+sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
+{
+	struct sendmsg_nocancel_args *nc_uap;
+
+	__pthread_testcancel(1);
+	nc_uap = (struct sendmsg_nocancel_args *)uap;
+
+	return (sendmsg_nocancel(p, nc_uap, retval));
+}
+
+/*
+ * sendmsg_nocancel: back end of sendmsg().
+ *
+ * Copies in the user's msghdr using the 32- or 64-bit layout of the
+ * calling process, converts it to a struct user_msghdr, builds a uio
+ * from the user's iovec array, resolves uap->s to a socket and calls
+ * sendit().  The file reference and the uio are released before
+ * returning.
+ *
+ * Returns:	0			Success
+ *		EMSGSIZE		msg_iovlen <= 0 or > UIO_MAXIOV
+ *		ENOBUFS			uio could not be set up
+ *		EBADF			No socket behind the descriptor
+ *	copyin:???
+ *	copyin_user_iovec_array:???
+ *	uio_calculateresid:???
+ *	file_socket:???
+ *	sendit:???			[see sendit definition in this file]
+ */
+int
+sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap,
+    int32_t *retval)
+{
+	struct user32_msghdr msg32;
+	struct user64_msghdr msg64;
+	struct user_msghdr user_msg;
+	caddr_t msghdrp;
+	int	size_of_msghdr;
+	int error;
+	uio_t auio = NULL;
+	struct user_iovec *iovp;
+	struct socket *so;
+
+	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
+	AUDIT_ARG(fd, uap->s);
+	/* pick the copyin target that matches the caller's msghdr layout */
+	if (IS_64BIT_PROCESS(p)) {
+		msghdrp = (caddr_t)&msg64;
+		size_of_msghdr = sizeof (msg64);
+	} else {
+		msghdrp = (caddr_t)&msg32;
+		size_of_msghdr = sizeof (msg32);
 	}
-	
-#if HACK_FOR_4056224
-	/* 
-	 * Radar 4056224 
-	 * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
-	 * This will be removed in the release following Tiger
-	 */
-	if (error == ENOTSOCK) {
-		struct fileproc *fp;
-		
-        if (fp_lookup(p, uap->s, &fp, 0) == 0) {
-			(void) fp_drop(p, uap->s, fp,0);
-			
-			if (fp->f_type == DTYPE_PIPE) {
-				struct write_args write_uap;
-				user_ssize_t write_retval;
-				
-				if (p->p_pid > last_pid_4056224) {
-					last_pid_4056224 = p->p_pid;
-
-					printf("%s[%d] uses send/recv on a pipe\n", 
-						p->p_comm, p->p_pid);
-				}
-				
-				bzero(&write_uap, sizeof(struct write_args));
-				write_uap.fd = uap->s;
-				write_uap.cbuf = uap->buf;
-				write_uap.nbyte = uap->len;
-	
-				error = write(p, &write_uap, &write_retval);
-				*retval = (int)write_retval;
-			}
+	error = copyin(uap->msg, msghdrp, size_of_msghdr);
+	if (error) {
+		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
+		return (error);
+	}
+
+	/* widen whichever layout was copied in to the common user_msghdr */
+	if (IS_64BIT_PROCESS(p)) {
+		user_msg.msg_flags = msg64.msg_flags;
+		user_msg.msg_controllen = msg64.msg_controllen;
+		user_msg.msg_control = msg64.msg_control;
+		user_msg.msg_iovlen = msg64.msg_iovlen;
+		user_msg.msg_iov = msg64.msg_iov;
+		user_msg.msg_namelen = msg64.msg_namelen;
+		user_msg.msg_name = msg64.msg_name;
+	} else {
+		user_msg.msg_flags = msg32.msg_flags;
+		user_msg.msg_controllen = msg32.msg_controllen;
+		user_msg.msg_control = msg32.msg_control;
+		user_msg.msg_iovlen = msg32.msg_iovlen;
+		user_msg.msg_iov = msg32.msg_iov;
+		user_msg.msg_namelen = msg32.msg_namelen;
+		user_msg.msg_name = msg32.msg_name;
+	}
+
+	/* nothing has been allocated yet, so returning directly is safe */
+	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
+		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,
+		    0, 0, 0, 0);
+		return (EMSGSIZE);
+	}
+
+	/* allocate a uio large enough to hold the number of iovecs passed */
+	auio = uio_create(user_msg.msg_iovlen, 0,
+	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
+	    UIO_WRITE);
+	if (auio == NULL) {
+		error = ENOBUFS;
+		goto done;
+	}
+
+	/*
+	 * NOTE(review): msg_iovlen <= 0 was rejected above, so the else
+	 * branch of this test looks unreachable - confirm.
+	 */
+	if (user_msg.msg_iovlen) {
+		/*
+		 * get location of iovecs within the uio.
+		 * then copyin the iovecs from user space.
+		 */
+		iovp = uio_iovsaddr(auio);
+		if (iovp == NULL) {
+			error = ENOBUFS;
+			goto done;
+		}
+		error = copyin_user_iovec_array(user_msg.msg_iov,
+			IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+			user_msg.msg_iovlen, iovp);
+		if (error)
+			goto done;
+		/* msg_iov now refers to the in-kernel copy inside the uio */
+		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
+
+		/* finish setup of uio_t */
+		error = uio_calculateresid(auio);
+		if (error) {
+			goto done;
 		}
+	} else {
+		user_msg.msg_iov = 0;
 	}
-#endif /* HACK_FOR_4056224 */
 
-	KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval,0,0,0);
-	
-	return(error);
+	/* msg_flags is ignored for send */
+	user_msg.msg_flags = 0;
+
+	error = file_socket(uap->s, &so);
+	if (error) {
+		goto done;
+	}
+	if (so == NULL) {
+		error = EBADF;
+	} else {
+		error = sendit(p, so, &user_msg, auio, uap->flags, retval);
+	}
+	/* file_socket() succeeded, so its reference is dropped either way */
+	file_drop(uap->s);
+done:
+	if (auio != NULL) {
+		uio_free(auio);
+	}
+	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
+
+	return (error);
 }
 
-#if COMPAT_43_SOCKET
+/*
+ * sendmsg_x: send an array of uap->cnt messages on socket uap->s.
+ *
+ * The user's msghdr_x array is copied in and converted into an internal
+ * user_msghdr_x array plus one uio per message.  Each message must have
+ * msg_flags == 0.  The messages are then either handed to the protocol's
+ * pru_sosend_list in one call or sent one at a time through sendit().
+ * On success the value returned by externalize_user_msghdr_array() is
+ * stored in *retval.
+ *
+ * Returns:	0			Success
+ *		EBADF			No socket behind the descriptor
+ *		EINVAL			Bad count, invalid iovec sizes, or
+ *					non-zero msg_flags
+ *		ENOMEM			Temporary array allocation failed
+ *	file_socket:???
+ *	copyin:???
+ *	internalize_user_msghdr_array:???
+ *	<pru_sosend_list>:???
+ *	sendit:???			[see sendit definition in this file]
+ */
 int
-osend(__unused struct proc *p, 
-	  __unused struct osend_args *uap, 
-	  __unused register_t *retval)
+sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval)
 {
-	/* these are no longer supported and in fact 
-	 * there is no way to call it directly.
-	 * LP64todo - remove this once we're sure there are no clients 
+	int error = 0;
+	struct user_msghdr_x *user_msg_x = NULL;
+	struct uio **uiop = NULL;
+	struct socket *so;
+	u_int i;
+	struct sockaddr *to = NULL;
+	user_ssize_t len_before = 0, len_after;
+	int need_drop = 0;
+	size_t size_of_msghdr;
+	void *umsgp = NULL;
+	u_int uiocnt;
+	int has_addr_or_ctl = 0;
+
+	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+	error = file_socket(uap->s, &so);
+	if (error) {
+		goto out;
+	}
+	/* from here on the out: path has a file reference to drop */
+	need_drop = 1;
+	if (so == NULL) {
+		error = EBADF;
+		goto out;
+	}
+
+	/*
+	 * Input parameter range check
+	 */
+	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * Clip to max currently allowed
+	 */
+	if (uap->cnt > somaxsendmsgx)
+		uap->cnt = somaxsendmsgx;
+
+	user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
+			M_TEMP, M_WAITOK | M_ZERO);
+	if (user_msg_x == NULL) {
+		DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
+		error = ENOMEM;
+		goto out;
+	}
+	uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
+		M_TEMP, M_WAITOK | M_ZERO);
+	if (uiop == NULL) {
+		DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
+		error = ENOMEM;
+		goto out;
+	}
+
+	/* the raw user array is sized by the caller's msghdr_x layout */
+	size_of_msghdr = IS_64BIT_PROCESS(p) ?
+		sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
+
+	umsgp = _MALLOC(uap->cnt * size_of_msghdr,
+			M_TEMP, M_WAITOK | M_ZERO);
+	if (umsgp == NULL) {
+		printf("%s _MALLOC() user_msg_x failed\n", __func__);
+		error = ENOMEM;
+		goto out;
+	}
+	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
+	if (error) {
+		DBG_PRINTF("%s copyin() failed\n", __func__);
+		goto out;
+	}
+	error = internalize_user_msghdr_array(umsgp,
+		IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+		UIO_WRITE, uap->cnt, user_msg_x, uiop);
+	if (error) {
+		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
+		goto out;
+	}
+	/*
+	 * Make sure the size of each message iovec and
+	 * the aggregate size of all the iovec is valid
+	 */
+	if (uio_array_is_valid(uiop, uap->cnt) == 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Sanity check on passed arguments
 	 */
-	return (ENOTSUP);
+	for (i = 0; i < uap->cnt; i++) {
+		struct user_msghdr_x *mp = user_msg_x + i;
+
+		/*
+		 * No flags on send message
+		 */
+		if (mp->msg_flags != 0) {
+			error = EINVAL;
+			goto out;
+		}
+		/*
+		 * No support for address or ancillary data (yet)
+		 */
+		if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0)
+			has_addr_or_ctl = 1;
+
+		if (mp->msg_control != USER_ADDR_NULL ||
+		    mp->msg_controllen != 0)
+			has_addr_or_ctl = 1;
+
+#if CONFIG_MACF_SOCKET_SUBSET
+		/*
+		 * We check the state without holding the socket lock;
+		 * if a race condition occurs, it would simply result
+		 * in an extra call to the MAC check function.
+		 *
+		 * Note: The following check is never true taken with the
+		 * current limitation that we do not accept to pass an address,
+		 * this is effectively placeholder code. If we add support for
+		 * addresses, we will have to check every address.
+		 */
+		if (to != NULL &&
+		    !(so->so_state & SS_DEFUNCT) &&
+		    (error = mac_socket_check_send(kauth_cred_get(), so, to))
+			!= 0)
+			goto out;
+#endif /* MAC_SOCKET_SUBSET */
+	}
+
+	/* total bytes queued across all uios, to detect partial progress */
+	len_before = uio_array_resid(uiop, uap->cnt);
+
+	/*
+	 * Feed list of packets at once only for connected socket without
+	 * control message
+	 *
+	 * NOTE(review): this path also requires somaxsendmsgx == 0, yet
+	 * the clip above would then have reduced uap->cnt to 0 - confirm
+	 * that this condition is intended.
+	 */
+	if (so->so_proto->pr_usrreqs->pru_sosend_list !=
+	    pru_sosend_list_notsupp &&
+	    has_addr_or_ctl == 0 && somaxsendmsgx == 0) {
+		error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop,
+		    uap->cnt, uap->flags);
+	} else {
+		/* fall back to one sendit() per message; stop at first error */
+		for (i = 0; i < uap->cnt; i++) {
+			struct user_msghdr_x *mp = user_msg_x + i;
+			struct user_msghdr user_msg;
+			uio_t auio = uiop[i];
+			int32_t tmpval;
+
+			user_msg.msg_flags = mp->msg_flags;
+			user_msg.msg_controllen = mp->msg_controllen;
+			user_msg.msg_control = mp->msg_control;
+			user_msg.msg_iovlen = mp->msg_iovlen;
+			user_msg.msg_iov = mp->msg_iov;
+			user_msg.msg_namelen = mp->msg_namelen;
+			user_msg.msg_name = mp->msg_name;
+
+			error = sendit(p, so, &user_msg, auio, uap->flags,
+			    &tmpval);
+			if (error != 0)
+				break;
+		}
+	}
+	len_after = uio_array_resid(uiop, uap->cnt);
+
+	VERIFY(len_after <= len_before);
+
+	if (error != 0) {
+		/* some data went out before the failure: report success */
+		if (len_after != len_before && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK ||
+		    error == ENOBUFS))
+			error = 0;
+		/* Generation of SIGPIPE can be controlled per socket */
+		if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE))
+			psignal(p, SIGPIPE);
+	}
+	if (error == 0) {
+		uiocnt = externalize_user_msghdr_array(umsgp,
+		    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+		    UIO_WRITE, uap->cnt, user_msg_x, uiop);
+
+		*retval = (int)(uiocnt);
+	}
+out:
+	if (need_drop)
+		file_drop(uap->s);
+	if (umsgp != NULL)
+		_FREE(umsgp, M_TEMP);
+	if (uiop != NULL) {
+		free_uio_array(uiop, uap->cnt);
+		_FREE(uiop, M_TEMP);
+	}
+	if (user_msg_x != NULL)
+		_FREE(user_msg_x, M_TEMP);
+
+	KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
+
+	return (error);
+}
+
+
+/*
+ * copyout_sa: copy socket address `fromsa' out to the user buffer `name'.
+ *
+ * On entry *namelen is the size of the user buffer.  At most that many
+ * bytes (and never more than fromsa->sa_len) are copied.  On success
+ * *namelen is updated to the full length of the address (sa_len), or to
+ * 0 when there is no address or no room at all.
+ *
+ * Returns:	0			Success
+ *	copyout:???			*namelen is left untouched
+ */
+static int
+copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen)
+{
+	int error = 0;
+	socklen_t sa_len = 0;
+	ssize_t len;
+
+	len = *namelen;
+	if (len <= 0 || fromsa == NULL) {
+		len = 0;
+	} else {
+#ifndef MIN
+#define	MIN(a, b) ((a) > (b) ? (b) : (a))
+#endif
+		sa_len = fromsa->sa_len;
+		len = MIN((unsigned int)len, sa_len);
+		error = copyout(fromsa, name, (unsigned)len);
+		if (error)
+			goto out;
+	}
+	*namelen = sa_len;
+out:
+	/* propagate a copyout() failure instead of reporting success */
+	return (error);
 }
 
-int
-osendmsg(__unused struct proc *p, 
-	  	 __unused struct osendmsg_args *uap, 
-		 __unused register_t *retval)
-{
-	/* these are no longer supported and in fact 
-	 * there is no way to call it directly.
-	 * LP64todo - remove this once we're sure there are no clients 
-	 */
-	return (ENOTSUP);
-}
-#endif
+static int
+copyout_control(struct proc *p, struct mbuf *m, user_addr_t control,
+    socklen_t *controllen, int *flags)
+{
+	int error = 0;
+	ssize_t len;
+	user_addr_t ctlbuf;
+	/* Walk the control mbuf chain, copying each cmsg to user space. */
+	len = *controllen;	/* room left in the user buffer */
+	*controllen = 0;	/* stays 0 if a copyout fails below */
+	ctlbuf = control;
+
+	while (m && len > 0) {
+		unsigned int tocopy;
+		struct cmsghdr *cp = mtod(m, struct cmsghdr *);
+		int cp_size = CMSG_ALIGN(cp->cmsg_len);
+		int buflen = m->m_len;
+
+		while (buflen > 0 && len > 0) {
+			/*
+			 * SCM_TIMESTAMP hack because  struct timeval has a
+			 * different size for 32 bits and 64 bits processes
+			 */
+			if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
+				unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))] = { 0 };	/* zeroed: copied to user space */
+				struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
+				int tmp_space;
+				struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
+
+				tmp_cp->cmsg_level = SOL_SOCKET;
+				tmp_cp->cmsg_type = SCM_TIMESTAMP;
 
+				if (proc_is64bit(p)) {
+					struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
 
-int
-sendmsg(struct proc *p, register struct sendmsg_args *uap, register_t *retval)
-{
-	struct msghdr msg;
-	struct user_msghdr user_msg;
-	caddr_t msghdrp;
-	int	size_of_msghdr;
-	int error;
-	int size_of_iovec;
-	uio_t auio = NULL;
-	struct user_iovec *iovp;
+					tv64->tv_sec = tv->tv_sec;
+					tv64->tv_usec = tv->tv_usec;
 
-	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0,0,0,0,0);
-	AUDIT_ARG(fd, uap->s);
-	if (IS_64BIT_PROCESS(p)) {
-		msghdrp = (caddr_t) &user_msg;
-		size_of_msghdr = sizeof(user_msg);
-		size_of_iovec = sizeof(struct user_iovec);
-	}
-	else {
-		msghdrp = (caddr_t) &msg;
-		size_of_msghdr = sizeof(msg);
-		size_of_iovec = sizeof(struct iovec);
-	}
-	error = copyin(uap->msg, msghdrp, size_of_msghdr);
-	if (error)
-	{
-	    KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
-	    return (error);
-	}
+					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
+					tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
+				} else {
+					struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
 
-	/* only need to copy if user process is not 64-bit */
-	if (!IS_64BIT_PROCESS(p)) {
-		user_msg.msg_flags = msg.msg_flags;
-		user_msg.msg_controllen = msg.msg_controllen;
-		user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
-		user_msg.msg_iovlen = msg.msg_iovlen;
-		user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
-		user_msg.msg_namelen = msg.msg_namelen;
-		user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
-	}
+					tv32->tv_sec = tv->tv_sec;
+					tv32->tv_usec = tv->tv_usec;
 
-	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
-		KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
-		return (EMSGSIZE);
-	}
+					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
+					tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
+				}
+				if (len >= tmp_space) {
+					tocopy = tmp_space;
+				} else {
+					*flags |= MSG_CTRUNC;	/* user buffer too small */
+					tocopy = len;
+				}
+				error = copyout(tmp_buffer, ctlbuf, tocopy);
+				if (error)
+					goto out;
+			} else {
+				if (cp_size > buflen) {
+					panic("cp_size > buflen, something "
+					    "wrong with alignment!");
+				}
+				if (len >= cp_size) {
+					tocopy = cp_size;
+				} else {
+					*flags |= MSG_CTRUNC;	/* user buffer too small */
+					tocopy = len;
+				}
+				error = copyout((caddr_t) cp, ctlbuf, tocopy);
+				if (error)
+					goto out;
+			}
 
-	/* allocate a uio large enough to hold the number of iovecs passed */
-	auio = uio_create(user_msg.msg_iovlen, 0,
-				  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
-				  UIO_WRITE);
-	if (auio == NULL) {
-		error = ENOBUFS;
-		goto done;
-	}
-		
-	if (user_msg.msg_iovlen) {
-		/* get location of iovecs within the uio.  then copyin the iovecs from
-		 * user space.
-		 */
-		iovp = uio_iovsaddr(auio);
-		if (iovp == NULL) {
-			error = ENOBUFS;
-			goto done;
+			ctlbuf += tocopy;
+			len -= tocopy;
+			/* advance; only read the next header if bytes remain */
+			buflen -= cp_size;
+			cp = (struct cmsghdr *)(void *)
+			    ((unsigned char *) cp + cp_size);
+			cp_size = (buflen > 0) ? CMSG_ALIGN(cp->cmsg_len) : 0;
 		}
-		error = copyin(user_msg.msg_iov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec));
-		if (error)
-			goto done;
-		user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
-	
-		/* finish setup of uio_t */ 
-		uio_calculateresid(auio);
-	}
-	else {
-		user_msg.msg_iov = 0;
-	}
-	
-#if COMPAT_43_SOCKET
-	user_msg.msg_flags = 0;
-#endif
-	error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval);
-done:
-	if (auio != NULL) {
-		uio_free(auio);
-	}
-	KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0);
 
+		m = m->m_next;
+	}
+	*controllen = ctlbuf - control;	/* bytes actually written to user space */
+out:
 	return (error);
 }
 
+/*
+ * Returns:	0			Success
+ *		ENOTSOCK
+ *		EINVAL
+ *		EBADF
+ *		EACCES			Mandatory Access Control failure
+ *	copyout:EFAULT
+ *	fp_lookup:EBADF
+ *	<pru_soreceive>:ENOBUFS
+ *	<pru_soreceive>:ENOTCONN
+ *	<pru_soreceive>:EWOULDBLOCK
+ *	<pru_soreceive>:EFAULT
+ *	<pru_soreceive>:EINTR
+ *	<pru_soreceive>:EBADF
+ *	<pru_soreceive>:EINVAL
+ *	<pru_soreceive>:EMSGSIZE
+ *	<pru_soreceive>:???
+ *
+ * Notes:	Additional return values from calls through <pru_soreceive>
+ *		depend on protocols other than TCP or AF_UNIX, which are
+ *		documented above.
+ */
 static int
-recvit(p, s, mp, uiop, namelenp, retval)
-	register struct proc *p;
-	int s;
-	register struct user_msghdr *mp;
-	uio_t uiop;
-	user_addr_t namelenp;
-	register_t *retval;
+recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
+    user_addr_t namelenp, int32_t *retval)
 {
-	int len, error;
-	struct mbuf *m, *control = 0;
-	user_addr_t ctlbuf;
+	ssize_t len;
+	int error;
+	struct mbuf *control = 0;
 	struct socket *so;
 	struct sockaddr *fromsa = 0;
 	struct fileproc *fp;
-#if KTRACE
-	uio_t ktruio = NULL;
-#endif
 
-	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0,0,0,0,0);
+	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0);
 	proc_fdlock(p);
-	if ( (error = fp_lookup(p, s, &fp, 1)) ) {
-	    KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
+	if ((error = fp_lookup(p, s, &fp, 1))) {
+		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
 		proc_fdunlock(p);
-	    return (error);
+		return (error);
 	}
 	if (fp->f_type != DTYPE_SOCKET) {
-		fp_drop(p, s, fp,1);
+		fp_drop(p, s, fp, 1);
 		proc_fdunlock(p);
-		return(ENOTSOCK);
+		return (ENOTSOCK);
 	}
 
-	so = (struct socket *)fp->f_data;	
+	so = (struct socket *)fp->f_data;
+	if (so == NULL) {
+		fp_drop(p, s, fp, 1);
+		proc_fdunlock(p);
+		return (EBADF);
+	}
 
 	proc_fdunlock(p);
+
+#if CONFIG_MACF_SOCKET_SUBSET
+	/*
+	 * We check the state without holding the socket lock;
+	 * if a race condition occurs, it would simply result
+	 * in an extra call to the MAC check function.
+	 */
+	if (!(so->so_state & SS_DEFUNCT) &&
+	    !(so->so_state & SS_ISCONNECTED) &&
+	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
+	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
+		goto out1;
+#endif /* MAC_SOCKET_SUBSET */
 	if (uio_resid(uiop) < 0) {
-		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL,0,0,0,0);
+		KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0);
 		error = EINVAL;
 		goto out1;
 	}
-#if KTRACE
-	if (KTRPOINT(p, KTR_GENIO)) {
-		ktruio = uio_duplicate(uiop);
-	}
-#endif
 
 	len = uio_resid(uiop);
-	if (so == NULL)
-		error = EBADF;
-	else {
-		error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
-			(struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
-			&mp->msg_flags);
-	}
-	AUDIT_ARG(sockaddr, p, fromsa);
+	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop,
+	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
+	    &mp->msg_flags);
+	if (fromsa)
+		AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
+		    fromsa);
 	if (error) {
 		if (uio_resid(uiop) != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 	}
-#if KTRACE
-	if (ktruio != NULL) {
-		if (error == 0) {
-			uio_setresid(ktruio, len - uio_resid(uiop));
-			ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error);
-		}
-		uio_free(ktruio);
-	}
-#endif
 	if (error)
 		goto out;
+
 	*retval = len - uio_resid(uiop);
+
 	if (mp->msg_name) {
-		len = mp->msg_namelen;
-		if (len <= 0 || fromsa == 0)
-			len = 0;
-		else {
-#ifndef MIN
-#define MIN(a,b) ((a)>(b)?(b):(a))
-#endif
-			/* save sa_len before it is destroyed by MSG_COMPAT */
-			len = MIN(len, fromsa->sa_len);
-#if COMPAT_43_SOCKET
-			if (mp->msg_flags & MSG_COMPAT)
-				((struct osockaddr *)fromsa)->sa_family =
-				    fromsa->sa_family;
-#endif
-			error = copyout(fromsa, mp->msg_name, (unsigned)len);
-			if (error)
-				goto out;
-		}
-		mp->msg_namelen = len;
+		error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen);
+		if (error)
+			goto out;
+		/* return the actual, untruncated address length */
 		if (namelenp &&
-		    (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
-#if COMPAT_43_SOCKET
-			if (mp->msg_flags & MSG_COMPAT)
-				error = 0;	/* old recvfrom didn't check */
-			else
-#endif
+		    (error = copyout((caddr_t)&mp->msg_namelen, namelenp,
+		    sizeof (int)))) {
 			goto out;
 		}
 	}
-	if (mp->msg_control) {
-#if COMPAT_43_SOCKET
-		/*
-		 * We assume that old recvmsg calls won't receive access
-		 * rights and other control info, esp. as control info
-		 * is always optional and those options didn't exist in 4.3.
-		 * If we receive rights, trim the cmsghdr; anything else
-		 * is tossed.
-		 */
-		if (control && mp->msg_flags & MSG_COMPAT) {
-			if (mtod(control, struct cmsghdr *)->cmsg_level !=
-			    SOL_SOCKET ||
-			    mtod(control, struct cmsghdr *)->cmsg_type !=
-			    SCM_RIGHTS) {
-				mp->msg_controllen = 0;
-				goto out;
-			}
-			control->m_len -= sizeof (struct cmsghdr);
-			control->m_data += sizeof (struct cmsghdr);
-		}
-#endif
-		len = mp->msg_controllen;
-		m = control;
-		mp->msg_controllen = 0;
-		ctlbuf = mp->msg_control;
-
-		while (m && len > 0) {
-			unsigned int tocopy;
-
-			if (len >= m->m_len) 
-				tocopy = m->m_len;
-			else {
-				mp->msg_flags |= MSG_CTRUNC;
-				tocopy = len;
-			}
-		
-			error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf, tocopy);
-			if (error)
-				goto out;
 
-			ctlbuf += tocopy;
-			len -= tocopy;
-			m = m->m_next;
-		}
-		mp->msg_controllen = ctlbuf - mp->msg_control;
+	if (mp->msg_control) {
+		error = copyout_control(p, control, mp->msg_control,
+		    &mp->msg_controllen, &mp->msg_flags);
 	}
 out:
 	if (fromsa)
 		FREE(fromsa, M_SONAME);
 	if (control)
 		m_freem(control);
-	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0);
+	KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0);
 out1:
 	fp_drop(p, s, fp, 0);
 	return (error);
 }
 
+/*
+ * Returns:	0			Success
+ *		ENOMEM
+ *	copyin:EFAULT
+ *	recvit:???
+ *	read:???			[4056224: applicable for pipes]
+ *
+ * Notes:	The read entry point is only called as part of support for
+ *		binary backward compatibility; new code should use read
+ *		instead of recv or recvfrom when attempting to read data
+ *		from pipes.
+ *
+ *		For full documentation of the return codes from recvit, see
+ *		the block header for the recvit function.
+ */
+int
+recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval)
+{
+	__pthread_testcancel(1);
+	return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap,
+	    retval));
+}
 
 int
-recvfrom(p, uap, retval)
-	struct proc *p;
-	register struct recvfrom_args /* {
-		int	s;
-		caddr_t	buf;
-		size_t	len;
-		int	flags;
-		caddr_t	from;
-		int	*fromlenaddr;
-	} */ *uap;
-	register_t *retval;
+recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap,
+    int32_t *retval)
 {
 	struct user_msghdr msg;
 	int error;
 	uio_t auio = NULL;
 
-	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0,0,0,0,0);
+	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0);
 	AUDIT_ARG(fd, uap->s);
 
 	if (uap->fromlenaddr) {
@@ -1070,16 +2002,17 @@ recvfrom(p, uap, retval)
 		    (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
 		if (error)
 			return (error);
-	} else
+	} else {
 		msg.msg_namelen = 0;
+	}
 	msg.msg_name = uap->from;
 	auio = uio_create(1, 0,
-				  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
-				  UIO_READ);
+	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
+	    UIO_READ);
 	if (auio == NULL) {
 		return (ENOMEM);
 	}
-	
+
 	uio_addiov(auio, uap->buf, uap->len);
 	/* no need to set up msg_iov.  recvit uses uio_t we send it */
 	msg.msg_iov = 0;
@@ -1091,157 +2024,98 @@ recvfrom(p, uap, retval)
 	if (auio != NULL) {
 		uio_free(auio);
 	}
-	
-#if HACK_FOR_4056224
-	/* 
-	 * Radar 4056224 
-	 * Temporary workaround to let send() and recv() work over a pipe for binary compatibility
-	 * This will be removed in the release following Tiger
-	 */
-	if (error == ENOTSOCK && proc_is64bit(p) == 0) {
-		struct fileproc *fp;
-		
-        if (fp_lookup(p, uap->s, &fp, 0) == 0) {
-			(void) fp_drop(p, uap->s, fp,0);
-			
-			if (fp->f_type == DTYPE_PIPE) {
-				struct read_args read_uap;
-				user_ssize_t read_retval;
-				
-				if (p->p_pid > last_pid_4056224) {
-					last_pid_4056224 = p->p_pid;
-
-					printf("%s[%d] uses send/recv on a pipe\n", 
-						p->p_comm, p->p_pid);
-				}
-				
-				bzero(&read_uap, sizeof(struct read_args));
-				read_uap.fd = uap->s;
-				read_uap.cbuf = uap->buf;
-				read_uap.nbyte = uap->len;
-	
-				error = read(p, &read_uap, &read_retval);
-				*retval = (int)read_retval;
-			}
-		}
-	}
-#endif /* HACK_FOR_4056224 */
-
-	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error,0,0,0,0);
-	
-	return (error);
-}
-
-#if COMPAT_43_SOCKET
-int
-orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval)
-{
-
-	uap->flags |= MSG_COMPAT;
-	return (recvfrom(p, uap, retval));
-}
-#endif
-
 
-#if COMPAT_43_SOCKET
-int
-orecv(__unused struct proc *p, __unused struct orecv_args	*uap, 
-		__unused register_t *retval)
-{
-	/* these are no longer supported and in fact 
-	 * there is no way to call it directly.
-	 * LP64todo - remove this once we're sure there are no clients 
-	 */
+	KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
 
-	return (ENOTSUP);
+	return (error);
 }
 
 /*
- * Old recvmsg.  This code takes advantage of the fact that the old msghdr
- * overlays the new one, missing only the flags, and with the (old) access
- * rights where the control fields are now.
+ * Returns:	0			Success
+ *		EMSGSIZE
+ *		ENOMEM
+ *	copyin:EFAULT
+ *	copyout:EFAULT
+ *	recvit:???
+ *
+ * Notes:	For full documentation of the return codes from recvit, see
+ *		the block header for the recvit function.
  */
 int
-orecvmsg(__unused struct proc *p, __unused struct orecvmsg_args *uap, 
-		__unused register_t *retval)
+recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval)
 {
-	/* these are no longer supported and in fact 
-	 * there is no way to call it directly.
-	 * LP64todo - remove this once we're sure there are no clients 
-	 */
-
-	return (ENOTSUP);
-
+	__pthread_testcancel(1);
+	return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap,
+	    retval));
 }
-#endif
 
 int
-recvmsg(p, uap, retval)
-	struct proc *p;
-	struct recvmsg_args *uap;
-	register_t *retval;
+recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap,
+    int32_t *retval)
 {
-	struct msghdr msg;
+	struct user32_msghdr msg32;
+	struct user64_msghdr msg64;
 	struct user_msghdr user_msg;
 	caddr_t msghdrp;
 	int	size_of_msghdr;
 	user_addr_t uiov;
-	register int error;
-	int size_of_iovec;
+	int error;
 	uio_t auio = NULL;
 	struct user_iovec *iovp;
 
-	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0,0,0,0,0);
+	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
 	AUDIT_ARG(fd, uap->s);
 	if (IS_64BIT_PROCESS(p)) {
-		msghdrp = (caddr_t) &user_msg;
-		size_of_msghdr = sizeof(user_msg);
-		size_of_iovec = sizeof(struct user_iovec);
-	}
-	else {
-		msghdrp = (caddr_t) &msg;
-		size_of_msghdr = sizeof(msg);
-		size_of_iovec = sizeof(struct iovec);
+		msghdrp = (caddr_t)&msg64;
+		size_of_msghdr = sizeof (msg64);
+	} else {
+		msghdrp = (caddr_t)&msg32;
+		size_of_msghdr = sizeof (msg32);
 	}
 	error = copyin(uap->msg, msghdrp, size_of_msghdr);
-	if (error)
-	{
-	    	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
+	if (error) {
+		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
 		return (error);
 	}
 
 	/* only need to copy if user process is not 64-bit */
-	if (!IS_64BIT_PROCESS(p)) {
-		user_msg.msg_flags = msg.msg_flags;
-		user_msg.msg_controllen = msg.msg_controllen;
-		user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control);
-		user_msg.msg_iovlen = msg.msg_iovlen;
-		user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov);
-		user_msg.msg_namelen = msg.msg_namelen;
-		user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name);
+	if (IS_64BIT_PROCESS(p)) {
+		user_msg.msg_flags = msg64.msg_flags;
+		user_msg.msg_controllen = msg64.msg_controllen;
+		user_msg.msg_control = msg64.msg_control;
+		user_msg.msg_iovlen = msg64.msg_iovlen;
+		user_msg.msg_iov = msg64.msg_iov;
+		user_msg.msg_namelen = msg64.msg_namelen;
+		user_msg.msg_name = msg64.msg_name;
+	} else {
+		user_msg.msg_flags = msg32.msg_flags;
+		user_msg.msg_controllen = msg32.msg_controllen;
+		user_msg.msg_control = msg32.msg_control;
+		user_msg.msg_iovlen = msg32.msg_iovlen;
+		user_msg.msg_iov = msg32.msg_iov;
+		user_msg.msg_namelen = msg32.msg_namelen;
+		user_msg.msg_name = msg32.msg_name;
 	}
 
 	if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) {
-		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0);
+		KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,
+		    0, 0, 0, 0);
 		return (EMSGSIZE);
 	}
 
-#if COMPAT_43_SOCKET
-	user_msg.msg_flags = uap->flags &~ MSG_COMPAT;
-#else
 	user_msg.msg_flags = uap->flags;
-#endif
 
 	/* allocate a uio large enough to hold the number of iovecs passed */
 	auio = uio_create(user_msg.msg_iovlen, 0,
-				  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
-				  UIO_READ);
+	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
+	    UIO_READ);
 	if (auio == NULL) {
 		error = ENOMEM;
 		goto done;
 	}
 
-	/* get location of iovecs within the uio.  then copyin the iovecs from
+	/*
+	 * get location of iovecs within the uio.  then copyin the iovecs from
 	 * user space.
 	 */
 	iovp = uio_iovsaddr(auio);
@@ -1251,42 +2125,279 @@ recvmsg(p, uap, retval)
 	}
 	uiov = user_msg.msg_iov;
 	user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
-	error = copyin(uiov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec));
+	error = copyin_user_iovec_array(uiov,
+		IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+		user_msg.msg_iovlen, iovp);
 	if (error)
 		goto done;
 
-	/* finish setup of uio_t */ 
-	uio_calculateresid(auio);
-		
-	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
-	if (!error) {
-		user_msg.msg_iov = uiov;
-		/* only need to copy if user process is not 64-bit */
-		if (!IS_64BIT_PROCESS(p)) {
-			// LP64todo - do all these change?  if not, then no need to copy all of them!
-			msg.msg_flags = user_msg.msg_flags;
-			msg.msg_controllen = user_msg.msg_controllen;
-			msg.msg_control = CAST_DOWN(caddr_t, user_msg.msg_control);
-			msg.msg_iovlen = user_msg.msg_iovlen;
-			msg.msg_iov = (struct iovec *) CAST_DOWN(caddr_t, user_msg.msg_iov);
-			msg.msg_namelen = user_msg.msg_namelen;
-			msg.msg_name = CAST_DOWN(caddr_t, user_msg.msg_name);
+	/* finish setup of uio_t */
+	error = uio_calculateresid(auio);
+	if (error) {
+		goto done;
+	}
+
+	error = recvit(p, uap->s, &user_msg, auio, 0, retval);
+	if (!error) {
+		user_msg.msg_iov = uiov;
+		if (IS_64BIT_PROCESS(p)) {
+			msg64.msg_flags = user_msg.msg_flags;
+			msg64.msg_controllen = user_msg.msg_controllen;
+			msg64.msg_control = user_msg.msg_control;
+			msg64.msg_iovlen = user_msg.msg_iovlen;
+			msg64.msg_iov = user_msg.msg_iov;
+			msg64.msg_namelen = user_msg.msg_namelen;
+			msg64.msg_name = user_msg.msg_name;
+		} else {
+			msg32.msg_flags = user_msg.msg_flags;
+			msg32.msg_controllen = user_msg.msg_controllen;
+			msg32.msg_control = user_msg.msg_control;
+			msg32.msg_iovlen = user_msg.msg_iovlen;
+			msg32.msg_iov = user_msg.msg_iov;
+			msg32.msg_namelen = user_msg.msg_namelen;
+			msg32.msg_name = user_msg.msg_name;
+		}
+		error = copyout(msghdrp, uap->msg, size_of_msghdr);
+	}
+done:
+	if (auio != NULL) {
+		uio_free(auio);
+	}
+	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0);
+	return (error);
+}
+
+int
+recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval)
+{
+	int error = EOPNOTSUPP;
+	struct user_msghdr_x *user_msg_x = NULL;
+	struct recv_msg_elem *recv_msg_array = NULL;
+	struct socket *so;
+	user_ssize_t len_before = 0, len_after;
+	int need_drop = 0;
+	size_t size_of_msghdr;
+	void *umsgp = NULL;
+	u_int i;
+	u_int uiocnt;
+
+	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+	error = file_socket(uap->s, &so);
+	if (error) {
+		goto out;
+	}
+	need_drop = 1;
+	if (so == NULL) {
+		error = EBADF;
+		goto out;
+	}
+	/*
+	 * Input parameter range check
+	 */
+	if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) {
+		error = EINVAL;
+		goto out;
+	}
+	if (uap->cnt > somaxrecvmsgx)
+		uap->cnt = somaxrecvmsgx;
+
+	user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
+	    M_TEMP, M_WAITOK | M_ZERO);
+	if (user_msg_x == NULL) {
+		DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
+		error = ENOMEM;
+		goto out;
+	}
+	recv_msg_array = alloc_recv_msg_array(uap->cnt);
+	if (recv_msg_array == NULL) {
+		DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__);
+		error = ENOMEM;
+		goto out;
+	}
+	size_of_msghdr = IS_64BIT_PROCESS(p) ?
+	    sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
+
+	umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
+	if (umsgp == NULL) {
+		DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
+		error = ENOMEM;
+		goto out;
+	}
+	error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr);
+	if (error) {
+		DBG_PRINTF("%s copyin() failed\n", __func__);
+		goto out;
+	}
+	error = internalize_recv_msghdr_array(umsgp,
+	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+	    UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
+	if (error) {
+		DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__);
+		goto out;
+	}
+	/*
+	 * Make sure the size of each message iovec and
+	 * the aggregate size of all the iovec is valid
+	 */
+	if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) {
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * Sanity check on passed arguments
+	 */
+	for (i = 0; i < uap->cnt; i++) {
+		struct user_msghdr_x *mp = user_msg_x + i;
+
+		if (mp->msg_flags != 0) {
+			error = EINVAL;
+			goto out;
+		}
+	}
+#if CONFIG_MACF_SOCKET_SUBSET
+	/*
+	 * We check the state without holding the socket lock;
+	 * if a race condition occurs, it would simply result
+	 * in an extra call to the MAC check function.
+	 */
+	if (!(so->so_state & SS_DEFUNCT) &&
+	    !(so->so_state & SS_ISCONNECTED) &&
+	    !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
+	    (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
+		goto out;
+#endif /* MAC_SOCKET_SUBSET */
+
+	len_before = recv_msg_array_resid(recv_msg_array, uap->cnt);
+
+	if (so->so_proto->pr_usrreqs->pru_soreceive_list !=
+	    pru_soreceive_list_notsupp &&
+	    somaxrecvmsgx == 0) {
+		error = so->so_proto->pr_usrreqs->pru_soreceive_list(so,
+		    recv_msg_array, uap->cnt, &uap->flags);
+	} else {
+		int flags = uap->flags;
+
+		for (i = 0; i < uap->cnt; i++) {
+			struct recv_msg_elem *recv_msg_elem;
+			uio_t auio;
+			struct sockaddr **psa;
+			struct mbuf **controlp;
+
+			recv_msg_elem = recv_msg_array + i;
+			auio = recv_msg_elem->uio;
+
+			/*
+			 * Do not block if we got at least one packet
+			 */
+			if (i > 0)
+				flags |= MSG_DONTWAIT;
+
+			psa = (recv_msg_elem->which & SOCK_MSG_SA) ?
+			    &recv_msg_elem->psa : NULL;
+			controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ?
+			    &recv_msg_elem->controlp : NULL;
+
+			error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
+			    auio, (struct mbuf **)0, controlp, &flags);
+			if (error)
+				break;
+			/*
+			 * We have some data
+			 */
+			recv_msg_elem->which |= SOCK_MSG_DATA;
+			/*
+			 * Stop on partial copy
+			 */
+			if (flags & (MSG_RCVMORE | MSG_TRUNC))
+				break;
+		}
+		if ((uap->flags & MSG_DONTWAIT) == 0)
+			flags &= ~MSG_DONTWAIT;
+		uap->flags = flags;
+	}
+
+	len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
+
+	if (error) {
+		if (len_after != len_before && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+		else
+			goto out;
+	}
+
+	uiocnt = externalize_recv_msghdr_array(umsgp,
+	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
+	    UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
+
+	error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
+	if (error) {
+		DBG_PRINTF("%s copyout() failed\n", __func__);
+		goto out;
+	}
+	*retval = (int)(uiocnt);
+
+	for (i = 0; i < uap->cnt; i++) {
+		struct user_msghdr_x *mp = user_msg_x + i;
+		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+		struct sockaddr *fromsa = recv_msg_elem->psa;
+
+		if (mp->msg_name) {
+			error = copyout_sa(fromsa, mp->msg_name,
+			    &mp->msg_namelen);
+			if (error)
+				goto out;
+		}
+		if (mp->msg_control) {
+			error = copyout_control(p, recv_msg_elem->controlp,
+			    mp->msg_control, &mp->msg_controllen,
+			    &mp->msg_flags);
+			if (error)
+				goto out;
 		}
-		error = copyout(msghdrp, uap->msg, size_of_msghdr);
-	}
-done:
-	if (auio != NULL) {
-		uio_free(auio);
 	}
-	KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0);
+out:
+	if (need_drop)
+		file_drop(uap->s);
+	if (umsgp != NULL)
+		_FREE(umsgp, M_TEMP);
+	if (recv_msg_array != NULL)
+		free_recv_msg_array(recv_msg_array, uap->cnt);
+	if (user_msg_x != NULL)
+		_FREE(user_msg_x, M_TEMP);
+
+	KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
+
 	return (error);
 }
 
+/*
+ * Returns:	0			Success
+ *		EBADF
+ *	file_socket:ENOTSOCK
+ *	file_socket:EBADF
+ *	soshutdown:EINVAL
+ *	soshutdown:ENOTCONN
+ *	soshutdown:EADDRNOTAVAIL[TCP]
+ *	soshutdown:ENOBUFS[TCP]
+ *	soshutdown:EMSGSIZE[TCP]
+ *	soshutdown:EHOSTUNREACH[TCP]
+ *	soshutdown:ENETUNREACH[TCP]
+ *	soshutdown:ENETDOWN[TCP]
+ *	soshutdown:ENOMEM[TCP]
+ *	soshutdown:EACCES[TCP]
+ *	soshutdown:EMSGSIZE[TCP]
+ *	soshutdown:ENOBUFS[TCP]
+ *	soshutdown:???[TCP]		[ignorable: mostly IPSEC/firewall/DLIL]
+ *	soshutdown:???			[other protocol families]
+ */
 /* ARGSUSED */
 int
-shutdown(__unused struct proc *p, struct shutdown_args *uap, __unused register_t *retval)
+shutdown(__unused struct proc *p, struct shutdown_args *uap,
+    __unused int32_t *retval)
 {
-	struct socket * so;
+	struct socket *so;
 	int error;
 
 	AUDIT_ARG(fd, uap->s);
@@ -1300,26 +2411,37 @@ shutdown(__unused struct proc *p, struct shutdown_args *uap, __unused register_t
 	error =  soshutdown((struct socket *)so, uap->how);
 out:
 	file_drop(uap->s);
-	return(error);
+	return (error);
 }
 
-
-
-
-
+/*
+ * Returns:	0			Success
+ *		EFAULT
+ *		EINVAL
+ *		EACCES			Mandatory Access Control failure
+ *	file_socket:ENOTSOCK
+ *	file_socket:EBADF
+ *	sosetopt:EINVAL
+ *	sosetopt:ENOPROTOOPT
+ *	sosetopt:ENOBUFS
+ *	sosetopt:EDOM
+ *	sosetopt:EFAULT
+ *	sosetopt:EOPNOTSUPP[AF_UNIX]
+ *	sosetopt:???
+ */
 /* ARGSUSED */
 int
-setsockopt(struct proc *p, struct setsockopt_args *uap, __unused register_t *retval)
+setsockopt(struct proc *p, struct setsockopt_args *uap,
+    __unused int32_t *retval)
 {
-	struct socket * so;
+	struct socket *so;
 	struct sockopt sopt;
 	int error;
 
 	AUDIT_ARG(fd, uap->s);
 	if (uap->val == 0 && uap->valsize != 0)
 		return (EFAULT);
-	if (uap->valsize < 0)
-		return (EINVAL);
+	/* No bounds checking on size (it's unsigned) */
 
 	error = file_socket(uap->s, &so);
 	if (error)
@@ -1336,36 +2458,51 @@ setsockopt(struct proc *p, struct setsockopt_args *uap, __unused register_t *ret
 		error = EINVAL;
 		goto out;
 	}
-	error = sosetopt(so, &sopt);
+#if CONFIG_MACF_SOCKET_SUBSET
+	if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so,
+	    &sopt)) != 0)
+		goto out;
+#endif /* MAC_SOCKET_SUBSET */
+	error = sosetoptlock(so, &sopt, 1);	/* will lock socket */
 out:
 	file_drop(uap->s);
-	return(error);
+	return (error);
 }
 
 
 
+/*
+ * Returns:	0			Success
+ *		EINVAL
+ *		EBADF
+ *		EACCES			Mandatory Access Control failure
+ *	copyin:EFAULT
+ *	copyout:EFAULT
+ *	file_socket:ENOTSOCK
+ *	file_socket:EBADF
+ *	sogetopt:???
+ */
 int
-getsockopt(struct proc *p, struct getsockopt_args  *uap, __unused register_t *retval)
+getsockopt(struct proc *p, struct getsockopt_args  *uap,
+    __unused int32_t *retval)
 {
 	int		error;
 	socklen_t	valsize;
 	struct sockopt	sopt;
-	struct socket *	so;
+	struct socket *so;
 
 	error = file_socket(uap->s, &so);
 	if (error)
 		return (error);
 	if (uap->val) {
-		error = copyin(uap->avalsize, (caddr_t)&valsize, sizeof (valsize));
+		error = copyin(uap->avalsize, (caddr_t)&valsize,
+		    sizeof (valsize));
 		if (error)
 			goto out;
-		if (valsize < 0) {
-			error = EINVAL;
-			goto out;
-		}
-	} else
+		/* No bounds checking on size (it's unsigned) */
+	} else {
 		valsize = 0;
-
+	}
 	sopt.sopt_dir = SOPT_GET;
 	sopt.sopt_level = uap->level;
 	sopt.sopt_name = uap->name;
@@ -1377,10 +2514,16 @@ getsockopt(struct proc *p, struct getsockopt_args  *uap, __unused register_t *re
 		error = EBADF;
 		goto out;
 	}
-	error = sogetopt((struct socket *)so, &sopt);
+#if CONFIG_MACF_SOCKET_SUBSET
+	if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so,
+	    &sopt)) != 0)
+		goto out;
+#endif /* MAC_SOCKET_SUBSET */
+	error = sogetoptlock((struct socket *)so, &sopt, 1);	/* will lock */
 	if (error == 0) {
 		valsize = sopt.sopt_valsize;
-		error = copyout((caddr_t)&valsize, uap->avalsize, sizeof (valsize));
+		error = copyout((caddr_t)&valsize, uap->avalsize,
+		    sizeof (valsize));
 	}
 out:
 	file_drop(uap->s);
@@ -1390,21 +2533,33 @@ out:
 
 /*
  * Get socket name.
+ *
+ * Returns:	0			Success
+ *		EBADF
+ *	file_socket:ENOTSOCK
+ *	file_socket:EBADF
+ *	copyin:EFAULT
+ *	copyout:EFAULT
+ *	<pru_sockaddr>:ENOBUFS[TCP]
+ *	<pru_sockaddr>:ECONNRESET[TCP]
+ *	<pru_sockaddr>:EINVAL[AF_UNIX]
+ *	<sf_getsockname>:???
  */
 /* ARGSUSED */
-static int
-getsockname1(__unused struct proc *p, struct getsockname_args *uap, __unused register_t *retval,
-	int compat)
+int
+getsockname(__unused struct proc *p, struct getsockname_args *uap,
+    __unused int32_t *retval)
 {
 	struct socket *so;
 	struct sockaddr *sa;
 	socklen_t len;
+	socklen_t sa_len;
 	int error;
 
 	error = file_socket(uap->fdes, &so);
 	if (error)
 		return (error);
-	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
+	error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
 	if (error)
 		goto out;
 	if (so == NULL) {
@@ -1414,30 +2569,10 @@ getsockname1(__unused struct proc *p, struct getsockname_args *uap, __unused reg
 	sa = 0;
 	socket_lock(so, 1);
 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
-	if (error == 0)
-	{
-		struct socket_filter_entry *filter;
-		int	filtered = 0;
-		for (filter = so->so_filt; filter && error == 0;
-			 filter = filter->sfe_next_onsocket) {
-			if (filter->sfe_filter->sf_filter.sf_getsockname) {
-				if (!filtered) {
-					filtered = 1;
-					sflt_use(so);
-					socket_unlock(so, 0);
-				}
-				error = filter->sfe_filter->sf_filter.sf_getsockname(filter->sfe_cookie,
-							so, &sa);
-			}
-		}
-		
+	if (error == 0) {
+		error = sflt_getsockname(so, &sa);
 		if (error == EJUSTRETURN)
 			error = 0;
-		
-		if (filtered) {
-			socket_lock(so, 0);
-			sflt_unuse(so);
-		}
 	}
 	socket_unlock(so, 1);
 	if (error)
@@ -1447,15 +2582,15 @@ getsockname1(__unused struct proc *p, struct getsockname_args *uap, __unused reg
 		goto gotnothing;
 	}
 
-	len = MIN(len, sa->sa_len);
-#if COMPAT_43_SOCKET
-	if (compat)
-		((struct osockaddr *)sa)->sa_family = sa->sa_family;
-#endif
+	sa_len = sa->sa_len;
+	len = MIN(len, sa_len);
 	error = copyout((caddr_t)sa, uap->asa, len);
-	if (error == 0)
+	if (error)
+		goto bad;
+	/* return the actual, untruncated address length */
+	len = sa_len;
 gotnothing:
-		error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
+		error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
 bad:
 	if (sa)
 		FREE(sa, M_SONAME);
@@ -1464,31 +2599,29 @@ out:
 	return (error);
 }
 
-int
-getsockname(struct proc *p, struct getsockname_args *uap, register_t *retval)
-{
-	return (getsockname1(p, uap, retval, 0));
-}
-
-#if COMPAT_43_SOCKET
-int
-ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval)
-{
-	return (getsockname1(p, uap, retval, 1));
-}
-#endif /* COMPAT_43_SOCKET */
-
 /*
  * Get name of peer for connected socket.
+ *
+ * Returns:	0			Success
+ *		EBADF
+ *		EINVAL
+ *		ENOTCONN
+ *	file_socket:ENOTSOCK
+ *	file_socket:EBADF
+ *	copyin:EFAULT
+ *	copyout:EFAULT
+ *	<pru_peeraddr>:???
+ *	<sf_getpeername>:???
  */
 /* ARGSUSED */
 int
-getpeername1(__unused struct proc *p, struct getpeername_args *uap, __unused register_t *retval,
-	int compat)
+getpeername(__unused struct proc *p, struct getpeername_args *uap,
+    __unused int32_t *retval)
 {
 	struct socket *so;
 	struct sockaddr *sa;
 	socklen_t len;
+	socklen_t sa_len;
 	int error;
 
 	error = file_socket(uap->fdes, &so);
@@ -1501,42 +2634,30 @@ getpeername1(__unused struct proc *p, struct getpeername_args *uap, __unused reg
 
 	socket_lock(so, 1);
 
+	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
+	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
+		/* the socket has been shut down; no more getpeername calls */
+		socket_unlock(so, 1);
+		error = EINVAL;
+		goto out;
+	}
+
 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 		socket_unlock(so, 1);
 		error = ENOTCONN;
 		goto out;
 	}
-	error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t));
+	error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t));
 	if (error) {
 		socket_unlock(so, 1);
 		goto out;
 	}
 	sa = 0;
 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
-	if (error == 0)
-	{
-		struct socket_filter_entry *filter;
-		int	filtered = 0;
-		for (filter = so->so_filt; filter && error == 0;
-			 filter = filter->sfe_next_onsocket) {
-			if (filter->sfe_filter->sf_filter.sf_getpeername) {
-				if (!filtered) {
-					filtered = 1;
-					sflt_use(so);
-					socket_unlock(so, 0);
-				}
-				error = filter->sfe_filter->sf_filter.sf_getpeername(filter->sfe_cookie,
-							so, &sa);
-			}
-		}
-		
+	if (error == 0) {
+		error = sflt_getpeername(so, &sa);
 		if (error == EJUSTRETURN)
 			error = 0;
-		
-		if (filtered) {
-			socket_lock(so, 0);
-			sflt_unuse(so);
-		}
 	}
 	socket_unlock(so, 1);
 	if (error)
@@ -1545,453 +2666,958 @@ getpeername1(__unused struct proc *p, struct getpeername_args *uap, __unused reg
 		len = 0;
 		goto gotnothing;
 	}
-	len = MIN(len, sa->sa_len);
-#if COMPAT_43_SOCKET
-	if (compat)
-		((struct osockaddr *)sa)->sa_family =
-		    sa->sa_family;
-#endif
-	error = copyout(sa, uap->asa, len);
-	if (error)
-		goto bad;
-gotnothing:
-	error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
-bad:
-	if (sa) FREE(sa, M_SONAME);
-out:
-	file_drop(uap->fdes);
+	sa_len = sa->sa_len;
+	len = MIN(len, sa_len);
+	error = copyout(sa, uap->asa, len);
+	if (error)
+		goto bad;
+	/* return the actual, untruncated address length */
+	len = sa_len;
+gotnothing:
+	error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t));
+bad:
+	if (sa) FREE(sa, M_SONAME);
+out:
+	file_drop(uap->fdes);
+	return (error);
+}
+
+int
+sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type)
+{
+	struct sockaddr *sa;
+	struct mbuf *m;
+	int error;
+
+	size_t alloc_buflen = (size_t)buflen;
+
+	if (alloc_buflen > INT_MAX/2)
+		return (EINVAL);
+#ifdef __LP64__
+	/*
+	 * The fd's in the buffer must expand to be pointers, thus we need twice
+	 * as much space
+	 */
+	if (type == MT_CONTROL)
+		alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) +
+		    sizeof(struct cmsghdr);
+#endif
+	if (alloc_buflen > MLEN) {
+		if (type == MT_SONAME && alloc_buflen <= 112)
+			alloc_buflen = MLEN;	/* unix domain compat. hack */
+		else if (alloc_buflen > MCLBYTES)
+			return (EINVAL);
+	}
+	m = m_get(M_WAIT, type);
+	if (m == NULL)
+		return (ENOBUFS);
+	if (alloc_buflen > MLEN) {
+		MCLGET(m, M_WAIT);
+		if ((m->m_flags & M_EXT) == 0) {
+			m_free(m);
+			return (ENOBUFS);
+		}
+	}
+	/*
+	 * K64: We still copyin the original buflen because it gets expanded
+	 * later and we lie about the size of the mbuf because it only affects
+	 * unp_* functions
+	 */
+	m->m_len = buflen;
+	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
+	if (error) {
+		(void) m_free(m);
+	} else {
+		*mp = m;
+		if (type == MT_SONAME) {
+			sa = mtod(m, struct sockaddr *);
+			sa->sa_len = buflen;
+		}
+	}
+	return (error);
+}
+
+/*
+ * Given a user_addr_t of length len, allocate and fill out a *sa.
+ *
+ * Returns:	0			Success
+ *		ENAMETOOLONG		Filename too long
+ *		EINVAL			Invalid argument
+ *		ENOMEM			Not enough space
+ *		copyin:EFAULT		Bad address
+ */
+static int
+getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr,
+    size_t len, boolean_t translate_unspec)
+{
+	struct sockaddr *sa;
+	int error;
+
+	if (len > SOCK_MAXADDRLEN)
+		return (ENAMETOOLONG);
+
+	if (len < offsetof(struct sockaddr, sa_data[0]))
+		return (EINVAL);
+
+	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO);
+	if (sa == NULL) {
+		return (ENOMEM);
+	}
+	error = copyin(uaddr, (caddr_t)sa, len);
+	if (error) {
+		FREE(sa, M_SONAME);
+	} else {
+		/*
+		 * Force sa_family to AF_INET on AF_INET sockets to handle
+		 * legacy applications that use AF_UNSPEC (0).  On all other
+		 * sockets we leave it unchanged and let the lower layer
+		 * handle it.
+		 */
+		if (translate_unspec && sa->sa_family == AF_UNSPEC &&
+		    SOCK_CHECK_DOM(so, PF_INET) &&
+		    len == sizeof (struct sockaddr_in))
+			sa->sa_family = AF_INET;
+
+		sa->sa_len = len;
+		*namp = sa;
+	}
+	return (error);
+}
+
+static int
+getsockaddr_s(struct socket *so, struct sockaddr_storage *ss,
+    user_addr_t uaddr, size_t len, boolean_t translate_unspec)
+{
+	int error;
+
+	if (ss == NULL || uaddr == USER_ADDR_NULL ||
+	    len < offsetof(struct sockaddr, sa_data[0]))
+		return (EINVAL);
+
+	/*
+	 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
+	 * so the check here is inclusive.
+	 */
+	if (len > sizeof (*ss))
+		return (ENAMETOOLONG);
+
+	bzero(ss, sizeof (*ss));
+	error = copyin(uaddr, (caddr_t)ss, len);
+	if (error == 0) {
+		/*
+		 * Force sa_family to AF_INET on AF_INET sockets to handle
+		 * legacy applications that use AF_UNSPEC (0).  On all other
+		 * sockets we leave it unchanged and let the lower layer
+		 * handle it.
+		 */
+		if (translate_unspec && ss->ss_family == AF_UNSPEC &&
+		    SOCK_CHECK_DOM(so, PF_INET) &&
+		    len == sizeof (struct sockaddr_in))
+			ss->ss_family = AF_INET;
+
+		ss->ss_len = len;
+	}
+	return (error);
+}
+
+/*
+ * Hard limit on the number of source and/or destination addresses
+ * that can be specified by an application.
+ */
+#define	SOCKADDRLIST_MAX_ENTRIES	64
+
+static int
+getsockaddrlist(struct socket *so, struct sockaddr_list **slp,
+    user_addr_t uaddr, socklen_t uaddrlen, boolean_t xlate_unspec)
+{
+	struct sockaddr_list *sl;
+	int error = 0;
+
+	*slp = NULL;
+
+	if (uaddr == USER_ADDR_NULL || uaddrlen == 0 ||
+	    uaddrlen > (sizeof(struct sockaddr_in6) * SOCKADDRLIST_MAX_ENTRIES))
+		return (EINVAL);
+
+	sl = sockaddrlist_alloc(M_WAITOK);
+	if (sl == NULL)
+		return (ENOMEM);
+
+	VERIFY(sl->sl_cnt == 0);
+	while (uaddrlen > 0 && sl->sl_cnt < SOCKADDRLIST_MAX_ENTRIES) {
+		struct sockaddr_storage ss;
+		struct sockaddr_entry *se;
+		struct sockaddr *sa;
+
+		if (uaddrlen < sizeof (struct sockaddr)) {
+			error = EINVAL;
+			break;
+		}
+
+		bzero(&ss, sizeof (ss));
+		error = copyin(uaddr, (caddr_t)&ss, sizeof (struct sockaddr));
+		if (error != 0)
+			break;
+
+		/* getsockaddr does the same but we need them now */
+		if (uaddrlen < ss.ss_len ||
+		    ss.ss_len < offsetof(struct sockaddr, sa_data[0])) {
+			error = EINVAL;
+			break;
+		} else if (ss.ss_len > sizeof (ss)) {
+			/*
+			 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
+			 * so the check here is inclusive.  We could use the
+			 * latter instead, but that seems like overkill for now.
+			 */
+			error = ENAMETOOLONG;
+			break;
+		}
+
+		se = sockaddrentry_alloc(M_WAITOK);
+		if (se == NULL) {
+			error = ENOBUFS;
+			break;
+		}
+
+		sockaddrlist_insert(sl, se);
+
+		error = getsockaddr(so, &sa, uaddr, ss.ss_len, xlate_unspec);
+		if (error != 0)
+			break;
+
+		VERIFY(sa != NULL && sa->sa_len == ss.ss_len);
+		se->se_addr = sa;
+
+		uaddr += ss.ss_len;
+		VERIFY(((signed)uaddrlen - ss.ss_len) >= 0);
+		uaddrlen -= ss.ss_len;
+	}
+
+	if (error != 0)
+		sockaddrlist_free(sl);
+	else
+		*slp = sl;
+
+	return (error);
+}
+
+int
+internalize_user_msghdr_array(const void *src, int spacetype, int direction,
+    u_int count, struct user_msghdr_x *dst, struct uio **uiop)
+{
+	int error = 0;
+	u_int i;
+	u_int namecnt = 0;
+	u_int ctlcnt = 0;
+
+	for (i = 0; i < count; i++) {
+		uio_t auio;
+		struct user_iovec *iovp;
+		struct user_msghdr_x *user_msg = dst + i;
+
+		if (spacetype == UIO_USERSPACE64) {
+			const struct user64_msghdr_x *msghdr64;
+
+			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
+
+			user_msg->msg_name = msghdr64->msg_name;
+			user_msg->msg_namelen = msghdr64->msg_namelen;
+			user_msg->msg_iov = msghdr64->msg_iov;
+			user_msg->msg_iovlen = msghdr64->msg_iovlen;
+			user_msg->msg_control = msghdr64->msg_control;
+			user_msg->msg_controllen = msghdr64->msg_controllen;
+			user_msg->msg_flags = msghdr64->msg_flags;
+			user_msg->msg_datalen = msghdr64->msg_datalen;
+		} else {
+			const struct user32_msghdr_x *msghdr32;
+
+			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
+
+			user_msg->msg_name = msghdr32->msg_name;
+			user_msg->msg_namelen = msghdr32->msg_namelen;
+			user_msg->msg_iov = msghdr32->msg_iov;
+			user_msg->msg_iovlen = msghdr32->msg_iovlen;
+			user_msg->msg_control = msghdr32->msg_control;
+			user_msg->msg_controllen = msghdr32->msg_controllen;
+			user_msg->msg_flags = msghdr32->msg_flags;
+			user_msg->msg_datalen = msghdr32->msg_datalen;
+		}
+
+		if (user_msg->msg_iovlen <= 0 ||
+		    user_msg->msg_iovlen > UIO_MAXIOV) {
+			error = EMSGSIZE;
+			goto done;
+		}
+		auio = uio_create(user_msg->msg_iovlen, 0, spacetype,
+		    direction);
+		if (auio == NULL) {
+			error = ENOMEM;
+			goto done;
+		}
+		uiop[i] = auio;
+
+		iovp = uio_iovsaddr(auio);
+		if (iovp == NULL) {
+			error = ENOMEM;
+			goto done;
+		}
+		error = copyin_user_iovec_array(user_msg->msg_iov,
+			spacetype, user_msg->msg_iovlen, iovp);
+		if (error)
+			goto done;
+		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
+
+		error = uio_calculateresid(auio);
+		if (error)
+			goto done;
+		user_msg->msg_datalen = uio_resid(auio);
+
+		if (user_msg->msg_name && user_msg->msg_namelen)
+			namecnt++;
+		if (user_msg->msg_control && user_msg->msg_controllen)
+			ctlcnt++;
+	}
+done:
+
+	return (error);
+}
+
+int
+internalize_recv_msghdr_array(const void *src, int spacetype, int direction,
+    u_int count, struct user_msghdr_x *dst,
+    struct recv_msg_elem *recv_msg_array)
+{
+	int error = 0;
+	u_int i;
+
+	for (i = 0; i < count; i++) {
+		struct user_iovec *iovp;
+		struct user_msghdr_x *user_msg = dst + i;
+		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+
+		if (spacetype == UIO_USERSPACE64) {
+			const struct user64_msghdr_x *msghdr64;
+
+			msghdr64 = ((const struct user64_msghdr_x *)src) + i;
+
+			user_msg->msg_name = msghdr64->msg_name;
+			user_msg->msg_namelen = msghdr64->msg_namelen;
+			user_msg->msg_iov = msghdr64->msg_iov;
+			user_msg->msg_iovlen = msghdr64->msg_iovlen;
+			user_msg->msg_control = msghdr64->msg_control;
+			user_msg->msg_controllen = msghdr64->msg_controllen;
+			user_msg->msg_flags = msghdr64->msg_flags;
+			user_msg->msg_datalen = msghdr64->msg_datalen;
+		} else {
+			const struct user32_msghdr_x *msghdr32;
+
+			msghdr32 = ((const struct user32_msghdr_x *)src) + i;
+
+			user_msg->msg_name = msghdr32->msg_name;
+			user_msg->msg_namelen = msghdr32->msg_namelen;
+			user_msg->msg_iov = msghdr32->msg_iov;
+			user_msg->msg_iovlen = msghdr32->msg_iovlen;
+			user_msg->msg_control = msghdr32->msg_control;
+			user_msg->msg_controllen = msghdr32->msg_controllen;
+			user_msg->msg_flags = msghdr32->msg_flags;
+			user_msg->msg_datalen = msghdr32->msg_datalen;
+		}
+
+		if (user_msg->msg_iovlen <= 0 ||
+		    user_msg->msg_iovlen > UIO_MAXIOV) {
+			error = EMSGSIZE;
+			goto done;
+		}
+		recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0,
+		    spacetype, direction);
+		if (recv_msg_elem->uio == NULL) {
+			error = ENOMEM;
+			goto done;
+		}
+
+		iovp = uio_iovsaddr(recv_msg_elem->uio);
+		if (iovp == NULL) {
+			error = ENOMEM;
+			goto done;
+		}
+		error = copyin_user_iovec_array(user_msg->msg_iov,
+			spacetype, user_msg->msg_iovlen, iovp);
+		if (error)
+			goto done;
+		user_msg->msg_iov = CAST_USER_ADDR_T(iovp);
+
+		error = uio_calculateresid(recv_msg_elem->uio);
+		if (error)
+			goto done;
+		user_msg->msg_datalen = uio_resid(recv_msg_elem->uio);
+
+		if (user_msg->msg_name && user_msg->msg_namelen)
+			recv_msg_elem->which |= SOCK_MSG_SA;
+		if (user_msg->msg_control && user_msg->msg_controllen)
+			recv_msg_elem->which |= SOCK_MSG_CONTROL;
+	}
+done:
+
 	return (error);
 }
 
-int
-getpeername(struct proc *p, struct getpeername_args *uap, register_t *retval)
+u_int
+externalize_user_msghdr_array(void *dst, int spacetype, int direction,
+    u_int count, const struct user_msghdr_x *src, struct uio **uiop)
 {
+#pragma unused(direction)
+	u_int i;
+	int seenlast = 0;
+	u_int retcnt = 0;
+
+	for (i = 0; i < count; i++) {
+		const struct user_msghdr_x *user_msg = src + i;
+		uio_t auio = uiop[i];
+		user_ssize_t len = user_msg->msg_datalen - uio_resid(auio);
+
+		if (user_msg->msg_datalen != 0 && len == 0)
+			seenlast = 1;
+
+		if (seenlast == 0)
+			retcnt ++;
 
-	return (getpeername1(p, uap, retval, 0));
+		if (spacetype == UIO_USERSPACE64) {
+			struct user64_msghdr_x *msghdr64;
+
+			msghdr64 = ((struct user64_msghdr_x *)dst) + i;
+
+			msghdr64->msg_flags = user_msg->msg_flags;
+			msghdr64->msg_datalen = len;
+
+		} else {
+			struct user32_msghdr_x *msghdr32;
+
+			msghdr32 = ((struct user32_msghdr_x *)dst) + i;
+
+			msghdr32->msg_flags = user_msg->msg_flags;
+			msghdr32->msg_datalen = len;
+		}
+	}
+	return (retcnt);
 }
 
-#if COMPAT_43_SOCKET
-int
-ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval)
+u_int
+externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
+    u_int count, const struct user_msghdr_x *src,
+    struct recv_msg_elem *recv_msg_array)
 {
+	u_int i;
+	int seenlast = 0;
+	u_int retcnt = 0;
+
+	for (i = 0; i < count; i++) {
+		const struct user_msghdr_x *user_msg = src + i;
+		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+		user_ssize_t len;
+
+		len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
+
+		if (direction == UIO_READ) {
+			if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0)
+				seenlast = 1;
+		} else {
+			if (user_msg->msg_datalen != 0 && len == 0)
+				seenlast = 1;
+		}
+
+		if (seenlast == 0)
+			retcnt ++;
+
+		if (spacetype == UIO_USERSPACE64) {
+			struct user64_msghdr_x *msghdr64;
 
-	return (getpeername1(p, uap, retval, 1));
+			msghdr64 = ((struct user64_msghdr_x *)dst) + i;
+
+			msghdr64->msg_flags = user_msg->msg_flags;
+			msghdr64->msg_datalen = len;
+
+		} else {
+			struct user32_msghdr_x *msghdr32;
+
+			msghdr32 = ((struct user32_msghdr_x *)dst) + i;
+
+			msghdr32->msg_flags = user_msg->msg_flags;
+			msghdr32->msg_datalen = len;
+		}
+	}
+	return (retcnt);
 }
-#endif /* COMPAT_43_SOCKET */
 
-int
-sockargs(mp, data, buflen, type)
-	struct mbuf **mp;
-	user_addr_t data;
-	int buflen, type;
+void
+free_uio_array(struct uio **uiop, u_int count)
 {
-	register struct sockaddr *sa;
-	register struct mbuf *m;
-	int error;
+	u_int i;
 
-	if ((u_int)buflen > MLEN) {
-#if COMPAT_43_SOCKET
-		if (type == MT_SONAME && (u_int)buflen <= 112)
-			buflen = MLEN;		/* unix domain compat. hack */
-		else
-#endif
-		if ((u_int)buflen > MCLBYTES)
-			return (EINVAL);
-	}
-	m = m_get(M_WAIT, type);
-	if (m == NULL)
-		return (ENOBUFS);
-	if ((u_int)buflen > MLEN) {
-		MCLGET(m, M_WAIT);
-		if ((m->m_flags & M_EXT) == 0) {
-			m_free(m);
-			return ENOBUFS;
-		}
+	for (i = 0; i < count; i++) {
+		if (uiop[i] != NULL)
+			uio_free(uiop[i]);
 	}
-	m->m_len = buflen;
-	error = copyin(data, mtod(m, caddr_t), (u_int)buflen);
-	if (error)
-		(void) m_free(m);
-	else {
-		*mp = m;
-		if (type == MT_SONAME) {
-			sa = mtod(m, struct sockaddr *);
+}
 
-#if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
-			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
-				sa->sa_family = sa->sa_len;
-#endif
-			sa->sa_len = buflen;
-		}
+__private_extern__ user_ssize_t
+uio_array_resid(struct uio **uiop, u_int count)
+{
+	user_ssize_t len = 0;
+	u_int i;
+
+	for (i = 0; i < count; i++) {
+		struct uio *auio = uiop[i];
+
+		if (auio != NULL)
+			len += uio_resid(auio);
 	}
-	return (error);
+	return (len);
 }
 
-/*
- * Given a user_addr_t of length len, allocate and fill out a *sa.
- */
 int
-getsockaddr(struct sockaddr **namp, user_addr_t uaddr, size_t len)
+uio_array_is_valid(struct uio **uiop, u_int count)
 {
-	struct sockaddr *sa;
-	int error;
+	user_ssize_t len = 0;
+	u_int i;
 
-	if (len > SOCK_MAXADDRLEN)
-		return ENAMETOOLONG;
+	for (i = 0; i < count; i++) {
+		struct uio *auio = uiop[i];
 
-	if (len == 0)
-	     return EINVAL;
+		if (auio != NULL) {
+			user_ssize_t resid = uio_resid(auio);
 
-	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
-	if (sa == NULL) {
-		return ENOMEM;
-	}
-	error = copyin(uaddr, (caddr_t)sa, len);
-	if (error) {
-		FREE(sa, M_SONAME);
-	} else {
-#if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN
-		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
-			sa->sa_family = sa->sa_len;
-#endif
-		sa->sa_len = len;
-		*namp = sa;
+			/*
+			 * Sanity check on the validity of the iovec:
+			 * no point in going over sb_max
+			 */
+			if (resid < 0 || (u_int32_t)resid > sb_max)
+				return (0);
+
+			len += resid;
+			if (len < 0 || (u_int32_t)len > sb_max)
+				return (0);
+		}
 	}
-	return error;
+	return (1);
 }
 
 
-#if SENDFILE
-/*
- * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
- * XXX - The sf_buf functions are currently private to sendfile(2), so have
- * been made static, but may be useful in the future for doing zero-copy in
- * other parts of the networking code. 
- */
-static void
-sf_buf_init(void *arg)
+struct recv_msg_elem *
+alloc_recv_msg_array(u_int count)
+{
+	struct recv_msg_elem *recv_msg_array;
+
+	recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
+	    M_TEMP, M_WAITOK | M_ZERO);
+
+	return (recv_msg_array);
+}
+
+void
+free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
 {
-	int i;
+	u_int i;
 
-	SLIST_INIT(&sf_freelist);
-	kmem_alloc_pageable(kernel_map, &sf_base, nsfbufs * PAGE_SIZE);
-	MALLOC(sf_bufs, struct sf_buf *, nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT|M_ZERO);
-	if (sf_bufs == NULL)
-		return;		/* XXX silently fail leaving sf_bufs NULL */
+	for (i = 0; i < count; i++) {
+		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
 
-	for (i = 0; i < nsfbufs; i++) {
-		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
-		SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
+		if (recv_msg_elem->uio != NULL)
+			uio_free(recv_msg_elem->uio);
+		if (recv_msg_elem->psa != NULL)
+			_FREE(recv_msg_elem->psa, M_TEMP);
+		if (recv_msg_elem->controlp != NULL)
+			m_freem(recv_msg_elem->controlp);
 	}
+	_FREE(recv_msg_array, M_TEMP);
 }
 
-/*
- * Get an sf_buf from the freelist. Will block if none are available.
- */
-static struct sf_buf *
-sf_buf_alloc()
+
+__private_extern__ user_ssize_t
+recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count)
 {
-	struct sf_buf *sf;
+	user_ssize_t len = 0;
+	u_int i;
 
-	while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
-		sf_buf_alloc_want = 1;
-		tsleep(&sf_freelist, PVM, "sfbufa", 0);
+	for (i = 0; i < count; i++) {
+		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+
+		if (recv_msg_elem->uio != NULL)
+			len += uio_resid(recv_msg_elem->uio);
 	}
-	SLIST_REMOVE_HEAD(&sf_freelist, free_list);
-	sf->refcnt = 1;
-	return (sf);
+	return (len);
 }
 
-#define dtosf(x)	(&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
-static void
-sf_buf_ref(caddr_t addr, u_int size)
+int
+recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count)
 {
-	struct sf_buf *sf;
+	user_ssize_t len = 0;
+	u_int i;
+
+	for (i = 0; i < count; i++) {
+		struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
+
+		if (recv_msg_elem->uio != NULL) {
+			user_ssize_t resid = uio_resid(recv_msg_elem->uio);
+
+			/*
+			 * Sanity check on the validity of the iovec:
+			 * no point in going over sb_max
+			 */
+			if (resid < 0 || (u_int32_t)resid > sb_max)
+				return (0);
 
-	sf = dtosf(addr);
-	if (sf->refcnt == 0)
-		panic("sf_buf_ref: referencing a free sf_buf");
-	sf->refcnt++;
+			len += resid;
+			if (len < 0 || (u_int32_t)len > sb_max)
+				return (0);
+		}
+	}
+	return (1);
 }
 
-/*
- * Lose a reference to an sf_buf. When none left, detach mapped page
- * and release resources back to the system.
- *
- * Must be called at splimp.
- */
+#if SENDFILE
+
+#define	SFUIOBUFS 64
+
+/* Macros to compute the number of mbufs needed depending on cluster size */
+#define	HOWMANY_16K(n)	((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1)
+#define	HOWMANY_4K(n)	((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1)
+
+/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
+#define	SENDFILE_MAX_BYTES	(SFUIOBUFS << PGSHIFT)
+
+/* Upper send limit in the number of mbuf clusters */
+#define	SENDFILE_MAX_16K	HOWMANY_16K(SENDFILE_MAX_BYTES)
+#define	SENDFILE_MAX_4K		HOWMANY_4K(SENDFILE_MAX_BYTES)
+
 static void
-sf_buf_free(caddr_t addr, u_int size)
+alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
+    struct mbuf **m, boolean_t jumbocl)
 {
-	struct sf_buf *sf;
-	struct vm_page *m;
-
-	sf = dtosf(addr);
-	if (sf->refcnt == 0)
-		panic("sf_buf_free: freeing free sf_buf");
-	sf->refcnt--;
-	if (sf->refcnt == 0) {
-		pmap_qremove((vm_offset_t)addr, 1);
-		m = sf->m;
-		vm_page_unwire(m, 0);
-		/*
-		 * Check for the object going away on us. This can
-		 * happen since we don't hold a reference to it.
-		 * If so, we're responsible for freeing the page.
-		 */
-		if (m->wire_count == 0 && m->object == NULL)
-			vm_page_lock_queues();
-			vm_page_free(m);
-			vm_page_unlock_queues();
-		sf->m = NULL;
-		SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
-		if (sf_buf_alloc_want) {
-			sf_buf_alloc_want = 0;
-			wakeup(&sf_freelist);
-		}
+	unsigned int needed;
+
+	if (pktlen == 0)
+		panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen);
+
+	/*
+	 * Try to allocate for the whole thing.  Since we want full control
+	 * over the buffer size and to accept a partial result, we can't
+	 * use mbuf_allocpacket().  The logic below is similar to sosend().
+	 */
+	*m = NULL;
+	if (pktlen > MBIGCLBYTES && jumbocl) {
+		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
+		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
+	}
+	if (*m == NULL) {
+		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
+		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
+	}
+
+	/*
+	 * Our previous attempt(s) at allocation had failed; the system
+	 * may be short on mbufs, and we want to block until they are
+	 * available.  This time, ask just for 1 mbuf and don't return
+	 * until we get it.
+	 */
+	if (*m == NULL) {
+		needed = 1;
+		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
 	}
+	if (*m == NULL)
+		panic("%s: blocking allocation returned NULL\n", __func__);
+
+	*maxchunks = needed;
 }
 
 /*
  * sendfile(2).
- * int sendfile(int fd, int s, off_t offset, size_t nbytes,
- *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
+ * int sendfile(int fd, int s, off_t offset, off_t *nbytes,
+ *	 struct sf_hdtr *hdtr, int flags)
  *
  * Send a file specified by 'fd' and starting at 'offset' to a socket
- * specified by 's'. Send only 'nbytes' of the file or until EOF if
- * nbytes == 0. Optionally add a header and/or trailer to the socket
- * output. If specified, write the total number of bytes sent into *sbytes.
+ * specified by 's'. Send only '*nbytes' of the file or until EOF if
+ * *nbytes == 0. Optionally add a header and/or trailer to the socket
+ * output. If specified, write the total number of bytes sent into *nbytes.
  */
 int
-sendfile(struct proc *p, struct sendfile_args *uap)
+sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
 {
 	struct fileproc *fp;
 	struct vnode *vp;
-	struct vm_object *obj;
 	struct socket *so;
-	struct mbuf *m;
-	struct sf_buf *sf;
-	struct vm_page *pg;
-	struct writev_args nuap;
-	struct sf_hdtr hdtr;
-	off_t off, xfsize, sbytes = 0;
-	int error = 0, s;
-
-	if (sf_bufs == NULL) {
-		/* Fail if initialization failed */
-		return ENOSYS;
-	}
+	struct writev_nocancel_args nuap;
+	user_ssize_t writev_retval;
+	struct user_sf_hdtr user_hdtr;
+	struct user32_sf_hdtr user32_hdtr;
+	struct user64_sf_hdtr user64_hdtr;
+	off_t off, xfsize;
+	off_t nbytes = 0, sbytes = 0;
+	int error = 0;
+	size_t sizeof_hdtr;
+	off_t file_size;
+	struct vfs_context context = *vfs_context_current();
+
+	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
+	    0, 0, 0, 0);
+
+	AUDIT_ARG(fd, uap->fd);
+	AUDIT_ARG(value32, uap->s);
 
 	/*
 	 * Do argument checking. Must be a regular file in, stream
 	 * type and connected socket out, positive offset.
 	 */
-	if (error = fp_getfvp(p, uap->fd, &fp, &vp))
+	if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
 		goto done;
-	if (fp->f_flag & FREAD) == 0) {
+	}
+	if ((fp->f_flag & FREAD) == 0) {
 		error = EBADF;
 		goto done1;
 	}
-	obj = vp->v_object;
-	if (vp->v_type != VREG || obj == NULL) {
-		error = EINVAL;
+	if (vnode_isreg(vp) == 0) {
+		error = ENOTSUP;
 		goto done1;
 	}
 	error = file_socket(uap->s, &so);
-	if (error)
+	if (error) {
 		goto done1;
+	}
 	if (so == NULL) {
 		error = EBADF;
 		goto done2;
 	}
-
-	socket_lock(so, 1);
-
 	if (so->so_type != SOCK_STREAM) {
 		error = EINVAL;
-		goto done3;
+		goto done2;
 	}
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		error = ENOTCONN;
-		goto done3;
+		goto done2;
 	}
 	if (uap->offset < 0) {
 		error = EINVAL;
-		goto done3;
+		goto done2;
+	}
+	if (uap->nbytes == USER_ADDR_NULL) {
+		error = EINVAL;
+		goto done2;
+	}
+	if (uap->flags != 0) {
+		error = EINVAL;
+		goto done2;
 	}
 
+	context.vc_ucred = fp->f_fglob->fg_cred;
+
+#if CONFIG_MACF_SOCKET_SUBSET
+	/* JMM - fetch connected sockaddr? */
+	error = mac_socket_check_send(context.vc_ucred, so, NULL);
+	if (error)
+		goto done2;
+#endif
+
+	/*
+	 * Get number of bytes to send
+	 * Should it apply to the size of the header and trailer?
+	 * JMM - error handling?
+	 */
+	copyin(uap->nbytes, &nbytes, sizeof (off_t));
+
 	/*
 	 * If specified, get the pointer to the sf_hdtr struct for
 	 * any headers/trailers.
 	 */
-	if (uap->hdtr != NULL) {
-		error = copyin(CAST_USER_ADDR_T(uap->hdtr), &hdtr, sizeof(hdtr));
+	if (uap->hdtr != USER_ADDR_NULL) {
+		caddr_t hdtrp;
+
+		bzero(&user_hdtr, sizeof (user_hdtr));
+		if (IS_64BIT_PROCESS(p)) {
+			hdtrp = (caddr_t)&user64_hdtr;
+			sizeof_hdtr = sizeof (user64_hdtr);
+		} else {
+			hdtrp = (caddr_t)&user32_hdtr;
+			sizeof_hdtr = sizeof (user32_hdtr);
+		}
+		error = copyin(uap->hdtr, hdtrp, sizeof_hdtr);
 		if (error)
-			goto done3;
+			goto done2;
+		if (IS_64BIT_PROCESS(p)) {
+			user_hdtr.headers = user64_hdtr.headers;
+			user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt;
+			user_hdtr.trailers = user64_hdtr.trailers;
+			user_hdtr.trl_cnt = user64_hdtr.trl_cnt;
+		} else {
+			user_hdtr.headers = user32_hdtr.headers;
+			user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt;
+			user_hdtr.trailers = user32_hdtr.trailers;
+			user_hdtr.trl_cnt = user32_hdtr.trl_cnt;
+		}
+
 		/*
 		 * Send any headers. Wimp out and use writev(2).
 		 */
-		if (hdtr.headers != NULL) {
+		if (user_hdtr.headers != USER_ADDR_NULL) {
+			bzero(&nuap, sizeof (struct writev_args));
 			nuap.fd = uap->s;
-			nuap.iovp = hdtr.headers;
-			nuap.iovcnt = hdtr.hdr_cnt;
-			error = writev(p, &nuap);
-			if (error)
-				goto done3;
-			sbytes += p->p_retval[0];
+			nuap.iovp = user_hdtr.headers;
+			nuap.iovcnt = user_hdtr.hdr_cnt;
+			error = writev_nocancel(p, &nuap, &writev_retval);
+			if (error) {
+				goto done2;
+			}
+			sbytes += writev_retval;
 		}
 	}
 
 	/*
-	 * Protect against multiple writers to the socket.
+	 * Get the file size for 2 reasons:
+	 *  1. We don't want to allocate more mbufs than necessary
+	 *  2. We don't want to read past the end of file
 	 */
-	(void) sblock(&so->so_snd, M_WAIT);
+	if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
+		goto done2;
+	}
 
 	/*
-	 * Loop through the pages in the file, starting with the requested
-	 * offset. Get a file page (do I/O if necessary), map the file page
-	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
-	 * it on the socket.
+	 * Simply read file data into a chain of mbufs that are used with
+	 * scatter-gather reads. We're not (yet?) set up to use zero-copy
+	 * external mbufs that point to the file pages.
 	 */
+	socket_lock(so, 1);
+	error = sblock(&so->so_snd, SBL_WAIT);
+	if (error) {
+		socket_unlock(so, 1);
+		goto done2;
+	}
 	for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
-		vm_object_offset_t pindex;
-		vm_object_offset_t pgoff;
+		mbuf_t	m0 = NULL, m;
+		unsigned int	nbufs = SFUIOBUFS, i;
+		uio_t	auio;
+		char	uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
+		size_t	uiolen;
+		user_ssize_t	rlen;
+		off_t	pgoff;
+		size_t	pktlen;
+		boolean_t jumbocl;
 
-		pindex = OFF_TO_IDX(off);
-retry_lookup:
 		/*
-		 * Calculate the amount to transfer. Not to exceed a page,
+		 * Calculate the amount to transfer.
+		 * Align to round number of pages.
+		 * Not to exceed send socket buffer,
 		 * the EOF, or the passed in nbytes.
 		 */
-		xfsize = obj->un_pager.vnp.vnp_size - off;
-		if (xfsize > PAGE_SIZE_64)
-			xfsize = PAGE_SIZE;
-		pgoff = (vm_object_offset_t)(off & PAGE_MASK_64);
-		if (PAGE_SIZE - pgoff < xfsize)
+		xfsize = sbspace(&so->so_snd);
+
+		if (xfsize <= 0) {
+			if (so->so_state & SS_CANTSENDMORE) {
+				error = EPIPE;
+				goto done3;
+			} else if ((so->so_state & SS_NBIO)) {
+				error = EAGAIN;
+				goto done3;
+			} else {
+				xfsize = PAGE_SIZE;
+			}
+		}
+
+		if (xfsize > SENDFILE_MAX_BYTES)
+			xfsize = SENDFILE_MAX_BYTES;
+		else if (xfsize > PAGE_SIZE)
+			xfsize = trunc_page(xfsize);
+		pgoff = off & PAGE_MASK_64;
+		if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize)
 			xfsize = PAGE_SIZE_64 - pgoff;
-		if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
-			xfsize = uap->nbytes - sbytes;
+		if (nbytes && xfsize > (nbytes - sbytes))
+			xfsize = nbytes - sbytes;
 		if (xfsize <= 0)
 			break;
+		if (off + xfsize > file_size)
+			xfsize = file_size - off;
+		if (xfsize <= 0)
+			break;
+
 		/*
-		 * Optimize the non-blocking case by looking at the socket space
-		 * before going to the extra work of constituting the sf_buf.
+		 * Attempt to use larger than system page-size clusters for
+		 * large writes only if there is a jumbo cluster pool and
+		 * if the socket is marked accordingly.
 		 */
-		if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
-			if (so->so_state & SS_CANTSENDMORE)
-				error = EPIPE;
-			else
-				error = EAGAIN;
-			sbunlock(&so->so_snd, 0); /* will release lock */
-			goto done2;
+		jumbocl = sosendjcl && njcl > 0 &&
+		    ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab);
+
+		socket_unlock(so, 0);
+		alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl);
+		pktlen = mbuf_pkthdr_maxlen(m0);
+		if (pktlen < (size_t)xfsize)
+			xfsize = pktlen;
+
+		auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
+		    UIO_READ, &uio_buf[0], sizeof (uio_buf));
+		if (auio == NULL) {
+			printf("sendfile failed. nbufs = %d. %s", nbufs,
+				"File a radar related to rdar://10146739.\n");
+			mbuf_freem(m0);
+			error = ENXIO;
+			socket_lock(so, 0);
+			goto done3;
 		}
-		/*
-		 * Attempt to look up the page. If the page doesn't exist or the
-		 * part we're interested in isn't valid, then read it from disk.
-		 * If some other part of the kernel has this page (i.e. it's busy),
-		 * then disk I/O may be occuring on it, so wait and retry.
-		 */
-		pg = vm_page_lookup(obj, pindex);
-		if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
-		    !vm_page_is_valid(pg, pgoff, xfsize))) {
-			struct uio auio;
-			struct iovec aiov;
-			int bsize;
-
-			if (pg == NULL) {
-				pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
-				if (pg == NULL) {
-					VM_WAIT;
-					goto retry_lookup;
-				}
-				/*
-				 * don't just clear PG_BUSY manually -
-				 * vm_page_alloc() should be considered opaque,
-				 * use the VM routine provided to clear
-				 * PG_BUSY.
-				 */
-				vm_page_wakeup(pg);
 
+		for (i = 0, m = m0, uiolen = 0;
+		    i < nbufs && m != NULL && uiolen < (size_t)xfsize;
+		    i++, m = mbuf_next(m)) {
+			size_t mlen = mbuf_maxlen(m);
+
+			if (mlen + uiolen > (size_t)xfsize)
+				mlen = xfsize - uiolen;
+			mbuf_setlen(m, mlen);
+			uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)),
+			    mlen);
+			uiolen += mlen;
+		}
+
+		if (xfsize != uio_resid(auio))
+			printf("sendfile: xfsize: %lld != uio_resid(auio): "
+				"%lld\n", xfsize, (long long)uio_resid(auio));
+
+		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
+		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
+		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
+		error = fo_read(fp, auio, FOF_OFFSET, &context);
+		socket_lock(so, 0);
+		if (error != 0) {
+			if (uio_resid(auio) != xfsize && (error == ERESTART ||
+			    error == EINTR || error == EWOULDBLOCK)) {
+				error = 0;
+			} else {
+				mbuf_freem(m0);
+				goto done3;
 			}
-			/*
-			 * Ensure that our page is still around when the I/O completes.
-			 */
-			vm_page_io_start(pg);
-			vm_page_wire(pg);
-			/*
-			 * Get the page from backing store.
-			 */
-			bsize = vp->v_mount->mnt_vfsstat.f_iosize;
-			auio.uio_iov = &aiov;
-			auio.uio_iovcnt = 1;
-			aiov.iov_base = 0;
-			aiov.iov_len = MAXBSIZE;
-			auio.uio_offset = trunc_page(off);
-			auio.uio_segflg = UIO_NOCOPY;
-			auio.uio_rw = UIO_READ;
-			uio_setresid(&auio, MAXBSIZE);
-			error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
-			        p->p_ucred);
-			vm_page_flag_clear(pg, PG_ZERO);
-			vm_page_io_finish(pg);
-			if (error) {
-				vm_page_unwire(pg, 0);
-				/*
-				 * See if anyone else might know about this page.
-				 * If not and it is not valid, then free it.
-				 */
-				if (pg->wire_count == 0 && pg->valid == 0 &&
-				    pg->busy == 0 && !(pg->flags & PG_BUSY) &&
-				    pg->hold_count == 0)
-					vm_page_lock_queues();
-					vm_page_free(pg);
-					vm_page_unlock_queues();
-				sbunlock(&so->so_snd, 0); /* will release socket lock */
-				goto done2;
-			}
-		} else {
-			if ((pg->flags & PG_BUSY) || pg->busy)  {
-				s = splvm();
-				if ((pg->flags & PG_BUSY) || pg->busy) {
-					/*
-					 * Page is busy. Wait and retry.
-					 */
-					vm_page_flag_set(pg, PG_WANTED);
-					tsleep(pg, PVM, "sfpbsy", 0);
-					goto retry_lookup;
-				}
-			}
-			/*
-			 * Protect from having the page ripped out from beneath us.
-			 */
-			vm_page_wire(pg);
 		}
-		/*
-		 * Allocate a kernel virtual page and insert the physical page
-		 * into it.
-		 */
-		sf = sf_buf_alloc();
-		sf->m = pg;
-		pmap_qenter(sf->kva, &pg, 1);
-		/*
-		 * Get an mbuf header and set it up as having external storage.
-		 */
-		MGETHDR(m, M_WAIT, MT_DATA);
-		if (m == NULL) {
-			error = ENOBUFS;
-			sbunlock(&so->so_snd, 0); /* will release socket lock */
-			goto done2;
+		xfsize -= uio_resid(auio);
+		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END),
+		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
+		    (unsigned int)(xfsize & 0x0ffffffff), 0, 0);
+
+		if (xfsize == 0) {
+			// printf("sendfile: fo_read 0 bytes, EOF\n");
+			break;
 		}
-		m->m_ext.ext_free = sf_buf_free;
-		m->m_ext.ext_ref = sf_buf_ref;
-		m->m_ext.ext_buf = (void *)sf->kva;
-		m->m_ext.ext_size = PAGE_SIZE;
-		m->m_data = (char *) sf->kva + pgoff;
-		m->m_flags |= M_EXT;
-		m->m_pkthdr.len = m->m_len = xfsize;
-		/*
-		 * Add the buffer to the socket buffer chain.
-		 */
+		if (xfsize + off > file_size)
+			printf("sendfile: xfsize: %lld + off: %lld > file_size:"
+			    "%lld\n", xfsize, off, file_size);
+		for (i = 0, m = m0, rlen = 0;
+		    i < nbufs && m != NULL && rlen < xfsize;
+		    i++, m = mbuf_next(m)) {
+			size_t mlen = mbuf_maxlen(m);
+
+			if (rlen + mlen > (size_t)xfsize)
+				mlen = xfsize - rlen;
+			mbuf_setlen(m, mlen);
+
+			rlen += mlen;
+		}
+		mbuf_pkthdr_setlen(m0, xfsize);
+
 retry_space:
 		/*
 		 * Make sure that the socket is still able to take more data.
@@ -2011,69 +3637,98 @@ retry_space:
 				error = so->so_error;
 				so->so_error = 0;
 			}
-			m_freem(m);
-			sbunlock(&so->so_snd, 0); /* will release socket lock */
-			goto done2;
+			m_freem(m0);
+			goto done3;
 		}
 		/*
 		 * Wait for socket space to become available. We do this just
 		 * after checking the connection state above in order to avoid
 		 * a race condition with sbwait().
 		 */
-		if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
+		if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) {
 			if (so->so_state & SS_NBIO) {
-				m_freem(m);
-				sbunlock(&so->so_snd, 0); /* will release socket lock */
+				m_freem(m0);
 				error = EAGAIN;
-				goto done2;
+				goto done3;
 			}
+			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT |
+			    DBG_FUNC_START), uap->s, 0, 0, 0, 0);
 			error = sbwait(&so->so_snd);
+			KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT|
+			    DBG_FUNC_END), uap->s, 0, 0, 0, 0);
 			/*
 			 * An error from sbwait usually indicates that we've
 			 * been interrupted by a signal. If we've sent anything
 			 * then return bytes sent, otherwise return the error.
 			 */
 			if (error) {
-				m_freem(m);
-				sbunlock(&so->so_snd, 0);
-				goto done2;
+				m_freem(m0);
+				goto done3;
 			}
 			goto retry_space;
 		}
-		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
-		splx(s);
+
+		struct mbuf *control = NULL;
+		{
+			/*
+			 * Socket filter processing
+			 */
+
+			error = sflt_data_out(so, NULL, &m0, &control, 0);
+			if (error) {
+				if (error == EJUSTRETURN) {
+					error = 0;
+					continue;
+				}
+				goto done3;
+			}
+			/*
+			 * End Socket filter processing
+			 */
+		}
+		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
+		    uap->s, 0, 0, 0, 0);
+		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
+		    0, control, p);
+		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
+		    uap->s, 0, 0, 0, 0);
 		if (error) {
-			sbunlock(&so->so_snd, 0); /* will release socket lock */
-			goto done2;
+			goto done3;
 		}
 	}
-	sbunlock(&so->so_snd, 0); /* will release socket lock */
-
+	sbunlock(&so->so_snd, FALSE);	/* will unlock socket */
 	/*
 	 * Send trailers. Wimp out and use writev(2).
 	 */
-	if (uap->hdtr != NULL && hdtr.trailers != NULL) {
-			nuap.fd = uap->s;
-			nuap.iovp = hdtr.trailers;
-			nuap.iovcnt = hdtr.trl_cnt;
-			error = writev(p, &nuap);
-			if (error)
-				goto done2;
-			sbytes += p->p_retval[0];
+	if (uap->hdtr != USER_ADDR_NULL &&
+	    user_hdtr.trailers != USER_ADDR_NULL) {
+		bzero(&nuap, sizeof (struct writev_args));
+		nuap.fd = uap->s;
+		nuap.iovp = user_hdtr.trailers;
+		nuap.iovcnt = user_hdtr.trl_cnt;
+		error = writev_nocancel(p, &nuap, &writev_retval);
+		if (error) {
+			goto done2;
+		}
+		sbytes += writev_retval;
 	}
 done2:
 	file_drop(uap->s);
 done1:
 	file_drop(uap->fd);
 done:
-	if (uap->sbytes != NULL) {
+	if (uap->nbytes != USER_ADDR_NULL) {
 		/* XXX this appears bogus for some early failure conditions */
-		copyout(&sbytes, CAST_USER_ADDR_T(uap->sbytes), sizeof(off_t));
+		copyout(&sbytes, uap->nbytes, sizeof (off_t));
 	}
+	KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s,
+	    (unsigned int)((sbytes >> 32) & 0x0ffffffff),
+	    (unsigned int)(sbytes & 0x0ffffffff), error, 0);
 	return (error);
 done3:
-	socket_unlock(so, 1);
+	sbunlock(&so->so_snd, FALSE);	/* will unlock socket */
 	goto done2;
 }
 
-#endif
+
+#endif /* SENDFILE */