/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
- *
+ *
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
- *
+ *
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
#include <sys/file_internal.h>
#include <sys/vnode_internal.h>
#include <sys/malloc.h>
+#include <sys/mcache.h>
#include <sys/mbuf.h>
#include <kern/lock.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>
+#include <kern/task.h>
+#include <sys/priv.h>
#include <security/audit/audit.h>
#endif /* MAC_SOCKET_SUBSET */
#define f_flag f_fglob->fg_flag
-#define f_type f_fglob->fg_type
+#define f_type f_fglob->fg_ops->fo_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3))
-#define HACK_FOR_4056224 1
-#if HACK_FOR_4056224
-static pid_t last_pid_4056224 = 0;
-#endif /* HACK_FOR_4056224 */
-
/* TODO: should be in header file */
int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int);
int32_t *);
static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t,
int32_t *);
+static int connectit(struct socket *, struct sockaddr *);
static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t,
size_t, boolean_t);
static int getsockaddr_s(struct socket *, struct sockaddr_storage *,
user_addr_t, size_t, boolean_t);
+static int getsockaddrlist(struct socket *, struct sockaddr_list **,
+ user_addr_t, socklen_t, boolean_t);
#if SENDFILE
static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **,
boolean_t);
#endif /* SENDFILE */
+static int connectx_nocancel(struct proc *, struct connectx_args *, int *);
+static int connectitx(struct socket *, struct sockaddr_list **,
+ struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *);
+static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *);
+static int disconnectx_nocancel(struct proc *, struct disconnectx_args *,
+ int *);
+static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int);
/*
* System call interface to the socket abstraction.
*/
-extern struct fileops socketops;
+extern const struct fileops socketops;
/*
* Returns: 0 Success
* socreate:EPROTONOSUPPORT
* socreate:ENOBUFS
* socreate:ENOMEM
- * socreate:EISCONN
* socreate:??? [other protocol families, IPSEC]
*/
int
-socket(struct proc *p, struct socket_args *uap, int32_t *retval)
+socket(struct proc *p,
+ struct socket_args *uap,
+ int32_t *retval)
+{
+ /*
+ * Plain socket(2): forward the arguments to socket_common() with
+ * the caller's own pid (proc_selfpid()) as the effective pid and
+ * the delegate flag set to 0.
+ */
+ return (socket_common(p, uap->domain, uap->type, uap->protocol,
+ proc_selfpid(), retval, 0));
+}
+
+/*
+ * socket_delegate: create a socket on behalf of another process.
+ *
+ * Identical to socket() except that the effective pid is taken from
+ * uap->epid and socket_common() is called with its delegate flag set.
+ */
+int
+socket_delegate(struct proc *p,
+    struct socket_delegate_args *uap,
+    int32_t *retval)
+{
+	int error;
+
+	error = socket_common(p, uap->domain, uap->type, uap->protocol,
+	    uap->epid, retval, 1);
+	return (error);
+}
+
+static int
+socket_common(struct proc *p,
+ int domain,
+ int type,
+ int protocol,
+ pid_t epid,
+ int32_t *retval,
+ int delegate)
{
struct socket *so;
struct fileproc *fp;
int fd, error;
- AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol);
+ AUDIT_ARG(socket, domain, type, protocol);
#if CONFIG_MACF_SOCKET_SUBSET
- if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain,
- uap->type, uap->protocol)) != 0)
+ if ((error = mac_socket_check_create(kauth_cred_get(), domain,
+ type, protocol)) != 0)
return (error);
#endif /* MAC_SOCKET_SUBSET */
+ if (delegate) {
+ error = priv_check_cred(kauth_cred_get(),
+ PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0);
+ if (error)
+ return (EACCES);
+ }
+
error = falloc(p, &fp, &fd, vfs_context_current());
if (error) {
return (error);
}
fp->f_flag = FREAD|FWRITE;
- fp->f_type = DTYPE_SOCKET;
fp->f_ops = &socketops;
- error = socreate(uap->domain, &so, uap->type, uap->protocol);
+ if (delegate)
+ error = socreate_delegate(domain, &so, type, protocol, epid);
+ else
+ error = socreate(domain, &so, type, protocol);
+
if (error) {
fp_free(p, fd, fp);
} else {
- thread_t thread;
- struct uthread *ut;
-
- thread = current_thread();
- ut = get_bsdthread_info(thread);
-
- /* if this is a backgrounded thread then throttle all new sockets */
- if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
- so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
- so->so_background_thread = thread;
- }
fp->f_data = (caddr_t)so;
proc_fdlock(p);
* getsockaddr:EINVAL Invalid argument
* getsockaddr:ENOMEM Not enough space
* getsockaddr:EFAULT Bad address
- * sobind:???
+ * sobindlock:???
*/
/* ARGSUSED */
int
AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
#if CONFIG_MACF_SOCKET_SUBSET
if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0)
- error = sobind(so, sa);
+ error = sobindlock(so, sa, 1); /* will lock socket */
#else
- error = sobind(so, sa);
+ error = sobindlock(so, sa, 1); /* will lock socket */
#endif /* MAC_SOCKET_SUBSET */
if (want_free)
FREE(sa, M_SONAME);
* the file descriptor should the MAC check fails.
*/
if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) {
+ socket_lock(so, 1);
so->so_state &= ~(SS_NOFDREF | SS_COMP);
so->so_head = NULL;
+ socket_unlock(so, 1);
soclose(so);
/* Drop reference on listening socket */
sodereference(head);
error = falloc(p, &fp, &newfd, vfs_context_current());
if (error) {
/*
- * Probably ran out of file descriptors. Put the
- * unaccepted connection back onto the queue and
- * do another wakeup so some other process might
- * have a chance at it.
+ * Probably ran out of file descriptors.
+ *
+ * <rdar://problem/8554930>
+ * Don't put this back on the socket like we used to, that
+ * just causes the client to spin. Drop the socket.
*/
- socket_lock(head, 0);
- TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
- head->so_qlen++;
- wakeup_one((caddr_t)&head->so_timeo);
- socket_unlock(head, 1);
+ socket_lock(so, 1);
+ so->so_state &= ~(SS_NOFDREF | SS_COMP);
+ so->so_head = NULL;
+ socket_unlock(so, 1);
+ soclose(so);
+ sodereference(head);
goto out;
}
*retval = newfd;
- fp->f_type = DTYPE_SOCKET;
fp->f_flag = fflag;
fp->f_ops = &socketops;
fp->f_data = (caddr_t)so;
releasefd:
/*
- * If the socket has been marked as inactive by soacceptfilter(),
- * disallow further operations on it. We explicitly call shutdown
- * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
- * states are set for the socket. This would also flush out data
- * hanging off the receive list of this socket.
+ * If the socket has been marked as inactive by sosetdefunct(),
+ * disallow further operations on it.
*/
if (so->so_flags & SOF_DEFUNCT) {
- (void) soshutdownlock(so, SHUT_RD);
- (void) soshutdownlock(so, SHUT_WR);
- (void) sodisconnectlocked(so);
+ sodefunct(current_proc(), so,
+ SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
}
if (dosocklock)
}
int
-connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused int32_t *retval)
+connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval)
{
+#pragma unused(p, retval)
struct socket *so;
struct sockaddr_storage ss;
struct sockaddr *sa = NULL;
- lck_mtx_t *mutex_held;
- boolean_t want_free = TRUE;
int error;
int fd = uap->s;
boolean_t dgram;
error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram);
} else {
error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram);
- if (error == 0) {
+ if (error == 0)
sa = (struct sockaddr *)&ss;
- want_free = FALSE;
- }
}
if (error != 0)
goto out;
+ error = connectit(so, sa);
+
+ if (sa != NULL && sa != SA(&ss))
+ FREE(sa, M_SONAME);
+ if (error == ERESTART)
+ error = EINTR;
+out:
+ file_drop(fd);
+ return (error);
+}
+
+/*
+ * connectx_nocancel: non-cancellable body of connectx().
+ *
+ * Looks up the socket behind uap->s, copies in the optional source
+ * address (uap->src/uap->srclen) and the destination address list
+ * (uap->dsts/uap->dstlen), and hands both lists to connectitx().  The
+ * connection id filled in by connectitx() is copied out to uap->cid
+ * when the caller supplied a buffer for it.
+ *
+ * Returns:	0			Success
+ *		EBADF			file_socket() yielded no socket
+ *		EINTR			connectitx() returned ERESTART
+ *		file_socket:???
+ *		getsockaddrlist:???
+ *		connectitx:???
+ *		copyout:???		Only reported if connectitx() succeeded
+ */
+static int
+connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval)
+{
+#pragma unused(retval)
+	struct sockaddr_list *src_sl = NULL, *dst_sl = NULL;
+	struct socket *so;
+	int error, error1, fd = uap->s;
+	boolean_t dgram;
+	connid_t cid = CONNID_ANY;
+
+	AUDIT_ARG(fd, uap->s);
+	error = file_socket(fd, &so);
+	if (error != 0)
+		return (error);
+	if (so == NULL) {
+		error = EBADF;
+		goto out;
+	}
+
+	/*
+	 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET
+	 * if this is a datagram socket; translate for other types.
+	 * The flag handed down is therefore !dgram, exactly as in
+	 * connect_nocancel().
+	 */
+	dgram = (so->so_type == SOCK_DGRAM);
+
+	/*
+	 * Get socket address(es) now before we obtain socket lock; use
+	 * sockaddr_list for src address for convenience, if present,
+	 * even though it won't hold more than one.
+	 */
+	if (uap->src != USER_ADDR_NULL && (error = getsockaddrlist(so,
+	    &src_sl, uap->src, uap->srclen, !dgram)) != 0)
+		goto out;
+
+	error = getsockaddrlist(so, &dst_sl, uap->dsts, uap->dstlen, !dgram);
+	if (error != 0)
+		goto out;
+
+	VERIFY(dst_sl != NULL &&
+	    !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0);
+
+	error = connectitx(so, &src_sl, &dst_sl, p, uap->ifscope,
+	    uap->aid, &cid);
+	if (error == ERESTART)
+		error = EINTR;
+
+	/*
+	 * Hand the connection id back even when the connect failed, but
+	 * do not hide a failed copyout behind a successful connect; an
+	 * error from connectitx() takes precedence.
+	 */
+	if (uap->cid != USER_ADDR_NULL) {
+		error1 = copyout(&cid, uap->cid, sizeof (cid));
+		if (error1 != 0 && error == 0)
+			error = error1;
+	}
+
+out:
+	file_drop(fd);
+	if (src_sl != NULL)
+		sockaddrlist_free(src_sl);
+	if (dst_sl != NULL)
+		sockaddrlist_free(dst_sl);
+	return (error);
+}
+
+/*
+ * connectx: cancellation-point wrapper around connectx_nocancel().
+ */
+int
+connectx(struct proc *p, struct connectx_args *uap, int *retval)
+{
+	int error;
+
+	/*
+	 * This call resembles a POSIX interface closely enough that it
+	 * is treated as an (unofficial) cancellation point.
+	 */
+	__pthread_testcancel(1);
+	error = connectx_nocancel(p, uap, retval);
+	return (error);
+}
+
+/*
+ * connectit: connect a socket to a single destination address.
+ *
+ * After the optional MAC connect check, takes the socket lock and
+ * calls soconnectlock().  A non-blocking socket (SS_NBIO) gets
+ * EALREADY if a connect is already in progress, or EINPROGRESS once
+ * this one has been started.  Otherwise the caller sleeps on so_timeo
+ * until SS_ISCONNECTING clears or so_error is set; the pending
+ * so_error is then returned and reset.  Errors from msleep() are
+ * returned as-is; no ERESTART translation is done here.
+ */
+static int
+connectit(struct socket *so, struct sockaddr *sa)
+{
+ int error;
+
AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa);
#if CONFIG_MACF_SOCKET_SUBSET
- if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) {
- if (want_free)
- FREE(sa, M_SONAME);
+ if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0)
+ return (error);
+#endif /* MAC_SOCKET_SUBSET */
+
+ socket_lock(so, 1);
+ if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
+ error = EALREADY;
+ goto out;
+ }
+ error = soconnectlock(so, sa, 0);
+ if (error != 0) {
+ /* the attempt failed outright; do not leave the flag set */
+ so->so_state &= ~SS_ISCONNECTING;
+ goto out;
+ }
+ if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
+ error = EINPROGRESS;
goto out;
}
+ /* blocking case: sleep (interruptibly, PCATCH) until resolved */
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ lck_mtx_t *mutex_held;
+
+ if (so->so_proto->pr_getlock != NULL)
+ mutex_held = (*so->so_proto->pr_getlock)(so, 0);
+ else
+ mutex_held = so->so_proto->pr_domain->dom_mtx;
+ error = msleep((caddr_t)&so->so_timeo, mutex_held,
+ PSOCK | PCATCH, __func__, 0);
+ /* SS_DRAINING overrides whatever msleep() returned */
+ if (so->so_state & SS_DRAINING) {
+ error = ECONNABORTED;
+ }
+ if (error != 0)
+ break;
+ }
+ if (error == 0) {
+ error = so->so_error;
+ so->so_error = 0;
+ }
+out:
+ socket_unlock(so, 1);
+ return (error);
+}
+
+/*
+ * connectitx: connect a socket using source/destination address lists.
+ *
+ * Every entry of *dst_sl is audited and (with MAC enabled) checked
+ * before the socket lock is taken; the first failing MAC check ends
+ * the call.  The lists, ifscope, aid and pcid are then passed to
+ * soconnectxlocked().  The EALREADY/EINPROGRESS handling for
+ * non-blocking sockets and the sleep on so_timeo mirror connectit().
+ * Errors from msleep() are returned as-is; no ERESTART translation is
+ * done here.
+ */
+static int
+connectitx(struct socket *so, struct sockaddr_list **src_sl,
+ struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
+ associd_t aid, connid_t *pcid)
+{
+ struct sockaddr_entry *se;
+ int error;
+
+ VERIFY(dst_sl != NULL && *dst_sl != NULL);
+
+ TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) {
+ VERIFY(se->se_addr != NULL);
+ AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()),
+ se->se_addr);
+#if CONFIG_MACF_SOCKET_SUBSET
+ if ((error = mac_socket_check_connect(kauth_cred_get(),
+ so, se->se_addr)) != 0)
+ return (error);
#endif /* MAC_SOCKET_SUBSET */
- socket_lock(so, 1);
+ }
+ socket_lock(so, 1);
if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
- if (want_free)
- FREE(sa, M_SONAME);
- socket_unlock(so, 1);
error = EALREADY;
goto out;
}
- error = soconnectlock(so, sa, 0);
- if (error)
- goto bad;
+ error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope,
+ aid, pcid, 0, NULL, 0);
+ if (error != 0) {
+ /* the attempt failed outright; do not leave the flag set */
+ so->so_state &= ~SS_ISCONNECTING;
+ goto out;
+ }
if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
- if (want_free)
- FREE(sa, M_SONAME);
- socket_unlock(so, 1);
error = EINPROGRESS;
goto out;
}
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ lck_mtx_t *mutex_held;
+
if (so->so_proto->pr_getlock != NULL)
mutex_held = (*so->so_proto->pr_getlock)(so, 0);
else
mutex_held = so->so_proto->pr_domain->dom_mtx;
error = msleep((caddr_t)&so->so_timeo, mutex_held,
- PSOCK | PCATCH, "connect", 0);
- if ((so->so_state & SS_DRAINING)) {
+ PSOCK | PCATCH, __func__, 0);
+ /* SS_DRAINING overrides whatever msleep() returned */
+ if (so->so_state & SS_DRAINING) {
error = ECONNABORTED;
}
- if (error)
+ if (error != 0)
break;
}
if (error == 0) {
error = so->so_error;
so->so_error = 0;
}
-bad:
- so->so_state &= ~SS_ISCONNECTING;
+out:
socket_unlock(so, 1);
- if (want_free)
- FREE(sa, M_SONAME);
- if (error == ERESTART)
- error = EINTR;
+ return (error);
+}
+
+/*
+ * peeloff: cancellation-point wrapper around peeloff_nocancel().
+ */
+int
+peeloff(struct proc *p, struct peeloff_args *uap, int *retval)
+{
+	int error;
+
+	/*
+	 * This call resembles a POSIX interface closely enough that it
+	 * is treated as an (unofficial) cancellation point.
+	 */
+	__pthread_testcancel(1);
+	error = peeloff_nocancel(p, uap, retval);
+	return (error);
+}
+
+/*
+ * peeloff_nocancel: non-cancellable body of peeloff().
+ *
+ * Asks sopeelofflocked() to detach the association uap->aid from the
+ * socket behind descriptor uap->s into a socket of its own, then
+ * installs that socket in a newly allocated descriptor.  On success
+ * *retval is the new descriptor; on any failure it is left at -1.
+ *
+ * Returns:	0			Success
+ *		ENOTSOCK		fp_getfsock() reported EOPNOTSUPP
+ *		EBADF			Descriptor has no socket attached
+ *		fp_getfsock:???
+ *		sopeelofflocked:???
+ *		falloc:???
+ */
+static int
+peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval)
+{
+	struct fileproc *parent_fp, *peeled_fp;
+	struct socket *mp_so, *so = NULL;
+	int peeled_fd, fd = uap->s;
+	short fflag;		/* type must match fp->f_flag */
+	int error;
+
+	*retval = -1;
+
+	error = fp_getfsock(p, fd, &parent_fp, &mp_so);
+	if (error != 0) {
+		/* lookup failed: this path returns without file_drop() */
+		return ((error == EOPNOTSUPP) ? ENOTSOCK : error);
+	}
+	if (mp_so == NULL) {
+		error = EBADF;
+		goto out;
+	}
+
+	socket_lock(mp_so, 1);
+	error = sopeelofflocked(mp_so, uap->aid, &so);
+	if (error != 0) {
+		socket_unlock(mp_so, 1);
+		goto out;
+	}
+	VERIFY(so != NULL);
+	socket_unlock(mp_so, 0);	/* keep ref on mp_so for us */
+
+	/* the new descriptor inherits the flags of the original one */
+	fflag = parent_fp->f_flag;
+	error = falloc(p, &peeled_fp, &peeled_fd, vfs_context_current());
+	if (error != 0) {
+		/* drop this socket (probably ran out of file descriptors) */
+		soclose(so);
+		sodereference(mp_so);	/* our mp_so ref */
+		goto out;
+	}
+
+	peeled_fp->f_flag = fflag;
+	peeled_fp->f_ops = &socketops;
+	peeled_fp->f_data = (caddr_t)so;
+
+	/*
+	 * If the socket has been marked as inactive by sosetdefunct(),
+	 * disallow further operations on it.
+	 */
+	if (so->so_flags & SOF_DEFUNCT) {
+		sodefunct(current_proc(), so,
+		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
+	}
+
+	proc_fdlock(p);
+	procfdtbl_releasefd(p, peeled_fd, NULL);
+	fp_drop(p, peeled_fd, peeled_fp, 1);
+	proc_fdunlock(p);
+
+	sodereference(mp_so);		/* our mp_so ref */
+	*retval = peeled_fd;
+
+out:
+	file_drop(fd);
+	return (error);
+}
+
+/*
+ * disconnectx: cancellation-point wrapper around disconnectx_nocancel().
+ */
+int
+disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval)
+{
+	int error;
+
+	/*
+	 * This call resembles a POSIX interface closely enough that it
+	 * is treated as an (unofficial) cancellation point.
+	 */
+	__pthread_testcancel(1);
+	error = disconnectx_nocancel(p, uap, retval);
+	return (error);
+}
+
+static int
+disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval)
+{
+#pragma unused(p, retval)
+ struct socket *so;
+ int fd = uap->s;
+ int error;
+
+ error = file_socket(fd, &so);
+ if (error != 0)
+ return (error);
+ if (so == NULL) {
+ error = EBADF;
+ goto out;
+ }
+
+ error = sodisconnectx(so, uap->aid, uap->cid);
out:
file_drop(fd);
return (error);
goto free2;
}
fp1->f_flag = FREAD|FWRITE;
- fp1->f_type = DTYPE_SOCKET;
fp1->f_ops = &socketops;
fp1->f_data = (caddr_t)so1;
sv[0] = fd;
goto free3;
}
fp2->f_flag = FREAD|FWRITE;
- fp2->f_type = DTYPE_SOCKET;
fp2->f_ops = &socketops;
fp2->f_data = (caddr_t)so2;
sv[1] = fd;
}
}
+ if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
+ goto free4;
+
proc_fdlock(p);
procfdtbl_releasefd(p, sv[0], NULL);
procfdtbl_releasefd(p, sv[1], NULL);
fp_drop(p, sv[1], fp2, 1);
proc_fdunlock(p);
- error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof (int));
- return (error);
+ return (0);
free4:
fp_free(p, sv[1], fp2);
free3:
/*
* We check the state without holding the socket lock;
* if a race condition occurs, it would simply result
- * in an extra call to the MAC check function.
+ * in an extra call to the MAC check function.
*/
- if (!(so->so_state & SS_ISCONNECTED) &&
+ if ( to != NULL &&
+ !(so->so_state & SS_DEFUNCT) &&
(error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0)
goto bad;
#endif /* MAC_SOCKET_SUBSET */
len = uio_resid(uiop);
- error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control,
- flags);
+ error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0,
+ control, flags);
if (error != 0) {
if (uio_resid(uiop) != len && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
sendto(struct proc *p, struct sendto_args *uap, int32_t *retval)
{
__pthread_testcancel(1);
- return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
+ return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval));
}
int
-sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, int32_t *retval)
+sendto_nocancel(struct proc *p,
+ struct sendto_nocancel_args *uap,
+ int32_t *retval)
{
struct user_msghdr msg;
int error;
uio_free(auio);
}
-#if HACK_FOR_4056224
- /*
- * Radar 4056224
- * Temporary workaround to let send() and recv() work over
- * a pipe for binary compatibility
- * This will be removed in the release following Tiger
- */
- if (error == ENOTSOCK) {
- struct fileproc *fp;
-
- if (fp_lookup(p, uap->s, &fp, 0) == 0) {
- (void) fp_drop(p, uap->s, fp, 0);
-
- if (fp->f_type == DTYPE_PIPE) {
- struct write_args write_uap;
- user_ssize_t write_retval;
-
- if (p->p_pid > last_pid_4056224) {
- last_pid_4056224 = p->p_pid;
-
- printf("%s[%d] uses send/recv "
- "on a pipe\n", p->p_comm, p->p_pid);
- }
-
- bzero(&write_uap, sizeof (struct write_args));
- write_uap.fd = uap->s;
- write_uap.cbuf = uap->buf;
- write_uap.nbyte = uap->len;
-
- error = write(p, &write_uap, &write_retval);
- *retval = (int)write_retval;
- }
- }
- }
-#endif /* HACK_FOR_4056224 */
-
KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0);
return (error);
sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval)
{
__pthread_testcancel(1);
- return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
+ return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval));
}
int
user_msg.msg_iov = CAST_USER_ADDR_T(iovp);
/* finish setup of uio_t */
- uio_calculateresid(auio);
+ error = uio_calculateresid(auio);
+ if (error) {
+ goto done;
+ }
} else {
user_msg.msg_iov = 0;
}
recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
user_addr_t namelenp, int32_t *retval)
{
- int len, error;
+ ssize_t len;
+ int error;
struct mbuf *m, *control = 0;
user_addr_t ctlbuf;
struct socket *so;
* if a race condition occurs, it would simply result
* in an extra call to the MAC check function.
*/
- if (!(so->so_state & SS_ISCONNECTED) &&
+ if (!(so->so_state & SS_DEFUNCT) &&
+ !(so->so_state & SS_ISCONNECTED) &&
+ !(so->so_proto->pr_flags & PR_CONNREQUIRED) &&
(error = mac_socket_check_receive(kauth_cred_get(), so)) != 0)
goto out1;
#endif /* MAC_SOCKET_SUBSET */
while (m && len > 0) {
unsigned int tocopy;
struct cmsghdr *cp = mtod(m, struct cmsghdr *);
-
- /*
- * SCM_TIMESTAMP hack because struct timeval has a
- * different size for 32 bits and 64 bits processes
- */
- if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
- unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
- struct cmsghdr *tmp_cp = (struct cmsghdr *)tmp_buffer;
- int tmp_space;
- struct timeval *tv = (struct timeval *)CMSG_DATA(cp);
-
- tmp_cp->cmsg_level = SOL_SOCKET;
- tmp_cp->cmsg_type = SCM_TIMESTAMP;
-
- if (proc_is64bit(p)) {
- struct user64_timeval *tv64 = (struct user64_timeval *)CMSG_DATA(tmp_cp);
-
- tv64->tv_sec = tv->tv_sec;
- tv64->tv_usec = tv->tv_usec;
-
- tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
- tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
- } else {
- struct user32_timeval *tv32 = (struct user32_timeval *)CMSG_DATA(tmp_cp);
-
- tv32->tv_sec = tv->tv_sec;
- tv32->tv_usec = tv->tv_usec;
+ int cp_size = CMSG_ALIGN(cp->cmsg_len);
+ int buflen = m->m_len;
- tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
- tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
- }
- if (len >= tmp_space) {
- tocopy = tmp_space;
- } else {
- mp->msg_flags |= MSG_CTRUNC;
- tocopy = len;
- }
- error = copyout(tmp_buffer, ctlbuf, tocopy);
- if (error)
- goto out;
+ while (buflen > 0 && len > 0) {
+
+ /*
+ * SCM_TIMESTAMP hack because struct timeval has a
+ * different size for 32 bits and 64 bits processes
+ */
+ if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) {
+ unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
+ struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer;
+ int tmp_space;
+ struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp);
+
+ tmp_cp->cmsg_level = SOL_SOCKET;
+ tmp_cp->cmsg_type = SCM_TIMESTAMP;
+
+ if (proc_is64bit(p)) {
+ struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp);
+
+ tv64->tv_sec = tv->tv_sec;
+ tv64->tv_usec = tv->tv_usec;
+
+ tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
+ tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
+ } else {
+ struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp);
+
+ tv32->tv_sec = tv->tv_sec;
+ tv32->tv_usec = tv->tv_usec;
+
+ tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
+ tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
+ }
+ if (len >= tmp_space) {
+ tocopy = tmp_space;
+ } else {
+ mp->msg_flags |= MSG_CTRUNC;
+ tocopy = len;
+ }
+ error = copyout(tmp_buffer, ctlbuf, tocopy);
+ if (error)
+ goto out;
- } else {
- if (len >= m->m_len) {
- tocopy = m->m_len;
} else {
- mp->msg_flags |= MSG_CTRUNC;
- tocopy = len;
+
+ if (cp_size > buflen) {
+ panic("cp_size > buflen, something wrong with alignment!");
+ }
+
+ if (len >= cp_size) {
+ tocopy = cp_size;
+ } else {
+ mp->msg_flags |= MSG_CTRUNC;
+ tocopy = len;
+ }
+
+ error = copyout((caddr_t) cp, ctlbuf,
+ tocopy);
+ if (error)
+ goto out;
}
-
- error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf,
- tocopy);
- if (error)
- goto out;
+
+
+ ctlbuf += tocopy;
+ len -= tocopy;
+
+ buflen -= cp_size;
+ cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size);
+ cp_size = CMSG_ALIGN(cp->cmsg_len);
}
- ctlbuf += tocopy;
- len -= tocopy;
m = m->m_next;
}
mp->msg_controllen = ctlbuf - mp->msg_control;
return (error);
}
-
/*
* Returns: 0 Success
* ENOMEM
uio_free(auio);
}
-#if HACK_FOR_4056224
- /*
- * Radar 4056224
- * Temporary workaround to let send() and recv() work over
- * a pipe for binary compatibility
- * This will be removed in the release following Tiger
- */
- if (error == ENOTSOCK && proc_is64bit(p) == 0) {
- struct fileproc *fp;
-
- if (fp_lookup(p, uap->s, &fp, 0) == 0) {
- (void) fp_drop(p, uap->s, fp, 0);
-
- if (fp->f_type == DTYPE_PIPE) {
- struct read_args read_uap;
- user_ssize_t read_retval;
-
- if (p->p_pid > last_pid_4056224) {
- last_pid_4056224 = p->p_pid;
-
- printf("%s[%d] uses send/recv on "
- "a pipe\n", p->p_comm, p->p_pid);
- }
-
- bzero(&read_uap, sizeof (struct read_args));
- read_uap.fd = uap->s;
- read_uap.cbuf = uap->buf;
- read_uap.nbyte = uap->len;
-
- error = read(p, &read_uap, &read_retval);
- *retval = (int)read_retval;
- }
- }
- }
-#endif /* HACK_FOR_4056224 */
-
KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0);
return (error);
goto done;
/* finish setup of uio_t */
- uio_calculateresid(auio);
+ error = uio_calculateresid(auio);
+ if (error) {
+ goto done;
+ }
error = recvit(p, uap->s, &user_msg, auio, 0, retval);
if (!error) {
&sopt)) != 0)
goto out;
#endif /* MAC_SOCKET_SUBSET */
- error = sosetopt(so, &sopt);
+ error = sosetoptlock(so, &sopt, 1); /* will lock socket */
out:
file_drop(uap->s);
return (error);
&sopt)) != 0)
goto out;
#endif /* MAC_SOCKET_SUBSET */
- error = sogetopt((struct socket *)so, &sopt);
+ error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */
if (error == 0) {
valsize = sopt.sopt_valsize;
error = copyout((caddr_t)&valsize, uap->avalsize,
socket_lock(so, 1);
error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
if (error == 0) {
- struct socket_filter_entry *filter;
- int filtered = 0;
- for (filter = so->so_filt; filter && error == 0;
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_getsockname) {
- if (!filtered) {
- filtered = 1;
- sflt_use(so);
- socket_unlock(so, 0);
- }
- error = filter->sfe_filter->sf_filter.
- sf_getsockname(filter->sfe_cookie, so, &sa);
- }
- }
-
+ error = sflt_getsockname(so, &sa);
if (error == EJUSTRETURN)
error = 0;
-
- if (filtered) {
- socket_lock(so, 0);
- sflt_unuse(so);
- }
}
socket_unlock(so, 1);
if (error)
sa = 0;
error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
if (error == 0) {
- struct socket_filter_entry *filter;
- int filtered = 0;
- for (filter = so->so_filt; filter && error == 0;
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_getpeername) {
- if (!filtered) {
- filtered = 1;
- sflt_use(so);
- socket_unlock(so, 0);
- }
- error = filter->sfe_filter->sf_filter.
- sf_getpeername(filter->sfe_cookie, so, &sa);
- }
- }
-
+ error = sflt_getpeername(so, &sa);
if (error == EJUSTRETURN)
error = 0;
-
- if (filtered) {
- socket_lock(so, 0);
- sflt_unuse(so);
- }
}
socket_unlock(so, 1);
if (error)
struct mbuf *m;
int error;
- int alloc_buflen = buflen;
+ size_t alloc_buflen = (size_t)buflen;
+
+ if(alloc_buflen > INT_MAX/2)
+ return (EINVAL);
#ifdef __LP64__
/* The fd's in the buffer must expand to be pointers, thus we need twice as much space */
if(type == MT_CONTROL)
alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr);
#endif
- if ((u_int)alloc_buflen > MLEN) {
- if (type == MT_SONAME && (u_int)alloc_buflen <= 112)
+ if (alloc_buflen > MLEN) {
+ if (type == MT_SONAME && alloc_buflen <= 112)
alloc_buflen = MLEN; /* unix domain compat. hack */
- else if ((u_int)alloc_buflen > MCLBYTES)
+ else if (alloc_buflen > MCLBYTES)
return (EINVAL);
}
m = m_get(M_WAIT, type);
if (m == NULL)
return (ENOBUFS);
- if ((u_int)alloc_buflen > MLEN) {
+ if (alloc_buflen > MLEN) {
MCLGET(m, M_WAIT);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
* handle it.
*/
if (translate_unspec && sa->sa_family == AF_UNSPEC &&
- INP_CHECK_SOCKAF(so, AF_INET) &&
+ SOCK_CHECK_DOM(so, PF_INET) &&
len == sizeof (struct sockaddr_in))
sa->sa_family = AF_INET;
* handle it.
*/
if (translate_unspec && ss->ss_family == AF_UNSPEC &&
- INP_CHECK_SOCKAF(so, AF_INET) &&
+ SOCK_CHECK_DOM(so, PF_INET) &&
len == sizeof (struct sockaddr_in))
ss->ss_family = AF_INET;
return (error);
}
-#if SENDFILE
+/*
+ * Hard limit on the number of source and/or destination addresses
+ * that can be specified by an application.
+ */
+#define SOCKADDRLIST_MAX_ENTRIES 64
+
+/*
+ * getsockaddrlist: copy in a packed sequence of sockaddrs from user space.
+ *
+ * Walks the uaddrlen bytes at uaddr one sockaddr at a time, using each
+ * entry's own length byte to locate the next.  Each entry is copied in
+ * through getsockaddr() and attached to a freshly allocated
+ * sockaddr_list.  The walk stops once the list holds
+ * SOCKADDRLIST_MAX_ENTRIES entries, even if input remains.
+ *
+ * Parameters:	so		Socket the addresses are for
+ *		slp		Out: new list on success, NULL on failure
+ *		uaddr		User address of the first sockaddr
+ *		uaddrlen	Total number of bytes available at uaddr
+ *		xlate_unspec	Passed through to getsockaddr()
+ *
+ * Returns:	0		Success; *slp holds the list
+ *		EINVAL		No buffer, or an entry length is too
+ *				small or runs past uaddrlen
+ *		ENAMETOOLONG	Entry larger than sockaddr_storage
+ *		ENOMEM		List or entry allocation failed
+ *		copyin:???
+ *		getsockaddr:???
+ */
+static int
+getsockaddrlist(struct socket *so, struct sockaddr_list **slp,
+    user_addr_t uaddr, socklen_t uaddrlen, boolean_t xlate_unspec)
+{
+	struct sockaddr_list *sl;
+	int error = 0;
+
+	*slp = NULL;
+
+	if (uaddr == USER_ADDR_NULL || uaddrlen == 0)
+		return (EINVAL);
+
+	sl = sockaddrlist_alloc(M_WAITOK);
+	if (sl == NULL)
+		return (ENOMEM);
+
+	VERIFY(sl->sl_cnt == 0);
+	while (uaddrlen > 0 && sl->sl_cnt < SOCKADDRLIST_MAX_ENTRIES) {
+		struct sockaddr_storage ss;
+		struct sockaddr_entry *se;
+		struct sockaddr *sa;
+
+		if (uaddrlen < sizeof (struct sockaddr)) {
+			error = EINVAL;
+			break;
+		}
+
+		/* peek at the fixed header to learn this entry's length */
+		bzero(&ss, sizeof (ss));
+		error = copyin(uaddr, (caddr_t)&ss, sizeof (struct sockaddr));
+		if (error != 0)
+			break;
+
+		/* getsockaddr does the same but we need them now */
+		if (uaddrlen < ss.ss_len ||
+		    ss.ss_len < offsetof(struct sockaddr, sa_data[0])) {
+			error = EINVAL;
+			break;
+		} else if (ss.ss_len > sizeof (ss)) {
+			/*
+			 * sockaddr_storage size is less than SOCK_MAXADDRLEN,
+			 * so the check here is inclusive.  We could use the
+			 * latter instead, but seems like an overkill for now.
+			 */
+			error = ENAMETOOLONG;
+			break;
+		}
-SYSCTL_DECL(_kern_ipc);
+		se = sockaddrentry_alloc(M_WAITOK);
+		if (se == NULL) {
+			/*
+			 * Must fail the call: breaking out with error == 0
+			 * would hand back a short (possibly empty) list as
+			 * if it were complete.
+			 */
+			error = ENOMEM;
+			break;
+		}
+
+		sockaddrlist_insert(sl, se);
+
+		error = getsockaddr(so, &sa, uaddr, ss.ss_len, xlate_unspec);
+		if (error != 0)
+			break;
+
+		VERIFY(sa != NULL && sa->sa_len == ss.ss_len);
+		se->se_addr = sa;
+
+		uaddr += ss.ss_len;
+		VERIFY(((signed)uaddrlen - ss.ss_len) >= 0);
+		uaddrlen -= ss.ss_len;
+	}
+
+	if (error != 0)
+		sockaddrlist_free(sl);
+	else
+		*slp = sl;
+
+	return (error);
+}
+
+#if SENDFILE
#define SFUIOBUFS 64
-static int sendfileuiobufs = SFUIOBUFS;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW, &sendfileuiobufs,
- 0, "");
/* Macros to compute the number of mbufs needed depending on cluster size */
#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1)
#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1)
-/* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */
-#define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT)
+/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */
+#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT)
/* Upper send limit in the number of mbuf clusters */
#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES)
* use mbuf_allocpacket(). The logic below is similar to sosend().
*/
*m = NULL;
- if (pktlen > NBPG && jumbocl) {
+ if (pktlen > MBIGCLBYTES && jumbocl) {
needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
}
if (*m == NULL) {
needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
- *m = m_getpackets_internal(&needed, 1, how, 0, NBPG);
+ *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
}
/*
*/
if (*m == NULL) {
needed = 1;
- *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, NBPG);
+ *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
}
if (*m == NULL)
panic("%s: blocking allocation returned NULL\n", __func__);
size_t sizeof_hdtr;
off_t file_size;
struct vfs_context context = *vfs_context_current();
-
+/*
+ * Debug aid for rdar://10146739: when the enclosing function's `error'
+ * is ENXIO, log which step produced it.  err_str must be a printf
+ * format with two %s conversions (function name, then the radar note).
+ * Wrapped in do { } while (0) so that it expands to a single statement
+ * and stays safe inside an un-braced if/else.
+ */
+#define	ENXIO_10146739_DBG(err_str) do {				\
+	if (error == ENXIO) {						\
+		printf(err_str,						\
+		    __func__,						\
+		    "File a radar related to rdar://10146739 \n");	\
+	}								\
+} while (0)
KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s,
0, 0, 0, 0);
* type and connected socket out, positive offset.
*/
if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
+ ENXIO_10146739_DBG("%s: fp_getfvp error. %s");
goto done;
}
if ((fp->f_flag & FREAD) == 0) {
}
error = file_socket(uap->s, &so);
if (error) {
+ ENXIO_10146739_DBG("%s: file_socket error. %s");
goto done1;
}
if (so == NULL) {
nuap.iovp = user_hdtr.headers;
nuap.iovcnt = user_hdtr.hdr_cnt;
error = writev_nocancel(p, &nuap, &writev_retval);
- if (error)
+ if (error) {
+ ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
goto done2;
+ }
sbytes += writev_retval;
}
}
* 1. We don't want to allocate more mbufs than necessary
* 2. We don't want to read past the end of file
*/
- if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0)
+ if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) {
+ ENXIO_10146739_DBG("%s: vnode_size error. %s");
goto done2;
+ }
/*
* Simply read file data into a chain of mbufs that used with scatter
* mbufs that point to the file pages.
*/
socket_lock(so, 1);
- error = sblock(&so->so_snd, M_WAIT);
+ error = sblock(&so->so_snd, SBL_WAIT);
if (error) {
socket_unlock(so, 1);
goto done2;
}
for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
mbuf_t m0 = NULL, m;
- unsigned int nbufs = sendfileuiobufs, i;
+ unsigned int nbufs = SFUIOBUFS, i;
uio_t auio;
- char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */
+ char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */
size_t uiolen;
user_ssize_t rlen;
off_t pgoff;
auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE,
UIO_READ, &uio_buf[0], sizeof (uio_buf));
if (auio == NULL) {
- //printf("sendfile: uio_createwithbuffer failed\n");
+ printf("sendfile failed. nbufs = %d. %s", nbufs,
+ "File a radar related to rdar://10146739.\n");
mbuf_freem(m0);
error = ENXIO;
socket_lock(so, 0);
if (xfsize != uio_resid(auio))
printf("sendfile: xfsize: %lld != uio_resid(auio): "
- "%lld\n", xfsize, uio_resid(auio));
+ "%lld\n", xfsize, (long long)uio_resid(auio));
KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
error == EINTR || error == EWOULDBLOCK)) {
error = 0;
} else {
+ ENXIO_10146739_DBG("%s: fo_read error. %s");
mbuf_freem(m0);
goto done3;
}
so->so_error = 0;
}
m_freem(m0);
+ ENXIO_10146739_DBG("%s: Unexpected socket error. %s");
goto done3;
}
/*
}
goto retry_space;
}
+
+ struct mbuf *control = NULL;
{
/*
* Socket filter processing
*/
- struct socket_filter_entry *filter;
- int filtered = 0;
- struct mbuf *control = NULL;
- boolean_t recursive = (so->so_send_filt_thread != NULL);
- error = 0;
- for (filter = so->so_filt; filter && (error == 0);
- filter = filter->sfe_next_onsocket) {
- if (filter->sfe_filter->sf_filter.sf_data_out) {
- if (filtered == 0) {
- filtered = 1;
- so->so_send_filt_thread =
- current_thread();
- sflt_use(so);
- socket_unlock(so, 0);
- }
- error = filter->sfe_filter->sf_filter.
- sf_data_out(filter->sfe_cookie, so,
- NULL, &m0, &control, 0);
- }
- }
-
- if (filtered) {
- /*
- * At this point, we've run at least one filter.
- * The socket is unlocked as is the socket
- * buffer. Clear the recorded filter thread
- * only when we are outside of a filter's
- * context. This allows for a filter to issue
- * multiple inject calls from its sf_data_out
- * callback routine.
- */
- socket_lock(so, 0);
- sflt_unuse(so);
- if (!recursive)
- so->so_send_filt_thread = 0;
- if (error) {
- if (error == EJUSTRETURN) {
- error = 0;
- continue;
- }
- goto done3;
+ error = sflt_data_out(so, NULL, &m0, &control, 0);
+ if (error) {
+ if (error == EJUSTRETURN) {
+ error = 0;
+ continue;
}
+ ENXIO_10146739_DBG("%s: sflt_data_out error. %s");
+ goto done3;
}
/*
* End Socket filter processing
KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
uap->s, 0, 0, 0, 0);
error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
- 0, 0, p);
+ 0, control, p);
KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
uap->s, 0, 0, 0, 0);
if (error) {
+ ENXIO_10146739_DBG("%s: pru_send error. %s");
goto done3;
}
}
- sbunlock(&so->so_snd, 0); /* will unlock socket */
+ sbunlock(&so->so_snd, FALSE); /* will unlock socket */
/*
* Send trailers. Wimp out and use writev(2).
*/
nuap.iovp = user_hdtr.trailers;
nuap.iovcnt = user_hdtr.trl_cnt;
error = writev_nocancel(p, &nuap, &writev_retval);
- if (error)
+ if (error) {
+ ENXIO_10146739_DBG("%s: writev_nocancel error. %s");
goto done2;
+ }
sbytes += writev_retval;
}
done2:
(unsigned int)(sbytes & 0x0ffffffff), error, 0);
return (error);
done3:
- sbunlock(&so->so_snd, 0); /* will unlock socket */
+ sbunlock(&so->so_snd, FALSE); /* will unlock socket */
goto done2;
}