X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..c18c124eaa464aaaa5549e99e5a70fc9cbb50944:/bsd/kern/uipc_syscalls.c diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c index 1126e7955..106e11dc2 100644 --- a/bsd/kern/uipc_syscalls.c +++ b/bsd/kern/uipc_syscalls.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -76,8 +76,9 @@ #include #include #include +#include #include -#include +#include #include #include #include @@ -86,8 +87,10 @@ #include #include #include +#include +#include -#include +#include #include #include @@ -100,14 +103,13 @@ #endif /* MAC_SOCKET_SUBSET */ #define f_flag f_fglob->fg_flag -#define f_type f_fglob->fg_type +#define f_type f_fglob->fg_ops->fo_type #define f_msgcount f_fglob->fg_msgcount #define f_cred f_fglob->fg_cred #define f_ops f_fglob->fg_ops #define f_offset f_fglob->fg_offset #define f_data f_fglob->fg_data - #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0) #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2) #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1) @@ -122,34 +124,49 @@ #define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1)) #define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2)) #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3)) +#define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8)) +#define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8)) -#define HACK_FOR_4056224 1 -#if HACK_FOR_4056224 -static pid_t last_pid_4056224 = 0; -#endif /* HACK_FOR_4056224 */ - /* TODO: should be in header file */ int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int); static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int, - register_t *); + int32_t *); static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t, - register_t *); + int32_t *); +static int connectit(struct socket *, struct sockaddr *); static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t, - size_t); + size_t, boolean_t); static int getsockaddr_s(struct socket *, struct sockaddr_storage *, - user_addr_t, size_t); + user_addr_t, size_t, boolean_t); +static int getsockaddrlist(struct socket *, struct sockaddr_list **, + user_addr_t, socklen_t, boolean_t); #if SENDFILE static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **, boolean_t); #endif /* SENDFILE */ +static int connectx_nocancel(struct proc *, struct connectx_args *, int *); +static int connectitx(struct socket *, struct sockaddr_list **, + struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *); +static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *); +static int disconnectx_nocancel(struct proc *, struct disconnectx_args *, + int *); +static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int); + +static int internalize_user_msghdr_array(const void *, int, int, u_int, + struct user_msghdr_x *, struct uio **); +static u_int externalize_user_msghdr_array(void *, int, int, u_int, + const struct user_msghdr_x *, struct uio **); + +static void free_uio_array(struct uio **, u_int); +static int uio_array_is_valid(struct uio **, u_int); /* * System call interface to the socket abstraction. */ -extern struct fileops socketops; +extern const struct fileops socketops; /* * Returns: 0 Success @@ -162,46 +179,68 @@ extern struct fileops socketops; * socreate:EPROTONOSUPPORT * socreate:ENOBUFS * socreate:ENOMEM - * socreate:EISCONN * socreate:??? [other protocol families, IPSEC] */ int -socket(struct proc *p, struct socket_args *uap, register_t *retval) +socket(struct proc *p, + struct socket_args *uap, + int32_t *retval) +{ + return (socket_common(p, uap->domain, uap->type, uap->protocol, + proc_selfpid(), retval, 0)); +} + +int +socket_delegate(struct proc *p, + struct socket_delegate_args *uap, + int32_t *retval) +{ + return socket_common(p, uap->domain, uap->type, uap->protocol, + uap->epid, retval, 1); +} + +static int +socket_common(struct proc *p, + int domain, + int type, + int protocol, + pid_t epid, + int32_t *retval, + int delegate) { struct socket *so; struct fileproc *fp; int fd, error; - AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol); + AUDIT_ARG(socket, domain, type, protocol); #if CONFIG_MACF_SOCKET_SUBSET - if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain, - uap->type, uap->protocol)) != 0) + if ((error = mac_socket_check_create(kauth_cred_get(), domain, + type, protocol)) != 0) return (error); #endif /* MAC_SOCKET_SUBSET */ + if (delegate) { + error = priv_check_cred(kauth_cred_get(), + PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0); + if (error) + return (EACCES); + } + error = falloc(p, &fp, &fd, vfs_context_current()); if (error) { return (error); } fp->f_flag = FREAD|FWRITE; - fp->f_type = DTYPE_SOCKET; fp->f_ops = &socketops; - error = socreate(uap->domain, &so, uap->type, uap->protocol); + if (delegate) + error = socreate_delegate(domain, &so, type, protocol, epid); + else + error = socreate(domain, &so, type, protocol); + if (error) { fp_free(p, fd, fp); } else { - thread_t thread; - struct uthread *ut; - - thread = current_thread(); - ut = get_bsdthread_info(thread); - - /* if this is a backgrounded thread then throttle all new sockets */ - if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) { - so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND; - so->so_background_thread = thread; - } fp->f_data = (caddr_t)so; proc_fdlock(p); @@ -226,11 +265,11 @@ socket(struct proc *p, struct socket_args *uap, register_t *retval) * getsockaddr:EINVAL Invalid argument * getsockaddr:ENOMEM Not enough space * getsockaddr:EFAULT Bad address - * sobind:??? + * sobindlock:??? */ /* ARGSUSED */ int -bind(__unused proc_t p, struct bind_args *uap, __unused register_t *retval) +bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval) { struct sockaddr_storage ss; struct sockaddr *sa = NULL; @@ -251,9 +290,9 @@ bind(__unused proc_t p, struct bind_args *uap, __unused register_t *retval) goto out; } if (uap->namelen > sizeof (ss)) { - error = getsockaddr(so, &sa, uap->name, uap->namelen); + error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE); } else { - error = getsockaddr_s(so, &ss, uap->name, uap->namelen); + error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE); if (error == 0) { sa = (struct sockaddr *)&ss; want_free = FALSE; @@ -264,9 +303,9 @@ bind(__unused proc_t p, struct bind_args *uap, __unused register_t *retval) AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa); #if CONFIG_MACF_SOCKET_SUBSET if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) - error = sobind(so, sa); + error = sobindlock(so, sa, 1); /* will lock socket */ #else - error = sobind(so, sa); + error = sobindlock(so, sa, 1); /* will lock socket */ #endif /* MAC_SOCKET_SUBSET */ if (want_free) FREE(sa, M_SONAME); @@ -287,7 +326,7 @@ out: */ int listen(__unused struct proc *p, struct listen_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { int error; struct socket *so; @@ -332,7 +371,7 @@ listen(__unused struct proc *p, struct listen_args *uap, */ int accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, - register_t *retval) + int32_t *retval) { struct fileproc *fp; struct sockaddr *sa = NULL; @@ -446,8 +485,10 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, * the file descriptor should the MAC check fails. */ if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) { + socket_lock(so, 1); so->so_state &= ~(SS_NOFDREF | SS_COMP); so->so_head = NULL; + socket_unlock(so, 1); soclose(so); /* Drop reference on listening socket */ sodereference(head); @@ -470,36 +511,55 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, error = falloc(p, &fp, &newfd, vfs_context_current()); if (error) { /* - * Probably ran out of file descriptors. Put the - * unaccepted connection back onto the queue and - * do another wakeup so some other process might - * have a chance at it. + * Probably ran out of file descriptors. + * + * + * Don't put this back on the socket like we used to, that + * just causes the client to spin. Drop the socket. */ - socket_lock(head, 0); - TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); - head->so_qlen++; - wakeup_one((caddr_t)&head->so_timeo); - socket_unlock(head, 1); + socket_lock(so, 1); + so->so_state &= ~(SS_NOFDREF | SS_COMP); + so->so_head = NULL; + socket_unlock(so, 1); + soclose(so); + sodereference(head); goto out; } *retval = newfd; - fp->f_type = DTYPE_SOCKET; fp->f_flag = fflag; fp->f_ops = &socketops; fp->f_data = (caddr_t)so; + socket_lock(head, 0); if (dosocklock) socket_lock(so, 1); + so->so_state &= ~SS_COMP; so->so_head = NULL; + + /* Sync socket non-blocking/async state with file flags */ + if (fp->f_flag & FNONBLOCK) { + so->so_state |= SS_NBIO; + } else { + so->so_state &= ~SS_NBIO; + } + + if (fp->f_flag & FASYNC) { + so->so_state |= SS_ASYNC; + so->so_rcv.sb_flags |= SB_ASYNC; + so->so_snd.sb_flags |= SB_ASYNC; + } else { + so->so_state &= ~SS_ASYNC; + so->so_rcv.sb_flags &= ~SB_ASYNC; + so->so_snd.sb_flags &= ~SB_ASYNC; + } + (void) soacceptlock(so, &sa, 0); socket_unlock(head, 1); if (sa == NULL) { namelen = 0; if (uap->name) goto gotnoname; - if (dosocklock) - socket_unlock(so, 1); error = 0; goto releasefd; } @@ -521,23 +581,19 @@ gotnoname: } FREE(sa, M_SONAME); +releasefd: /* - * If the socket has been marked as inactive by soacceptfilter(), - * disallow further operations on it. We explicitly call shutdown - * on both data directions to ensure that SS_CANT{RCV,SEND}MORE - * states are set for the socket. This would also flush out data - * hanging off the receive list of this socket. + * If the socket has been marked as inactive by sosetdefunct(), + * disallow further operations on it. */ if (so->so_flags & SOF_DEFUNCT) { - (void) soshutdownlock(so, SHUT_RD); - (void) soshutdownlock(so, SHUT_WR); - (void) sodisconnectlocked(so); + sodefunct(current_proc(), so, + SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL); } if (dosocklock) socket_unlock(so, 1); -releasefd: proc_fdlock(p); procfdtbl_releasefd(p, newfd, NULL); fp_drop(p, newfd, fp, 1); @@ -549,7 +605,7 @@ out: } int -accept(struct proc *p, struct accept_args *uap, register_t *retval) +accept(struct proc *p, struct accept_args *uap, int32_t *retval) { __pthread_testcancel(1); return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval)); @@ -579,22 +635,22 @@ accept(struct proc *p, struct accept_args *uap, register_t *retval) */ /* ARGSUSED */ int -connect(struct proc *p, struct connect_args *uap, register_t *retval) +connect(struct proc *p, struct connect_args *uap, int32_t *retval) { __pthread_testcancel(1); return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval)); } int -connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused register_t *retval) +connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval) { +#pragma unused(p, retval) struct socket *so; struct sockaddr_storage ss; struct sockaddr *sa = NULL; - lck_mtx_t *mutex_held; - boolean_t want_free = TRUE; int error; int fd = uap->s; + boolean_t dgram; AUDIT_ARG(fd, uap->s); error = file_socket(fd, &so); @@ -605,70 +661,325 @@ connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused goto out; } + /* + * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET + * if this is a datagram socket; translate for other types. + */ + dgram = (so->so_type == SOCK_DGRAM); + /* Get socket address now before we obtain socket lock */ if (uap->namelen > sizeof (ss)) { - error = getsockaddr(so, &sa, uap->name, uap->namelen); + error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram); } else { - error = getsockaddr_s(so, &ss, uap->name, uap->namelen); - if (error == 0) { + error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram); + if (error == 0) sa = (struct sockaddr *)&ss; - want_free = FALSE; - } } if (error != 0) goto out; + error = connectit(so, sa); + + if (sa != NULL && sa != SA(&ss)) + FREE(sa, M_SONAME); + if (error == ERESTART) + error = EINTR; +out: + file_drop(fd); + return (error); +} + +static int +connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) +{ +#pragma unused(p, retval) + struct sockaddr_list *src_sl = NULL, *dst_sl = NULL; + struct socket *so; + int error, fd = uap->s; + boolean_t dgram; + connid_t cid = CONNID_ANY; + + AUDIT_ARG(fd, uap->s); + error = file_socket(fd, &so); + if (error != 0) + return (error); + if (so == NULL) { + error = EBADF; + goto out; + } + + /* + * XXX Workaround to ensure connectx does not fail because + * of unreaped so_error. + */ + so->so_error = 0; + + /* + * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET + * if this is a datagram socket; translate for other types. + */ + dgram = (so->so_type == SOCK_DGRAM); + + /* + * Get socket address(es) now before we obtain socket lock; use + * sockaddr_list for src address for convenience, if present, + * even though it won't hold more than one. + */ + if (uap->src != USER_ADDR_NULL && (error = getsockaddrlist(so, + &src_sl, uap->src, uap->srclen, dgram)) != 0) + goto out; + + error = getsockaddrlist(so, &dst_sl, uap->dsts, uap->dstlen, dgram); + if (error != 0) + goto out; + + VERIFY(dst_sl != NULL && + !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0); + + error = connectitx(so, &src_sl, &dst_sl, p, uap->ifscope, + uap->aid, &cid); + if (error == ERESTART) + error = EINTR; + + if (uap->cid != USER_ADDR_NULL) + (void) copyout(&cid, uap->cid, sizeof (cid)); + +out: + file_drop(fd); + if (src_sl != NULL) + sockaddrlist_free(src_sl); + if (dst_sl != NULL) + sockaddrlist_free(dst_sl); + return (error); +} + +int +connectx(struct proc *p, struct connectx_args *uap, int *retval) +{ + /* + * Due to similiarity with a POSIX interface, define as + * an unofficial cancellation point. + */ + __pthread_testcancel(1); + return (connectx_nocancel(p, uap, retval)); +} + +static int +connectit(struct socket *so, struct sockaddr *sa) +{ + int error; + AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa); #if CONFIG_MACF_SOCKET_SUBSET - if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) { - if (want_free) - FREE(sa, M_SONAME); + if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) + return (error); +#endif /* MAC_SOCKET_SUBSET */ + + socket_lock(so, 1); + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + error = EALREADY; + goto out; + } + error = soconnectlock(so, sa, 0); + if (error != 0) { + so->so_state &= ~SS_ISCONNECTING; goto out; } + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + error = EINPROGRESS; + goto out; + } + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + lck_mtx_t *mutex_held; + + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + error = msleep((caddr_t)&so->so_timeo, mutex_held, + PSOCK | PCATCH, __func__, 0); + if (so->so_state & SS_DRAINING) { + error = ECONNABORTED; + } + if (error != 0) + break; + } + if (error == 0) { + error = so->so_error; + so->so_error = 0; + } +out: + socket_unlock(so, 1); + return (error); +} + +static int +connectitx(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid) +{ + struct sockaddr_entry *se; + int error; + + VERIFY(dst_sl != NULL && *dst_sl != NULL); + + TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) { + VERIFY(se->se_addr != NULL); + AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), + se->se_addr); +#if CONFIG_MACF_SOCKET_SUBSET + if ((error = mac_socket_check_connect(kauth_cred_get(), + so, se->se_addr)) != 0) + return (error); #endif /* MAC_SOCKET_SUBSET */ - socket_lock(so, 1); + } + socket_lock(so, 1); if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { - if (want_free) - FREE(sa, M_SONAME); - socket_unlock(so, 1); error = EALREADY; goto out; } - error = soconnectlock(so, sa, 0); - if (error) - goto bad; + error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope, + aid, pcid, 0, NULL, 0); + if (error != 0) { + so->so_state &= ~SS_ISCONNECTING; + goto out; + } if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { - if (want_free) - FREE(sa, M_SONAME); - socket_unlock(so, 1); error = EINPROGRESS; goto out; } while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + lck_mtx_t *mutex_held; + if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else mutex_held = so->so_proto->pr_domain->dom_mtx; error = msleep((caddr_t)&so->so_timeo, mutex_held, - PSOCK | PCATCH, "connect", 0); - if ((so->so_state & SS_DRAINING)) { + PSOCK | PCATCH, __func__, 0); + if (so->so_state & SS_DRAINING) { error = ECONNABORTED; } - if (error) + if (error != 0) break; } if (error == 0) { error = so->so_error; so->so_error = 0; } -bad: - so->so_state &= ~SS_ISCONNECTING; +out: socket_unlock(so, 1); - if (want_free) - FREE(sa, M_SONAME); - if (error == ERESTART) - error = EINTR; + return (error); +} + +int +peeloff(struct proc *p, struct peeloff_args *uap, int *retval) +{ + /* + * Due to similiarity with a POSIX interface, define as + * an unofficial cancellation point. + */ + __pthread_testcancel(1); + return (peeloff_nocancel(p, uap, retval)); +} + +static int +peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval) +{ + struct fileproc *fp; + struct socket *mp_so, *so = NULL; + int newfd, fd = uap->s; + short fflag; /* type must match fp->f_flag */ + int error; + + *retval = -1; + + error = fp_getfsock(p, fd, &fp, &mp_so); + if (error != 0) { + if (error == EOPNOTSUPP) + error = ENOTSOCK; + goto out_nofile; + } + if (mp_so == NULL) { + error = EBADF; + goto out; + } + + socket_lock(mp_so, 1); + error = sopeelofflocked(mp_so, uap->aid, &so); + if (error != 0) { + socket_unlock(mp_so, 1); + goto out; + } + VERIFY(so != NULL); + socket_unlock(mp_so, 0); /* keep ref on mp_so for us */ + + fflag = fp->f_flag; + error = falloc(p, &fp, &newfd, vfs_context_current()); + if (error != 0) { + /* drop this socket (probably ran out of file descriptors) */ + soclose(so); + sodereference(mp_so); /* our mp_so ref */ + goto out; + } + + fp->f_flag = fflag; + fp->f_ops = &socketops; + fp->f_data = (caddr_t)so; + + /* + * If the socket has been marked as inactive by sosetdefunct(), + * disallow further operations on it. + */ + if (so->so_flags & SOF_DEFUNCT) { + sodefunct(current_proc(), so, + SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL); + } + + proc_fdlock(p); + procfdtbl_releasefd(p, newfd, NULL); + fp_drop(p, newfd, fp, 1); + proc_fdunlock(p); + + sodereference(mp_so); /* our mp_so ref */ + *retval = newfd; + +out: + file_drop(fd); + +out_nofile: + return (error); +} + +int +disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval) +{ + /* + * Due to similiarity with a POSIX interface, define as + * an unofficial cancellation point. + */ + __pthread_testcancel(1); + return (disconnectx_nocancel(p, uap, retval)); +} + +static int +disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval) +{ +#pragma unused(p, retval) + struct socket *so; + int fd = uap->s; + int error; + + error = file_socket(fd, &so); + if (error != 0) + return (error); + if (so == NULL) { + error = EBADF; + goto out; + } + + error = sodisconnectx(so, uap->aid, uap->cid); out: file_drop(fd); return (error); @@ -693,7 +1004,7 @@ out: */ int socketpair(struct proc *p, struct socketpair_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct fileproc *fp1, *fp2; struct socket *so1, *so2; @@ -712,7 +1023,6 @@ socketpair(struct proc *p, struct socketpair_args *uap, goto free2; } fp1->f_flag = FREAD|FWRITE; - fp1->f_type = DTYPE_SOCKET; fp1->f_ops = &socketops; fp1->f_data = (caddr_t)so1; sv[0] = fd; @@ -722,7 +1032,6 @@ socketpair(struct proc *p, struct socketpair_args *uap, goto free3; } fp2->f_flag = FREAD|FWRITE; - fp2->f_type = DTYPE_SOCKET; fp2->f_ops = &socketops; fp2->f_data = (caddr_t)so2; sv[1] = fd; @@ -741,6 +1050,9 @@ socketpair(struct proc *p, struct socketpair_args *uap, } } + if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0) + goto free4; + proc_fdlock(p); procfdtbl_releasefd(p, sv[0], NULL); procfdtbl_releasefd(p, sv[1], NULL); @@ -748,8 +1060,7 @@ socketpair(struct proc *p, struct socketpair_args *uap, fp_drop(p, sv[1], fp2, 1); proc_fdunlock(p); - error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof (int)); - return (error); + return (0); free4: fp_free(p, sv[1], fp2); free3: @@ -803,7 +1114,7 @@ free1: */ static int sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, - int flags, register_t *retval) + int flags, int32_t *retval) { struct mbuf *control = NULL; struct sockaddr_storage ss; @@ -827,10 +1138,10 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, if (mp->msg_name != USER_ADDR_NULL) { if (mp->msg_namelen > sizeof (ss)) { error = getsockaddr(so, &to, mp->msg_name, - mp->msg_namelen); + mp->msg_namelen, TRUE); } else { error = getsockaddr_s(so, &ss, mp->msg_name, - mp->msg_namelen); + mp->msg_namelen, TRUE); if (error == 0) { to = (struct sockaddr *)&ss; want_free = FALSE; @@ -855,16 +1166,17 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, /* * We check the state without holding the socket lock; * if a race condition occurs, it would simply result - * in an extra call to the MAC check function. + * in an extra call to the MAC check function. */ - if (!(so->so_state & SS_ISCONNECTED) && + if ( to != NULL && + !(so->so_state & SS_DEFUNCT) && (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) goto bad; #endif /* MAC_SOCKET_SUBSET */ len = uio_resid(uiop); - error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control, - flags); + error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, + control, flags); if (error != 0) { if (uio_resid(uiop) != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) @@ -891,14 +1203,16 @@ out: * write:??? [4056224: applicable for pipes] */ int -sendto(struct proc *p, struct sendto_args *uap, register_t *retval) +sendto(struct proc *p, struct sendto_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval)); + return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval)); } int -sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, register_t *retval) +sendto_nocancel(struct proc *p, + struct sendto_nocancel_args *uap, + int32_t *retval) { struct user_msghdr msg; int error; @@ -929,42 +1243,6 @@ sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, register_t *re uio_free(auio); } -#if HACK_FOR_4056224 - /* - * Radar 4056224 - * Temporary workaround to let send() and recv() work over - * a pipe for binary compatibility - * This will be removed in the release following Tiger - */ - if (error == ENOTSOCK) { - struct fileproc *fp; - - if (fp_lookup(p, uap->s, &fp, 0) == 0) { - (void) fp_drop(p, uap->s, fp, 0); - - if (fp->f_type == DTYPE_PIPE) { - struct write_args write_uap; - user_ssize_t write_retval; - - if (p->p_pid > last_pid_4056224) { - last_pid_4056224 = p->p_pid; - - printf("%s[%d] uses send/recv " - "on a pipe\n", p->p_comm, p->p_pid); - } - - bzero(&write_uap, sizeof (struct write_args)); - write_uap.fd = uap->s; - write_uap.cbuf = uap->buf; - write_uap.nbyte = uap->len; - - error = write(p, &write_uap, &write_retval); - *retval = (int)write_retval; - } - } - } -#endif /* HACK_FOR_4056224 */ - KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0); return (error); @@ -977,34 +1255,32 @@ sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, register_t *re * sendit:??? [see sendit definition in this file] */ int -sendmsg(struct proc *p, struct sendmsg_args *uap, register_t *retval) +sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval)); + return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval)); } int -sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, register_t *retval) +sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval) { - struct msghdr msg; + struct user32_msghdr msg32; + struct user64_msghdr msg64; struct user_msghdr user_msg; caddr_t msghdrp; int size_of_msghdr; int error; - int size_of_iovec; uio_t auio = NULL; struct user_iovec *iovp; KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); if (IS_64BIT_PROCESS(p)) { - msghdrp = (caddr_t)&user_msg; - size_of_msghdr = sizeof (user_msg); - size_of_iovec = sizeof (struct user_iovec); + msghdrp = (caddr_t)&msg64; + size_of_msghdr = sizeof (msg64); } else { - msghdrp = (caddr_t)&msg; - size_of_msghdr = sizeof (msg); - size_of_iovec = sizeof (struct iovec); + msghdrp = (caddr_t)&msg32; + size_of_msghdr = sizeof (msg32); } error = copyin(uap->msg, msghdrp, size_of_msghdr); if (error) { @@ -1012,15 +1288,22 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, register_t * return (error); } - /* only need to copy if user process is not 64-bit */ - if (!IS_64BIT_PROCESS(p)) { - user_msg.msg_flags = msg.msg_flags; - user_msg.msg_controllen = msg.msg_controllen; - user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control); - user_msg.msg_iovlen = msg.msg_iovlen; - user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov); - user_msg.msg_namelen = msg.msg_namelen; - user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name); + if (IS_64BIT_PROCESS(p)) { + user_msg.msg_flags = msg64.msg_flags; + user_msg.msg_controllen = msg64.msg_controllen; + user_msg.msg_control = msg64.msg_control; + user_msg.msg_iovlen = msg64.msg_iovlen; + user_msg.msg_iov = msg64.msg_iov; + user_msg.msg_namelen = msg64.msg_namelen; + user_msg.msg_name = msg64.msg_name; + } else { + user_msg.msg_flags = msg32.msg_flags; + user_msg.msg_controllen = msg32.msg_controllen; + user_msg.msg_control = msg32.msg_control; + user_msg.msg_iovlen = msg32.msg_iovlen; + user_msg.msg_iov = msg32.msg_iov; + user_msg.msg_namelen = msg32.msg_namelen; + user_msg.msg_name = msg32.msg_name; } if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) { @@ -1048,14 +1331,18 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, register_t * error = ENOBUFS; goto done; } - error = copyin(user_msg.msg_iov, (caddr_t)iovp, - (user_msg.msg_iovlen * size_of_iovec)); + error = copyin_user_iovec_array(user_msg.msg_iov, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + user_msg.msg_iovlen, iovp); if (error) goto done; user_msg.msg_iov = CAST_USER_ADDR_T(iovp); /* finish setup of uio_t */ - uio_calculateresid(auio); + error = uio_calculateresid(auio); + if (error) { + goto done; + } } else { user_msg.msg_iov = 0; } @@ -1073,6 +1360,174 @@ done: return (error); } +int +sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) +{ + int error = 0; + struct user_msghdr_x *user_msg = NULL; + struct uio **uiop = NULL; + struct socket *so; + u_int i; + struct sockaddr *to = NULL; + struct mbuf *control = NULL; + user_ssize_t len_before = 0, len_after; + int need_drop = 0; + size_t size_of_msghdr; + void *umsgp = NULL; + u_int uiocnt; + + KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0); + + error = file_socket(uap->s, &so); + if (error) { + goto out; + } + need_drop = 1; + if (so == NULL) { + error = EBADF; + goto out; + } + if (so->so_proto->pr_usrreqs->pru_sosend_list == NULL) { + printf("%s no pru_sosend_list\n", __func__); + error = EOPNOTSUPP; + goto out; + } + + /* + * Input parameter range check + */ + if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) { + error = EINVAL; + goto out; + } + user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), + M_TEMP, M_WAITOK | M_ZERO); + if (user_msg == NULL) { + printf("%s _MALLOC() user_msg failed\n", __func__); + error = ENOMEM; + goto out; + } + uiop = _MALLOC(uap->cnt * sizeof(struct uio *), + M_TEMP, M_WAITOK | M_ZERO); + if (uiop == NULL) { + printf("%s _MALLOC() uiop failed\n", __func__); + error = ENOMEM; + goto out; + } + + size_of_msghdr = IS_64BIT_PROCESS(p) ? + sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x); + + umsgp = _MALLOC(uap->cnt * size_of_msghdr, + M_TEMP, M_WAITOK | M_ZERO); + if (umsgp == NULL) { + printf("%s _MALLOC() user_msg failed\n", __func__); + error = ENOMEM; + goto out; + } + error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr); + if (error) { + printf("%s copyin() failed\n", __func__); + goto out; + } + error = internalize_user_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_WRITE, uap->cnt, user_msg, uiop); + if (error) { + printf("%s copyin_user_msghdr_array() failed\n", __func__); + goto out; + } + /* + * Make sure the size of each message iovec and + * the aggregate size of all the iovec is valid + */ + if (uio_array_is_valid(uiop, uap->cnt) == 0) { + error = EINVAL; + goto out; + } + + /* + * Sanity check on passed arguments + */ + for (i = 0; i < uap->cnt; i++) { + struct user_msghdr_x *mp = &user_msg[i]; + + /* + * No flags on send message + */ + if (mp->msg_flags != 0) { + error = EINVAL; + goto out; + } + /* + * No support for address or ancillary data (yet) + */ + if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) { + error = EINVAL; + goto out; + } + if (mp->msg_control != USER_ADDR_NULL || + mp->msg_controllen != 0) { + error = EINVAL; + goto out; + } +#if CONFIG_MACF_SOCKET_SUBSET + /* + * We check the state without holding the socket lock; + * if a race condition occurs, it would simply result + * in an extra call to the MAC check function. + * + * Note: The following check is never true taken with the + * current limitation that we do not accept to pass an address, + * this is effectively placeholder code. If we add support for addresses, + * we will have to check every address. + */ + if ( to != NULL && + !(so->so_state & SS_DEFUNCT) && + (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) + goto out; +#endif /* MAC_SOCKET_SUBSET */ + } + + len_before = uio_array_resid(uiop, uap->cnt); + + error = so->so_proto->pr_usrreqs->pru_sosend_list(so, to, uiop, + uap->cnt, 0, control, uap->flags); + + len_after = uio_array_resid(uiop, uap->cnt); + + if (error != 0) { + if (len_after != len_before && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + /* Generation of SIGPIPE can be controlled per socket */ + if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) + psignal(p, SIGPIPE); + } + if (error == 0) { + uiocnt = externalize_user_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_WRITE, uap->cnt, user_msg, uiop); + + *retval = (int)(uiocnt); + } +out: + if (need_drop) + file_drop(uap->s); + if (umsgp != NULL) + _FREE(umsgp, M_TEMP); + if (uiop != NULL) { + free_uio_array(uiop, uap->cnt); + _FREE(uiop, M_TEMP); + } + if (user_msg != NULL) + _FREE(user_msg, M_TEMP); + + KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0); + + return (error); +} + /* * Returns: 0 Success * ENOTSOCK @@ -1097,9 +1552,10 @@ done: */ static int recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, - user_addr_t namelenp, register_t *retval) + user_addr_t namelenp, int32_t *retval) { - int len, error; + ssize_t len; + int error; struct mbuf *m, *control = 0; user_addr_t ctlbuf; struct socket *so; @@ -1134,7 +1590,9 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, * if a race condition occurs, it would simply result * in an extra call to the MAC check function. */ - if (!(so->so_state & SS_ISCONNECTED) && + if (!(so->so_state & SS_DEFUNCT) && + !(so->so_state & SS_ISCONNECTED) && + !(so->so_proto->pr_flags & PR_CONNREQUIRED) && (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) goto out1; #endif /* MAC_SOCKET_SUBSET */ @@ -1148,7 +1606,9 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop, (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, &mp->msg_flags); - AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), fromsa); + if (fromsa) + AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), + fromsa); if (error) { if (uio_resid(uiop) != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) @@ -1191,21 +1651,80 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, while (m && len > 0) { unsigned int tocopy; + struct cmsghdr *cp = mtod(m, struct cmsghdr *); + int cp_size = CMSG_ALIGN(cp->cmsg_len); + int buflen = m->m_len; - if (len >= m->m_len) { - tocopy = m->m_len; - } else { - mp->msg_flags |= MSG_CTRUNC; - tocopy = len; - } + while (buflen > 0 && len > 0) { - error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf, - tocopy); - if (error) - goto out; + /* + SCM_TIMESTAMP hack because struct timeval has a + * different size for 32 bits and 64 bits processes + */ + if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) { + unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))]; + struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer; + int tmp_space; + struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp); + + tmp_cp->cmsg_level = SOL_SOCKET; + tmp_cp->cmsg_type = SCM_TIMESTAMP; + + if (proc_is64bit(p)) { + struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp); + + tv64->tv_sec = tv->tv_sec; + tv64->tv_usec = tv->tv_usec; + + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval)); + tmp_space = CMSG_SPACE(sizeof(struct user64_timeval)); + } else { + struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp); + + tv32->tv_sec = tv->tv_sec; + tv32->tv_usec = tv->tv_usec; + + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval)); + tmp_space = CMSG_SPACE(sizeof(struct user32_timeval)); + } + if (len >= tmp_space) { + tocopy = tmp_space; + } else { + mp->msg_flags |= MSG_CTRUNC; + tocopy = len; + } + error = copyout(tmp_buffer, ctlbuf, tocopy); + if (error) + goto out; + + } else { + + if (cp_size > buflen) { + panic("cp_size > buflen, something wrong with alignment!"); + } + + if (len >= cp_size) { + tocopy = cp_size; + } else { + mp->msg_flags |= MSG_CTRUNC; + tocopy = len; + } + + error = copyout((caddr_t) cp, ctlbuf, + tocopy); + if (error) + goto out; + } + + + ctlbuf += tocopy; + len -= tocopy; + + buflen -= cp_size; + cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size); + cp_size = CMSG_ALIGN(cp->cmsg_len); + } - ctlbuf += tocopy; - len -= tocopy; m = m->m_next; } mp->msg_controllen = ctlbuf - mp->msg_control; @@ -1221,7 +1740,6 @@ out1: return (error); } - /* * Returns: 0 Success * ENOMEM @@ -1238,14 +1756,14 @@ out1: * the block header for the recvit function. */ int -recvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval) +recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval) { __pthread_testcancel(1); return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval)); } int -recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, register_t *retval) +recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval) { struct user_msghdr msg; int error; @@ -1282,42 +1800,6 @@ recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, register_t uio_free(auio); } -#if HACK_FOR_4056224 - /* - * Radar 4056224 - * Temporary workaround to let send() and recv() work over - * a pipe for binary compatibility - * This will be removed in the release following Tiger - */ - if (error == ENOTSOCK && proc_is64bit(p) == 0) { - struct fileproc *fp; - - if (fp_lookup(p, uap->s, &fp, 0) == 0) { - (void) fp_drop(p, uap->s, fp, 0); - - if (fp->f_type == DTYPE_PIPE) { - struct read_args read_uap; - user_ssize_t read_retval; - - if (p->p_pid > last_pid_4056224) { - last_pid_4056224 = p->p_pid; - - printf("%s[%d] uses send/recv on " - "a pipe\n", p->p_comm, p->p_pid); - } - - bzero(&read_uap, sizeof (struct read_args)); - read_uap.fd = uap->s; - read_uap.cbuf = uap->buf; - read_uap.nbyte = uap->len; - - error = read(p, &read_uap, &read_retval); - *retval = (int)read_retval; - } - } - } -#endif /* HACK_FOR_4056224 */ - KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0); return (error); @@ -1335,35 +1817,33 @@ recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, register_t * the block header for the recvit function. */ int -recvmsg(struct proc *p, struct recvmsg_args *uap, register_t *retval) +recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval) { __pthread_testcancel(1); return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval)); } int -recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t *retval) +recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval) { - struct msghdr msg; + struct user32_msghdr msg32; + struct user64_msghdr msg64; struct user_msghdr user_msg; caddr_t msghdrp; int size_of_msghdr; user_addr_t uiov; int error; - int size_of_iovec; uio_t auio = NULL; struct user_iovec *iovp; KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); if (IS_64BIT_PROCESS(p)) { - msghdrp = (caddr_t)&user_msg; - size_of_msghdr = sizeof (user_msg); - size_of_iovec = sizeof (struct user_iovec); + msghdrp = (caddr_t)&msg64; + size_of_msghdr = sizeof (msg64); } else { - msghdrp = (caddr_t)&msg; - size_of_msghdr = sizeof (msg); - size_of_iovec = sizeof (struct iovec); + msghdrp = (caddr_t)&msg32; + size_of_msghdr = sizeof (msg32); } error = copyin(uap->msg, msghdrp, size_of_msghdr); if (error) { @@ -1372,14 +1852,22 @@ recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t * } /* only need to copy if user process is not 64-bit */ - if (!IS_64BIT_PROCESS(p)) { - user_msg.msg_flags = msg.msg_flags; - user_msg.msg_controllen = msg.msg_controllen; - user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control); - user_msg.msg_iovlen = msg.msg_iovlen; - user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov); - user_msg.msg_namelen = msg.msg_namelen; - user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name); + if (IS_64BIT_PROCESS(p)) { + user_msg.msg_flags = msg64.msg_flags; + user_msg.msg_controllen = msg64.msg_controllen; + user_msg.msg_control = msg64.msg_control; + user_msg.msg_iovlen = msg64.msg_iovlen; + user_msg.msg_iov = msg64.msg_iov; + user_msg.msg_namelen = msg64.msg_namelen; + user_msg.msg_name = msg64.msg_name; + } else { + user_msg.msg_flags = msg32.msg_flags; + user_msg.msg_controllen = msg32.msg_controllen; + user_msg.msg_control = msg32.msg_control; + user_msg.msg_iovlen = msg32.msg_iovlen; + user_msg.msg_iov = msg32.msg_iov; + user_msg.msg_namelen = msg32.msg_namelen; + user_msg.msg_name = msg32.msg_name; } if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) { @@ -1410,29 +1898,37 @@ recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t * } uiov = user_msg.msg_iov; user_msg.msg_iov = CAST_USER_ADDR_T(iovp); - error = copyin(uiov, (caddr_t)iovp, - (user_msg.msg_iovlen * size_of_iovec)); + error = copyin_user_iovec_array(uiov, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + user_msg.msg_iovlen, iovp); if (error) goto done; /* finish setup of uio_t */ - uio_calculateresid(auio); + error = uio_calculateresid(auio); + if (error) { + goto done; + } error = recvit(p, uap->s, &user_msg, auio, 0, retval); if (!error) { user_msg.msg_iov = uiov; - /* only need to copy if user process is not 64-bit */ - if (!IS_64BIT_PROCESS(p)) { - // LP64todo - do all these change? if not, then no need to copy all of them! - msg.msg_flags = user_msg.msg_flags; - msg.msg_controllen = user_msg.msg_controllen; - msg.msg_control = - CAST_DOWN(caddr_t, user_msg.msg_control); - msg.msg_iovlen = user_msg.msg_iovlen; - msg.msg_iov = (struct iovec *) - CAST_DOWN(caddr_t, user_msg.msg_iov); - msg.msg_namelen = user_msg.msg_namelen; - msg.msg_name = CAST_DOWN(caddr_t, user_msg.msg_name); + if (IS_64BIT_PROCESS(p)) { + msg64.msg_flags = user_msg.msg_flags; + msg64.msg_controllen = user_msg.msg_controllen; + msg64.msg_control = user_msg.msg_control; + msg64.msg_iovlen = user_msg.msg_iovlen; + msg64.msg_iov = user_msg.msg_iov; + msg64.msg_namelen = user_msg.msg_namelen; + msg64.msg_name = user_msg.msg_name; + } else { + msg32.msg_flags = user_msg.msg_flags; + msg32.msg_controllen = user_msg.msg_controllen; + msg32.msg_control = user_msg.msg_control; + msg32.msg_iovlen = user_msg.msg_iovlen; + msg32.msg_iov = user_msg.msg_iov; + msg32.msg_namelen = user_msg.msg_namelen; + msg32.msg_name = user_msg.msg_name; } error = copyout(msghdrp, uap->msg, size_of_msghdr); } @@ -1444,6 +1940,166 @@ done: return (error); } +int +recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) +{ + int error = EOPNOTSUPP; + struct user_msghdr_x *user_msg = NULL; + struct uio **uiop = NULL; + struct socket *so; + user_ssize_t len_before = 0, len_after; + int need_drop = 0; + size_t size_of_msghdr; + void *umsgp = NULL; + u_int i; + u_int uiocnt; + + KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0); + + error = file_socket(uap->s, &so); + if (error) { + goto out; + } + need_drop = 1; + if (so == NULL) { + error = EBADF; + goto out; + } + if (so->so_proto->pr_usrreqs->pru_soreceive_list == NULL) { + printf("%s no pru_soreceive_list\n", __func__); + error = EOPNOTSUPP; + goto out; + } + + /* + * Input parameter range check + */ + if (uap->cnt == 0 || uap->cnt > UIO_MAXIOV) { + error = EINVAL; + goto out; + } + user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), + M_TEMP, M_WAITOK | M_ZERO); + if (user_msg == NULL) { + printf("%s _MALLOC() user_msg failed\n", __func__); + error = ENOMEM; + goto out; + } + uiop = _MALLOC(uap->cnt * sizeof(struct uio *), + M_TEMP, M_WAITOK | M_ZERO); + if (uiop == NULL) { + printf("%s _MALLOC() uiop failed\n", __func__); + error = ENOMEM; + goto out; + } + + size_of_msghdr = IS_64BIT_PROCESS(p) ? + sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x); + + umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO); + if (umsgp == NULL) { + printf("%s _MALLOC() user_msg failed\n", __func__); + error = ENOMEM; + goto out; + } + error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr); + if (error) { + printf("%s copyin() failed\n", __func__); + goto out; + } + error = internalize_user_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_READ, uap->cnt, user_msg, uiop); + if (error) { + printf("%s copyin_user_msghdr_array() failed\n", __func__); + goto out; + } + /* + * Make sure the size of each message iovec and + * the aggregate size of all the iovec is valid + */ + if (uio_array_is_valid(uiop, uap->cnt) == 0) { + error = EINVAL; + goto out; + } + + /* + * Sanity check on passed arguments + */ + for (i = 0; i < uap->cnt; i++) { + struct user_msghdr_x *mp = &user_msg[i]; + + if (mp->msg_flags != 0) { + error = EINVAL; + goto out; + } + /* + * No support for address or ancillary data (yet) + */ + if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) { + error = EINVAL; + goto out; + } + if (mp->msg_control != USER_ADDR_NULL || + mp->msg_controllen != 0) { + error = EINVAL; + goto out; + } + } +#if CONFIG_MACF_SOCKET_SUBSET + /* + * We check the state without holding the socket lock; + * if a race condition occurs, it would simply result + * in an extra call to the MAC check function. + */ + if (!(so->so_state & SS_DEFUNCT) && + !(so->so_state & SS_ISCONNECTED) && + !(so->so_proto->pr_flags & PR_CONNREQUIRED) && + (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) + goto out; +#endif /* MAC_SOCKET_SUBSET */ + + len_before = uio_array_resid(uiop, uap->cnt); + + error = so->so_proto->pr_usrreqs->pru_soreceive_list(so, NULL, uiop, + uap->cnt, (struct mbuf **)0, NULL, NULL); + + len_after = uio_array_resid(uiop, uap->cnt); + + if (error) { + if (len_after != len_before && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + } + if (error == 0) { + uiocnt = externalize_user_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_READ, uap->cnt, user_msg, uiop); + + error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr); + if (error) { + printf("%s copyout() failed\n", __func__); + goto out; + } + *retval = (int)(uiocnt); + } +out: + if (need_drop) + file_drop(uap->s); + if (umsgp != NULL) + _FREE(umsgp, M_TEMP); + if (uiop != NULL) { + free_uio_array(uiop, uap->cnt); + _FREE(uiop, M_TEMP); + } + if (user_msg != NULL) + _FREE(user_msg, M_TEMP); + + KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0); + + return (error); +} + /* * Returns: 0 Success * EBADF @@ -1467,7 +2123,7 @@ done: /* ARGSUSED */ int shutdown(__unused struct proc *p, struct shutdown_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct socket *so; int error; @@ -1504,7 +2160,7 @@ out: /* ARGSUSED */ int setsockopt(struct proc *p, struct setsockopt_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct socket *so; struct sockopt sopt; @@ -1535,7 +2191,7 @@ setsockopt(struct proc *p, struct setsockopt_args *uap, &sopt)) != 0) goto out; #endif /* MAC_SOCKET_SUBSET */ - error = sosetopt(so, &sopt); + error = sosetoptlock(so, &sopt, 1); /* will lock socket */ out: file_drop(uap->s); return (error); @@ -1556,7 +2212,7 @@ out: */ int getsockopt(struct proc *p, struct getsockopt_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { int error; socklen_t valsize; @@ -1591,7 +2247,7 @@ getsockopt(struct proc *p, struct getsockopt_args *uap, &sopt)) != 0) goto out; #endif /* MAC_SOCKET_SUBSET */ - error = sogetopt((struct socket *)so, &sopt); + error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */ if (error == 0) { valsize = sopt.sopt_valsize; error = copyout((caddr_t)&valsize, uap->avalsize, @@ -1620,7 +2276,7 @@ out: /* ARGSUSED */ int getsockname(__unused struct proc *p, struct getsockname_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct socket *so; struct sockaddr *sa; @@ -1642,28 +2298,9 @@ getsockname(__unused struct proc *p, struct getsockname_args *uap, socket_lock(so, 1); error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); if (error == 0) { - struct socket_filter_entry *filter; - int filtered = 0; - for (filter = so->so_filt; filter && error == 0; - filter = filter->sfe_next_onsocket) { - if (filter->sfe_filter->sf_filter.sf_getsockname) { - if (!filtered) { - filtered = 1; - sflt_use(so); - socket_unlock(so, 0); - } - error = filter->sfe_filter->sf_filter. - sf_getsockname(filter->sfe_cookie, so, &sa); - } - } - + error = sflt_getsockname(so, &sa); if (error == EJUSTRETURN) error = 0; - - if (filtered) { - socket_lock(so, 0); - sflt_unuse(so); - } } socket_unlock(so, 1); if (error) @@ -1707,7 +2344,7 @@ out: /* ARGSUSED */ int getpeername(__unused struct proc *p, struct getpeername_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct socket *so; struct sockaddr *sa; @@ -1746,28 +2383,9 @@ getpeername(__unused struct proc *p, struct getpeername_args *uap, sa = 0; error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); if (error == 0) { - struct socket_filter_entry *filter; - int filtered = 0; - for (filter = so->so_filt; filter && error == 0; - filter = filter->sfe_next_onsocket) { - if (filter->sfe_filter->sf_filter.sf_getpeername) { - if (!filtered) { - filtered = 1; - sflt_use(so); - socket_unlock(so, 0); - } - error = filter->sfe_filter->sf_filter. - sf_getpeername(filter->sfe_cookie, so, &sa); - } - } - + error = sflt_getpeername(so, &sa); if (error == EJUSTRETURN) error = 0; - - if (filtered) { - socket_lock(so, 0); - sflt_unuse(so); - } } socket_unlock(so, 1); if (error) @@ -1799,22 +2417,34 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type) struct mbuf *m; int error; - if ((u_int)buflen > MLEN) { - if (type == MT_SONAME && (u_int)buflen <= 112) - buflen = MLEN; /* unix domain compat. hack */ - else if ((u_int)buflen > MCLBYTES) + size_t alloc_buflen = (size_t)buflen; + + if(alloc_buflen > INT_MAX/2) + return (EINVAL); +#ifdef __LP64__ + /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */ + if(type == MT_CONTROL) + alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr); +#endif + if (alloc_buflen > MLEN) { + if (type == MT_SONAME && alloc_buflen <= 112) + alloc_buflen = MLEN; /* unix domain compat. hack */ + else if (alloc_buflen > MCLBYTES) return (EINVAL); } m = m_get(M_WAIT, type); if (m == NULL) return (ENOBUFS); - if ((u_int)buflen > MLEN) { + if (alloc_buflen > MLEN) { MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); return (ENOBUFS); } } + /* K64: We still copyin the original buflen because it gets expanded later + * and we lie about the size of the mbuf because it only affects unp_* functions + */ m->m_len = buflen; error = copyin(data, mtod(m, caddr_t), (u_int)buflen); if (error) { @@ -1840,7 +2470,7 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type) */ static int getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr, - size_t len) + size_t len, boolean_t translate_unspec) { struct sockaddr *sa; int error; @@ -1865,8 +2495,8 @@ getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr, * sockets we leave it unchanged and let the lower layer * handle it. */ - if (sa->sa_family == AF_UNSPEC && - INP_CHECK_SOCKAF(so, AF_INET) && + if (translate_unspec && sa->sa_family == AF_UNSPEC && + SOCK_CHECK_DOM(so, PF_INET) && len == sizeof (struct sockaddr_in)) sa->sa_family = AF_INET; @@ -1878,7 +2508,7 @@ getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr, static int getsockaddr_s(struct socket *so, struct sockaddr_storage *ss, - user_addr_t uaddr, size_t len) + user_addr_t uaddr, size_t len, boolean_t translate_unspec) { int error; @@ -1902,8 +2532,8 @@ getsockaddr_s(struct socket *so, struct sockaddr_storage *ss, * sockets we leave it unchanged and let the lower layer * handle it. */ - if (ss->ss_family == AF_UNSPEC && - INP_CHECK_SOCKAF(so, AF_INET) && + if (translate_unspec && ss->ss_family == AF_UNSPEC && + SOCK_CHECK_DOM(so, PF_INET) && len == sizeof (struct sockaddr_in)) ss->ss_family = AF_INET; @@ -1912,40 +2542,268 @@ getsockaddr_s(struct socket *so, struct sockaddr_storage *ss, return (error); } -#if SENDFILE +/* + * Hard limit on the number of source and/or destination addresses + * that can be specified by an application. + */ +#define SOCKADDRLIST_MAX_ENTRIES 64 + +static int +getsockaddrlist(struct socket *so, struct sockaddr_list **slp, + user_addr_t uaddr, socklen_t uaddrlen, boolean_t xlate_unspec) +{ + struct sockaddr_list *sl; + int error = 0; + + *slp = NULL; + + if (uaddr == USER_ADDR_NULL || uaddrlen == 0) + return (EINVAL); + + sl = sockaddrlist_alloc(M_WAITOK); + if (sl == NULL) + return (ENOMEM); + + VERIFY(sl->sl_cnt == 0); + while (uaddrlen > 0 && sl->sl_cnt < SOCKADDRLIST_MAX_ENTRIES) { + struct sockaddr_storage ss; + struct sockaddr_entry *se; + struct sockaddr *sa; + + if (uaddrlen < sizeof (struct sockaddr)) { + error = EINVAL; + break; + } + + bzero(&ss, sizeof (ss)); + error = copyin(uaddr, (caddr_t)&ss, sizeof (struct sockaddr)); + if (error != 0) + break; + + /* getsockaddr does the same but we need them now */ + if (uaddrlen < ss.ss_len || + ss.ss_len < offsetof(struct sockaddr, sa_data[0])) { + error = EINVAL; + break; + } else if (ss.ss_len > sizeof (ss)) { + /* + * sockaddr_storage size is less than SOCK_MAXADDRLEN, + * so the check here is inclusive. We could user the + * latter instead, but seems like an overkill for now. + */ + error = ENAMETOOLONG; + break; + } + + se = sockaddrentry_alloc(M_WAITOK); + if (se == NULL) + break; + + sockaddrlist_insert(sl, se); + + error = getsockaddr(so, &sa, uaddr, ss.ss_len, xlate_unspec); + if (error != 0) + break; + + VERIFY(sa != NULL && sa->sa_len == ss.ss_len); + se->se_addr = sa; + + uaddr += ss.ss_len; + VERIFY(((signed)uaddrlen - ss.ss_len) >= 0); + uaddrlen -= ss.ss_len; + } + + if (error != 0) + sockaddrlist_free(sl); + else + *slp = sl; + + return (error); +} + +int +internalize_user_msghdr_array(const void *src, int spacetype, int direction, + u_int count, struct user_msghdr_x *dst, struct uio **uiop) +{ + int error = 0; + u_int i; + + for (i = 0; i < count; i++) { + uio_t auio; + struct user_iovec *iovp; + struct user_msghdr_x *user_msg = &dst[i]; + + if (spacetype == UIO_USERSPACE64) { + struct user64_msghdr_x *msghdr64; + + msghdr64 = ((struct user64_msghdr_x *)src) + i; + + user_msg->msg_name = msghdr64->msg_name; + user_msg->msg_namelen = msghdr64->msg_namelen; + user_msg->msg_iov = msghdr64->msg_iov; + user_msg->msg_iovlen = msghdr64->msg_iovlen; + user_msg->msg_control = msghdr64->msg_control; + user_msg->msg_controllen = msghdr64->msg_controllen; + user_msg->msg_flags = msghdr64->msg_flags; + user_msg->msg_datalen = msghdr64->msg_datalen; + } else { + struct user32_msghdr_x *msghdr32; + + msghdr32 = ((struct user32_msghdr_x *)src) + i; + + user_msg->msg_name = msghdr32->msg_name; + user_msg->msg_namelen = msghdr32->msg_namelen; + user_msg->msg_iov = msghdr32->msg_iov; + user_msg->msg_iovlen = msghdr32->msg_iovlen; + user_msg->msg_control = msghdr32->msg_control; + user_msg->msg_controllen = msghdr32->msg_controllen; + user_msg->msg_flags = msghdr32->msg_flags; + user_msg->msg_datalen = msghdr32->msg_datalen; + } + + if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) { + error = EMSGSIZE; + goto done; + } + auio = uio_create(user_msg->msg_iovlen, 0, spacetype, direction); + if (auio == NULL) { + error = ENOMEM; + goto done; + } + uiop[i] = auio; + + if (user_msg->msg_iovlen) { + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto done; + } + error = copyin_user_iovec_array(user_msg->msg_iov, + spacetype, user_msg->msg_iovlen, iovp); + if (error) + goto done; + user_msg->msg_iov = CAST_USER_ADDR_T(iovp); + + error = uio_calculateresid(auio); + if (error) + goto done; + user_msg->msg_datalen = uio_resid(auio); + } else { + user_msg->msg_datalen = 0; + } + } +done: + return (error); +} -SYSCTL_DECL(_kern_ipc); +u_int +externalize_user_msghdr_array(void *dst, int spacetype, int direction, + u_int count, const struct user_msghdr_x *src, struct uio **uiop) +{ +#pragma unused(direction) + u_int i; + int seenlast = 0; + u_int retcnt = 0; + + for (i = 0; i < count; i++) { + const struct user_msghdr_x *user_msg = &src[i]; + uio_t auio = uiop[i]; + user_ssize_t len = user_msg->msg_datalen - uio_resid(auio); + + if (user_msg->msg_datalen != 0 && len == 0) + seenlast = 1; + + if (seenlast == 0) + retcnt ++; + + if (spacetype == UIO_USERSPACE64) { + struct user64_msghdr_x *msghdr64; + + msghdr64 = ((struct user64_msghdr_x *)dst) + i; + + msghdr64->msg_flags = user_msg->msg_flags; + msghdr64->msg_datalen = len; + + } else { + struct user32_msghdr_x *msghdr32; + + msghdr32 = ((struct user32_msghdr_x *)dst) + i; + + msghdr32->msg_flags = user_msg->msg_flags; + msghdr32->msg_datalen = len; + } + } + return (retcnt); +} + +void +free_uio_array(struct uio **uiop, u_int count) +{ + u_int i; + + for (i = 0; i < count; i++) { + if (uiop[i] != NULL) + uio_free(uiop[i]); + } +} + +__private_extern__ user_ssize_t +uio_array_resid(struct uio **uiop, u_int count) +{ + user_ssize_t len = 0; + u_int i; + + for (i = 0; i < count; i++) { + struct uio *auio = uiop[i]; + + if (auio!= NULL) + len += uio_resid(auio); + } + return (len); +} + +int +uio_array_is_valid(struct uio **uiop, u_int count) +{ + user_ssize_t len = 0; + u_int i; + + for (i = 0; i < count; i++) { + struct uio *auio = uiop[i]; + + if (auio != NULL) { + user_ssize_t resid = uio_resid(auio); + + /* + * Sanity check on the validity of the iovec: + * no point of going over sb_max + */ + if (resid < 0 || (u_int32_t)resid > sb_max) + return (0); + + len += resid; + if (len < 0 || (u_int32_t)len > sb_max) + return (0); + } + } + return (1); +} + +#if SENDFILE #define SFUIOBUFS 64 -static int sendfileuiobufs = SFUIOBUFS; -SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW, &sendfileuiobufs, - 0, ""); /* Macros to compute the number of mbufs needed depending on cluster size */ #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1) #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1) -/* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */ -#define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT) +/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */ +#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT) /* Upper send limit in the number of mbuf clusters */ #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES) #define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES) -size_t mbuf_pkt_maxlen(mbuf_t m); - -__private_extern__ size_t -mbuf_pkt_maxlen(mbuf_t m) -{ - size_t maxlen = 0; - - while (m) { - maxlen += mbuf_maxlen(m); - m = mbuf_next(m); - } - return (maxlen); -} - static void alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks, struct mbuf **m, boolean_t jumbocl) @@ -1961,13 +2819,13 @@ alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks, * use mbuf_allocpacket(). The logic below is similar to sosend(). */ *m = NULL; - if (pktlen > NBPG && jumbocl) { + if (pktlen > MBIGCLBYTES && jumbocl) { needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen)); *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES); } if (*m == NULL) { needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen)); - *m = m_getpackets_internal(&needed, 1, how, 0, NBPG); + *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES); } /* @@ -1978,7 +2836,7 @@ alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks, */ if (*m == NULL) { needed = 1; - *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, NBPG); + *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES); } if (*m == NULL) panic("%s: blocking allocation returned NULL\n", __func__); @@ -2004,23 +2862,34 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) struct socket *so; struct writev_nocancel_args nuap; user_ssize_t writev_retval; - struct sf_hdtr hdtr; struct user_sf_hdtr user_hdtr; + struct user32_sf_hdtr user32_hdtr; + struct user64_sf_hdtr user64_hdtr; off_t off, xfsize; off_t nbytes = 0, sbytes = 0; int error = 0; size_t sizeof_hdtr; - size_t size_of_iovec; off_t file_size; struct vfs_context context = *vfs_context_current(); - +#define ENXIO_10146739_DBG(err_str) { \ + if (error == ENXIO) { \ + printf(err_str, \ + __func__, \ + "File a radar related to rdar://10146739 \n"); \ + } \ +} KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s, 0, 0, 0, 0); + + AUDIT_ARG(fd, uap->fd); + AUDIT_ARG(value32, uap->s); + /* * Do argument checking. Must be a regular file in, stream * type and connected socket out, positive offset. */ if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) { + ENXIO_10146739_DBG("%s: fp_getfvp error. %s"); goto done; } if ((fp->f_flag & FREAD) == 0) { @@ -2033,6 +2902,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) } error = file_socket(uap->s, &so); if (error) { + ENXIO_10146739_DBG("%s: file_socket error. %s"); goto done1; } if (so == NULL) { @@ -2085,23 +2955,25 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) bzero(&user_hdtr, sizeof (user_hdtr)); if (IS_64BIT_PROCESS(p)) { - hdtrp = (caddr_t)&user_hdtr; - sizeof_hdtr = sizeof (user_hdtr); - size_of_iovec = sizeof (struct user_iovec); + hdtrp = (caddr_t)&user64_hdtr; + sizeof_hdtr = sizeof (user64_hdtr); } else { - hdtrp = (caddr_t)&hdtr; - sizeof_hdtr = sizeof (hdtr); - size_of_iovec = sizeof (struct iovec); + hdtrp = (caddr_t)&user32_hdtr; + sizeof_hdtr = sizeof (user32_hdtr); } error = copyin(uap->hdtr, hdtrp, sizeof_hdtr); if (error) goto done2; - /* need to copy if user process is not 64-bit */ - if (!IS_64BIT_PROCESS(p)) { - user_hdtr.headers = CAST_USER_ADDR_T(hdtr.headers); - user_hdtr.hdr_cnt = hdtr.hdr_cnt; - user_hdtr.trailers = CAST_USER_ADDR_T(hdtr.trailers); - user_hdtr.trl_cnt = hdtr.trl_cnt; + if (IS_64BIT_PROCESS(p)) { + user_hdtr.headers = user64_hdtr.headers; + user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt; + user_hdtr.trailers = user64_hdtr.trailers; + user_hdtr.trl_cnt = user64_hdtr.trl_cnt; + } else { + user_hdtr.headers = user32_hdtr.headers; + user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt; + user_hdtr.trailers = user32_hdtr.trailers; + user_hdtr.trl_cnt = user32_hdtr.trl_cnt; } /* @@ -2113,8 +2985,10 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) nuap.iovp = user_hdtr.headers; nuap.iovcnt = user_hdtr.hdr_cnt; error = writev_nocancel(p, &nuap, &writev_retval); - if (error) + if (error) { + ENXIO_10146739_DBG("%s: writev_nocancel error. %s"); goto done2; + } sbytes += writev_retval; } } @@ -2124,8 +2998,10 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) * 1. We don't want to allocate more mbufs than necessary * 2. We don't want to read past the end of file */ - if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) + if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) { + ENXIO_10146739_DBG("%s: vnode_size error. %s"); goto done2; + } /* * Simply read file data into a chain of mbufs that used with scatter @@ -2133,16 +3009,16 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) * mbufs that point to the file pages. */ socket_lock(so, 1); - error = sblock(&so->so_snd, M_WAIT); + error = sblock(&so->so_snd, SBL_WAIT); if (error) { socket_unlock(so, 1); goto done2; } for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { mbuf_t m0 = NULL, m; - unsigned int nbufs = sendfileuiobufs, i; + unsigned int nbufs = SFUIOBUFS, i; uio_t auio; - char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */ + char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!! */ size_t uiolen; user_ssize_t rlen; off_t pgoff; @@ -2195,14 +3071,15 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) socket_unlock(so, 0); alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl); - pktlen = mbuf_pkt_maxlen(m0); - if (pktlen < xfsize) + pktlen = mbuf_pkthdr_maxlen(m0); + if (pktlen < (size_t)xfsize) xfsize = pktlen; auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof (uio_buf)); if (auio == NULL) { - //printf("sendfile: uio_createwithbuffer failed\n"); + printf("sendfile failed. nbufs = %d. %s", nbufs, + "File a radar related to rdar://10146739.\n"); mbuf_freem(m0); error = ENXIO; socket_lock(so, 0); @@ -2210,11 +3087,11 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) } for (i = 0, m = m0, uiolen = 0; - i < nbufs && m != NULL && uiolen < xfsize; + i < nbufs && m != NULL && uiolen < (size_t)xfsize; i++, m = mbuf_next(m)) { size_t mlen = mbuf_maxlen(m); - if (mlen + uiolen > xfsize) + if (mlen + uiolen > (size_t)xfsize) mlen = xfsize - uiolen; mbuf_setlen(m, mlen); uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)), @@ -2224,7 +3101,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) if (xfsize != uio_resid(auio)) printf("sendfile: xfsize: %lld != uio_resid(auio): " - "%lld\n", xfsize, uio_resid(auio)); + "%lld\n", xfsize, (long long)uio_resid(auio)); KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START), uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff), @@ -2236,6 +3113,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) error == EINTR || error == EWOULDBLOCK)) { error = 0; } else { + ENXIO_10146739_DBG("%s: fo_read error. %s"); mbuf_freem(m0); goto done3; } @@ -2257,7 +3135,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) i++, m = mbuf_next(m)) { size_t mlen = mbuf_maxlen(m); - if (rlen + mlen > xfsize) + if (rlen + mlen > (size_t)xfsize) mlen = xfsize - rlen; mbuf_setlen(m, mlen); @@ -2285,6 +3163,7 @@ retry_space: so->so_error = 0; } m_freem(m0); + ENXIO_10146739_DBG("%s: Unexpected socket error. %s"); goto done3; } /* @@ -2314,53 +3193,21 @@ retry_space: } goto retry_space; } + + struct mbuf *control = NULL; { /* * Socket filter processing */ - struct socket_filter_entry *filter; - int filtered = 0; - struct mbuf *control = NULL; - boolean_t recursive = (so->so_send_filt_thread != NULL); - - error = 0; - for (filter = so->so_filt; filter && (error == 0); - filter = filter->sfe_next_onsocket) { - if (filter->sfe_filter->sf_filter.sf_data_out) { - if (filtered == 0) { - filtered = 1; - so->so_send_filt_thread = - current_thread(); - sflt_use(so); - socket_unlock(so, 0); - } - error = filter->sfe_filter->sf_filter. - sf_data_out(filter->sfe_cookie, so, - NULL, &m0, &control, 0); - } - } - if (filtered) { - /* - * At this point, we've run at least one filter. - * The socket is unlocked as is the socket - * buffer. Clear the recorded filter thread - * only when we are outside of a filter's - * context. This allows for a filter to issue - * multiple inject calls from its sf_data_out - * callback routine. - */ - socket_lock(so, 0); - sflt_unuse(so); - if (!recursive) - so->so_send_filt_thread = 0; - if (error) { - if (error == EJUSTRETURN) { - error = 0; - continue; - } - goto done3; + error = sflt_data_out(so, NULL, &m0, &control, 0); + if (error) { + if (error == EJUSTRETURN) { + error = 0; + continue; } + ENXIO_10146739_DBG("%s: sflt_data_out error. %s"); + goto done3; } /* * End Socket filter processing @@ -2369,14 +3216,15 @@ retry_space: KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START), uap->s, 0, 0, 0, 0); error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0, - 0, 0, p); + 0, control, p); KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START), uap->s, 0, 0, 0, 0); if (error) { + ENXIO_10146739_DBG("%s: pru_send error. %s"); goto done3; } } - sbunlock(&so->so_snd, 0); /* will unlock socket */ + sbunlock(&so->so_snd, FALSE); /* will unlock socket */ /* * Send trailers. Wimp out and use writev(2). */ @@ -2387,8 +3235,10 @@ retry_space: nuap.iovp = user_hdtr.trailers; nuap.iovcnt = user_hdtr.trl_cnt; error = writev_nocancel(p, &nuap, &writev_retval); - if (error) + if (error) { + ENXIO_10146739_DBG("%s: writev_nocancel error. %s"); goto done2; + } sbytes += writev_retval; } done2: @@ -2405,7 +3255,7 @@ done: (unsigned int)(sbytes & 0x0ffffffff), error, 0); return (error); done3: - sbunlock(&so->so_snd, 0); /* will unlock socket */ + sbunlock(&so->so_snd, FALSE); /* will unlock socket */ goto done2; }