X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/fe8ab488e9161c46dd9885d58fc52996dc0249ff..4d15aeb193b2c68f1d38666c317f8d3734f5f083:/bsd/kern/uipc_syscalls.c diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c index 106e11dc2..0aaba750a 100644 --- a/bsd/kern/uipc_syscalls.c +++ b/bsd/kern/uipc_syscalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -89,6 +89,7 @@ #include #include #include +#include #include @@ -127,12 +128,19 @@ #define DBG_FNC_SENDMSG_X NETDBG_CODE(DBG_NETSOCK, (11 << 8)) #define DBG_FNC_RECVMSG_X NETDBG_CODE(DBG_NETSOCK, (12 << 8)) +#if DEBUG || DEVELOPMENT +#define DEBUG_KERNEL_ADDRPERM(_v) (_v) +#define DBG_PRINTF(...) printf(__VA_ARGS__) +#else +#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v) +#define DBG_PRINTF(...) do { } while (0) +#endif /* TODO: should be in header file */ int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int); -static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int, - int32_t *); +static int sendit(struct proc *, struct socket *, struct user_msghdr *, uio_t, + int, int32_t *); static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t, int32_t *); static int connectit(struct socket *, struct sockaddr *); @@ -140,27 +148,42 @@ static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t, size_t, boolean_t); static int getsockaddr_s(struct socket *, struct sockaddr_storage *, user_addr_t, size_t, boolean_t); -static int getsockaddrlist(struct socket *, struct sockaddr_list **, - user_addr_t, socklen_t, boolean_t); #if SENDFILE static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **, boolean_t); #endif /* SENDFILE */ static int connectx_nocancel(struct proc *, struct connectx_args *, int *); -static int connectitx(struct socket *, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *); +static int connectitx(struct socket *, struct sockaddr *, + struct sockaddr *, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uio_t, unsigned int, user_ssize_t *); static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *); static int disconnectx_nocancel(struct proc *, struct disconnectx_args *, int *); static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int); static int internalize_user_msghdr_array(const void *, int, int, u_int, - struct user_msghdr_x *, struct uio **); + struct user_msghdr_x *, struct uio **); static u_int externalize_user_msghdr_array(void *, int, int, u_int, - const struct user_msghdr_x *, struct uio **); + const struct user_msghdr_x *, struct uio **); static void free_uio_array(struct uio **, u_int); static int uio_array_is_valid(struct uio **, u_int); +static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int); +static int internalize_recv_msghdr_array(const void *, int, int, + u_int, struct user_msghdr_x *, struct recv_msg_elem *); +static u_int externalize_recv_msghdr_array(void *, int, int, u_int, + const struct user_msghdr_x *, struct recv_msg_elem *); +static struct recv_msg_elem *alloc_recv_msg_array(u_int count); +static void free_recv_msg_array(struct recv_msg_elem *, u_int); + +SYSCTL_DECL(_kern_ipc); + +static u_int somaxsendmsgx = 100; +SYSCTL_UINT(_kern_ipc, OID_AUTO, maxsendmsgx, + CTLFLAG_RW | CTLFLAG_LOCKED, &somaxsendmsgx, 0, ""); +static u_int somaxrecvmsgx = 100; +SYSCTL_UINT(_kern_ipc, OID_AUTO, maxrecvmsgx, + CTLFLAG_RW | CTLFLAG_LOCKED, &somaxrecvmsgx, 0, ""); /* * System call interface to the socket abstraction. @@ -250,6 +273,10 @@ socket_common(struct proc *p, proc_fdunlock(p); *retval = fd; + if (ENTR_SHOULDTRACE) { + KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START, + fd, 0, (int64_t)VM_KERNEL_ADDRPERM(so)); + } } return (error); } @@ -429,6 +456,7 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, socket_unlock(head, 1); goto out; } +check_again: if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) { socket_unlock(head, 1); error = EWOULDBLOCK; @@ -462,7 +490,6 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, goto out; } - /* * At this point we know that there is at least one connection * ready to be accepted. Remove it from the queue prior to @@ -471,9 +498,20 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, * instead. */ lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + + so_acquire_accept_list(head, NULL); + if (TAILQ_EMPTY(&head->so_comp)) { + so_release_accept_list(head); + goto check_again; + } + so = TAILQ_FIRST(&head->so_comp); TAILQ_REMOVE(&head->so_comp, so, so_list); + so->so_head = NULL; + so->so_state &= ~SS_COMP; head->so_qlen--; + so_release_accept_list(head); + /* unlock head to avoid deadlock with select, keep a ref on head */ socket_unlock(head, 0); @@ -486,8 +524,7 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, */ if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) { socket_lock(so, 1); - so->so_state &= ~(SS_NOFDREF | SS_COMP); - so->so_head = NULL; + so->so_state &= ~SS_NOFDREF; socket_unlock(so, 1); soclose(so); /* Drop reference on listening socket */ @@ -500,7 +537,7 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, * Pass the pre-accepted socket to any interested socket filter(s). * Upon failure, the socket would have been closed by the callee. */ - if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) { + if (so->so_filt != NULL && (error = soacceptfilter(so, head)) != 0) { /* Drop reference on listening socket */ sodereference(head); /* Propagate socket filter's error code to the caller */ @@ -518,8 +555,7 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, * just causes the client to spin. Drop the socket. */ socket_lock(so, 1); - so->so_state &= ~(SS_NOFDREF | SS_COMP); - so->so_head = NULL; + so->so_state &= ~SS_NOFDREF; socket_unlock(so, 1); soclose(so); sodereference(head); @@ -534,9 +570,6 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, if (dosocklock) socket_lock(so, 1); - so->so_state &= ~SS_COMP; - so->so_head = NULL; - /* Sync socket non-blocking/async state with file flags */ if (fp->f_flag & FNONBLOCK) { so->so_state |= SS_NBIO; @@ -601,6 +634,11 @@ releasefd: out: file_drop(fd); + + if (error == 0 && ENTR_SHOULDTRACE) { + KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START, + newfd, 0, (int64_t)VM_KERNEL_ADDRPERM(so)); + } return (error); } @@ -608,7 +646,8 @@ int accept(struct proc *p, struct accept_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval)); + return (accept_nocancel(p, (struct accept_nocancel_args *)uap, + retval)); } /* @@ -638,7 +677,8 @@ int connect(struct proc *p, struct connect_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval)); + return (connect_nocancel(p, (struct connect_nocancel_args *)uap, + retval)); } int @@ -693,13 +733,20 @@ static int connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) { #pragma unused(p, retval) - struct sockaddr_list *src_sl = NULL, *dst_sl = NULL; + struct sockaddr_storage ss, sd; + struct sockaddr *src = NULL, *dst = NULL; struct socket *so; - int error, fd = uap->s; + int error, error1, fd = uap->socket; boolean_t dgram; - connid_t cid = CONNID_ANY; + sae_connid_t cid = SAE_CONNID_ANY; + struct user32_sa_endpoints ep32; + struct user64_sa_endpoints ep64; + struct user_sa_endpoints ep; + user_ssize_t bytes_written = 0; + struct user_iovec *iovp; + uio_t auio = NULL; - AUDIT_ARG(fd, uap->s); + AUDIT_ARG(fd, uap->socket); error = file_socket(fd, &so); if (error != 0) return (error); @@ -708,11 +755,32 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) goto out; } - /* - * XXX Workaround to ensure connectx does not fail because - * of unreaped so_error. - */ - so->so_error = 0; + if (uap->endpoints == USER_ADDR_NULL) { + error = EINVAL; + goto out; + } + + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->endpoints, (caddr_t)&ep64, sizeof(ep64)); + if (error != 0) + goto out; + + ep.sae_srcif = ep64.sae_srcif; + ep.sae_srcaddr = ep64.sae_srcaddr; + ep.sae_srcaddrlen = ep64.sae_srcaddrlen; + ep.sae_dstaddr = ep64.sae_dstaddr; + ep.sae_dstaddrlen = ep64.sae_dstaddrlen; + } else { + error = copyin(uap->endpoints, (caddr_t)&ep32, sizeof(ep32)); + if (error != 0) + goto out; + + ep.sae_srcif = ep32.sae_srcif; + ep.sae_srcaddr = ep32.sae_srcaddr; + ep.sae_srcaddrlen = ep32.sae_srcaddrlen; + ep.sae_dstaddr = ep32.sae_dstaddr; + ep.sae_dstaddrlen = ep32.sae_dstaddrlen; + } /* * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET @@ -720,36 +788,106 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) */ dgram = (so->so_type == SOCK_DGRAM); - /* - * Get socket address(es) now before we obtain socket lock; use - * sockaddr_list for src address for convenience, if present, - * even though it won't hold more than one. - */ - if (uap->src != USER_ADDR_NULL && (error = getsockaddrlist(so, - &src_sl, uap->src, uap->srclen, dgram)) != 0) + /* Get socket address now before we obtain socket lock */ + if (ep.sae_srcaddr != USER_ADDR_NULL) { + if (ep.sae_srcaddrlen > sizeof (ss)) { + error = getsockaddr(so, &src, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram); + } else { + error = getsockaddr_s(so, &ss, ep.sae_srcaddr, ep.sae_srcaddrlen, dgram); + if (error == 0) + src = (struct sockaddr *)&ss; + } + + if (error) + goto out; + } + + if (ep.sae_dstaddr == USER_ADDR_NULL) { + error = EINVAL; goto out; + } - error = getsockaddrlist(so, &dst_sl, uap->dsts, uap->dstlen, dgram); - if (error != 0) + /* Get socket address now before we obtain socket lock */ + if (ep.sae_dstaddrlen > sizeof (sd)) { + error = getsockaddr(so, &dst, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram); + } else { + error = getsockaddr_s(so, &sd, ep.sae_dstaddr, ep.sae_dstaddrlen, dgram); + if (error == 0) + dst = (struct sockaddr *)&sd; + } + + if (error) goto out; - VERIFY(dst_sl != NULL && - !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0); + VERIFY(dst != NULL); + + if (uap->iov != USER_ADDR_NULL) { + /* Verify range before calling uio_create() */ + if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) + return (EINVAL); + + if (uap->len == USER_ADDR_NULL) + return (EINVAL); + + /* allocate a uio to hold the number of iovecs passed */ + auio = uio_create(uap->iovcnt, 0, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + UIO_WRITE); + + if (auio == NULL) { + error = ENOMEM; + goto out; + } + + /* + * get location of iovecs within the uio. + * then copyin the iovecs from user space. + */ + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto out; + } + error = copyin_user_iovec_array(uap->iov, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + uap->iovcnt, iovp); + if (error != 0) + goto out; - error = connectitx(so, &src_sl, &dst_sl, p, uap->ifscope, - uap->aid, &cid); + /* finish setup of uio_t */ + error = uio_calculateresid(auio); + if (error != 0) { + goto out; + } + } + + error = connectitx(so, src, dst, p, ep.sae_srcif, uap->associd, + &cid, auio, uap->flags, &bytes_written); if (error == ERESTART) error = EINTR; - if (uap->cid != USER_ADDR_NULL) - (void) copyout(&cid, uap->cid, sizeof (cid)); + if (uap->len != USER_ADDR_NULL) { + error1 = copyout(&bytes_written, uap->len, sizeof (uap->len)); + /* give precedence to connectitx errors */ + if ((error1 != 0) && (error == 0)) + error = error1; + } + if (uap->connid != USER_ADDR_NULL) { + error1 = copyout(&cid, uap->connid, sizeof (cid)); + /* give precedence to connectitx errors */ + if ((error1 != 0) && (error == 0)) + error = error1; + } out: file_drop(fd); - if (src_sl != NULL) - sockaddrlist_free(src_sl); - if (dst_sl != NULL) - sockaddrlist_free(dst_sl); + if (auio != NULL) { + uio_free(auio); + } + if (src != NULL && src != SA(&ss)) + FREE(src, M_SONAME); + if (dst != NULL && dst != SA(&sd)) + FREE(dst, M_SONAME); return (error); } @@ -814,37 +952,75 @@ out: } static int -connectitx(struct socket *so, struct sockaddr_list **src_sl, - struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid) +connectitx(struct socket *so, struct sockaddr *src, + struct sockaddr *dst, struct proc *p, uint32_t ifscope, + sae_associd_t aid, sae_connid_t *pcid, uio_t auio, unsigned int flags, + user_ssize_t *bytes_written) { - struct sockaddr_entry *se; int error; +#pragma unused (flags) - VERIFY(dst_sl != NULL && *dst_sl != NULL); + VERIFY(dst != NULL); - TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) { - VERIFY(se->se_addr != NULL); - AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), - se->se_addr); + AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), dst); #if CONFIG_MACF_SOCKET_SUBSET - if ((error = mac_socket_check_connect(kauth_cred_get(), - so, se->se_addr)) != 0) - return (error); + if ((error = mac_socket_check_connect(kauth_cred_get(), so, dst)) != 0) + return (error); #endif /* MAC_SOCKET_SUBSET */ - } socket_lock(so, 1); if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { error = EALREADY; goto out; } - error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope, - aid, pcid, 0, NULL, 0); + + if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) && + (flags & CONNECT_DATA_IDEMPOTENT)) { + so->so_flags1 |= SOF1_DATA_IDEMPOTENT; + + if (flags & CONNECT_DATA_AUTHENTICATED) + so->so_flags |= SOF1_DATA_AUTHENTICATED; + } + + /* + * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data. + * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error) + * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data + * Case 3 allows user to combine write with connect even if they have + * no use for TFO (such as regular TCP, and UDP). + * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case) + */ + if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) && + ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) + so->so_flags1 |= SOF1_PRECONNECT_DATA; + + /* + * If a user sets data idempotent and does not pass an uio, or + * sets CONNECT_RESUME_ON_READ_WRITE, this is an error, reset + * SOF1_DATA_IDEMPOTENT. + */ + if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) && + (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) { + /* We should return EINVAL instead perhaps. */ + so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT; + } + + error = soconnectxlocked(so, src, dst, p, ifscope, + aid, pcid, 0, NULL, 0, auio, bytes_written); if (error != 0) { so->so_state &= ~SS_ISCONNECTING; goto out; } + /* + * If, after the call to soconnectxlocked the flag is still set (in case + * data has been queued and the connect() has actually been triggered, + * it will have been unset by the transport), we exit immediately. There + * is no reason to wait on any event. + */ + if (so->so_flags1 & SOF1_PRECONNECT_DATA) { + error = 0; + goto out; + } if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { error = EINPROGRESS; goto out; @@ -1113,7 +1289,7 @@ free1: * sockargs:??? */ static int -sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, +sendit(struct proc *p, struct socket *so, struct user_msghdr *mp, uio_t uiop, int flags, int32_t *retval) { struct mbuf *control = NULL; @@ -1121,20 +1297,10 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, struct sockaddr *to = NULL; boolean_t want_free = TRUE; int error; - struct socket *so; user_ssize_t len; KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0); - error = file_socket(s, &so); - if (error) { - KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0); - return (error); - } - if (so == NULL) { - error = EBADF; - goto out; - } if (mp->msg_name != USER_ADDR_NULL) { if (mp->msg_namelen > sizeof (ss)) { error = getsockaddr(so, &to, mp->msg_name, @@ -1166,9 +1332,9 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, /* * We check the state without holding the socket lock; * if a race condition occurs, it would simply result - * in an extra call to the MAC check function. + * in an extra call to the MAC check function. */ - if ( to != NULL && + if (to != NULL && !(so->so_state & SS_DEFUNCT) && (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) goto bad; @@ -1192,7 +1358,7 @@ bad: FREE(to, M_SONAME); out: KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0); - file_drop(s); + return (error); } @@ -1217,6 +1383,7 @@ sendto_nocancel(struct proc *p, struct user_msghdr msg; int error; uio_t auio = NULL; + struct socket *so; KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); @@ -1225,7 +1392,8 @@ sendto_nocancel(struct proc *p, (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), UIO_WRITE); if (auio == NULL) { - return (ENOMEM); + error = ENOMEM; + goto done; } uio_addiov(auio, uap->buf, uap->len); @@ -1237,11 +1405,20 @@ sendto_nocancel(struct proc *p, msg.msg_control = 0; msg.msg_flags = 0; - error = sendit(p, uap->s, &msg, auio, uap->flags, retval); + error = file_socket(uap->s, &so); + if (error) + goto done; + + if (so == NULL) { + error = EBADF; + } else { + error = sendit(p, so, &msg, auio, uap->flags, retval); + } - if (auio != NULL) { + file_drop(uap->s); +done: + if (auio != NULL) uio_free(auio); - } KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0); @@ -1258,11 +1435,13 @@ int sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval) { __pthread_testcancel(1); - return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval)); + return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, + retval)); } int -sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval) +sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, + int32_t *retval) { struct user32_msghdr msg32; struct user64_msghdr msg64; @@ -1272,6 +1451,7 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *ret int error; uio_t auio = NULL; struct user_iovec *iovp; + struct socket *so; KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); @@ -1350,7 +1530,16 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *ret /* msg_flags is ignored for send */ user_msg.msg_flags = 0; - error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval); + error = file_socket(uap->s, &so); + if (error) { + goto done; + } + if (so == NULL) { + error = EBADF; + } else { + error = sendit(p, so, &user_msg, auio, uap->flags, retval); + } + file_drop(uap->s); done: if (auio != NULL) { uio_free(auio); @@ -1364,17 +1553,17 @@ int sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) { int error = 0; - struct user_msghdr_x *user_msg = NULL; + struct user_msghdr_x *user_msg_x = NULL; struct uio **uiop = NULL; struct socket *so; u_int i; struct sockaddr *to = NULL; - struct mbuf *control = NULL; user_ssize_t len_before = 0, len_after; int need_drop = 0; size_t size_of_msghdr; void *umsgp = NULL; u_int uiocnt; + int has_addr_or_ctl = 0; KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0); @@ -1387,11 +1576,6 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) error = EBADF; goto out; } - if (so->so_proto->pr_usrreqs->pru_sosend_list == NULL) { - printf("%s no pru_sosend_list\n", __func__); - error = EOPNOTSUPP; - goto out; - } /* * Input parameter range check @@ -1400,17 +1584,23 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) error = EINVAL; goto out; } - user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), + /* + * Clip to max currently allowed + */ + if (uap->cnt > somaxsendmsgx) + uap->cnt = somaxsendmsgx; + + user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), M_TEMP, M_WAITOK | M_ZERO); - if (user_msg == NULL) { - printf("%s _MALLOC() user_msg failed\n", __func__); + if (user_msg_x == NULL) { + DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__); error = ENOMEM; goto out; } uiop = _MALLOC(uap->cnt * sizeof(struct uio *), M_TEMP, M_WAITOK | M_ZERO); if (uiop == NULL) { - printf("%s _MALLOC() uiop failed\n", __func__); + DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__); error = ENOMEM; goto out; } @@ -1418,23 +1608,23 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) size_of_msghdr = IS_64BIT_PROCESS(p) ? sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x); - umsgp = _MALLOC(uap->cnt * size_of_msghdr, + umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO); if (umsgp == NULL) { - printf("%s _MALLOC() user_msg failed\n", __func__); + printf("%s _MALLOC() user_msg_x failed\n", __func__); error = ENOMEM; goto out; } error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr); if (error) { - printf("%s copyin() failed\n", __func__); + DBG_PRINTF("%s copyin() failed\n", __func__); goto out; } error = internalize_user_msghdr_array(umsgp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, - UIO_WRITE, uap->cnt, user_msg, uiop); + UIO_WRITE, uap->cnt, user_msg_x, uiop); if (error) { - printf("%s copyin_user_msghdr_array() failed\n", __func__); + DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__); goto out; } /* @@ -1450,7 +1640,7 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) * Sanity check on passed arguments */ for (i = 0; i < uap->cnt; i++) { - struct user_msghdr_x *mp = &user_msg[i]; + struct user_msghdr_x *mp = user_msg_x + i; /* * No flags on send message @@ -1462,43 +1652,72 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) /* * No support for address or ancillary data (yet) */ - if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) { - error = EINVAL; - goto out; - } + if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) + has_addr_or_ctl = 1; + if (mp->msg_control != USER_ADDR_NULL || - mp->msg_controllen != 0) { - error = EINVAL; - goto out; - } + mp->msg_controllen != 0) + has_addr_or_ctl = 1; + #if CONFIG_MACF_SOCKET_SUBSET /* * We check the state without holding the socket lock; * if a race condition occurs, it would simply result - * in an extra call to the MAC check function. + * in an extra call to the MAC check function. * * Note: The following check is never true taken with the * current limitation that we do not accept to pass an address, - * this is effectively placeholder code. If we add support for addresses, - * we will have to check every address. + * this is effectively placeholder code. If we add support for + * addresses, we will have to check every address. */ - if ( to != NULL && + if (to != NULL && !(so->so_state & SS_DEFUNCT) && - (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) + (error = mac_socket_check_send(kauth_cred_get(), so, to)) + != 0) goto out; #endif /* MAC_SOCKET_SUBSET */ } len_before = uio_array_resid(uiop, uap->cnt); - error = so->so_proto->pr_usrreqs->pru_sosend_list(so, to, uiop, - uap->cnt, 0, control, uap->flags); - + /* + * Feed list of packets at once only for connected socket without + * control message + */ + if (so->so_proto->pr_usrreqs->pru_sosend_list != + pru_sosend_list_notsupp && + has_addr_or_ctl == 0 && somaxsendmsgx == 0) { + error = so->so_proto->pr_usrreqs->pru_sosend_list(so, uiop, + uap->cnt, uap->flags); + } else { + for (i = 0; i < uap->cnt; i++) { + struct user_msghdr_x *mp = user_msg_x + i; + struct user_msghdr user_msg; + uio_t auio = uiop[i]; + int32_t tmpval; + + user_msg.msg_flags = mp->msg_flags; + user_msg.msg_controllen = mp->msg_controllen; + user_msg.msg_control = mp->msg_control; + user_msg.msg_iovlen = mp->msg_iovlen; + user_msg.msg_iov = mp->msg_iov; + user_msg.msg_namelen = mp->msg_namelen; + user_msg.msg_name = mp->msg_name; + + error = sendit(p, so, &user_msg, auio, uap->flags, + &tmpval); + if (error != 0) + break; + } + } len_after = uio_array_resid(uiop, uap->cnt); + VERIFY(len_after <= len_before); + if (error != 0) { if (len_after != len_before && (error == ERESTART || - error == EINTR || error == EWOULDBLOCK)) + error == EINTR || error == EWOULDBLOCK || + error == ENOBUFS)) error = 0; /* Generation of SIGPIPE can be controlled per socket */ if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) @@ -1507,7 +1726,7 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) if (error == 0) { uiocnt = externalize_user_msghdr_array(umsgp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, - UIO_WRITE, uap->cnt, user_msg, uiop); + UIO_WRITE, uap->cnt, user_msg_x, uiop); *retval = (int)(uiocnt); } @@ -1520,14 +1739,130 @@ out: free_uio_array(uiop, uap->cnt); _FREE(uiop, M_TEMP); } - if (user_msg != NULL) - _FREE(user_msg, M_TEMP); + if (user_msg_x != NULL) + _FREE(user_msg_x, M_TEMP); KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0); return (error); } + +static int +copyout_sa(struct sockaddr *fromsa, user_addr_t name, socklen_t *namelen) +{ + int error = 0; + socklen_t sa_len = 0; + ssize_t len; + + len = *namelen; + if (len <= 0 || fromsa == 0) { + len = 0; + } else { +#ifndef MIN +#define MIN(a, b) ((a) > (b) ? (b) : (a)) +#endif + sa_len = fromsa->sa_len; + len = MIN((unsigned int)len, sa_len); + error = copyout(fromsa, name, (unsigned)len); + if (error) + goto out; + } + *namelen = sa_len; +out: + return (0); +} + +static int +copyout_control(struct proc *p, struct mbuf *m, user_addr_t control, + socklen_t *controllen, int *flags) +{ + int error = 0; + ssize_t len; + user_addr_t ctlbuf; + + len = *controllen; + *controllen = 0; + ctlbuf = control; + + while (m && len > 0) { + unsigned int tocopy; + struct cmsghdr *cp = mtod(m, struct cmsghdr *); + int cp_size = CMSG_ALIGN(cp->cmsg_len); + int buflen = m->m_len; + + while (buflen > 0 && len > 0) { + /* + * SCM_TIMESTAMP hack because struct timeval has a + * different size for 32 bits and 64 bits processes + */ + if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) { + unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))]; + struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer; + int tmp_space; + struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp); + + tmp_cp->cmsg_level = SOL_SOCKET; + tmp_cp->cmsg_type = SCM_TIMESTAMP; + + if (proc_is64bit(p)) { + struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp); + + tv64->tv_sec = tv->tv_sec; + tv64->tv_usec = tv->tv_usec; + + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval)); + tmp_space = CMSG_SPACE(sizeof(struct user64_timeval)); + } else { + struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp); + + tv32->tv_sec = tv->tv_sec; + tv32->tv_usec = tv->tv_usec; + + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval)); + tmp_space = CMSG_SPACE(sizeof(struct user32_timeval)); + } + if (len >= tmp_space) { + tocopy = tmp_space; + } else { + *flags |= MSG_CTRUNC; + tocopy = len; + } + error = copyout(tmp_buffer, ctlbuf, tocopy); + if (error) + goto out; + } else { + if (cp_size > buflen) { + panic("cp_size > buflen, something" + "wrong with alignment!"); + } + if (len >= cp_size) { + tocopy = cp_size; + } else { + *flags |= MSG_CTRUNC; + tocopy = len; + } + error = copyout((caddr_t) cp, ctlbuf, tocopy); + if (error) + goto out; + } + + ctlbuf += tocopy; + len -= tocopy; + + buflen -= cp_size; + cp = (struct cmsghdr *)(void *) + ((unsigned char *) cp + cp_size); + cp_size = CMSG_ALIGN(cp->cmsg_len); + } + + m = m->m_next; + } + *controllen = ctlbuf - control; +out: + return (error); +} + /* * Returns: 0 Success * ENOTSOCK @@ -1556,8 +1891,7 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, { ssize_t len; int error; - struct mbuf *m, *control = 0; - user_addr_t ctlbuf; + struct mbuf *control = 0; struct socket *so; struct sockaddr *fromsa = 0; struct fileproc *fp; @@ -1614,120 +1948,26 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, error == EINTR || error == EWOULDBLOCK)) error = 0; } - if (error) goto out; *retval = len - uio_resid(uiop); - if (mp->msg_name) { - socklen_t sa_len = 0; - len = mp->msg_namelen; - if (len <= 0 || fromsa == 0) { - len = 0; - } else { -#ifndef MIN -#define MIN(a, b) ((a) > (b) ? (b) : (a)) -#endif - sa_len = fromsa->sa_len; - len = MIN((unsigned int)len, sa_len); - error = copyout(fromsa, mp->msg_name, (unsigned)len); - if (error) - goto out; - } - mp->msg_namelen = sa_len; + if (mp->msg_name) { + error = copyout_sa(fromsa, mp->msg_name, &mp->msg_namelen); + if (error) + goto out; /* return the actual, untruncated address length */ if (namelenp && - (error = copyout((caddr_t)&sa_len, namelenp, + (error = copyout((caddr_t)&mp->msg_namelen, namelenp, sizeof (int)))) { goto out; } } - if (mp->msg_control) { - len = mp->msg_controllen; - m = control; - mp->msg_controllen = 0; - ctlbuf = mp->msg_control; - - while (m && len > 0) { - unsigned int tocopy; - struct cmsghdr *cp = mtod(m, struct cmsghdr *); - int cp_size = CMSG_ALIGN(cp->cmsg_len); - int buflen = m->m_len; - - while (buflen > 0 && len > 0) { - - /* - SCM_TIMESTAMP hack because struct timeval has a - * different size for 32 bits and 64 bits processes - */ - if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) { - unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))]; - struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer; - int tmp_space; - struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp); - - tmp_cp->cmsg_level = SOL_SOCKET; - tmp_cp->cmsg_type = SCM_TIMESTAMP; - - if (proc_is64bit(p)) { - struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp); - - tv64->tv_sec = tv->tv_sec; - tv64->tv_usec = tv->tv_usec; - - tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval)); - tmp_space = CMSG_SPACE(sizeof(struct user64_timeval)); - } else { - struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp); - - tv32->tv_sec = tv->tv_sec; - tv32->tv_usec = tv->tv_usec; - - tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval)); - tmp_space = CMSG_SPACE(sizeof(struct user32_timeval)); - } - if (len >= tmp_space) { - tocopy = tmp_space; - } else { - mp->msg_flags |= MSG_CTRUNC; - tocopy = len; - } - error = copyout(tmp_buffer, ctlbuf, tocopy); - if (error) - goto out; - } else { - - if (cp_size > buflen) { - panic("cp_size > buflen, something wrong with alignment!"); - } - - if (len >= cp_size) { - tocopy = cp_size; - } else { - mp->msg_flags |= MSG_CTRUNC; - tocopy = len; - } - - error = copyout((caddr_t) cp, ctlbuf, - tocopy); - if (error) - goto out; - } - - - ctlbuf += tocopy; - len -= tocopy; - - buflen -= cp_size; - cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size); - cp_size = CMSG_ALIGN(cp->cmsg_len); - } - - m = m->m_next; - } - mp->msg_controllen = ctlbuf - mp->msg_control; + if (mp->msg_control) { + error = copyout_control(p, control, mp->msg_control, + &mp->msg_controllen, &mp->msg_flags); } out: if (fromsa) @@ -1759,11 +1999,13 @@ int recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval)); + return (recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, + retval)); } int -recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval) +recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, + int32_t *retval) { struct user_msghdr msg; int error; @@ -1820,11 +2062,13 @@ int recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval)); + return (recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, + retval)); } int -recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval) +recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, + int32_t *retval) { struct user32_msghdr msg32; struct user64_msghdr msg64; @@ -1944,8 +2188,8 @@ int recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) { int error = EOPNOTSUPP; - struct user_msghdr_x *user_msg = NULL; - struct uio **uiop = NULL; + struct user_msghdr_x *user_msg_x = NULL; + struct recv_msg_elem *recv_msg_array = NULL; struct socket *so; user_ssize_t len_before = 0, len_after; int need_drop = 0; @@ -1965,12 +2209,6 @@ recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) error = EBADF; goto out; } - if (so->so_proto->pr_usrreqs->pru_soreceive_list == NULL) { - printf("%s no pru_soreceive_list\n", __func__); - error = EOPNOTSUPP; - goto out; - } - /* * Input parameter range check */ @@ -1978,73 +2216,61 @@ recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) error = EINVAL; goto out; } - user_msg = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), + if (uap->cnt > somaxrecvmsgx) + uap->cnt = somaxrecvmsgx; + + user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x), M_TEMP, M_WAITOK | M_ZERO); - if (user_msg == NULL) { - printf("%s _MALLOC() user_msg failed\n", __func__); + if (user_msg_x == NULL) { + DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__); error = ENOMEM; goto out; } - uiop = _MALLOC(uap->cnt * sizeof(struct uio *), - M_TEMP, M_WAITOK | M_ZERO); - if (uiop == NULL) { - printf("%s _MALLOC() uiop failed\n", __func__); + recv_msg_array = alloc_recv_msg_array(uap->cnt); + if (recv_msg_array == NULL) { + DBG_PRINTF("%s alloc_recv_msg_array() failed\n", __func__); error = ENOMEM; goto out; } - size_of_msghdr = IS_64BIT_PROCESS(p) ? sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x); umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO); if (umsgp == NULL) { - printf("%s _MALLOC() user_msg failed\n", __func__); + DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__); error = ENOMEM; goto out; } error = copyin(uap->msgp, umsgp, uap->cnt * size_of_msghdr); if (error) { - printf("%s copyin() failed\n", __func__); + DBG_PRINTF("%s copyin() failed\n", __func__); goto out; } - error = internalize_user_msghdr_array(umsgp, + error = internalize_recv_msghdr_array(umsgp, IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, - UIO_READ, uap->cnt, user_msg, uiop); + UIO_READ, uap->cnt, user_msg_x, recv_msg_array); if (error) { - printf("%s copyin_user_msghdr_array() failed\n", __func__); + DBG_PRINTF("%s copyin_user_msghdr_array() failed\n", __func__); goto out; } /* * Make sure the size of each message iovec and * the aggregate size of all the iovec is valid */ - if (uio_array_is_valid(uiop, uap->cnt) == 0) { + if (recv_msg_array_is_valid(recv_msg_array, uap->cnt) == 0) { error = EINVAL; goto out; } - /* * Sanity check on passed arguments */ for (i = 0; i < uap->cnt; i++) { - struct user_msghdr_x *mp = &user_msg[i]; + struct user_msghdr_x *mp = user_msg_x + i; if (mp->msg_flags != 0) { error = EINVAL; goto out; } - /* - * No support for address or ancillary data (yet) - */ - if (mp->msg_name != USER_ADDR_NULL || mp->msg_namelen != 0) { - error = EINVAL; - goto out; - } - if (mp->msg_control != USER_ADDR_NULL || - mp->msg_controllen != 0) { - error = EINVAL; - goto out; - } } #if CONFIG_MACF_SOCKET_SUBSET /* @@ -2059,44 +2285,107 @@ recvmsg_x(struct proc *p, struct recvmsg_x_args *uap, user_ssize_t *retval) goto out; #endif /* MAC_SOCKET_SUBSET */ - len_before = uio_array_resid(uiop, uap->cnt); + len_before = recv_msg_array_resid(recv_msg_array, uap->cnt); - error = so->so_proto->pr_usrreqs->pru_soreceive_list(so, NULL, uiop, - uap->cnt, (struct mbuf **)0, NULL, NULL); + if (so->so_proto->pr_usrreqs->pru_soreceive_list != + pru_soreceive_list_notsupp && + somaxrecvmsgx == 0) { + error = so->so_proto->pr_usrreqs->pru_soreceive_list(so, + recv_msg_array, uap->cnt, &uap->flags); + } else { + int flags = uap->flags; - len_after = uio_array_resid(uiop, uap->cnt); + for (i = 0; i < uap->cnt; i++) { + struct recv_msg_elem *recv_msg_elem; + uio_t auio; + struct sockaddr **psa; + struct mbuf **controlp; + + recv_msg_elem = recv_msg_array + i; + auio = recv_msg_elem->uio; + + /* + * Do not block if we got at least one packet + */ + if (i > 0) + flags |= MSG_DONTWAIT; + + psa = (recv_msg_elem->which & SOCK_MSG_SA) ? + &recv_msg_elem->psa : NULL; + controlp = (recv_msg_elem->which & SOCK_MSG_CONTROL) ? + &recv_msg_elem->controlp : NULL; + + error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa, + auio, (struct mbuf **)0, controlp, &flags); + if (error) + break; + /* + * We have some data + */ + recv_msg_elem->which |= SOCK_MSG_DATA; + /* + * Stop on partial copy + */ + if (flags & (MSG_RCVMORE | MSG_TRUNC)) + break; + } + if ((uap->flags & MSG_DONTWAIT) == 0) + flags &= ~MSG_DONTWAIT; + uap->flags = flags; + } + + len_after = recv_msg_array_resid(recv_msg_array, uap->cnt); if (error) { if (len_after != len_before && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; + else + goto out; } - if (error == 0) { - uiocnt = externalize_user_msghdr_array(umsgp, - IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, - UIO_READ, uap->cnt, user_msg, uiop); - error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr); - if (error) { - printf("%s copyout() failed\n", __func__); - goto out; + uiocnt = externalize_recv_msghdr_array(umsgp, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + UIO_READ, uap->cnt, user_msg_x, recv_msg_array); + + error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr); + if (error) { + DBG_PRINTF("%s copyout() failed\n", __func__); + goto out; + } + *retval = (int)(uiocnt); + + for (i = 0; i < uap->cnt; i++) { + struct user_msghdr_x *mp = user_msg_x + i; + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + struct sockaddr *fromsa = recv_msg_elem->psa; + + if (mp->msg_name) { + error = copyout_sa(fromsa, mp->msg_name, + &mp->msg_namelen); + if (error) + goto out; + } + if (mp->msg_control) { + error = copyout_control(p, recv_msg_elem->controlp, + mp->msg_control, &mp->msg_controllen, + &mp->msg_flags); + if (error) + goto out; } - *retval = (int)(uiocnt); } out: if (need_drop) file_drop(uap->s); if (umsgp != NULL) _FREE(umsgp, M_TEMP); - if (uiop != NULL) { - free_uio_array(uiop, uap->cnt); - _FREE(uiop, M_TEMP); - } - if (user_msg != NULL) - _FREE(user_msg, M_TEMP); - + if (recv_msg_array != NULL) + free_recv_msg_array(recv_msg_array, uap->cnt); + if (user_msg_x != NULL) + _FREE(user_msg_x, M_TEMP); + KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0); - + return (error); } @@ -2419,16 +2708,20 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type) size_t alloc_buflen = (size_t)buflen; - if(alloc_buflen > INT_MAX/2) + if (alloc_buflen > INT_MAX/2) return (EINVAL); #ifdef __LP64__ - /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */ - if(type == MT_CONTROL) - alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr); + /* + * The fd's in the buffer must expand to be pointers, thus we need twice + * as much space + */ + if (type == MT_CONTROL) + alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + + sizeof(struct cmsghdr); #endif if (alloc_buflen > MLEN) { if (type == MT_SONAME && alloc_buflen <= 112) - alloc_buflen = MLEN; /* unix domain compat. hack */ + alloc_buflen = MLEN; /* unix domain compat. hack */ else if (alloc_buflen > MCLBYTES) return (EINVAL); } @@ -2442,8 +2735,10 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type) return (ENOBUFS); } } - /* K64: We still copyin the original buflen because it gets expanded later - * and we lie about the size of the mbuf because it only affects unp_* functions + /* + * K64: We still copyin the original buflen because it gets expanded + * later and we lie about the size of the mbuf because it only affects + * unp_* functions */ m->m_len = buflen; error = copyin(data, mtod(m, caddr_t), (u_int)buflen); @@ -2542,101 +2837,104 @@ getsockaddr_s(struct socket *so, struct sockaddr_storage *ss, return (error); } -/* - * Hard limit on the number of source and/or destination addresses - * that can be specified by an application. - */ -#define SOCKADDRLIST_MAX_ENTRIES 64 - -static int -getsockaddrlist(struct socket *so, struct sockaddr_list **slp, - user_addr_t uaddr, socklen_t uaddrlen, boolean_t xlate_unspec) +int +internalize_user_msghdr_array(const void *src, int spacetype, int direction, + u_int count, struct user_msghdr_x *dst, struct uio **uiop) { - struct sockaddr_list *sl; int error = 0; + u_int i; + u_int namecnt = 0; + u_int ctlcnt = 0; - *slp = NULL; - - if (uaddr == USER_ADDR_NULL || uaddrlen == 0) - return (EINVAL); + for (i = 0; i < count; i++) { + uio_t auio; + struct user_iovec *iovp; + struct user_msghdr_x *user_msg = dst + i; - sl = sockaddrlist_alloc(M_WAITOK); - if (sl == NULL) - return (ENOMEM); + if (spacetype == UIO_USERSPACE64) { + const struct user64_msghdr_x *msghdr64; - VERIFY(sl->sl_cnt == 0); - while (uaddrlen > 0 && sl->sl_cnt < SOCKADDRLIST_MAX_ENTRIES) { - struct sockaddr_storage ss; - struct sockaddr_entry *se; - struct sockaddr *sa; + msghdr64 = ((const struct user64_msghdr_x *)src) + i; - if (uaddrlen < sizeof (struct sockaddr)) { - error = EINVAL; - break; - } + user_msg->msg_name = msghdr64->msg_name; + user_msg->msg_namelen = msghdr64->msg_namelen; + user_msg->msg_iov = msghdr64->msg_iov; + user_msg->msg_iovlen = msghdr64->msg_iovlen; + user_msg->msg_control = msghdr64->msg_control; + user_msg->msg_controllen = msghdr64->msg_controllen; + user_msg->msg_flags = msghdr64->msg_flags; + user_msg->msg_datalen = msghdr64->msg_datalen; + } else { + const struct user32_msghdr_x *msghdr32; - bzero(&ss, sizeof (ss)); - error = copyin(uaddr, (caddr_t)&ss, sizeof (struct sockaddr)); - if (error != 0) - break; + msghdr32 = ((const struct user32_msghdr_x *)src) + i; - /* getsockaddr does the same but we need them now */ - if (uaddrlen < ss.ss_len || - ss.ss_len < offsetof(struct sockaddr, sa_data[0])) { - error = EINVAL; - break; - } else if (ss.ss_len > sizeof (ss)) { - /* - * sockaddr_storage size is less than SOCK_MAXADDRLEN, - * so the check here is inclusive. We could user the - * latter instead, but seems like an overkill for now. - */ - error = ENAMETOOLONG; - break; + user_msg->msg_name = msghdr32->msg_name; + user_msg->msg_namelen = msghdr32->msg_namelen; + user_msg->msg_iov = msghdr32->msg_iov; + user_msg->msg_iovlen = msghdr32->msg_iovlen; + user_msg->msg_control = msghdr32->msg_control; + user_msg->msg_controllen = msghdr32->msg_controllen; + user_msg->msg_flags = msghdr32->msg_flags; + user_msg->msg_datalen = msghdr32->msg_datalen; } - se = sockaddrentry_alloc(M_WAITOK); - if (se == NULL) - break; - - sockaddrlist_insert(sl, se); + if (user_msg->msg_iovlen <= 0 || + user_msg->msg_iovlen > UIO_MAXIOV) { + error = EMSGSIZE; + goto done; + } + auio = uio_create(user_msg->msg_iovlen, 0, spacetype, + direction); + if (auio == NULL) { + error = ENOMEM; + goto done; + } + uiop[i] = auio; - error = getsockaddr(so, &sa, uaddr, ss.ss_len, xlate_unspec); - if (error != 0) - break; + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto done; + } + error = copyin_user_iovec_array(user_msg->msg_iov, + spacetype, user_msg->msg_iovlen, iovp); + if (error) + goto done; + user_msg->msg_iov = CAST_USER_ADDR_T(iovp); - VERIFY(sa != NULL && sa->sa_len == ss.ss_len); - se->se_addr = sa; + error = uio_calculateresid(auio); + if (error) + goto done; + user_msg->msg_datalen = uio_resid(auio); - uaddr += ss.ss_len; - VERIFY(((signed)uaddrlen - ss.ss_len) >= 0); - uaddrlen -= ss.ss_len; + if (user_msg->msg_name && user_msg->msg_namelen) + namecnt++; + if (user_msg->msg_control && user_msg->msg_controllen) + ctlcnt++; } - - if (error != 0) - sockaddrlist_free(sl); - else - *slp = sl; +done: return (error); } int -internalize_user_msghdr_array(const void *src, int spacetype, int direction, - u_int count, struct user_msghdr_x *dst, struct uio **uiop) +internalize_recv_msghdr_array(const void *src, int spacetype, int direction, + u_int count, struct user_msghdr_x *dst, + struct recv_msg_elem *recv_msg_array) { int error = 0; u_int i; for (i = 0; i < count; i++) { - uio_t auio; struct user_iovec *iovp; - struct user_msghdr_x *user_msg = &dst[i]; + struct user_msghdr_x *user_msg = dst + i; + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; if (spacetype == UIO_USERSPACE64) { - struct user64_msghdr_x *msghdr64; + const struct user64_msghdr_x *msghdr64; - msghdr64 = ((struct user64_msghdr_x *)src) + i; + msghdr64 = ((const struct user64_msghdr_x *)src) + i; user_msg->msg_name = msghdr64->msg_name; user_msg->msg_namelen = msghdr64->msg_namelen; @@ -2647,9 +2945,9 @@ internalize_user_msghdr_array(const void *src, int spacetype, int direction, user_msg->msg_flags = msghdr64->msg_flags; user_msg->msg_datalen = msghdr64->msg_datalen; } else { - struct user32_msghdr_x *msghdr32; + const struct user32_msghdr_x *msghdr32; - msghdr32 = ((struct user32_msghdr_x *)src) + i; + msghdr32 = ((const struct user32_msghdr_x *)src) + i; user_msg->msg_name = msghdr32->msg_name; user_msg->msg_namelen = msghdr32->msg_namelen; @@ -2660,45 +2958,48 @@ internalize_user_msghdr_array(const void *src, int spacetype, int direction, user_msg->msg_flags = msghdr32->msg_flags; user_msg->msg_datalen = msghdr32->msg_datalen; } - - if (user_msg->msg_iovlen <= 0 || user_msg->msg_iovlen > UIO_MAXIOV) { + + if (user_msg->msg_iovlen <= 0 || + user_msg->msg_iovlen > UIO_MAXIOV) { error = EMSGSIZE; goto done; } - auio = uio_create(user_msg->msg_iovlen, 0, spacetype, direction); - if (auio == NULL) { + recv_msg_elem->uio = uio_create(user_msg->msg_iovlen, 0, + spacetype, direction); + if (recv_msg_elem->uio == NULL) { error = ENOMEM; goto done; } - uiop[i] = auio; - if (user_msg->msg_iovlen) { - iovp = uio_iovsaddr(auio); - if (iovp == NULL) { - error = ENOMEM; - goto done; - } - error = copyin_user_iovec_array(user_msg->msg_iov, - spacetype, user_msg->msg_iovlen, iovp); - if (error) - goto done; - user_msg->msg_iov = CAST_USER_ADDR_T(iovp); - - error = uio_calculateresid(auio); - if (error) - goto done; - user_msg->msg_datalen = uio_resid(auio); - } else { - user_msg->msg_datalen = 0; + iovp = uio_iovsaddr(recv_msg_elem->uio); + if (iovp == NULL) { + error = ENOMEM; + goto done; } + error = copyin_user_iovec_array(user_msg->msg_iov, + spacetype, user_msg->msg_iovlen, iovp); + if (error) + goto done; + user_msg->msg_iov = CAST_USER_ADDR_T(iovp); + + error = uio_calculateresid(recv_msg_elem->uio); + if (error) + goto done; + user_msg->msg_datalen = uio_resid(recv_msg_elem->uio); + + if (user_msg->msg_name && user_msg->msg_namelen) + recv_msg_elem->which |= SOCK_MSG_SA; + if (user_msg->msg_control && user_msg->msg_controllen) + recv_msg_elem->which |= SOCK_MSG_CONTROL; } done: + return (error); } u_int externalize_user_msghdr_array(void *dst, int spacetype, int direction, - u_int count, const struct user_msghdr_x *src, struct uio **uiop) + u_int count, const struct user_msghdr_x *src, struct uio **uiop) { #pragma unused(direction) u_int i; @@ -2706,13 +3007,13 @@ externalize_user_msghdr_array(void *dst, int spacetype, int direction, u_int retcnt = 0; for (i = 0; i < count; i++) { - const struct user_msghdr_x *user_msg = &src[i]; + const struct user_msghdr_x *user_msg = src + i; uio_t auio = uiop[i]; user_ssize_t len = user_msg->msg_datalen - uio_resid(auio); if (user_msg->msg_datalen != 0 && len == 0) seenlast = 1; - + if (seenlast == 0) retcnt ++; @@ -2723,7 +3024,54 @@ externalize_user_msghdr_array(void *dst, int spacetype, int direction, msghdr64->msg_flags = user_msg->msg_flags; msghdr64->msg_datalen = len; - + + } else { + struct user32_msghdr_x *msghdr32; + + msghdr32 = ((struct user32_msghdr_x *)dst) + i; + + msghdr32->msg_flags = user_msg->msg_flags; + msghdr32->msg_datalen = len; + } + } + return (retcnt); +} + +u_int +externalize_recv_msghdr_array(void *dst, int spacetype, int direction, + u_int count, const struct user_msghdr_x *src, + struct recv_msg_elem *recv_msg_array) +{ + u_int i; + int seenlast = 0; + u_int retcnt = 0; + + for (i = 0; i < count; i++) { + const struct user_msghdr_x *user_msg = src + i; + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + user_ssize_t len; + + len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio); + + if (direction == UIO_READ) { + if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) + seenlast = 1; + } else { + if (user_msg->msg_datalen != 0 && len == 0) + seenlast = 1; + } + + if (seenlast == 0) + retcnt ++; + + if (spacetype == UIO_USERSPACE64) { + struct user64_msghdr_x *msghdr64; + + msghdr64 = ((struct user64_msghdr_x *)dst) + i; + + msghdr64->msg_flags = user_msg->msg_flags; + msghdr64->msg_datalen = len; + } else { struct user32_msghdr_x *msghdr32; @@ -2756,7 +3104,7 @@ uio_array_resid(struct uio **uiop, u_int count) for (i = 0; i < count; i++) { struct uio *auio = uiop[i]; - if (auio!= NULL) + if (auio != NULL) len += uio_resid(auio); } return (len); @@ -2770,17 +3118,90 @@ uio_array_is_valid(struct uio **uiop, u_int count) for (i = 0; i < count; i++) { struct uio *auio = uiop[i]; - + if (auio != NULL) { user_ssize_t resid = uio_resid(auio); - + /* * Sanity check on the validity of the iovec: * no point of going over sb_max */ if (resid < 0 || (u_int32_t)resid > sb_max) return (0); - + + len += resid; + if (len < 0 || (u_int32_t)len > sb_max) + return (0); + } + } + return (1); +} + + +struct recv_msg_elem * +alloc_recv_msg_array(u_int count) +{ + struct recv_msg_elem *recv_msg_array; + + recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem), + M_TEMP, M_WAITOK | M_ZERO); + + return (recv_msg_array); +} + +void +free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count) +{ + u_int i; + + for (i = 0; i < count; i++) { + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + + if (recv_msg_elem->uio != NULL) + uio_free(recv_msg_elem->uio); + if (recv_msg_elem->psa != NULL) + _FREE(recv_msg_elem->psa, M_TEMP); + if (recv_msg_elem->controlp != NULL) + m_freem(recv_msg_elem->controlp); + } + _FREE(recv_msg_array, M_TEMP); +} + + +__private_extern__ user_ssize_t +recv_msg_array_resid(struct recv_msg_elem *recv_msg_array, u_int count) +{ + user_ssize_t len = 0; + u_int i; + + for (i = 0; i < count; i++) { + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + + if (recv_msg_elem->uio != NULL) + len += uio_resid(recv_msg_elem->uio); + } + return (len); +} + +int +recv_msg_array_is_valid(struct recv_msg_elem *recv_msg_array, u_int count) +{ + user_ssize_t len = 0; + u_int i; + + for (i = 0; i < count; i++) { + struct recv_msg_elem *recv_msg_elem = recv_msg_array + i; + + if (recv_msg_elem->uio != NULL) { + user_ssize_t resid = uio_resid(recv_msg_elem->uio); + + /* + * Sanity check on the validity of the iovec: + * no point of going over sb_max + */ + if (resid < 0 || (u_int32_t)resid > sb_max) + return (0); + len += resid; if (len < 0 || (u_int32_t)len > sb_max) return (0); @@ -2794,11 +3215,11 @@ uio_array_is_valid(struct uio **uiop, u_int count) #define SFUIOBUFS 64 /* Macros to compute the number of mbufs needed depending on cluster size */ -#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1) -#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1) +#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> M16KCLSHIFT) + 1) +#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> MBIGCLSHIFT) + 1) /* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */ -#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT) +#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT) /* Upper send limit in the number of mbuf clusters */ #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES) @@ -2871,13 +3292,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) size_t sizeof_hdtr; off_t file_size; struct vfs_context context = *vfs_context_current(); -#define ENXIO_10146739_DBG(err_str) { \ - if (error == ENXIO) { \ - printf(err_str, \ - __func__, \ - "File a radar related to rdar://10146739 \n"); \ - } \ -} + KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s, 0, 0, 0, 0); @@ -2889,7 +3304,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) * type and connected socket out, positive offset. */ if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) { - ENXIO_10146739_DBG("%s: fp_getfvp error. %s"); goto done; } if ((fp->f_flag & FREAD) == 0) { @@ -2902,7 +3316,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) } error = file_socket(uap->s, &so); if (error) { - ENXIO_10146739_DBG("%s: file_socket error. %s"); goto done1; } if (so == NULL) { @@ -2986,7 +3399,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) nuap.iovcnt = user_hdtr.hdr_cnt; error = writev_nocancel(p, &nuap, &writev_retval); if (error) { - ENXIO_10146739_DBG("%s: writev_nocancel error. %s"); goto done2; } sbytes += writev_retval; @@ -2999,7 +3411,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) * 2. We don't want to read past the end of file */ if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) { - ENXIO_10146739_DBG("%s: vnode_size error. %s"); goto done2; } @@ -3113,7 +3524,6 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) error == EINTR || error == EWOULDBLOCK)) { error = 0; } else { - ENXIO_10146739_DBG("%s: fo_read error. %s"); mbuf_freem(m0); goto done3; } @@ -3124,7 +3534,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) (unsigned int)(xfsize & 0x0ffffffff), 0, 0); if (xfsize == 0) { - //printf("sendfile: fo_read 0 bytes, EOF\n"); + // printf("sendfile: fo_read 0 bytes, EOF\n"); break; } if (xfsize + off > file_size) @@ -3163,7 +3573,6 @@ retry_space: so->so_error = 0; } m_freem(m0); - ENXIO_10146739_DBG("%s: Unexpected socket error. %s"); goto done3; } /* @@ -3206,7 +3615,6 @@ retry_space: error = 0; continue; } - ENXIO_10146739_DBG("%s: sflt_data_out error. %s"); goto done3; } /* @@ -3220,7 +3628,6 @@ retry_space: KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START), uap->s, 0, 0, 0, 0); if (error) { - ENXIO_10146739_DBG("%s: pru_send error. %s"); goto done3; } } @@ -3236,7 +3643,6 @@ retry_space: nuap.iovcnt = user_hdtr.trl_cnt; error = writev_nocancel(p, &nuap, &writev_retval); if (error) { - ENXIO_10146739_DBG("%s: writev_nocancel error. %s"); goto done2; } sbytes += writev_retval;