X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/55e303ae13a4cf49d70f2294092726f2fffb9ef2..6601e61aa18bf4f09af135ff61fc7f4771d23b06:/bsd/kern/uipc_usrreq.c diff --git a/bsd/kern/uipc_usrreq.c b/bsd/kern/uipc_usrreq.c index 10688922f..ca560d415 100644 --- a/bsd/kern/uipc_usrreq.c +++ b/bsd/kern/uipc_usrreq.c @@ -1,24 +1,21 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. * * @APPLE_LICENSE_HEADER_END@ */ @@ -63,12 +60,13 @@ #include #include #include /* XXX must be before */ -#include +#include #include #include #include #include -#include +#include +#include #include #include #include @@ -76,14 +74,27 @@ #include #include #include -#include +#include +#include #include +#include +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data struct zone *unp_zone; static unp_gen_t unp_gencnt; static u_int unp_count; +static lck_attr_t *unp_mtx_attr; +static lck_grp_t *unp_mtx_grp; +static lck_grp_attr_t *unp_mtx_grp_attr; +static lck_rw_t *unp_list_mtx; + +extern lck_mtx_t * uipc_lock; static struct unp_head unp_shead, unp_dhead; /* @@ -95,22 +106,24 @@ static struct unp_head unp_shead, unp_dhead; * need a proper out-of-band * lock pushdown */ -static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; +static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL, { 0 } }; static ino_t unp_ino; /* prototype for fake inode numbers */ -static int unp_attach __P((struct socket *)); -static void unp_detach __P((struct unpcb *)); -static int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *)); -static int unp_connect __P((struct socket *,struct sockaddr *, - struct proc *)); -static void unp_disconnect __P((struct unpcb *)); -static void unp_shutdown __P((struct unpcb *)); -static void unp_drop __P((struct unpcb *, int)); -static void unp_gc __P((void)); -static void unp_scan __P((struct mbuf *, void (*)(struct file *))); -static void unp_mark __P((struct file *)); -static void unp_discard __P((struct file *)); -static int unp_internalize __P((struct mbuf *, struct proc *)); +static int unp_attach(struct socket *); +static void unp_detach(struct unpcb *); +static int unp_bind(struct unpcb *,struct sockaddr *, struct proc *); +static int unp_connect(struct socket *,struct sockaddr *, struct proc *); +static void unp_disconnect(struct unpcb *); +static void unp_shutdown(struct unpcb *); +static void unp_drop(struct unpcb *, int); +static void unp_gc(void); +static void unp_scan(struct mbuf *, void (*)(struct fileglob *)); +static void unp_mark(struct fileglob *); +static void unp_discard(struct fileglob *); +static void unp_discard_fdlocked(struct fileglob *, struct proc *); +static int unp_internalize(struct mbuf *, struct proc *); +static int unp_listen(struct unpcb *, struct proc *); + static int uipc_abort(struct socket *so) @@ -120,6 +133,8 @@ uipc_abort(struct socket *so) if (unp == 0) return EINVAL; unp_drop(unp, ECONNABORTED); + unp_detach(unp); + sofree(so); return 0; } @@ -146,7 +161,7 @@ uipc_accept(struct socket *so, struct sockaddr **nam) } static int -uipc_attach(struct socket *so, int proto, struct proc *p) +uipc_attach(struct socket *so, __unused int proto, __unused struct proc *p) { struct unpcb *unp = sotounpcb(so); @@ -213,13 +228,13 @@ uipc_disconnect(struct socket *so) } static int -uipc_listen(struct socket *so, struct proc *p) +uipc_listen(struct socket *so, __unused struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp == 0 || unp->unp_vnode == 0) return EINVAL; - return 0; + return unp_listen(unp, p); } static int @@ -236,7 +251,7 @@ uipc_peeraddr(struct socket *so, struct sockaddr **nam) } static int -uipc_rcvd(struct socket *so, int flags) +uipc_rcvd(struct socket *so, __unused int flags) { struct unpcb *unp = sotounpcb(so); struct socket *so2; @@ -292,8 +307,13 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, goto release; } - if (control && (error = unp_internalize(control, p))) - goto release; + if (control) { + socket_unlock(so, 0); /* release global lock to avoid deadlock (4436174) */ + error = unp_internalize(control, p); + socket_lock(so, 0); + if (error) + goto release; + } switch (so->so_type) { case SOCK_DGRAM: @@ -319,18 +339,18 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; - if (sbappendaddr(&so2->so_rcv, from, m, control)) { + if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) { sorwakeup(so2); - m = 0; - control = 0; - } else - error = ENOBUFS; + } + m = 0; + control = 0; if (nam) unp_disconnect(unp); break; } - case SOCK_STREAM: + case SOCK_STREAM: { + int didreceive = 0; #define rcv (&so2->so_rcv) #define snd (&so->so_snd) /* Connect if not connected yet. */ @@ -361,20 +381,22 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, * send buffer hiwater marks to maintain backpressure. * Wake up readers. */ - if (control) { - if (sbappendcontrol(rcv, m, control)) - control = 0; - } else - sbappend(rcv, m); + if ((control && sbappendcontrol(rcv, m, control, NULL)) || + sbappend(rcv, m)) { + didreceive = 1; + } snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; unp->unp_conn->unp_cc = rcv->sb_cc; - sorwakeup(so2); + if (didreceive) + sorwakeup(so2); m = 0; + control = 0; #undef snd #undef rcv + } break; default: @@ -390,6 +412,9 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, unp_shutdown(unp); } + if (control && error != 0) + unp_dispose(control); + release: if (control) m_freem(control); @@ -447,8 +472,43 @@ struct pr_usrreqs uipc_usrreqs = { uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, - sosend, soreceive, sopoll + sosend, soreceive, pru_sopoll_notsupp }; + +int +uipc_ctloutput( + struct socket *so, + struct sockopt *sopt) +{ + struct unpcb *unp = sotounpcb(so); + int error; + + switch (sopt->sopt_dir) { + case SOPT_GET: + switch (sopt->sopt_name) { + case LOCAL_PEERCRED: + if (unp->unp_flags & UNP_HAVEPC) + error = sooptcopyout(sopt, &unp->unp_peercred, + sizeof(unp->unp_peercred)); + else { + if (so->so_type == SOCK_STREAM) + error = ENOTCONN; + else + error = EINVAL; + } + break; + default: + error = EOPNOTSUPP; + break; + } + break; + case SOPT_SET: + default: + error = EOPNOTSUPP; + break; + } + return (error); +} /* * Both send and receive buffers are allocated PIPSIZ bytes of buffering @@ -482,11 +542,10 @@ SYSCTL_DECL(_net_local); SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); static int -unp_attach(so) - struct socket *so; +unp_attach(struct socket *so) { - register struct unpcb *unp; - int error; + struct unpcb *unp; + int error = 0; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { switch (so->so_type) { @@ -509,36 +568,38 @@ unp_attach(so) if (unp == NULL) return (ENOBUFS); bzero(unp, sizeof *unp); - unp->unp_gencnt = ++unp_gencnt; - unp_count++; + lck_rw_lock_exclusive(unp_list_mtx); LIST_INIT(&unp->unp_refs); unp->unp_socket = so; + unp->unp_gencnt = ++unp_gencnt; + unp_count++; LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead : &unp_shead, unp, unp_link); so->so_pcb = (caddr_t)unp; + lck_rw_done(unp_list_mtx); return (0); } static void -unp_detach(unp) - register struct unpcb *unp; +unp_detach(struct unpcb *unp) { + lck_rw_lock_exclusive(unp_list_mtx); LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; + lck_rw_done(unp_list_mtx); --unp_count; if (unp->unp_vnode) { struct vnode *tvp = unp->unp_vnode; unp->unp_vnode->v_socket = 0; unp->unp_vnode = 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - vrele(tvp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + vnode_rele(tvp); /* drop the usecount */ } if (unp->unp_conn) unp_disconnect(unp); while (unp->unp_refs.lh_first) unp_drop(unp->unp_refs.lh_first, ECONNRESET); soisdisconnected(unp->unp_socket); + unp->unp_socket->so_flags |= SOF_PCBCLEARING; /* makes sure we're getting dealloced */ unp->unp_socket->so_pcb = 0; if (unp_rights) { /* @@ -553,22 +614,26 @@ unp_detach(unp) } if (unp->unp_addr) FREE(unp->unp_addr, M_SONAME); - zfree(unp_zone, (vm_offset_t)unp); + zfree(unp_zone, unp); } static int -unp_bind(unp, nam, p) - struct unpcb *unp; - struct sockaddr *nam; - struct proc *p; +unp_bind( + struct unpcb *unp, + struct sockaddr *nam, + struct proc *p) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; - register struct vnode *vp; - struct vattr vattr; + struct vnode *vp, *dvp; + struct vnode_attr va; + struct vfs_context context; int error, namelen; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; + context.vc_proc = p; + context.vc_ucred = p->p_ucred; /* XXX kauth_cred_get() ??? proxy */ + if (unp->unp_vnode != NULL) return (EINVAL); namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); @@ -576,93 +641,109 @@ unp_bind(unp, nam, p) return EINVAL; strncpy(buf, soun->sun_path, namelen); buf[namelen] = 0; /* null-terminate the string */ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, - buf, p); + NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE32, + CAST_USER_ADDR_T(buf), &context); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); if (error) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (error); } + dvp = nd.ni_dvp; vp = nd.ni_vp; + if (vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(vp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + /* + * need to do this before the vnode_put of dvp + * since we may have to release an fs_nodelock + */ + nameidone(&nd); + + vnode_put(dvp); + vnode_put(vp); + return (EADDRINUSE); } - VATTR_NULL(&vattr); - vattr.va_type = VSOCK; - vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); -#if 0 - /* In FreeBSD create leave s parent held ; not here */ - vput(nd.ni_dvp); -#endif + + /* authorize before creating */ + error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, &context); + + if (!error) { + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VSOCK); + VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask)); + + /* create the socket */ + error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, &context); + } + + nameidone(&nd); + vnode_put(dvp); + if (error) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (error); } - vp = nd.ni_vp; + vnode_ref(vp); /* gain a longterm reference */ vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); - VOP_UNLOCK(vp, 0, p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + vnode_put(vp); /* drop the iocount */ + return (0); } static int -unp_connect(so, nam, p) - struct socket *so; - struct sockaddr *nam; - struct proc *p; +unp_connect( + struct socket *so, + struct sockaddr *nam, + struct proc *p) { - register struct sockaddr_un *soun = (struct sockaddr_un *)nam; - register struct vnode *vp; - register struct socket *so2, *so3; - struct unpcb *unp2, *unp3; + struct sockaddr_un *soun = (struct sockaddr_un *)nam; + struct vnode *vp; + struct socket *so2, *so3; + struct unpcb *unp, *unp2, *unp3; + struct vfs_context context; int error, len; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; + context.vc_proc = p; + context.vc_ucred = p->p_ucred; /* XXX kauth_cred_get() ??? proxy */ + so2 = so3 = NULL; + len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return EINVAL; strncpy(buf, soun->sun_path, len); buf[len] = 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(buf), &context); error = namei(&nd); if (error) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (error); } + nameidone(&nd); vp = nd.ni_vp; if (vp->v_type != VSOCK) { error = ENOTSOCK; goto bad; } - error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p); + + error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, &context); if (error) goto bad; so2 = vp->v_socket; - if (so2 == 0) { + if (so2 == 0 || so2->so_pcb == NULL ) { error = ECONNREFUSED; goto bad; } + + /* make sure the socket can't go away while we're connecting */ + so2->so_usecount++; + if (so->so_type != so2->so_type) { error = EPROTOTYPE; goto bad; } - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); /* * Check if socket was connected while we were trying to @@ -671,40 +752,66 @@ unp_connect(so, nam, p) */ if ((so->so_state & SS_ISCONNECTED) != 0) { error = EISCONN; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); goto bad; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if ((so2->so_options & SO_ACCEPTCONN) == 0 || - (so3 = sonewconn(so2, 0)) == 0) { + (so3 = sonewconn(so2, 0, nam)) == 0) { error = ECONNREFUSED; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); goto bad; } + unp = sotounpcb(so); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); if (unp2->unp_addr) unp3->unp_addr = (struct sockaddr_un *) dup_sockaddr((struct sockaddr *) unp2->unp_addr, 1); + + /* + * unp_peercred management: + * + * The connecter's (client's) credentials are copied + * from its process structure at the time of connect() + * (which is now). + */ + cru2x(p->p_ucred, &unp3->unp_peercred); + unp3->unp_flags |= UNP_HAVEPC; + /* + * The receiver's (server's) credentials are copied + * from the unp_peercred member of socket on which the + * former called listen(); unp_listen() cached that + * process's credentials at that time so we can use + * them now. + */ + KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, + ("unp_connect: listener without cached peercred")); + memcpy(&unp->unp_peercred, &unp2->unp_peercred, + sizeof(unp->unp_peercred)); + unp->unp_flags |= UNP_HAVEPC; + + so2->so_usecount--; /* drop reference taken on so2 */ so2 = so3; + so3->so_usecount++; /* make sure we keep it around */ } error = unp_connect2(so, so2); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); bad: - vput(vp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + + if (so2 != NULL) + so2->so_usecount--; /* release count on socket */ + + vnode_put(vp); return (error); } int -unp_connect2(so, so2) - register struct socket *so; - register struct socket *so2; +unp_connect2( + struct socket *so, + struct socket *so2) { - register struct unpcb *unp = sotounpcb(so); - register struct unpcb *unp2; + struct unpcb *unp = sotounpcb(so); + struct unpcb *unp2; if (so2->so_type != so->so_type) return (EPROTOTYPE); @@ -723,6 +830,14 @@ unp_connect2(so, so2) break; case SOCK_STREAM: + /* This takes care of socketpair */ + if (!(unp->unp_flags & UNP_HAVEPC) && !(unp2->unp_flags & UNP_HAVEPC)) { + cru2x(kauth_cred_get(), &unp->unp_peercred); + unp->unp_flags |= UNP_HAVEPC; + + cru2x(kauth_cred_get(), &unp2->unp_peercred); + unp2->unp_flags |= UNP_HAVEPC; + } unp2->unp_conn = unp; soisconnected(so); soisconnected(so2); @@ -735,10 +850,9 @@ unp_connect2(so, so2) } static void -unp_disconnect(unp) - struct unpcb *unp; +unp_disconnect(struct unpcb *unp) { - register struct unpcb *unp2 = unp->unp_conn; + struct unpcb *unp2 = unp->unp_conn; if (unp2 == 0) return; @@ -746,7 +860,9 @@ unp_disconnect(unp) switch (unp->unp_socket->so_type) { case SOCK_DGRAM: + lck_rw_lock_exclusive(unp_list_mtx); LIST_REMOVE(unp, unp_reflink); + lck_rw_done(unp_list_mtx); unp->unp_socket->so_state &= ~SS_ISCONNECTED; break; @@ -760,8 +876,7 @@ unp_disconnect(unp) #ifdef notdef void -unp_abort(unp) - struct unpcb *unp; +unp_abort(struct unpcb *unp) { unp_detach(unp); @@ -777,21 +892,25 @@ unp_pcblist SYSCTL_HANDLER_ARGS struct xunpgen xug; struct unp_head *head; + lck_rw_lock_shared(unp_list_mtx); head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ - if (req->oldptr == 0) { + if (req->oldptr == USER_ADDR_NULL) { n = unp_count; req->oldidx = 2 * (sizeof xug) + (n + n/8) * sizeof(struct xunpcb); + lck_rw_done(unp_list_mtx); return 0; } - if (req->newptr != 0) + if (req->newptr != USER_ADDR_NULL) { + lck_rw_done(unp_list_mtx); return EPERM; + } /* * OK, now we're committed to doing something. @@ -799,23 +918,30 @@ unp_pcblist SYSCTL_HANDLER_ARGS gencnt = unp_gencnt; n = unp_count; + bzero(&xug, sizeof(xug)); xug.xug_len = sizeof xug; xug.xug_count = n; xug.xug_gen = gencnt; xug.xug_sogen = so_gencnt; error = SYSCTL_OUT(req, &xug, sizeof xug); - if (error) + if (error) { + lck_rw_done(unp_list_mtx); return error; + } /* * We are done if there is no pcb */ - if (n == 0) + if (n == 0) { + lck_rw_done(unp_list_mtx); return 0; + } - unp_list = _MALLOC(n * sizeof *unp_list, M_TEMP, M_WAITOK); - if (unp_list == 0) + MALLOC(unp_list, struct unpcb **, n * sizeof *unp_list, M_TEMP, M_WAITOK); + if (unp_list == 0) { + lck_rw_done(unp_list_mtx); return ENOMEM; + } for (unp = head->lh_first, i = 0; unp && i < n; unp = unp->unp_link.le_next) { @@ -829,8 +955,10 @@ unp_pcblist SYSCTL_HANDLER_ARGS unp = unp_list[i]; if (unp->unp_gencnt <= gencnt) { struct xunpcb xu; + + bzero(&xu, sizeof(xu)); xu.xu_len = sizeof xu; - xu.xu_unpp = unp; + xu.xu_unpp = (struct unpcb_compat *)unp; /* * XXX - need more locking here to protect against * connect/disconnect races for SMP. @@ -842,7 +970,7 @@ unp_pcblist SYSCTL_HANDLER_ARGS bcopy(unp->unp_conn->unp_addr, &xu.xu_caddr, unp->unp_conn->unp_addr->sun_len); - bcopy(unp, &xu.xu_unp, sizeof *unp); + bcopy(unp, &xu.xu_unp, sizeof(xu.xu_unp)); sotoxsocket(unp->unp_socket, &xu.xu_socket); error = SYSCTL_OUT(req, &xu, sizeof xu); } @@ -855,12 +983,15 @@ unp_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. */ + bzero(&xug, sizeof(xug)); + xug.xug_len = sizeof xug; xug.xug_gen = unp_gencnt; xug.xug_sogen = so_gencnt; xug.xug_count = unp_count; error = SYSCTL_OUT(req, &xug, sizeof xug); } FREE(unp_list, M_TEMP); + lck_rw_done(unp_list_mtx); return error; } @@ -872,8 +1003,7 @@ SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, "List of active local stream sockets"); static void -unp_shutdown(unp) - struct unpcb *unp; +unp_shutdown(struct unpcb *unp) { struct socket *so; @@ -883,24 +1013,14 @@ unp_shutdown(unp) } static void -unp_drop(unp, errno) - struct unpcb *unp; - int errno; +unp_drop( + struct unpcb *unp, + int errno) { struct socket *so = unp->unp_socket; so->so_error = errno; unp_disconnect(unp); - if (so->so_head) { - LIST_REMOVE(unp, unp_link); - unp->unp_gencnt = ++unp_gencnt; - unp_count--; - so->so_pcb = (caddr_t) 0; - if (unp->unp_addr) - FREE(unp->unp_addr, M_SONAME); - zfree(unp_zone, (vm_offset_t)unp); - sofree(so); - } } #ifdef notdef @@ -912,31 +1032,30 @@ unp_drain() #endif int -unp_externalize(rights) - struct mbuf *rights; +unp_externalize(struct mbuf *rights) { struct proc *p = current_proc(); /* XXX */ - register int i; - register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); - register struct file **rp = (struct file **)(cm + 1); - register struct file *fp; + int i; + struct cmsghdr *cm = mtod(rights, struct cmsghdr *); + struct fileglob **rp = (struct fileglob **)(cm + 1); + struct fileproc *fp; + struct fileglob *fg; int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int); int f; - - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + proc_fdlock(p); /* * if the new FD's will not fit, then we free them all */ if (!fdavail(p, newfds)) { for (i = 0; i < newfds; i++) { - fp = *rp; - unp_discard(fp); + fg = *rp; + unp_discard_fdlocked(fg, p); *rp++ = 0; } + proc_fdunlock(p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (EMSGSIZE); } /* @@ -948,15 +1067,18 @@ unp_externalize(rights) for (i = 0; i < newfds; i++) { if (fdalloc(p, 0, &f)) panic("unp_externalize"); - fp = *rp; - p->p_fd->fd_ofiles[f] = fp; - *fdflags(p, f) &= ~UF_RESERVED; - fp->f_msgcount--; + fg = *rp; + MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK); + bzero(fp, sizeof(struct fileproc)); + fp->f_iocount = 0; + fp->f_fglob = fg; + fg_removeuipc(fg); + procfdtbl_releasefd(p, f, fp); unp_rights--; *(int *)rp++ = f; } + proc_fdunlock(p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (0); } @@ -970,6 +1092,19 @@ unp_init(void) panic("unp_init"); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); + + /* + * allocate lock group attribute and group for udp pcb mutexes + */ + unp_mtx_grp_attr = lck_grp_attr_alloc_init(); + + unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr); + + unp_mtx_attr = lck_attr_alloc_init(); + + if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp, unp_mtx_attr)) == NULL) + return; /* pretty much dead if this fails... */ + } #ifndef MIN @@ -977,112 +1112,139 @@ unp_init(void) #endif static int -unp_internalize(control, p) - struct mbuf *control; - struct proc *p; +unp_internalize( + struct mbuf *control, + struct proc *p) { - register struct cmsghdr *cm = mtod(control, struct cmsghdr *); - register struct file **rp; - struct file *fp; + struct cmsghdr *cm = mtod(control, struct cmsghdr *); + struct fileglob **rp; + struct fileproc *fp; register int i, error; int oldfds; + int fdgetf_noref(proc_t, struct fileglob **, struct fileproc **); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (EINVAL); } - oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); - rp = (struct file **)(cm + 1); - for (i = 0; i < oldfds; i++) - if (error = fdgetf(p, *(int *)rp++, 0)) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - return (error); + proc_fdlock(p); + rp = (struct fileglob **)(cm + 1); + + for (i = 0; i < oldfds; i++) { + if (error = fdgetf_noref(p, *(int *)rp++, (struct fileglob **)0)) { + proc_fdunlock(p); + return (error); } + } + rp = (struct fileglob **)(cm + 1); - rp = (struct file **)(cm + 1); for (i = 0; i < oldfds; i++) { - (void) fdgetf(p, *(int *)rp, &fp); - *rp++ = fp; - fref(fp); - fp->f_msgcount++; + (void) fdgetf_noref(p, *(int *)rp, &fp); + fg_insertuipc(fp->f_fglob); + *rp++ = fp->f_fglob; unp_rights++; } + proc_fdunlock(p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (0); } -static int unp_defer, unp_gcing; +static int unp_defer, unp_gcing, unp_gcwait; +/* always called under uipc_lock */ +void +unp_gc_wait(void) +{ + while (unp_gcing != 0) { + unp_gcwait = 1; + msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL); + } +} static void unp_gc() { - register struct file *fp, *nextfp; + register struct fileglob *fg, *nextfg; register struct socket *so; - struct file **extra_ref, **fpp; + struct fileglob **extra_ref, **fpp; int nunref, i; - - if (unp_gcing) + int need_gcwakeup = 0; + + lck_mtx_lock(uipc_lock); + if (unp_gcing) { + lck_mtx_unlock(uipc_lock); return; + } unp_gcing = 1; unp_defer = 0; + lck_mtx_unlock(uipc_lock); /* * before going through all this, set all FDs to * be NOT defered and NOT externally accessible */ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) - fp->f_flag &= ~(FMARK|FDEFER); + for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) { + lck_mtx_lock(&fg->fg_lock); + fg->fg_flag &= ~(FMARK|FDEFER); + lck_mtx_unlock(&fg->fg_lock); + } do { - for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { + for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) { + lck_mtx_lock(&fg->fg_lock); /* * If the file is not open, skip it */ - if (fcount(fp) == 0) + if (fg->fg_count == 0) { + lck_mtx_unlock(&fg->fg_lock); continue; + } /* * If we already marked it as 'defer' in a * previous pass, then try process it this time * and un-mark it */ - if (fp->f_flag & FDEFER) { - fp->f_flag &= ~FDEFER; + if (fg->fg_flag & FDEFER) { + fg->fg_flag &= ~FDEFER; unp_defer--; } else { /* * if it's not defered, then check if it's * already marked.. if so skip it */ - if (fp->f_flag & FMARK) + if (fg->fg_flag & FMARK){ + lck_mtx_unlock(&fg->fg_lock); continue; + } /* * If all references are from messages * in transit, then skip it. it's not * externally accessible. */ - if (fcount(fp) == fp->f_msgcount) + if (fg->fg_count == fg->fg_msgcount) { + lck_mtx_unlock(&fg->fg_lock); continue; + } /* * If it got this far then it must be * externally accessible. */ - fp->f_flag |= FMARK; + fg->fg_flag |= FMARK; } /* * either it was defered, or it is externally * accessible and not already marked so. * Now check if it is possibly one of OUR sockets. */ - if (fp->f_type != DTYPE_SOCKET || - (so = (struct socket *)fp->f_data) == 0) + if (fg->fg_type != DTYPE_SOCKET || + (so = (struct socket *)fg->fg_data) == 0) { + lck_mtx_unlock(&fg->fg_lock); continue; + } if (so->so_proto->pr_domain != &localdomain || - (so->so_proto->pr_flags&PR_RIGHTS) == 0) + (so->so_proto->pr_flags&PR_RIGHTS) == 0) { + lck_mtx_unlock(&fg->fg_lock); continue; + } #ifdef notdef /* if this code is enabled need to run under network funnel */ if (so->so_rcv.sb_flags & SB_LOCK) { @@ -1108,6 +1270,7 @@ unp_gc() * as accessible too. */ unp_scan(so->so_rcv.sb_mb, unp_mark); + lck_mtx_unlock(&fg->fg_lock); } } while (unp_defer); /* @@ -1149,83 +1312,104 @@ unp_gc() * * 91/09/19, bsy@cs.cmu.edu */ - extra_ref = _MALLOC(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); - for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; - fp = nextfp) { - nextfp = fp->f_list.le_next; + extra_ref = _MALLOC(nfiles * sizeof(struct fileglob *), M_FILEGLOB, M_WAITOK); + for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0; + fg = nextfg) { + lck_mtx_lock(&fg->fg_lock); + + nextfg = fg->f_msglist.le_next; /* * If it's not open, skip it */ - if (fcount(fp) == 0) + if (fg->fg_count == 0) { + lck_mtx_unlock(&fg->fg_lock); continue; + } /* * If all refs are from msgs, and it's not marked accessible * then it must be referenced from some unreachable cycle * of (shut-down) FDs, so include it in our * list of FDs to remove */ - if (fcount(fp) == fp->f_msgcount && !(fp->f_flag & FMARK)) { - *fpp++ = fp; + if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) { + fg->fg_count++; + *fpp++ = fg; nunref++; - fref(fp); } + lck_mtx_unlock(&fg->fg_lock); } /* * for each FD on our hit list, do the following two things */ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { - struct file *tfp = *fpp; - if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - sorflush((struct socket *)(tfp->f_data)); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + struct fileglob *tfg; + + tfg = *fpp; + + if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) { + sorflush((struct socket *)(tfg->fg_data)); } } + for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) + closef_locked((struct fileproc *)0, *fpp, (struct proc *) NULL); + lck_mtx_lock(uipc_lock); + unp_gcing = 0; + if (unp_gcwait != 0) { + unp_gcwait = 0; + need_gcwakeup = 1; + } + lck_mtx_unlock(uipc_lock); - for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) - closef(*fpp, (struct proc *) NULL); - FREE((caddr_t)extra_ref, M_FILE); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + if (need_gcwakeup != 0) + wakeup(&unp_gcing); + FREE((caddr_t)extra_ref, M_FILEGLOB); - unp_gcing = 0; } void -unp_dispose(m) - struct mbuf *m; +unp_dispose(struct mbuf *m) { if (m) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); unp_scan(m, unp_discard); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); } } -/* should run under kernel funnel */ +static int +unp_listen( + struct unpcb *unp, + struct proc *p) +{ + + cru2x(p->p_ucred, &unp->unp_peercred); + unp->unp_flags |= UNP_HAVEPCCACHED; + return (0); +} + +/* should run under kernel funnel */ static void -unp_scan(m0, op) - register struct mbuf *m0; - void (*op) __P((struct file *)); +unp_scan( + struct mbuf *m0, + void (*op)(struct fileglob *)) { - register struct mbuf *m; - register struct file **rp; - register struct cmsghdr *cm; - register int i; + struct mbuf *m; + struct fileglob **rp; + struct cmsghdr *cm; + int i; int qfds; while (m0) { for (m = m0; m; m = m->m_next) if (m->m_type == MT_CONTROL && - m->m_len >= sizeof(*cm)) { + (size_t) m->m_len >= sizeof(*cm)) { cm = mtod(m, struct cmsghdr *); if (cm->cmsg_level != SOL_SOCKET || cm->cmsg_type != SCM_RIGHTS) continue; qfds = (cm->cmsg_len - sizeof *cm) - / sizeof (struct file *); - rp = (struct file **)(cm + 1); + / sizeof (struct fileglob *); + rp = (struct fileglob **)(cm + 1); for (i = 0; i < qfds; i++) (*op)(*rp++); break; /* XXX, but saves time */ @@ -1236,23 +1420,40 @@ unp_scan(m0, op) /* should run under kernel funnel */ static void -unp_mark(fp) - struct file *fp; +unp_mark(struct fileglob *fg) { + lck_mtx_lock(&fg->fg_lock); - if (fp->f_flag & FMARK) + if (fg->fg_flag & FMARK) { + lck_mtx_unlock(&fg->fg_lock); return; + } + fg->fg_flag |= (FMARK|FDEFER); + + lck_mtx_unlock(&fg->fg_lock); + unp_defer++; - fp->f_flag |= (FMARK|FDEFER); } /* should run under kernel funnel */ static void -unp_discard(fp) - struct file *fp; +unp_discard(fg) + struct fileglob *fg; +{ + struct proc *p = current_proc(); /* XXX */ + + proc_fdlock(p); + unp_discard_fdlocked(fg, p); + proc_fdunlock(p); +} +static void +unp_discard_fdlocked(fg, p) + struct fileglob *fg; + struct proc *p; { - fp->f_msgcount--; + fg_removeuipc(fg); + unp_rights--; - (void) closef(fp, (struct proc *)NULL); + (void) closef_locked((struct fileproc *)0, fg, p); }