X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/e5568f75972dfc723778653c11cb6b4dc825716a..8ad349bb6ed4a0be06e34c92be0d98b92e078db4:/bsd/nfs/nfs_syscalls.c diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index 448b3b9f8..ec39c5f2d 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -1,23 +1,31 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * @APPLE_LICENSE_OSREFERENCE_HEADER_START@ * - * @APPLE_LICENSE_HEADER_END@ + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the + * License may not be used to create, or enable the creation or + * redistribution of, unlawful or unlicensed copies of an Apple operating + * system, or to circumvent, violate, or enable the circumvention or + * violation of, any terms of an Apple operating system software license + * agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_OSREFERENCE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* @@ -66,27 +74,31 @@ #include #endif #include -#include +#include #include #include -#include -#include -#include +#include +#include +#include /* for fdflags */ +#include #include #include #include #include -#include +#include #include #include #include #include -#include #include #include #include #include -#include +#include +#include +#include + +#include #include #include @@ -101,45 +113,40 @@ #include #include #include -#include #include #include +extern void unix_syscall_return(int); + /* Global defs. */ -extern int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, +extern int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, struct nfssvc_sock *slp, - struct proc *procp, - struct mbuf **mreqp)); + proc_t procp, + mbuf_t *mreqp); extern int nfs_numasync; extern int nfs_ioddelwri; -extern time_t nqnfsstarttime; -extern int nqsrv_writeslack; extern int nfsrtton; extern struct nfsstats nfsstats; extern int nfsrvw_procrastinate; extern int nfsrvw_procrastinate_v3; + struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; static int nuidhash_max = NFS_MAXUIDHASH; -static void nfsrv_zapsock __P((struct nfssvc_sock *slp)); -static int nfssvc_iod __P((struct proc *)); - -#define TRUE 1 -#define FALSE 0 +static void nfsrv_zapsock(struct nfssvc_sock *slp); +static int nfssvc_iod(proc_t); +static int nfskerb_clientd(struct nfsmount *, struct nfsd_cargs *, int, user_addr_t, proc_t); static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; #ifndef NFS_NOSERVER int nfsd_waiting = 0; static struct nfsdrt nfsdrt; -static int nfs_numnfsd = 0; -static int notstarted = 1; -static int modify_flag = 0; -static void nfsd_rt __P((int sotype, struct nfsrv_descript *nd, - int cacherep)); -static int nfssvc_addsock __P((struct file *, struct mbuf *, - struct proc *)); -static int nfssvc_nfsd __P((struct nfsd_srvargs *,caddr_t,struct proc *)); +int nfs_numnfsd = 0; +static void nfsd_rt(int sotype, struct nfsrv_descript *nd, int cacherep); +static int nfssvc_addsock(socket_t, mbuf_t, proc_t); +static int nfssvc_nfsd(struct nfsd_srvargs *,user_addr_t, proc_t); +static int nfssvc_export(user_addr_t, proc_t); static int nfs_privport = 0; /* XXX CSM 11/25/97 Upgrade sysctl.h someday */ @@ -157,45 +164,95 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate /* * Get file handle system call */ -#ifndef _SYS_SYSPROTO_H_ -struct getfh_args { - char *fname; - fhandle_t *fhp; -}; -#endif int -getfh(p, uap) - struct proc *p; - register struct getfh_args *uap; +getfh(proc_t p, struct getfh_args *uap, __unused int *retval) { - register struct vnode *vp; - fhandle_t fh; + vnode_t vp; + struct nfs_filehandle nfh; int error; struct nameidata nd; + struct vfs_context context; + char path[MAXPATHLEN], *ptr; + u_int pathlen; + struct nfs_exportfs *nxfs; + struct nfs_export *nx; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); /* * Must be super user */ - error = suser(p->p_ucred, &p->p_acflag); - if(error) + error = proc_suser(p); + if (error) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, uap->fname, p); + + error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen); + if (error) + return (error); + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, path, &context); error = namei(&nd); if (error) return (error); + nameidone(&nd); + vp = nd.ni_vp; - bzero((caddr_t)&fh, sizeof(fh)); - fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fh.fh_fid); - vput(vp); + + // find exportfs that matches f_mntonname + lck_rw_lock_shared(&nfs_export_rwlock); + ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; + LIST_FOREACH(nxfs, &nfs_exports, nxfs_next) { + if (!strcmp(nxfs->nxfs_path, ptr)) + break; + } + if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { + error = EINVAL; + goto out; + } + // find export that best matches remainder of path + ptr = path + strlen(nxfs->nxfs_path); + while (*ptr && (*ptr == '/')) + ptr++; + LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { + int len = strlen(nx->nx_path); + if (len == 0) // we've hit the export entry for the root directory + break; + if (!strncmp(nx->nx_path, ptr, len)) + break; + } + if (!nx) { + error = EINVAL; + goto out; + } + + bzero(&nfh, sizeof(nfh)); + nfh.nfh_xh.nxh_version = NFS_FH_VERSION; + nfh.nfh_xh.nxh_fsid = nxfs->nxfs_id; + nfh.nfh_xh.nxh_expid = nx->nx_id; + nfh.nfh_xh.nxh_flags = 0; + nfh.nfh_xh.nxh_reserved = 0; + nfh.nfh_len = NFS_MAX_FID_SIZE; + error = VFS_VPTOFH(vp, &nfh.nfh_len, &nfh.nfh_fid[0], NULL); + if (nfh.nfh_len > (int)NFS_MAX_FID_SIZE) + error = EOVERFLOW; + nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; + nfh.nfh_len += sizeof(nfh.nfh_xh); + +out: + lck_rw_done(&nfs_export_rwlock); + vnode_put(vp); if (error) return (error); - error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh)); + error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh)); return (error); } #endif /* NFS_NOSERVER */ +extern struct fileops vnops; + /* * syscall for the rpc.lockd to use to translate a NFS file handle into * an open descriptor. @@ -203,39 +260,30 @@ getfh(p, uap) * warning: do not remove the suser() call or this becomes one giant * security hole. */ -#ifndef _SYS_SYSPROTO_H_ -struct fhopen_args { - const struct fhandle *u_fhp; - int flags; -}; -#endif int -fhopen(p, uap, retval) - struct proc *p; - register struct fhopen_args *uap; - register_t *retval; +fhopen( proc_t p, + struct fhopen_args *uap, + register_t *retval) { - struct mount *mp; - struct vnode *vp; - struct fhandle fhp; - struct vattr vat; - struct vattr *vap = &vat; + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; struct flock lf; - struct file *fp; - register struct filedesc *fdp = p->p_fd; - int fmode, mode, error, type; - struct file *nfp; + struct fileproc *fp, *nfp; + int fmode, error, type; int indx; - struct ucred *credanon; - int exflags; - struct ucred *cred = p->p_ucred; - int didhold = 0; - extern struct fileops vnops; + kauth_cred_t cred = proc_ucred(p); + struct vfs_context context; + kauth_action_t action; + + context.vc_proc = p; + context.vc_ucred = cred; /* * Must be super user */ - error = suser(cred, &p->p_acflag); + error = suser(cred, 0); if (error) return (error); @@ -243,98 +291,76 @@ fhopen(p, uap, retval) /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); - error = copyin((void*)uap->u_fhp, &fhp, sizeof(fhp)); + + error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len)); + if (error) + return (error); + if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) || + (nfh.nfh_len > (int)NFS_MAX_FH_SIZE)) + return (EINVAL); + error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len); if (error) return (error); - /* find the mount point */ - mp = vfs_getvfs(&fhp.fh_fsid); - if (mp == NULL) - return (ESTALE); - /* now give me my vnode, it gets returned to me locked */ -/* XXX CSM need to split VFS_CHECKEXP out of VFS_FHTOVP? */ - error = VFS_FHTOVP(mp, &fhp.fh_fid, NULL, &vp, &exflags, &credanon); + + lck_rw_lock_shared(&nfs_export_rwlock); + /* now give me my vnode, it gets returned to me with a reference */ + error = nfsrv_fhtovp(&nfh, NULL, TRUE, &vp, &nx, &nxo); + lck_rw_done(&nfs_export_rwlock); if (error) return (error); + /* - * from now on we have to make sure not - * to forget about the vnode - * any error that causes an abort must vput(vp) - * just set error = err and 'goto bad;'. + * From now on we have to make sure not + * to forget about the vnode. + * Any error that causes an abort must vnode_put(vp). + * Just set error = err and 'goto bad;'. */ /* * from vn_open */ - if (vp->v_type == VSOCK) { + if (vnode_vtype(vp) == VSOCK) { error = EOPNOTSUPP; goto bad; } - if (UBCINFOEXISTS(vp) && ((didhold = ubc_hold(vp)) == 0)) { - error = ENOENT; + /* disallow write operations on directories */ + if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { + error = EISDIR; goto bad; } - if (fmode & FREAD && fmode & (FWRITE | O_TRUNC)) { - int err = 0; - if (vp->v_type == VDIR) - err = EISDIR; - else - err = vn_writechk(vp); - if (err && !(error = VOP_ACCESS(vp, VREAD, cred, p))) - error = err; - if (error || (error = VOP_ACCESS(vp, VREAD|VWRITE, cred, p))) - goto bad; - } else if (fmode & FREAD) { - if ((error = VOP_ACCESS(vp, VREAD, cred, p))) - goto bad; - } else if (fmode & (FWRITE | O_TRUNC)) { - if (vp->v_type == VDIR) { - error = EISDIR; - goto bad; - } - if ((error = vn_writechk(vp)) || - (error = VOP_ACCESS(vp, VWRITE, cred, p))) - goto bad; - } - if (fmode & O_TRUNC) { - VOP_UNLOCK(vp, 0, p); /* XXX */ - VOP_LEASE(vp, p, cred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - VATTR_NULL(vap); - vap->va_size = 0; - error = VOP_SETATTR(vp, vap, cred, p); - if (error) - goto bad; - } + /* compute action to be authorized */ + action = 0; + if (fmode & FREAD) + action |= KAUTH_VNODE_READ_DATA; + if (fmode & (FWRITE | O_TRUNC)) + action |= KAUTH_VNODE_WRITE_DATA; + if ((error = vnode_authorize(vp, NULL, action, &context)) != 0) + goto bad; - error = VOP_OPEN(vp, fmode, cred, p); - if (error) + if ((error = VNOP_OPEN(vp, fmode, &context))) + goto bad; + if ((error = vnode_ref_ext(vp, fmode))) goto bad; - if (fmode & FWRITE) - if (++vp->v_writecount <= 0) - panic("fhopen: v_writecount"); /* * end of vn_open code */ + // starting here... error paths should call vn_close/vnode_put if ((error = falloc(p, &nfp, &indx)) != 0) { - if (fmode & FWRITE) - vp->v_writecount--; + vn_close(vp, fmode & FMASK, cred, p); goto bad; } fp = nfp; - /* - * Hold an extra reference to avoid having fp ripped out - * from under us while we block in the lock op - */ - fref(fp); - nfp->f_data = (caddr_t)vp; - nfp->f_flag = fmode & FMASK; - nfp->f_ops = &vnops; - nfp->f_type = DTYPE_VNODE; + fp->f_fglob->fg_flag = fmode & FMASK; + fp->f_fglob->fg_type = DTYPE_VNODE; + fp->f_fglob->fg_ops = &vnops; + fp->f_fglob->fg_data = (caddr_t)vp; + + // XXX do we really need to support this with fhopen()? if (fmode & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -346,34 +372,26 @@ fhopen(p, uap, retval) type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; - VOP_UNLOCK(vp, 0, p); - if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, - type)) != 0) { - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdrelse(p, indx); - /* - * release our private reference - */ - frele(fp); - + if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, &context))) { + vn_close(vp, fp->f_fglob->fg_flag, fp->f_fglob->fg_cred, p); + fp_free(p, indx, fp); return (error); } - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - fp->f_flag |= FHASLOCK; + fp->f_fglob->fg_flag |= FHASLOCK; } - VOP_UNLOCK(vp, 0, p); + vnode_put(vp); + + proc_fdlock(p); *fdflags(p, indx) &= ~UF_RESERVED; - frele(fp); + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); + *retval = indx; return (0); bad: - VOP_UNLOCK(vp, 0, p); - if (didhold) - ubc_rele(vp); - vrele(vp); + vnode_put(vp); return (error); } @@ -384,22 +402,13 @@ bad: * - remains in the kernel as an nfsd * - remains in the kernel as an nfsiod */ -#ifndef _SYS_SYSPROTO_H_ -struct nfssvc_args { - int flag; - caddr_t argp; -}; -#endif int -nfssvc(p, uap) - struct proc *p; - register struct nfssvc_args *uap; +nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) { #ifndef NFS_NOSERVER struct nameidata nd; - struct file *fp; - struct mbuf *nam; - struct nfsd_args nfsdarg; + mbuf_t nam; + struct user_nfsd_args user_nfsdarg; struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs; struct nfsd_cargs ncd; struct nfsd *nfsd; @@ -407,19 +416,20 @@ nfssvc(p, uap) struct nfsuid *nuidp; struct nfsmount *nmp; struct timeval now; + socket_t so; + struct vfs_context context; + struct ucred temp_cred; #endif /* NFS_NOSERVER */ int error; + AUDIT_ARG(cmd, uap->flag); + /* * Must be super user */ - error = suser(p->p_ucred, &p->p_acflag); + error = proc_suser(p); if(error) return (error); - while (nfssvc_sockhead_flag & SLP_INIT) { - nfssvc_sockhead_flag |= SLP_WANTINIT; - (void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0); - } if (uap->flag & NFSSVC_BIOD) error = nfssvc_iod(p); #ifdef NFS_NOSERVER @@ -427,18 +437,26 @@ nfssvc(p, uap) error = ENXIO; #else /* !NFS_NOSERVER */ else if (uap->flag & NFSSVC_MNTD) { + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)); if (error) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - ncd.ncd_dirp, p); + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + (proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + CAST_USER_ADDR_T(ncd.ncd_dirp), &context); error = namei(&nd); if (error) return (error); - if ((nd.ni_vp->v_flag & VROOT) == 0) + nameidone(&nd); + + if (vnode_isvroot(nd.ni_vp) == 0) error = EINVAL; - nmp = VFSTONFS(nd.ni_vp->v_mount); - vput(nd.ni_vp); + nmp = VFSTONFS(vnode_mount(nd.ni_vp)); + vnode_put(nd.ni_vp); if (error) return (error); @@ -446,28 +464,45 @@ nfssvc(p, uap) (uap->flag & NFSSVC_GOTAUTH) == 0) return (0); nmp->nm_state |= NFSSTA_MNTD; - error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag, - uap->argp, p); + error = nfskerb_clientd(nmp, &ncd, uap->flag, uap->argp, p); } else if (uap->flag & NFSSVC_ADDSOCK) { - error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg)); + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg)); + } else { + struct nfsd_args tmp_args; + error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args)); + if (error == 0) { + user_nfsdarg.sock = tmp_args.sock; + user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name); + user_nfsdarg.namelen = tmp_args.namelen; + } + } if (error) return (error); - error = getsock(p->p_fd, nfsdarg.sock, &fp); + /* get the socket */ + error = file_socket(user_nfsdarg.sock, &so); if (error) return (error); - /* - * Get the client address for connected sockets. - */ - if (nfsdarg.name == NULL || nfsdarg.namelen == 0) - nam = (struct mbuf *)0; - else { - error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen, - MT_SONAME); - if (error) + /* Get the client address for connected sockets. */ + if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) { + nam = NULL; + } else { + error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME); + if (error) { + /* drop the iocount file_socket() grabbed on the file descriptor */ + file_drop(user_nfsdarg.sock); return (error); + } } - error = nfssvc_addsock(fp, nam, p); - } else { + /* + * nfssvc_addsock() will grab a retain count on the socket + * to keep the socket from being closed when nfsd closes its + * file descriptor for it. + */ + error = nfssvc_addsock(so, nam, p); + /* drop the iocount file_socket() grabbed on the file descriptor */ + file_drop(user_nfsdarg.sock); + } else if (uap->flag & NFSSVC_NFSD) { error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)); if (error) return (error); @@ -482,14 +517,14 @@ nfssvc(p, uap) */ for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid && + if (kauth_cred_getuid(nuidp->nu_cr) == nsd->nsd_cr.cr_uid && (!nfsd->nfsd_nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp), &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2))) break; } if (nuidp) { - nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr); + nfsrv_setcred(nuidp->nu_cr,nfsd->nfsd_nd->nd_cr); nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; } else { /* @@ -503,23 +538,36 @@ nfssvc(p, uap) } else nuidp = (struct nfsuid *)0; if ((slp->ns_flag & SLP_VALID) == 0) { - if (nuidp) + if (nuidp) { FREE_ZONE((caddr_t)nuidp, sizeof (struct nfsuid), M_NFSUID); + slp->ns_numuids--; + } } else { if (nuidp == (struct nfsuid *)0) { nuidp = slp->ns_uidlruhead.tqh_first; + if (!nuidp) + return (ENOMEM); LIST_REMOVE(nuidp, nu_hash); TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); if (nuidp->nu_flag & NU_NAM) - m_freem(nuidp->nu_nam); + mbuf_freem(nuidp->nu_nam); + kauth_cred_rele(nuidp->nu_cr); } nuidp->nu_flag = 0; - nuidp->nu_cr = nsd->nsd_cr; - if (nuidp->nu_cr.cr_ngroups > NGROUPS) - nuidp->nu_cr.cr_ngroups = NGROUPS; - nuidp->nu_cr.cr_ref = 1; + + if (nsd->nsd_cr.cr_ngroups > NGROUPS) + nsd->nsd_cr.cr_ngroups = NGROUPS; + + nfsrv_setcred(&nsd->nsd_cr, &temp_cred); + nuidp->nu_cr = kauth_cred_create(&temp_cred); + + if (!nuidp->nu_cr) { + FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID); + slp->ns_numuids--; + return (ENOMEM); + } nuidp->nu_timestamp = nsd->nsd_timestamp; microtime(&now); nuidp->nu_expire = now.tv_sec + nsd->nsd_ttl; @@ -531,8 +579,7 @@ nfssvc(p, uap) if (nfsd->nfsd_nd->nd_nam2) { struct sockaddr_in *saddr; - saddr = mtod(nfsd->nfsd_nd->nd_nam2, - struct sockaddr_in *); + saddr = mbuf_data(nfsd->nfsd_nd->nd_nam2); switch (saddr->sin_family) { case AF_INET: nuidp->nu_flag |= NU_INETADDR; @@ -542,9 +589,15 @@ nfssvc(p, uap) case AF_ISO: default: nuidp->nu_flag |= NU_NAM; - nuidp->nu_nam = m_copym( - nfsd->nfsd_nd->nd_nam2, 0, - M_COPYALL, M_WAIT); + error = mbuf_copym(nfsd->nfsd_nd->nd_nam2, 0, + MBUF_COPYALL, MBUF_WAITOK, + &nuidp->nu_nam); + if (error) { + kauth_cred_rele(nuidp->nu_cr); + FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID); + slp->ns_numuids--; + return (error); + } break; }; } @@ -552,8 +605,8 @@ nfssvc(p, uap) nu_lru); LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid), nuidp, nu_hash); - nfsrv_setcred(&nuidp->nu_cr, - &nfsd->nfsd_nd->nd_cr); + nfsrv_setcred(nuidp->nu_cr, + nfsd->nfsd_nd->nd_cr); nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; } } @@ -561,6 +614,10 @@ nfssvc(p, uap) if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd)) nfsd->nfsd_flag |= NFSD_AUTHFAIL; error = nfssvc_nfsd(nsd, uap->argp, p); + } else if (uap->flag & NFSSVC_EXPORT) { + error = nfssvc_export(uap->argp, p); + } else { + error = EINVAL; } #endif /* NFS_NOSERVER */ if (error == EINTR || error == ERESTART) @@ -568,57 +625,156 @@ nfssvc(p, uap) return (error); } +/* + * NFSKERB client helper daemon. + * Gets authorization strings for "kerb" mounts. + */ +static int +nfskerb_clientd( + struct nfsmount *nmp, + struct nfsd_cargs *ncd, + int flag, + user_addr_t argp, + proc_t p) +{ + struct nfsuid *nuidp, *nnuidp; + int error = 0; + struct nfsreq *rp; + struct timeval now; + + /* + * First initialize some variables + */ + microtime(&now); + + /* + * If an authorization string is being passed in, get it. + */ + if ((flag & NFSSVC_GOTAUTH) && (nmp->nm_state & NFSSTA_MOUNTED) && + ((nmp->nm_state & NFSSTA_WAITAUTH) == 0)) { + if (nmp->nm_state & NFSSTA_HASAUTH) + panic("cld kerb"); + if ((flag & NFSSVC_AUTHINFAIL) == 0) { + if (ncd->ncd_authlen <= nmp->nm_authlen && + ncd->ncd_verflen <= nmp->nm_verflen && + !copyin(CAST_USER_ADDR_T(ncd->ncd_authstr),nmp->nm_authstr,ncd->ncd_authlen)&& + !copyin(CAST_USER_ADDR_T(ncd->ncd_verfstr),nmp->nm_verfstr,ncd->ncd_verflen)){ + nmp->nm_authtype = ncd->ncd_authtype; + nmp->nm_authlen = ncd->ncd_authlen; + nmp->nm_verflen = ncd->ncd_verflen; +#if NFSKERB + nmp->nm_key = ncd->ncd_key; +#endif + } else + nmp->nm_state |= NFSSTA_AUTHERR; + } else + nmp->nm_state |= NFSSTA_AUTHERR; + nmp->nm_state |= NFSSTA_HASAUTH; + wakeup((caddr_t)&nmp->nm_authlen); + } else { + nmp->nm_state |= NFSSTA_WAITAUTH; + } + + /* + * Loop every second updating queue until there is a termination sig. + */ + while (nmp->nm_state & NFSSTA_MOUNTED) { + /* Get an authorization string, if required. */ + if ((nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH)) == 0) { + ncd->ncd_authuid = nmp->nm_authuid; + if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs))) + nmp->nm_state |= NFSSTA_WAITAUTH; + else + return (ENEEDAUTH); + } + /* Wait a bit (no pun) and do it again. */ + if ((nmp->nm_state & NFSSTA_MOUNTED) && + (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH))) { + error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH, + "nfskrbtimr", hz / 3); + if (error == EINTR || error == ERESTART) + dounmount(nmp->nm_mountp, 0, p); + } + } + + /* + * Finally, we can free up the mount structure. + */ + for (nuidp = nmp->nm_uidlruhead.tqh_first; nuidp != 0; nuidp = nnuidp) { + nnuidp = nuidp->nu_lru.tqe_next; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); + kauth_cred_rele(nuidp->nu_cr); + FREE_ZONE((caddr_t)nuidp, sizeof (struct nfsuid), M_NFSUID); + } + /* + * Loop through outstanding request list and remove dangling + * references to defunct nfsmount struct + */ + for (rp = nfs_reqq.tqh_first; rp; rp = rp->r_chain.tqe_next) + if (rp->r_nmp == nmp) + rp->r_nmp = (struct nfsmount *)0; + /* Need to wake up any rcvlock waiters so they notice the unmount. */ + if (nmp->nm_state & NFSSTA_WANTRCV) { + nmp->nm_state &= ~NFSSTA_WANTRCV; + wakeup(&nmp->nm_state); + } + FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT); + if (error == EWOULDBLOCK) + error = 0; + return (error); +} + #ifndef NFS_NOSERVER /* * Adds a socket to the list for servicing by nfsds. */ static int -nfssvc_addsock(fp, mynam, p) - struct file *fp; - struct mbuf *mynam; - struct proc *p; +nfssvc_addsock( + socket_t so, + mbuf_t mynam, + __unused proc_t p) { - register struct mbuf *m; - register int siz; - register struct nfssvc_sock *slp; - register struct socket *so; - struct nfssvc_sock *tslp; - int error, s; - - so = (struct socket *)fp->f_data; - tslp = (struct nfssvc_sock *)0; + int siz; + struct nfssvc_sock *slp; + struct nfssvc_sock *tslp = NULL; + int error, sodomain, sotype, soprotocol, on = 1; + struct timeval timeo; + + /* make sure mbuf constants are set up */ + if (!nfs_mbuf_mlen) + nfs_mbuf_init(); + + sock_gettype(so, &sodomain, &sotype, &soprotocol); + /* * Add it to the list, as required. */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if (so->so_proto->pr_protocol == IPPROTO_UDP) { + if (soprotocol == IPPROTO_UDP) { tslp = nfs_udpsock; - if (tslp->ns_flag & SLP_VALID) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + if (!tslp || (tslp->ns_flag & SLP_VALID)) { + mbuf_freem(mynam); return (EPERM); } #if ISO - } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) { + } else if (soprotocol == ISOPROTO_CLTP) { tslp = nfs_cltpsock; - if (tslp->ns_flag & SLP_VALID) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + if (!tslp || (tslp->ns_flag & SLP_VALID)) { + mbuf_freem(mynam); return (EPERM); } #endif /* ISO */ } /* reserve buffer space for 2 maximally-sized packets */ siz = NFS_MAXPACKET; - if (so->so_type == SOCK_STREAM) + if (sotype == SOCK_STREAM) siz += sizeof (u_long); siz *= 2; if (siz > NFS_MAXSOCKBUF) siz = NFS_MAXSOCKBUF; - error = soreserve(so, siz, siz); - if (error) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + if ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &siz, sizeof(siz))) || + (error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &siz, sizeof(siz)))) { + mbuf_freem(mynam); return (error); } @@ -627,62 +783,54 @@ nfssvc_addsock(fp, mynam, p) * reserve some space. For datagram sockets, this can get called * repeatedly for the same socket, but that isn't harmful. */ - if (so->so_type == SOCK_STREAM) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_KEEPALIVE; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); + if (sotype == SOCK_STREAM) { + sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); } - if (so->so_proto->pr_domain->dom_family == AF_INET && - so->so_proto->pr_protocol == IPPROTO_TCP) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); + if (sodomain == AF_INET && soprotocol == IPPROTO_TCP) { + sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); } - so->so_rcv.sb_flags &= ~SB_NOINTR; - so->so_rcv.sb_timeo = 0; - so->so_snd.sb_flags &= ~SB_NOINTR; - so->so_snd.sb_timeo = 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - if (tslp) + sock_nointerrupt(so, 0); + + timeo.tv_usec = 0; + timeo.tv_sec = 0; + error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); + error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); + + if (tslp) { slp = tslp; - else { + lck_mtx_lock(nfsd_mutex); + } else { MALLOC(slp, struct nfssvc_sock *, sizeof(struct nfssvc_sock), M_NFSSVC, M_WAITOK); + if (!slp) { + mbuf_freem(mynam); + return (ENOMEM); + } bzero((caddr_t)slp, sizeof (struct nfssvc_sock)); + lck_rw_init(&slp->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr); + lck_mtx_init(&slp->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr); TAILQ_INIT(&slp->ns_uidlruhead); + lck_mtx_lock(nfsd_mutex); TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain); } + + sock_retain(so); /* grab a retain count on the socket */ slp->ns_so = so; + slp->ns_sotype = sotype; slp->ns_nam = mynam; - slp->ns_fp = fp; - (void)fref(fp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - s = splnet(); + + socket_lock(so, 1); so->so_upcallarg = (caddr_t)slp; so->so_upcall = nfsrv_rcv; so->so_rcv.sb_flags |= SB_UPCALL; /* required for freebsd merge */ - slp->ns_nflag = SLPN_NEEDQ; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - slp->ns_flag = SLP_VALID; + socket_unlock(so, 1); + + slp->ns_flag = SLP_VALID | SLP_NEEDQ; + nfsrv_wakenfsd(slp); - splx(s); + lck_mtx_unlock(nfsd_mutex); + return (0); } @@ -693,86 +841,96 @@ nfssvc_addsock(fp, mynam, p) static int nfssvc_nfsd(nsd, argp, p) struct nfsd_srvargs *nsd; - caddr_t argp; - struct proc *p; + user_addr_t argp; + proc_t p; { - register struct mbuf *m; - register int siz; - register struct nfssvc_sock *slp; - register struct socket *so; + mbuf_t m, mreq; + struct nfssvc_sock *slp; struct nfsd *nfsd = nsd->nsd_nfsd; struct nfsrv_descript *nd = NULL; - struct mbuf *mreq; - int error = 0, cacherep, s, sotype, writes_todo; - int procrastinate; + int error = 0, cacherep, writes_todo; + int siz, procrastinate; u_quad_t cur_usec; struct timeval now; + boolean_t funnel_state; #ifndef nolint cacherep = RC_DOIT; writes_todo = 0; #endif - s = splnet(); if (nfsd == (struct nfsd *)0) { MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK); + if (!nfsd) + return (ENOMEM); nsd->nsd_nfsd = nfsd; bzero((caddr_t)nfsd, sizeof (struct nfsd)); nfsd->nfsd_procp = p; + lck_mtx_lock(nfsd_mutex); TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); nfs_numnfsd++; + lck_mtx_unlock(nfsd_mutex); } + + funnel_state = thread_funnel_set(kernel_flock, FALSE); + /* * Loop getting rpc requests until SIGKILL. */ for (;;) { if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) { - while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && - (nfsd_head_flag & NFSD_CHECKSLP) == 0) { + lck_mtx_lock(nfsd_mutex); + while ((nfsd->nfsd_slp == NULL) && !(nfsd_head_flag & NFSD_CHECKSLP)) { nfsd->nfsd_flag |= NFSD_WAITING; nfsd_waiting++; - error = tsleep((caddr_t)nfsd, PSOCK | PCATCH, - "nfsd", 0); + error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", 0); nfsd_waiting--; - if (error) + if (error) { + lck_mtx_unlock(nfsd_mutex); goto done; + } } - if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && - (nfsd_head_flag & NFSD_CHECKSLP) != 0) { - for (slp = nfssvc_sockhead.tqh_first; slp != 0; - slp = slp->ns_chain.tqe_next) { + if ((nfsd->nfsd_slp == NULL) && (nfsd_head_flag & NFSD_CHECKSLP)) { + TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) { + lck_rw_lock_shared(&slp->ns_rwlock); if ((slp->ns_flag & (SLP_VALID | SLP_DOREC)) == (SLP_VALID | SLP_DOREC)) { + if (lck_rw_lock_shared_to_exclusive(&slp->ns_rwlock)) { + /* upgrade failed and we lost the lock; take exclusive and recheck */ + lck_rw_lock_exclusive(&slp->ns_rwlock); + if ((slp->ns_flag & (SLP_VALID | SLP_DOREC)) + != (SLP_VALID | SLP_DOREC)) { + /* flags no longer set, so skip this socket */ + lck_rw_done(&slp->ns_rwlock); + continue; + } + } slp->ns_flag &= ~SLP_DOREC; slp->ns_sref++; nfsd->nfsd_slp = slp; + lck_rw_done(&slp->ns_rwlock); break; } + lck_rw_done(&slp->ns_rwlock); } if (slp == 0) nfsd_head_flag &= ~NFSD_CHECKSLP; } - if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0) + lck_mtx_unlock(nfsd_mutex); + if ((slp = nfsd->nfsd_slp) == NULL) continue; + lck_rw_lock_exclusive(&slp->ns_rwlock); if (slp->ns_flag & SLP_VALID) { - nfs_slplock(slp, 1); - if (slp->ns_nflag & SLPN_DISCONN) { + if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) { + slp->ns_flag &= ~SLP_NEEDQ; + nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK); + } + if (slp->ns_flag & SLP_DISCONN) nfsrv_zapsock(slp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - } else if (slp->ns_nflag & SLPN_NEEDQ) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - slp->ns_nflag &= ~SLPN_NEEDQ; - nfsrv_rcv(slp->ns_so, (caddr_t)slp, - M_WAIT); - } else - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); error = nfsrv_dorec(slp, nfsd, &nd); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - nfs_slpunlock(slp); microuptime(&now); cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec; - if (error && slp->ns_tq.lh_first && - slp->ns_tq.lh_first->nd_time <= cur_usec) { + if (error && slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) { error = 0; cacherep = RC_DOIT; writes_todo = 1; @@ -780,6 +938,7 @@ nfssvc_nfsd(nsd, argp, p) writes_todo = 0; nfsd->nfsd_flag |= NFSD_REQINPROG; } + lck_rw_done(&slp->ns_rwlock); } else { error = 0; slp = nfsd->nfsd_slp; @@ -787,19 +946,18 @@ nfssvc_nfsd(nsd, argp, p) if (error || (slp->ns_flag & SLP_VALID) == 0) { if (nd) { if (nd->nd_nam2) - m_freem(nd->nd_nam2); + mbuf_freem(nd->nd_nam2); + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); nd = NULL; } - nfsd->nfsd_slp = (struct nfssvc_sock *)0; + nfsd->nfsd_slp = NULL; nfsd->nfsd_flag &= ~NFSD_REQINPROG; nfsrv_slpderef(slp); continue; } - splx(s); - so = slp->ns_so; - sotype = so->so_type; if (nd) { microuptime(&nd->nd_starttime); if (nd->nd_nam2) @@ -812,42 +970,22 @@ nfssvc_nfsd(nsd, argp, p) */ if (nfsd->nfsd_flag & NFSD_NEEDAUTH) { nfsd->nfsd_flag &= ~NFSD_NEEDAUTH; - nsd->nsd_haddr = mtod(nd->nd_nam, - struct sockaddr_in *)->sin_addr.s_addr; + nsd->nsd_haddr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr; nsd->nsd_authlen = nfsd->nfsd_authlen; nsd->nsd_verflen = nfsd->nfsd_verflen; - if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr, + if (!copyout(nfsd->nfsd_authstr,CAST_USER_ADDR_T(nsd->nsd_authstr), nfsd->nfsd_authlen) && - !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr, + !copyout(nfsd->nfsd_verfstr, CAST_USER_ADDR_T(nsd->nsd_verfstr), nfsd->nfsd_verflen) && - !copyout((caddr_t)nsd, argp, sizeof (*nsd))) + !copyout((caddr_t)nsd, argp, sizeof (*nsd))) { + thread_funnel_set(kernel_flock, funnel_state); return (ENEEDAUTH); + } cacherep = RC_DROPIT; } else cacherep = nfsrv_getcache(nd, slp, &mreq); - /* - * Check for just starting up for NQNFS and send - * fake "try again later" replies to the NQNFS clients. - */ - microtime(&now); - if (notstarted && nqnfsstarttime <= now.tv_sec) { - if (modify_flag) { - nqnfsstarttime = now.tv_sec + nqsrv_writeslack; - modify_flag = 0; - } else - notstarted = 0; - } - if (notstarted) { - if ((nd->nd_flag & ND_NQNFS) == 0) - cacherep = RC_DROPIT; - else if (nd->nd_procnum != NFSPROC_WRITE) { - nd->nd_procnum = NFSPROC_NOOP; - nd->nd_repstat = NQNFS_TRYLATER; - cacherep = RC_DOIT; - } else - modify_flag = 1; - } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) { + if (nfsd->nfsd_flag & NFSD_AUTHFAIL) { nfsd->nfsd_flag &= ~NFSD_AUTHFAIL; nd->nd_procnum = NFSPROC_NOOP; nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); @@ -855,18 +993,20 @@ nfssvc_nfsd(nsd, argp, p) } else if (nfs_privport) { /* Check if source port is privileged */ u_short port; - struct sockaddr *nam = mtod(nd->nd_nam, struct sockaddr*); + struct sockaddr *nam = mbuf_data(nd->nd_nam); struct sockaddr_in *sin; sin = (struct sockaddr_in *)nam; port = ntohs(sin->sin_port); if (port >= IPPORT_RESERVED && nd->nd_procnum != NFSPROC_NULL) { + char strbuf[MAX_IPv4_STR_LEN]; nd->nd_procnum = NFSPROC_NOOP; nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); cacherep = RC_DOIT; printf("NFS request from unprivileged port (%s:%d)\n", - (char *)(inet_ntoa(sin->sin_addr)), port); + inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)), + port); } } @@ -883,94 +1023,100 @@ nfssvc_nfsd(nsd, argp, p) procrastinate = nfsrvw_procrastinate_v3; else procrastinate = nfsrvw_procrastinate; - if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE && - procrastinate > 0 && !notstarted)) - error = nfsrv_writegather(&nd, slp, - nfsd->nfsd_procp, &mreq); + lck_rw_lock_shared(&nfs_export_rwlock); + if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) + error = nfsrv_writegather(&nd, slp, nfsd->nfsd_procp, &mreq); else - error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, - slp, nfsd->nfsd_procp, &mreq); + error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, slp, nfsd->nfsd_procp, &mreq); + lck_rw_done(&nfs_export_rwlock); if (mreq == NULL) break; if (error) { - if (nd->nd_procnum != NQNFSPROC_VACATED) - nfsstats.srv_errs++; + OSAddAtomic(1, (SInt32*)&nfsstats.srv_errs); nfsrv_updatecache(nd, FALSE, mreq); if (nd->nd_nam2) { - m_freem(nd->nd_nam2); + mbuf_freem(nd->nd_nam2); nd->nd_nam2 = NULL; } break; } - nfsstats.srvrpccnt[nd->nd_procnum]++; + OSAddAtomic(1, (SInt32*)&nfsstats.srvrpccnt[nd->nd_procnum]); nfsrv_updatecache(nd, TRUE, mreq); - nd->nd_mrep = (struct mbuf *)0; + nd->nd_mrep = NULL; case RC_REPLY: m = mreq; siz = 0; while (m) { - siz += m->m_len; - m = m->m_next; + siz += mbuf_len(m); + m = mbuf_next(m); } if (siz <= 0 || siz > NFS_MAXPACKET) { printf("mbuf siz=%d\n",siz); panic("Bad nfs svc reply"); } m = mreq; - m->m_pkthdr.len = siz; - m->m_pkthdr.rcvif = (struct ifnet *)0; + mbuf_pkthdr_setlen(m, siz); + error = mbuf_pkthdr_setrcvif(m, NULL); + if (error) + panic("nfsd setrcvif failed: %d", error); /* * For stream protocols, prepend a Sun RPC * Record Mark. */ - if (sotype == SOCK_STREAM) { - M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); - *mtod(m, u_long *) = htonl(0x80000000 | siz); + if (slp->ns_sotype == SOCK_STREAM) { + error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); + if (!error) + *(u_long*)mbuf_data(m) = htonl(0x80000000 | siz); } - if (so->so_proto->pr_flags & PR_CONNREQUIRED) - (void) nfs_slplock(slp, 1); - if (slp->ns_flag & SLP_VALID) - error = nfs_send(so, nd->nd_nam2, m, NULL); - else { - error = EPIPE; - m_freem(m); + if (!error) { + if (slp->ns_flag & SLP_VALID) { + error = nfs_send(slp->ns_so, nd->nd_nam2, m, NULL); + } else { + error = EPIPE; + mbuf_freem(m); + } + } else { + mbuf_freem(m); } mreq = NULL; if (nfsrtton) - nfsd_rt(sotype, nd, cacherep); + nfsd_rt(slp->ns_sotype, nd, cacherep); if (nd->nd_nam2) { - MFREE(nd->nd_nam2, m); + mbuf_freem(nd->nd_nam2); nd->nd_nam2 = NULL; } if (nd->nd_mrep) { - m_freem(nd->nd_mrep); + mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } - if (error == EPIPE) + if (error == EPIPE) { + lck_rw_lock_exclusive(&slp->ns_rwlock); nfsrv_zapsock(slp); - if (so->so_proto->pr_flags & PR_CONNREQUIRED) - nfs_slpunlock(slp); + lck_rw_done(&slp->ns_rwlock); + } if (error == EINTR || error == ERESTART) { - FREE_ZONE((caddr_t)nd, - sizeof *nd, M_NFSRVDESC); + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); + FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); nfsrv_slpderef(slp); - s = splnet(); goto done; } break; case RC_DROPIT: if (nfsrtton) - nfsd_rt(sotype, nd, cacherep); - m_freem(nd->nd_mrep); - m_freem(nd->nd_nam2); + nfsd_rt(slp->ns_sotype, nd, cacherep); + mbuf_freem(nd->nd_mrep); + mbuf_freem(nd->nd_nam2); nd->nd_mrep = nd->nd_nam2 = NULL; break; }; if (nd) { if (nd->nd_mrep) - m_freem(nd->nd_mrep); + mbuf_freem(nd->nd_mrep); if (nd->nd_nam2) - m_freem(nd->nd_nam2); + mbuf_freem(nd->nd_nam2); + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); nd = NULL; } @@ -982,34 +1128,71 @@ nfssvc_nfsd(nsd, argp, p) microuptime(&now); cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec; - s = splsoftclock(); - if (slp->ns_tq.lh_first && - slp->ns_tq.lh_first->nd_time <= cur_usec) { + if (slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) { cacherep = RC_DOIT; writes_todo = 1; - } else + } else { writes_todo = 0; - splx(s); + } } while (writes_todo); - s = splnet(); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + lck_rw_lock_exclusive(&slp->ns_rwlock); if (nfsrv_dorec(slp, nfsd, &nd)) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_rw_done(&slp->ns_rwlock); nfsd->nfsd_flag &= ~NFSD_REQINPROG; nfsd->nfsd_slp = NULL; nfsrv_slpderef(slp); - } else - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + } else { + lck_rw_done(&slp->ns_rwlock); + } } done: + thread_funnel_set(kernel_flock, funnel_state); + lck_mtx_lock(nfsd_mutex); TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain); - splx(s); - _FREE((caddr_t)nfsd, M_NFSD); + FREE(nfsd, M_NFSD); nsd->nsd_nfsd = (struct nfsd *)0; if (--nfs_numnfsd == 0) nfsrv_init(TRUE); /* Reinitialize everything */ + lck_mtx_unlock(nfsd_mutex); return (error); } + +static int +nfssvc_export(user_addr_t argp, proc_t p) +{ + int error = 0, is_64bit; + struct user_nfs_export_args unxa; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + is_64bit = IS_64BIT_PROCESS(p); + + /* copy in pointers to path and export args */ + if (is_64bit) { + error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); + } else { + struct nfs_export_args tnxa; + error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); + if (error == 0) { + /* munge into LP64 version of nfs_export_args structure */ + unxa.nxa_fsid = tnxa.nxa_fsid; + unxa.nxa_expid = tnxa.nxa_expid; + unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); + unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); + unxa.nxa_flags = tnxa.nxa_flags; + unxa.nxa_netcount = tnxa.nxa_netcount; + unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); + } + } + if (error) + return (error); + + error = nfsrv_export(&unxa, &context); + + return (error); +} + #endif /* NFS_NOSERVER */ int nfs_defect = 0; @@ -1018,14 +1201,8 @@ int nfs_defect = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, ""); #endif -#ifndef _SYS_SYSPROTO_H_ -struct nfsclnt_args { - int flag; - caddr_t argp; -}; -#endif int -nfsclnt(struct proc *p, struct nfsclnt_args *uap) +nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) { struct lockd_ans la; int error; @@ -1038,7 +1215,7 @@ nfsclnt(struct proc *p, struct nfsclnt_args *uap) return (error != 0 ? error : nfslockdans(p, &la)); } if (uap->flag == NFSCLNT_LOCKDFD) - return (nfslockdfd(p, (int)uap->argp)); + return (nfslockdfd(p, CAST_DOWN(int, uap->argp))); return EINVAL; } @@ -1051,12 +1228,9 @@ static int nfssvc_iod_continue(int); * Never returns unless it fails or gets killed. */ static int -nfssvc_iod(p) - struct proc *p; +nfssvc_iod(__unused proc_t p) { register int i, myiod; - struct nfsmount *nmp; - int error = 0; struct uthread *ut; /* @@ -1075,106 +1249,131 @@ nfssvc_iod(p) /* stuff myiod into uthread to get off local stack for continuation */ - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); ut->uu_state.uu_nfs_myiod = myiod; /* squirrel away for continuation */ nfssvc_iod_continue(0); /* NOTREACHED */ - + return (0); } /* * Continuation for Asynchronous I/O daemons for client nfs. */ static int -nfssvc_iod_continue(error) +nfssvc_iod_continue(int error) { register struct nfsbuf *bp; register int i, myiod; struct nfsmount *nmp; struct uthread *ut; - struct proc *p; + proc_t p; /* * real myiod is stored in uthread, recover it */ - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); myiod = ut->uu_state.uu_nfs_myiod; - p = current_proc(); + p = current_proc(); // XXX /* * Just loop around doin our stuff until SIGKILL * - actually we don't loop with continuations... */ + lck_mtx_lock(nfs_iod_mutex); for (;;) { while (((nmp = nfs_iodmount[myiod]) == NULL || nmp->nm_bufq.tqh_first == NULL) && error == 0 && nfs_ioddelwri == 0) { if (nmp) nmp->nm_bufqiods--; - nfs_iodwant[myiod] = p; + nfs_iodwant[myiod] = p; // XXX this doesn't need to be a proc_t nfs_iodmount[myiod] = NULL; - error = tsleep0((caddr_t)&nfs_iodwant[myiod], - PWAIT | PCATCH, "nfsidl", 0, nfssvc_iod_continue); - /* NOTREACHED */ + error = msleep0((caddr_t)&nfs_iodwant[myiod], nfs_iod_mutex, + PWAIT | PCATCH | PDROP, "nfsidl", 0, nfssvc_iod_continue); + lck_mtx_lock(nfs_iod_mutex); } if (error) { nfs_asyncdaemon[myiod] = 0; if (nmp) nmp->nm_bufqiods--; nfs_iodwant[myiod] = NULL; nfs_iodmount[myiod] = NULL; + lck_mtx_unlock(nfs_iod_mutex); nfs_numasync--; if (error == EINTR || error == ERESTART) error = 0; unix_syscall_return(error); } if (nmp != NULL) { - while ((bp = nmp->nm_bufq.tqh_first) != NULL) { + while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) { /* Take one off the front of the list */ TAILQ_REMOVE(&nmp->nm_bufq, bp, nb_free); bp->nb_free.tqe_next = NFSNOLIST; nmp->nm_bufqlen--; if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) { nmp->nm_bufqwant = FALSE; + lck_mtx_unlock(nfs_iod_mutex); wakeup(&nmp->nm_bufq); + } else { + lck_mtx_unlock(nfs_iod_mutex); } + + SET(bp->nb_flags, NB_IOD); if (ISSET(bp->nb_flags, NB_READ)) - (void) nfs_doio(bp, bp->nb_rcred, (struct proc *)0); + nfs_doio(bp, bp->nb_rcred, NULL); else - (void) nfs_doio(bp, bp->nb_wcred, (struct proc *)0); + nfs_doio(bp, bp->nb_wcred, NULL); + lck_mtx_lock(nfs_iod_mutex); /* * If there are more than one iod on this mount, then defect * so that the iods can be shared out fairly between the mounts */ if (nfs_defect && nmp->nm_bufqiods > 1) { - NFS_DPF(ASYNCIO, - ("nfssvc_iod: iod %d defecting from mount %p\n", - myiod, nmp)); nfs_iodmount[myiod] = NULL; nmp->nm_bufqiods--; break; } } } + lck_mtx_unlock(nfs_iod_mutex); + if (nfs_ioddelwri) { i = 0; nfs_ioddelwri = 0; + lck_mtx_lock(nfs_buf_mutex); while (i < 8 && (bp = TAILQ_FIRST(&nfsbufdelwri)) != NULL) { struct nfsnode *np = VTONFS(bp->nb_vp); nfs_buf_remfree(bp); + nfs_buf_refget(bp); + while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN); + nfs_buf_refrele(bp); + if (error) + break; + if (!bp->nb_vp) { + /* buffer is no longer valid */ + nfs_buf_drop(bp); + continue; + } if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { /* put buffer at end of delwri list */ TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free); nfsbufdelwricnt++; - nfs_flushcommits(np->n_vnode, (struct proc *)0); + nfs_buf_drop(bp); + lck_mtx_unlock(nfs_buf_mutex); + nfs_flushcommits(np->n_vnode, NULL, 1); } else { - SET(bp->nb_flags, (NB_BUSY | NB_ASYNC | NB_IOD)); + SET(bp->nb_flags, (NB_ASYNC | NB_IOD)); + lck_mtx_unlock(nfs_buf_mutex); nfs_buf_write(bp); } i++; + lck_mtx_lock(nfs_buf_mutex); } + lck_mtx_unlock(nfs_buf_mutex); } + + lck_mtx_lock(nfs_iod_mutex); } } @@ -1186,52 +1385,28 @@ nfssvc_iod_continue(error) * reassigned during cleanup. */ static void -nfsrv_zapsock(slp) - register struct nfssvc_sock *slp; +nfsrv_zapsock(struct nfssvc_sock *slp) { - register struct nfsuid *nuidp, *nnuidp; - register struct nfsrv_descript *nwp, *nnwp; - struct socket *so; - struct file *fp; - struct mbuf *m; - int s; + socket_t so; + if ((slp->ns_flag & SLP_VALID) == 0) + return; slp->ns_flag &= ~SLP_ALLFLAGS; - slp->ns_nflag &= ~SLP_ALLFLAGS; - fp = slp->ns_fp; - if (fp) { - slp->ns_fp = (struct file *)0; - so = slp->ns_so; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - so->so_upcall = NULL; - so->so_rcv.sb_flags &= ~SB_UPCALL; - soshutdown(so, 2); - if (slp->ns_nam) - MFREE(slp->ns_nam, m); - m_freem(slp->ns_raw); - m_freem(slp->ns_rec); - slp->ns_nam = slp->ns_raw = slp->ns_rec = NULL; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - closef(fp, (struct proc *)0); - for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0; - nuidp = nnuidp) { - nnuidp = nuidp->nu_lru.tqe_next; - LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); - if (nuidp->nu_flag & NU_NAM) - m_freem(nuidp->nu_nam); - FREE_ZONE((caddr_t)nuidp, - sizeof (struct nfsuid), M_NFSUID); - } - s = splsoftclock(); - for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { - nnwp = nwp->nd_tq.le_next; - LIST_REMOVE(nwp, nd_tq); - FREE_ZONE((caddr_t)nwp, sizeof *nwp, M_NFSRVDESC); - } - LIST_INIT(&slp->ns_tq); - splx(s); - } + + so = slp->ns_so; + if (so == NULL) + return; + + /* + * Attempt to deter future upcalls, but leave the + * upcall info in place to avoid a race with the + * networking code. + */ + socket_lock(so, 1); + so->so_rcv.sb_flags &= ~SB_UPCALL; + socket_unlock(so, 1); + + sock_shutdown(so, SHUT_RDWR); } /* @@ -1242,7 +1417,7 @@ int nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) register struct nfsmount *nmp; struct nfsreq *rep; - struct ucred *cred; + kauth_cred_t cred; char **auth_str; int *auth_len; char *verf_str; @@ -1261,13 +1436,16 @@ nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) return (error); } } - nmp->nm_state &= ~(NFSSTA_WAITAUTH | NFSSTA_WANTAUTH); + nmp->nm_state &= ~NFSSTA_WANTAUTH; MALLOC(*auth_str, char *, RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK); + if (!*auth_str) + return (ENOMEM); nmp->nm_authstr = *auth_str; nmp->nm_authlen = RPCAUTH_MAXSIZ; nmp->nm_verfstr = verf_str; nmp->nm_verflen = *verf_len; - nmp->nm_authuid = cred->cr_uid; + nmp->nm_authuid = kauth_cred_getuid(cred); + nmp->nm_state &= ~NFSSTA_WAITAUTH; wakeup((caddr_t)&nmp->nm_authstr); /* @@ -1283,7 +1461,7 @@ nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) error = EAUTH; } if (error) - _FREE((caddr_t)*auth_str, M_TEMP); + FREE(*auth_str, M_TEMP); else { *auth_len = nmp->nm_authlen; *verf_len = nmp->nm_verflen; @@ -1302,13 +1480,13 @@ nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) * Get a nickname authenticator and verifier. */ int -nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) - struct nfsmount *nmp; - struct ucred *cred; - char **auth_str; - int *auth_len; - char *verf_str; - int verf_len; +nfs_getnickauth( + struct nfsmount *nmp, + kauth_cred_t cred, + char **auth_str, + int *auth_len, + char *verf_str, + __unused int verf_len) { register struct nfsuid *nuidp; register u_long *nickp, *verfp; @@ -1318,22 +1496,25 @@ nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) if (verf_len < (4 * NFSX_UNSIGNED)) panic("nfs_getnickauth verf too small"); #endif - for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first; + for (nuidp = NMUIDHASH(nmp, kauth_cred_getuid(cred))->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == cred->cr_uid) + if (kauth_cred_getuid(nuidp->nu_cr) == kauth_cred_getuid(cred)) break; } microtime(&now); if (!nuidp || nuidp->nu_expire < now.tv_sec) return (EACCES); + MALLOC(nickp, u_long *, 2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK); + if (!nickp) + return (ENOMEM); + /* * Move to the end of the lru list (end of lru == most recently used). */ TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru); - MALLOC(nickp, u_long *, 2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK); *nickp++ = txdr_unsigned(RPCAKN_NICKNAME); *nickp = txdr_unsigned(nuidp->nu_nickname); *auth_str = (char *)nickp; @@ -1374,17 +1555,17 @@ nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) int nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep) register struct nfsmount *nmp; - struct ucred *cred; + kauth_cred_t cred; int len; NFSKERBKEY_T key; - struct mbuf **mdp; + mbuf_t *mdp; char **dposp; - struct mbuf *mrep; + mbuf_t mrep; { register struct nfsuid *nuidp; register u_long *tl; register long t1; - struct mbuf *md = *mdp; + mbuf_t md = *mdp; struct timeval ktvin, ktvout, now; u_long nick; char *dpos = *dposp, *cp2; @@ -1418,20 +1599,27 @@ nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep) sizeof (struct nfsuid), M_NFSUID, M_WAITOK); } else { + nuidp = NULL; + } + if (!nuidp) { nuidp = nmp->nm_uidlruhead.tqh_first; + if (!nuidp) { + error = ENOMEM; + goto nfsmout; + } LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, - nu_lru); + TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); + kauth_cred_rele(nuidp->nu_cr); } nuidp->nu_flag = 0; - nuidp->nu_cr.cr_uid = cred->cr_uid; + kauth_cred_ref(cred); + nuidp->nu_cr = cred; nuidp->nu_expire = now.tv_sec + NFS_KERBTTL; nuidp->nu_timestamp = ktvout; nuidp->nu_nickname = nick; bcopy(key, nuidp->nu_key, sizeof (key)); - TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, - nu_lru); - LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid), + TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru); + LIST_INSERT_HEAD(NMUIDHASH(nmp, kauth_cred_getuid(cred)), nuidp, nu_hash); } } else @@ -1445,57 +1633,86 @@ nfsmout: #ifndef NFS_NOSERVER /* - * Derefence a server socket structure. If it has no more references and - * is no longer valid, you can throw it away. + * cleanup and release a server socket structure. */ void -nfsrv_slpderef(slp) - register struct nfssvc_sock *slp; +nfsrv_slpfree(struct nfssvc_sock *slp) { - if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) { - TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); - _FREE((caddr_t)slp, M_NFSSVC); - } -} + struct nfsuid *nuidp, *nnuidp; + struct nfsrv_descript *nwp, *nnwp; -/* - * Lock a socket against others. - */ -int -nfs_slplock(slp, wait) - register struct nfssvc_sock *slp; - int wait; -{ - int *statep = &slp->ns_solock; + if (slp->ns_so) { + sock_release(slp->ns_so); + slp->ns_so = NULL; + } + if (slp->ns_nam) + mbuf_free(slp->ns_nam); + if (slp->ns_raw) + mbuf_freem(slp->ns_raw); + if (slp->ns_rec) + mbuf_freem(slp->ns_rec); + slp->ns_nam = slp->ns_raw = slp->ns_rec = NULL; + + for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0; + nuidp = nnuidp) { + nnuidp = nuidp->nu_lru.tqe_next; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); + if (nuidp->nu_flag & NU_NAM) + mbuf_freem(nuidp->nu_nam); + kauth_cred_rele(nuidp->nu_cr); + FREE_ZONE((caddr_t)nuidp, + sizeof (struct nfsuid), M_NFSUID); + } - if (!wait && (*statep & NFSSTA_SNDLOCK)) - return(0); /* already locked, fail */ - while (*statep & NFSSTA_SNDLOCK) { - *statep |= NFSSTA_WANTSND; - (void) tsleep((caddr_t)statep, PZERO - 1, "nfsslplck", 0); + for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { + nnwp = nwp->nd_tq.le_next; + LIST_REMOVE(nwp, nd_tq); + if (nwp->nd_cr) + kauth_cred_rele(nwp->nd_cr); + FREE_ZONE((caddr_t)nwp, sizeof *nwp, M_NFSRVDESC); } - *statep |= NFSSTA_SNDLOCK; - return (1); + LIST_INIT(&slp->ns_tq); + + lck_rw_destroy(&slp->ns_rwlock, nfs_slp_rwlock_group); + lck_mtx_destroy(&slp->ns_wgmutex, nfs_slp_mutex_group); + FREE(slp, M_NFSSVC); } /* - * Unlock the stream socket for others. + * Derefence a server socket structure. If it has no more references and + * is no longer valid, you can throw it away. */ void -nfs_slpunlock(slp) - struct nfssvc_sock *slp; +nfsrv_slpderef(struct nfssvc_sock *slp) { - int *statep = &slp->ns_solock; - - if ((*statep & NFSSTA_SNDLOCK) == 0) - panic("nfs slpunlock"); - *statep &= ~NFSSTA_SNDLOCK; - if (*statep & NFSSTA_WANTSND) { - *statep &= ~NFSSTA_WANTSND; - wakeup((caddr_t)statep); + struct timeval now; + + lck_mtx_lock(nfsd_mutex); + lck_rw_lock_exclusive(&slp->ns_rwlock); + slp->ns_sref--; + if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { + lck_rw_done(&slp->ns_rwlock); + lck_mtx_unlock(nfsd_mutex); + return; } + + /* queue the socket up for deletion */ + microuptime(&now); + slp->ns_timestamp = now.tv_sec; + TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); + TAILQ_INSERT_TAIL(&nfssvc_deadsockhead, slp, ns_chain); + lck_rw_done(&slp->ns_rwlock); + if (slp == nfs_udpsock) + nfs_udpsock = NULL; +#if ISO + else if (slp == nfs_cltpsock) + nfs_cltpsock = NULL; +#endif + lck_mtx_unlock(nfsd_mutex); } + /* * Initialize the data structures for the server. * Handshake with any new nfsds starting up to avoid any chance of @@ -1505,49 +1722,70 @@ void nfsrv_init(terminating) int terminating; { - register struct nfssvc_sock *slp, *nslp; + struct nfssvc_sock *slp, *nslp; + struct timeval now; - if (nfssvc_sockhead_flag & SLP_INIT) - panic("nfsd init"); - nfssvc_sockhead_flag |= SLP_INIT; if (terminating) { - for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) { - nslp = slp->ns_chain.tqe_next; - if (slp->ns_flag & SLP_VALID) + microuptime(&now); + for (slp = TAILQ_FIRST(&nfssvc_sockhead); slp != 0; slp = nslp) { + nslp = TAILQ_NEXT(slp, ns_chain); + if (slp->ns_flag & SLP_VALID) { + lck_rw_lock_exclusive(&slp->ns_rwlock); nfsrv_zapsock(slp); + lck_rw_done(&slp->ns_rwlock); + } + /* queue the socket up for deletion */ + slp->ns_timestamp = now.tv_sec; TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); - _FREE((caddr_t)slp, M_NFSSVC); + TAILQ_INSERT_TAIL(&nfssvc_deadsockhead, slp, ns_chain); + if (slp == nfs_udpsock) + nfs_udpsock = NULL; +#if ISO + else if (slp == nfs_cltpsock) + nfs_cltpsock = NULL; +#endif } nfsrv_cleancache(); /* And clear out server cache */ -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED } else nfs_pub.np_valid = 0; #else } #endif - TAILQ_INIT(&nfssvc_sockhead); - nfssvc_sockhead_flag &= ~SLP_INIT; - if (nfssvc_sockhead_flag & SLP_WANTINIT) { - nfssvc_sockhead_flag &= ~SLP_WANTINIT; - wakeup((caddr_t)&nfssvc_sockhead); + if (!terminating) { + TAILQ_INIT(&nfssvc_sockhead); + TAILQ_INIT(&nfssvc_deadsockhead); + TAILQ_INIT(&nfsd_head); + nfsd_head_flag &= ~NFSD_CHECKSLP; } - TAILQ_INIT(&nfsd_head); - nfsd_head_flag &= ~NFSD_CHECKSLP; - MALLOC(nfs_udpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock), M_NFSSVC, M_WAITOK); - bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock)); - TAILQ_INIT(&nfs_udpsock->ns_uidlruhead); - TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain); + if (nfs_udpsock) { + bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock)); + lck_rw_init(&nfs_udpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr); + lck_mtx_init(&nfs_udpsock->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr); + TAILQ_INIT(&nfs_udpsock->ns_uidlruhead); + TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain); + } else { + printf("nfsrv_init() failed to allocate UDP socket\n"); + } +#if ISO MALLOC(nfs_cltpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock), M_NFSSVC, M_WAITOK); - bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock)); - TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead); - TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain); + if (nfs_cltpsock) { + bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock)); + lck_rw_init(&nfs_cltpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr); + lck_mtx_init(&nfs_cltpsock->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr); + TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead); + TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain); + } else { + printf("nfsrv_init() failed to allocate CLTP socket\n"); + } +#endif } /* @@ -1571,13 +1809,11 @@ nfsd_rt(sotype, nd, cacherep) rt->flag = DRT_CACHEDROP; if (sotype == SOCK_STREAM) rt->flag |= DRT_TCP; - if (nd->nd_flag & ND_NQNFS) - rt->flag |= DRT_NQNFS; else if (nd->nd_flag & ND_NFSV3) rt->flag |= DRT_NFSV3; rt->proc = nd->nd_procnum; - if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET) - rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr; + if (((struct sockaddr *)mbuf_data(nd->nd_nam))->sa_family == AF_INET) + rt->ipadr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr; else rt->ipadr = INADDR_ANY; microuptime(&now);