X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/43866e378188c25dd1e2208016ab3cbeb086ae6c..ff6e181ae92fc6f1e89841290f461d1f2f9badd9:/bsd/nfs/nfs_syscalls.c diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index f29b5c80e..7241bdfc5 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -1,10 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -69,25 +67,31 @@ #include <sys/sysproto.h> #endif #include <sys/kernel.h> -#include <sys/file.h> +#include <sys/file_internal.h> #include <sys/filedesc.h> #include <sys/stat.h> -#include <sys/vnode.h> -#include <sys/mount.h> -#include <sys/proc.h> +#include <sys/vnode_internal.h> +#include <sys/mount_internal.h> +#include <sys/proc_internal.h> /* for fdflags */ +#include <sys/kauth.h> #include <sys/sysctl.h> +#include <sys/ubc.h> #include <sys/uio.h> #include <sys/malloc.h> -#include <sys/buf.h> -#include <sys/mbuf.h> +#include <sys/kpi_mbuf.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/domain.h> #include <sys/protosw.h> -#include <sys/namei.h> +#include <sys/fcntl.h> +#include <sys/lockf.h> #include <sys/syslog.h> #include <sys/user.h> -#include <machine/spl.h> +#include <sys/sysproto.h> +#include <sys/kpi_socket.h> +#include <libkern/OSAtomic.h> + +#include <bsm/audit_kernel.h> #include <netinet/in.h> #include <netinet/tcp.h> @@ -102,44 +106,40 @@ #include <nfs/nfsrvcache.h> #include <nfs/nfsmount.h> #include <nfs/nfsnode.h> -#include <nfs/nqnfs.h> #include <nfs/nfsrtt.h> +#include <nfs/nfs_lock.h> +extern void unix_syscall_return(int); /* Global defs. */ -extern int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, +extern int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, struct nfssvc_sock *slp, - struct proc *procp, - struct mbuf **mreqp)); + proc_t procp, + mbuf_t *mreqp); extern int nfs_numasync; -extern time_t nqnfsstarttime; -extern int nqsrv_writeslack; +extern int nfs_ioddelwri; extern int nfsrtton; extern struct nfsstats nfsstats; extern int nfsrvw_procrastinate; extern int nfsrvw_procrastinate_v3; + struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; static int nuidhash_max = NFS_MAXUIDHASH; -static void nfsrv_zapsock __P((struct nfssvc_sock *slp)); -static int nfssvc_iod __P((struct proc *)); - -#define TRUE 1 -#define FALSE 0 +static void nfsrv_zapsock(struct nfssvc_sock *slp); +static int nfssvc_iod(proc_t); +static int nfskerb_clientd(struct nfsmount *, struct nfsd_cargs *, int, user_addr_t, proc_t); static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; #ifndef NFS_NOSERVER int nfsd_waiting = 0; static struct nfsdrt nfsdrt; -static int nfs_numnfsd = 0; -static int notstarted = 1; -static int modify_flag = 0; -static void nfsd_rt __P((int sotype, struct nfsrv_descript *nd, - int cacherep)); -static int nfssvc_addsock __P((struct file *, struct mbuf *, - struct proc *)); -static int nfssvc_nfsd __P((struct nfsd_srvargs *,caddr_t,struct proc *)); +int nfs_numnfsd = 0; +static void nfsd_rt(int sotype, struct nfsrv_descript *nd, int cacherep); +static int nfssvc_addsock(socket_t, mbuf_t, proc_t); +static int nfssvc_nfsd(struct nfsd_srvargs *,user_addr_t, proc_t); +static int nfssvc_export(user_addr_t, proc_t); static int nfs_privport = 0; /* XXX CSM 11/25/97 Upgrade sysctl.h someday */ @@ -157,44 +157,237 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate /* * Get file handle system call */ -#ifndef _SYS_SYSPROTO_H_ -struct getfh_args { - char *fname; - fhandle_t *fhp; -}; -#endif int -getfh(p, uap) - struct proc *p; - register struct getfh_args *uap; +getfh(proc_t p, struct getfh_args *uap, __unused int *retval) { - register struct vnode *vp; - fhandle_t fh; + vnode_t vp; + struct nfs_filehandle nfh; int error; struct nameidata nd; + struct vfs_context context; + char path[MAXPATHLEN], *ptr; + u_int pathlen; + struct nfs_exportfs *nxfs; + struct nfs_export *nx; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); /* * Must be super user */ - error = suser(p->p_ucred, &p->p_acflag); - if(error) + error = proc_suser(p); + if (error) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p); + + error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen); + if (error) + return (error); + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, path, &context); error = namei(&nd); if (error) return (error); + nameidone(&nd); + vp = nd.ni_vp; - bzero((caddr_t)&fh, sizeof(fh)); - fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fh.fh_fid); - vput(vp); + + // find exportfs that matches f_mntonname + lck_rw_lock_shared(&nfs_export_rwlock); + ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; + LIST_FOREACH(nxfs, &nfs_exports, nxfs_next) { + if (!strcmp(nxfs->nxfs_path, ptr)) + break; + } + if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { + error = EINVAL; + goto out; + } + // find export that best matches remainder of path + ptr = path + strlen(nxfs->nxfs_path); + while (*ptr && (*ptr == '/')) + ptr++; + LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { + int len = strlen(nx->nx_path); + if (len == 0) // we've hit the export entry for the root directory + break; + if (!strncmp(nx->nx_path, ptr, len)) + break; + } + if (!nx) { + error = EINVAL; + goto out; + } + + bzero(&nfh, sizeof(nfh)); + nfh.nfh_xh.nxh_version = NFS_FH_VERSION; + nfh.nfh_xh.nxh_fsid = nxfs->nxfs_id; + nfh.nfh_xh.nxh_expid = nx->nx_id; + nfh.nfh_xh.nxh_flags = 0; + nfh.nfh_xh.nxh_reserved = 0; + nfh.nfh_len = NFS_MAX_FID_SIZE; + error = VFS_VPTOFH(vp, &nfh.nfh_len, &nfh.nfh_fid[0], NULL); + if (nfh.nfh_len > (int)NFS_MAX_FID_SIZE) + error = EOVERFLOW; + nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; + nfh.nfh_len += sizeof(nfh.nfh_xh); + +out: + lck_rw_done(&nfs_export_rwlock); + vnode_put(vp); if (error) return (error); - error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh)); + error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh)); return (error); } #endif /* NFS_NOSERVER */ + +extern struct fileops vnops; + +/* + * syscall for the rpc.lockd to use to translate a NFS file handle into + * an open descriptor. + * + * warning: do not remove the suser() call or this becomes one giant + * security hole. + */ +int +fhopen( proc_t p, + struct fhopen_args *uap, + register_t *retval) +{ + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct flock lf; + struct fileproc *fp, *nfp; + int fmode, error, type; + int indx; + kauth_cred_t cred = proc_ucred(p); + struct vfs_context context; + kauth_action_t action; + + context.vc_proc = p; + context.vc_ucred = cred; + + /* + * Must be super user + */ + error = suser(cred, 0); + if (error) + return (error); + + fmode = FFLAGS(uap->flags); + /* why not allow a non-read/write open for our lockd? */ + if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) + return (EINVAL); + + error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len)); + if (error) + return (error); + if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) || + (nfh.nfh_len > (int)NFS_MAX_FH_SIZE)) + return (EINVAL); + error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len); + if (error) + return (error); + + lck_rw_lock_shared(&nfs_export_rwlock); + /* now give me my vnode, it gets returned to me with a reference */ + error = nfsrv_fhtovp(&nfh, NULL, TRUE, &vp, &nx, &nxo); + lck_rw_done(&nfs_export_rwlock); + if (error) + return (error); + + /* + * From now on we have to make sure not + * to forget about the vnode. + * Any error that causes an abort must vnode_put(vp). + * Just set error = err and 'goto bad;'. + */ + + /* + * from vn_open + */ + if (vnode_vtype(vp) == VSOCK) { + error = EOPNOTSUPP; + goto bad; + } + + /* disallow write operations on directories */ + if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { + error = EISDIR; + goto bad; + } + + /* compute action to be authorized */ + action = 0; + if (fmode & FREAD) + action |= KAUTH_VNODE_READ_DATA; + if (fmode & (FWRITE | O_TRUNC)) + action |= KAUTH_VNODE_WRITE_DATA; + if ((error = vnode_authorize(vp, NULL, action, &context)) != 0) + goto bad; + + if ((error = VNOP_OPEN(vp, fmode, &context))) + goto bad; + if ((error = vnode_ref_ext(vp, fmode))) + goto bad; + + /* + * end of vn_open code + */ + + // starting here... error paths should call vn_close/vnode_put + if ((error = falloc(p, &nfp, &indx)) != 0) { + vn_close(vp, fmode & FMASK, cred, p); + goto bad; + } + fp = nfp; + + fp->f_fglob->fg_flag = fmode & FMASK; + fp->f_fglob->fg_type = DTYPE_VNODE; + fp->f_fglob->fg_ops = &vnops; + fp->f_fglob->fg_data = (caddr_t)vp; + + // XXX do we really need to support this with fhopen()? + if (fmode & (O_EXLOCK | O_SHLOCK)) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + if (fmode & O_EXLOCK) + lf.l_type = F_WRLCK; + else + lf.l_type = F_RDLCK; + type = F_FLOCK; + if ((fmode & FNONBLOCK) == 0) + type |= F_WAIT; + if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, &context))) { + vn_close(vp, fp->f_fglob->fg_flag, fp->f_fglob->fg_cred, p); + fp_free(p, indx, fp); + return (error); + } + fp->f_fglob->fg_flag |= FHASLOCK; + } + + vnode_put(vp); + + proc_fdlock(p); + *fdflags(p, indx) &= ~UF_RESERVED; + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); + + *retval = indx; + return (0); + +bad: + vnode_put(vp); + return (error); +} + /* * Nfs server psuedo system call for the nfsd's * Based on the flag value it either: @@ -202,41 +395,34 @@ getfh(p, uap) * - remains in the kernel as an nfsd * - remains in the kernel as an nfsiod */ -#ifndef _SYS_SYSPROTO_H_ -struct nfssvc_args { - int flag; - caddr_t argp; -}; -#endif int -nfssvc(p, uap) - struct proc *p; - register struct nfssvc_args *uap; +nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) { #ifndef NFS_NOSERVER struct nameidata nd; - struct file *fp; - struct mbuf *nam; - struct nfsd_args nfsdarg; + mbuf_t nam; + struct user_nfsd_args user_nfsdarg; struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs; struct nfsd_cargs ncd; struct nfsd *nfsd; struct nfssvc_sock *slp; struct nfsuid *nuidp; struct nfsmount *nmp; + struct timeval now; + socket_t so; + struct vfs_context context; + struct ucred temp_cred; #endif /* NFS_NOSERVER */ int error; + AUDIT_ARG(cmd, uap->flag); + /* * Must be super user */ - error = suser(p->p_ucred, &p->p_acflag); + error = proc_suser(p); if(error) return (error); - while (nfssvc_sockhead_flag & SLP_INIT) { - nfssvc_sockhead_flag |= SLP_WANTINIT; - (void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0); - } if (uap->flag & NFSSVC_BIOD) error = nfssvc_iod(p); #ifdef NFS_NOSERVER @@ -244,57 +430,76 @@ nfssvc(p, uap) error = ENXIO; #else /* !NFS_NOSERVER */ else if (uap->flag & NFSSVC_MNTD) { + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)); if (error) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - ncd.ncd_dirp, p); + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + (proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + CAST_USER_ADDR_T(ncd.ncd_dirp), &context); error = namei(&nd); if (error) return (error); - if ((nd.ni_vp->v_flag & VROOT) == 0) + nameidone(&nd); + + if (vnode_isvroot(nd.ni_vp) == 0) error = EINVAL; - nmp = VFSTONFS(nd.ni_vp->v_mount); - vput(nd.ni_vp); + nmp = VFSTONFS(vnode_mount(nd.ni_vp)); + vnode_put(nd.ni_vp); if (error) return (error); - /* disable split funnels now */ - thread_funnel_merge(kernel_flock, network_flock); - - if ((nmp->nm_flag & NFSMNT_MNTD) && + if ((nmp->nm_state & NFSSTA_MNTD) && (uap->flag & NFSSVC_GOTAUTH) == 0) return (0); - nmp->nm_flag |= NFSMNT_MNTD; - error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag, - uap->argp, p); + nmp->nm_state |= NFSSTA_MNTD; + error = nfskerb_clientd(nmp, &ncd, uap->flag, uap->argp, p); } else if (uap->flag & NFSSVC_ADDSOCK) { - error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg)); + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg)); + } else { + struct nfsd_args tmp_args; + error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args)); + if (error == 0) { + user_nfsdarg.sock = tmp_args.sock; + user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name); + user_nfsdarg.namelen = tmp_args.namelen; + } + } if (error) return (error); - error = getsock(p->p_fd, nfsdarg.sock, &fp); + /* get the socket */ + error = file_socket(user_nfsdarg.sock, &so); if (error) return (error); - /* - * Get the client address for connected sockets. - */ - if (nfsdarg.name == NULL || nfsdarg.namelen == 0) - nam = (struct mbuf *)0; - else { - error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen, - MT_SONAME); - if (error) + /* Get the client address for connected sockets. */ + if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) { + nam = NULL; + } else { + error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME); + if (error) { + /* drop the iocount file_socket() grabbed on the file descriptor */ + file_drop(user_nfsdarg.sock); return (error); + } } - error = nfssvc_addsock(fp, nam, p); - } else { + /* + * nfssvc_addsock() will grab a retain count on the socket + * to keep the socket from being closed when nfsd closes its + * file descriptor for it. + */ + error = nfssvc_addsock(so, nam, p); + /* drop the iocount file_socket() grabbed on the file descriptor */ + file_drop(user_nfsdarg.sock); + } else if (uap->flag & NFSSVC_NFSD) { error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)); if (error) return (error); - /* disable split funnels now */ - thread_funnel_merge(kernel_flock, network_flock); - if ((uap->flag & NFSSVC_AUTHIN) && ((nfsd = nsd->nsd_nfsd)) && (nfsd->nfsd_slp->ns_flag & SLP_VALID)) { slp = nfsd->nfsd_slp; @@ -305,14 +510,14 @@ nfssvc(p, uap) */ for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid && + if (kauth_cred_getuid(nuidp->nu_cr) == nsd->nsd_cr.cr_uid && (!nfsd->nfsd_nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp), &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2))) break; } if (nuidp) { - nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr); + nfsrv_setcred(nuidp->nu_cr,nfsd->nfsd_nd->nd_cr); nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; } else { /* @@ -326,25 +531,39 @@ nfssvc(p, uap) } else nuidp = (struct nfsuid *)0; if ((slp->ns_flag & SLP_VALID) == 0) { - if (nuidp) - _FREE_ZONE((caddr_t)nuidp, + if (nuidp) { + FREE_ZONE((caddr_t)nuidp, sizeof (struct nfsuid), M_NFSUID); + slp->ns_numuids--; + } } else { if (nuidp == (struct nfsuid *)0) { nuidp = slp->ns_uidlruhead.tqh_first; + if (!nuidp) + return (ENOMEM); LIST_REMOVE(nuidp, nu_hash); TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); if (nuidp->nu_flag & NU_NAM) - m_freem(nuidp->nu_nam); - } + mbuf_freem(nuidp->nu_nam); + kauth_cred_rele(nuidp->nu_cr); + } nuidp->nu_flag = 0; - nuidp->nu_cr = nsd->nsd_cr; - if (nuidp->nu_cr.cr_ngroups > NGROUPS) - nuidp->nu_cr.cr_ngroups = NGROUPS; - nuidp->nu_cr.cr_ref = 1; + + if (nsd->nsd_cr.cr_ngroups > NGROUPS) + nsd->nsd_cr.cr_ngroups = NGROUPS; + + nfsrv_setcred(&nsd->nsd_cr, &temp_cred); + nuidp->nu_cr = kauth_cred_create(&temp_cred); + + if (!nuidp->nu_cr) { + FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID); + slp->ns_numuids--; + return (ENOMEM); + } nuidp->nu_timestamp = nsd->nsd_timestamp; - nuidp->nu_expire = time.tv_sec + nsd->nsd_ttl; + microtime(&now); + nuidp->nu_expire = now.tv_sec + nsd->nsd_ttl; /* * and save the session key in nu_key. */ @@ -353,8 +572,7 @@ nfssvc(p, uap) if (nfsd->nfsd_nd->nd_nam2) { struct sockaddr_in *saddr; - saddr = mtod(nfsd->nfsd_nd->nd_nam2, - struct sockaddr_in *); + saddr = mbuf_data(nfsd->nfsd_nd->nd_nam2); switch (saddr->sin_family) { case AF_INET: nuidp->nu_flag |= NU_INETADDR; @@ -364,9 +582,15 @@ nfssvc(p, uap) case AF_ISO: default: nuidp->nu_flag |= NU_NAM; - nuidp->nu_nam = m_copym( - nfsd->nfsd_nd->nd_nam2, 0, - M_COPYALL, M_WAIT); + error = mbuf_copym(nfsd->nfsd_nd->nd_nam2, 0, + MBUF_COPYALL, MBUF_WAITOK, + &nuidp->nu_nam); + if (error) { + kauth_cred_rele(nuidp->nu_cr); + FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID); + slp->ns_numuids--; + return (error); + } break; }; } @@ -374,8 +598,8 @@ nfssvc(p, uap) nu_lru); LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid), nuidp, nu_hash); - nfsrv_setcred(&nuidp->nu_cr, - &nfsd->nfsd_nd->nd_cr); + nfsrv_setcred(nuidp->nu_cr, + nfsd->nfsd_nd->nd_cr); nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; } } @@ -383,6 +607,10 @@ nfssvc(p, uap) if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd)) nfsd->nfsd_flag |= NFSD_AUTHFAIL; error = nfssvc_nfsd(nsd, uap->argp, p); + } else if (uap->flag & NFSSVC_EXPORT) { + error = nfssvc_export(uap->argp, p); + } else { + error = EINVAL; } #endif /* NFS_NOSERVER */ if (error == EINTR || error == ERESTART) @@ -390,54 +618,156 @@ nfssvc(p, uap) return (error); } +/* + * NFSKERB client helper daemon. + * Gets authorization strings for "kerb" mounts. + */ +static int +nfskerb_clientd( + struct nfsmount *nmp, + struct nfsd_cargs *ncd, + int flag, + user_addr_t argp, + proc_t p) +{ + struct nfsuid *nuidp, *nnuidp; + int error = 0; + struct nfsreq *rp; + struct timeval now; + + /* + * First initialize some variables + */ + microtime(&now); + + /* + * If an authorization string is being passed in, get it. + */ + if ((flag & NFSSVC_GOTAUTH) && (nmp->nm_state & NFSSTA_MOUNTED) && + ((nmp->nm_state & NFSSTA_WAITAUTH) == 0)) { + if (nmp->nm_state & NFSSTA_HASAUTH) + panic("cld kerb"); + if ((flag & NFSSVC_AUTHINFAIL) == 0) { + if (ncd->ncd_authlen <= nmp->nm_authlen && + ncd->ncd_verflen <= nmp->nm_verflen && + !copyin(CAST_USER_ADDR_T(ncd->ncd_authstr),nmp->nm_authstr,ncd->ncd_authlen)&& + !copyin(CAST_USER_ADDR_T(ncd->ncd_verfstr),nmp->nm_verfstr,ncd->ncd_verflen)){ + nmp->nm_authtype = ncd->ncd_authtype; + nmp->nm_authlen = ncd->ncd_authlen; + nmp->nm_verflen = ncd->ncd_verflen; +#if NFSKERB + nmp->nm_key = ncd->ncd_key; +#endif + } else + nmp->nm_state |= NFSSTA_AUTHERR; + } else + nmp->nm_state |= NFSSTA_AUTHERR; + nmp->nm_state |= NFSSTA_HASAUTH; + wakeup((caddr_t)&nmp->nm_authlen); + } else { + nmp->nm_state |= NFSSTA_WAITAUTH; + } + + /* + * Loop every second updating queue until there is a termination sig. + */ + while (nmp->nm_state & NFSSTA_MOUNTED) { + /* Get an authorization string, if required. */ + if ((nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH)) == 0) { + ncd->ncd_authuid = nmp->nm_authuid; + if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs))) + nmp->nm_state |= NFSSTA_WAITAUTH; + else + return (ENEEDAUTH); + } + /* Wait a bit (no pun) and do it again. */ + if ((nmp->nm_state & NFSSTA_MOUNTED) && + (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH))) { + error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH, + "nfskrbtimr", hz / 3); + if (error == EINTR || error == ERESTART) + dounmount(nmp->nm_mountp, 0, p); + } + } + + /* + * Finally, we can free up the mount structure. + */ + for (nuidp = nmp->nm_uidlruhead.tqh_first; nuidp != 0; nuidp = nnuidp) { + nnuidp = nuidp->nu_lru.tqe_next; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); + kauth_cred_rele(nuidp->nu_cr); + FREE_ZONE((caddr_t)nuidp, sizeof (struct nfsuid), M_NFSUID); + } + /* + * Loop through outstanding request list and remove dangling + * references to defunct nfsmount struct + */ + for (rp = nfs_reqq.tqh_first; rp; rp = rp->r_chain.tqe_next) + if (rp->r_nmp == nmp) + rp->r_nmp = (struct nfsmount *)0; + /* Need to wake up any rcvlock waiters so they notice the unmount. */ + if (nmp->nm_state & NFSSTA_WANTRCV) { + nmp->nm_state &= ~NFSSTA_WANTRCV; + wakeup(&nmp->nm_state); + } + FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT); + if (error == EWOULDBLOCK) + error = 0; + return (error); +} + #ifndef NFS_NOSERVER /* * Adds a socket to the list for servicing by nfsds. */ static int -nfssvc_addsock(fp, mynam, p) - struct file *fp; - struct mbuf *mynam; - struct proc *p; +nfssvc_addsock( + socket_t so, + mbuf_t mynam, + __unused proc_t p) { - register struct mbuf *m; - register int siz; - register struct nfssvc_sock *slp; - register struct socket *so; - struct nfssvc_sock *tslp; - int error, s; - - so = (struct socket *)fp->f_data; - tslp = (struct nfssvc_sock *)0; + int siz; + struct nfssvc_sock *slp; + struct nfssvc_sock *tslp = NULL; + int error, sodomain, sotype, soprotocol, on = 1; + struct timeval timeo; + + /* make sure mbuf constants are set up */ + if (!nfs_mbuf_mlen) + nfs_mbuf_init(); + + sock_gettype(so, &sodomain, &sotype, &soprotocol); + /* * Add it to the list, as required. */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if (so->so_proto->pr_protocol == IPPROTO_UDP) { + if (soprotocol == IPPROTO_UDP) { tslp = nfs_udpsock; - if (tslp->ns_flag & SLP_VALID) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + if (!tslp || (tslp->ns_flag & SLP_VALID)) { + mbuf_freem(mynam); return (EPERM); } #if ISO - } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) { + } else if (soprotocol == ISOPROTO_CLTP) { tslp = nfs_cltpsock; - if (tslp->ns_flag & SLP_VALID) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + if (!tslp || (tslp->ns_flag & SLP_VALID)) { + mbuf_freem(mynam); return (EPERM); } #endif /* ISO */ } - if (so->so_type == SOCK_STREAM) - siz = NFS_MAXPACKET + sizeof (u_long); - else - siz = NFS_MAXPACKET; - error = soreserve(so, siz, siz); - if (error) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + /* reserve buffer space for 2 maximally-sized packets */ + siz = NFS_MAXPACKET; + if (sotype == SOCK_STREAM) + siz += sizeof (u_long); + siz *= 2; + if (siz > NFS_MAXSOCKBUF) + siz = NFS_MAXSOCKBUF; + if ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &siz, sizeof(siz))) || + (error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &siz, sizeof(siz)))) { + mbuf_freem(mynam); return (error); } @@ -446,59 +776,54 @@ nfssvc_addsock(fp, mynam, p) * reserve some space. For datagram sockets, this can get called * repeatedly for the same socket, but that isn't harmful. */ - if (so->so_type == SOCK_STREAM) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_KEEPALIVE; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); + if (sotype == SOCK_STREAM) { + sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); } - if (so->so_proto->pr_domain->dom_family == AF_INET && - so->so_proto->pr_protocol == IPPROTO_TCP) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); + if (sodomain == AF_INET && soprotocol == IPPROTO_TCP) { + sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); } - so->so_rcv.sb_flags &= ~SB_NOINTR; - so->so_rcv.sb_timeo = 0; - so->so_snd.sb_flags &= ~SB_NOINTR; - so->so_snd.sb_timeo = 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - if (tslp) + sock_nointerrupt(so, 0); + + timeo.tv_usec = 0; + timeo.tv_sec = 0; + error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); + error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); + + if (tslp) { slp = tslp; - else { + lck_mtx_lock(nfsd_mutex); + } else { MALLOC(slp, struct nfssvc_sock *, sizeof(struct nfssvc_sock), M_NFSSVC, M_WAITOK); + if (!slp) { + mbuf_freem(mynam); + return (ENOMEM); + } bzero((caddr_t)slp, sizeof (struct nfssvc_sock)); + lck_rw_init(&slp->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr); + lck_mtx_init(&slp->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr); TAILQ_INIT(&slp->ns_uidlruhead); + lck_mtx_lock(nfsd_mutex); TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain); } + + sock_retain(so); /* grab a retain count on the socket */ slp->ns_so = so; + slp->ns_sotype = sotype; slp->ns_nam = mynam; - slp->ns_fp = fp; - (void)fref(fp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - s = splnet(); + + socket_lock(so, 1); so->so_upcallarg = (caddr_t)slp; so->so_upcall = nfsrv_rcv; so->so_rcv.sb_flags |= SB_UPCALL; /* required for freebsd merge */ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - slp->ns_flag = (SLP_VALID | SLP_NEEDQ); + socket_unlock(so, 1); + + slp->ns_flag = SLP_VALID | SLP_NEEDQ; + nfsrv_wakenfsd(slp); - splx(s); + lck_mtx_unlock(nfsd_mutex); + return (0); } @@ -509,86 +834,96 @@ nfssvc_addsock(fp, mynam, p) static int nfssvc_nfsd(nsd, argp, p) struct nfsd_srvargs *nsd; - caddr_t argp; - struct proc *p; + user_addr_t argp; + proc_t p; { - register struct mbuf *m; - register int siz; - register struct nfssvc_sock *slp; - register struct socket *so; - register int *solockp; + mbuf_t m, mreq; + struct nfssvc_sock *slp; struct nfsd *nfsd = nsd->nsd_nfsd; struct nfsrv_descript *nd = NULL; - struct mbuf *mreq; - int error = 0, cacherep, s, sotype, writes_todo; - int procrastinate; + int error = 0, cacherep, writes_todo; + int siz, procrastinate; u_quad_t cur_usec; - extern void nfs_aio_thread_init(); + struct timeval now; + boolean_t funnel_state; #ifndef nolint cacherep = RC_DOIT; writes_todo = 0; #endif - s = splnet(); if (nfsd == (struct nfsd *)0) { MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK); + if (!nfsd) + return (ENOMEM); nsd->nsd_nfsd = nfsd; bzero((caddr_t)nfsd, sizeof (struct nfsd)); nfsd->nfsd_procp = p; + lck_mtx_lock(nfsd_mutex); TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); nfs_numnfsd++; - nfs_aio_thread_init(); + lck_mtx_unlock(nfsd_mutex); } + + funnel_state = thread_funnel_set(kernel_flock, FALSE); + /* * Loop getting rpc requests until SIGKILL. */ for (;;) { if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) { - while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && - (nfsd_head_flag & NFSD_CHECKSLP) == 0) { + lck_mtx_lock(nfsd_mutex); + while ((nfsd->nfsd_slp == NULL) && !(nfsd_head_flag & NFSD_CHECKSLP)) { nfsd->nfsd_flag |= NFSD_WAITING; nfsd_waiting++; - error = tsleep((caddr_t)nfsd, PSOCK | PCATCH, - "nfsd", 0); + error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", 0); nfsd_waiting--; - if (error) + if (error) { + lck_mtx_unlock(nfsd_mutex); goto done; + } } - if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && - (nfsd_head_flag & NFSD_CHECKSLP) != 0) { - for (slp = nfssvc_sockhead.tqh_first; slp != 0; - slp = slp->ns_chain.tqe_next) { + if ((nfsd->nfsd_slp == NULL) && (nfsd_head_flag & NFSD_CHECKSLP)) { + TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) { + lck_rw_lock_shared(&slp->ns_rwlock); if ((slp->ns_flag & (SLP_VALID | SLP_DOREC)) == (SLP_VALID | SLP_DOREC)) { + if (lck_rw_lock_shared_to_exclusive(&slp->ns_rwlock)) { + /* upgrade failed and we lost the lock; take exclusive and recheck */ + lck_rw_lock_exclusive(&slp->ns_rwlock); + if ((slp->ns_flag & (SLP_VALID | SLP_DOREC)) + != (SLP_VALID | SLP_DOREC)) { + /* flags no longer set, so skip this socket */ + lck_rw_done(&slp->ns_rwlock); + continue; + } + } slp->ns_flag &= ~SLP_DOREC; slp->ns_sref++; nfsd->nfsd_slp = slp; + lck_rw_done(&slp->ns_rwlock); break; } + lck_rw_done(&slp->ns_rwlock); } if (slp == 0) nfsd_head_flag &= ~NFSD_CHECKSLP; } - if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0) + lck_mtx_unlock(nfsd_mutex); + if ((slp = nfsd->nfsd_slp) == NULL) continue; + lck_rw_lock_exclusive(&slp->ns_rwlock); if (slp->ns_flag & SLP_VALID) { - if (slp->ns_flag & SLP_DISCONN) + if (slp->ns_flag & SLP_DISCONN) { nfsrv_zapsock(slp); - else if (slp->ns_flag & SLP_NEEDQ) { + } else if (slp->ns_flag & SLP_NEEDQ) { slp->ns_flag &= ~SLP_NEEDQ; - (void) nfs_sndlock(&slp->ns_solock, - (struct nfsreq *)0); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - nfsrv_rcv(slp->ns_so, (caddr_t)slp, - M_WAIT); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - nfs_sndunlock(&slp->ns_solock); + nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK); } error = nfsrv_dorec(slp, nfsd, &nd); - cur_usec = (u_quad_t)time.tv_sec * 1000000 + - (u_quad_t)time.tv_usec; - if (error && slp->ns_tq.lh_first && - slp->ns_tq.lh_first->nd_time <= cur_usec) { + microuptime(&now); + cur_usec = (u_quad_t)now.tv_sec * 1000000 + + (u_quad_t)now.tv_usec; + if (error && slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) { error = 0; cacherep = RC_DOIT; writes_todo = 1; @@ -596,30 +931,28 @@ nfssvc_nfsd(nsd, argp, p) writes_todo = 0; nfsd->nfsd_flag |= NFSD_REQINPROG; } + lck_rw_done(&slp->ns_rwlock); } else { error = 0; slp = nfsd->nfsd_slp; } if (error || (slp->ns_flag & SLP_VALID) == 0) { if (nd) { - _FREE_ZONE((caddr_t)nd, + if (nd->nd_nam2) + mbuf_freem(nd->nd_nam2); + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); + FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); nd = NULL; } - nfsd->nfsd_slp = (struct nfssvc_sock *)0; + nfsd->nfsd_slp = NULL; nfsd->nfsd_flag &= ~NFSD_REQINPROG; nfsrv_slpderef(slp); continue; } - splx(s); - so = slp->ns_so; - sotype = so->so_type; - if (so->so_proto->pr_flags & PR_CONNREQUIRED) - solockp = &slp->ns_solock; - else - solockp = (int *)0; if (nd) { - nd->nd_starttime = time; + microuptime(&nd->nd_starttime); if (nd->nd_nam2) nd->nd_nam = nd->nd_nam2; else @@ -630,41 +963,22 @@ nfssvc_nfsd(nsd, argp, p) */ if (nfsd->nfsd_flag & NFSD_NEEDAUTH) { nfsd->nfsd_flag &= ~NFSD_NEEDAUTH; - nsd->nsd_haddr = mtod(nd->nd_nam, - struct sockaddr_in *)->sin_addr.s_addr; + nsd->nsd_haddr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr; nsd->nsd_authlen = nfsd->nfsd_authlen; nsd->nsd_verflen = nfsd->nfsd_verflen; - if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr, + if (!copyout(nfsd->nfsd_authstr,CAST_USER_ADDR_T(nsd->nsd_authstr), nfsd->nfsd_authlen) && - !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr, + !copyout(nfsd->nfsd_verfstr, CAST_USER_ADDR_T(nsd->nsd_verfstr), nfsd->nfsd_verflen) && - !copyout((caddr_t)nsd, argp, sizeof (*nsd))) + !copyout((caddr_t)nsd, argp, sizeof (*nsd))) { + thread_funnel_set(kernel_flock, funnel_state); return (ENEEDAUTH); + } cacherep = RC_DROPIT; } else cacherep = nfsrv_getcache(nd, slp, &mreq); - /* - * Check for just starting up for NQNFS and send - * fake "try again later" replies to the NQNFS clients. - */ - if (notstarted && nqnfsstarttime <= time.tv_sec) { - if (modify_flag) { - nqnfsstarttime = time.tv_sec + nqsrv_writeslack; - modify_flag = 0; - } else - notstarted = 0; - } - if (notstarted) { - if ((nd->nd_flag & ND_NQNFS) == 0) - cacherep = RC_DROPIT; - else if (nd->nd_procnum != NFSPROC_WRITE) { - nd->nd_procnum = NFSPROC_NOOP; - nd->nd_repstat = NQNFS_TRYLATER; - cacherep = RC_DOIT; - } else - modify_flag = 1; - } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) { + if (nfsd->nfsd_flag & NFSD_AUTHFAIL) { nfsd->nfsd_flag &= ~NFSD_AUTHFAIL; nd->nd_procnum = NFSPROC_NOOP; nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); @@ -672,18 +986,20 @@ nfssvc_nfsd(nsd, argp, p) } else if (nfs_privport) { /* Check if source port is privileged */ u_short port; - struct sockaddr *nam = nd->nd_nam; + struct sockaddr *nam = mbuf_data(nd->nd_nam); struct sockaddr_in *sin; sin = (struct sockaddr_in *)nam; port = ntohs(sin->sin_port); if (port >= IPPORT_RESERVED && nd->nd_procnum != NFSPROC_NULL) { + char strbuf[MAX_IPv4_STR_LEN]; nd->nd_procnum = NFSPROC_NOOP; nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); cacherep = RC_DOIT; printf("NFS request from unprivileged port (%s:%d)\n", - (char *)(inet_ntoa(sin->sin_addr)), port); + inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)), + port); } } @@ -700,82 +1016,100 @@ nfssvc_nfsd(nsd, argp, p) procrastinate = nfsrvw_procrastinate_v3; else procrastinate = nfsrvw_procrastinate; - if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE && - procrastinate > 0 && !notstarted)) - error = nfsrv_writegather(&nd, slp, - nfsd->nfsd_procp, &mreq); + lck_rw_lock_shared(&nfs_export_rwlock); + if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) + error = nfsrv_writegather(&nd, slp, nfsd->nfsd_procp, &mreq); else - error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, - slp, nfsd->nfsd_procp, &mreq); + error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, slp, nfsd->nfsd_procp, &mreq); + lck_rw_done(&nfs_export_rwlock); if (mreq == NULL) break; if (error) { - if (nd->nd_procnum != NQNFSPROC_VACATED) - nfsstats.srv_errs++; + OSAddAtomic(1, (SInt32*)&nfsstats.srv_errs); nfsrv_updatecache(nd, FALSE, mreq); - if (nd->nd_nam2) - m_freem(nd->nd_nam2); + if (nd->nd_nam2) { + mbuf_freem(nd->nd_nam2); + nd->nd_nam2 = NULL; + } break; } - nfsstats.srvrpccnt[nd->nd_procnum]++; + OSAddAtomic(1, (SInt32*)&nfsstats.srvrpccnt[nd->nd_procnum]); nfsrv_updatecache(nd, TRUE, mreq); - nd->nd_mrep = (struct mbuf *)0; + nd->nd_mrep = NULL; case RC_REPLY: m = mreq; siz = 0; while (m) { - siz += m->m_len; - m = m->m_next; + siz += mbuf_len(m); + m = mbuf_next(m); } if (siz <= 0 || siz > NFS_MAXPACKET) { printf("mbuf siz=%d\n",siz); panic("Bad nfs svc reply"); } m = mreq; - m->m_pkthdr.len = siz; - m->m_pkthdr.rcvif = (struct ifnet *)0; + mbuf_pkthdr_setlen(m, siz); + error = mbuf_pkthdr_setrcvif(m, NULL); + if (error) + panic("nfsd setrcvif failed: %d", error); /* * For stream protocols, prepend a Sun RPC * Record Mark. */ - if (sotype == SOCK_STREAM) { - M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); - *mtod(m, u_long *) = htonl(0x80000000 | siz); + if (slp->ns_sotype == SOCK_STREAM) { + error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); + if (!error) + *(u_long*)mbuf_data(m) = htonl(0x80000000 | siz); } - if (solockp) - (void) nfs_sndlock(solockp, (struct nfsreq *)0); - if (slp->ns_flag & SLP_VALID) - error = nfs_send(so, nd->nd_nam2, m, NULL); - else { - error = EPIPE; - m_freem(m); + if (!error) { + if (slp->ns_flag & SLP_VALID) { + error = nfs_send(slp->ns_so, nd->nd_nam2, m, NULL); + } else { + error = EPIPE; + mbuf_freem(m); + } + } else { + mbuf_freem(m); } + mreq = NULL; if (nfsrtton) - nfsd_rt(sotype, nd, cacherep); - if (nd->nd_nam2) - MFREE(nd->nd_nam2, m); - if (nd->nd_mrep) - m_freem(nd->nd_mrep); - if (error == EPIPE) + nfsd_rt(slp->ns_sotype, nd, cacherep); + if (nd->nd_nam2) { + mbuf_freem(nd->nd_nam2); + nd->nd_nam2 = NULL; + } + if (nd->nd_mrep) { + mbuf_freem(nd->nd_mrep); + nd->nd_mrep = NULL; + } + if (error == EPIPE) { + lck_rw_lock_exclusive(&slp->ns_rwlock); nfsrv_zapsock(slp); - if (solockp) - nfs_sndunlock(solockp); + lck_rw_done(&slp->ns_rwlock); + } if (error == EINTR || error == ERESTART) { - _FREE_ZONE((caddr_t)nd, - sizeof *nd, M_NFSRVDESC); + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); + FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); nfsrv_slpderef(slp); - s = splnet(); goto done; } break; case RC_DROPIT: if (nfsrtton) - nfsd_rt(sotype, nd, cacherep); - m_freem(nd->nd_mrep); - m_freem(nd->nd_nam2); + nfsd_rt(slp->ns_sotype, nd, cacherep); + mbuf_freem(nd->nd_mrep); + mbuf_freem(nd->nd_nam2); + nd->nd_mrep = nd->nd_nam2 = NULL; break; }; if (nd) { + if (nd->nd_mrep) + mbuf_freem(nd->nd_mrep); + if (nd->nd_nam2) + mbuf_freem(nd->nd_nam2); + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); nd = NULL; } @@ -784,33 +1118,74 @@ nfssvc_nfsd(nsd, argp, p) * Check to see if there are outstanding writes that * need to be serviced. */ - cur_usec = (u_quad_t)time.tv_sec * 1000000 + - (u_quad_t)time.tv_usec; - s = splsoftclock(); - if (slp->ns_tq.lh_first && - slp->ns_tq.lh_first->nd_time <= cur_usec) { + microuptime(&now); + cur_usec = (u_quad_t)now.tv_sec * 1000000 + + (u_quad_t)now.tv_usec; + if (slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) { cacherep = RC_DOIT; writes_todo = 1; - } else + } else { writes_todo = 0; - splx(s); + } } while (writes_todo); - s = splnet(); + lck_rw_lock_exclusive(&slp->ns_rwlock); if (nfsrv_dorec(slp, nfsd, &nd)) { + lck_rw_done(&slp->ns_rwlock); nfsd->nfsd_flag &= ~NFSD_REQINPROG; nfsd->nfsd_slp = NULL; nfsrv_slpderef(slp); + } else { + lck_rw_done(&slp->ns_rwlock); } } done: + thread_funnel_set(kernel_flock, funnel_state); + lck_mtx_lock(nfsd_mutex); TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain); - splx(s); - _FREE((caddr_t)nfsd, M_NFSD); + FREE(nfsd, M_NFSD); nsd->nsd_nfsd = (struct nfsd *)0; if (--nfs_numnfsd == 0) nfsrv_init(TRUE); /* Reinitialize everything */ + lck_mtx_unlock(nfsd_mutex); return (error); } + +static int +nfssvc_export(user_addr_t argp, proc_t p) +{ + int error = 0, is_64bit; + struct user_nfs_export_args unxa; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + is_64bit = IS_64BIT_PROCESS(p); + + /* copy in pointers to path and export args */ + if (is_64bit) { + error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); + } else { + struct nfs_export_args tnxa; + error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); + if (error == 0) { + /* munge into LP64 version of nfs_export_args structure */ + unxa.nxa_fsid = tnxa.nxa_fsid; + unxa.nxa_expid = tnxa.nxa_expid; + unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); + unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); + unxa.nxa_flags = tnxa.nxa_flags; + unxa.nxa_netcount = tnxa.nxa_netcount; + unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); + } + } + if (error) + return (error); + + error = nfsrv_export(&unxa, &context); + + return (error); +} + #endif /* NFS_NOSERVER */ int nfs_defect = 0; @@ -819,6 +1194,25 @@ int nfs_defect = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, ""); #endif +int +nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) +{ + struct lockd_ans la; + int error; + + if (uap->flag == NFSCLNT_LOCKDWAIT) { + return (nfslockdwait(p)); + } + if (uap->flag == NFSCLNT_LOCKDANS) { + error = copyin(uap->argp, &la, sizeof(la)); + return (error != 0 ? error : nfslockdans(p, &la)); + } + if (uap->flag == NFSCLNT_LOCKDFD) + return (nfslockdfd(p, CAST_DOWN(int, uap->argp))); + return EINVAL; +} + + static int nfssvc_iod_continue(int); /* @@ -827,13 +1221,9 @@ static int nfssvc_iod_continue(int); * Never returns unless it fails or gets killed. */ static int -nfssvc_iod(p) - struct proc *p; +nfssvc_iod(__unused proc_t p) { - register struct buf *bp; register int i, myiod; - struct nfsmount *nmp; - int error = 0; struct uthread *ut; /* @@ -850,88 +1240,133 @@ nfssvc_iod(p) return (EBUSY); nfs_numasync++; - /* stuff myiod into uthread to get off local stack for - continuation */ + /* stuff myiod into uthread to get off local stack for continuation */ - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); ut->uu_state.uu_nfs_myiod = myiod; /* squirrel away for continuation */ nfssvc_iod_continue(0); /* NOTREACHED */ - + return (0); } /* * Continuation for Asynchronous I/O daemons for client nfs. */ static int -nfssvc_iod_continue(error) +nfssvc_iod_continue(int error) { - register struct buf *bp; + register struct nfsbuf *bp; register int i, myiod; struct nfsmount *nmp; struct uthread *ut; - struct proc *p; + proc_t p; /* * real myiod is stored in uthread, recover it */ - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); myiod = ut->uu_state.uu_nfs_myiod; - p = current_proc(); + p = current_proc(); // XXX /* * Just loop around doin our stuff until SIGKILL - * - actually we don't loop with continuations... + * - actually we don't loop with continuations... */ + lck_mtx_lock(nfs_iod_mutex); for (;;) { while (((nmp = nfs_iodmount[myiod]) == NULL || nmp->nm_bufq.tqh_first == NULL) - && error == 0) { + && error == 0 && nfs_ioddelwri == 0) { if (nmp) nmp->nm_bufqiods--; - nfs_iodwant[myiod] = p; + nfs_iodwant[myiod] = p; // XXX this doesn't need to be a proc_t nfs_iodmount[myiod] = NULL; - error = tsleep0((caddr_t)&nfs_iodwant[myiod], - PWAIT | PCATCH, "nfsidl", 0, nfssvc_iod_continue); - /* NOTREACHED */ + error = msleep0((caddr_t)&nfs_iodwant[myiod], nfs_iod_mutex, + PWAIT | PCATCH | PDROP, "nfsidl", 0, nfssvc_iod_continue); + lck_mtx_lock(nfs_iod_mutex); } if (error) { nfs_asyncdaemon[myiod] = 0; if (nmp) nmp->nm_bufqiods--; nfs_iodwant[myiod] = NULL; nfs_iodmount[myiod] = NULL; + lck_mtx_unlock(nfs_iod_mutex); nfs_numasync--; if (error == EINTR || error == ERESTART) error = 0; unix_syscall_return(error); } - while ((bp = nmp->nm_bufq.tqh_first) != NULL) { - /* Take one off the front of the list */ - TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist); - nmp->nm_bufqlen--; - if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) { - nmp->nm_bufqwant = FALSE; - wakeup(&nmp->nm_bufq); - } - if (ISSET(bp->b_flags, B_READ)) - (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0); - else - (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0); + if (nmp != NULL) { + while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) { + /* Take one off the front of the list */ + TAILQ_REMOVE(&nmp->nm_bufq, bp, nb_free); + bp->nb_free.tqe_next = NFSNOLIST; + nmp->nm_bufqlen--; + if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) { + nmp->nm_bufqwant = FALSE; + lck_mtx_unlock(nfs_iod_mutex); + wakeup(&nmp->nm_bufq); + } else { + lck_mtx_unlock(nfs_iod_mutex); + } - /* - * If there are more than one iod on this mount, then defect - * so that the iods can be shared out fairly between the mounts - */ - if (nfs_defect && nmp->nm_bufqiods > 1) { - NFS_DPF(ASYNCIO, - ("nfssvc_iod: iod %d defecting from mount %p\n", - myiod, nmp)); - nfs_iodmount[myiod] = NULL; - nmp->nm_bufqiods--; - break; + SET(bp->nb_flags, NB_IOD); + if (ISSET(bp->nb_flags, NB_READ)) + nfs_doio(bp, bp->nb_rcred, NULL); + else + nfs_doio(bp, bp->nb_wcred, NULL); + + lck_mtx_lock(nfs_iod_mutex); + /* + * If there are more than one iod on this mount, then defect + * so that the iods can be shared out fairly between the mounts + */ + if (nfs_defect && nmp->nm_bufqiods > 1) { + nfs_iodmount[myiod] = NULL; + nmp->nm_bufqiods--; + break; + } } } + lck_mtx_unlock(nfs_iod_mutex); + + if (nfs_ioddelwri) { + i = 0; + nfs_ioddelwri = 0; + lck_mtx_lock(nfs_buf_mutex); + while (i < 8 && (bp = TAILQ_FIRST(&nfsbufdelwri)) != NULL) { + struct nfsnode *np = VTONFS(bp->nb_vp); + nfs_buf_remfree(bp); + nfs_buf_refget(bp); + while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN); + nfs_buf_refrele(bp); + if (error) + break; + if (!bp->nb_vp) { + /* buffer is no longer valid */ + nfs_buf_drop(bp); + continue; + } + if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { + /* put buffer at end of delwri list */ + TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free); + nfsbufdelwricnt++; + nfs_buf_drop(bp); + lck_mtx_unlock(nfs_buf_mutex); + nfs_flushcommits(np->n_vnode, NULL, 1); + } else { + SET(bp->nb_flags, (NB_ASYNC | NB_IOD)); + lck_mtx_unlock(nfs_buf_mutex); + nfs_buf_write(bp); + } + i++; + lck_mtx_lock(nfs_buf_mutex); + } + lck_mtx_unlock(nfs_buf_mutex); + } + + lck_mtx_lock(nfs_iod_mutex); } } @@ -943,50 +1378,23 @@ nfssvc_iod_continue(error) * reassigned during cleanup. */ static void -nfsrv_zapsock(slp) - register struct nfssvc_sock *slp; +nfsrv_zapsock(struct nfssvc_sock *slp) { - register struct nfsuid *nuidp, *nnuidp; - register struct nfsrv_descript *nwp, *nnwp; - struct socket *so; - struct file *fp; - struct mbuf *m; - int s; + socket_t so; + if ((slp->ns_flag & SLP_VALID) == 0) + return; slp->ns_flag &= ~SLP_ALLFLAGS; - fp = slp->ns_fp; - if (fp) { - slp->ns_fp = (struct file *)0; - so = slp->ns_so; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - so->so_upcall = NULL; - so->so_rcv.sb_flags &= ~SB_UPCALL; - soshutdown(so, 2); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - closef(fp, (struct proc *)0); - if (slp->ns_nam) - MFREE(slp->ns_nam, m); - m_freem(slp->ns_raw); - m_freem(slp->ns_rec); - for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0; - nuidp = nnuidp) { - nnuidp = nuidp->nu_lru.tqe_next; - LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); - if (nuidp->nu_flag & NU_NAM) - m_freem(nuidp->nu_nam); - _FREE_ZONE((caddr_t)nuidp, - sizeof (struct nfsuid), M_NFSUID); - } - s = splsoftclock(); - for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { - nnwp = nwp->nd_tq.le_next; - LIST_REMOVE(nwp, nd_tq); - _FREE_ZONE((caddr_t)nwp, sizeof *nwp, M_NFSRVDESC); - } - LIST_INIT(&slp->ns_tq); - splx(s); - } + + so = slp->ns_so; + if (so == NULL) + return; + + socket_lock(so, 1); + so->so_upcall = NULL; + so->so_rcv.sb_flags &= ~SB_UPCALL; + socket_unlock(so, 1); + sock_shutdown(so, SHUT_RDWR); } /* @@ -997,7 +1405,7 @@ int nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) register struct nfsmount *nmp; struct nfsreq *rep; - struct ucred *cred; + kauth_cred_t cred; char **auth_str; int *auth_len; char *verf_str; @@ -1006,48 +1414,51 @@ nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) { int error = 0; - while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) { - nmp->nm_flag |= NFSMNT_WANTAUTH; + while ((nmp->nm_state & NFSSTA_WAITAUTH) == 0) { + nmp->nm_state |= NFSSTA_WANTAUTH; (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK, "nfsauth1", 2 * hz); error = nfs_sigintr(nmp, rep, rep->r_procp); if (error) { - nmp->nm_flag &= ~NFSMNT_WANTAUTH; + nmp->nm_state &= ~NFSSTA_WANTAUTH; return (error); } } - nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH); + nmp->nm_state &= ~NFSSTA_WANTAUTH; MALLOC(*auth_str, char *, RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK); + if (!*auth_str) + return (ENOMEM); nmp->nm_authstr = *auth_str; nmp->nm_authlen = RPCAUTH_MAXSIZ; nmp->nm_verfstr = verf_str; nmp->nm_verflen = *verf_len; - nmp->nm_authuid = cred->cr_uid; + nmp->nm_authuid = kauth_cred_getuid(cred); + nmp->nm_state &= ~NFSSTA_WAITAUTH; wakeup((caddr_t)&nmp->nm_authstr); /* * And wait for mount_nfs to do its stuff. */ - while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) { + while ((nmp->nm_state & NFSSTA_HASAUTH) == 0 && error == 0) { (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK, "nfsauth2", 2 * hz); error = nfs_sigintr(nmp, rep, rep->r_procp); } - if (nmp->nm_flag & NFSMNT_AUTHERR) { - nmp->nm_flag &= ~NFSMNT_AUTHERR; + if (nmp->nm_state & NFSSTA_AUTHERR) { + nmp->nm_state &= ~NFSSTA_AUTHERR; error = EAUTH; } if (error) - _FREE((caddr_t)*auth_str, M_TEMP); + FREE(*auth_str, M_TEMP); else { *auth_len = nmp->nm_authlen; *verf_len = nmp->nm_verflen; bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key)); } - nmp->nm_flag &= ~NFSMNT_HASAUTH; - nmp->nm_flag |= NFSMNT_WAITAUTH; - if (nmp->nm_flag & NFSMNT_WANTAUTH) { - nmp->nm_flag &= ~NFSMNT_WANTAUTH; + nmp->nm_state &= ~NFSSTA_HASAUTH; + nmp->nm_state |= NFSSTA_WAITAUTH; + if (nmp->nm_state & NFSSTA_WANTAUTH) { + nmp->nm_state &= ~NFSSTA_WANTAUTH; wakeup((caddr_t)&nmp->nm_authtype); } return (error); @@ -1057,37 +1468,41 @@ nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) * Get a nickname authenticator and verifier. */ int -nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) - struct nfsmount *nmp; - struct ucred *cred; - char **auth_str; - int *auth_len; - char *verf_str; - int verf_len; +nfs_getnickauth( + struct nfsmount *nmp, + kauth_cred_t cred, + char **auth_str, + int *auth_len, + char *verf_str, + __unused int verf_len) { register struct nfsuid *nuidp; register u_long *nickp, *verfp; - struct timeval ktvin, ktvout; + struct timeval ktvin, ktvout, now; #if DIAGNOSTIC if (verf_len < (4 * NFSX_UNSIGNED)) panic("nfs_getnickauth verf too small"); #endif - for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first; + for (nuidp = NMUIDHASH(nmp, kauth_cred_getuid(cred))->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == cred->cr_uid) + if (kauth_cred_getuid(nuidp->nu_cr) == kauth_cred_getuid(cred)) break; } - if (!nuidp || nuidp->nu_expire < time.tv_sec) + microtime(&now); + if (!nuidp || nuidp->nu_expire < now.tv_sec) return (EACCES); + MALLOC(nickp, u_long *, 2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK); + if (!nickp) + return (ENOMEM); + /* * Move to the end of the lru list (end of lru == most recently used). */ TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru); - MALLOC(nickp, u_long *, 2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK); *nickp++ = txdr_unsigned(RPCAKN_NICKNAME); *nickp = txdr_unsigned(nuidp->nu_nickname); *auth_str = (char *)nickp; @@ -1098,10 +1513,11 @@ nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) */ verfp = (u_long *)verf_str; *verfp++ = txdr_unsigned(RPCAKN_NICKNAME); - if (time.tv_sec > nuidp->nu_timestamp.tv_sec || - (time.tv_sec == nuidp->nu_timestamp.tv_sec && - time.tv_usec > nuidp->nu_timestamp.tv_usec)) - nuidp->nu_timestamp = time; + microtime(&now); + if (now.tv_sec > nuidp->nu_timestamp.tv_sec || + (now.tv_sec == nuidp->nu_timestamp.tv_sec && + now.tv_usec > nuidp->nu_timestamp.tv_usec)) + nuidp->nu_timestamp = now; else nuidp->nu_timestamp.tv_usec++; ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec); @@ -1127,18 +1543,18 @@ nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) int nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep) register struct nfsmount *nmp; - struct ucred *cred; + kauth_cred_t cred; int len; NFSKERBKEY_T key; - struct mbuf **mdp; + mbuf_t *mdp; char **dposp; - struct mbuf *mrep; + mbuf_t mrep; { register struct nfsuid *nuidp; register u_long *tl; register long t1; - struct mbuf *md = *mdp; - struct timeval ktvin, ktvout; + mbuf_t md = *mdp; + struct timeval ktvin, ktvout, now; u_long nick; char *dpos = *dposp, *cp2; int deltasec, error = 0; @@ -1157,7 +1573,8 @@ nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep) #endif ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec); ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec); - deltasec = time.tv_sec - ktvout.tv_sec; + microtime(&now); + deltasec = now.tv_sec - ktvout.tv_sec; if (deltasec < 0) deltasec = -deltasec; /* @@ -1170,20 +1587,27 @@ nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep) sizeof (struct nfsuid), M_NFSUID, M_WAITOK); } else { + nuidp = NULL; + } + if (!nuidp) { nuidp = nmp->nm_uidlruhead.tqh_first; + if (!nuidp) { + error = ENOMEM; + goto nfsmout; + } LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, - nu_lru); + TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); + kauth_cred_rele(nuidp->nu_cr); } nuidp->nu_flag = 0; - nuidp->nu_cr.cr_uid = cred->cr_uid; - nuidp->nu_expire = time.tv_sec + NFS_KERBTTL; + kauth_cred_ref(cred); + nuidp->nu_cr = cred; + nuidp->nu_expire = now.tv_sec + NFS_KERBTTL; nuidp->nu_timestamp = ktvout; nuidp->nu_nickname = nick; bcopy(key, nuidp->nu_key, sizeof (key)); - TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, - nu_lru); - LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid), + TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru); + LIST_INSERT_HEAD(NMUIDHASH(nmp, kauth_cred_getuid(cred)), nuidp, nu_hash); } } else @@ -1196,20 +1620,76 @@ nfsmout: #ifndef NFS_NOSERVER +/* + * cleanup and release a server socket structure. + */ +static void +nfsrv_slpfree(struct nfssvc_sock *slp) +{ + struct nfsuid *nuidp, *nnuidp; + struct nfsrv_descript *nwp, *nnwp; + + if (slp->ns_so) { + sock_release(slp->ns_so); + slp->ns_so = NULL; + } + if (slp->ns_nam) + mbuf_free(slp->ns_nam); + if (slp->ns_raw) + mbuf_freem(slp->ns_raw); + if (slp->ns_rec) + mbuf_freem(slp->ns_rec); + slp->ns_nam = slp->ns_raw = slp->ns_rec = NULL; + + for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0; + nuidp = nnuidp) { + nnuidp = nuidp->nu_lru.tqe_next; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); + if (nuidp->nu_flag & NU_NAM) + mbuf_freem(nuidp->nu_nam); + kauth_cred_rele(nuidp->nu_cr); + FREE_ZONE((caddr_t)nuidp, + sizeof (struct nfsuid), M_NFSUID); + } + + for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { + nnwp = nwp->nd_tq.le_next; + LIST_REMOVE(nwp, nd_tq); + if (nwp->nd_cr) + kauth_cred_rele(nwp->nd_cr); + FREE_ZONE((caddr_t)nwp, sizeof *nwp, M_NFSRVDESC); + } + LIST_INIT(&slp->ns_tq); + + lck_rw_destroy(&slp->ns_rwlock, nfs_slp_rwlock_group); + lck_mtx_destroy(&slp->ns_wgmutex, nfs_slp_mutex_group); + FREE(slp, M_NFSSVC); +} + /* * Derefence a server socket structure. If it has no more references and * is no longer valid, you can throw it away. */ void -nfsrv_slpderef(slp) - register struct nfssvc_sock *slp; +nfsrv_slpderef(struct nfssvc_sock *slp) { - if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) { - TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); - _FREE((caddr_t)slp, M_NFSSVC); + lck_mtx_lock(nfsd_mutex); + lck_rw_lock_exclusive(&slp->ns_rwlock); + slp->ns_sref--; + if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { + lck_rw_done(&slp->ns_rwlock); + lck_mtx_unlock(nfsd_mutex); + return; } + + TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); + lck_mtx_unlock(nfsd_mutex); + + nfsrv_slpfree(slp); } + /* * Initialize the data structures for the server. * Handshake with any new nfsds starting up to avoid any chance of @@ -1219,22 +1699,24 @@ void nfsrv_init(terminating) int terminating; { - register struct nfssvc_sock *slp, *nslp; + struct nfssvc_sock *slp, *nslp; - if (nfssvc_sockhead_flag & SLP_INIT) - panic("nfsd init"); - nfssvc_sockhead_flag |= SLP_INIT; if (terminating) { - for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) { - nslp = slp->ns_chain.tqe_next; - if (slp->ns_flag & SLP_VALID) + for (slp = TAILQ_FIRST(&nfssvc_sockhead); slp != 0; slp = nslp) { + nslp = TAILQ_NEXT(slp, ns_chain); + if (slp->ns_flag & SLP_VALID) { + lck_rw_lock_exclusive(&slp->ns_rwlock); nfsrv_zapsock(slp); + lck_rw_done(&slp->ns_rwlock); + } TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); - _FREE((caddr_t)slp, M_NFSSVC); + /* grab the lock one final time in case anyone's using it */ + lck_rw_lock_exclusive(&slp->ns_rwlock); + nfsrv_slpfree(slp); } nfsrv_cleancache(); /* And clear out server cache */ -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED } else nfs_pub.np_valid = 0; #else @@ -1242,26 +1724,33 @@ nfsrv_init(terminating) #endif TAILQ_INIT(&nfssvc_sockhead); - nfssvc_sockhead_flag &= ~SLP_INIT; - if (nfssvc_sockhead_flag & SLP_WANTINIT) { - nfssvc_sockhead_flag &= ~SLP_WANTINIT; - wakeup((caddr_t)&nfssvc_sockhead); - } TAILQ_INIT(&nfsd_head); nfsd_head_flag &= ~NFSD_CHECKSLP; MALLOC(nfs_udpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock), M_NFSSVC, M_WAITOK); - bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock)); - TAILQ_INIT(&nfs_udpsock->ns_uidlruhead); - TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain); + if (nfs_udpsock) { + bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock)); + lck_rw_init(&nfs_udpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr); + TAILQ_INIT(&nfs_udpsock->ns_uidlruhead); + TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain); + } else { + printf("nfsrv_init() failed to allocate UDP socket\n"); + } +#if ISO MALLOC(nfs_cltpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock), M_NFSSVC, M_WAITOK); - bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock)); - TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead); - TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain); + if (nfs_cltpsock) { + bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock)); + lck_rw_init(&nfs_cltpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr); + TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead); + TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain); + } else { + printf("nfsrv_init() failed to allocate CLTP socket\n"); + } +#endif } /* @@ -1274,6 +1763,7 @@ nfsd_rt(sotype, nd, cacherep) int cacherep; { register struct drt *rt; + struct timeval now; rt = &nfsdrt.drt[nfsdrt.pos]; if (cacherep == RC_DOIT) @@ -1284,18 +1774,17 @@ nfsd_rt(sotype, nd, cacherep) rt->flag = DRT_CACHEDROP; if (sotype == SOCK_STREAM) rt->flag |= DRT_TCP; - if (nd->nd_flag & ND_NQNFS) - rt->flag |= DRT_NQNFS; else if (nd->nd_flag & ND_NFSV3) rt->flag |= DRT_NFSV3; rt->proc = nd->nd_procnum; - if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET) - rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr; + if (((struct sockaddr *)mbuf_data(nd->nd_nam))->sa_family == AF_INET) + rt->ipadr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr; else rt->ipadr = INADDR_ANY; - rt->resptime = ((time.tv_sec - nd->nd_starttime.tv_sec) * 1000000) + - (time.tv_usec - nd->nd_starttime.tv_usec); - rt->tstamp = time; + microuptime(&now); + rt->resptime = ((now.tv_sec - nd->nd_starttime.tv_sec) * 1000000) + + (now.tv_usec - nd->nd_starttime.tv_usec); + microtime(&rt->tstamp); // XXX unused nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ; } #endif /* NFS_NOSERVER */