X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/4452a7af2eac33dbad800bcc91f2399d62c18f53..13f56ec4e58bf8687e2a68032c093c0213dd519b:/bsd/nfs/nfs_subs.c diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index 3497ef8f9..dccead918 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -82,12 +82,12 @@ #include #include #include -#include #include #include -#include +#include #include #include +#include #include #include @@ -99,82 +99,118 @@ #include #include #include +#if NFSCLIENT +#define _NFS_XDR_SUBS_FUNCS_ /* define this to get xdrbuf function definitions */ +#endif #include #include +#include #include -#include #include #include #include -#if ISO -#include -#endif +#include -#include +#include -SYSCTL_DECL(_vfs_generic); -SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW, 0, "nfs hinge"); +/* + * NFS globals + */ +struct nfsstats nfsstats; +size_t nfs_mbuf_mhlen = 0, nfs_mbuf_minclsize = 0; -#define FSDBG(A, B, C, D, E) \ - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \ - (int)(B), (int)(C), (int)(D), (int)(E), 0) -#define FSDBG_TOP(A, B, C, D, E) \ - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \ - (int)(B), (int)(C), (int)(D), (int)(E), 0) -#define FSDBG_BOT(A, B, C, D, E) \ - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \ - (int)(B), (int)(C), (int)(D), (int)(E), 0) /* - * Data items converted to xdr at startup, since they are constant - * This is kinda hokey, but may save a little time doing byte swaps + * functions to convert between NFS and VFS types */ -u_long nfs_xdrneg1; -u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, - rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, - rpc_auth_kerb; -u_long nfs_prog, nfs_true, nfs_false; -__private_extern__ int nfs_mbuf_mlen = 0, nfs_mbuf_mhlen = 0, - nfs_mbuf_minclsize = 0, nfs_mbuf_mclbytes = 0; - -/* And other global data */ -static u_long nfs_xid = 0; -u_long nfs_xidwrap = 0; /* to build a (non-wwrapping) 64 bit xid */ -static enum vtype nv2tov_type[8]= { - VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON -}; -enum vtype nv3tov_type[8]= { - VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO -}; +nfstype +vtonfs_type(enum vtype vtype, int nfsvers) +{ + switch (vtype) { + case VNON: + return NFNON; + case VREG: + return NFREG; + case VDIR: + return NFDIR; + case VBLK: + return NFBLK; + case VCHR: + return NFCHR; + case VLNK: + return NFLNK; + case VSOCK: + if (nfsvers > NFS_VER2) + return NFSOCK; + case VFIFO: + if (nfsvers > NFS_VER2) + return NFFIFO; + case VBAD: + case VSTR: + case VCPLX: + default: + return NFNON; + } +} + +enum vtype +nfstov_type(nfstype nvtype, int nfsvers) +{ + switch (nvtype) { + case NFNON: + return VNON; + case NFREG: + return VREG; + case NFDIR: + return VDIR; + case NFBLK: + return VBLK; + case NFCHR: + return VCHR; + case NFLNK: + return VLNK; + case NFSOCK: + if (nfsvers > NFS_VER2) + return VSOCK; + case NFFIFO: + if (nfsvers > NFS_VER2) + return VFIFO; + case NFATTRDIR: + if (nfsvers > NFS_VER3) + return VDIR; + case NFNAMEDATTR: + if (nfsvers > NFS_VER3) + return VREG; + default: + return VNON; + } +} + +int +vtonfsv2_mode(enum vtype vtype, mode_t m) +{ + switch (vtype) { + case VNON: + case VREG: + case VDIR: + case VBLK: + case VCHR: + case VLNK: + case VSOCK: + return vnode_makeimode(vtype, m); + case VFIFO: + return vnode_makeimode(VCHR, m); + case VBAD: + case VSTR: + case VCPLX: + default: + return vnode_makeimode(VNON, m); + } +} + +#if NFSSERVER -int nfs_mount_type; -int nfs_ticks; - -lck_grp_t *nfsd_lck_grp; -lck_grp_attr_t *nfsd_lck_grp_attr; -lck_attr_t *nfsd_lck_attr; -lck_mtx_t *nfsd_mutex; - -lck_grp_attr_t *nfs_slp_group_attr; -lck_attr_t *nfs_slp_lock_attr; -lck_grp_t *nfs_slp_rwlock_group; -lck_grp_t *nfs_slp_mutex_group; - -struct nfs_reqq nfs_reqq; -struct nfssvc_sockhead nfssvc_sockhead, nfssvc_deadsockhead; -struct nfsd_head nfsd_head; -int nfsd_head_flag; - -struct nfsexpfslist nfs_exports; -struct nfsexphashhead *nfsexphashtbl; -u_long nfsexphash; -lck_grp_attr_t *nfs_export_group_attr; -lck_attr_t *nfs_export_lock_attr; -lck_grp_t *nfs_export_rwlock_group; -lck_rw_t nfs_export_rwlock; - -#ifndef NFS_NOSERVER /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. */ @@ -204,7 +240,8 @@ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NOOP }; -#endif /* NFS_NOSERVER */ +#endif /* NFSSERVER */ + /* * and the reverse mapping from generic to Version 2 procedure numbers */ @@ -234,2187 +271,2598 @@ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NOOP }; -#ifndef NFS_NOSERVER + /* - * Maps errno values to nfs error numbers. - * Use NFSERR_IO as the catch all for ones not specifically defined in - * RFC 1094. + * initialize NFS's cache of mbuf constants */ -static u_char nfsrv_v2errmap[ELAST] = { - NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, - NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, - NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, - NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, - NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, - NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, -}; +void +nfs_mbuf_init(void) +{ + struct mbuf_stat ms; + + mbuf_stats(&ms); + nfs_mbuf_mhlen = ms.mhlen; + nfs_mbuf_minclsize = ms.minclsize; +} + +#if NFSSERVER /* - * Maps errno values to nfs error numbers. - * Although it is not obvious whether or not NFS clients really care if - * a returned error value is in the specified list for the procedure, the - * safest thing to do is filter them appropriately. For Version 2, the - * X/Open XNFS document is the only specification that defines error values - * for each RPC (The RFC simply lists all possible error values for all RPCs), - * so I have decided to not do this for Version 2. - * The first entry is the default error return and the rest are the valid - * errors for that RPC in increasing numeric order. + * allocate a list of mbufs to hold the given amount of data */ -static short nfsv3err_null[] = { - 0, - 0, -}; +int +nfsm_mbuf_get_list(size_t size, mbuf_t *mp, int *mbcnt) +{ + int error, cnt; + mbuf_t mhead, mlast, m; + size_t len, mlen; -static short nfsv3err_getattr[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; + error = cnt = 0; + mhead = mlast = NULL; + len = 0; -static short nfsv3err_setattr[] = { - NFSERR_IO, - NFSERR_PERM, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_INVAL, - NFSERR_NOSPC, - NFSERR_ROFS, - NFSERR_DQUOT, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_NOT_SYNC, - NFSERR_SERVERFAULT, - 0, -}; + while (len < size) { + nfsm_mbuf_get(error, &m, (size - len)); + if (error) + break; + if (!mhead) + mhead = m; + if (mlast && ((error = mbuf_setnext(mlast, m)))) { + mbuf_free(m); + break; + } + mlen = mbuf_maxlen(m); + if ((len + mlen) > size) + mlen = size - len; + mbuf_setlen(m, mlen); + len += mlen; + cnt++; + mlast = m; + } -static short nfsv3err_lookup[] = { - NFSERR_IO, - NFSERR_NOENT, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_NOTDIR, - NFSERR_NAMETOL, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; + if (!error) { + *mp = mhead; + *mbcnt = cnt; + } + return (error); +} -static short nfsv3err_access[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; +#endif /* NFSSERVER */ -static short nfsv3err_readlink[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_INVAL, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_NOTSUPP, - NFSERR_SERVERFAULT, - 0, -}; +/* + * nfsm_chain_new_mbuf() + * + * Add a new mbuf to the given chain. + */ +int +nfsm_chain_new_mbuf(struct nfsm_chain *nmc, size_t sizehint) +{ + mbuf_t mb; + int error = 0; -static short nfsv3err_read[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_NXIO, - NFSERR_ACCES, - NFSERR_INVAL, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; + if (nmc->nmc_flags & NFSM_CHAIN_FLAG_ADD_CLUSTERS) + sizehint = nfs_mbuf_minclsize; -static short nfsv3err_write[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_INVAL, - NFSERR_FBIG, - NFSERR_NOSPC, - NFSERR_ROFS, - NFSERR_DQUOT, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; + /* allocate a new mbuf */ + nfsm_mbuf_get(error, &mb, sizehint); + if (error) + return (error); + if (mb == NULL) + panic("got NULL mbuf?"); + + /* do we have a current mbuf? */ + if (nmc->nmc_mcur) { + /* first cap off current mbuf */ + mbuf_setlen(nmc->nmc_mcur, nmc->nmc_ptr - (caddr_t)mbuf_data(nmc->nmc_mcur)); + /* then append the new mbuf */ + error = mbuf_setnext(nmc->nmc_mcur, mb); + if (error) { + mbuf_free(mb); + return (error); + } + } -static short nfsv3err_create[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_EXIST, - NFSERR_NOTDIR, - NFSERR_NOSPC, - NFSERR_ROFS, - NFSERR_NAMETOL, - NFSERR_DQUOT, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_NOTSUPP, - NFSERR_SERVERFAULT, - 0, -}; + /* set up for using the new mbuf */ + nmc->nmc_mcur = mb; + nmc->nmc_ptr = mbuf_data(mb); + nmc->nmc_left = mbuf_trailingspace(mb); -static short nfsv3err_mkdir[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_EXIST, - NFSERR_NOTDIR, - NFSERR_NOSPC, - NFSERR_ROFS, - NFSERR_NAMETOL, - NFSERR_DQUOT, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_NOTSUPP, - NFSERR_SERVERFAULT, - 0, -}; + return (0); +} -static short nfsv3err_symlink[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_EXIST, - NFSERR_NOTDIR, - NFSERR_NOSPC, - NFSERR_ROFS, - NFSERR_NAMETOL, - NFSERR_DQUOT, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_NOTSUPP, - NFSERR_SERVERFAULT, - 0, -}; +/* + * nfsm_chain_add_opaque_f() + * + * Add "len" bytes of opaque data pointed to by "buf" to the given chain. + */ +int +nfsm_chain_add_opaque_f(struct nfsm_chain *nmc, const u_char *buf, uint32_t len) +{ + uint32_t paddedlen, tlen; + int error; -static short nfsv3err_mknod[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_EXIST, - NFSERR_NOTDIR, - NFSERR_NOSPC, - NFSERR_ROFS, - NFSERR_NAMETOL, - NFSERR_DQUOT, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_NOTSUPP, - NFSERR_SERVERFAULT, - NFSERR_BADTYPE, - 0, -}; + paddedlen = nfsm_rndup(len); -static short nfsv3err_remove[] = { - NFSERR_IO, - NFSERR_NOENT, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_NOTDIR, - NFSERR_ROFS, - NFSERR_NAMETOL, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; - -static short nfsv3err_rmdir[] = { - NFSERR_IO, - NFSERR_NOENT, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_EXIST, - NFSERR_NOTDIR, - NFSERR_INVAL, - NFSERR_ROFS, - NFSERR_NAMETOL, - NFSERR_NOTEMPTY, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_NOTSUPP, - NFSERR_SERVERFAULT, - 0, -}; - -static short nfsv3err_rename[] = { - NFSERR_IO, - NFSERR_NOENT, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_EXIST, - NFSERR_XDEV, - NFSERR_NOTDIR, - NFSERR_ISDIR, - NFSERR_INVAL, - NFSERR_NOSPC, - NFSERR_ROFS, - NFSERR_MLINK, - NFSERR_NAMETOL, - NFSERR_NOTEMPTY, - NFSERR_DQUOT, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_NOTSUPP, - NFSERR_SERVERFAULT, - 0, -}; - -static short nfsv3err_link[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_EXIST, - NFSERR_XDEV, - NFSERR_NOTDIR, - NFSERR_INVAL, - NFSERR_NOSPC, - NFSERR_ROFS, - NFSERR_MLINK, - NFSERR_NAMETOL, - NFSERR_DQUOT, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_NOTSUPP, - NFSERR_SERVERFAULT, - 0, -}; - -static short nfsv3err_readdir[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_NOTDIR, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_BAD_COOKIE, - NFSERR_TOOSMALL, - NFSERR_SERVERFAULT, - 0, -}; - -static short nfsv3err_readdirplus[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_ACCES, - NFSERR_NOTDIR, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_BAD_COOKIE, - NFSERR_NOTSUPP, - NFSERR_TOOSMALL, - NFSERR_SERVERFAULT, - 0, -}; - -static short nfsv3err_fsstat[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; - -static short nfsv3err_fsinfo[] = { - NFSERR_STALE, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; - -static short nfsv3err_pathconf[] = { - NFSERR_STALE, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; - -static short nfsv3err_commit[] = { - NFSERR_IO, - NFSERR_IO, - NFSERR_STALE, - NFSERR_BADHANDLE, - NFSERR_SERVERFAULT, - 0, -}; - -static short *nfsrv_v3errmap[] = { - nfsv3err_null, - nfsv3err_getattr, - nfsv3err_setattr, - nfsv3err_lookup, - nfsv3err_access, - nfsv3err_readlink, - nfsv3err_read, - nfsv3err_write, - nfsv3err_create, - nfsv3err_mkdir, - nfsv3err_symlink, - nfsv3err_mknod, - nfsv3err_remove, - nfsv3err_rmdir, - nfsv3err_rename, - nfsv3err_link, - nfsv3err_readdir, - nfsv3err_readdirplus, - nfsv3err_fsstat, - nfsv3err_fsinfo, - nfsv3err_pathconf, - nfsv3err_commit, -}; - -#endif /* NFS_NOSERVER */ - -extern struct nfsrtt nfsrtt; -extern struct nfsstats nfsstats; -extern nfstype nfsv2_type[9]; -extern nfstype nfsv3_type[9]; -extern struct nfsnodehashhead *nfsnodehashtbl; -extern u_long nfsnodehash; - - -LIST_HEAD(nfsnodehashhead, nfsnode); + while (paddedlen) { + if (!nmc->nmc_left) { + error = nfsm_chain_new_mbuf(nmc, paddedlen); + if (error) + return (error); + } + tlen = MIN(nmc->nmc_left, paddedlen); + if (tlen) { + if (len) { + if (tlen > len) + tlen = len; + bcopy(buf, nmc->nmc_ptr, tlen); + } else { + bzero(nmc->nmc_ptr, tlen); + } + nmc->nmc_ptr += tlen; + nmc->nmc_left -= tlen; + paddedlen -= tlen; + if (len) { + buf += tlen; + len -= tlen; + } + } + } + return (0); +} /* - * Create the header for an rpc request packet - * The hsiz is the size of the rest of the nfs request header. - * (just used to decide if a cluster is a good idea) + * nfsm_chain_add_opaque_nopad_f() + * + * Add "len" bytes of opaque data pointed to by "buf" to the given chain. + * Do not XDR pad. */ int -nfsm_reqh(int hsiz, caddr_t *bposp, mbuf_t *mbp) +nfsm_chain_add_opaque_nopad_f(struct nfsm_chain *nmc, const u_char *buf, uint32_t len) { + uint32_t tlen; int error; - *mbp = NULL; - if (hsiz >= nfs_mbuf_minclsize) - error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, mbp); - else - error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, mbp); - if (error) - return (error); - *bposp = mbuf_data(*mbp); + while (len > 0) { + if (nmc->nmc_left <= 0) { + error = nfsm_chain_new_mbuf(nmc, len); + if (error) + return (error); + } + tlen = MIN(nmc->nmc_left, len); + bcopy(buf, nmc->nmc_ptr, tlen); + nmc->nmc_ptr += tlen; + nmc->nmc_left -= tlen; + len -= tlen; + buf += tlen; + } return (0); } /* - * Build the RPC header and fill in the authorization info. - * The authorization string argument is only used when the credentials - * come from outside of the kernel. - * Returns the head of the mbuf list. + * nfsm_chain_add_uio() + * + * Add "len" bytes of data from "uio" to the given chain. */ int -nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, - verf_str, mrest, mrest_len, mbp, xidp, mreqp) - kauth_cred_t cr; - int nmflag; - int procid; - int auth_type; - int auth_len; - char *auth_str; - int verf_len; - char *verf_str; - mbuf_t mrest; - int mrest_len; - mbuf_t *mbp; - u_long *xidp; - mbuf_t *mreqp; +nfsm_chain_add_uio(struct nfsm_chain *nmc, uio_t uio, uint32_t len) { - mbuf_t mb; - u_long *tl; - caddr_t bpos; - int i, error, len; - mbuf_t mreq, mb2; - int siz, grpsiz, authsiz, mlen; - struct timeval tv; - - authsiz = nfsm_rndup(auth_len); - len = authsiz + 10 * NFSX_UNSIGNED; - if (len >= nfs_mbuf_minclsize) { - error = mbuf_getpacket(MBUF_WAITOK, &mb); - } else { - error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mb); - if (!error) { - if (len < nfs_mbuf_mhlen) - mbuf_align_32(mb, len); - else - mbuf_align_32(mb, 8 * NFSX_UNSIGNED); - } - } - if (error) { - /* unable to allocate packet */ - /* XXX nfsstat? */ - return (error); - } - mreq = mb; - bpos = mbuf_data(mb); - - /* - * First the RPC header. - */ - nfsm_build(tl, u_long *, 8 * NFSX_UNSIGNED); - - /* - * derive initial xid from system time - */ - if (!nfs_xid) { - /* - * Note: it's OK if this code inits nfs_xid to 0 (for example, - * due to a broken clock) because we immediately increment it - * and we guarantee to never use xid 0. So, nfs_xid should only - * ever be 0 the first time this function is called. - */ - microtime(&tv); - nfs_xid = tv.tv_sec << 12; - } - /* - * Skip zero xid if it should ever happen. - */ - if (++nfs_xid == 0) { - nfs_xidwrap++; - nfs_xid++; - } + uint32_t paddedlen, tlen; + int error; - *tl++ = *xidp = txdr_unsigned(nfs_xid); - *tl++ = rpc_call; - *tl++ = rpc_vers; - *tl++ = txdr_unsigned(NFS_PROG); - if (nmflag & NFSMNT_NFSV3) - *tl++ = txdr_unsigned(NFS_VER3); - else - *tl++ = txdr_unsigned(NFS_VER2); - if (nmflag & NFSMNT_NFSV3) - *tl++ = txdr_unsigned(procid); - else - *tl++ = txdr_unsigned(nfsv2_procid[procid]); + paddedlen = nfsm_rndup(len); - /* - * And then the authorization cred. - */ - *tl++ = txdr_unsigned(auth_type); - *tl = txdr_unsigned(authsiz); - switch (auth_type) { - case RPCAUTH_UNIX: - nfsm_build(tl, u_long *, auth_len); - *tl++ = 0; /* stamp ?? */ - *tl++ = 0; /* NULL hostname */ - *tl++ = txdr_unsigned(kauth_cred_getuid(cr)); - *tl++ = txdr_unsigned(cr->cr_groups[0]); - grpsiz = (auth_len >> 2) - 5; - *tl++ = txdr_unsigned(grpsiz); - for (i = 1; i <= grpsiz; i++) - *tl++ = txdr_unsigned(cr->cr_groups[i]); - break; - case RPCAUTH_KERB4: - siz = auth_len; - mlen = mbuf_len(mb); - while (siz > 0) { - if (mbuf_trailingspace(mb) == 0) { - mb2 = NULL; - if (siz >= nfs_mbuf_minclsize) - error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &mb2); - else - error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mb2); - if (!error) - error = mbuf_setnext(mb, mb2); - if (error) { - mbuf_freem(mreq); - return (error); - } - mb = mb2; - mlen = 0; - bpos = mbuf_data(mb); - } - i = min(siz, mbuf_trailingspace(mb)); - bcopy(auth_str, bpos, i); - mlen += i; - mbuf_setlen(mb, mlen); - auth_str += i; - bpos += i; - siz -= i; - } - if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { - for (i = 0; i < siz; i++) - *bpos++ = '\0'; - mlen += siz; - mbuf_setlen(mb, mlen); + while (paddedlen) { + if (!nmc->nmc_left) { + error = nfsm_chain_new_mbuf(nmc, paddedlen); + if (error) + return (error); } - break; - }; - - /* - * And the verifier... - */ - nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); - if (verf_str) { - mlen = mbuf_len(mb); - *tl++ = txdr_unsigned(RPCAUTH_KERB4); - *tl = txdr_unsigned(verf_len); - siz = verf_len; - while (siz > 0) { - if (mbuf_trailingspace(mb) == 0) { - mb2 = NULL; - if (siz >= nfs_mbuf_minclsize) - error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &mb2); - else - error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mb2); - if (!error) - error = mbuf_setnext(mb, mb2); - if (error) { - mbuf_freem(mreq); - return (error); - } - mb = mb2; - mlen = 0; - bpos = mbuf_data(mb); + tlen = MIN(nmc->nmc_left, paddedlen); + if (tlen) { + if (len) { + if (tlen > len) + tlen = len; + uiomove(nmc->nmc_ptr, tlen, uio); + } else { + bzero(nmc->nmc_ptr, tlen); } - i = min(siz, mbuf_trailingspace(mb)); - bcopy(verf_str, bpos, i); - mlen += i; - mbuf_setlen(mb, mlen); - verf_str += i; - bpos += i; - siz -= i; - } - if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { - for (i = 0; i < siz; i++) - *bpos++ = '\0'; - mlen += siz; - mbuf_setlen(mb, mlen); + nmc->nmc_ptr += tlen; + nmc->nmc_left -= tlen; + paddedlen -= tlen; + if (len) + len -= tlen; } - } else { - *tl++ = txdr_unsigned(RPCAUTH_NULL); - *tl = 0; - } - error = mbuf_pkthdr_setrcvif(mreq, 0); - if (!error) - error = mbuf_setnext(mb, mrest); - if (error) { - mbuf_freem(mreq); - return (error); } - mbuf_pkthdr_setlen(mreq, authsiz + 10 * NFSX_UNSIGNED + mrest_len); - *mbp = mb; - *mreqp = mreq; return (0); } /* - * copies mbuf chain to the uio scatter/gather list + * Find the length of the NFS mbuf chain + * up to the current encoding/decoding offset. */ int -nfsm_mbuftouio(mrep, uiop, siz, dpos) - mbuf_t *mrep; - struct uio *uiop; - int siz; - caddr_t *dpos; +nfsm_chain_offset(struct nfsm_chain *nmc) { - char *mbufcp, *uiocp; - int xfer, left, len; - mbuf_t mp; - long uiosiz, rem; - int error = 0; + mbuf_t mb; + int len = 0; - mp = *mrep; - mbufcp = *dpos; - len = (caddr_t)mbuf_data(mp) + mbuf_len(mp) - mbufcp; - rem = nfsm_rndup(siz)-siz; - while (siz > 0) { - if (uiop->uio_iovcnt <= 0 || uiop->uio_iovs.iov32p == NULL) - return (EFBIG); - // LP64todo - fix this! - left = uio_iov_len(uiop); - uiocp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); - if (left > siz) - left = siz; - uiosiz = left; - while (left > 0) { - while (len == 0) { - mp = mbuf_next(mp); - if (mp == NULL) - return (EBADRPC); - mbufcp = mbuf_data(mp); - len = mbuf_len(mp); - } - xfer = (left > len) ? len : left; - if (UIO_SEG_IS_USER_SPACE(uiop->uio_segflg)) - copyout(mbufcp, CAST_USER_ADDR_T(uiocp), xfer); - else - bcopy(mbufcp, uiocp, xfer); - left -= xfer; - len -= xfer; - mbufcp += xfer; - uiocp += xfer; - uiop->uio_offset += xfer; - uio_uio_resid_add(uiop, -xfer); - } - if (uio_iov_len(uiop) <= (size_t)siz) { - uiop->uio_iovcnt--; - uio_next_iov(uiop); - } else { - uio_iov_base_add(uiop, uiosiz); - uio_iov_len_add(uiop, -uiosiz); - } - siz -= uiosiz; + for (mb = nmc->nmc_mhead; mb; mb = mbuf_next(mb)) { + if (mb == nmc->nmc_mcur) + return (len + (nmc->nmc_ptr - (caddr_t) mbuf_data(mb))); + len += mbuf_len(mb); } - *dpos = mbufcp; - *mrep = mp; - if (rem > 0) { - if (len < rem) - error = nfs_adv(mrep, dpos, rem, len); - else - *dpos += rem; - } - return (error); + + return (len); } /* - * copies a uio scatter/gather list to an mbuf chain. - * NOTE: can ony handle iovcnt == 1 + * nfsm_chain_advance() + * + * Advance an nfsm_chain by "len" bytes. */ int -nfsm_uiotombuf(uiop, mq, siz, bpos) - struct uio *uiop; - mbuf_t *mq; - int siz; - caddr_t *bpos; +nfsm_chain_advance(struct nfsm_chain *nmc, uint32_t len) { - char *uiocp; - mbuf_t mp, mp2; - int xfer, left, mlen, mplen; - int uiosiz, clflg, rem, error; - char *cp; + mbuf_t mb; - if (uiop->uio_iovcnt != 1) - panic("nfsm_uiotombuf: iovcnt != 1"); + while (len) { + if (nmc->nmc_left >= len) { + nmc->nmc_left -= len; + nmc->nmc_ptr += len; + return (0); + } + len -= nmc->nmc_left; + nmc->nmc_mcur = mb = mbuf_next(nmc->nmc_mcur); + if (!mb) + return (EBADRPC); + nmc->nmc_ptr = mbuf_data(mb); + nmc->nmc_left = mbuf_len(mb); + } - if (siz > nfs_mbuf_mlen) /* or should it >= MCLBYTES ?? */ - clflg = 1; - else - clflg = 0; - rem = nfsm_rndup(siz)-siz; - mp = mp2 = *mq; - mplen = mbuf_len(mp); - while (siz > 0) { - // LP64todo - fix this! - left = uio_iov_len(uiop); - uiocp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); - if (left > siz) - left = siz; - uiosiz = left; - while (left > 0) { - mlen = mbuf_trailingspace(mp); - if (mlen == 0) { - mp = NULL; - if (clflg) - error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &mp); - else - error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mp); - if (!error) - error = mbuf_setnext(mp2, mp); - if (error) - return (error); - mplen = 0; - mp2 = mp; - mlen = mbuf_trailingspace(mp); - } - xfer = (left > mlen) ? mlen : left; - if (UIO_SEG_IS_USER_SPACE(uiop->uio_segflg)) - copyin(CAST_USER_ADDR_T(uiocp), (caddr_t)mbuf_data(mp) + mplen, xfer); - else - bcopy(uiocp, (caddr_t)mbuf_data(mp) + mplen, xfer); - mplen += xfer; - mbuf_setlen(mp, mplen); - left -= xfer; - uiocp += xfer; - uiop->uio_offset += xfer; - uio_uio_resid_add(uiop, -xfer); - } - uio_iov_base_add(uiop, uiosiz); - uio_iov_len_add(uiop, -uiosiz); - siz -= uiosiz; - } - if (rem > 0) { - if (rem > mbuf_trailingspace(mp)) { - error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mp); - if (!error) - error = mbuf_setnext(mp2, mp); - if (error) - return (error); - mplen = 0; - } - cp = (caddr_t)mbuf_data(mp) + mplen; - for (left = 0; left < rem; left++) - *cp++ = '\0'; - mplen += rem; - mbuf_setlen(mp, mplen); - *bpos = cp; - } else { - *bpos = (caddr_t)mbuf_data(mp) + mplen; - } - *mq = mp; return (0); } /* - * Help break down an mbuf chain by setting the first siz bytes contiguous - * pointed to by returned val. - * This is used by the macros nfsm_dissect and nfsm_dissecton for tough - * cases. (The macros use the vars. dpos and dpos2) + * nfsm_chain_reverse() + * + * Reverse decode offset in an nfsm_chain by "len" bytes. */ int -nfsm_disct(mdp, dposp, siz, left, cp2) - mbuf_t *mdp; - caddr_t *dposp; - int siz; - int left; - caddr_t *cp2; +nfsm_chain_reverse(struct nfsm_chain *nmc, uint32_t len) { - mbuf_t mp, mp2; - int siz2, xfer, error, mp2len; - caddr_t p, mp2data; - - mp = *mdp; - while (left == 0) { - *mdp = mp = mbuf_next(mp); - if (mp == NULL) - return (EBADRPC); - left = mbuf_len(mp); - *dposp = mbuf_data(mp); + uint32_t mlen, new_offset; + int error = 0; + + mlen = nmc->nmc_ptr - (caddr_t) mbuf_data(nmc->nmc_mcur); + if (len <= mlen) { + nmc->nmc_ptr -= len; + nmc->nmc_left += len; + return (0); + } + + new_offset = nfsm_chain_offset(nmc) - len; + nfsm_chain_dissect_init(error, nmc, nmc->nmc_mhead); + if (error) + return (error); + + return (nfsm_chain_advance(nmc, new_offset)); +} + +/* + * nfsm_chain_get_opaque_pointer_f() + * + * Return a pointer to the next "len" bytes of contiguous data in + * the mbuf chain. If the next "len" bytes are not contiguous, we + * try to manipulate the mbuf chain so that it is. + * + * The nfsm_chain is advanced by nfsm_rndup("len") bytes. + */ +int +nfsm_chain_get_opaque_pointer_f(struct nfsm_chain *nmc, uint32_t len, u_char **pptr) +{ + mbuf_t mbcur, mb; + uint32_t left, need, mblen, cplen, padlen; + u_char *ptr; + int error = 0; + + /* move to next mbuf with data */ + while (nmc->nmc_mcur && (nmc->nmc_left == 0)) { + mb = mbuf_next(nmc->nmc_mcur); + nmc->nmc_mcur = mb; + if (!mb) + break; + nmc->nmc_ptr = mbuf_data(mb); + nmc->nmc_left = mbuf_len(mb); } - if (left >= siz) { - *cp2 = *dposp; - *dposp += siz; - } else if (mbuf_next(mp) == NULL) { + /* check if we've run out of data */ + if (!nmc->nmc_mcur) return (EBADRPC); - } else if (siz > nfs_mbuf_mhlen) { - panic("nfs S too big"); - } else { - error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mp2); + + /* do we already have a contiguous buffer? */ + if (nmc->nmc_left >= len) { + /* the returned pointer will be the current pointer */ + *pptr = (u_char*)nmc->nmc_ptr; + error = nfsm_chain_advance(nmc, nfsm_rndup(len)); + return (error); + } + + padlen = nfsm_rndup(len) - len; + + /* we need (len - left) more bytes */ + mbcur = nmc->nmc_mcur; + left = nmc->nmc_left; + need = len - left; + + if (need > mbuf_trailingspace(mbcur)) { + /* + * The needed bytes won't fit in the current mbuf so we'll + * allocate a new mbuf to hold the contiguous range of data. + */ + nfsm_mbuf_get(error, &mb, len); if (error) return (error); - error = mbuf_setnext(mp2, mbuf_next(mp)); + /* double check that this mbuf can hold all the data */ + if (mbuf_maxlen(mb) < len) { + mbuf_free(mb); + return (EOVERFLOW); + } + + /* the returned pointer will be the new mbuf's data pointer */ + *pptr = ptr = mbuf_data(mb); + + /* copy "left" bytes to the new mbuf */ + bcopy(nmc->nmc_ptr, ptr, left); + ptr += left; + mbuf_setlen(mb, left); + + /* insert the new mbuf between the current and next mbufs */ + error = mbuf_setnext(mb, mbuf_next(mbcur)); if (!error) - error = mbuf_setnext(mp, mp2); + error = mbuf_setnext(mbcur, mb); if (error) { - mbuf_free(mp2); + mbuf_free(mb); return (error); } - mbuf_setlen(mp, mbuf_len(mp) - left); - mp = mp2; - *cp2 = p = mbuf_data(mp); - bcopy(*dposp, p, left); /* Copy what was left */ - siz2 = siz-left; - p += left; - mp2 = mbuf_next(mp); - mp2data = mbuf_data(mp2); - mp2len = mbuf_len(mp2); - /* Loop around copying up the siz2 bytes */ - while (siz2 > 0) { - if (mp2 == NULL) - return (EBADRPC); - xfer = (siz2 > mp2len) ? mp2len : siz2; - if (xfer > 0) { - bcopy(mp2data, p, xfer); - mp2data += xfer; - mp2len -= xfer; - mbuf_setdata(mp2, mp2data, mp2len); - p += xfer; - siz2 -= xfer; - } - if (siz2 > 0) { - mp2 = mbuf_next(mp2); - mp2data = mbuf_data(mp2); - mp2len = mbuf_len(mp2); + + /* reduce current mbuf's length by "left" */ + mbuf_setlen(mbcur, mbuf_len(mbcur) - left); + + /* + * update nmc's state to point at the end of the mbuf + * where the needed data will be copied to. + */ + nmc->nmc_mcur = mbcur = mb; + nmc->nmc_left = 0; + nmc->nmc_ptr = (caddr_t)ptr; + } else { + /* The rest of the data will fit in this mbuf. */ + + /* the returned pointer will be the current pointer */ + *pptr = (u_char*)nmc->nmc_ptr; + + /* + * update nmc's state to point at the end of the mbuf + * where the needed data will be copied to. + */ + nmc->nmc_ptr += left; + nmc->nmc_left = 0; + } + + /* + * move the next "need" bytes into the current + * mbuf from the mbufs that follow + */ + + /* extend current mbuf length */ + mbuf_setlen(mbcur, mbuf_len(mbcur) + need); + + /* mb follows mbufs we're copying/compacting data from */ + mb = mbuf_next(mbcur); + + while (need && mb) { + /* copy as much as we need/can */ + ptr = mbuf_data(mb); + mblen = mbuf_len(mb); + cplen = MIN(mblen, need); + if (cplen) { + bcopy(ptr, nmc->nmc_ptr, cplen); + /* + * update the mbuf's pointer and length to reflect that + * the data was shifted to an earlier mbuf in the chain + */ + error = mbuf_setdata(mb, ptr + cplen, mblen - cplen); + if (error) { + mbuf_setlen(mbcur, mbuf_len(mbcur) - need); + return (error); } + /* update pointer/need */ + nmc->nmc_ptr += cplen; + need -= cplen; } - mbuf_setlen(mp, siz); - *mdp = mp2; - *dposp = mp2data; + /* if more needed, go to next mbuf */ + if (need) + mb = mbuf_next(mb); } - return (0); + + /* did we run out of data in the mbuf chain? */ + if (need) { + mbuf_setlen(mbcur, mbuf_len(mbcur) - need); + return (EBADRPC); + } + + /* + * update nmc's state to point after this contiguous data + * + * "mb" points to the last mbuf we copied data from so we + * just set nmc to point at whatever remains in that mbuf. + */ + nmc->nmc_mcur = mb; + nmc->nmc_ptr = mbuf_data(mb); + nmc->nmc_left = mbuf_len(mb); + + /* move past any padding */ + if (padlen) + error = nfsm_chain_advance(nmc, padlen); + + return (error); } /* - * Advance the position in the mbuf chain. + * nfsm_chain_get_opaque_f() + * + * Read the next "len" bytes in the chain into "buf". + * The nfsm_chain is advanced by nfsm_rndup("len") bytes. */ int -nfs_adv(mdp, dposp, offs, left) - mbuf_t *mdp; - caddr_t *dposp; - int offs; - int left; +nfsm_chain_get_opaque_f(struct nfsm_chain *nmc, uint32_t len, u_char *buf) { - mbuf_t m; - int s; - - m = *mdp; - s = left; - while (s < offs) { - offs -= s; - m = mbuf_next(m); - if (m == NULL) - return (EBADRPC); - s = mbuf_len(m); + uint32_t cplen, padlen; + int error = 0; + + padlen = nfsm_rndup(len) - len; + + /* loop through mbufs copying all the data we need */ + while (len && nmc->nmc_mcur) { + /* copy as much as we need/can */ + cplen = MIN(nmc->nmc_left, len); + if (cplen) { + bcopy(nmc->nmc_ptr, buf, cplen); + nmc->nmc_ptr += cplen; + nmc->nmc_left -= cplen; + buf += cplen; + len -= cplen; + } + /* if more needed, go to next mbuf */ + if (len) { + mbuf_t mb = mbuf_next(nmc->nmc_mcur); + nmc->nmc_mcur = mb; + nmc->nmc_ptr = mb ? mbuf_data(mb) : NULL; + nmc->nmc_left = mb ? mbuf_len(mb) : 0; + } } - *mdp = m; - *dposp = (caddr_t)mbuf_data(m) + offs; - return (0); + + /* did we run out of data in the mbuf chain? */ + if (len) + return (EBADRPC); + + if (padlen) + nfsm_chain_adv(error, nmc, padlen); + + return (error); } /* - * Copy a string into mbufs for the hard cases... + * nfsm_chain_get_uio() + * + * Read the next "len" bytes in the chain into the given uio. + * The nfsm_chain is advanced by nfsm_rndup("len") bytes. */ int -nfsm_strtmbuf(mb, bpos, cp, siz) - mbuf_t *mb; - char **bpos; - char *cp; - long siz; +nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, uio_t uio) { - mbuf_t m1 = NULL, m2; - long left, xfer, len, tlen, mlen; - u_long *tl; - int putsize, error; - - putsize = 1; - m2 = *mb; - left = mbuf_trailingspace(m2); - if (left >= NFSX_UNSIGNED) { - tl = ((u_long *)(*bpos)); - *tl++ = txdr_unsigned(siz); - putsize = 0; - left -= NFSX_UNSIGNED; - len = mbuf_len(m2); - len += NFSX_UNSIGNED; - mbuf_setlen(m2, len); - if (left > 0) { - bcopy(cp, (caddr_t) tl, left); - siz -= left; - cp += left; - len += left; - mbuf_setlen(m2, len); - left = 0; - } - } - /* Loop around adding mbufs */ - while (siz > 0) { - m1 = NULL; - if (siz > nfs_mbuf_mlen) - error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &m1); - else - error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &m1); - if (!error) - error = mbuf_setnext(m2, m1); - if (error) - return (error); - mlen = mbuf_maxlen(m1); - mbuf_setlen(m1, mlen); - m2 = m1; - tl = mbuf_data(m1); - tlen = 0; - if (putsize) { - *tl++ = txdr_unsigned(siz); - mlen -= NFSX_UNSIGNED; - mbuf_setlen(m1, mlen); - tlen = NFSX_UNSIGNED; - putsize = 0; - } - if (siz < mlen) { - len = nfsm_rndup(siz); - xfer = siz; - if (xfer < len) - *(tl+(xfer>>2)) = 0; - } else { - xfer = len = mlen; + uint32_t cplen, padlen; + int error = 0; + + padlen = nfsm_rndup(len) - len; + + /* loop through mbufs copying all the data we need */ + while (len && nmc->nmc_mcur) { + /* copy as much as we need/can */ + cplen = MIN(nmc->nmc_left, len); + if (cplen) { + error = uiomove(nmc->nmc_ptr, cplen, uio); + if (error) + return (error); + nmc->nmc_ptr += cplen; + nmc->nmc_left -= cplen; + len -= cplen; + } + /* if more needed, go to next mbuf */ + if (len) { + mbuf_t mb = mbuf_next(nmc->nmc_mcur); + nmc->nmc_mcur = mb; + nmc->nmc_ptr = mb ? mbuf_data(mb) : NULL; + nmc->nmc_left = mb ? mbuf_len(mb) : 0; } - bcopy(cp, (caddr_t) tl, xfer); - mbuf_setlen(m1, len + tlen); - siz -= xfer; - cp += xfer; } - *mb = m1; - *bpos = (caddr_t)mbuf_data(m1) + mbuf_len(m1); - return (0); + + /* did we run out of data in the mbuf chain? */ + if (len) + return (EBADRPC); + + if (padlen) + nfsm_chain_adv(error, nmc, padlen); + + return (error); } -/* - * Called once to initialize data structures... - */ +#if NFSCLIENT + int -nfs_init(struct vfsconf *vfsp) +nfsm_chain_add_string_nfc(struct nfsm_chain *nmc, const uint8_t *s, uint32_t slen) { - int i; - - /* - * Check to see if major data structures haven't bloated. - */ - if (sizeof (struct nfsnode) > NFS_NODEALLOC) { - printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC); - printf("Try reducing NFS_SMALLFH\n"); - } - if (sizeof (struct nfsmount) > NFS_MNTALLOC) { - printf("struct nfsmount bloated (> %dbytes)\n", NFS_MNTALLOC); - printf("Try reducing NFS_MUIDHASHSIZ\n"); - } - if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { - printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); - printf("Try reducing NFS_UIDHASHSIZ\n"); - } - if (sizeof (struct nfsuid) > NFS_UIDALLOC) { - printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); - printf("Try unionizing the nu_nickname and nu_flag fields\n"); - } - - nfs_mount_type = vfsp->vfc_typenum; - nfsrtt.pos = 0; - rpc_vers = txdr_unsigned(RPC_VER2); - rpc_call = txdr_unsigned(RPC_CALL); - rpc_reply = txdr_unsigned(RPC_REPLY); - rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); - rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); - rpc_mismatch = txdr_unsigned(RPC_MISMATCH); - rpc_autherr = txdr_unsigned(RPC_AUTHERR); - rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); - rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); - nfs_prog = txdr_unsigned(NFS_PROG); - nfs_true = txdr_unsigned(TRUE); - nfs_false = txdr_unsigned(FALSE); - nfs_xdrneg1 = txdr_unsigned(-1); - - nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; - if (nfs_ticks < 1) - nfs_ticks = 1; - /* Ensure async daemons disabled */ - for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { - nfs_iodwant[i] = NULL; - nfs_iodmount[i] = (struct nfsmount *)0; - } - /* init nfsiod mutex */ - nfs_iod_lck_grp_attr = lck_grp_attr_alloc_init(); - nfs_iod_lck_grp = lck_grp_alloc_init("nfs_iod", nfs_iod_lck_grp_attr); - nfs_iod_lck_attr = lck_attr_alloc_init(); - nfs_iod_mutex = lck_mtx_alloc_init(nfs_iod_lck_grp, nfs_iod_lck_attr); - - nfs_nbinit(); /* Init the nfsbuf table */ - nfs_nhinit(); /* Init the nfsnode table */ - nfs_lockinit(); /* Init the nfs lock state */ - -#ifndef NFS_NOSERVER - /* init nfsd mutex */ - nfsd_lck_grp_attr = lck_grp_attr_alloc_init(); - nfsd_lck_grp = lck_grp_alloc_init("nfsd", nfsd_lck_grp_attr); - nfsd_lck_attr = lck_attr_alloc_init(); - nfsd_mutex = lck_mtx_alloc_init(nfsd_lck_grp, nfsd_lck_attr); - - /* init slp rwlock */ - nfs_slp_lock_attr = lck_attr_alloc_init(); - nfs_slp_group_attr = lck_grp_attr_alloc_init(); - nfs_slp_rwlock_group = lck_grp_alloc_init("nfs-slp-rwlock", nfs_slp_group_attr); - nfs_slp_mutex_group = lck_grp_alloc_init("nfs-slp-mutex", nfs_slp_group_attr); - - /* init export data structures */ - nfsexphashtbl = hashinit(8, M_TEMP, &nfsexphash); - LIST_INIT(&nfs_exports); - nfs_export_lock_attr = lck_attr_alloc_init(); - nfs_export_group_attr = lck_grp_attr_alloc_init(); - nfs_export_rwlock_group = lck_grp_alloc_init("nfs-export-rwlock", nfs_export_group_attr); - lck_rw_init(&nfs_export_rwlock, nfs_export_rwlock_group, nfs_export_lock_attr); - - lck_mtx_lock(nfsd_mutex); - nfsrv_init(0); /* Init server data structures */ - nfsrv_initcache(); /* Init the server request cache */ - lck_mtx_unlock(nfsd_mutex); -#endif + uint8_t smallbuf[64]; + uint8_t *nfcname = smallbuf; + size_t buflen = sizeof(smallbuf), nfclen; + int error; - /* - * Initialize reply list and start timer - */ - TAILQ_INIT(&nfs_reqq); + error = utf8_normalizestr(s, slen, nfcname, &nfclen, buflen, UTF_PRECOMPOSED|UTF_NO_NULL_TERM); + if (error == ENAMETOOLONG) { + buflen = MAXPATHLEN; + MALLOC_ZONE(nfcname, uint8_t *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (nfcname) + error = utf8_normalizestr(s, slen, nfcname, &nfclen, buflen, UTF_PRECOMPOSED|UTF_NO_NULL_TERM); + } - nfs_timer(0); + /* if we got an error, just use the original string */ + if (error) + nfsm_chain_add_string(error, nmc, s, slen); + else + nfsm_chain_add_string(error, nmc, nfcname, nfclen); - vfsp->vfc_refcount++; /* make us non-unloadable */ - return (0); + if (nfcname && (nfcname != smallbuf)) + FREE_ZONE(nfcname, MAXPATHLEN, M_NAMEI); + return (error); } /* - * initialize NFS's cache of mbuf constants + * Add an NFSv2 "sattr" structure to an mbuf chain */ -void -nfs_mbuf_init(void) +int +nfsm_chain_add_v2sattr_f(struct nfsm_chain *nmc, struct vnode_attr *vap, uint32_t szrdev) { - struct mbuf_stat ms; + int error = 0; - mbuf_stats(&ms); - nfs_mbuf_mlen = ms.mlen; - nfs_mbuf_mhlen = ms.mhlen; - nfs_mbuf_minclsize = ms.minclsize; - nfs_mbuf_mclbytes = ms.mclbytes; + nfsm_chain_add_32(error, nmc, vtonfsv2_mode(vap->va_type, + (VATTR_IS_ACTIVE(vap, va_mode) ? vap->va_mode : 0600))); + nfsm_chain_add_32(error, nmc, + VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : (uint32_t)-1); + nfsm_chain_add_32(error, nmc, + VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : (uint32_t)-1); + nfsm_chain_add_32(error, nmc, szrdev); + nfsm_chain_add_v2time(error, nmc, + VATTR_IS_ACTIVE(vap, va_access_time) ? + &vap->va_access_time : NULL); + nfsm_chain_add_v2time(error, nmc, + VATTR_IS_ACTIVE(vap, va_modify_time) ? + &vap->va_modify_time : NULL); + + return (error); } /* - * Parse the attributes that are in the mbuf list and store them in *nvap. + * Add an NFSv3 "sattr" structure to an mbuf chain */ int -nfs_parsefattr(mbuf_t *mdp, caddr_t *dposp, int v3, struct nfs_vattr *nvap) +nfsm_chain_add_v3sattr_f(struct nfsm_chain *nmc, struct vnode_attr *vap) { - struct nfs_fattr *fp; - long t1; - caddr_t cp2; - int error = 0, rdev; - mbuf_t md; - enum vtype vtype; - u_short vmode; + int error = 0; - md = *mdp; - t1 = ((caddr_t)mbuf_data(md) + mbuf_len(md)) - *dposp; - if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2))) { - return (error); + if (VATTR_IS_ACTIVE(vap, va_mode)) { + nfsm_chain_add_32(error, nmc, TRUE); + nfsm_chain_add_32(error, nmc, vap->va_mode); + } else { + nfsm_chain_add_32(error, nmc, FALSE); } - fp = (struct nfs_fattr *)cp2; - if (v3) { - vtype = nfsv3tov_type(fp->fa_type); - vmode = fxdr_unsigned(u_short, fp->fa_mode); - rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), - fxdr_unsigned(int, fp->fa3_rdev.specdata2)); + if (VATTR_IS_ACTIVE(vap, va_uid)) { + nfsm_chain_add_32(error, nmc, TRUE); + nfsm_chain_add_32(error, nmc, vap->va_uid); } else { - vtype = nfsv2tov_type(fp->fa_type); - vmode = fxdr_unsigned(u_short, fp->fa_mode); - /* - * XXX - * - * The duplicate information returned in fa_type and fa_mode - * is an ambiguity in the NFS version 2 protocol. - * - * VREG should be taken literally as a regular file. If a - * server intents to return some type information differently - * in the upper bits of the mode field (e.g. for sockets, or - * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we - * leave the examination of the mode bits even in the VREG - * case to avoid breakage for bogus servers, but we make sure - * that there are actually type bits set in the upper part of - * fa_mode (and failing that, trust the va_type field). - * - * NFSv3 cleared the issue, and requires fa_mode to not - * contain any type information (while also introduing sockets - * and FIFOs for fa_type). - */ - if (vtype == VNON || (vtype == VREG && (vmode & S_IFMT) != 0)) - vtype = IFTOVT(vmode); - rdev = fxdr_unsigned(long, fp->fa2_rdev); - /* - * Really ugly NFSv2 kludge. - */ - if (vtype == VCHR && rdev == (int)0xffffffff) - vtype = VFIFO; + nfsm_chain_add_32(error, nmc, FALSE); } - - nvap->nva_type = vtype; - nvap->nva_mode = (vmode & 07777); - nvap->nva_rdev = (dev_t)rdev; - nvap->nva_nlink = (uint64_t)fxdr_unsigned(u_long, fp->fa_nlink); - nvap->nva_uid = fxdr_unsigned(uid_t, fp->fa_uid); - nvap->nva_gid = fxdr_unsigned(gid_t, fp->fa_gid); - if (v3) { - fxdr_hyper(&fp->fa3_size, &nvap->nva_size); - nvap->nva_blocksize = 16*1024; - fxdr_hyper(&fp->fa3_used, &nvap->nva_bytes); - fxdr_hyper(&fp->fa3_fileid, &nvap->nva_fileid); - fxdr_nfsv3time(&fp->fa3_atime, &nvap->nva_atime); - fxdr_nfsv3time(&fp->fa3_mtime, &nvap->nva_mtime); - fxdr_nfsv3time(&fp->fa3_ctime, &nvap->nva_ctime); + if (VATTR_IS_ACTIVE(vap, va_gid)) { + nfsm_chain_add_32(error, nmc, TRUE); + nfsm_chain_add_32(error, nmc, vap->va_gid); + } else { + nfsm_chain_add_32(error, nmc, FALSE); + } + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + nfsm_chain_add_32(error, nmc, TRUE); + nfsm_chain_add_64(error, nmc, vap->va_data_size); + } else { + nfsm_chain_add_32(error, nmc, FALSE); + } + if (vap->va_vaflags & VA_UTIMES_NULL) { + nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_SERVER); + nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_SERVER); } else { - nvap->nva_size = fxdr_unsigned(u_long, fp->fa2_size); - nvap->nva_blocksize = fxdr_unsigned(long, fp->fa2_blocksize); - nvap->nva_bytes = fxdr_unsigned(long, fp->fa2_blocks) * NFS_FABLKSIZE; - nvap->nva_fileid = (uint64_t)fxdr_unsigned(u_long, fp->fa2_fileid); - fxdr_nfsv2time(&fp->fa2_atime, &nvap->nva_atime); - fxdr_nfsv2time(&fp->fa2_mtime, &nvap->nva_mtime); - fxdr_nfsv2time(&fp->fa2_ctime, &nvap->nva_ctime); + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_CLIENT); + nfsm_chain_add_32(error, nmc, vap->va_access_time.tv_sec); + nfsm_chain_add_32(error, nmc, vap->va_access_time.tv_nsec); + } else { + nfsm_chain_add_32(error, nmc, NFS_TIME_DONT_CHANGE); + } + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_CLIENT); + nfsm_chain_add_32(error, nmc, vap->va_modify_time.tv_sec); + nfsm_chain_add_32(error, nmc, vap->va_modify_time.tv_nsec); + } else { + nfsm_chain_add_32(error, nmc, NFS_TIME_DONT_CHANGE); + } } - return (0); + return (error); } + /* - * Load the attribute cache (that lives in the nfsnode entry) with - * the value pointed to by nvap, unless the file type in the attribute - * cache doesn't match the file type in the nvap, in which case log a - * warning and return ESTALE. + * nfsm_chain_get_fh_attr() * - * If the dontshrink flag is set, then it's not safe to call ubc_setsize() - * to shrink the size of the file. + * Get the file handle and attributes from an mbuf chain. (NFSv2/v3) */ int -nfs_loadattrcache( - struct nfsnode *np, - struct nfs_vattr *nvap, - u_int64_t *xidp, - int dontshrink) +nfsm_chain_get_fh_attr( + struct nfsm_chain *nmc, + nfsnode_t dnp, + vfs_context_t ctx, + int nfsvers, + uint64_t *xidp, + fhandle_t *fhp, + struct nfs_vattr *nvap) { - mount_t mp; - vnode_t vp; - struct timeval now; - struct nfs_vattr *npnvap; - - if (np->n_flag & NINIT) { - vp = NULL; - mp = np->n_mount; - } else { - vp = NFSTOV(np); - mp = vnode_mount(vp); - } - - FSDBG_TOP(527, vp, np, *xidp >> 32, *xidp); - - if (!VFSTONFS(mp)) { - FSDBG_BOT(527, ENXIO, 1, 0, *xidp); - return (ENXIO); - } - - if (*xidp < np->n_xid) { - /* - * We have already updated attributes with a response from - * a later request. The attributes we have here are probably - * stale so we drop them (just return). However, our - * out-of-order receipt could be correct - if the requests were - * processed out of order at the server. Given the uncertainty - * we invalidate our cached attributes. *xidp is zeroed here - * to indicate the attributes were dropped - only getattr - * cares - it needs to retry the rpc. - */ - NATTRINVALIDATE(np); - FSDBG_BOT(527, 0, np, np->n_xid, *xidp); - *xidp = 0; - return (0); - } - - if (vp && (nvap->nva_type != vnode_vtype(vp))) { - /* - * The filehandle has changed type on us. This can be - * caused by either the server not having unique filehandles - * or because another client has removed the previous - * filehandle and a new object (of a different type) - * has been created with the same filehandle. - * - * We can't simply switch the type on the vnode because - * there may be type-specific fields that need to be - * cleaned up or set up. - * - * So, what should we do with this vnode? - * - * About the best we can do is log a warning and return - * an error. ESTALE is about the closest error, but it - * is a little strange that we come up with this error - * internally instead of simply passing it through from - * the server. Hopefully, the vnode will be reclaimed - * soon so the filehandle can be reincarnated as the new - * object type. - */ - printf("nfs loadattrcache vnode changed type, was %d now %d\n", - vnode_vtype(vp), nvap->nva_type); - FSDBG_BOT(527, ESTALE, 3, 0, *xidp); - return (ESTALE); - } + int error = 0, gotfh, gotattr; - microuptime(&now); - np->n_attrstamp = now.tv_sec; - np->n_xid = *xidp; - - npnvap = &np->n_vattr; - nvap->nva_fsid = vfs_statfs(mp)->f_fsid.val[0]; - bcopy((caddr_t)nvap, (caddr_t)npnvap, sizeof(*nvap)); + gotfh = gotattr = 1; - if (vp) { - if (nvap->nva_size != np->n_size) { - FSDBG(527, vp, nvap->nva_size, np->n_size, - (nvap->nva_type == VREG) | - (np->n_flag & NMODIFIED ? 6 : 4)); - if (nvap->nva_type == VREG) { - u_quad_t orig_size = np->n_size; - if (np->n_flag & NMODIFIED) { - if (nvap->nva_size < np->n_size) - nvap->nva_size = np->n_size; - else - np->n_size = nvap->nva_size; - } else - np->n_size = nvap->nva_size; - if (!UBCINFOEXISTS(vp) || - (dontshrink && np->n_size < (u_quad_t)ubc_getsize(vp))) { - nvap->nva_size = np->n_size = orig_size; - NATTRINVALIDATE(np); - } else { - ubc_setsize(vp, (off_t)np->n_size); /* XXX */ - } - } else - np->n_size = nvap->nva_size; + if (nfsvers == NFS_VER3) /* check for file handle */ + nfsm_chain_get_32(error, nmc, gotfh); + if (!error && gotfh) /* get file handle */ + nfsm_chain_get_fh(error, nmc, nfsvers, fhp); + else + fhp->fh_len = 0; + if (nfsvers == NFS_VER3) /* check for file attributes */ + nfsm_chain_get_32(error, nmc, gotattr); + nfsmout_if(error); + if (gotattr) { + if (!gotfh) /* skip attributes */ + nfsm_chain_adv(error, nmc, NFSX_V3FATTR); + else /* get attributes */ + error = nfs_parsefattr(nmc, nfsvers, nvap); + } else if (gotfh) { + /* we need valid attributes in order to call nfs_nget() */ + if (nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, 0, ctx, nvap, xidp)) { + gotattr = 0; + fhp->fh_len = 0; } - } else { - np->n_size = nvap->nva_size; - } - - if (np->n_flag & NCHG) { - if (np->n_flag & NACC) - nvap->nva_atime = np->n_atim; - if (np->n_flag & NUPD) - nvap->nva_mtime = np->n_mtim; } - - FSDBG_BOT(527, 0, np, 0, *xidp); - return (0); +nfsmout: + return (error); } /* - * Calculate the attribute timeout based on - * how recently the file has been modified. + * Get and process NFSv3 WCC data from an mbuf chain */ int -nfs_attrcachetimeout(vnode_t vp) +nfsm_chain_get_wcc_data_f( + struct nfsm_chain *nmc, + nfsnode_t np, + struct timespec *premtime, + int *newpostattr, + u_int64_t *xidp) { - struct nfsnode *np = VTONFS(vp); - struct nfsmount *nmp; - struct timeval now; - int isdir, timeo; + int error = 0; + uint32_t flag = 0; + + nfsm_chain_get_32(error, nmc, flag); + if (!error && flag) { + nfsm_chain_adv(error, nmc, 2 * NFSX_UNSIGNED); + nfsm_chain_get_32(error, nmc, premtime->tv_sec); + nfsm_chain_get_32(error, nmc, premtime->tv_nsec); + nfsm_chain_adv(error, nmc, 2 * NFSX_UNSIGNED); + } else { + premtime->tv_sec = 0; + premtime->tv_nsec = 0; + } + nfsm_chain_postop_attr_update_flag(error, nmc, np, *newpostattr, xidp); - if (!(nmp = VFSTONFS(vnode_mount(vp)))) - return (0); - - isdir = vnode_isdir(vp); - - if ((np)->n_flag & NMODIFIED) - timeo = isdir ? nmp->nm_acdirmin : nmp->nm_acregmin; - else { - /* Note that if the client and server clocks are way out of sync, */ - /* timeout will probably get clamped to a min or max value */ - microtime(&now); - timeo = (now.tv_sec - (np)->n_mtime.tv_sec) / 10; - if (isdir) { - if (timeo < nmp->nm_acdirmin) - timeo = nmp->nm_acdirmin; - else if (timeo > nmp->nm_acdirmax) - timeo = nmp->nm_acdirmax; - } else { - if (timeo < nmp->nm_acregmin) - timeo = nmp->nm_acregmin; - else if (timeo > nmp->nm_acregmax) - timeo = nmp->nm_acregmax; - } - } - - return (timeo); + return (error); } /* - * Check the time stamp - * If the cache is valid, copy contents to *nvaper and return 0 - * otherwise return an error + * Get the next RPC transaction ID (XID) */ -int -nfs_getattrcache(vp, nvaper) - vnode_t vp; - struct nfs_vattr *nvaper; +void +nfs_get_xid(uint64_t *xidp) { - struct nfsnode *np = VTONFS(vp); - struct nfs_vattr *nvap; - struct timeval nowup; - int32_t timeo; - - if (!NATTRVALID(np)) { - FSDBG(528, vp, 0, 0, 0); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); - return (ENOENT); - } - - timeo = nfs_attrcachetimeout(vp); - - microuptime(&nowup); - if ((nowup.tv_sec - np->n_attrstamp) >= timeo) { - FSDBG(528, vp, 0, 0, 1); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); - return (ENOENT); - } - FSDBG(528, vp, 0, 0, 2); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_hits); - nvap = &np->n_vattr; + struct timeval tv; - if (nvap->nva_size != np->n_size) { - FSDBG(528, vp, nvap->nva_size, np->n_size, - (nvap->nva_type == VREG) | - (np->n_flag & NMODIFIED ? 6 : 4)); - if (nvap->nva_type == VREG) { - if (np->n_flag & NMODIFIED) { - if (nvap->nva_size < np->n_size) - nvap->nva_size = np->n_size; - else - np->n_size = nvap->nva_size; - } else - np->n_size = nvap->nva_size; - ubc_setsize(vp, (off_t)np->n_size); /* XXX */ - } else - np->n_size = nvap->nva_size; + lck_mtx_lock(nfs_request_mutex); + if (!nfs_xid) { + /* + * Derive initial xid from system time. + * + * Note: it's OK if this code inits nfs_xid to 0 (for example, + * due to a broken clock) because we immediately increment it + * and we guarantee to never use xid 0. So, nfs_xid should only + * ever be 0 the first time this function is called. + */ + microtime(&tv); + nfs_xid = tv.tv_sec << 12; } - - bcopy((caddr_t)nvap, (caddr_t)nvaper, sizeof(struct nfs_vattr)); - if (np->n_flag & NCHG) { - if (np->n_flag & NACC) - nvaper->nva_atime = np->n_atim; - if (np->n_flag & NUPD) - nvaper->nva_mtime = np->n_mtim; + if (++nfs_xid == 0) { + /* Skip zero xid if it should ever happen. */ + nfs_xidwrap++; + nfs_xid++; } - return (0); + *xidp = nfs_xid + ((uint64_t)nfs_xidwrap << 32); + lck_mtx_unlock(nfs_request_mutex); } -#ifndef NFS_NOSERVER /* - * Extract a lookup path from the given mbufs and store it in - * a newly allocated buffer saved in the given nameidata structure. - * exptected string length given as *lenp and final string length - * (after any WebNFS processing) is returned in *lenp. + * Build the RPC header and fill in the authorization info. + * Returns the head of the mbuf list and the xid. */ + int -nfsm_path_mbuftond( - mbuf_t *mdp, - caddr_t *dposp, - __unused int v3, - __unused int pubflag, - int* lenp, - struct nameidata *ndp) +nfsm_rpchead( + struct nfsreq *req, + mbuf_t mrest, + u_int64_t *xidp, + mbuf_t *mreqp) { - int i, len, len2, rem, error = 0; - mbuf_t md; - char *fromcp, *tocp; - struct componentname *cnp = &ndp->ni_cnd; -/* XXX Revisit when enabling WebNFS */ -#ifdef WEBNFS_ENABLED - int webcnt = 0, digitcnt = 0; - char hexdigits[2]; -#endif + struct nfsmount *nmp = req->r_nmp; + int nfsvers = nmp->nm_vers; + int proc = ((nfsvers == NFS_VER2) ? nfsv2_procid[req->r_procnum] : (int)req->r_procnum); - len = *lenp; - if (len > (MAXPATHLEN - 1)) - return (ENAMETOOLONG); + return nfsm_rpchead2(nmp, nmp->nm_sotype, NFS_PROG, nfsvers, proc, + req->r_auth, req->r_cred, req, mrest, xidp, mreqp); +} - /* - * Get a buffer for the name to be translated, and copy the - * name into the buffer. - */ - MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); - if (!cnp->cn_pnbuf) - return (ENOMEM); - cnp->cn_pnlen = MAXPATHLEN; - cnp->cn_flags |= HASBUF; +int +nfsm_rpchead2(struct nfsmount *nmp, int sotype, int prog, int vers, int proc, int auth_type, + kauth_cred_t cred, struct nfsreq *req, mbuf_t mrest, u_int64_t *xidp, mbuf_t *mreqp) +{ + mbuf_t mreq, mb; + int error, i, grpsiz, auth_len = 0, authsiz, reqlen; + size_t headlen; + struct nfsm_chain nmreq; - /* - * Copy the name from the mbuf list to the string - * - * Along the way, take note of any WebNFS characters - * and convert any % escapes. - */ - fromcp = *dposp; - tocp = cnp->cn_pnbuf; - md = *mdp; - rem = (caddr_t)mbuf_data(md) + mbuf_len(md) - fromcp; - for (i = 1; i <= len; i++) { - while (rem == 0) { - md = mbuf_next(md); - if (md == NULL) { - error = EBADRPC; - goto out; - } - fromcp = mbuf_data(md); - rem = mbuf_len(md); - } -/* XXX Revisit when enabling WebNFS */ -#ifdef WEBNFS_ENABLED - if (pubflag) { - if ((i == 1) && ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START)) { - switch ((unsigned char)*fromcp) { - case WEBNFS_NATIVE_CHAR: - /* - * 'Native' path for us is the same - * as a path according to the NFS spec, - * just skip the escape char. - */ - webcnt++; - fromcp++; - rem--; - /* next iteration of for loop */ - continue; - /* - * More may be added in the future, range 0x80-0xff. - * Don't currently support security query lookup (0x81). - */ - default: - error = EIO; - goto out; - } - } - if (digitcnt) { - /* We're expecting hex digits */ - if (!ISHEX(*fromcp)) { - error = ENOENT; - goto out; - } - digitcnt--; - hexdigits[digitcnt ? 0 : 1] = *fromcp++; - if (!digitcnt) - *tocp++ = HEXSTRTOI(hexdigits); - rem--; - /* next iteration of for loop */ - continue; - } else if (*fromcp == WEBNFS_ESC_CHAR) { - /* - * We can't really look at the next couple - * bytes here safely/easily, so we note that - * the next two characters should be hex - * digits and later save them in hexdigits[]. - * When we've got both, we'll convert it. - */ - digitcnt = 2; - webcnt += 2; - fromcp++; - rem--; - /* next iteration of for loop */ - continue; - } - } - if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) -#else - if (*fromcp == '\0' || *fromcp == '/') -#endif - { - error = EACCES; - goto out; + /* calculate expected auth length */ + switch (auth_type) { + case RPCAUTH_NONE: + auth_len = 0; + break; + case RPCAUTH_SYS: + { + gid_t grouplist[NGROUPS]; + int groupcount = NGROUPS; + + if (!cred) + return (EINVAL); + + (void)kauth_cred_getgroups(cred, grouplist, &groupcount); + if (groupcount < 1) + return (EINVAL); + + auth_len = ((((groupcount - 1) > nmp->nm_numgrps) ? + nmp->nm_numgrps : (groupcount - 1)) << 2) + + 5 * NFSX_UNSIGNED; + break; + } + case RPCAUTH_KRB5: + case RPCAUTH_KRB5I: + case RPCAUTH_KRB5P: + if (!req || !cred) + return (EINVAL); + auth_len = 5 * NFSX_UNSIGNED + 0; // zero context handle for now + break; + default: + return (EINVAL); } - *tocp++ = *fromcp++; - rem--; - } - *tocp = '\0'; - *mdp = md; - *dposp = fromcp; - len2 = nfsm_rndup(len)-len; - if (len2 > 0) { - if (rem >= len2) - *dposp += len2; - else if ((error = nfs_adv(mdp, dposp, len2, rem)) != 0) - goto out; - } + authsiz = nfsm_rndup(auth_len); -/* XXX Revisit when enabling WebNFS */ -#ifdef WEBNFS_ENABLED - if (pubflag) { - if (digitcnt) { - /* The string ended in the middle of an escape! */ - error = ENOENT; - goto out; + /* allocate the packet */ + headlen = authsiz + 10 * NFSX_UNSIGNED; + if (sotype == SOCK_STREAM) /* also include room for any RPC Record Mark */ + headlen += NFSX_UNSIGNED; + if (headlen >= nfs_mbuf_minclsize) { + error = mbuf_getpacket(MBUF_WAITOK, &mreq); + } else { + error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mreq); + if (!error) { + if (headlen < nfs_mbuf_mhlen) + mbuf_align_32(mreq, headlen); + else + mbuf_align_32(mreq, 8 * NFSX_UNSIGNED); } - len -= webcnt; } -#endif - -out: if (error) { - if (cnp->cn_pnbuf) - FREE_ZONE(cnp->cn_pnbuf, MAXPATHLEN, M_NAMEI); - cnp->cn_flags &= ~HASBUF; - } else { - ndp->ni_pathlen = len; - *lenp = len; + /* unable to allocate packet */ + /* XXX should we keep statistics for these errors? */ + return (error); } - return (error); -} - -/* - * Set up nameidata for a lookup() call and do it. - * - * If pubflag is set, this call is done for a lookup operation on the - * public filehandle. In that case we allow crossing mountpoints and - * absolute pathnames. However, the caller is expected to check that - * the lookup result is within the public fs, and deny access if - * it is not. - */ -int -nfs_namei( - struct nfsrv_descript *nfsd, - struct vfs_context *ctx, - struct nameidata *ndp, - struct nfs_filehandle *nfhp, - mbuf_t nam, - int pubflag, - vnode_t *retdirp, - struct nfs_export **nxp, - struct nfs_export_options **nxop) -{ -/* XXX Revisit when enabling WebNFS */ -#ifdef WEBNFS_ENABLED - char *cp; - uio_t auio; - char uio_buf[ UIO_SIZEOF(1) ]; - int linklen, olen = ndp->ni_pathlen; -#endif - vnode_t dp; - int error; - struct componentname *cnp = &ndp->ni_cnd; - char *tmppn; - - *retdirp = NULL; /* - * Extract and set starting directory. + * If the caller gave us a non-zero XID then use it because + * it may be a higher-level resend with a GSSAPI credential. + * Otherwise, allocate a new one. */ - error = nfsrv_fhtovp(nfhp, nam, pubflag, &dp, nxp, nxop); - if (error) - goto out; - error = nfsrv_credcheck(nfsd, *nxp, *nxop); - if (error || (vnode_vtype(dp) != VDIR)) { - vnode_put(dp); - error = ENOTDIR; - goto out; - } + if (*xidp == 0) + nfs_get_xid(xidp); - ctx->vc_ucred = nfsd->nd_cr; - ndp->ni_cnd.cn_context = ctx; + /* build the header(s) */ + nfsm_chain_init(&nmreq, mreq); - if (*nxop && ((*nxop)->nxo_flags & NX_READONLY)) - cnp->cn_flags |= RDONLY; + /* First, if it's a TCP stream insert space for an RPC record mark */ + if (sotype == SOCK_STREAM) + nfsm_chain_add_32(error, &nmreq, 0); - *retdirp = dp; + /* Then the RPC header. */ + nfsm_chain_add_32(error, &nmreq, (*xidp & 0xffffffff)); + nfsm_chain_add_32(error, &nmreq, RPC_CALL); + nfsm_chain_add_32(error, &nmreq, RPC_VER2); + nfsm_chain_add_32(error, &nmreq, prog); + nfsm_chain_add_32(error, &nmreq, vers); + nfsm_chain_add_32(error, &nmreq, proc); -/* XXX Revisit when enabling WebNFS */ -#ifdef WEBNFS_ENABLED - if (pubflag) { - ndp->ni_rootdir = rootvnode; - ndp->ni_loopcnt = 0; - if (cnp->cn_pnbuf[0] == '/') { - vnode_put(dp); - dp = rootvnode; - error = vnode_get(dp); - if (error) { - *retdirp = NULL; - goto out; - } +add_cred: + switch (auth_type) { + case RPCAUTH_NONE: + nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE); /* auth */ + nfsm_chain_add_32(error, &nmreq, 0); /* length */ + nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE); /* verf */ + nfsm_chain_add_32(error, &nmreq, 0); /* length */ + nfsm_chain_build_done(error, &nmreq); + /* Append the args mbufs */ + if (!error) + error = mbuf_setnext(nmreq.nmc_mcur, mrest); + break; + case RPCAUTH_SYS: { + gid_t grouplist[NGROUPS]; + int groupcount; + + nfsm_chain_add_32(error, &nmreq, RPCAUTH_SYS); + nfsm_chain_add_32(error, &nmreq, authsiz); + nfsm_chain_add_32(error, &nmreq, 0); /* stamp */ + nfsm_chain_add_32(error, &nmreq, 0); /* zero-length hostname */ + nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(cred)); /* UID */ + nfsm_chain_add_32(error, &nmreq, kauth_cred_getgid(cred)); /* GID */ + grpsiz = (auth_len >> 2) - 5; + nfsm_chain_add_32(error, &nmreq, grpsiz);/* additional GIDs */ + memset(grouplist, 0, sizeof(grouplist)); + groupcount = grpsiz; + (void)kauth_cred_getgroups(cred, grouplist, &groupcount); + for (i = 1; i <= grpsiz; i++) + nfsm_chain_add_32(error, &nmreq, grouplist[i]); + + /* And the verifier... */ + nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE); /* flavor */ + nfsm_chain_add_32(error, &nmreq, 0); /* length */ + nfsm_chain_build_done(error, &nmreq); + + /* Append the args mbufs */ + if (!error) + error = mbuf_setnext(nmreq.nmc_mcur, mrest); + break; + } + case RPCAUTH_KRB5: + case RPCAUTH_KRB5I: + case RPCAUTH_KRB5P: + error = nfs_gss_clnt_cred_put(req, &nmreq, mrest); + if (error == ENEEDAUTH) { + gid_t grouplist[NGROUPS]; + int groupcount = NGROUPS; + /* + * Use sec=sys for this user + */ + error = 0; + req->r_auth = auth_type = RPCAUTH_SYS; + (void)kauth_cred_getgroups(cred, grouplist, &groupcount); + auth_len = ((((groupcount - 1) > nmp->nm_numgrps) ? + nmp->nm_numgrps : (groupcount - 1)) << 2) + + 5 * NFSX_UNSIGNED; + authsiz = nfsm_rndup(auth_len); + goto add_cred; } - } else { - cnp->cn_flags |= NOCROSSMOUNT; + break; + }; + + /* finish setting up the packet */ + if (!error) + error = mbuf_pkthdr_setrcvif(mreq, 0); + + if (error) { + mbuf_freem(mreq); + return (error); } -#else - cnp->cn_flags |= NOCROSSMOUNT; -#endif - ndp->ni_usedvp = dp; + /* Calculate the size of the request */ + reqlen = 0; + for (mb = nmreq.nmc_mhead; mb; mb = mbuf_next(mb)) + reqlen += mbuf_len(mb); + + mbuf_pkthdr_setlen(mreq, reqlen); - for (;;) { - cnp->cn_nameptr = cnp->cn_pnbuf; - ndp->ni_startdir = dp; - /* - * And call lookup() to do the real work - */ - error = lookup(ndp); - if (error) - break; /* - * Check for encountering a symbolic link + * If the request goes on a TCP stream, + * set its size in the RPC record mark. + * The record mark count doesn't include itself + * and the last fragment bit is set. */ - if ((cnp->cn_flags & ISSYMLINK) == 0) { - return (0); - } else { - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } -/* XXX Revisit when enabling WebNFS */ -#ifdef WEBNFS_ENABLED - if (!pubflag) { -#endif - if (cnp->cn_flags & (LOCKPARENT | WANTPARENT)) - vnode_put(ndp->ni_dvp); - if (ndp->ni_vp) { - vnode_put(ndp->ni_vp); - ndp->ni_vp = NULL; - } - error = EINVAL; - break; -/* XXX Revisit when enabling WebNFS */ -#ifdef WEBNFS_ENABLED - } + if (sotype == SOCK_STREAM) + nfsm_chain_set_recmark(error, &nmreq, + (reqlen - NFSX_UNSIGNED) | 0x80000000); - if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { - vnode_put(ndp->ni_vp); - ndp->ni_vp = NULL; - error = ELOOP; - break; - } - /* XXX assert(olen <= MAXPATHLEN - 1); */ - if (ndp->ni_pathlen > 1) { - MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - if (!cp) { - vnode_put(ndp->ni_vp); - ndp->ni_vp = NULL; - error = ENOMEM; - break; - } - } else { - cp = cnp->cn_pnbuf; - } - auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, - &uio_buf[0], sizeof(uio_buf)); - if (!auio) { - vnode_put(ndp->ni_vp); - ndp->ni_vp = NULL; - if (ndp->ni_pathlen > 1) - FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); - error = ENOMEM; - break; - } - uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN); - error = VNOP_READLINK(ndp->ni_vp, auio, cnp->cn_context); - if (error) { -badlink: - vnode_put(ndp->ni_vp); - ndp->ni_vp = NULL; - if (ndp->ni_pathlen > 1) - FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); - break; - } - linklen = MAXPATHLEN - uio_resid(auio); - if (linklen == 0) { - error = ENOENT; - goto badlink; - } - if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { - error = ENAMETOOLONG; - goto badlink; - } - if (ndp->ni_pathlen > 1) { - long len = cnp->cn_pnlen; - tmppn = cnp->cn_pnbuf; - cnp->cn_pnbuf = cp; - cnp->cn_pnlen = olen + 1; - bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); - FREE_ZONE(tmppn, len, M_NAMEI); - } else - cnp->cn_pnbuf[linklen] = '\0'; - ndp->ni_pathlen += linklen; - - vnode_put(ndp->ni_vp); - dp = ndp->ni_dvp; - ndp->ni_dvp = NULL; + *mreqp = mreq; + return (0); +} +/* + * Parse an NFS file attribute structure out of an mbuf chain. + */ +int +nfs_parsefattr(struct nfsm_chain *nmc, int nfsvers, struct nfs_vattr *nvap) +{ + int error = 0; + enum vtype vtype; + u_short vmode; + uint32_t val, val2; + dev_t rdev; + + val = val2 = 0; + NVATTR_INIT(nvap); + + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TYPE); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_MODE); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SIZE); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SPACE_USED); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_RAWDEV); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FSID); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FILEID); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_ACCESS); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_MODIFY); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_METADATA); + + nfsm_chain_get_32(error, nmc, vtype); + nfsm_chain_get_32(error, nmc, vmode); + nfsmout_if(error); + + if (nfsvers == NFS_VER3) { + nvap->nva_type = nfstov_type(vtype, nfsvers); + } else { /* - * Check if root directory should replace current directory. + * The duplicate information returned in fa_type and fa_mode + * is an ambiguity in the NFS version 2 protocol. + * + * VREG should be taken literally as a regular file. If a + * server intends to return some type information differently + * in the upper bits of the mode field (e.g. for sockets, or + * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we + * leave the examination of the mode bits even in the VREG + * case to avoid breakage for bogus servers, but we make sure + * that there are actually type bits set in the upper part of + * fa_mode (and failing that, trust the va_type field). + * + * NFSv3 cleared the issue, and requires fa_mode to not + * contain any type information (while also introducing + * sockets and FIFOs for fa_type). */ - if (cnp->cn_pnbuf[0] == '/') { - vnode_put(dp); - dp = ndp->ni_rootdir; - error = vnode_get(dp); - if (error) - break; - } -#endif + vtype = nfstov_type(vtype, nfsvers); + if ((vtype == VNON) || ((vtype == VREG) && ((vmode & S_IFMT) != 0))) + vtype = IFTOVT(vmode); + nvap->nva_type = vtype; } - } -out: - tmppn = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmppn, cnp->cn_pnlen, M_NAMEI); + nvap->nva_mode = (vmode & 07777); + + nfsm_chain_get_32(error, nmc, nvap->nva_nlink); + nfsm_chain_get_32(error, nmc, nvap->nva_uid); + nfsm_chain_get_32(error, nmc, nvap->nva_gid); + + if (nfsvers == NFS_VER3) { + nfsm_chain_get_64(error, nmc, nvap->nva_size); + nfsm_chain_get_64(error, nmc, nvap->nva_bytes); + nfsm_chain_get_32(error, nmc, nvap->nva_rawdev.specdata1); + nfsm_chain_get_32(error, nmc, nvap->nva_rawdev.specdata2); + nfsmout_if(error); + nfsm_chain_get_64(error, nmc, nvap->nva_fsid.major); + nvap->nva_fsid.minor = 0; + nfsm_chain_get_64(error, nmc, nvap->nva_fileid); + } else { + nfsm_chain_get_32(error, nmc, nvap->nva_size); + nfsm_chain_adv(error, nmc, NFSX_UNSIGNED); + nfsm_chain_get_32(error, nmc, rdev); + nfsmout_if(error); + nvap->nva_rawdev.specdata1 = major(rdev); + nvap->nva_rawdev.specdata2 = minor(rdev); + nfsm_chain_get_32(error, nmc, val); /* blocks */ + nfsmout_if(error); + nvap->nva_bytes = val * NFS_FABLKSIZE; + nfsm_chain_get_32(error, nmc, val); + nfsmout_if(error); + nvap->nva_fsid.major = (uint64_t)val; + nvap->nva_fsid.minor = 0; + nfsm_chain_get_32(error, nmc, val); + nfsmout_if(error); + nvap->nva_fileid = (uint64_t)val; + /* Really ugly NFSv2 kludge. */ + if ((vtype == VCHR) && (rdev == (dev_t)0xffffffff)) + nvap->nva_type = VFIFO; + } + nfsm_chain_get_time(error, nmc, nfsvers, + nvap->nva_timesec[NFSTIME_ACCESS], + nvap->nva_timensec[NFSTIME_ACCESS]); + nfsm_chain_get_time(error, nmc, nfsvers, + nvap->nva_timesec[NFSTIME_MODIFY], + nvap->nva_timensec[NFSTIME_MODIFY]); + nfsm_chain_get_time(error, nmc, nfsvers, + nvap->nva_timesec[NFSTIME_CHANGE], + nvap->nva_timensec[NFSTIME_CHANGE]); +nfsmout: return (error); } /* - * A fiddled version of m_adj() that ensures null fill to a long - * boundary and only trims off the back end + * Load the attribute cache (that lives in the nfsnode entry) with + * the value pointed to by nvap, unless the file type in the attribute + * cache doesn't match the file type in the nvap, in which case log a + * warning and return ESTALE. + * + * If the dontshrink flag is set, then it's not safe to call ubc_setsize() + * to shrink the size of the file. */ -void -nfsm_adj(mp, len, nul) - mbuf_t mp; - int len; - int nul; -{ - mbuf_t m, mnext; - int count, i, mlen; - char *cp; +int +nfs_loadattrcache( + nfsnode_t np, + struct nfs_vattr *nvap, + u_int64_t *xidp, + int dontshrink) +{ + mount_t mp; + vnode_t vp; + struct timeval now; + struct nfs_vattr *npnvap; + int xattr = np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR; + int referral = np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL; + int aclbit, monitored, error = 0; + kauth_acl_t acl; + struct nfsmount *nmp; + uint32_t events = np->n_events; - /* - * Trim from tail. Scan the mbuf chain, - * calculating its length and finding the last mbuf. - * If the adjustment only affects this mbuf, then just - * adjust and return. Otherwise, rescan and truncate - * after the remaining size. - */ - count = 0; - m = mp; - for (;;) { - mlen = mbuf_len(m); - count += mlen; - mnext = mbuf_next(m); - if (mnext == NULL) - break; - m = mnext; + if (np->n_hflag & NHINIT) { + vp = NULL; + mp = np->n_mount; + } else { + vp = NFSTOV(np); + mp = vnode_mount(vp); } - if (mlen > len) { - mlen -= len; - mbuf_setlen(m, mlen); - if (nul > 0) { - cp = (caddr_t)mbuf_data(m) + mlen - nul; - for (i = 0; i < nul; i++) - *cp++ = '\0'; - } - return; + monitored = vp ? vnode_ismonitored(vp) : 0; + + FSDBG_TOP(527, np, vp, *xidp >> 32, *xidp); + + if (!((nmp = VFSTONFS(mp)))) { + FSDBG_BOT(527, ENXIO, 1, 0, *xidp); + return (ENXIO); } - count -= len; - if (count < 0) - count = 0; + + if (*xidp < np->n_xid) { + /* + * We have already updated attributes with a response from + * a later request. The attributes we have here are probably + * stale so we drop them (just return). However, our + * out-of-order receipt could be correct - if the requests were + * processed out of order at the server. Given the uncertainty + * we invalidate our cached attributes. *xidp is zeroed here + * to indicate the attributes were dropped - only getattr + * cares - it needs to retry the rpc. + */ + NATTRINVALIDATE(np); + FSDBG_BOT(527, 0, np, np->n_xid, *xidp); + *xidp = 0; + return (0); + } + + if (vp && (nvap->nva_type != vnode_vtype(vp))) { + /* + * The filehandle has changed type on us. This can be + * caused by either the server not having unique filehandles + * or because another client has removed the previous + * filehandle and a new object (of a different type) + * has been created with the same filehandle. + * + * We can't simply switch the type on the vnode because + * there may be type-specific fields that need to be + * cleaned up or set up. + * + * So, what should we do with this vnode? + * + * About the best we can do is log a warning and return + * an error. ESTALE is about the closest error, but it + * is a little strange that we come up with this error + * internally instead of simply passing it through from + * the server. Hopefully, the vnode will be reclaimed + * soon so the filehandle can be reincarnated as the new + * object type. + */ + printf("nfs loadattrcache vnode changed type, was %d now %d\n", + vnode_vtype(vp), nvap->nva_type); + error = ESTALE; + if (monitored) + events |= VNODE_EVENT_DELETE; + goto out; + } + + npnvap = &np->n_vattr; + /* - * Correct length for chain is "count". - * Find the mbuf with last data, adjust its length, - * and toss data from remaining mbufs on chain. + * The ACL cache needs special handling because it is not + * always updated. Save current ACL cache state so it can + * be restored after copying the new attributes into place. */ - for (m = mp; m; m = mbuf_next(m)) { - mlen = mbuf_len(m); - if (mlen >= count) { - mlen = count; - mbuf_setlen(m, count); - if (nul > 0) { - cp = (caddr_t)mbuf_data(m) + mlen - nul; - for (i = 0; i < nul; i++) - *cp++ = '\0'; + aclbit = NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL); + acl = npnvap->nva_acl; + + if (monitored) { + /* + * For monitored nodes, check for attribute changes that should generate events. + */ + if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS) && + (nvap->nva_nlink != npnvap->nva_nlink)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_LINK; + if (events & VNODE_EVENT_PERMS) + /* no need to do all the checking if it's already set */; + else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_MODE) && + (nvap->nva_mode != npnvap->nva_mode)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER) && + (nvap->nva_uid != npnvap->nva_uid)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP) && + (nvap->nva_gid != npnvap->nva_gid)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if (nmp->nm_vers >= NFS_VER4) { + if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER) && + !kauth_guid_equal(&nvap->nva_uuuid, &npnvap->nva_uuuid)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP) && + !kauth_guid_equal(&nvap->nva_guuid, &npnvap->nva_guuid)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if ((NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_ACL) && + nvap->nva_acl && npnvap->nva_acl && + ((nvap->nva_acl->acl_entrycount != npnvap->nva_acl->acl_entrycount) || + bcmp(nvap->nva_acl, npnvap->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl))))) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + } + if (((nmp->nm_vers >= NFS_VER4) && (nvap->nva_change != npnvap->nva_change)) || + (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_MODIFY) && + ((nvap->nva_timesec[NFSTIME_MODIFY] != npnvap->nva_timesec[NFSTIME_MODIFY]) || + (nvap->nva_timensec[NFSTIME_MODIFY] != npnvap->nva_timensec[NFSTIME_MODIFY])))) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_WRITE; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_RAWDEV) && + ((nvap->nva_rawdev.specdata1 != npnvap->nva_rawdev.specdata1) || + (nvap->nva_rawdev.specdata2 != npnvap->nva_rawdev.specdata2))) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_FILEID) && + (nvap->nva_fileid != npnvap->nva_fileid)) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ARCHIVE) && + ((nvap->nva_flags & NFS_FFLAG_ARCHIVED) != (npnvap->nva_flags & NFS_FFLAG_ARCHIVED))) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_HIDDEN) && + ((nvap->nva_flags & NFS_FFLAG_HIDDEN) != (npnvap->nva_flags & NFS_FFLAG_HIDDEN))) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_CREATE) && + ((nvap->nva_timesec[NFSTIME_CREATE] != npnvap->nva_timesec[NFSTIME_CREATE]) || + (nvap->nva_timensec[NFSTIME_CREATE] != npnvap->nva_timensec[NFSTIME_CREATE]))) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_BACKUP) && + ((nvap->nva_timesec[NFSTIME_BACKUP] != npnvap->nva_timesec[NFSTIME_BACKUP]) || + (nvap->nva_timensec[NFSTIME_BACKUP] != npnvap->nva_timensec[NFSTIME_BACKUP]))) + events |= VNODE_EVENT_ATTRIB; + } + + /* Copy the attributes to the attribute cache */ + bcopy((caddr_t)nvap, (caddr_t)npnvap, sizeof(*nvap)); + + microuptime(&now); + np->n_attrstamp = now.tv_sec; + np->n_xid = *xidp; + /* NFS_FFLAG_IS_ATTR and NFS_FFLAG_TRIGGER_REFERRAL need to be sticky... */ + if (vp && xattr) + nvap->nva_flags |= xattr; + if (vp && referral) + nvap->nva_flags |= referral; + + if (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL)) { + /* we're updating the ACL */ + if (nvap->nva_acl) { + /* make a copy of the acl for the cache */ + npnvap->nva_acl = kauth_acl_alloc(nvap->nva_acl->acl_entrycount); + if (npnvap->nva_acl) { + bcopy(nvap->nva_acl, npnvap->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl)); + } else { + /* can't make a copy to cache, invalidate ACL cache */ + NFS_BITMAP_CLR(npnvap->nva_bitmap, NFS_FATTR_ACL); + NACLINVALIDATE(np); + aclbit = 0; } - break; } - count -= mlen; + if (acl) { + kauth_acl_free(acl); + acl = NULL; + } + } + if (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL)) { + /* update the ACL timestamp */ + np->n_aclstamp = now.tv_sec; + } else { + /* we aren't updating the ACL, so restore original values */ + if (aclbit) + NFS_BITMAP_SET(npnvap->nva_bitmap, NFS_FATTR_ACL); + npnvap->nva_acl = acl; } - for (m = mbuf_next(m); m; m = mbuf_next(m)) - mbuf_setlen(m, 0); -} -/* - * Make these functions instead of macros, so that the kernel text size - * doesn't get too big... - */ -void -nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) - struct nfsrv_descript *nfsd; - int before_ret; - struct vnode_attr *before_vap; - int after_ret; - struct vnode_attr *after_vap; - mbuf_t *mbp; - char **bposp; -{ - mbuf_t mb = *mbp, mb2; - char *bpos = *bposp; - u_long *tl; +#if CONFIG_TRIGGERS + /* + * For NFSv4, if the fsid doesn't match the fsid for the mount, then + * this node is for a different file system on the server. So we mark + * this node as a trigger node that will trigger the mirror mount. + */ + if ((nmp->nm_vers >= NFS_VER4) && (nvap->nva_type == VDIR) && + ((np->n_vattr.nva_fsid.major != nmp->nm_fsid.major) || + (np->n_vattr.nva_fsid.minor != nmp->nm_fsid.minor))) + np->n_vattr.nva_flags |= NFS_FFLAG_TRIGGER; +#endif - if (before_ret) { - nfsm_build(tl, u_long *, NFSX_UNSIGNED); - *tl = nfs_false; - } else { - nfsm_build(tl, u_long *, 7 * NFSX_UNSIGNED); - *tl++ = nfs_true; - txdr_hyper(&(before_vap->va_data_size), tl); - tl += 2; - txdr_nfsv3time(&(before_vap->va_modify_time), tl); - tl += 2; - txdr_nfsv3time(&(before_vap->va_change_time), tl); - } - *bposp = bpos; - *mbp = mb; - nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); -} + if (!vp || (nvap->nva_type != VREG)) { + np->n_size = nvap->nva_size; + } else if (nvap->nva_size != np->n_size) { + FSDBG(527, np, nvap->nva_size, np->n_size, (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 6 : 4)); + if (!UBCINFOEXISTS(vp) || (dontshrink && (nvap->nva_size < np->n_size))) { + /* asked not to shrink, so stick with current size */ + FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001); + nvap->nva_size = np->n_size; + NATTRINVALIDATE(np); + } else if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) { + /* if we've modified, stick with larger size */ + FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002); + nvap->nva_size = np->n_size; + npnvap->nva_size = np->n_size; + } else { + /* + * n_size is protected by the data lock, so we need to + * defer updating it until it's safe. We save the new size + * and set a flag and it'll get updated the next time we get/drop + * the data lock or the next time we do a getattr. + */ + np->n_newsize = nvap->nva_size; + SET(np->n_flag, NUPDATESIZE); + if (monitored) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_EXTEND; + } + } -void -nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) - struct nfsrv_descript *nfsd; - int after_ret; - struct vnode_attr *after_vap; - mbuf_t *mbp; - char **bposp; -{ - mbuf_t mb = *mbp, mb2; - char *bpos = *bposp; - u_long *tl; - struct nfs_fattr *fp; - - if (after_ret) { - nfsm_build(tl, u_long *, NFSX_UNSIGNED); - *tl = nfs_false; - } else { - nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3FATTR); - *tl++ = nfs_true; - fp = (struct nfs_fattr *)tl; - nfsm_srvfattr(nfsd, after_vap, fp); + if (np->n_flag & NCHG) { + if (np->n_flag & NACC) { + nvap->nva_timesec[NFSTIME_ACCESS] = np->n_atim.tv_sec; + nvap->nva_timensec[NFSTIME_ACCESS] = np->n_atim.tv_nsec; + } + if (np->n_flag & NUPD) { + nvap->nva_timesec[NFSTIME_MODIFY] = np->n_mtim.tv_sec; + nvap->nva_timensec[NFSTIME_MODIFY] = np->n_mtim.tv_nsec; + } } - *mbp = mb; - *bposp = bpos; + +out: + if (monitored && events) + nfs_vnode_notify(np, events); + FSDBG_BOT(527, error, np, np->n_size, *xidp); + return (error); } -void -nfsm_srvfattr(nfsd, vap, fp) - struct nfsrv_descript *nfsd; - struct vnode_attr *vap; - struct nfs_fattr *fp; +/* + * Calculate the attribute timeout based on + * how recently the file has been modified. + */ +int +nfs_attrcachetimeout(nfsnode_t np) { + struct nfsmount *nmp; + struct timeval now; + int isdir; + uint32_t timeo; - // XXX Should we assert here that all fields are supported? + if (!(nmp = NFSTONMP(np))) + return (0); + + isdir = vnode_isdir(NFSTOV(np)); - fp->fa_nlink = txdr_unsigned(vap->va_nlink); - fp->fa_uid = txdr_unsigned(vap->va_uid); - fp->fa_gid = txdr_unsigned(vap->va_gid); - if (nfsd->nd_flag & ND_NFSV3) { - fp->fa_type = vtonfsv3_type(vap->va_type); - fp->fa_mode = vtonfsv3_mode(vap->va_mode); - txdr_hyper(&vap->va_data_size, &fp->fa3_size); - txdr_hyper(&vap->va_data_alloc, &fp->fa3_used); - fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev)); - fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev)); - fp->fa3_fsid.nfsuquad[0] = 0; - fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); - txdr_hyper(&vap->va_fileid, &fp->fa3_fileid); - txdr_nfsv3time(&vap->va_access_time, &fp->fa3_atime); - txdr_nfsv3time(&vap->va_modify_time, &fp->fa3_mtime); - txdr_nfsv3time(&vap->va_change_time, &fp->fa3_ctime); + if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) { + /* If we have a delegation, we always use the max timeout. */ + timeo = isdir ? nmp->nm_acdirmax : nmp->nm_acregmax; + } else if ((np)->n_flag & NMODIFIED) { + /* If we have modifications, we always use the min timeout. */ + timeo = isdir ? nmp->nm_acdirmin : nmp->nm_acregmin; } else { - fp->fa_type = vtonfsv2_type(vap->va_type); - fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); - fp->fa2_size = txdr_unsigned(vap->va_data_size); - fp->fa2_blocksize = txdr_unsigned(vap->va_iosize); - if (vap->va_type == VFIFO) - fp->fa2_rdev = 0xffffffff; - else - fp->fa2_rdev = txdr_unsigned(vap->va_rdev); - fp->fa2_blocks = txdr_unsigned(vap->va_data_alloc / NFS_FABLKSIZE); - fp->fa2_fsid = txdr_unsigned(vap->va_fsid); - fp->fa2_fileid = txdr_unsigned(vap->va_fileid); - txdr_nfsv2time(&vap->va_access_time, &fp->fa2_atime); - txdr_nfsv2time(&vap->va_modify_time, &fp->fa2_mtime); - txdr_nfsv2time(&vap->va_change_time, &fp->fa2_ctime); + /* Otherwise, we base the timeout on how old the file seems. */ + /* Note that if the client and server clocks are way out of sync, */ + /* timeout will probably get clamped to a min or max value */ + microtime(&now); + timeo = (now.tv_sec - (np)->n_vattr.nva_timesec[NFSTIME_MODIFY]) / 10; + if (isdir) { + if (timeo < nmp->nm_acdirmin) + timeo = nmp->nm_acdirmin; + else if (timeo > nmp->nm_acdirmax) + timeo = nmp->nm_acdirmax; + } else { + if (timeo < nmp->nm_acregmin) + timeo = nmp->nm_acregmin; + else if (timeo > nmp->nm_acregmax) + timeo = nmp->nm_acregmax; + } } + + return (timeo); } /* - * Build hash lists of net addresses and hang them off the NFS export. - * Called by nfsrv_export() to set up the lists of export addresses. + * Check the attribute cache time stamp. + * If the cache is valid, copy contents to *nvaper and return 0 + * otherwise return an error. + * Must be called with the node locked. */ -static int -nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) +int +nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int flags) { - struct nfs_export_net_args nxna; - struct nfs_netopt *no; - struct radix_node_head *rnh; - struct radix_node *rn; - struct sockaddr *saddr, *smask; - struct domain *dom; - int i, error; - unsigned int net; - user_addr_t uaddr; - kauth_cred_t cred; - struct ucred temp_cred; + struct nfs_vattr *nvap; + struct timeval nowup; + int32_t timeo; - uaddr = unxa->nxa_nets; - for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) { - error = copyin(uaddr, &nxna, sizeof(nxna)); - if (error) - return (error); + /* Check if the attributes are valid. */ + if (!NATTRVALID(np) || ((flags & NGA_ACL) && !NACLVALID(np))) { + FSDBG(528, np, 0, 0xffffff01, ENOENT); + OSAddAtomic(1, &nfsstats.attrcache_misses); + return (ENOENT); + } - if (nxna.nxna_flags & (NX_MAPROOT|NX_MAPALL)) { - bzero(&temp_cred, sizeof(temp_cred)); - temp_cred.cr_uid = nxna.nxna_cred.cr_uid; - temp_cred.cr_ngroups = nxna.nxna_cred.cr_ngroups; - for (i=0; i < nxna.nxna_cred.cr_ngroups && i < NGROUPS; i++) - temp_cred.cr_groups[i] = nxna.nxna_cred.cr_groups[i]; + /* Verify the cached attributes haven't timed out. */ + timeo = nfs_attrcachetimeout(np); + microuptime(&nowup); + if ((nowup.tv_sec - np->n_attrstamp) >= timeo) { + FSDBG(528, np, 0, 0xffffff02, ENOENT); + OSAddAtomic(1, &nfsstats.attrcache_misses); + return (ENOENT); + } + if ((flags & NGA_ACL) && ((nowup.tv_sec - np->n_aclstamp) >= timeo)) { + FSDBG(528, np, 0, 0xffffff02, ENOENT); + OSAddAtomic(1, &nfsstats.attrcache_misses); + return (ENOENT); + } - cred = kauth_cred_create(&temp_cred); - if (!cred) - return (ENOMEM); + nvap = &np->n_vattr; + FSDBG(528, np, nvap->nva_size, np->n_size, 0xcace); + OSAddAtomic(1, &nfsstats.attrcache_hits); + + if (nvap->nva_type != VREG) { + np->n_size = nvap->nva_size; + } else if (nvap->nva_size != np->n_size) { + FSDBG(528, np, nvap->nva_size, np->n_size, (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 6 : 4)); + if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) { + /* if we've modified, stick with larger size */ + nvap->nva_size = np->n_size; } else { - cred = NOCRED; + /* + * n_size is protected by the data lock, so we need to + * defer updating it until it's safe. We save the new size + * and set a flag and it'll get updated the next time we get/drop + * the data lock or the next time we do a getattr. + */ + np->n_newsize = nvap->nva_size; + SET(np->n_flag, NUPDATESIZE); } + } - if (nxna.nxna_addr.ss_len == 0) { - /* No address means this is a default/world export */ - if (nx->nx_flags & NX_DEFAULTEXPORT) - return (EEXIST); - nx->nx_flags |= NX_DEFAULTEXPORT; - nx->nx_defopt.nxo_flags = nxna.nxna_flags; - nx->nx_defopt.nxo_cred = cred; - nx->nx_expcnt++; - continue; + bcopy((caddr_t)nvap, (caddr_t)nvaper, sizeof(struct nfs_vattr)); + if (np->n_flag & NCHG) { + if (np->n_flag & NACC) { + nvaper->nva_timesec[NFSTIME_ACCESS] = np->n_atim.tv_sec; + nvaper->nva_timensec[NFSTIME_ACCESS] = np->n_atim.tv_nsec; } - - i = sizeof(struct nfs_netopt); - i += nxna.nxna_addr.ss_len + nxna.nxna_mask.ss_len; - MALLOC(no, struct nfs_netopt *, i, M_NETADDR, M_WAITOK); - if (!no) - return (ENOMEM); - bzero(no, sizeof(struct nfs_netopt)); - no->no_opt.nxo_flags = nxna.nxna_flags; - no->no_opt.nxo_cred = cred; - - saddr = (struct sockaddr *)(no + 1); - bcopy(&nxna.nxna_addr, saddr, nxna.nxna_addr.ss_len); - if (nxna.nxna_mask.ss_len) { - smask = (struct sockaddr *)((caddr_t)saddr + nxna.nxna_addr.ss_len); - bcopy(&nxna.nxna_mask, smask, nxna.nxna_mask.ss_len); + if (np->n_flag & NUPD) { + nvaper->nva_timesec[NFSTIME_MODIFY] = np->n_mtim.tv_sec; + nvaper->nva_timensec[NFSTIME_MODIFY] = np->n_mtim.tv_nsec; + } + } + if (nvap->nva_acl) { + if (flags & NGA_ACL) { + nvaper->nva_acl = kauth_acl_alloc(nvap->nva_acl->acl_entrycount); + if (!nvaper->nva_acl) + return (ENOMEM); + bcopy(nvap->nva_acl, nvaper->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl)); } else { - smask = NULL; + nvaper->nva_acl = NULL; } - i = saddr->sa_family; - if ((rnh = nx->nx_rtable[i]) == 0) { - /* - * Seems silly to initialize every AF when most are not - * used, do so on demand here - */ - for (dom = domains; dom; dom = dom->dom_next) - if (dom->dom_family == i && dom->dom_rtattach) { - dom->dom_rtattach((void **)&nx->nx_rtable[i], - dom->dom_rtoffset); - break; - } - if ((rnh = nx->nx_rtable[i]) == 0) { - if (IS_VALID_CRED(cred)) - kauth_cred_unref(&cred); - _FREE(no, M_NETADDR); - return (ENOBUFS); - } + } + return (0); +} + +/* + * When creating file system objects: + * Don't bother setting UID if it's the same as the credential performing the create. + * Don't bother setting GID if it's the same as the directory or credential. + */ +void +nfs_avoid_needless_id_setting_on_create(nfsnode_t dnp, struct vnode_attr *vap, vfs_context_t ctx) +{ + if (VATTR_IS_ACTIVE(vap, va_uid)) { + if (kauth_cred_getuid(vfs_context_ucred(ctx)) == vap->va_uid) { + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_uuuid); } - rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, no->no_rnodes); - if (rn == 0) { - /* - * One of the reasons that rnh_addaddr may fail is that - * the entry already exists. To check for this case, we - * look up the entry to see if it is there. If so, we - * do not need to make a new entry but do continue. - */ - int matched = 0; - rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); - if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && - (((struct nfs_netopt *)rn)->no_opt.nxo_flags == nxna.nxna_flags)) { - kauth_cred_t cred2 = ((struct nfs_netopt *)rn)->no_opt.nxo_cred; - if (cred && cred2 && (cred->cr_uid == cred2->cr_uid) && - (cred->cr_ngroups == cred2->cr_ngroups)) { - for (i=0; i < cred2->cr_ngroups && i < NGROUPS; i++) - if (cred->cr_groups[i] != cred2->cr_groups[i]) - break; - if (i >= cred2->cr_ngroups || i >= NGROUPS) - matched = 1; - } - } - if (IS_VALID_CRED(cred)) - kauth_cred_unref(&cred); - _FREE(no, M_NETADDR); - if (matched) - continue; - return (EPERM); + } + if (VATTR_IS_ACTIVE(vap, va_gid)) { + if ((vap->va_gid == dnp->n_vattr.nva_gid) || + (kauth_cred_getgid(vfs_context_ucred(ctx)) == vap->va_gid)) { + VATTR_CLEAR_ACTIVE(vap, va_gid); + VATTR_CLEAR_ACTIVE(vap, va_guuid); } - nx->nx_expcnt++; } - - return (0); } /* - * In order to properly track an export's netopt count, we need to pass - * an additional argument to nfsrv_free_netopt() so that it can decrement - * the export's netopt count. + * Convert a universal address string to a sockaddr structure. + * + * Universal addresses can be in the following formats: + * + * d = decimal (IPv4) + * x = hexadecimal (IPv6) + * p = port (decimal) + * + * d.d.d.d + * d.d.d.d.p.p + * x:x:x:x:x:x:x:x + * x:x:x:x:x:x:x:x.p.p + * x:x:x:x:x:x:d.d.d.d + * x:x:x:x:x:x:d.d.d.d.p.p + * + * IPv6 strings can also have a series of zeroes elided + * IPv6 strings can also have a %scope suffix at the end (after any port) + * + * rules & exceptions: + * - value before : is hex + * - value before . is dec + * - once . hit, all values are dec + * - hex+port case means value before first dot is actually hex + * - . is always preceded by digits except if last hex was double-colon + * + * scan, converting #s to bytes + * first time a . is encountered, scan the rest to count them. + * 2 dots = just port + * 3 dots = just IPv4 no port + * 5 dots = IPv4 and port */ -struct nfsrv_free_netopt_arg { - uint32_t *cnt; - struct radix_node_head *rnh; -}; -static int -nfsrv_free_netopt(struct radix_node *rn, void *w) +#define IS_DIGIT(C) \ + (((C) >= '0') && ((C) <= '9')) + +#define IS_XDIGIT(C) \ + (IS_DIGIT(C) || \ + (((C) >= 'A') && ((C) <= 'F')) || \ + (((C) >= 'a') && ((C) <= 'f'))) + +int +nfs_uaddr2sockaddr(const char *uaddr, struct sockaddr *addr) { - struct nfsrv_free_netopt_arg *fna = (struct nfsrv_free_netopt_arg *)w; - struct radix_node_head *rnh = fna->rnh; - uint32_t *cnt = fna->cnt; - struct nfs_netopt *nno = (struct nfs_netopt *)rn; + const char *p, *pd; /* pointers to current character in scan */ + const char *pnum; /* pointer to current number to decode */ + const char *pscope; /* pointer to IPv6 scope ID */ + uint8_t a[18]; /* octet array to store address bytes */ + int i; /* index of next octet to decode */ + int dci; /* index of octet to insert double-colon zeroes */ + int dcount, xdcount; /* count of digits in current number */ + int needmore; /* set when we know we need more input (e.g. after colon, period) */ + int dots; /* # of dots */ + int hex; /* contains hex values */ + unsigned long val; /* decoded value */ + int s; /* index used for sliding array to insert elided zeroes */ + +#define HEXVALUE 0 +#define DECIMALVALUE 1 +#define GET(TYPE) \ + do { \ + if ((dcount <= 0) || (dcount > (((TYPE) == DECIMALVALUE) ? 3 : 4))) \ + return (0); \ + if (((TYPE) == DECIMALVALUE) && xdcount) \ + return (0); \ + val = strtoul(pnum, NULL, ((TYPE) == DECIMALVALUE) ? 10 : 16); \ + if (((TYPE) == DECIMALVALUE) && (val >= 256)) \ + return (0); \ + /* check if there is room left in the array */ \ + if (i > (int)(sizeof(a) - (((TYPE) == HEXVALUE) ? 2 : 1) - ((dci != -1) ? 2 : 0))) \ + return (0); \ + if ((TYPE) == HEXVALUE) \ + a[i++] = ((val >> 8) & 0xff); \ + a[i++] = (val & 0xff); \ + } while (0) + + hex = 0; + dots = 0; + dci = -1; + i = dcount = xdcount = 0; + pnum = p = uaddr; + pscope = NULL; + needmore = 1; + if ((*p == ':') && (*++p != ':')) /* if it starts with colon, gotta be a double */ + return (0); - (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); - if (IS_VALID_CRED(nno->no_opt.nxo_cred)) - kauth_cred_unref(&nno->no_opt.nxo_cred); - _FREE((caddr_t)rn, M_NETADDR); - *cnt -= 1; - return (0); + while (*p) { + if (IS_XDIGIT(*p)) { + dcount++; + if (!IS_DIGIT(*p)) + xdcount++; + needmore = 0; + p++; + } else if (*p == '.') { + /* rest is decimal IPv4 dotted quad and/or port */ + if (!dots) { + /* this is the first, so count them */ + for (pd = p; *pd; pd++) { + if (*pd == '.') { + if (++dots > 5) + return (0); + } else if (hex && (*pd == '%')) { + break; + } else if ((*pd < '0') || (*pd > '9')) { + return (0); + } + } + if ((dots != 2) && (dots != 3) && (dots != 5)) + return (0); + if (hex && (dots == 2)) { /* hex+port */ + if (!dcount && needmore) + return (0); + if (dcount) /* last hex may be elided zero */ + GET(HEXVALUE); + } else { + GET(DECIMALVALUE); + } + } else { + GET(DECIMALVALUE); + } + dcount = xdcount = 0; + needmore = 1; + pnum = ++p; + } else if (*p == ':') { + hex = 1; + if (dots) + return (0); + if (!dcount) { /* missing number, probably double colon */ + if (dci >= 0) /* can only have one double colon */ + return (0); + dci = i; + needmore = 0; + } else { + GET(HEXVALUE); + dcount = xdcount = 0; + needmore = 1; + } + pnum = ++p; + } else if (*p == '%') { /* scope ID delimiter */ + if (!hex) + return (0); + p++; + pscope = p; + break; + } else { /* unexpected character */ + return (0); + } + } + if (needmore && !dcount) + return (0); + if (dcount) /* decode trailing number */ + GET(dots ? DECIMALVALUE : HEXVALUE); + if (dci >= 0) { /* got a double-colon at i, need to insert a range of zeroes */ + /* if we got a port, slide to end of array */ + /* otherwise, slide to end of address (non-port) values */ + int end = ((dots == 2) || (dots == 5)) ? sizeof(a) : (sizeof(a) - 2); + if (i % 2) /* length of zero range must be multiple of 2 */ + return (0); + if (i >= end) /* no room? */ + return (0); + /* slide (i-dci) numbers up from index dci */ + for (s=0; s < (i - dci); s++) + a[end-1-s] = a[i-1-s]; + /* zero (end-i) numbers at index dci */ + for (s=0; s < (end - i); s++) + a[dci+s] = 0; + i = end; + } + + /* copy out resulting socket address */ + if (hex) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6*)addr; + if ((((dots == 0) || (dots == 3)) && (i != (sizeof(a)-2)))) + return (0); + if ((((dots == 2) || (dots == 5)) && (i != sizeof(a)))) + return (0); + bzero(sin6, sizeof(struct sockaddr_in6)); + sin6->sin6_len = sizeof(struct sockaddr_in6); + sin6->sin6_family = AF_INET6; + bcopy(a, &sin6->sin6_addr.s6_addr, sizeof(struct in6_addr)); + if ((dots == 5) || (dots == 2)) + sin6->sin6_port = htons((a[16] << 8) | a[17]); + if (pscope) { + for (p=pscope; IS_DIGIT(*p); p++) + ; + if (*p && !IS_DIGIT(*p)) { /* name */ + ifnet_t interface = NULL; + if (ifnet_find_by_name(pscope, &interface) == 0) + sin6->sin6_scope_id = ifnet_index(interface); + if (interface) + ifnet_release(interface); + } else { /* decimal number */ + sin6->sin6_scope_id = strtoul(pscope, NULL, 10); + } + /* XXX should we also embed scope id for linklocal? */ + } + } else { + struct sockaddr_in *sin = (struct sockaddr_in*)addr; + if ((dots != 3) && (dots != 5)) + return (0); + if ((dots == 3) && (i != 4)) + return (0); + if ((dots == 5) && (i != 6)) + return (0); + bzero(sin, sizeof(struct sockaddr_in)); + sin->sin_len = sizeof(struct sockaddr_in); + sin->sin_family = AF_INET; + bcopy(a, &sin->sin_addr.s_addr, sizeof(struct in_addr)); + if (dots == 5) + sin->sin_port = htons((a[4] << 8) | a[5]); + } + return (1); } + +#endif /* NFSCLIENT */ + /* - * Free the net address hash lists that are hanging off the mount points. + * Schedule a callout thread to run an NFS timer function + * interval milliseconds in the future. */ -static void -nfsrv_free_addrlist(struct nfs_export *nx) +void +nfs_interval_timer_start(thread_call_t call, int interval) { - int i; - struct radix_node_head *rnh; - struct nfsrv_free_netopt_arg fna; + uint64_t deadline; - for (i = 0; i <= AF_MAX; i++) - if ( (rnh = nx->nx_rtable[i]) ) { - fna.rnh = rnh; - fna.cnt = &nx->nx_expcnt; - (*rnh->rnh_walktree)(rnh, nfsrv_free_netopt, (caddr_t)&fna); - _FREE((caddr_t)rnh, M_RTABLE); - nx->nx_rtable[i] = 0; - } + clock_interval_to_deadline(interval, 1000 * 1000, &deadline); + thread_call_enter_delayed(call, deadline); } -void enablequotas(struct mount *mp, vfs_context_t ctx); // XXX +#if NFSSERVER + +int nfsrv_cmp_secflavs(struct nfs_sec *, struct nfs_sec *); +int nfsrv_hang_addrlist(struct nfs_export *, struct user_nfs_export_args *); +int nfsrv_free_netopt(struct radix_node *, void *); +int nfsrv_free_addrlist(struct nfs_export *, struct user_nfs_export_args *); +struct nfs_export_options *nfsrv_export_lookup(struct nfs_export *, mbuf_t); +struct nfs_export *nfsrv_fhtoexport(struct nfs_filehandle *); +struct nfs_user_stat_node *nfsrv_get_user_stat_node(struct nfs_active_user_list *, struct sockaddr *, uid_t); +void nfsrv_init_user_list(struct nfs_active_user_list *); +void nfsrv_free_user_list(struct nfs_active_user_list *); + +/* + * add NFSv3 WCC data to an mbuf chain + */ int -nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) +nfsm_chain_add_wcc_data_f( + struct nfsrv_descript *nd, + struct nfsm_chain *nmc, + int preattrerr, + struct vnode_attr *prevap, + int postattrerr, + struct vnode_attr *postvap) { - int error = 0, pathlen; - struct nfs_exportfs *nxfs, *nxfs2, *nxfs3; - struct nfs_export *nx, *nx2, *nx3; - struct nfs_filehandle nfh; - struct nameidata mnd, xnd; - vnode_t mvp = NULL, xvp = NULL; - mount_t mp; - char path[MAXPATHLEN]; - int expisroot; + int error = 0; - if (unxa->nxa_flags & NXA_DELETE_ALL) { - /* delete all exports on all file systems */ - lck_rw_lock_exclusive(&nfs_export_rwlock); - while ((nxfs = LIST_FIRST(&nfs_exports))) { - mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path); - if (mp) - mp->mnt_flag &= ~MNT_EXPORTED; - /* delete all exports on this file system */ - while ((nx = LIST_FIRST(&nxfs->nxfs_exports))) { - LIST_REMOVE(nx, nx_next); - LIST_REMOVE(nx, nx_hash); - /* delete all netopts for this export */ - nfsrv_free_addrlist(nx); - nx->nx_flags &= ~NX_DEFAULTEXPORT; - if (IS_VALID_CRED(nx->nx_defopt.nxo_cred)) { - kauth_cred_unref(&nx->nx_defopt.nxo_cred); - } - FREE(nx->nx_path, M_TEMP); - FREE(nx, M_TEMP); - } - LIST_REMOVE(nxfs, nxfs_next); - FREE(nxfs->nxfs_path, M_TEMP); - FREE(nxfs, M_TEMP); - } - lck_rw_done(&nfs_export_rwlock); - return (0); + if (preattrerr) { + nfsm_chain_add_32(error, nmc, FALSE); + } else { + nfsm_chain_add_32(error, nmc, TRUE); + nfsm_chain_add_64(error, nmc, prevap->va_data_size); + nfsm_chain_add_time(error, nmc, NFS_VER3, &prevap->va_modify_time); + nfsm_chain_add_time(error, nmc, NFS_VER3, &prevap->va_change_time); } + nfsm_chain_add_postop_attr(error, nd, nmc, postattrerr, postvap); - error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, (size_t *)&pathlen); + return (error); +} + +/* + * Extract a lookup path from the given mbufs and store it in + * a newly allocated buffer saved in the given nameidata structure. + */ +int +nfsm_chain_get_path_namei( + struct nfsm_chain *nmc, + uint32_t len, + struct nameidata *nip) +{ + struct componentname *cnp = &nip->ni_cnd; + int error = 0; + char *cp; + + if (len > (MAXPATHLEN - 1)) + return (ENAMETOOLONG); + + /* + * Get a buffer for the name to be translated, and copy the + * name into the buffer. + */ + MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (!cnp->cn_pnbuf) + return (ENOMEM); + cnp->cn_pnlen = MAXPATHLEN; + cnp->cn_flags |= HASBUF; + + /* Copy the name from the mbuf list to the string */ + cp = cnp->cn_pnbuf; + nfsm_chain_get_opaque(error, nmc, len, cp); if (error) - return (error); + goto out; + cnp->cn_pnbuf[len] = '\0'; + + /* sanity check the string */ + if ((strlen(cp) != len) || strchr(cp, '/')) + error = EACCES; +out: + if (error) { + if (cnp->cn_pnbuf) + FREE_ZONE(cnp->cn_pnbuf, MAXPATHLEN, M_NAMEI); + cnp->cn_flags &= ~HASBUF; + } else { + nip->ni_pathlen = len; + } + return (error); +} + +/* + * Set up nameidata for a lookup() call and do it. + */ +int +nfsrv_namei( + struct nfsrv_descript *nd, + vfs_context_t ctx, + struct nameidata *nip, + struct nfs_filehandle *nfhp, + vnode_t *retdirp, + struct nfs_export **nxp, + struct nfs_export_options **nxop) +{ + vnode_t dp; + int error; + struct componentname *cnp = &nip->ni_cnd; + uint32_t cnflags; + char *tmppn; - lck_rw_lock_exclusive(&nfs_export_rwlock); + *retdirp = NULL; - // first check if we've already got an exportfs with the given ID - LIST_FOREACH(nxfs, &nfs_exports, nxfs_next) { - if (nxfs->nxfs_id == unxa->nxa_fsid) - break; + /* + * Extract and set starting directory. + */ + error = nfsrv_fhtovp(nfhp, nd, &dp, nxp, nxop); + if (error) + goto out; + error = nfsrv_credcheck(nd, ctx, *nxp, *nxop); + if (error || (vnode_vtype(dp) != VDIR)) { + vnode_put(dp); + error = ENOTDIR; + goto out; } - if (nxfs) { - /* verify exported FS path matches given path */ - if (strcmp(path, nxfs->nxfs_path)) { - error = EEXIST; - goto unlock_out; - } - mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path); - /* find exported FS root vnode */ - NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, - UIO_SYSSPACE, nxfs->nxfs_path, ctx); - error = namei(&mnd); - if (error) - goto unlock_out; - mvp = mnd.ni_vp; - /* make sure it's (still) the root of a file system */ - if ((mvp->v_flag & VROOT) == 0) { - error = EINVAL; - goto out; - } - /* sanity check: this should be same mount */ - if (mp != vnode_mount(mvp)) { - error = EINVAL; - goto out; - } - } else { - /* no current exported file system with that ID */ - if (!(unxa->nxa_flags & NXA_ADD)) { - error = ENOENT; - goto unlock_out; - } + *retdirp = dp; - /* find exported FS root vnode */ - NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, - UIO_SYSSPACE, path, ctx); - error = namei(&mnd); - if (error) - goto unlock_out; - mvp = mnd.ni_vp; - /* make sure it's the root of a file system */ - if ((mvp->v_flag & VROOT) == 0) { - error = EINVAL; - goto out; - } - mp = vnode_mount(mvp); + nip->ni_cnd.cn_context = ctx; - /* make sure the file system is NFS-exportable */ - nfh.nfh_len = NFS_MAX_FID_SIZE; - error = VFS_VPTOFH(mvp, &nfh.nfh_len, &nfh.nfh_fid[0], NULL); - if (!error && (nfh.nfh_len > (int)NFS_MAX_FID_SIZE)) - error = EIO; - if (error) - goto out; + if (*nxop && ((*nxop)->nxo_flags & NX_READONLY)) + cnp->cn_flags |= RDONLY; - /* add an exportfs for it */ - MALLOC(nxfs, struct nfs_exportfs *, sizeof(struct nfs_exportfs), M_TEMP, M_WAITOK); - if (!nxfs) { - error = ENOMEM; - goto out; + cnp->cn_flags |= NOCROSSMOUNT; + cnp->cn_nameptr = cnp->cn_pnbuf; + nip->ni_usedvp = nip->ni_startdir = dp; + + /* + * And call lookup() to do the real work + */ + cnflags = nip->ni_cnd.cn_flags; /* store in case we have to restore */ + while ((error = lookup(nip)) == ERECYCLE) { + nip->ni_cnd.cn_flags = cnflags; + cnp->cn_nameptr = cnp->cn_pnbuf; + nip->ni_usedvp = nip->ni_dvp = nip->ni_startdir = dp; + } + if (error) + goto out; + + /* Check for encountering a symbolic link */ + if (cnp->cn_flags & ISSYMLINK) { +#ifndef __LP64__ + if ((cnp->cn_flags & FSNODELOCKHELD)) { + cnp->cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(nip->ni_dvp, NULL); } - bzero(nxfs, sizeof(struct nfs_exportfs)); - nxfs->nxfs_id = unxa->nxa_fsid; - MALLOC(nxfs->nxfs_path, char*, pathlen, M_TEMP, M_WAITOK); - if (!nxfs->nxfs_path) { - FREE(nxfs, M_TEMP); - error = ENOMEM; - goto out; +#endif /* __LP64__ */ + if (cnp->cn_flags & (LOCKPARENT | WANTPARENT)) + vnode_put(nip->ni_dvp); + if (nip->ni_vp) { + vnode_put(nip->ni_vp); + nip->ni_vp = NULL; } - bcopy(path, nxfs->nxfs_path, pathlen); - /* insert into list in reverse-sorted order */ + error = EINVAL; + } +out: + if (error) { + tmppn = cnp->cn_pnbuf; + cnp->cn_pnbuf = NULL; + cnp->cn_flags &= ~HASBUF; + FREE_ZONE(tmppn, cnp->cn_pnlen, M_NAMEI); + } + return (error); +} + +/* + * A fiddled version of m_adj() that ensures null fill to a 4-byte + * boundary and only trims off the back end + */ +void +nfsm_adj(mbuf_t mp, int len, int nul) +{ + mbuf_t m, mnext; + int count, i, mlen; + char *cp; + + /* + * Trim from tail. Scan the mbuf chain, + * calculating its length and finding the last mbuf. + * If the adjustment only affects this mbuf, then just + * adjust and return. Otherwise, rescan and truncate + * after the remaining size. + */ + count = 0; + m = mp; + for (;;) { + mlen = mbuf_len(m); + count += mlen; + mnext = mbuf_next(m); + if (mnext == NULL) + break; + m = mnext; + } + if (mlen > len) { + mlen -= len; + mbuf_setlen(m, mlen); + if (nul > 0) { + cp = (caddr_t)mbuf_data(m) + mlen - nul; + for (i = 0; i < nul; i++) + *cp++ = '\0'; + } + return; + } + count -= len; + if (count < 0) + count = 0; + /* + * Correct length for chain is "count". + * Find the mbuf with last data, adjust its length, + * and toss data from remaining mbufs on chain. + */ + for (m = mp; m; m = mbuf_next(m)) { + mlen = mbuf_len(m); + if (mlen >= count) { + mlen = count; + mbuf_setlen(m, count); + if (nul > 0) { + cp = (caddr_t)mbuf_data(m) + mlen - nul; + for (i = 0; i < nul; i++) + *cp++ = '\0'; + } + break; + } + count -= mlen; + } + for (m = mbuf_next(m); m; m = mbuf_next(m)) + mbuf_setlen(m, 0); +} + +/* + * Trim the header out of the mbuf list and trim off any trailing + * junk so that the mbuf list has only the write data. + */ +int +nfsm_chain_trim_data(struct nfsm_chain *nmc, int len, int *mlen) +{ + int cnt = 0, dlen, adjust; + caddr_t data; + mbuf_t m; + + if (mlen) + *mlen = 0; + + /* trim header */ + for (m = nmc->nmc_mhead; m && (m != nmc->nmc_mcur); m = mbuf_next(m)) + mbuf_setlen(m, 0); + if (!m) + return (EIO); + + /* trim current mbuf */ + data = mbuf_data(m); + dlen = mbuf_len(m); + adjust = nmc->nmc_ptr - data; + dlen -= adjust; + if ((dlen > 0) && (adjust > 0)) { + if (mbuf_setdata(m, nmc->nmc_ptr, dlen)) + return(EIO); + } else + mbuf_setlen(m, dlen); + + /* skip next len bytes */ + for (; m && (cnt < len); m = mbuf_next(m)) { + dlen = mbuf_len(m); + cnt += dlen; + if (cnt > len) { + /* truncate to end of data */ + mbuf_setlen(m, dlen - (cnt - len)); + if (m == nmc->nmc_mcur) + nmc->nmc_left -= (cnt - len); + cnt = len; + } + } + if (mlen) + *mlen = cnt; + + /* trim any trailing data */ + if (m == nmc->nmc_mcur) + nmc->nmc_left = 0; + for (; m; m = mbuf_next(m)) + mbuf_setlen(m, 0); + + return (0); +} + +int +nfsm_chain_add_fattr( + struct nfsrv_descript *nd, + struct nfsm_chain *nmc, + struct vnode_attr *vap) +{ + int error = 0; + + // XXX Should we assert here that all fields are supported? + + nfsm_chain_add_32(error, nmc, vtonfs_type(vap->va_type, nd->nd_vers)); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_32(error, nmc, vap->va_mode & 07777); + } else { + nfsm_chain_add_32(error, nmc, vtonfsv2_mode(vap->va_type, vap->va_mode)); + } + nfsm_chain_add_32(error, nmc, vap->va_nlink); + nfsm_chain_add_32(error, nmc, vap->va_uid); + nfsm_chain_add_32(error, nmc, vap->va_gid); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_64(error, nmc, vap->va_data_size); + nfsm_chain_add_64(error, nmc, vap->va_data_alloc); + nfsm_chain_add_32(error, nmc, major(vap->va_rdev)); + nfsm_chain_add_32(error, nmc, minor(vap->va_rdev)); + nfsm_chain_add_64(error, nmc, vap->va_fsid); + nfsm_chain_add_64(error, nmc, vap->va_fileid); + } else { + nfsm_chain_add_32(error, nmc, vap->va_data_size); + nfsm_chain_add_32(error, nmc, NFS_FABLKSIZE); + if (vap->va_type == VFIFO) + nfsm_chain_add_32(error, nmc, 0xffffffff); + else + nfsm_chain_add_32(error, nmc, vap->va_rdev); + nfsm_chain_add_32(error, nmc, vap->va_data_alloc / NFS_FABLKSIZE); + nfsm_chain_add_32(error, nmc, vap->va_fsid); + nfsm_chain_add_32(error, nmc, vap->va_fileid); + } + nfsm_chain_add_time(error, nmc, nd->nd_vers, &vap->va_access_time); + nfsm_chain_add_time(error, nmc, nd->nd_vers, &vap->va_modify_time); + nfsm_chain_add_time(error, nmc, nd->nd_vers, &vap->va_change_time); + + return (error); +} + +int +nfsm_chain_get_sattr( + struct nfsrv_descript *nd, + struct nfsm_chain *nmc, + struct vnode_attr *vap) +{ + int error = 0; + uint32_t val = 0; + uint64_t val64; + struct timespec now; + + if (nd->nd_vers == NFS_VER2) { + /* + * There is/was a bug in the Sun client that puts 0xffff in the mode + * field of sattr when it should put in 0xffffffff. The u_short + * doesn't sign extend. So check the low order 2 bytes for 0xffff. + */ + nfsm_chain_get_32(error, nmc, val); + if ((val & 0xffff) != 0xffff) { + VATTR_SET(vap, va_mode, val & 07777); + /* save the "type" bits for NFSv2 create */ + VATTR_SET(vap, va_type, IFTOVT(val)); + VATTR_CLEAR_ACTIVE(vap, va_type); + } + nfsm_chain_get_32(error, nmc, val); + if (val != (uint32_t)-1) + VATTR_SET(vap, va_uid, val); + nfsm_chain_get_32(error, nmc, val); + if (val != (uint32_t)-1) + VATTR_SET(vap, va_gid, val); + /* save the "size" bits for NFSv2 create (even if they appear unset) */ + nfsm_chain_get_32(error, nmc, val); + VATTR_SET(vap, va_data_size, val); + if (val == (uint32_t)-1) + VATTR_CLEAR_ACTIVE(vap, va_data_size); + nfsm_chain_get_time(error, nmc, NFS_VER2, + vap->va_access_time.tv_sec, + vap->va_access_time.tv_nsec); + if (vap->va_access_time.tv_sec != -1) + VATTR_SET_ACTIVE(vap, va_access_time); + nfsm_chain_get_time(error, nmc, NFS_VER2, + vap->va_modify_time.tv_sec, + vap->va_modify_time.tv_nsec); + if (vap->va_modify_time.tv_sec != -1) + VATTR_SET_ACTIVE(vap, va_modify_time); + return (error); + } + + /* NFSv3 */ + nfsm_chain_get_32(error, nmc, val); + if (val) { + nfsm_chain_get_32(error, nmc, val); + VATTR_SET(vap, va_mode, val & 07777); + } + nfsm_chain_get_32(error, nmc, val); + if (val) { + nfsm_chain_get_32(error, nmc, val); + VATTR_SET(vap, va_uid, val); + } + nfsm_chain_get_32(error, nmc, val); + if (val) { + nfsm_chain_get_32(error, nmc, val); + VATTR_SET(vap, va_gid, val); + } + nfsm_chain_get_32(error, nmc, val); + if (val) { + nfsm_chain_get_64(error, nmc, val64); + VATTR_SET(vap, va_data_size, val64); + } + nanotime(&now); + nfsm_chain_get_32(error, nmc, val); + switch (val) { + case NFS_TIME_SET_TO_CLIENT: + nfsm_chain_get_time(error, nmc, nd->nd_vers, + vap->va_access_time.tv_sec, + vap->va_access_time.tv_nsec); + VATTR_SET_ACTIVE(vap, va_access_time); + vap->va_vaflags &= ~VA_UTIMES_NULL; + break; + case NFS_TIME_SET_TO_SERVER: + VATTR_SET(vap, va_access_time, now); + vap->va_vaflags |= VA_UTIMES_NULL; + break; + } + nfsm_chain_get_32(error, nmc, val); + switch (val) { + case NFS_TIME_SET_TO_CLIENT: + nfsm_chain_get_time(error, nmc, nd->nd_vers, + vap->va_modify_time.tv_sec, + vap->va_modify_time.tv_nsec); + VATTR_SET_ACTIVE(vap, va_modify_time); + vap->va_vaflags &= ~VA_UTIMES_NULL; + break; + case NFS_TIME_SET_TO_SERVER: + VATTR_SET(vap, va_modify_time, now); + if (!VATTR_IS_ACTIVE(vap, va_access_time)) + vap->va_vaflags |= VA_UTIMES_NULL; + break; + } + + return (error); +} + +/* + * Compare two security flavor structs + */ +int +nfsrv_cmp_secflavs(struct nfs_sec *sf1, struct nfs_sec *sf2) +{ + int i; + + if (sf1->count != sf2->count) + return 1; + for (i = 0; i < sf1->count; i++) + if (sf1->flavors[i] != sf2->flavors[i]) + return 1; + return 0; +} + +/* + * Build hash lists of net addresses and hang them off the NFS export. + * Called by nfsrv_export() to set up the lists of export addresses. + */ +int +nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) +{ + struct nfs_export_net_args nxna; + struct nfs_netopt *no, *rn_no; + struct radix_node_head *rnh; + struct radix_node *rn; + struct sockaddr *saddr, *smask; + struct domain *dom; + int i, error; + unsigned int net; + user_addr_t uaddr; + kauth_cred_t cred; + + uaddr = unxa->nxa_nets; + for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) { + error = copyin(uaddr, &nxna, sizeof(nxna)); + if (error) + return (error); + + if (nxna.nxna_flags & (NX_MAPROOT|NX_MAPALL)) { + struct posix_cred temp_pcred; + bzero(&temp_pcred, sizeof(temp_pcred)); + temp_pcred.cr_uid = nxna.nxna_cred.cr_uid; + temp_pcred.cr_ngroups = nxna.nxna_cred.cr_ngroups; + for (i=0; i < nxna.nxna_cred.cr_ngroups && i < NGROUPS; i++) + temp_pcred.cr_groups[i] = nxna.nxna_cred.cr_groups[i]; + cred = posix_cred_create(&temp_pcred); + if (!IS_VALID_CRED(cred)) + return (ENOMEM); + } else { + cred = NOCRED; + } + + if (nxna.nxna_addr.ss_len == 0) { + /* No address means this is a default/world export */ + if (nx->nx_flags & NX_DEFAULTEXPORT) { + if (IS_VALID_CRED(cred)) + kauth_cred_unref(&cred); + return (EEXIST); + } + nx->nx_flags |= NX_DEFAULTEXPORT; + nx->nx_defopt.nxo_flags = nxna.nxna_flags; + nx->nx_defopt.nxo_cred = cred; + bcopy(&nxna.nxna_sec, &nx->nx_defopt.nxo_sec, sizeof(struct nfs_sec)); + nx->nx_expcnt++; + continue; + } + + i = sizeof(struct nfs_netopt); + i += nxna.nxna_addr.ss_len + nxna.nxna_mask.ss_len; + MALLOC(no, struct nfs_netopt *, i, M_NETADDR, M_WAITOK); + if (!no) { + if (IS_VALID_CRED(cred)) + kauth_cred_unref(&cred); + return (ENOMEM); + } + bzero(no, sizeof(struct nfs_netopt)); + no->no_opt.nxo_flags = nxna.nxna_flags; + no->no_opt.nxo_cred = cred; + bcopy(&nxna.nxna_sec, &no->no_opt.nxo_sec, sizeof(struct nfs_sec)); + + saddr = (struct sockaddr *)(no + 1); + bcopy(&nxna.nxna_addr, saddr, nxna.nxna_addr.ss_len); + if (nxna.nxna_mask.ss_len) { + smask = (struct sockaddr *)((caddr_t)saddr + nxna.nxna_addr.ss_len); + bcopy(&nxna.nxna_mask, smask, nxna.nxna_mask.ss_len); + } else { + smask = NULL; + } + i = saddr->sa_family; + if ((rnh = nx->nx_rtable[i]) == 0) { + /* + * Seems silly to initialize every AF when most are not + * used, do so on demand here + */ + for (dom = domains; dom; dom = dom->dom_next) + if (dom->dom_family == i && dom->dom_rtattach) { + dom->dom_rtattach((void **)&nx->nx_rtable[i], + dom->dom_rtoffset); + break; + } + if ((rnh = nx->nx_rtable[i]) == 0) { + if (IS_VALID_CRED(cred)) + kauth_cred_unref(&cred); + _FREE(no, M_NETADDR); + return (ENOBUFS); + } + } + rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, no->no_rnodes); + if (rn == 0) { + /* + * One of the reasons that rnh_addaddr may fail is that + * the entry already exists. To check for this case, we + * look up the entry to see if it is there. If so, we + * do not need to make a new entry but do continue. + * + * XXX should this be rnh_lookup() instead? + */ + int matched = 0; + rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); + rn_no = (struct nfs_netopt *)rn; + if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && + (rn_no->no_opt.nxo_flags == nxna.nxna_flags) && + (!nfsrv_cmp_secflavs(&rn_no->no_opt.nxo_sec, &nxna.nxna_sec))) { + kauth_cred_t cred2 = rn_no->no_opt.nxo_cred; + if (cred == cred2) { + /* creds are same (or both NULL) */ + matched = 1; + } else if (cred && cred2 && (kauth_cred_getuid(cred) == kauth_cred_getuid(cred2))) { + /* + * Now compare the effective and + * supplementary groups... + * + * Note: This comparison, as written, + * does not correctly indicate that + * the groups are equivalent, since + * other than the first supplementary + * group, which is also the effective + * group, order on the remaining groups + * doesn't matter, and this is an + * ordered compare. + */ + gid_t groups[NGROUPS]; + gid_t groups2[NGROUPS]; + int groupcount = NGROUPS; + int group2count = NGROUPS; + + if (!kauth_cred_getgroups(cred, groups, &groupcount) && + !kauth_cred_getgroups(cred2, groups2, &group2count) && + groupcount == group2count) { + for (i=0; i < group2count; i++) + if (groups[i] != groups2[i]) + break; + if (i >= group2count || i >= NGROUPS) + matched = 1; + } + } + } + if (IS_VALID_CRED(cred)) + kauth_cred_unref(&cred); + _FREE(no, M_NETADDR); + if (matched) + continue; + return (EPERM); + } + nx->nx_expcnt++; + } + + return (0); +} + +/* + * In order to properly track an export's netopt count, we need to pass + * an additional argument to nfsrv_free_netopt() so that it can decrement + * the export's netopt count. + */ +struct nfsrv_free_netopt_arg { + uint32_t *cnt; + struct radix_node_head *rnh; +}; + +int +nfsrv_free_netopt(struct radix_node *rn, void *w) +{ + struct nfsrv_free_netopt_arg *fna = (struct nfsrv_free_netopt_arg *)w; + struct radix_node_head *rnh = fna->rnh; + uint32_t *cnt = fna->cnt; + struct nfs_netopt *nno = (struct nfs_netopt *)rn; + + (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); + if (IS_VALID_CRED(nno->no_opt.nxo_cred)) + kauth_cred_unref(&nno->no_opt.nxo_cred); + _FREE((caddr_t)rn, M_NETADDR); + *cnt -= 1; + return (0); +} + +/* + * Free the net address hash lists that are hanging off the mount points. + */ +int +nfsrv_free_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) +{ + struct nfs_export_net_args nxna; + struct radix_node_head *rnh; + struct radix_node *rn; + struct nfsrv_free_netopt_arg fna; + struct nfs_netopt *nno; + user_addr_t uaddr; + unsigned int net; + int i, error; + + if (!unxa || !unxa->nxa_netcount) { + /* delete everything */ + for (i = 0; i <= AF_MAX; i++) + if ( (rnh = nx->nx_rtable[i]) ) { + fna.rnh = rnh; + fna.cnt = &nx->nx_expcnt; + (*rnh->rnh_walktree)(rnh, nfsrv_free_netopt, (caddr_t)&fna); + _FREE((caddr_t)rnh, M_RTABLE); + nx->nx_rtable[i] = 0; + } + return (0); + } + + /* delete only the exports specified */ + uaddr = unxa->nxa_nets; + for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) { + error = copyin(uaddr, &nxna, sizeof(nxna)); + if (error) + return (error); + + if (nxna.nxna_addr.ss_len == 0) { + /* No address means this is a default/world export */ + if (nx->nx_flags & NX_DEFAULTEXPORT) { + nx->nx_flags &= ~NX_DEFAULTEXPORT; + if (IS_VALID_CRED(nx->nx_defopt.nxo_cred)) { + kauth_cred_unref(&nx->nx_defopt.nxo_cred); + } + nx->nx_expcnt--; + } + continue; + } + + if ((rnh = nx->nx_rtable[nxna.nxna_addr.ss_family]) == 0) { + /* AF not initialized? */ + if (!(unxa->nxa_flags & NXA_ADD)) + printf("nfsrv_free_addrlist: address not found (0)\n"); + continue; + } + + rn = (*rnh->rnh_lookup)(&nxna.nxna_addr, + nxna.nxna_mask.ss_len ? &nxna.nxna_mask : NULL, rnh); + if (!rn || (rn->rn_flags & RNF_ROOT)) { + if (!(unxa->nxa_flags & NXA_ADD)) + printf("nfsrv_free_addrlist: address not found (1)\n"); + continue; + } + + (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); + nno = (struct nfs_netopt *)rn; + if (IS_VALID_CRED(nno->no_opt.nxo_cred)) + kauth_cred_unref(&nno->no_opt.nxo_cred); + _FREE((caddr_t)rn, M_NETADDR); + + nx->nx_expcnt--; + if (nx->nx_expcnt == ((nx->nx_flags & NX_DEFAULTEXPORT) ? 1 : 0)) { + /* no more entries in rnh, so free it up */ + _FREE((caddr_t)rnh, M_RTABLE); + nx->nx_rtable[nxna.nxna_addr.ss_family] = 0; + } + } + + return (0); +} + +void enablequotas(struct mount *mp, vfs_context_t ctx); // XXX + +int +nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) +{ + int error = 0; + size_t pathlen; + struct nfs_exportfs *nxfs, *nxfs2, *nxfs3; + struct nfs_export *nx, *nx2, *nx3; + struct nfs_filehandle nfh; + struct nameidata mnd, xnd; + vnode_t mvp = NULL, xvp = NULL; + mount_t mp = NULL; + char path[MAXPATHLEN]; + int expisroot; + + if (unxa->nxa_flags == NXA_CHECK) { + /* just check if the path is an NFS-exportable file system */ + error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, &pathlen); + if (error) + return (error); + NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); + error = namei(&mnd); + if (error) + return (error); + mvp = mnd.ni_vp; + mp = vnode_mount(mvp); + /* make sure it's the root of a file system */ + if (!vnode_isvroot(mvp)) + error = EINVAL; + /* make sure the file system is NFS-exportable */ + if (!error) { + nfh.nfh_len = NFSV3_MAX_FID_SIZE; + error = VFS_VPTOFH(mvp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); + } + if (!error && (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)) + error = EIO; + if (!error && !(mp->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED)) + error = EISDIR; + vnode_put(mvp); + nameidone(&mnd); + return (error); + } + + /* all other operations: must be super user */ + if ((error = vfs_context_suser(ctx))) + return (error); + + if (unxa->nxa_flags & NXA_DELETE_ALL) { + /* delete all exports on all file systems */ + lck_rw_lock_exclusive(&nfsrv_export_rwlock); + while ((nxfs = LIST_FIRST(&nfsrv_exports))) { + mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path); + if (mp) { + vfs_clearflags(mp, MNT_EXPORTED); + mount_iterdrop(mp); + mp = NULL; + } + /* delete all exports on this file system */ + while ((nx = LIST_FIRST(&nxfs->nxfs_exports))) { + LIST_REMOVE(nx, nx_next); + LIST_REMOVE(nx, nx_hash); + /* delete all netopts for this export */ + nfsrv_free_addrlist(nx, NULL); + nx->nx_flags &= ~NX_DEFAULTEXPORT; + if (IS_VALID_CRED(nx->nx_defopt.nxo_cred)) { + kauth_cred_unref(&nx->nx_defopt.nxo_cred); + } + /* free active user list for this export */ + nfsrv_free_user_list(&nx->nx_user_list); + FREE(nx->nx_path, M_TEMP); + FREE(nx, M_TEMP); + } + LIST_REMOVE(nxfs, nxfs_next); + FREE(nxfs->nxfs_path, M_TEMP); + FREE(nxfs, M_TEMP); + } + if (nfsrv_export_hashtbl) { + /* all exports deleted, clean up export hash table */ + FREE(nfsrv_export_hashtbl, M_TEMP); + nfsrv_export_hashtbl = NULL; + } + lck_rw_done(&nfsrv_export_rwlock); + return (0); + } + + error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, &pathlen); + if (error) + return (error); + + lck_rw_lock_exclusive(&nfsrv_export_rwlock); + + /* init export hash table if not already */ + if (!nfsrv_export_hashtbl) { + if (nfsrv_export_hash_size <= 0) + nfsrv_export_hash_size = NFSRVEXPHASHSZ; + nfsrv_export_hashtbl = hashinit(nfsrv_export_hash_size, M_TEMP, &nfsrv_export_hash); + } + + // first check if we've already got an exportfs with the given ID + LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { + if (nxfs->nxfs_id == unxa->nxa_fsid) + break; + } + if (nxfs) { + /* verify exported FS path matches given path */ + if (strncmp(path, nxfs->nxfs_path, MAXPATHLEN)) { + error = EEXIST; + goto unlock_out; + } + if ((unxa->nxa_flags & (NXA_ADD|NXA_OFFLINE)) == NXA_ADD) { + /* if adding, verify that the mount is still what we expect */ + mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path); + if (mp) { + mount_ref(mp, 0); + mount_iterdrop(mp); + } + /* find exported FS root vnode */ + NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs->nxfs_path), ctx); + error = namei(&mnd); + if (error) + goto unlock_out; + mvp = mnd.ni_vp; + /* make sure it's (still) the root of a file system */ + if (!vnode_isvroot(mvp)) { + error = EINVAL; + goto out; + } + /* sanity check: this should be same mount */ + if (mp != vnode_mount(mvp)) { + error = EINVAL; + goto out; + } + } + } else { + /* no current exported file system with that ID */ + if (!(unxa->nxa_flags & NXA_ADD)) { + error = ENOENT; + goto unlock_out; + } + + /* find exported FS root vnode */ + NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); + error = namei(&mnd); + if (error) { + if (!(unxa->nxa_flags & NXA_OFFLINE)) + goto unlock_out; + } else { + mvp = mnd.ni_vp; + /* make sure it's the root of a file system */ + if (!vnode_isvroot(mvp)) { + /* bail if not marked offline */ + if (!(unxa->nxa_flags & NXA_OFFLINE)) { + error = EINVAL; + goto out; + } + vnode_put(mvp); + nameidone(&mnd); + mvp = NULL; + } else { + mp = vnode_mount(mvp); + mount_ref(mp, 0); + + /* make sure the file system is NFS-exportable */ + nfh.nfh_len = NFSV3_MAX_FID_SIZE; + error = VFS_VPTOFH(mvp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); + if (!error && (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)) + error = EIO; + if (!error && !(mp->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED)) + error = EISDIR; + if (error) + goto out; + } + } + + /* add an exportfs for it */ + MALLOC(nxfs, struct nfs_exportfs *, sizeof(struct nfs_exportfs), M_TEMP, M_WAITOK); + if (!nxfs) { + error = ENOMEM; + goto out; + } + bzero(nxfs, sizeof(struct nfs_exportfs)); + nxfs->nxfs_id = unxa->nxa_fsid; + MALLOC(nxfs->nxfs_path, char*, pathlen, M_TEMP, M_WAITOK); + if (!nxfs->nxfs_path) { + FREE(nxfs, M_TEMP); + error = ENOMEM; + goto out; + } + bcopy(path, nxfs->nxfs_path, pathlen); + /* insert into list in reverse-sorted order */ nxfs3 = NULL; - LIST_FOREACH(nxfs2, &nfs_exports, nxfs_next) { - if (strcmp(nxfs->nxfs_path, nxfs2->nxfs_path) > 0) + LIST_FOREACH(nxfs2, &nfsrv_exports, nxfs_next) { + if (strncmp(nxfs->nxfs_path, nxfs2->nxfs_path, MAXPATHLEN) > 0) break; nxfs3 = nxfs2; } @@ -2423,14 +2871,15 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) else if (nxfs3) LIST_INSERT_AFTER(nxfs3, nxfs, nxfs_next); else - LIST_INSERT_HEAD(&nfs_exports, nxfs, nxfs_next); + LIST_INSERT_HEAD(&nfsrv_exports, nxfs, nxfs_next); /* make sure any quotas are enabled before we export the file system */ - enablequotas(mp, ctx); + if (mp) + enablequotas(mp, ctx); } if (unxa->nxa_exppath) { - error = copyinstr(unxa->nxa_exppath, path, MAXPATHLEN, (size_t *)&pathlen); + error = copyinstr(unxa->nxa_exppath, path, MAXPATHLEN, &pathlen); if (error) goto out; LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { @@ -2439,7 +2888,7 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) } if (nx) { /* verify exported FS path matches given path */ - if (strcmp(path, nx->nx_path)) { + if (strncmp(path, nx->nx_path, MAXPATHLEN)) { error = EEXIST; goto out; } @@ -2458,6 +2907,7 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) bzero(nx, sizeof(struct nfs_export)); nx->nx_id = unxa->nxa_expid; nx->nx_fs = nxfs; + microtime(&nx->nx_exptime); MALLOC(nx->nx_path, char*, pathlen, M_TEMP, M_WAITOK); if (!nx->nx_path) { error = ENOMEM; @@ -2466,10 +2916,12 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) goto out1; } bcopy(path, nx->nx_path, pathlen); + /* initialize the active user list */ + nfsrv_init_user_list(&nx->nx_user_list); /* insert into list in reverse-sorted order */ nx3 = NULL; LIST_FOREACH(nx2, &nxfs->nxfs_exports, nx_next) { - if (strcmp(nx->nx_path, nx2->nx_path) > 0) + if (strncmp(nx->nx_path, nx2->nx_path, MAXPATHLEN) > 0) break; nx3 = nx2; } @@ -2480,10 +2932,10 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) else LIST_INSERT_HEAD(&nxfs->nxfs_exports, nx, nx_next); /* insert into hash */ - LIST_INSERT_HEAD(NFSEXPHASH(nxfs->nxfs_id, nx->nx_id), nx, nx_hash); + LIST_INSERT_HEAD(NFSRVEXPHASH(nxfs->nxfs_id, nx->nx_id), nx, nx_hash); /* - * We don't allow nested exports. Check if the new entry + * We don't allow/support nested exports. Check if the new entry * nests with the entries before and after or if there's an * entry for the file system root and subdirs. */ @@ -2509,55 +2961,84 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) if (nx2) error = EINVAL; } - if (error) { - printf("nfsrv_export: attempt to register nested exports: %s/%s\n", - nxfs->nxfs_path, nx->nx_path); - goto out1; - } - - /* find export root vnode */ - if (!nx->nx_path[0] || ((nx->nx_path[0] == '.') && !nx->nx_path[1])) { - /* exporting file system's root directory */ - xvp = mvp; - vnode_get(xvp); - } else { - xnd.ni_cnd.cn_nameiop = LOOKUP; - xnd.ni_cnd.cn_flags = LOCKLEAF; - xnd.ni_pathlen = pathlen - 1; - xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf = path; - xnd.ni_startdir = mvp; - xnd.ni_usedvp = mvp; - xnd.ni_cnd.cn_context = ctx; - error = lookup(&xnd); - if (error) - goto out1; - xvp = xnd.ni_vp; - } - - if (vnode_vtype(xvp) != VDIR) { - error = EINVAL; - vnode_put(xvp); - goto out1; + if (error) { + /* + * Don't actually return an error because mountd is + * probably about to delete the conflicting export. + * This can happen when a new export momentarily conflicts + * with an old export while the transition is being made. + * Theoretically, mountd could be written to avoid this + * transient situation - but it would greatly increase the + * complexity of mountd for very little overall benefit. + */ + printf("nfsrv_export: warning: nested exports: %s/%s\n", + nxfs->nxfs_path, nx->nx_path); + error = 0; } - - /* grab file handle */ + nx->nx_fh.nfh_xh.nxh_flags = NXHF_INVALIDFH; + } + /* make sure file handle is set up */ + if ((nx->nx_fh.nfh_xh.nxh_version != htonl(NFS_FH_VERSION)) || + (nx->nx_fh.nfh_xh.nxh_flags & NXHF_INVALIDFH)) { + /* try to set up export root file handle */ nx->nx_fh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); nx->nx_fh.nfh_xh.nxh_fsid = htonl(nx->nx_fs->nxfs_id); nx->nx_fh.nfh_xh.nxh_expid = htonl(nx->nx_id); nx->nx_fh.nfh_xh.nxh_flags = 0; nx->nx_fh.nfh_xh.nxh_reserved = 0; - nx->nx_fh.nfh_len = NFS_MAX_FID_SIZE; - error = VFS_VPTOFH(xvp, &nx->nx_fh.nfh_len, &nx->nx_fh.nfh_fid[0], NULL); - if (!error && (nx->nx_fh.nfh_len > (int)NFS_MAX_FID_SIZE)) { - error = EIO; + nx->nx_fh.nfh_fhp = (u_char*)&nx->nx_fh.nfh_xh; + bzero(&nx->nx_fh.nfh_fid[0], NFSV2_MAX_FID_SIZE); + if (mvp) { + /* find export root vnode */ + if (!nx->nx_path[0] || ((nx->nx_path[0] == '.') && !nx->nx_path[1])) { + /* exporting file system's root directory */ + xvp = mvp; + vnode_get(xvp); + } else { + xnd.ni_cnd.cn_nameiop = LOOKUP; +#if CONFIG_TRIGGERS + xnd.ni_op = OP_LOOKUP; +#endif + xnd.ni_cnd.cn_flags = LOCKLEAF; + xnd.ni_pathlen = pathlen - 1; + xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf = path; + xnd.ni_startdir = mvp; + xnd.ni_usedvp = mvp; + xnd.ni_cnd.cn_context = ctx; + while ((error = lookup(&xnd)) == ERECYCLE) { + xnd.ni_cnd.cn_flags = LOCKLEAF; + xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf; + xnd.ni_usedvp = xnd.ni_dvp = xnd.ni_startdir = mvp; + } + if (error) + goto out1; + xvp = xnd.ni_vp; + } + + if (vnode_vtype(xvp) != VDIR) { + error = EINVAL; + vnode_put(xvp); + goto out1; + } + + /* grab file handle */ + nx->nx_fh.nfh_len = NFSV3_MAX_FID_SIZE; + error = VFS_VPTOFH(xvp, (int*)&nx->nx_fh.nfh_len, &nx->nx_fh.nfh_fid[0], NULL); + if (!error && (nx->nx_fh.nfh_len > (int)NFSV3_MAX_FID_SIZE)) { + error = EIO; + } else { + nx->nx_fh.nfh_xh.nxh_fidlen = nx->nx_fh.nfh_len; + nx->nx_fh.nfh_len += sizeof(nx->nx_fh.nfh_xh); + } + + vnode_put(xvp); + if (error) + goto out1; } else { - nx->nx_fh.nfh_xh.nxh_fidlen = nx->nx_fh.nfh_len; - nx->nx_fh.nfh_len += sizeof(nx->nx_fh.nfh_xh); + nx->nx_fh.nfh_xh.nxh_flags = NXHF_INVALIDFH; + nx->nx_fh.nfh_xh.nxh_fidlen = 0; + nx->nx_fh.nfh_len = sizeof(nx->nx_fh.nfh_xh); } - - vnode_put(xvp); - if (error) - goto out1; } } else { nx = NULL; @@ -2571,28 +3052,43 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) LIST_REMOVE(nx, nx_next); LIST_REMOVE(nx, nx_hash); /* delete all netopts for this export */ - nfsrv_free_addrlist(nx); + nfsrv_free_addrlist(nx, NULL); nx->nx_flags &= ~NX_DEFAULTEXPORT; if (IS_VALID_CRED(nx->nx_defopt.nxo_cred)) { kauth_cred_unref(&nx->nx_defopt.nxo_cred); } + /* delete active user list for this export */ + nfsrv_free_user_list(&nx->nx_user_list); FREE(nx->nx_path, M_TEMP); FREE(nx, M_TEMP); } goto out1; - } else { + } else if (!unxa->nxa_netcount) { /* delete all netopts for this export */ - nfsrv_free_addrlist(nx); + nfsrv_free_addrlist(nx, NULL); nx->nx_flags &= ~NX_DEFAULTEXPORT; if (IS_VALID_CRED(nx->nx_defopt.nxo_cred)) { kauth_cred_unref(&nx->nx_defopt.nxo_cred); } + } else { + /* delete only the netopts for the given addresses */ + error = nfsrv_free_addrlist(nx, unxa); + if (error) + goto out1; } } if (unxa->nxa_flags & NXA_ADD) { + /* + * If going offline set the export time so that when + * coming back on line we will present a new write verifier + * to the client. + */ + if (unxa->nxa_flags & NXA_OFFLINE) + microtime(&nx->nx_exptime); + error = nfsrv_hang_addrlist(nx, unxa); - if (!error) - mp->mnt_flag |= MNT_EXPORTED; + if (!error && mp) + vfs_setflags(mp, MNT_EXPORTED); } out1: @@ -2600,6 +3096,8 @@ out1: /* export has no export options */ LIST_REMOVE(nx, nx_next); LIST_REMOVE(nx, nx_hash); + /* delete active user list for this export */ + nfsrv_free_user_list(&nx->nx_user_list); FREE(nx->nx_path, M_TEMP); FREE(nx, M_TEMP); } @@ -2608,7 +3106,8 @@ out1: LIST_REMOVE(nxfs, nxfs_next); FREE(nxfs->nxfs_path, M_TEMP); FREE(nxfs, M_TEMP); - mp->mnt_flag &= ~MNT_EXPORTED; + if (mp) + vfs_clearflags(mp, MNT_EXPORTED); } out: @@ -2617,385 +3116,886 @@ out: nameidone(&mnd); } unlock_out: - lck_rw_done(&nfs_export_rwlock); + if (mp) + mount_drop(mp, 0); + lck_rw_done(&nfsrv_export_rwlock); return (error); } -static struct nfs_export_options * -nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam) +struct nfs_export_options * +nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam) +{ + struct nfs_export_options *nxo = NULL; + struct nfs_netopt *no = NULL; + struct radix_node_head *rnh; + struct sockaddr *saddr; + + /* Lookup in the export list first. */ + if (nam != NULL) { + saddr = mbuf_data(nam); + rnh = nx->nx_rtable[saddr->sa_family]; + if (rnh != NULL) { + no = (struct nfs_netopt *) + (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); + if (no && no->no_rnodes->rn_flags & RNF_ROOT) + no = NULL; + if (no) + nxo = &no->no_opt; + } + } + /* If no address match, use the default if it exists. */ + if ((nxo == NULL) && (nx->nx_flags & NX_DEFAULTEXPORT)) + nxo = &nx->nx_defopt; + return (nxo); +} + +/* find an export for the given handle */ +struct nfs_export * +nfsrv_fhtoexport(struct nfs_filehandle *nfhp) +{ + struct nfs_exphandle *nxh = (struct nfs_exphandle*)nfhp->nfh_fhp; + struct nfs_export *nx; + uint32_t fsid, expid; + + if (!nfsrv_export_hashtbl) + return (NULL); + fsid = ntohl(nxh->nxh_fsid); + expid = ntohl(nxh->nxh_expid); + nx = NFSRVEXPHASH(fsid, expid)->lh_first; + for (; nx; nx = LIST_NEXT(nx, nx_hash)) { + if (nx->nx_fs->nxfs_id != fsid) + continue; + if (nx->nx_id != expid) + continue; + break; + } + return (nx); +} + +/* + * nfsrv_fhtovp() - convert FH to vnode and export info + */ +int +nfsrv_fhtovp( + struct nfs_filehandle *nfhp, + struct nfsrv_descript *nd, + vnode_t *vpp, + struct nfs_export **nxp, + struct nfs_export_options **nxop) +{ + struct nfs_exphandle *nxh = (struct nfs_exphandle*)nfhp->nfh_fhp; + struct nfs_export_options *nxo; + u_char *fidp; + int error; + struct mount *mp; + mbuf_t nam = NULL; + uint32_t v; + int i, valid; + + *vpp = NULL; + *nxp = NULL; + *nxop = NULL; + + if (nd != NULL) + nam = nd->nd_nam; + + v = ntohl(nxh->nxh_version); + if (v != NFS_FH_VERSION) { + /* file handle format not supported */ + return (ESTALE); + } + if (nfhp->nfh_len > NFSV3_MAX_FH_SIZE) + return (EBADRPC); + if (nfhp->nfh_len < (int)sizeof(struct nfs_exphandle)) + return (ESTALE); + v = ntohs(nxh->nxh_flags); + if (v & NXHF_INVALIDFH) + return (ESTALE); + + *nxp = nfsrv_fhtoexport(nfhp); + if (!*nxp) + return (ESTALE); + + /* Get the export option structure for this tuple. */ + *nxop = nxo = nfsrv_export_lookup(*nxp, nam); + if (nam && (*nxop == NULL)) + return (EACCES); + + if (nd != NULL) { + /* Validate the security flavor of the request */ + for (i = 0, valid = 0; i < nxo->nxo_sec.count; i++) { + if (nd->nd_sec == nxo->nxo_sec.flavors[i]) { + valid = 1; + break; + } + } + if (!valid) { + /* + * RFC 2623 section 2.3.2 recommends no authentication + * requirement for certain NFS procedures used for mounting. + * This allows an unauthenticated superuser on the client + * to do mounts for the benefit of authenticated users. + */ + if (nd->nd_vers == NFS_VER2) + if (nd->nd_procnum == NFSV2PROC_GETATTR || + nd->nd_procnum == NFSV2PROC_STATFS) + valid = 1; + if (nd->nd_vers == NFS_VER3) + if (nd->nd_procnum == NFSPROC_FSINFO) + valid = 1; + + if (!valid) + return (NFSERR_AUTHERR | AUTH_REJECTCRED); + } + } + + if (nxo && (nxo->nxo_flags & NX_OFFLINE)) + return ((nd == NULL || nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); + + /* find mount structure */ + mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path); + if (mp) { + error = vfs_busy(mp, LK_NOWAIT); + mount_iterdrop(mp); + if (error) + mp = NULL; + } + if (!mp) { + /* + * We have an export, but no mount? + * Perhaps the export just hasn't been marked offline yet. + */ + return ((nd == NULL || nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); + } + + fidp = nfhp->nfh_fhp + sizeof(*nxh); + error = VFS_FHTOVP(mp, nxh->nxh_fidlen, fidp, vpp, NULL); + vfs_unbusy(mp); + if (error) + return (error); + /* vnode pointer should be good at this point or ... */ + if (*vpp == NULL) + return (ESTALE); + return (0); +} + +/* + * nfsrv_credcheck() - check/map credentials according + * to given export options. + */ +int +nfsrv_credcheck( + struct nfsrv_descript *nd, + vfs_context_t ctx, + __unused struct nfs_export *nx, + struct nfs_export_options *nxo) +{ + if (nxo && nxo->nxo_cred) { + if ((nxo->nxo_flags & NX_MAPALL) || + ((nxo->nxo_flags & NX_MAPROOT) && !suser(nd->nd_cr, NULL))) { + kauth_cred_ref(nxo->nxo_cred); + kauth_cred_unref(&nd->nd_cr); + nd->nd_cr = nxo->nxo_cred; + } + } + ctx->vc_ucred = nd->nd_cr; + return (0); +} + +/* + * nfsrv_vptofh() - convert vnode to file handle for given export + * + * If the caller is passing in a vnode for a ".." directory entry, + * they can pass a directory NFS file handle (dnfhp) which will be + * checked against the root export file handle. If it matches, we + * refuse to provide the file handle for the out-of-export directory. + */ +int +nfsrv_vptofh( + struct nfs_export *nx, + int nfsvers, + struct nfs_filehandle *dnfhp, + vnode_t vp, + vfs_context_t ctx, + struct nfs_filehandle *nfhp) +{ + int error; + uint32_t maxfidsize; + + nfhp->nfh_fhp = (u_char*)&nfhp->nfh_xh; + nfhp->nfh_xh.nxh_version = htonl(NFS_FH_VERSION); + nfhp->nfh_xh.nxh_fsid = htonl(nx->nx_fs->nxfs_id); + nfhp->nfh_xh.nxh_expid = htonl(nx->nx_id); + nfhp->nfh_xh.nxh_flags = 0; + nfhp->nfh_xh.nxh_reserved = 0; + + if (nfsvers == NFS_VER2) + bzero(&nfhp->nfh_fid[0], NFSV2_MAX_FID_SIZE); + + /* if directory FH matches export root, return invalid FH */ + if (dnfhp && nfsrv_fhmatch(dnfhp, &nx->nx_fh)) { + if (nfsvers == NFS_VER2) + nfhp->nfh_len = NFSX_V2FH; + else + nfhp->nfh_len = sizeof(nfhp->nfh_xh); + nfhp->nfh_xh.nxh_fidlen = 0; + nfhp->nfh_xh.nxh_flags = htons(NXHF_INVALIDFH); + return (0); + } + + if (nfsvers == NFS_VER2) + maxfidsize = NFSV2_MAX_FID_SIZE; + else + maxfidsize = NFSV3_MAX_FID_SIZE; + nfhp->nfh_len = maxfidsize; + + error = VFS_VPTOFH(vp, (int*)&nfhp->nfh_len, &nfhp->nfh_fid[0], ctx); + if (error) + return (error); + if (nfhp->nfh_len > maxfidsize) + return (EOVERFLOW); + nfhp->nfh_xh.nxh_fidlen = nfhp->nfh_len; + nfhp->nfh_len += sizeof(nfhp->nfh_xh); + if ((nfsvers == NFS_VER2) && (nfhp->nfh_len < NFSX_V2FH)) + nfhp->nfh_len = NFSX_V2FH; + + return (0); +} + +/* + * Compare two file handles to see it they're the same. + * Note that we don't use nfh_len because that may include + * padding in an NFSv2 file handle. + */ +int +nfsrv_fhmatch(struct nfs_filehandle *fh1, struct nfs_filehandle *fh2) +{ + struct nfs_exphandle *nxh1, *nxh2; + int len1, len2; + + nxh1 = (struct nfs_exphandle *)fh1->nfh_fhp; + nxh2 = (struct nfs_exphandle *)fh2->nfh_fhp; + len1 = sizeof(fh1->nfh_xh) + nxh1->nxh_fidlen; + len2 = sizeof(fh2->nfh_xh) + nxh2->nxh_fidlen; + if (len1 != len2) + return (0); + if (bcmp(nxh1, nxh2, len1)) + return (0); + return (1); +} + +/* + * Functions for dealing with active user lists + */ + +/* + * Search the hash table for a user node with a matching IP address and uid field. + * If found, the node's tm_last timestamp is updated and the node is returned. + * + * If not found, a new node is allocated (or reclaimed via LRU), initialized, and returned. + * Returns NULL if a new node could not be allcoated. + * + * The list's user_mutex lock MUST be held. + */ +struct nfs_user_stat_node * +nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr *saddr, uid_t uid) +{ + struct nfs_user_stat_node *unode; + struct timeval now; + struct nfs_user_stat_hashtbl_head *head; + + /* seach the hash table */ + head = NFS_USER_STAT_HASH(list->user_hashtbl, uid); + LIST_FOREACH(unode, head, hash_link) { + if ((uid == unode->uid) && (nfs_sockaddr_cmp(saddr, (struct sockaddr*)&unode->sock) == 0)) { + /* found matching node */ + break; + } + } + + if (unode) { + /* found node in the hash table, now update lru position */ + TAILQ_REMOVE(&list->user_lru, unode, lru_link); + TAILQ_INSERT_TAIL(&list->user_lru, unode, lru_link); + + /* update time stamp */ + microtime(&now); + unode->tm_last = (uint32_t)now.tv_sec; + return unode; + } + + if (list->node_count < nfsrv_user_stat_max_nodes) { + /* Allocate a new node */ + MALLOC(unode, struct nfs_user_stat_node *, sizeof(struct nfs_user_stat_node), + M_TEMP, M_WAITOK | M_ZERO); + + if (!unode) + return NULL; + + /* increment node count */ + OSAddAtomic(1, &nfsrv_user_stat_node_count); + list->node_count++; + } else { + /* reuse the oldest node in the lru list */ + unode = TAILQ_FIRST(&list->user_lru); + + if (!unode) + return NULL; + + /* Remove the node */ + TAILQ_REMOVE(&list->user_lru, unode, lru_link); + LIST_REMOVE(unode, hash_link); + } + + /* Initialize the node */ + unode->uid = uid; + bcopy(saddr, &unode->sock, saddr->sa_len); + microtime(&now); + unode->ops = 0; + unode->bytes_read = 0; + unode->bytes_written = 0; + unode->tm_start = (uint32_t)now.tv_sec; + unode->tm_last = (uint32_t)now.tv_sec; + + /* insert the node */ + TAILQ_INSERT_TAIL(&list->user_lru, unode, lru_link); + LIST_INSERT_HEAD(head, unode, hash_link); + + return unode; +} + +void +nfsrv_update_user_stat(struct nfs_export *nx, struct nfsrv_descript *nd, uid_t uid, u_int ops, u_int rd_bytes, u_int wr_bytes) +{ + struct nfs_user_stat_node *unode; + struct nfs_active_user_list *ulist; + struct sockaddr *saddr; + + if ((!nfsrv_user_stat_enabled) || (!nx) || (!nd) || (!nd->nd_nam)) + return; + + saddr = (struct sockaddr *)mbuf_data(nd->nd_nam); + + /* check address family before going any further */ + if ((saddr->sa_family != AF_INET) && (saddr->sa_family != AF_INET6)) + return; + + ulist = &nx->nx_user_list; + + /* lock the active user list */ + lck_mtx_lock(&ulist->user_mutex); + + /* get the user node */ + unode = nfsrv_get_user_stat_node(ulist, saddr, uid); + + if (!unode) { + lck_mtx_unlock(&ulist->user_mutex); + return; + } + + /* update counters */ + unode->ops += ops; + unode->bytes_read += rd_bytes; + unode->bytes_written += wr_bytes; + + /* done */ + lck_mtx_unlock(&ulist->user_mutex); +} + +/* initialize an active user list */ +void +nfsrv_init_user_list(struct nfs_active_user_list *ulist) +{ + uint i; + + /* initialize the lru */ + TAILQ_INIT(&ulist->user_lru); + + /* initialize the hash table */ + for(i = 0; i < NFS_USER_STAT_HASH_SIZE; i++) + LIST_INIT(&ulist->user_hashtbl[i]); + ulist->node_count = 0; + + lck_mtx_init(&ulist->user_mutex, nfsrv_active_user_mutex_group, LCK_ATTR_NULL); +} + +/* Free all nodes in an active user list */ +void +nfsrv_free_user_list(struct nfs_active_user_list *ulist) { - struct nfs_export_options *nxo = NULL; - struct nfs_netopt *no = NULL; - struct radix_node_head *rnh; - struct sockaddr *saddr; + struct nfs_user_stat_node *unode; - /* Lookup in the export list first. */ - if (nam != NULL) { - saddr = mbuf_data(nam); - rnh = nx->nx_rtable[saddr->sa_family]; - if (rnh != NULL) { - no = (struct nfs_netopt *) - (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); - if (no && no->no_rnodes->rn_flags & RNF_ROOT) - no = NULL; - if (no) - nxo = &no->no_opt; - } + if (!ulist) + return; + + while ((unode = TAILQ_FIRST(&ulist->user_lru))) { + /* Remove node and free */ + TAILQ_REMOVE(&ulist->user_lru, unode, lru_link); + LIST_REMOVE(unode, hash_link); + FREE(unode, M_TEMP); + + /* decrement node count */ + OSAddAtomic(-1, &nfsrv_user_stat_node_count); } - /* If no address match, use the default if it exists. */ - if ((nxo == NULL) && (nx->nx_flags & NX_DEFAULTEXPORT)) - nxo = &nx->nx_defopt; - return (nxo); + ulist->node_count = 0; + + lck_mtx_destroy(&ulist->user_mutex, nfsrv_active_user_mutex_group); } -/* find an export for the given handle */ -static struct nfs_export * -nfsrv_fhtoexport(struct nfs_filehandle *nfhp) +/* Reclaim old expired user nodes from active user lists. */ +void +nfsrv_active_user_list_reclaim(void) { - struct nfs_export *nx; - uint32_t fsid, expid; + struct nfs_exportfs *nxfs; + struct nfs_export *nx; + struct nfs_active_user_list *ulist; + struct nfs_user_stat_hashtbl_head oldlist; + struct nfs_user_stat_node *unode, *unode_next; + struct timeval now; + uint32_t tstale; + + LIST_INIT(&oldlist); + + lck_rw_lock_shared(&nfsrv_export_rwlock); + microtime(&now); + tstale = now.tv_sec - nfsrv_user_stat_max_idle_sec; + LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { + LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { + /* Scan through all user nodes of this export */ + ulist = &nx->nx_user_list; + lck_mtx_lock(&ulist->user_mutex); + for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) { + unode_next = TAILQ_NEXT(unode, lru_link); + + /* check if this node has expired */ + if (unode->tm_last >= tstale) + break; - fsid = ntohl(nfhp->nfh_xh.nxh_fsid); - expid = ntohl(nfhp->nfh_xh.nxh_expid); - nx = NFSEXPHASH(fsid, expid)->lh_first; - for (; nx; nx = LIST_NEXT(nx, nx_hash)) { - if (nx->nx_fs->nxfs_id != fsid) - continue; - if (nx->nx_id != expid) - continue; - break; + /* Remove node from the active user list */ + TAILQ_REMOVE(&ulist->user_lru, unode, lru_link); + LIST_REMOVE(unode, hash_link); + + /* Add node to temp list */ + LIST_INSERT_HEAD(&oldlist, unode, hash_link); + + /* decrement node count */ + OSAddAtomic(-1, &nfsrv_user_stat_node_count); + ulist->node_count--; + } + /* can unlock this export's list now */ + lck_mtx_unlock(&ulist->user_mutex); + } + } + lck_rw_done(&nfsrv_export_rwlock); + + /* Free expired nodes */ + while ((unode = LIST_FIRST(&oldlist))) { + LIST_REMOVE(unode, hash_link); + FREE(unode, M_TEMP); } - return nx; } /* - * nfsrv_fhtovp() - convert FH to vnode and export info + * Maps errno values to nfs error numbers. + * Use NFSERR_IO as the catch all for ones not specifically defined in + * RFC 1094. */ -int -nfsrv_fhtovp( - struct nfs_filehandle *nfhp, - mbuf_t nam, - __unused int pubflag, - vnode_t *vpp, - struct nfs_export **nxp, - struct nfs_export_options **nxop) -{ - int error; - struct mount *mp; - uint32_t v; +static u_char nfsrv_v2errmap[] = { + NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, + NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, + NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, +}; - *vpp = NULL; - *nxp = NULL; - *nxop = NULL; +/* + * Maps errno values to nfs error numbers. + * Although it is not obvious whether or not NFS clients really care if + * a returned error value is in the specified list for the procedure, the + * safest thing to do is filter them appropriately. For Version 2, the + * X/Open XNFS document is the only specification that defines error values + * for each RPC (The RFC simply lists all possible error values for all RPCs), + * so I have decided to not do this for Version 2. + * The first entry is the default error return and the rest are the valid + * errors for that RPC in increasing numeric order. + */ +static short nfsv3err_null[] = { + 0, + 0, +}; - v = ntohl(nfhp->nfh_xh.nxh_version); - if (v != NFS_FH_VERSION) { - /* file handle format not supported */ - return (ESTALE); - } - if (nfhp->nfh_len > NFS_MAX_FH_SIZE) - return (EBADRPC); - if (nfhp->nfh_len < (int)sizeof(nfhp->nfh_xh)) - return (ESTALE); - v = ntohs(nfhp->nfh_xh.nxh_flags); - if (v & NXHF_INVALIDFH) - return (ESTALE); +static short nfsv3err_getattr[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; -/* XXX Revisit when enabling WebNFS */ -#ifdef WEBNFS_ENABLED - if (nfs_ispublicfh(nfhp)) { - if (!pubflag || !nfs_pub.np_valid) - return (ESTALE); - nfhp = &nfs_pub.np_handle; - } -#endif +static short nfsv3err_setattr[] = { + NFSERR_IO, + NFSERR_PERM, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOT_SYNC, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - *nxp = nfsrv_fhtoexport(nfhp); - if (!*nxp) - return (ESTALE); +static short nfsv3err_lookup[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_NAMETOL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - /* Get the export option structure for this tuple. */ - *nxop = nfsrv_export_lookup(*nxp, nam); - if (nam && (*nxop == NULL)) - return (EACCES); +static short nfsv3err_access[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - /* find mount structure */ - mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path); - if (!mp) - return (ESTALE); +static short nfsv3err_readlink[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - error = VFS_FHTOVP(mp, nfhp->nfh_xh.nxh_fidlen, &nfhp->nfh_fid[0], vpp, NULL); - if (error) - return (error); - /* vnode pointer should be good at this point or ... */ - if (*vpp == NULL) - return (ESTALE); - return (0); -} +static short nfsv3err_read[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_NXIO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; -/* - * nfsrv_credcheck() - check/map credentials according to given export options - */ -int -nfsrv_credcheck( - struct nfsrv_descript *nfsd, - __unused struct nfs_export *nx, - struct nfs_export_options *nxo) -{ - if (nxo && nxo->nxo_cred) { - if ((nxo->nxo_flags & NX_MAPALL) || - ((nxo->nxo_flags & NX_MAPROOT) && !suser(nfsd->nd_cr, NULL))) { - kauth_cred_ref(nxo->nxo_cred); - kauth_cred_unref(&nfsd->nd_cr); - nfsd->nd_cr = nxo->nxo_cred; - } - } - return (0); -} +static short nfsv3err_write[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_INVAL, + NFSERR_FBIG, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; + +static short nfsv3err_create[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; + +static short nfsv3err_mkdir[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; +static short nfsv3err_symlink[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; -/* - * WebNFS: check if a filehandle is a public filehandle. For v3, this - * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has - * transformed this to all zeroes in both cases, so check for it. - */ -int -nfs_ispublicfh(struct nfs_filehandle *nfhp) -{ - char *cp = (char *)nfhp; - unsigned int i; - - if (nfhp->nfh_len == 0) - return (TRUE); - if (nfhp->nfh_len != NFSX_V2FH) - return (FALSE); - for (i = 0; i < NFSX_V2FH; i++) - if (*cp++ != 0) - return (FALSE); - return (TRUE); -} +static short nfsv3err_mknod[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_BADTYPE, + NFSERR_TRYLATER, + 0, +}; -/* - * nfsrv_vptofh() - convert vnode to file handle for given export - * - * If the caller is passing in a vnode for a ".." directory entry, - * they can pass a directory NFS file handle (dnfhp) which will be - * checked against the root export file handle. If it matches, we - * refuse to provide the file handle for the out-of-export directory. - */ -int -nfsrv_vptofh( - struct nfs_export *nx, - int v2, - struct nfs_filehandle *dnfhp, - vnode_t vp, - struct vfs_context *ctx, - struct nfs_filehandle *nfhp) -{ - int error; +static short nfsv3err_remove[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - nfhp->nfh_xh.nxh_version = htonl(NFS_FH_VERSION); - nfhp->nfh_xh.nxh_fsid = htonl(nx->nx_fs->nxfs_id); - nfhp->nfh_xh.nxh_expid = htonl(nx->nx_id); - nfhp->nfh_xh.nxh_flags = 0; - nfhp->nfh_xh.nxh_reserved = 0; +static short nfsv3err_rmdir[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_NOTDIR, + NFSERR_INVAL, + NFSERR_ROFS, + NFSERR_NAMETOL, + NFSERR_NOTEMPTY, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - if (v2) - bzero(&nfhp->nfh_fid[0], NFSV2_MAX_FID_SIZE); +static short nfsv3err_rename[] = { + NFSERR_IO, + NFSERR_NOENT, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_XDEV, + NFSERR_NOTDIR, + NFSERR_ISDIR, + NFSERR_INVAL, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_MLINK, + NFSERR_NAMETOL, + NFSERR_NOTEMPTY, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - /* if directory FH matches export root, return invalid FH */ - if (dnfhp && nfsrv_fhmatch(dnfhp, &nx->nx_fh)) { - nfhp->nfh_len = v2 ? NFSX_V2FH : sizeof(nfhp->nfh_xh); - nfhp->nfh_xh.nxh_fidlen = 0; - nfhp->nfh_xh.nxh_flags = htons(NXHF_INVALIDFH); - return (0); - } +static short nfsv3err_link[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_EXIST, + NFSERR_XDEV, + NFSERR_NOTDIR, + NFSERR_INVAL, + NFSERR_NOSPC, + NFSERR_ROFS, + NFSERR_MLINK, + NFSERR_NAMETOL, + NFSERR_DQUOT, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_NOTSUPP, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - nfhp->nfh_len = v2 ? NFSV2_MAX_FID_SIZE : NFS_MAX_FID_SIZE; - error = VFS_VPTOFH(vp, &nfhp->nfh_len, &nfhp->nfh_fid[0], ctx); - if (error) - return (error); - if (nfhp->nfh_len > (int)(v2 ? NFSV2_MAX_FID_SIZE : NFS_MAX_FID_SIZE)) - return (EOVERFLOW); - nfhp->nfh_xh.nxh_fidlen = nfhp->nfh_len; - nfhp->nfh_len += sizeof(nfhp->nfh_xh); - if (v2 && (nfhp->nfh_len < NFSX_V2FH)) - nfhp->nfh_len = NFSX_V2FH; +static short nfsv3err_readdir[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_BAD_COOKIE, + NFSERR_TOOSMALL, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - return (0); -} +static short nfsv3err_readdirplus[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_ACCES, + NFSERR_NOTDIR, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_BAD_COOKIE, + NFSERR_NOTSUPP, + NFSERR_TOOSMALL, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; -int -nfsrv_fhmatch(struct nfs_filehandle *fh1, struct nfs_filehandle *fh2) -{ - int len1, len2; +static short nfsv3err_fsstat[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; - len1 = sizeof(fh1->nfh_xh) + fh1->nfh_xh.nxh_fidlen; - len2 = sizeof(fh2->nfh_xh) + fh2->nfh_xh.nxh_fidlen; - if (len1 != len2) - return (0); - if (bcmp(&fh1->nfh_xh, &fh2->nfh_xh, len1)) - return (0); - return (1); -} - -#endif /* NFS_NOSERVER */ -/* - * This function compares two net addresses by family and returns TRUE - * if they are the same host. - * If there is any doubt, return FALSE. - * The AF_INET family is handled as a special case so that address mbufs - * don't need to be saved to store "struct in_addr", which is only 4 bytes. - */ -int -netaddr_match(family, haddr, nam) - int family; - union nethostaddr *haddr; - mbuf_t nam; -{ - struct sockaddr_in *inetaddr; - - switch (family) { - case AF_INET: - inetaddr = mbuf_data(nam); - if (inetaddr->sin_family == AF_INET && - inetaddr->sin_addr.s_addr == haddr->had_inetaddr) - return (1); - break; -#if ISO - case AF_ISO: - { - struct sockaddr_iso *isoaddr1, *isoaddr2; - - isoaddr1 = mbuf_data(nam); - isoaddr2 = mbuf_data(haddr->had_nam); - if (isoaddr1->siso_family == AF_ISO && - isoaddr1->siso_nlen > 0 && - isoaddr1->siso_nlen == isoaddr2->siso_nlen && - SAME_ISOADDR(isoaddr1, isoaddr2)) - return (1); - break; - } -#endif /* ISO */ - default: - break; - }; - return (0); -} +static short nfsv3err_fsinfo[] = { + NFSERR_STALE, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; -static nfsuint64 nfs_nullcookie = { { 0, 0 } }; -/* - * This function finds the directory cookie that corresponds to the - * logical byte offset given. - */ -nfsuint64 * -nfs_getcookie(np, off, add) - struct nfsnode *np; - off_t off; - int add; -{ - struct nfsdmap *dp, *dp2; - int pos; - - pos = off / NFS_DIRBLKSIZ; - if (pos == 0) { -#if DIAGNOSTIC - if (add) - panic("nfs getcookie add at 0"); -#endif - return (&nfs_nullcookie); - } - pos--; - dp = np->n_cookies.lh_first; - if (!dp) { - if (add) { - MALLOC_ZONE(dp, struct nfsdmap *, sizeof(struct nfsdmap), - M_NFSDIROFF, M_WAITOK); - if (!dp) - return ((nfsuint64 *)0); - dp->ndm_eocookie = 0; - LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); - } else - return ((nfsuint64 *)0); - } - while (pos >= NFSNUMCOOKIES) { - pos -= NFSNUMCOOKIES; - if (dp->ndm_list.le_next) { - if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && - pos >= dp->ndm_eocookie) - return ((nfsuint64 *)0); - dp = dp->ndm_list.le_next; - } else if (add) { - MALLOC_ZONE(dp2, struct nfsdmap *, sizeof(struct nfsdmap), - M_NFSDIROFF, M_WAITOK); - if (!dp2) - return ((nfsuint64 *)0); - dp2->ndm_eocookie = 0; - LIST_INSERT_AFTER(dp, dp2, ndm_list); - dp = dp2; - } else - return ((nfsuint64 *)0); - } - if (pos >= dp->ndm_eocookie) { - if (add) - dp->ndm_eocookie = pos + 1; - else - return ((nfsuint64 *)0); - } - return (&dp->ndm_cookies[pos]); -} +static short nfsv3err_pathconf[] = { + NFSERR_STALE, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; -/* - * Invalidate cached directory information, except for the actual directory - * blocks (which are invalidated separately). - * Done mainly to avoid the use of stale offset cookies. - */ -void -nfs_invaldir(vp) - vnode_t vp; -{ - struct nfsnode *np = VTONFS(vp); +static short nfsv3err_commit[] = { + NFSERR_IO, + NFSERR_IO, + NFSERR_STALE, + NFSERR_BADHANDLE, + NFSERR_SERVERFAULT, + NFSERR_TRYLATER, + 0, +}; -#if DIAGNOSTIC - if (vnode_vtype(vp) != VDIR) - panic("nfs: invaldir not dir"); -#endif - np->n_direofoffset = 0; - np->n_cookieverf.nfsuquad[0] = 0; - np->n_cookieverf.nfsuquad[1] = 0; - if (np->n_cookies.lh_first) - np->n_cookies.lh_first->ndm_eocookie = 0; -} +static short *nfsrv_v3errmap[] = { + nfsv3err_null, + nfsv3err_getattr, + nfsv3err_setattr, + nfsv3err_lookup, + nfsv3err_access, + nfsv3err_readlink, + nfsv3err_read, + nfsv3err_write, + nfsv3err_create, + nfsv3err_mkdir, + nfsv3err_symlink, + nfsv3err_mknod, + nfsv3err_remove, + nfsv3err_rmdir, + nfsv3err_rename, + nfsv3err_link, + nfsv3err_readdir, + nfsv3err_readdirplus, + nfsv3err_fsstat, + nfsv3err_fsinfo, + nfsv3err_pathconf, + nfsv3err_commit, +}; -#ifndef NFS_NOSERVER /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int -nfsrv_errmap(nd, err) - struct nfsrv_descript *nd; - int err; +nfsrv_errmap(struct nfsrv_descript *nd, int err) { short *defaulterrp, *errp; - if (nd->nd_flag & ND_NFSV3) { - if (nd->nd_procnum <= NFSPROC_COMMIT) { - errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; - while (*++errp) { - if (*errp == err) - return (err); - else if (*errp > err) - break; - } - return ((int)*defaulterrp); - } else + if (nd->nd_vers == NFS_VER2) { + if (err <= (int)sizeof(nfsrv_v2errmap)) + return ((int)nfsrv_v2errmap[err - 1]); + return (NFSERR_IO); + } + /* NFSv3 */ + if (nd->nd_procnum > NFSPROC_COMMIT) return (err & 0xffff); + errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; + while (*++errp) { + if (*errp == err) + return (err); + else if (*errp > err) + break; } - if (err <= ELAST) - return ((int)nfsrv_v2errmap[err - 1]); - return (NFSERR_IO); + return ((int)*defaulterrp); } -#endif /* NFS_NOSERVER */ +#endif /* NFSSERVER */