X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..7ee9d059c4eecf68ae4f8b0fb99ae2471eda79af:/bsd/nfs/nfs_subs.c diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index f0fd596a9..dccead918 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,7 +84,7 @@ #include #include #include -#include +#include #include #include #include @@ -99,6 +99,9 @@ #include #include #include +#if NFSCLIENT +#define _NFS_XDR_SUBS_FUNCS_ /* define this to get xdrbuf function definitions */ +#endif #include #include #include @@ -110,6 +113,8 @@ #include #include +#include + /* * NFS globals */ @@ -185,9 +190,23 @@ nfstov_type(nfstype nvtype, int nfsvers) int vtonfsv2_mode(enum vtype vtype, mode_t m) { - if (vtype == VFIFO) + switch (vtype) { + case VNON: + case VREG: + case VDIR: + case VBLK: + case VCHR: + case VLNK: + case VSOCK: + return vnode_makeimode(vtype, m); + case VFIFO: return vnode_makeimode(VCHR, m); - return vnode_makeimode(vtype, m); + case VBAD: + case VSTR: + case VCPLX: + default: + return vnode_makeimode(VNON, m); + } } #if NFSSERVER @@ -425,7 +444,7 @@ nfsm_chain_add_opaque_nopad_f(struct nfsm_chain *nmc, const u_char *buf, uint32_ * Add "len" bytes of data from "uio" to the given chain. */ int -nfsm_chain_add_uio(struct nfsm_chain *nmc, struct uio *uiop, uint32_t len) +nfsm_chain_add_uio(struct nfsm_chain *nmc, uio_t uio, uint32_t len) { uint32_t paddedlen, tlen; int error; @@ -443,7 +462,7 @@ nfsm_chain_add_uio(struct nfsm_chain *nmc, struct uio *uiop, uint32_t len) if (len) { if (tlen > len) tlen = len; - uiomove(nmc->nmc_ptr, tlen, uiop); + uiomove(nmc->nmc_ptr, tlen, uio); } else { bzero(nmc->nmc_ptr, tlen); } @@ -739,7 +758,7 @@ nfsm_chain_get_opaque_f(struct nfsm_chain *nmc, uint32_t len, u_char *buf) * The nfsm_chain is advanced by nfsm_rndup("len") bytes. */ int -nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, struct uio *uiop) +nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, uio_t uio) { uint32_t cplen, padlen; int error = 0; @@ -751,7 +770,7 @@ nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, struct uio *uiop) /* copy as much as we need/can */ cplen = MIN(nmc->nmc_left, len); if (cplen) { - error = uiomove(nmc->nmc_ptr, cplen, uiop); + error = uiomove(nmc->nmc_ptr, cplen, uio); if (error) return (error); nmc->nmc_ptr += cplen; @@ -779,6 +798,33 @@ nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, struct uio *uiop) #if NFSCLIENT +int +nfsm_chain_add_string_nfc(struct nfsm_chain *nmc, const uint8_t *s, uint32_t slen) +{ + uint8_t smallbuf[64]; + uint8_t *nfcname = smallbuf; + size_t buflen = sizeof(smallbuf), nfclen; + int error; + + error = utf8_normalizestr(s, slen, nfcname, &nfclen, buflen, UTF_PRECOMPOSED|UTF_NO_NULL_TERM); + if (error == ENAMETOOLONG) { + buflen = MAXPATHLEN; + MALLOC_ZONE(nfcname, uint8_t *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (nfcname) + error = utf8_normalizestr(s, slen, nfcname, &nfclen, buflen, UTF_PRECOMPOSED|UTF_NO_NULL_TERM); + } + + /* if we got an error, just use the original string */ + if (error) + nfsm_chain_add_string(error, nmc, s, slen); + else + nfsm_chain_add_string(error, nmc, nfcname, nfclen); + + if (nfcname && (nfcname != smallbuf)) + FREE_ZONE(nfcname, MAXPATHLEN, M_NAMEI); + return (error); +} + /* * Add an NFSv2 "sattr" structure to an mbuf chain */ @@ -895,7 +941,7 @@ nfsm_chain_get_fh_attr( error = nfs_parsefattr(nmc, nfsvers, nvap); } else if (gotfh) { /* we need valid attributes in order to call nfs_nget() */ - if (nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, ctx, nvap, xidp)) { + if (nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, 0, ctx, nvap, xidp)) { gotattr = 0; fhp->fh_len = 0; } @@ -933,6 +979,36 @@ nfsm_chain_get_wcc_data_f( return (error); } +/* + * Get the next RPC transaction ID (XID) + */ +void +nfs_get_xid(uint64_t *xidp) +{ + struct timeval tv; + + lck_mtx_lock(nfs_request_mutex); + if (!nfs_xid) { + /* + * Derive initial xid from system time. + * + * Note: it's OK if this code inits nfs_xid to 0 (for example, + * due to a broken clock) because we immediately increment it + * and we guarantee to never use xid 0. So, nfs_xid should only + * ever be 0 the first time this function is called. + */ + microtime(&tv); + nfs_xid = tv.tv_sec << 12; + } + if (++nfs_xid == 0) { + /* Skip zero xid if it should ever happen. */ + nfs_xidwrap++; + nfs_xid++; + } + *xidp = nfs_xid + ((uint64_t)nfs_xidwrap << 32); + lck_mtx_unlock(nfs_request_mutex); +} + /* * Build the RPC header and fill in the authorization info. * Returns the head of the mbuf list and the xid. @@ -941,7 +1017,6 @@ nfsm_chain_get_wcc_data_f( int nfsm_rpchead( struct nfsreq *req, - int auth_len, mbuf_t mrest, u_int64_t *xidp, mbuf_t *mreqp) @@ -949,24 +1024,55 @@ nfsm_rpchead( struct nfsmount *nmp = req->r_nmp; int nfsvers = nmp->nm_vers; int proc = ((nfsvers == NFS_VER2) ? nfsv2_procid[req->r_procnum] : (int)req->r_procnum); - int auth_type = (!auth_len && !req->r_cred) ? RPCAUTH_NULL : nmp->nm_auth; - return nfsm_rpchead2(nmp->nm_sotype, NFS_PROG, nfsvers, proc, - auth_type, auth_len, req->r_cred, req, mrest, xidp, mreqp); + return nfsm_rpchead2(nmp, nmp->nm_sotype, NFS_PROG, nfsvers, proc, + req->r_auth, req->r_cred, req, mrest, xidp, mreqp); } int -nfsm_rpchead2(int sotype, int prog, int vers, int proc, int auth_type, int auth_len, +nfsm_rpchead2(struct nfsmount *nmp, int sotype, int prog, int vers, int proc, int auth_type, kauth_cred_t cred, struct nfsreq *req, mbuf_t mrest, u_int64_t *xidp, mbuf_t *mreqp) { mbuf_t mreq, mb; - int error, i, grpsiz, authsiz, reqlen; + int error, i, grpsiz, auth_len = 0, authsiz, reqlen; size_t headlen; - struct timeval tv; struct nfsm_chain nmreq; - /* allocate the packet */ + /* calculate expected auth length */ + switch (auth_type) { + case RPCAUTH_NONE: + auth_len = 0; + break; + case RPCAUTH_SYS: + { + gid_t grouplist[NGROUPS]; + int groupcount = NGROUPS; + + if (!cred) + return (EINVAL); + + (void)kauth_cred_getgroups(cred, grouplist, &groupcount); + if (groupcount < 1) + return (EINVAL); + + auth_len = ((((groupcount - 1) > nmp->nm_numgrps) ? + nmp->nm_numgrps : (groupcount - 1)) << 2) + + 5 * NFSX_UNSIGNED; + break; + } + case RPCAUTH_KRB5: + case RPCAUTH_KRB5I: + case RPCAUTH_KRB5P: + if (!req || !cred) + return (EINVAL); + auth_len = 5 * NFSX_UNSIGNED + 0; // zero context handle for now + break; + default: + return (EINVAL); + } authsiz = nfsm_rndup(auth_len); + + /* allocate the packet */ headlen = authsiz + 10 * NFSX_UNSIGNED; if (sotype == SOCK_STREAM) /* also include room for any RPC Record Mark */ headlen += NFSX_UNSIGNED; @@ -992,33 +1098,11 @@ nfsm_rpchead2(int sotype, int prog, int vers, int proc, int auth_type, int auth_ * it may be a higher-level resend with a GSSAPI credential. * Otherwise, allocate a new one. */ - if (*xidp == 0) { - lck_mtx_lock(nfs_request_mutex); - if (!nfs_xid) { - /* - * Derive initial xid from system time. - * - * Note: it's OK if this code inits nfs_xid to 0 (for example, - * due to a broken clock) because we immediately increment it - * and we guarantee to never use xid 0. So, nfs_xid should only - * ever be 0 the first time this function is called. - */ - microtime(&tv); - nfs_xid = tv.tv_sec << 12; - } - if (++nfs_xid == 0) { - /* Skip zero xid if it should ever happen. */ - nfs_xidwrap++; - nfs_xid++; - } - *xidp = nfs_xid + ((u_int64_t)nfs_xidwrap << 32); - lck_mtx_unlock(nfs_request_mutex); - } + if (*xidp == 0) + nfs_get_xid(xidp); /* build the header(s) */ - nmreq.nmc_mcur = nmreq.nmc_mhead = mreq; - nmreq.nmc_ptr = mbuf_data(nmreq.nmc_mcur); - nmreq.nmc_left = mbuf_trailingspace(nmreq.nmc_mcur); + nfsm_chain_init(&nmreq, mreq); /* First, if it's a TCP stream insert space for an RPC record mark */ if (sotype == SOCK_STREAM) @@ -1034,27 +1118,36 @@ nfsm_rpchead2(int sotype, int prog, int vers, int proc, int auth_type, int auth_ add_cred: switch (auth_type) { - case RPCAUTH_NULL: - nfsm_chain_add_32(error, &nmreq, RPCAUTH_NULL); /* auth */ + case RPCAUTH_NONE: + nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE); /* auth */ nfsm_chain_add_32(error, &nmreq, 0); /* length */ - nfsm_chain_add_32(error, &nmreq, RPCAUTH_NULL); /* verf */ + nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE); /* verf */ nfsm_chain_add_32(error, &nmreq, 0); /* length */ nfsm_chain_build_done(error, &nmreq); + /* Append the args mbufs */ + if (!error) + error = mbuf_setnext(nmreq.nmc_mcur, mrest); break; - case RPCAUTH_UNIX: - nfsm_chain_add_32(error, &nmreq, RPCAUTH_UNIX); + case RPCAUTH_SYS: { + gid_t grouplist[NGROUPS]; + int groupcount; + + nfsm_chain_add_32(error, &nmreq, RPCAUTH_SYS); nfsm_chain_add_32(error, &nmreq, authsiz); nfsm_chain_add_32(error, &nmreq, 0); /* stamp */ nfsm_chain_add_32(error, &nmreq, 0); /* zero-length hostname */ nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(cred)); /* UID */ - nfsm_chain_add_32(error, &nmreq, cred->cr_groups[0]); /* GID */ + nfsm_chain_add_32(error, &nmreq, kauth_cred_getgid(cred)); /* GID */ grpsiz = (auth_len >> 2) - 5; nfsm_chain_add_32(error, &nmreq, grpsiz);/* additional GIDs */ + memset(grouplist, 0, sizeof(grouplist)); + groupcount = grpsiz; + (void)kauth_cred_getgroups(cred, grouplist, &groupcount); for (i = 1; i <= grpsiz; i++) - nfsm_chain_add_32(error, &nmreq, cred->cr_groups[i]); + nfsm_chain_add_32(error, &nmreq, grouplist[i]); /* And the verifier... */ - nfsm_chain_add_32(error, &nmreq, RPCAUTH_NULL); /* flavor */ + nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE); /* flavor */ nfsm_chain_add_32(error, &nmreq, 0); /* length */ nfsm_chain_build_done(error, &nmreq); @@ -1062,16 +1155,24 @@ add_cred: if (!error) error = mbuf_setnext(nmreq.nmc_mcur, mrest); break; + } case RPCAUTH_KRB5: case RPCAUTH_KRB5I: case RPCAUTH_KRB5P: error = nfs_gss_clnt_cred_put(req, &nmreq, mrest); if (error == ENEEDAUTH) { + gid_t grouplist[NGROUPS]; + int groupcount = NGROUPS; /* * Use sec=sys for this user */ error = 0; - auth_type = RPCAUTH_UNIX; + req->r_auth = auth_type = RPCAUTH_SYS; + (void)kauth_cred_getgroups(cred, grouplist, &groupcount); + auth_len = ((((groupcount - 1) > nmp->nm_numgrps) ? + nmp->nm_numgrps : (groupcount - 1)) << 2) + + 5 * NFSX_UNSIGNED; + authsiz = nfsm_rndup(auth_len); goto add_cred; } break; @@ -1120,6 +1221,21 @@ nfs_parsefattr(struct nfsm_chain *nmc, int nfsvers, struct nfs_vattr *nvap) dev_t rdev; val = val2 = 0; + NVATTR_INIT(nvap); + + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TYPE); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_MODE); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SIZE); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SPACE_USED); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_RAWDEV); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FSID); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FILEID); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_ACCESS); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_MODIFY); + NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_METADATA); nfsm_chain_get_32(error, nmc, vtype); nfsm_chain_get_32(error, nmc, vmode); @@ -1220,6 +1336,12 @@ nfs_loadattrcache( vnode_t vp; struct timeval now; struct nfs_vattr *npnvap; + int xattr = np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR; + int referral = np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL; + int aclbit, monitored, error = 0; + kauth_acl_t acl; + struct nfsmount *nmp; + uint32_t events = np->n_events; if (np->n_hflag & NHINIT) { vp = NULL; @@ -1228,10 +1350,11 @@ nfs_loadattrcache( vp = NFSTOV(np); mp = vnode_mount(vp); } + monitored = vp ? vnode_ismonitored(vp) : 0; FSDBG_TOP(527, np, vp, *xidp >> 32, *xidp); - if (!VFSTONFS(mp)) { + if (!((nmp = VFSTONFS(mp)))) { FSDBG_BOT(527, ENXIO, 1, 0, *xidp); return (ENXIO); } @@ -1277,41 +1400,160 @@ nfs_loadattrcache( */ printf("nfs loadattrcache vnode changed type, was %d now %d\n", vnode_vtype(vp), nvap->nva_type); - FSDBG_BOT(527, ESTALE, 3, 0, *xidp); - return (ESTALE); + error = ESTALE; + if (monitored) + events |= VNODE_EVENT_DELETE; + goto out; } - microuptime(&now); - np->n_attrstamp = now.tv_sec; - np->n_xid = *xidp; - npnvap = &np->n_vattr; - bcopy((caddr_t)nvap, (caddr_t)npnvap, sizeof(*nvap)); - if (nvap->nva_size != np->n_size) { + /* + * The ACL cache needs special handling because it is not + * always updated. Save current ACL cache state so it can + * be restored after copying the new attributes into place. + */ + aclbit = NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL); + acl = npnvap->nva_acl; + + if (monitored) { /* - * n_size is protected by the data lock, so we need to - * defer updating it until it's safe. We save the new size - * and set a flag and it'll get updated the next time we get/drop - * the data lock or the next time we do a getattr. + * For monitored nodes, check for attribute changes that should generate events. */ - np->n_newsize = nvap->nva_size; - FSDBG(527, np, nvap->nva_size, np->n_size, (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 6 : 4)); - SET(np->n_flag, NUPDATESIZE); - if (vp && (nvap->nva_type == VREG)) { - if (!UBCINFOEXISTS(vp) || (dontshrink && (np->n_newsize < np->n_size))) { - /* asked not to shrink, so stick with current size */ - FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001); - nvap->nva_size = np->n_size; - CLR(np->n_flag, NUPDATESIZE); - NATTRINVALIDATE(np); - } else if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) { - /* if we've modified, use larger size */ - FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002); - nvap->nva_size = np->n_size; - CLR(np->n_flag, NUPDATESIZE); + if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS) && + (nvap->nva_nlink != npnvap->nva_nlink)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_LINK; + if (events & VNODE_EVENT_PERMS) + /* no need to do all the checking if it's already set */; + else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_MODE) && + (nvap->nva_mode != npnvap->nva_mode)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER) && + (nvap->nva_uid != npnvap->nva_uid)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP) && + (nvap->nva_gid != npnvap->nva_gid)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if (nmp->nm_vers >= NFS_VER4) { + if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER) && + !kauth_guid_equal(&nvap->nva_uuuid, &npnvap->nva_uuuid)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP) && + !kauth_guid_equal(&nvap->nva_guuid, &npnvap->nva_guuid)) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + else if ((NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_ACL) && + nvap->nva_acl && npnvap->nva_acl && + ((nvap->nva_acl->acl_entrycount != npnvap->nva_acl->acl_entrycount) || + bcmp(nvap->nva_acl, npnvap->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl))))) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS; + } + if (((nmp->nm_vers >= NFS_VER4) && (nvap->nva_change != npnvap->nva_change)) || + (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_MODIFY) && + ((nvap->nva_timesec[NFSTIME_MODIFY] != npnvap->nva_timesec[NFSTIME_MODIFY]) || + (nvap->nva_timensec[NFSTIME_MODIFY] != npnvap->nva_timensec[NFSTIME_MODIFY])))) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_WRITE; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_RAWDEV) && + ((nvap->nva_rawdev.specdata1 != npnvap->nva_rawdev.specdata1) || + (nvap->nva_rawdev.specdata2 != npnvap->nva_rawdev.specdata2))) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_FILEID) && + (nvap->nva_fileid != npnvap->nva_fileid)) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ARCHIVE) && + ((nvap->nva_flags & NFS_FFLAG_ARCHIVED) != (npnvap->nva_flags & NFS_FFLAG_ARCHIVED))) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_HIDDEN) && + ((nvap->nva_flags & NFS_FFLAG_HIDDEN) != (npnvap->nva_flags & NFS_FFLAG_HIDDEN))) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_CREATE) && + ((nvap->nva_timesec[NFSTIME_CREATE] != npnvap->nva_timesec[NFSTIME_CREATE]) || + (nvap->nva_timensec[NFSTIME_CREATE] != npnvap->nva_timensec[NFSTIME_CREATE]))) + events |= VNODE_EVENT_ATTRIB; + if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_BACKUP) && + ((nvap->nva_timesec[NFSTIME_BACKUP] != npnvap->nva_timesec[NFSTIME_BACKUP]) || + (nvap->nva_timensec[NFSTIME_BACKUP] != npnvap->nva_timensec[NFSTIME_BACKUP]))) + events |= VNODE_EVENT_ATTRIB; + } + + /* Copy the attributes to the attribute cache */ + bcopy((caddr_t)nvap, (caddr_t)npnvap, sizeof(*nvap)); + + microuptime(&now); + np->n_attrstamp = now.tv_sec; + np->n_xid = *xidp; + /* NFS_FFLAG_IS_ATTR and NFS_FFLAG_TRIGGER_REFERRAL need to be sticky... */ + if (vp && xattr) + nvap->nva_flags |= xattr; + if (vp && referral) + nvap->nva_flags |= referral; + + if (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL)) { + /* we're updating the ACL */ + if (nvap->nva_acl) { + /* make a copy of the acl for the cache */ + npnvap->nva_acl = kauth_acl_alloc(nvap->nva_acl->acl_entrycount); + if (npnvap->nva_acl) { + bcopy(nvap->nva_acl, npnvap->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl)); + } else { + /* can't make a copy to cache, invalidate ACL cache */ + NFS_BITMAP_CLR(npnvap->nva_bitmap, NFS_FATTR_ACL); + NACLINVALIDATE(np); + aclbit = 0; } } + if (acl) { + kauth_acl_free(acl); + acl = NULL; + } + } + if (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL)) { + /* update the ACL timestamp */ + np->n_aclstamp = now.tv_sec; + } else { + /* we aren't updating the ACL, so restore original values */ + if (aclbit) + NFS_BITMAP_SET(npnvap->nva_bitmap, NFS_FATTR_ACL); + npnvap->nva_acl = acl; + } + +#if CONFIG_TRIGGERS + /* + * For NFSv4, if the fsid doesn't match the fsid for the mount, then + * this node is for a different file system on the server. So we mark + * this node as a trigger node that will trigger the mirror mount. + */ + if ((nmp->nm_vers >= NFS_VER4) && (nvap->nva_type == VDIR) && + ((np->n_vattr.nva_fsid.major != nmp->nm_fsid.major) || + (np->n_vattr.nva_fsid.minor != nmp->nm_fsid.minor))) + np->n_vattr.nva_flags |= NFS_FFLAG_TRIGGER; +#endif + + if (!vp || (nvap->nva_type != VREG)) { + np->n_size = nvap->nva_size; + } else if (nvap->nva_size != np->n_size) { + FSDBG(527, np, nvap->nva_size, np->n_size, (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 6 : 4)); + if (!UBCINFOEXISTS(vp) || (dontshrink && (nvap->nva_size < np->n_size))) { + /* asked not to shrink, so stick with current size */ + FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001); + nvap->nva_size = np->n_size; + NATTRINVALIDATE(np); + } else if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) { + /* if we've modified, stick with larger size */ + FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002); + nvap->nva_size = np->n_size; + npnvap->nva_size = np->n_size; + } else { + /* + * n_size is protected by the data lock, so we need to + * defer updating it until it's safe. We save the new size + * and set a flag and it'll get updated the next time we get/drop + * the data lock or the next time we do a getattr. + */ + np->n_newsize = nvap->nva_size; + SET(np->n_flag, NUPDATESIZE); + if (monitored) + events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_EXTEND; + } } if (np->n_flag & NCHG) { @@ -1325,8 +1567,11 @@ nfs_loadattrcache( } } - FSDBG_BOT(527, 0, np, np->n_size, *xidp); - return (0); +out: + if (monitored && events) + nfs_vnode_notify(np, events); + FSDBG_BOT(527, error, np, np->n_size, *xidp); + return (error); } /* @@ -1338,20 +1583,26 @@ nfs_attrcachetimeout(nfsnode_t np) { struct nfsmount *nmp; struct timeval now; - int isdir, timeo; + int isdir; + uint32_t timeo; if (!(nmp = NFSTONMP(np))) return (0); isdir = vnode_isdir(NFSTOV(np)); - if ((np)->n_flag & NMODIFIED) + if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) { + /* If we have a delegation, we always use the max timeout. */ + timeo = isdir ? nmp->nm_acdirmax : nmp->nm_acregmax; + } else if ((np)->n_flag & NMODIFIED) { + /* If we have modifications, we always use the min timeout. */ timeo = isdir ? nmp->nm_acdirmin : nmp->nm_acregmin; - else { + } else { + /* Otherwise, we base the timeout on how old the file seems. */ /* Note that if the client and server clocks are way out of sync, */ /* timeout will probably get clamped to a min or max value */ microtime(&now); - timeo = (now.tv_sec - (np)->n_mtime.tv_sec) / 10; + timeo = (now.tv_sec - (np)->n_vattr.nva_timesec[NFSTIME_MODIFY]) / 10; if (isdir) { if (timeo < nmp->nm_acdirmin) timeo = nmp->nm_acdirmin; @@ -1369,66 +1620,59 @@ nfs_attrcachetimeout(nfsnode_t np) } /* - * Check the time stamp + * Check the attribute cache time stamp. * If the cache is valid, copy contents to *nvaper and return 0 - * otherwise return an error + * otherwise return an error. + * Must be called with the node locked. */ int -nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int alreadylocked) +nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int flags) { struct nfs_vattr *nvap; struct timeval nowup; int32_t timeo; - if (!alreadylocked && nfs_lock(np, NFS_NODE_LOCK_SHARED)) { - FSDBG(528, np, 0, 0xffffff00, ENOENT); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); - return (ENOENT); - } - - if (!NATTRVALID(np)) { - if (!alreadylocked) - nfs_unlock(np); + /* Check if the attributes are valid. */ + if (!NATTRVALID(np) || ((flags & NGA_ACL) && !NACLVALID(np))) { FSDBG(528, np, 0, 0xffffff01, ENOENT); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); + OSAddAtomic(1, &nfsstats.attrcache_misses); return (ENOENT); } + /* Verify the cached attributes haven't timed out. */ timeo = nfs_attrcachetimeout(np); - microuptime(&nowup); if ((nowup.tv_sec - np->n_attrstamp) >= timeo) { - if (!alreadylocked) - nfs_unlock(np); FSDBG(528, np, 0, 0xffffff02, ENOENT); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); + OSAddAtomic(1, &nfsstats.attrcache_misses); + return (ENOENT); + } + if ((flags & NGA_ACL) && ((nowup.tv_sec - np->n_aclstamp) >= timeo)) { + FSDBG(528, np, 0, 0xffffff02, ENOENT); + OSAddAtomic(1, &nfsstats.attrcache_misses); return (ENOENT); } nvap = &np->n_vattr; FSDBG(528, np, nvap->nva_size, np->n_size, 0xcace); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_hits); + OSAddAtomic(1, &nfsstats.attrcache_hits); - if (nvap->nva_size != np->n_size) { - /* - * n_size is protected by the data lock, so we need to - * defer updating it until it's safe. We save the new size - * and set a flag and it'll get updated the next time we get/drop - * the data lock or the next time we do a getattr. - */ - if (!alreadylocked) { - /* need to upgrade shared lock to exclusive */ - if (lck_rw_lock_shared_to_exclusive(&np->n_lock) == FALSE) - lck_rw_lock_exclusive(&np->n_lock); - } - np->n_newsize = nvap->nva_size; + if (nvap->nva_type != VREG) { + np->n_size = nvap->nva_size; + } else if (nvap->nva_size != np->n_size) { FSDBG(528, np, nvap->nva_size, np->n_size, (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 6 : 4)); - SET(np->n_flag, NUPDATESIZE); - if ((nvap->nva_type == VREG) && (np->n_flag & NMODIFIED) && - (nvap->nva_size < np->n_size)) { - /* if we've modified, use larger size */ + if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) { + /* if we've modified, stick with larger size */ nvap->nva_size = np->n_size; - CLR(np->n_flag, NUPDATESIZE); + } else { + /* + * n_size is protected by the data lock, so we need to + * defer updating it until it's safe. We save the new size + * and set a flag and it'll get updated the next time we get/drop + * the data lock or the next time we do a getattr. + */ + np->n_newsize = nvap->nva_size; + SET(np->n_flag, NUPDATESIZE); } } @@ -1443,85 +1687,257 @@ nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int alreadylocked) nvaper->nva_timensec[NFSTIME_MODIFY] = np->n_mtim.tv_nsec; } } - if (!alreadylocked) - nfs_unlock(np); + if (nvap->nva_acl) { + if (flags & NGA_ACL) { + nvaper->nva_acl = kauth_acl_alloc(nvap->nva_acl->acl_entrycount); + if (!nvaper->nva_acl) + return (ENOMEM); + bcopy(nvap->nva_acl, nvaper->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl)); + } else { + nvaper->nva_acl = NULL; + } + } return (0); } - -static nfsuint64 nfs_nullcookie = { { 0, 0 } }; /* - * This function finds the directory cookie that corresponds to the - * logical byte offset given. + * When creating file system objects: + * Don't bother setting UID if it's the same as the credential performing the create. + * Don't bother setting GID if it's the same as the directory or credential. */ -nfsuint64 * -nfs_getcookie(nfsnode_t dnp, off_t off, int add) +void +nfs_avoid_needless_id_setting_on_create(nfsnode_t dnp, struct vnode_attr *vap, vfs_context_t ctx) { - struct nfsdmap *dp, *dp2; - int pos; - - pos = off / NFS_DIRBLKSIZ; - if (pos == 0) - return (&nfs_nullcookie); - pos--; - dp = dnp->n_cookies.lh_first; - if (!dp) { - if (add) { - MALLOC_ZONE(dp, struct nfsdmap *, sizeof(struct nfsdmap), - M_NFSDIROFF, M_WAITOK); - if (!dp) - return ((nfsuint64 *)0); - dp->ndm_eocookie = 0; - LIST_INSERT_HEAD(&dnp->n_cookies, dp, ndm_list); - } else - return ((nfsuint64 *)0); - } - while (pos >= NFSNUMCOOKIES) { - pos -= NFSNUMCOOKIES; - if (dp->ndm_list.le_next) { - if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && - pos >= dp->ndm_eocookie) - return ((nfsuint64 *)0); - dp = dp->ndm_list.le_next; - } else if (add) { - MALLOC_ZONE(dp2, struct nfsdmap *, sizeof(struct nfsdmap), - M_NFSDIROFF, M_WAITOK); - if (!dp2) - return ((nfsuint64 *)0); - dp2->ndm_eocookie = 0; - LIST_INSERT_AFTER(dp, dp2, ndm_list); - dp = dp2; - } else - return ((nfsuint64 *)0); - } - if (pos >= dp->ndm_eocookie) { - if (add) - dp->ndm_eocookie = pos + 1; - else - return ((nfsuint64 *)0); + if (VATTR_IS_ACTIVE(vap, va_uid)) { + if (kauth_cred_getuid(vfs_context_ucred(ctx)) == vap->va_uid) { + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_uuuid); + } + } + if (VATTR_IS_ACTIVE(vap, va_gid)) { + if ((vap->va_gid == dnp->n_vattr.nva_gid) || + (kauth_cred_getgid(vfs_context_ucred(ctx)) == vap->va_gid)) { + VATTR_CLEAR_ACTIVE(vap, va_gid); + VATTR_CLEAR_ACTIVE(vap, va_guuid); + } } - return (&dp->ndm_cookies[pos]); } /* - * Invalidate cached directory information, except for the actual directory - * blocks (which are invalidated separately). - * Done mainly to avoid the use of stale offset cookies. + * Convert a universal address string to a sockaddr structure. + * + * Universal addresses can be in the following formats: + * + * d = decimal (IPv4) + * x = hexadecimal (IPv6) + * p = port (decimal) + * + * d.d.d.d + * d.d.d.d.p.p + * x:x:x:x:x:x:x:x + * x:x:x:x:x:x:x:x.p.p + * x:x:x:x:x:x:d.d.d.d + * x:x:x:x:x:x:d.d.d.d.p.p + * + * IPv6 strings can also have a series of zeroes elided + * IPv6 strings can also have a %scope suffix at the end (after any port) + * + * rules & exceptions: + * - value before : is hex + * - value before . is dec + * - once . hit, all values are dec + * - hex+port case means value before first dot is actually hex + * - . is always preceded by digits except if last hex was double-colon + * + * scan, converting #s to bytes + * first time a . is encountered, scan the rest to count them. + * 2 dots = just port + * 3 dots = just IPv4 no port + * 5 dots = IPv4 and port */ -void -nfs_invaldir(nfsnode_t dnp) + +#define IS_DIGIT(C) \ + (((C) >= '0') && ((C) <= '9')) + +#define IS_XDIGIT(C) \ + (IS_DIGIT(C) || \ + (((C) >= 'A') && ((C) <= 'F')) || \ + (((C) >= 'a') && ((C) <= 'f'))) + +int +nfs_uaddr2sockaddr(const char *uaddr, struct sockaddr *addr) { - if (vnode_vtype(NFSTOV(dnp)) != VDIR) { - printf("nfs: invaldir not dir\n"); - return; + const char *p, *pd; /* pointers to current character in scan */ + const char *pnum; /* pointer to current number to decode */ + const char *pscope; /* pointer to IPv6 scope ID */ + uint8_t a[18]; /* octet array to store address bytes */ + int i; /* index of next octet to decode */ + int dci; /* index of octet to insert double-colon zeroes */ + int dcount, xdcount; /* count of digits in current number */ + int needmore; /* set when we know we need more input (e.g. after colon, period) */ + int dots; /* # of dots */ + int hex; /* contains hex values */ + unsigned long val; /* decoded value */ + int s; /* index used for sliding array to insert elided zeroes */ + +#define HEXVALUE 0 +#define DECIMALVALUE 1 +#define GET(TYPE) \ + do { \ + if ((dcount <= 0) || (dcount > (((TYPE) == DECIMALVALUE) ? 3 : 4))) \ + return (0); \ + if (((TYPE) == DECIMALVALUE) && xdcount) \ + return (0); \ + val = strtoul(pnum, NULL, ((TYPE) == DECIMALVALUE) ? 10 : 16); \ + if (((TYPE) == DECIMALVALUE) && (val >= 256)) \ + return (0); \ + /* check if there is room left in the array */ \ + if (i > (int)(sizeof(a) - (((TYPE) == HEXVALUE) ? 2 : 1) - ((dci != -1) ? 2 : 0))) \ + return (0); \ + if ((TYPE) == HEXVALUE) \ + a[i++] = ((val >> 8) & 0xff); \ + a[i++] = (val & 0xff); \ + } while (0) + + hex = 0; + dots = 0; + dci = -1; + i = dcount = xdcount = 0; + pnum = p = uaddr; + pscope = NULL; + needmore = 1; + if ((*p == ':') && (*++p != ':')) /* if it starts with colon, gotta be a double */ + return (0); + + while (*p) { + if (IS_XDIGIT(*p)) { + dcount++; + if (!IS_DIGIT(*p)) + xdcount++; + needmore = 0; + p++; + } else if (*p == '.') { + /* rest is decimal IPv4 dotted quad and/or port */ + if (!dots) { + /* this is the first, so count them */ + for (pd = p; *pd; pd++) { + if (*pd == '.') { + if (++dots > 5) + return (0); + } else if (hex && (*pd == '%')) { + break; + } else if ((*pd < '0') || (*pd > '9')) { + return (0); + } + } + if ((dots != 2) && (dots != 3) && (dots != 5)) + return (0); + if (hex && (dots == 2)) { /* hex+port */ + if (!dcount && needmore) + return (0); + if (dcount) /* last hex may be elided zero */ + GET(HEXVALUE); + } else { + GET(DECIMALVALUE); + } + } else { + GET(DECIMALVALUE); + } + dcount = xdcount = 0; + needmore = 1; + pnum = ++p; + } else if (*p == ':') { + hex = 1; + if (dots) + return (0); + if (!dcount) { /* missing number, probably double colon */ + if (dci >= 0) /* can only have one double colon */ + return (0); + dci = i; + needmore = 0; + } else { + GET(HEXVALUE); + dcount = xdcount = 0; + needmore = 1; + } + pnum = ++p; + } else if (*p == '%') { /* scope ID delimiter */ + if (!hex) + return (0); + p++; + pscope = p; + break; + } else { /* unexpected character */ + return (0); + } + } + if (needmore && !dcount) + return (0); + if (dcount) /* decode trailing number */ + GET(dots ? DECIMALVALUE : HEXVALUE); + if (dci >= 0) { /* got a double-colon at i, need to insert a range of zeroes */ + /* if we got a port, slide to end of array */ + /* otherwise, slide to end of address (non-port) values */ + int end = ((dots == 2) || (dots == 5)) ? sizeof(a) : (sizeof(a) - 2); + if (i % 2) /* length of zero range must be multiple of 2 */ + return (0); + if (i >= end) /* no room? */ + return (0); + /* slide (i-dci) numbers up from index dci */ + for (s=0; s < (i - dci); s++) + a[end-1-s] = a[i-1-s]; + /* zero (end-i) numbers at index dci */ + for (s=0; s < (end - i); s++) + a[dci+s] = 0; + i = end; + } + + /* copy out resulting socket address */ + if (hex) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6*)addr; + if ((((dots == 0) || (dots == 3)) && (i != (sizeof(a)-2)))) + return (0); + if ((((dots == 2) || (dots == 5)) && (i != sizeof(a)))) + return (0); + bzero(sin6, sizeof(struct sockaddr_in6)); + sin6->sin6_len = sizeof(struct sockaddr_in6); + sin6->sin6_family = AF_INET6; + bcopy(a, &sin6->sin6_addr.s6_addr, sizeof(struct in6_addr)); + if ((dots == 5) || (dots == 2)) + sin6->sin6_port = htons((a[16] << 8) | a[17]); + if (pscope) { + for (p=pscope; IS_DIGIT(*p); p++) + ; + if (*p && !IS_DIGIT(*p)) { /* name */ + ifnet_t interface = NULL; + if (ifnet_find_by_name(pscope, &interface) == 0) + sin6->sin6_scope_id = ifnet_index(interface); + if (interface) + ifnet_release(interface); + } else { /* decimal number */ + sin6->sin6_scope_id = strtoul(pscope, NULL, 10); + } + /* XXX should we also embed scope id for linklocal? */ + } + } else { + struct sockaddr_in *sin = (struct sockaddr_in*)addr; + if ((dots != 3) && (dots != 5)) + return (0); + if ((dots == 3) && (i != 4)) + return (0); + if ((dots == 5) && (i != 6)) + return (0); + bzero(sin, sizeof(struct sockaddr_in)); + sin->sin_len = sizeof(struct sockaddr_in); + sin->sin_family = AF_INET; + bcopy(a, &sin->sin_addr.s_addr, sizeof(struct in_addr)); + if (dots == 5) + sin->sin_port = htons((a[4] << 8) | a[5]); } - dnp->n_direofoffset = 0; - dnp->n_cookieverf.nfsuquad[0] = 0; - dnp->n_cookieverf.nfsuquad[1] = 0; - if (dnp->n_cookies.lh_first) - dnp->n_cookies.lh_first->ndm_eocookie = 0; + return (1); } + #endif /* NFSCLIENT */ /* @@ -1540,8 +1956,15 @@ nfs_interval_timer_start(thread_call_t call, int interval) #if NFSSERVER -static void nfsrv_init_user_list(struct nfs_active_user_list *); -static void nfsrv_free_user_list(struct nfs_active_user_list *); +int nfsrv_cmp_secflavs(struct nfs_sec *, struct nfs_sec *); +int nfsrv_hang_addrlist(struct nfs_export *, struct user_nfs_export_args *); +int nfsrv_free_netopt(struct radix_node *, void *); +int nfsrv_free_addrlist(struct nfs_export *, struct user_nfs_export_args *); +struct nfs_export_options *nfsrv_export_lookup(struct nfs_export *, mbuf_t); +struct nfs_export *nfsrv_fhtoexport(struct nfs_filehandle *); +struct nfs_user_stat_node *nfsrv_get_user_stat_node(struct nfs_active_user_list *, struct sockaddr *, uid_t); +void nfsrv_init_user_list(struct nfs_active_user_list *); +void nfsrv_free_user_list(struct nfs_active_user_list *); /* * add NFSv3 WCC data to an mbuf chain @@ -1634,6 +2057,7 @@ nfsrv_namei( vnode_t dp; int error; struct componentname *cnp = &nip->ni_cnd; + uint32_t cnflags; char *tmppn; *retdirp = NULL; @@ -1664,16 +2088,23 @@ nfsrv_namei( /* * And call lookup() to do the real work */ - error = lookup(nip); + cnflags = nip->ni_cnd.cn_flags; /* store in case we have to restore */ + while ((error = lookup(nip)) == ERECYCLE) { + nip->ni_cnd.cn_flags = cnflags; + cnp->cn_nameptr = cnp->cn_pnbuf; + nip->ni_usedvp = nip->ni_dvp = nip->ni_startdir = dp; + } if (error) goto out; /* Check for encountering a symbolic link */ if (cnp->cn_flags & ISSYMLINK) { +#ifndef __LP64__ if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(nip->ni_dvp, NULL); } +#endif /* __LP64__ */ if (cnp->cn_flags & (LOCKPARENT | WANTPARENT)) vnode_put(nip->ni_dvp); if (nip->ni_vp) { @@ -1693,7 +2124,7 @@ out: } /* - * A fiddled version of m_adj() that ensures null fill to a long + * A fiddled version of m_adj() that ensures null fill to a 4-byte * boundary and only trims off the back end */ void @@ -1861,7 +2292,7 @@ nfsm_chain_get_sattr( struct nfsm_chain *nmc, struct vnode_attr *vap) { - int error = 0, nullflag = 0; + int error = 0; uint32_t val = 0; uint64_t val64; struct timespec now; @@ -1932,10 +2363,11 @@ nfsm_chain_get_sattr( vap->va_access_time.tv_sec, vap->va_access_time.tv_nsec); VATTR_SET_ACTIVE(vap, va_access_time); + vap->va_vaflags &= ~VA_UTIMES_NULL; break; case NFS_TIME_SET_TO_SERVER: VATTR_SET(vap, va_access_time, now); - nullflag = VA_UTIMES_NULL; + vap->va_vaflags |= VA_UTIMES_NULL; break; } nfsm_chain_get_32(error, nmc, val); @@ -1945,10 +2377,12 @@ nfsm_chain_get_sattr( vap->va_modify_time.tv_sec, vap->va_modify_time.tv_nsec); VATTR_SET_ACTIVE(vap, va_modify_time); + vap->va_vaflags &= ~VA_UTIMES_NULL; break; case NFS_TIME_SET_TO_SERVER: VATTR_SET(vap, va_modify_time, now); - vap->va_vaflags |= nullflag; + if (!VATTR_IS_ACTIVE(vap, va_access_time)) + vap->va_vaflags |= VA_UTIMES_NULL; break; } @@ -1958,7 +2392,7 @@ nfsm_chain_get_sattr( /* * Compare two security flavor structs */ -static int +int nfsrv_cmp_secflavs(struct nfs_sec *sf1, struct nfs_sec *sf2) { int i; @@ -1975,7 +2409,7 @@ nfsrv_cmp_secflavs(struct nfs_sec *sf1, struct nfs_sec *sf2) * Build hash lists of net addresses and hang them off the NFS export. * Called by nfsrv_export() to set up the lists of export addresses. */ -static int +int nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) { struct nfs_export_net_args nxna; @@ -1988,7 +2422,6 @@ nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) unsigned int net; user_addr_t uaddr; kauth_cred_t cred; - struct ucred temp_cred; uaddr = unxa->nxa_nets; for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) { @@ -1997,12 +2430,13 @@ nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) return (error); if (nxna.nxna_flags & (NX_MAPROOT|NX_MAPALL)) { - bzero(&temp_cred, sizeof(temp_cred)); - temp_cred.cr_uid = nxna.nxna_cred.cr_uid; - temp_cred.cr_ngroups = nxna.nxna_cred.cr_ngroups; + struct posix_cred temp_pcred; + bzero(&temp_pcred, sizeof(temp_pcred)); + temp_pcred.cr_uid = nxna.nxna_cred.cr_uid; + temp_pcred.cr_ngroups = nxna.nxna_cred.cr_ngroups; for (i=0; i < nxna.nxna_cred.cr_ngroups && i < NGROUPS; i++) - temp_cred.cr_groups[i] = nxna.nxna_cred.cr_groups[i]; - cred = kauth_cred_create(&temp_cred); + temp_pcred.cr_groups[i] = nxna.nxna_cred.cr_groups[i]; + cred = posix_cred_create(&temp_pcred); if (!IS_VALID_CRED(cred)) return (ENOMEM); } else { @@ -2084,13 +2518,34 @@ nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) if (cred == cred2) { /* creds are same (or both NULL) */ matched = 1; - } else if (cred && cred2 && (cred->cr_uid == cred2->cr_uid) && - (cred->cr_ngroups == cred2->cr_ngroups)) { - for (i=0; i < cred2->cr_ngroups && i < NGROUPS; i++) - if (cred->cr_groups[i] != cred2->cr_groups[i]) - break; - if (i >= cred2->cr_ngroups || i >= NGROUPS) - matched = 1; + } else if (cred && cred2 && (kauth_cred_getuid(cred) == kauth_cred_getuid(cred2))) { + /* + * Now compare the effective and + * supplementary groups... + * + * Note: This comparison, as written, + * does not correctly indicate that + * the groups are equivalent, since + * other than the first supplementary + * group, which is also the effective + * group, order on the remaining groups + * doesn't matter, and this is an + * ordered compare. + */ + gid_t groups[NGROUPS]; + gid_t groups2[NGROUPS]; + int groupcount = NGROUPS; + int group2count = NGROUPS; + + if (!kauth_cred_getgroups(cred, groups, &groupcount) && + !kauth_cred_getgroups(cred2, groups2, &group2count) && + groupcount == group2count) { + for (i=0; i < group2count; i++) + if (groups[i] != groups2[i]) + break; + if (i >= group2count || i >= NGROUPS) + matched = 1; + } } } if (IS_VALID_CRED(cred)) @@ -2116,7 +2571,7 @@ struct nfsrv_free_netopt_arg { struct radix_node_head *rnh; }; -static int +int nfsrv_free_netopt(struct radix_node *rn, void *w) { struct nfsrv_free_netopt_arg *fna = (struct nfsrv_free_netopt_arg *)w; @@ -2135,7 +2590,7 @@ nfsrv_free_netopt(struct radix_node *rn, void *w) /* * Free the net address hash lists that are hanging off the mount points. */ -static int +int nfsrv_free_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) { struct nfs_export_net_args nxna; @@ -2216,7 +2671,8 @@ void enablequotas(struct mount *mp, vfs_context_t ctx); // XXX int nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) { - int error = 0, pathlen; + int error = 0; + size_t pathlen; struct nfs_exportfs *nxfs, *nxfs2, *nxfs3; struct nfs_export *nx, *nx2, *nx3; struct nfs_filehandle nfh; @@ -2226,13 +2682,49 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) char path[MAXPATHLEN]; int expisroot; + if (unxa->nxa_flags == NXA_CHECK) { + /* just check if the path is an NFS-exportable file system */ + error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, &pathlen); + if (error) + return (error); + NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); + error = namei(&mnd); + if (error) + return (error); + mvp = mnd.ni_vp; + mp = vnode_mount(mvp); + /* make sure it's the root of a file system */ + if (!vnode_isvroot(mvp)) + error = EINVAL; + /* make sure the file system is NFS-exportable */ + if (!error) { + nfh.nfh_len = NFSV3_MAX_FID_SIZE; + error = VFS_VPTOFH(mvp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); + } + if (!error && (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)) + error = EIO; + if (!error && !(mp->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED)) + error = EISDIR; + vnode_put(mvp); + nameidone(&mnd); + return (error); + } + + /* all other operations: must be super user */ + if ((error = vfs_context_suser(ctx))) + return (error); + if (unxa->nxa_flags & NXA_DELETE_ALL) { /* delete all exports on all file systems */ lck_rw_lock_exclusive(&nfsrv_export_rwlock); while ((nxfs = LIST_FIRST(&nfsrv_exports))) { mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path); - if (mp) + if (mp) { vfs_clearflags(mp, MNT_EXPORTED); + mount_iterdrop(mp); + mp = NULL; + } /* delete all exports on this file system */ while ((nx = LIST_FIRST(&nxfs->nxfs_exports))) { LIST_REMOVE(nx, nx_next); @@ -2252,16 +2744,28 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) FREE(nxfs->nxfs_path, M_TEMP); FREE(nxfs, M_TEMP); } + if (nfsrv_export_hashtbl) { + /* all exports deleted, clean up export hash table */ + FREE(nfsrv_export_hashtbl, M_TEMP); + nfsrv_export_hashtbl = NULL; + } lck_rw_done(&nfsrv_export_rwlock); return (0); } - error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, (size_t *)&pathlen); + error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, &pathlen); if (error) return (error); lck_rw_lock_exclusive(&nfsrv_export_rwlock); + /* init export hash table if not already */ + if (!nfsrv_export_hashtbl) { + if (nfsrv_export_hash_size <= 0) + nfsrv_export_hash_size = NFSRVEXPHASHSZ; + nfsrv_export_hashtbl = hashinit(nfsrv_export_hash_size, M_TEMP, &nfsrv_export_hash); + } + // first check if we've already got an exportfs with the given ID LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { if (nxfs->nxfs_id == unxa->nxa_fsid) @@ -2276,8 +2780,12 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) if ((unxa->nxa_flags & (NXA_ADD|NXA_OFFLINE)) == NXA_ADD) { /* if adding, verify that the mount is still what we expect */ mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path); + if (mp) { + mount_ref(mp, 0); + mount_iterdrop(mp); + } /* find exported FS root vnode */ - NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs->nxfs_path), ctx); error = namei(&mnd); if (error) @@ -2302,7 +2810,7 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) } /* find exported FS root vnode */ - NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); error = namei(&mnd); if (error) { @@ -2322,12 +2830,15 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) mvp = NULL; } else { mp = vnode_mount(mvp); + mount_ref(mp, 0); /* make sure the file system is NFS-exportable */ nfh.nfh_len = NFSV3_MAX_FID_SIZE; error = VFS_VPTOFH(mvp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); if (!error && (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)) error = EIO; + if (!error && !(mp->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED)) + error = EISDIR; if (error) goto out; } @@ -2368,7 +2879,7 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) } if (unxa->nxa_exppath) { - error = copyinstr(unxa->nxa_exppath, path, MAXPATHLEN, (size_t *)&pathlen); + error = copyinstr(unxa->nxa_exppath, path, MAXPATHLEN, &pathlen); if (error) goto out; LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { @@ -2485,13 +2996,20 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) vnode_get(xvp); } else { xnd.ni_cnd.cn_nameiop = LOOKUP; +#if CONFIG_TRIGGERS + xnd.ni_op = OP_LOOKUP; +#endif xnd.ni_cnd.cn_flags = LOCKLEAF; xnd.ni_pathlen = pathlen - 1; xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf = path; xnd.ni_startdir = mvp; xnd.ni_usedvp = mvp; xnd.ni_cnd.cn_context = ctx; - error = lookup(&xnd); + while ((error = lookup(&xnd)) == ERECYCLE) { + xnd.ni_cnd.cn_flags = LOCKLEAF; + xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf; + xnd.ni_usedvp = xnd.ni_dvp = xnd.ni_startdir = mvp; + } if (error) goto out1; xvp = xnd.ni_vp; @@ -2598,11 +3116,13 @@ out: nameidone(&mnd); } unlock_out: + if (mp) + mount_drop(mp, 0); lck_rw_done(&nfsrv_export_rwlock); return (error); } -static struct nfs_export_options * +struct nfs_export_options * nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam) { struct nfs_export_options *nxo = NULL; @@ -2630,13 +3150,15 @@ nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam) } /* find an export for the given handle */ -static struct nfs_export * +struct nfs_export * nfsrv_fhtoexport(struct nfs_filehandle *nfhp) { struct nfs_exphandle *nxh = (struct nfs_exphandle*)nfhp->nfh_fhp; struct nfs_export *nx; uint32_t fsid, expid; + if (!nfsrv_export_hashtbl) + return (NULL); fsid = ntohl(nxh->nxh_fsid); expid = ntohl(nxh->nxh_expid); nx = NFSRVEXPHASH(fsid, expid)->lh_first; @@ -2647,7 +3169,7 @@ nfsrv_fhtoexport(struct nfs_filehandle *nfhp) continue; break; } - return nx; + return (nx); } /* @@ -2728,20 +3250,27 @@ nfsrv_fhtovp( } if (nxo && (nxo->nxo_flags & NX_OFFLINE)) - return ((nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); + return ((nd == NULL || nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); /* find mount structure */ mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path); + if (mp) { + error = vfs_busy(mp, LK_NOWAIT); + mount_iterdrop(mp); + if (error) + mp = NULL; + } if (!mp) { /* * We have an export, but no mount? * Perhaps the export just hasn't been marked offline yet. */ - return ((nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); + return ((nd == NULL || nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); } fidp = nfhp->nfh_fhp + sizeof(*nxh); error = VFS_FHTOVP(mp, nxh->nxh_fidlen, fidp, vpp, NULL); + vfs_unbusy(mp); if (error) return (error); /* vnode pointer should be good at this point or ... */ @@ -2859,46 +3388,6 @@ nfsrv_fhmatch(struct nfs_filehandle *fh1, struct nfs_filehandle *fh2) * Functions for dealing with active user lists */ -/* - * Compare address fields of two sockaddr_storage structures. - * Returns zero if they match. - */ -static int -nfsrv_cmp_sockaddr(struct sockaddr_storage *sock1, struct sockaddr_storage *sock2) -{ - struct sockaddr_in *ipv4_sock1, *ipv4_sock2; - struct sockaddr_in6 *ipv6_sock1, *ipv6_sock2; - - /* check for valid parameters */ - if (sock1 == NULL || sock2 == NULL) - return 1; - - /* check address length */ - if (sock1->ss_len != sock2->ss_len) - return 1; - - /* Check address family */ - if (sock1->ss_family != sock2->ss_family) - return 1; - - if (sock1->ss_family == AF_INET) { - /* IPv4 */ - ipv4_sock1 = (struct sockaddr_in *)sock1; - ipv4_sock2 = (struct sockaddr_in *)sock2; - - if (!bcmp(&ipv4_sock1->sin_addr, &ipv4_sock2->sin_addr, sizeof(struct in_addr))) - return 0; - } else { - /* IPv6 */ - ipv6_sock1 = (struct sockaddr_in6 *)sock1; - ipv6_sock2 = (struct sockaddr_in6 *)sock2; - - if (!bcmp(&ipv6_sock1->sin6_addr, &ipv6_sock2->sin6_addr, sizeof(struct in6_addr))) - return 0; - } - return 1; -} - /* * Search the hash table for a user node with a matching IP address and uid field. * If found, the node's tm_last timestamp is updated and the node is returned. @@ -2908,8 +3397,8 @@ nfsrv_cmp_sockaddr(struct sockaddr_storage *sock1, struct sockaddr_storage *sock * * The list's user_mutex lock MUST be held. */ -static struct nfs_user_stat_node * -nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr_storage *sock, uid_t uid) +struct nfs_user_stat_node * +nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr *saddr, uid_t uid) { struct nfs_user_stat_node *unode; struct timeval now; @@ -2918,7 +3407,7 @@ nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr_stor /* seach the hash table */ head = NFS_USER_STAT_HASH(list->user_hashtbl, uid); LIST_FOREACH(unode, head, hash_link) { - if (uid == unode->uid && nfsrv_cmp_sockaddr(sock, &unode->sock) == 0) { + if ((uid == unode->uid) && (nfs_sockaddr_cmp(saddr, (struct sockaddr*)&unode->sock) == 0)) { /* found matching node */ break; } @@ -2944,7 +3433,7 @@ nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr_stor return NULL; /* increment node count */ - OSAddAtomic(1, (SInt32*)&nfsrv_user_stat_node_count); + OSAddAtomic(1, &nfsrv_user_stat_node_count); list->node_count++; } else { /* reuse the oldest node in the lru list */ @@ -2960,7 +3449,7 @@ nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr_stor /* Initialize the node */ unode->uid = uid; - bcopy(sock, &unode->sock, sock->ss_len); + bcopy(saddr, &unode->sock, saddr->sa_len); microtime(&now); unode->ops = 0; unode->bytes_read = 0; @@ -2980,15 +3469,15 @@ nfsrv_update_user_stat(struct nfs_export *nx, struct nfsrv_descript *nd, uid_t u { struct nfs_user_stat_node *unode; struct nfs_active_user_list *ulist; - struct sockaddr_storage *sock_stor; + struct sockaddr *saddr; if ((!nfsrv_user_stat_enabled) || (!nx) || (!nd) || (!nd->nd_nam)) return; - sock_stor = (struct sockaddr_storage *)mbuf_data(nd->nd_nam); + saddr = (struct sockaddr *)mbuf_data(nd->nd_nam); /* check address family before going any further */ - if ((sock_stor->ss_family != AF_INET) && (sock_stor->ss_family != AF_INET6)) + if ((saddr->sa_family != AF_INET) && (saddr->sa_family != AF_INET6)) return; ulist = &nx->nx_user_list; @@ -2997,7 +3486,7 @@ nfsrv_update_user_stat(struct nfs_export *nx, struct nfsrv_descript *nd, uid_t u lck_mtx_lock(&ulist->user_mutex); /* get the user node */ - unode = nfsrv_get_user_stat_node(ulist, sock_stor, uid); + unode = nfsrv_get_user_stat_node(ulist, saddr, uid); if (!unode) { lck_mtx_unlock(&ulist->user_mutex); @@ -3014,7 +3503,7 @@ nfsrv_update_user_stat(struct nfs_export *nx, struct nfsrv_descript *nd, uid_t u } /* initialize an active user list */ -static void +void nfsrv_init_user_list(struct nfs_active_user_list *ulist) { uint i; @@ -3031,7 +3520,7 @@ nfsrv_init_user_list(struct nfs_active_user_list *ulist) } /* Free all nodes in an active user list */ -static void +void nfsrv_free_user_list(struct nfs_active_user_list *ulist) { struct nfs_user_stat_node *unode; @@ -3046,7 +3535,7 @@ nfsrv_free_user_list(struct nfs_active_user_list *ulist) FREE(unode, M_TEMP); /* decrement node count */ - OSAddAtomic(-1, (SInt32*)&nfsrv_user_stat_node_count); + OSAddAtomic(-1, &nfsrv_user_stat_node_count); } ulist->node_count = 0; @@ -3090,7 +3579,7 @@ nfsrv_active_user_list_reclaim(void) LIST_INSERT_HEAD(&oldlist, unode, hash_link); /* decrement node count */ - OSAddAtomic(-1, (SInt32*)&nfsrv_user_stat_node_count); + OSAddAtomic(-1, &nfsrv_user_stat_node_count); ulist->node_count--; } /* can unlock this export's list now */