]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/nfs/nfs_srvcache.c
xnu-2422.90.20.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_srvcache.c
index ea1fee65192bbae12cf01b962a0a6d9c4582c0e0..b0eb21d7347c56ba600c315f6cd06cb4eedf22d1 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -65,7 +65,7 @@
  * FreeBSD-Id: nfs_srvcache.c,v 1.15 1997/10/12 20:25:46 phk Exp $
  */
 
  * FreeBSD-Id: nfs_srvcache.c,v 1.15 1997/10/12 20:25:46 phk Exp $
  */
 
-#ifndef NFS_NOSERVER 
+#if NFSSERVER
 /*
  * Reference: Chet Juszczak, "Improving the Performance and Correctness
  *             of an NFS Server", in Proc. Winter 1989 USENIX Conference,
 /*
  * Reference: Chet Juszczak, "Improving the Performance and Correctness
  *             of an NFS Server", in Proc. Winter 1989 USENIX Conference,
 #include <sys/kpi_mbuf.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/kpi_mbuf.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
-#include <sys/socketvar.h>     /* for dup_sockaddr */
 #include <libkern/OSAtomic.h>
 
 #include <netinet/in.h>
 #include <libkern/OSAtomic.h>
 
 #include <netinet/in.h>
-#if ISO
-#include <netiso/iso.h>
-#endif
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsrvcache.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsrvcache.h>
 
-extern struct nfsstats nfsstats;
 extern int nfsv2_procid[NFS_NPROCS];
 extern int nfsv2_procid[NFS_NPROCS];
-long numnfsrvcache;
-static long desirednfsrvcache = NFSRVCACHESIZ;
+static int nfsrv_reqcache_count;
+int nfsrv_reqcache_size = NFSRVCACHESIZ;
 
 #define        NFSRCHASH(xid) \
 
 #define        NFSRCHASH(xid) \
-       (&nfsrvhashtbl[((xid) + ((xid) >> 24)) & nfsrvhash])
-LIST_HEAD(nfsrvhash, nfsrvcache) *nfsrvhashtbl;
-TAILQ_HEAD(nfsrvlru, nfsrvcache) nfsrvlruhead;
-u_long nfsrvhash;
+       (&nfsrv_reqcache_hashtbl[((xid) + ((xid) >> 24)) & nfsrv_reqcache_hash])
+LIST_HEAD(nfsrv_reqcache_hash, nfsrvcache) *nfsrv_reqcache_hashtbl;
+TAILQ_HEAD(nfsrv_reqcache_lru, nfsrvcache) nfsrv_reqcache_lruhead;
+u_long nfsrv_reqcache_hash;
 
 lck_grp_t *nfsrv_reqcache_lck_grp;
 
 lck_grp_t *nfsrv_reqcache_lck_grp;
-lck_grp_attr_t *nfsrv_reqcache_lck_grp_attr;
-lck_attr_t *nfsrv_reqcache_lck_attr;
 lck_mtx_t *nfsrv_reqcache_mutex;
 
 lck_mtx_t *nfsrv_reqcache_mutex;
 
-#define        NETFAMILY(rp) \
-               (((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO)
-
 /*
  * Static array that defines which nfs rpc's are nonidempotent
  */
 /*
  * Static array that defines which nfs rpc's are nonidempotent
  */
@@ -166,17 +156,50 @@ static int nfsv2_repstat[NFS_NPROCS] = {
  * Initialize the server request cache list
  */
 void
  * Initialize the server request cache list
  */
 void
-nfsrv_initcache()
+nfsrv_initcache(void)
 {
 {
-       /* init nfs server request cache mutex */
-       nfsrv_reqcache_lck_grp_attr = lck_grp_attr_alloc_init();
-       lck_grp_attr_setstat(nfsrv_reqcache_lck_grp_attr);
-       nfsrv_reqcache_lck_grp = lck_grp_alloc_init("nfsrv_reqcache", nfsrv_reqcache_lck_grp_attr);
-       nfsrv_reqcache_lck_attr = lck_attr_alloc_init();
-       nfsrv_reqcache_mutex = lck_mtx_alloc_init(nfsrv_reqcache_lck_grp, nfsrv_reqcache_lck_attr);
+       if (nfsrv_reqcache_size <= 0)
+               return;
+
+       lck_mtx_lock(nfsrv_reqcache_mutex);
+       /* init nfs server request cache hash table */
+       nfsrv_reqcache_hashtbl = hashinit(nfsrv_reqcache_size, M_NFSD, &nfsrv_reqcache_hash);
+       TAILQ_INIT(&nfsrv_reqcache_lruhead);
+       lck_mtx_unlock(nfsrv_reqcache_mutex);
+}
 
 
-       nfsrvhashtbl = hashinit(desirednfsrvcache, M_NFSD, &nfsrvhash);
-       TAILQ_INIT(&nfsrvlruhead);
+/*
+ * This function compares two net addresses by family and returns TRUE
+ * if they are the same host.
+ * If there is any doubt, return FALSE.
+ * The AF_INET family is handled as a special case so that address mbufs
+ * don't need to be saved to store "struct in_addr", which is only 4 bytes.
+ * Ditto for AF_INET6 which is only 16 bytes.
+ */
+static int
+netaddr_match(
+       int family,
+       union nethostaddr *haddr,
+       mbuf_t nam)
+{
+       struct sockaddr_in *inetaddr;
+       struct sockaddr_in6 *inet6addr;
+
+       switch (family) {
+       case AF_INET:
+               inetaddr = mbuf_data(nam);
+               if ((inetaddr->sin_family == AF_INET) &&
+                   (inetaddr->sin_addr.s_addr == haddr->had_inetaddr))
+                       return (1);
+               break;
+       case AF_INET6:
+               inet6addr = mbuf_data(nam);
+               if ((inet6addr->sin6_family == AF_INET6) &&
+                   !bcmp(&inet6addr->sin6_addr, &haddr->had_inet6addr, sizeof(inet6addr->sin6_addr)))
+                       return (1);
+               break;
+       }
+       return (0);
 }
 
 /*
 }
 
 /*
@@ -194,15 +217,14 @@ nfsrv_initcache()
  * Update/add new request at end of lru list
  */
 int
  * Update/add new request at end of lru list
  */
 int
-nfsrv_getcache(nd, slp, repp)
-       struct nfsrv_descript *nd;
-       struct nfssvc_sock *slp;
-       mbuf_t *repp;
+nfsrv_getcache(
+       struct nfsrv_descript *nd,
+       struct nfsrv_sock *slp,
+       mbuf_t *mrepp)
 {
        struct nfsrvcache *rp;
 {
        struct nfsrvcache *rp;
-       mbuf_t mb;
-       struct sockaddr_in *saddr;
-       caddr_t bpos;
+       struct nfsm_chain nmrep;
+       struct sockaddr *saddr;
        int ret, error;
 
        /*
        int ret, error;
 
        /*
@@ -216,30 +238,38 @@ loop:
        for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
            rp = rp->rc_hash.le_next) {
            if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
        for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
            rp = rp->rc_hash.le_next) {
            if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
-               netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
+               netaddr_match(rp->rc_family, &rp->rc_haddr, nd->nd_nam)) {
                        if ((rp->rc_flag & RC_LOCKED) != 0) {
                                rp->rc_flag |= RC_WANTED;
                        if ((rp->rc_flag & RC_LOCKED) != 0) {
                                rp->rc_flag |= RC_WANTED;
-                               (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+                               msleep(rp, nfsrv_reqcache_mutex, PZERO-1, "nfsrc", NULL);
                                goto loop;
                        }
                        rp->rc_flag |= RC_LOCKED;
                        /* If not at end of LRU chain, move it there */
                        if (rp->rc_lru.tqe_next) {
                                goto loop;
                        }
                        rp->rc_flag |= RC_LOCKED;
                        /* If not at end of LRU chain, move it there */
                        if (rp->rc_lru.tqe_next) {
-                               TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
-                               TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);
+                               TAILQ_REMOVE(&nfsrv_reqcache_lruhead, rp, rc_lru);
+                               TAILQ_INSERT_TAIL(&nfsrv_reqcache_lruhead, rp, rc_lru);
                        }
                        if (rp->rc_state == RC_UNUSED)
                                panic("nfsrv cache");
                        if (rp->rc_state == RC_INPROG) {
                        }
                        if (rp->rc_state == RC_UNUSED)
                                panic("nfsrv cache");
                        if (rp->rc_state == RC_INPROG) {
-                               OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_inproghits);
+                               OSAddAtomic64(1, &nfsstats.srvcache_inproghits);
                                ret = RC_DROPIT;
                        } else if (rp->rc_flag & RC_REPSTATUS) {
                                ret = RC_DROPIT;
                        } else if (rp->rc_flag & RC_REPSTATUS) {
-                               OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_nonidemdonehits);
-                               nfs_rephead(0, nd, slp, rp->rc_status, repp, &mb, &bpos);
-                               ret = RC_REPLY;
+                               OSAddAtomic64(1, &nfsstats.srvcache_nonidemdonehits);
+                               nd->nd_repstat = rp->rc_status;
+                               error = nfsrv_rephead(nd, slp, &nmrep, 0);
+                               if (error) {
+                                       printf("nfsrv cache: reply alloc failed for nonidem request hit\n");
+                                       ret = RC_DROPIT;
+                                       *mrepp = NULL;
+                               } else {
+                                       ret = RC_REPLY;
+                                       *mrepp = nmrep.nmc_mhead;
+                               }
                        } else if (rp->rc_flag & RC_REPMBUF) {
                        } else if (rp->rc_flag & RC_REPMBUF) {
-                               OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_nonidemdonehits);
-                               error = mbuf_copym(rp->rc_reply, 0, MBUF_COPYALL, MBUF_WAITOK, repp);
+                               OSAddAtomic64(1, &nfsstats.srvcache_nonidemdonehits);
+                               error = mbuf_copym(rp->rc_reply, 0, MBUF_COPYALL, MBUF_WAITOK, mrepp);
                                if (error) {
                                        printf("nfsrv cache: reply copym failed for nonidem request hit\n");
                                        ret = RC_DROPIT;
                                if (error) {
                                        printf("nfsrv cache: reply copym failed for nonidem request hit\n");
                                        ret = RC_DROPIT;
@@ -247,26 +277,26 @@ loop:
                                        ret = RC_REPLY;
                                }
                        } else {
                                        ret = RC_REPLY;
                                }
                        } else {
-                               OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_idemdonehits);
+                               OSAddAtomic64(1, &nfsstats.srvcache_idemdonehits);
                                rp->rc_state = RC_INPROG;
                                ret = RC_DOIT;
                        }
                        rp->rc_flag &= ~RC_LOCKED;
                        if (rp->rc_flag & RC_WANTED) {
                                rp->rc_flag &= ~RC_WANTED;
                                rp->rc_state = RC_INPROG;
                                ret = RC_DOIT;
                        }
                        rp->rc_flag &= ~RC_LOCKED;
                        if (rp->rc_flag & RC_WANTED) {
                                rp->rc_flag &= ~RC_WANTED;
-                               wakeup((caddr_t)rp);
+                               wakeup(rp);
                        }
                        lck_mtx_unlock(nfsrv_reqcache_mutex);
                        return (ret);
                }
        }
                        }
                        lck_mtx_unlock(nfsrv_reqcache_mutex);
                        return (ret);
                }
        }
-       OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_misses);
-       if (numnfsrvcache < desirednfsrvcache) {
+       OSAddAtomic64(1, &nfsstats.srvcache_misses);
+       if (nfsrv_reqcache_count < nfsrv_reqcache_size) {
                /* try to allocate a new entry */
                MALLOC(rp, struct nfsrvcache *, sizeof *rp, M_NFSD, M_WAITOK);
                if (rp) {
                        bzero((char *)rp, sizeof *rp);
                /* try to allocate a new entry */
                MALLOC(rp, struct nfsrvcache *, sizeof *rp, M_NFSD, M_WAITOK);
                if (rp) {
                        bzero((char *)rp, sizeof *rp);
-                       numnfsrvcache++;
+                       nfsrv_reqcache_count++;
                        rp->rc_flag = RC_LOCKED;
                }
        } else {
                        rp->rc_flag = RC_LOCKED;
                }
        } else {
@@ -274,7 +304,7 @@ loop:
        }
        if (!rp) {
                /* try to reuse the least recently used entry */
        }
        if (!rp) {
                /* try to reuse the least recently used entry */
-               rp = nfsrvlruhead.tqh_first;
+               rp = nfsrv_reqcache_lruhead.tqh_first;
                if (!rp) {
                        /* no entry to reuse? */
                        /* OK, we just won't be able to cache this request */
                if (!rp) {
                        /* no entry to reuse? */
                        /* OK, we just won't be able to cache this request */
@@ -283,28 +313,32 @@ loop:
                }
                while ((rp->rc_flag & RC_LOCKED) != 0) {
                        rp->rc_flag |= RC_WANTED;
                }
                while ((rp->rc_flag & RC_LOCKED) != 0) {
                        rp->rc_flag |= RC_WANTED;
-                       (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
-                       rp = nfsrvlruhead.tqh_first;
+                       msleep(rp, nfsrv_reqcache_mutex, PZERO-1, "nfsrc", NULL);
+                       rp = nfsrv_reqcache_lruhead.tqh_first;
                }
                rp->rc_flag |= RC_LOCKED;
                LIST_REMOVE(rp, rc_hash);
                }
                rp->rc_flag |= RC_LOCKED;
                LIST_REMOVE(rp, rc_hash);
-               TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
+               TAILQ_REMOVE(&nfsrv_reqcache_lruhead, rp, rc_lru);
                if (rp->rc_flag & RC_REPMBUF)
                        mbuf_freem(rp->rc_reply);
                if (rp->rc_flag & RC_NAM)
                        mbuf_freem(rp->rc_nam);
                rp->rc_flag &= (RC_LOCKED | RC_WANTED);
        }
                if (rp->rc_flag & RC_REPMBUF)
                        mbuf_freem(rp->rc_reply);
                if (rp->rc_flag & RC_NAM)
                        mbuf_freem(rp->rc_nam);
                rp->rc_flag &= (RC_LOCKED | RC_WANTED);
        }
-       TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);
+       TAILQ_INSERT_TAIL(&nfsrv_reqcache_lruhead, rp, rc_lru);
        rp->rc_state = RC_INPROG;
        rp->rc_xid = nd->nd_retxid;
        saddr = mbuf_data(nd->nd_nam);
        rp->rc_state = RC_INPROG;
        rp->rc_xid = nd->nd_retxid;
        saddr = mbuf_data(nd->nd_nam);
-       switch (saddr->sin_family) {
+       rp->rc_family = saddr->sa_family;
+       switch (saddr->sa_family) {
        case AF_INET:
                rp->rc_flag |= RC_INETADDR;
        case AF_INET:
                rp->rc_flag |= RC_INETADDR;
-               rp->rc_inetaddr = saddr->sin_addr.s_addr;
+               rp->rc_inetaddr = ((struct sockaddr_in*)saddr)->sin_addr.s_addr;
+               break;
+       case AF_INET6:
+               rp->rc_flag |= RC_INETADDR;
+               rp->rc_inet6addr = ((struct sockaddr_in6*)saddr)->sin6_addr;
                break;
                break;
-       case AF_ISO:
        default:
                error = mbuf_copym(nd->nd_nam, 0, MBUF_COPYALL, MBUF_WAITOK, &rp->rc_nam);
                if (error)
        default:
                error = mbuf_copym(nd->nd_nam, 0, MBUF_COPYALL, MBUF_WAITOK, &rp->rc_nam);
                if (error)
@@ -318,7 +352,7 @@ loop:
        rp->rc_flag &= ~RC_LOCKED;
        if (rp->rc_flag & RC_WANTED) {
                rp->rc_flag &= ~RC_WANTED;
        rp->rc_flag &= ~RC_LOCKED;
        if (rp->rc_flag & RC_WANTED) {
                rp->rc_flag &= ~RC_WANTED;
-               wakeup((caddr_t)rp);
+               wakeup(rp);
        }
        lck_mtx_unlock(nfsrv_reqcache_mutex);
        return (RC_DOIT);
        }
        lck_mtx_unlock(nfsrv_reqcache_mutex);
        return (RC_DOIT);
@@ -328,10 +362,10 @@ loop:
  * Update a request cache entry after the rpc has been done
  */
 void
  * Update a request cache entry after the rpc has been done
  */
 void
-nfsrv_updatecache(nd, repvalid, repmbuf)
-       struct nfsrv_descript *nd;
-       int repvalid;
-       mbuf_t repmbuf;
+nfsrv_updatecache(
+       struct nfsrv_descript *nd,
+       int repvalid,
+       mbuf_t repmbuf)
 {
        struct nfsrvcache *rp;
        int error;
 {
        struct nfsrvcache *rp;
        int error;
@@ -343,20 +377,32 @@ loop:
        for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
            rp = rp->rc_hash.le_next) {
            if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
        for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
            rp = rp->rc_hash.le_next) {
            if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
-               netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
+               netaddr_match(rp->rc_family, &rp->rc_haddr, nd->nd_nam)) {
                        if ((rp->rc_flag & RC_LOCKED) != 0) {
                                rp->rc_flag |= RC_WANTED;
                        if ((rp->rc_flag & RC_LOCKED) != 0) {
                                rp->rc_flag |= RC_WANTED;
-                               (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+                               msleep(rp, nfsrv_reqcache_mutex, PZERO-1, "nfsrc", NULL);
                                goto loop;
                        }
                        rp->rc_flag |= RC_LOCKED;
                                goto loop;
                        }
                        rp->rc_flag |= RC_LOCKED;
+                        if (rp->rc_state == RC_DONE) {
+                                /*
+                                 * This can occur if the cache is too small.
+                                 * Retransmits of the same request aren't
+                                 * dropped so we may see the operation
+                                 * complete more then once.
+                                 */
+                                if (rp->rc_flag & RC_REPMBUF) {
+                                        mbuf_freem(rp->rc_reply);
+                                        rp->rc_flag &= ~RC_REPMBUF;
+                                }
+                       }
                        rp->rc_state = RC_DONE;
                        /*
                         * If we have a valid reply update status and save
                         * the reply for non-idempotent rpc's.
                         */
                        if (repvalid && nonidempotent[nd->nd_procnum]) {
                        rp->rc_state = RC_DONE;
                        /*
                         * If we have a valid reply update status and save
                         * the reply for non-idempotent rpc's.
                         */
                        if (repvalid && nonidempotent[nd->nd_procnum]) {
-                               if ((nd->nd_flag & ND_NFSV3) == 0 &&
+                               if ((nd->nd_vers == NFS_VER2) &&
                                  nfsv2_repstat[nfsv2_procid[nd->nd_procnum]]) {
                                        rp->rc_status = nd->nd_repstat;
                                        rp->rc_flag |= RC_REPSTATUS;
                                  nfsv2_repstat[nfsv2_procid[nd->nd_procnum]]) {
                                        rp->rc_status = nd->nd_repstat;
                                        rp->rc_flag |= RC_REPSTATUS;
@@ -369,7 +415,7 @@ loop:
                        rp->rc_flag &= ~RC_LOCKED;
                        if (rp->rc_flag & RC_WANTED) {
                                rp->rc_flag &= ~RC_WANTED;
                        rp->rc_flag &= ~RC_LOCKED;
                        if (rp->rc_flag & RC_WANTED) {
                                rp->rc_flag &= ~RC_WANTED;
-                               wakeup((caddr_t)rp);
+                               wakeup(rp);
                        }
                        lck_mtx_unlock(nfsrv_reqcache_mutex);
                        return;
                        }
                        lck_mtx_unlock(nfsrv_reqcache_mutex);
                        return;
@@ -382,19 +428,20 @@ loop:
  * Clean out the cache. Called when the last nfsd terminates.
  */
 void
  * Clean out the cache. Called when the last nfsd terminates.
  */
 void
-nfsrv_cleancache()
+nfsrv_cleancache(void)
 {
        struct nfsrvcache *rp, *nextrp;
 
        lck_mtx_lock(nfsrv_reqcache_mutex);
 {
        struct nfsrvcache *rp, *nextrp;
 
        lck_mtx_lock(nfsrv_reqcache_mutex);
-       for (rp = nfsrvlruhead.tqh_first; rp != 0; rp = nextrp) {
+       for (rp = nfsrv_reqcache_lruhead.tqh_first; rp != 0; rp = nextrp) {
                nextrp = rp->rc_lru.tqe_next;
                LIST_REMOVE(rp, rc_hash);
                nextrp = rp->rc_lru.tqe_next;
                LIST_REMOVE(rp, rc_hash);
-               TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
+               TAILQ_REMOVE(&nfsrv_reqcache_lruhead, rp, rc_lru);
                _FREE(rp, M_NFSD);
        }
                _FREE(rp, M_NFSD);
        }
-       numnfsrvcache = 0;
+       nfsrv_reqcache_count = 0;
+       FREE(nfsrv_reqcache_hashtbl, M_TEMP);
        lck_mtx_unlock(nfsrv_reqcache_mutex);
 }
 
        lck_mtx_unlock(nfsrv_reqcache_mutex);
 }
 
-#endif /* NFS_NOSERVER */
+#endif /* NFSSERVER */