]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_cache.c
xnu-517.9.4.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_cache.c
CommitLineData
1c79356b 1/*
55e303ae 2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
e5568f75
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
e5568f75
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
e5568f75
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23/*
24 * Copyright (c) 1989, 1993, 1995
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Poul-Henning Kamp of the FreeBSD Project.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 *
59 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
60 */
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/time.h>
64#include <sys/mount.h>
65#include <sys/vnode.h>
66#include <sys/namei.h>
67#include <sys/errno.h>
68#include <sys/malloc.h>
69
70/*
71 * Name caching works as follows:
72 *
73 * Names found by directory scans are retained in a cache
74 * for future reference. It is managed LRU, so frequently
75 * used names will hang around. Cache is indexed by hash value
76 * obtained from (vp, name) where vp refers to the directory
77 * containing name.
78 *
79 * If it is a "negative" entry, (i.e. for a name that is known NOT to
80 * exist) the vnode pointer will be NULL.
81 *
82 * For simplicity (and economy of storage), names longer than
83 * a maximum length of NCHNAMLEN are not cached; they occur
84 * infrequently in any case, and are almost never of interest.
85 *
86 * Upon reaching the last segment of a path, if the reference
87 * is for DELETE, or NOCACHE is set (rewrite), and the
88 * name is located in the cache, it will be dropped.
89 */
90
/*
 * Structures associated with name cacheing.
 */
/*
 * Hash a (directory vnode, name-hash) pair to a bucket head.
 * Mixes the dvp pointer, the dvp's capability id (v_id), and the
 * precomputed name hash, then masks into the table.
 */
#define NCHHASH(dvp, hash_val) \
	(&nchashtbl[((u_long)(dvp) ^ ((dvp)->v_id ^ (hash_val))) & nchash])
LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
u_long	nchash;				/* size of hash table - 1 */
long	numcache;			/* number of cache entries allocated */
TAILQ_HEAD(, namecache) nclruhead;	/* LRU chain */
struct	nchstats nchstats;		/* cache effectiveness statistics */
u_long	nextvnodeid = 0;		/* next v_id handed out by cache_purge() */
int 	doingcache = 1;			/* 1 => enable the cache */
103
/*
 * Delete an entry from its hash list and move it to the front
 * of the LRU list for immediate reuse.
 *
 * An entry with nc_hash.le_prev == 0 is treated as "not on any
 * hash chain" throughout this file; PURGE clears le_prev after
 * unhashing so cache_enter() can recognize free entries.
 *
 * NOTE: THESE MACROS CAN BLOCK (in the call to remove_name())
 *	 SO BE CAREFUL IF YOU HOLD POINTERS TO nclruhead OR
 *	 nchashtbl.
 */
#if DIAGNOSTIC
#define PURGE(ncp)  {						\
	if (ncp->nc_hash.le_prev == 0)				\
		panic("namecache purge le_prev");		\
	if (ncp->nc_hash.le_next == ncp)			\
		panic("namecache purge le_next");		\
	LIST_REMOVE(ncp, nc_hash);				\
	ncp->nc_hash.le_prev = 0;				\
	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);			\
	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);		\
	/* this has to come last because it could block */	\
	remove_name(ncp->nc_name);				\
	ncp->nc_name = NULL;					\
}
#else
#define PURGE(ncp)  {						\
	LIST_REMOVE(ncp, nc_hash);				\
	ncp->nc_hash.le_prev = 0;				\
	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);			\
	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);		\
	/* this has to come last because it could block */	\
	remove_name(ncp->nc_name);				\
	ncp->nc_name = NULL;					\
}
#endif /* DIAGNOSTIC */
137
/*
 * Move an entry that has been used to the tail of the LRU list
 * so that it will be preserved for future use.
 *
 * The tqe_next != 0 check skips the entry already at the tail
 * (its tqe_next is NULL), avoiding a pointless remove/insert.
 */
#define TOUCH(ncp)  {				\
	if (ncp->nc_lru.tqe_next != 0) {	\
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);		\
		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);	\
	}					\
}
148
55e303ae
A
149
//
// Position-weighted string hash: character k (1-based) contributes
// (unsigned char)c * k to the sum.
//
// Have to take a len argument because we may only need to
// hash part of a componentname: len == 0 means "hash up to the
// NUL terminator", len > 0 means "hash exactly len bytes".
//
static unsigned int
hash_string(const char *str, int len)
{
	unsigned int sum = 0;
	unsigned int weight = 1;

	if (len == 0) {
		// NUL-terminated: walk until the terminator.
		while (*str != '\0') {
			sum += (unsigned char)*str++ * weight++;
		}
	} else {
		// Counted: consume exactly len bytes.
		while (len-- > 0) {
			sum += (unsigned char)*str++ * weight++;
		}
	}

	return sum;
}
171
172
173
174
1c79356b
A
/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought. If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned. If the lookup determines that the name does not exist
 * (negative cacheing), a status of ENOENT is returned. If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	register struct namecache *ncp, *nnp;
	register struct nchashhead *ncpp;
	register long namelen = cnp->cn_namelen;
	char *nameptr = cnp->cn_nameptr;

	/* Cache globally disabled: tell namei not to make an entry either. */
	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	ncpp = NCHHASH(dvp, cnp->cn_hash);
	/* nnp is saved up front because PURGE() unlinks ncp from the chain. */
	for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
		nnp = ncp->nc_hash.le_next;

		if (ncp->nc_dvp == dvp &&
		    strncmp(ncp->nc_name, nameptr, namelen) == 0 &&
		    ncp->nc_name[namelen] == 0) {
			/* Make sure the vp isn't stale. */
			if ((ncp->nc_dvpid != dvp->v_id) ||
			    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) {
				/* v_id changed since the entry was made. */
				nchstats.ncs_falsehits++;
				PURGE(ncp);
				continue;
			}
			break;
		}
	}

	/* We failed to find an entry */
	if (ncp == 0) {
		nchstats.ncs_miss++;
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		nchstats.ncs_badhits++;
		PURGE(ncp);
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		/* Don't return vnodes that are initializing or being torn down. */
		if (ncp->nc_vp->v_flag & (VUINIT|VXLOCK|VTERMINATE|VORECLAIM)) {
			PURGE(ncp);
			return (0);
		}

		nchstats.ncs_goodhits++;
		TOUCH(ncp);
		*vpp = ncp->nc_vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		nchstats.ncs_badhits++;
		PURGE(ncp);
		return (0);
	}

	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_vpid field records whether this is a whiteout
	 * (it holds ISWHITEOUT or 0 for negative entries -- see cache_enter()).
	 */
	nchstats.ncs_neghits++;
	TOUCH(ncp);
	cnp->cn_flags |= ncp->nc_vpid;
	return (ENOENT);
}
266
267/*
268 * Add an entry to the cache.
269 */
270void
271cache_enter(dvp, vp, cnp)
272 struct vnode *dvp;
273 struct vnode *vp;
274 struct componentname *cnp;
275{
276 register struct namecache *ncp;
277 register struct nchashhead *ncpp;
278
279 if (!doingcache)
280 return;
281
1c79356b
A
282 /*
283 * We allocate a new entry if we are less than the maximum
284 * allowed and the one at the front of the LRU list is in use.
285 * Otherwise we use the one at the front of the LRU list.
286 */
287 if (numcache < desiredvnodes &&
288 ((ncp = nclruhead.tqh_first) == NULL ||
289 ncp->nc_hash.le_prev != 0)) {
290 /* Add one more entry */
291 ncp = (struct namecache *)
292 _MALLOC_ZONE((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
293 numcache++;
294 } else if (ncp = nclruhead.tqh_first) {
295 /* reuse an old entry */
296 TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
297 if (ncp->nc_hash.le_prev != 0) {
298#if DIAGNOSTIC
299 if (ncp->nc_hash.le_next == ncp)
300 panic("cache_enter: le_next");
301#endif
302 LIST_REMOVE(ncp, nc_hash);
55e303ae
A
303 remove_name(ncp->nc_name);
304 ncp->nc_name = NULL;
1c79356b
A
305 ncp->nc_hash.le_prev = 0;
306 }
307 } else {
308 /* give up */
309 return;
310 }
311
312 /*
313 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
314 * For negative entries, we have to record whether it is a whiteout.
315 * the whiteout flag is stored in the nc_vpid field which is
316 * otherwise unused.
317 */
318 ncp->nc_vp = vp;
319 if (vp)
320 ncp->nc_vpid = vp->v_id;
321 else
322 ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
323 ncp->nc_dvp = dvp;
324 ncp->nc_dvpid = dvp->v_id;
55e303ae 325 ncp->nc_name = add_name(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
1c79356b 326 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
55e303ae 327 ncpp = NCHHASH(dvp, cnp->cn_hash);
1c79356b
A
328#if DIAGNOSTIC
329 {
330 register struct namecache *p;
331
332 for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
333 if (p == ncp)
334 panic("cache_enter: duplicate");
335 }
336#endif
337 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
338}
339
/*
 * Name cache initialization, from vfs_init() when we are booting
 *
 * Sizes the hash table from desiredvnodes (minimum 4096 buckets)
 * and brings up the shared string-reference table used by
 * add_name()/remove_name().
 */
void
nchinit()
{
	/* forward declaration; init_string_table is static, defined below */
	static void init_string_table(void);

	TAILQ_INIT(&nclruhead);
	/* hashinit() returns the table and stores (size - 1) into nchash */
	nchashtbl = hashinit(MAX(4096, desiredvnodes), M_CACHE, &nchash);

	init_string_table();
}
353
55e303ae
A
354
/*
 * Grow the name cache hash table to at least newsize buckets.
 * Returns 0 on success, ENOMEM if the new table cannot be allocated.
 * Shrinking is not supported; note the guard compares newsize against
 * nchash, which is the mask (size - 1), not the size itself.
 */
int
resize_namecache(u_int newsize)
{
    struct nchashhead	*new_table;
    struct nchashhead	*old_table;
    struct nchashhead	*old_head, *head;
    struct namecache 	*entry, *next;
    uint32_t		i;
    u_long		new_mask, old_mask;

    // we don't support shrinking yet
    if (newsize < nchash) {
	return 0;
    }

    new_table = hashinit(newsize, M_CACHE, &new_mask);
    if (new_table == NULL) {
	return ENOMEM;
    }

    // do the switch!
    old_table = nchashtbl;
    nchashtbl = new_table;
    old_mask  = nchash;
    nchash    = new_mask;

    // walk the old table and insert all the entries into
    // the new table
    //
    for(i=0; i <= old_mask; i++) {
	old_head = &old_table[i];
	for (entry=old_head->lh_first; entry != NULL; entry=next) {
	    //
	    // XXXdbg - Beware: this assumes that hash_string() does
	    //                  the same thing as what happens in
	    //                  lookup() over in vfs_lookup.c
	    head = NCHHASH(entry->nc_dvp, hash_string(entry->nc_name, 0));

	    /* save the successor before LIST_INSERT_HEAD rewrites le_next */
	    next = entry->nc_hash.le_next;
	    LIST_INSERT_HEAD(head, entry, nc_hash);
	}
    }

    FREE(old_table, M_CACHE);

    return 0;
}
402
403
404
405
1c79356b
A
/*
 * Invalidate a all entries to particular vnode.
 *
 * We actually just increment the v_id, that will do it. The entries will
 * be purged by lookup as they get found. If the v_id wraps around, we
 * need to ditch the entire cache, to avoid confusion. No valid vnode will
 * ever have (v_id == 0).
 */
void
cache_purge(vp)
	struct vnode *vp;
{
	struct namecache *ncp;
	struct nchashhead *ncpp;

	vp->v_id = ++nextvnodeid;
	/* Common case: no wraparound, the bumped v_id is enough. */
	if (nextvnodeid != 0)
		return;
	/*
	 * nextvnodeid wrapped to 0: stale entries could now collide with
	 * recycled ids, so drop every entry in the whole cache.
	 */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		while (ncp = ncpp->lh_first)
			PURGE(ncp);
	}
	/* Re-bump so vp never keeps the reserved v_id of 0. */
	vp->v_id = ++nextvnodeid;
}
430
/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entriess at the same time.
 *
 * NOTE(review): PURGE() can block in remove_name(); nnp is captured
 * before the PURGE, so this walk presumably relies on single-threaded
 * access to the cache while blocked -- verify against locking model.
 */
void
cache_purgevfs(mp)
	struct mount *mp;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
			nnp = ncp->nc_hash.le_next;
			/* purge if stale (dvp or vp id mismatch) or on mp */
			if (ncp->nc_dvpid != ncp->nc_dvp->v_id ||
			    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) ||
			    ncp->nc_dvp->v_mount == mp) {
				PURGE(ncp);
			}
		}
	}
}
55e303ae
A
456
457
458
//
// String ref routines
//
// A refcounted, hashed pool of NUL-terminated name strings shared by
// namecache entries (see add_name()/remove_name()).
//
static LIST_HEAD(stringhead, string_t) *string_ref_table;
static u_long   string_table_mask;		// table size - 1
static uint32_t max_chain_len=0;		// longest bucket chain seen
static struct stringhead *long_chain_head=NULL;	// bucket owning that chain
static uint32_t filled_buckets=0;		// non-empty buckets (drives resize)
static uint32_t num_dups=0;			// strings with refcount > 1
static uint32_t nstrings=0;			// distinct strings in the table

typedef struct string_t {
    LIST_ENTRY(string_t)  hash_chain;	// bucket linkage
    unsigned char        *str;		// points just past this header
    uint32_t              refcount;	// references from namecache entries
} string_t;
475
476
477
478static int
479resize_string_ref_table()
480{
481 struct stringhead *new_table;
482 struct stringhead *old_table;
483 struct stringhead *old_head, *head;
484 string_t *entry, *next;
485 uint32_t i, hashval;
486 u_long new_mask, old_mask;
487
488 new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);
489 if (new_table == NULL) {
490 return ENOMEM;
491 }
492
493 // do the switch!
494 old_table = string_ref_table;
495 string_ref_table = new_table;
496 old_mask = string_table_mask;
497 string_table_mask = new_mask;
498
499 printf("resize: max chain len %d, new table size %d\n",
500 max_chain_len, new_mask + 1);
501 max_chain_len = 0;
502 long_chain_head = NULL;
503 filled_buckets = 0;
504
505 // walk the old table and insert all the entries into
506 // the new table
507 //
508 for(i=0; i <= old_mask; i++) {
509 old_head = &old_table[i];
510 for (entry=old_head->lh_first; entry != NULL; entry=next) {
511 hashval = hash_string(entry->str, 0);
512 head = &string_ref_table[hashval & string_table_mask];
513 if (head->lh_first == NULL) {
514 filled_buckets++;
515 }
516
517 next = entry->hash_chain.le_next;
518 LIST_INSERT_HEAD(head, entry, hash_chain);
519 }
520 }
521
522 FREE(old_table, M_CACHE);
523
524 return 0;
525}
526
527
528static void
529init_string_table(void)
530{
531 string_ref_table = hashinit(4096, M_CACHE, &string_table_mask);
532}
533
534
/*
 * Return a refcounted, shared copy of `name' (len bytes, not required
 * to be NUL-terminated).  If the string is already in the table its
 * refcount is bumped; otherwise a new entry is allocated with the
 * string stored inline after the string_t header.
 *
 * hashval, if non-zero, must equal hash_string(name, len); pass 0 to
 * have it computed here.  `flags' is currently unused.
 * May block in MALLOC and in resize_string_ref_table().
 */
char *
add_name(const char *name, size_t len, u_int hashval, u_int flags)
{
    struct stringhead *head;
    string_t          *entry;
    int                chain_len = 0;
    
    //
    // If the table gets more than 3/4 full, resize it
    //
    if (4*filled_buckets >= ((string_table_mask + 1) * 3)) {
	if (resize_string_ref_table() != 0) {
	    printf("failed to resize the hash table.\n");
	}
    }

    if (hashval == 0) {
	hashval = hash_string(name, len);
    }

    head = &string_ref_table[hashval & string_table_mask];
    for (entry=head->lh_first; entry != NULL; chain_len++, entry=entry->hash_chain.le_next) {
	/* match on len bytes plus the stored NUL (exact-length compare) */
	if (strncmp(entry->str, name, len) == 0 && entry->str[len] == '\0') {
	    entry->refcount++;
	    num_dups++;
	    break;
	}
    }

    if (entry == NULL) {
	// it wasn't already there so add it.
	MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);

	// have to get "head" again because we could have blocked
	// in malloc and thus head could have changed.
	//
	head = &string_ref_table[hashval & string_table_mask];
	if (head->lh_first == NULL) {
	    filled_buckets++;
	}

	LIST_INSERT_HEAD(head, entry, hash_chain);
	/* string storage lives immediately after the header */
	entry->str = (char *)((char *)entry + sizeof(string_t));
	strncpy(entry->str, name, len);
	entry->str[len] = '\0';
	entry->refcount = 1;

	/* track chain-length high-water mark for diagnostics */
	if (chain_len > max_chain_len) {
	    max_chain_len   = chain_len;
	    long_chain_head = head;
	}

	nstrings++;
    }
    
    return entry->str;
}
592
/*
 * Drop one reference on a string previously returned by add_name().
 * nameref must be the exact pointer add_name() returned (matching is
 * by pointer identity, not string contents).  Frees the entry when
 * the refcount hits zero.  Returns 0 on success, ENOENT if the
 * pointer is not in the table.
 */
int
remove_name(const char *nameref)
{
    struct stringhead *head;
    string_t          *entry;
    uint32_t           hashval;

    hashval = hash_string(nameref, 0);
    head = &string_ref_table[hashval & string_table_mask];
    for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
	/* pointer-identity match: shared strings are unique in the table */
	if (entry->str == (unsigned char *)nameref) {
	    entry->refcount--;
	    if (entry->refcount == 0) {
		LIST_REMOVE(entry, hash_chain);
		if (head->lh_first == NULL) {
		    filled_buckets--;
		}
		entry->str = NULL;
		nstrings--;

		FREE(entry, M_TEMP);
	    } else {
		num_dups--;
	    }

	    return 0;
	}
    }

    return ENOENT;
}
624
625
626void
627dump_string_table(void)
628{
629 struct stringhead *head;
630 string_t *entry;
631 int i;
632
633 for(i=0; i <= string_table_mask; i++) {
634 head = &string_ref_table[i];
635 for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
636 printf("%6d - %s\n", entry->refcount, entry->str);
637 }
638 }
639}