bsd/vfs/vfs_cache.c

   1 /*
   2  * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  26 /*
  27  * Copyright (c) 1989, 1993, 1995
  28  *      The Regents of the University of California.  All rights reserved.
  29  *
  30  * This code is derived from software contributed to Berkeley by
  31  * Poul-Henning Kamp of the FreeBSD Project.
  32  *
  33  * Redistribution and use in source and binary forms, with or without
  34  * modification, are permitted provided that the following conditions
  35  * are met:
  36  * 1. Redistributions of source code must retain the above copyright
  37  *    notice, this list of conditions and the following disclaimer.
  38  * 2. Redistributions in binary form must reproduce the above copyright
  39  *    notice, this list of conditions and the following disclaimer in the
  40  *    documentation and/or other materials provided with the distribution.
  41  * 3. All advertising materials mentioning features or use of this software
  42  *    must display the following acknowledgement:
  43  *      This product includes software developed by the University of
  44  *      California, Berkeley and its contributors.
  45  * 4. Neither the name of the University nor the names of its contributors
  46  *    may be used to endorse or promote products derived from this software
  47  *    without specific prior written permission.
  48  *
  49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  59  * SUCH DAMAGE.
  60  *
  61  *
  62  *      @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
  63  */
  64 #include <sys/param.h>
  65 #include <sys/systm.h>
  66 #include <sys/time.h>
  67 #include <sys/mount.h>
  68 #include <sys/vnode.h>
  69 #include <sys/namei.h>
  70 #include <sys/errno.h>
  71 #include <sys/malloc.h>
  72
  73 /*
  74  * Name caching works as follows:
  75  *
  76  * Names found by directory scans are retained in a cache
  77  * for future reference.  It is managed LRU, so frequently
  78  * used names will hang around.  Cache is indexed by hash value
  79  * obtained from (vp, name) where vp refers to the directory
  80  * containing name.
  81  *
  82  * If it is a "negative" entry, (i.e. for a name that is known NOT to
  83  * exist) the vnode pointer will be NULL.
  84  *
  85  * For simplicity (and economy of storage), names longer than
  86  * a maximum length of NCHNAMLEN are not cached; they occur
  87  * infrequently in any case, and are almost never of interest.
  88  *
  89  * Upon reaching the last segment of a path, if the reference
  90  * is for DELETE, or NOCACHE is set (rewrite), and the
  91  * name is located in the cache, it will be dropped.
  92  */
  93
  94 /*
  95  * Structures associated with name cacheing.
  96  */
  97 #define NCHHASH(dvp, hash_val) \
  98         (&nchashtbl[((u_long)(dvp) ^ ((dvp)->v_id ^ (hash_val))) & nchash])
  99 LIST_HEAD(nchashhead, namecache) *nchashtbl;    /* Hash Table */
 100 u_long  nchash;                         /* size of hash table - 1 */
 101 long    numcache;                       /* number of cache entries allocated */
 102 TAILQ_HEAD(, namecache) nclruhead;      /* LRU chain */
 103 struct  nchstats nchstats;              /* cache effectiveness statistics */
 104 u_long nextvnodeid = 0;
 105 int doingcache = 1;                     /* 1 => enable the cache */
 106
 107 /*
 108  * Delete an entry from its hash list and move it to the front
 109  * of the LRU list for immediate reuse.
 110  *
 111  * NOTE: THESE MACROS CAN BLOCK (in the call to remove_name())
 112  *       SO BE CAREFUL IF YOU HOLD POINTERS TO nclruhead OR
 113  *       nchashtbl.
 114  */
 115 #if DIAGNOSTIC
 116 #define PURGE(ncp)  {                                           \
 117         if (ncp->nc_hash.le_prev == 0)                          \
 118                 panic("namecache purge le_prev");               \
 119         if (ncp->nc_hash.le_next == ncp)                        \
 120                 panic("namecache purge le_next");               \
 121         LIST_REMOVE(ncp, nc_hash);                              \
 122         ncp->nc_hash.le_prev = 0;                               \
 123         TAILQ_REMOVE(&nclruhead, ncp, nc_lru);                  \
 124         TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);             \
 125         /* this has to come last because it could block */      \
 126         remove_name(ncp->nc_name);                              \
 127         ncp->nc_name = NULL;                                    \
 128 }
 129 #else
 130 #define PURGE(ncp)  {                                           \
 131         LIST_REMOVE(ncp, nc_hash);                              \
 132         ncp->nc_hash.le_prev = 0;                               \
 133         TAILQ_REMOVE(&nclruhead, ncp, nc_lru);                  \
 134         TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);             \
 135         /* this has to come last because it could block */      \
 136         remove_name(ncp->nc_name);                              \
 137         ncp->nc_name = NULL;                                    \
 138 }
 139 #endif /* DIAGNOSTIC */
 140
 141 /*
 142  * Move an entry that has been used to the tail of the LRU list
 143  * so that it will be preserved for future use.
 144  */
 145 #define TOUCH(ncp)  {                                           \
 146         if (ncp->nc_lru.tqe_next != 0) {                        \
 147                 TAILQ_REMOVE(&nclruhead, ncp, nc_lru);          \
 148                 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);     \
 149         }                                                       \
 150 }
 151
 152
 153 //
 154 // Have to take a len argument because we may only need to
 155 // hash part of a componentname.
 156 //
 157 static unsigned int
 158 hash_string(const char *str, int len)
 159 {
 160     unsigned int i, hashval = 0;
 161
 162     if (len == 0) {
 163         for(i=1; *str != 0; i++, str++) {
 164             hashval += (unsigned char)*str * i;
 165         }
 166     } else {
 167         for(i=len; i > 0; i--, str++) {
 168             hashval += (unsigned char)*str * (len - i + 1);
 169         }
 170     }
 171
 172     return hashval;
 173 }
 174
 175
 176
 177
 178 /*
 179  * Lookup an entry in the cache
 180  *
 181  * We don't do this if the segment name is long, simply so the cache
 182  * can avoid holding long names (which would either waste space, or
 183  * add greatly to the complexity).
 184  *
 185  * Lookup is called with dvp pointing to the directory to search,
 186  * cnp pointing to the name of the entry being sought. If the lookup
 187  * succeeds, the vnode is returned in *vpp, and a status of -1 is
 188  * returned. If the lookup determines that the name does not exist
 189  * (negative cacheing), a status of ENOENT is returned. If the lookup
 190  * fails, a status of zero is returned.
 191  */
 192
 193 int
 194 cache_lookup(dvp, vpp, cnp)
 195         struct vnode *dvp;
 196         struct vnode **vpp;
 197         struct componentname *cnp;
 198 {
 199         register struct namecache *ncp, *nnp;
 200         register struct nchashhead *ncpp;
 201         register long namelen = cnp->cn_namelen;
 202         char *nameptr = cnp->cn_nameptr;
 203
 204         if (!doingcache) {
 205                 cnp->cn_flags &= ~MAKEENTRY;
 206                 return (0);
 207         }
 208
 209         ncpp = NCHHASH(dvp, cnp->cn_hash);
 210         for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
 211                 nnp = ncp->nc_hash.le_next;
 212
 213                 if (ncp->nc_dvp == dvp &&
 214                     strncmp(ncp->nc_name, nameptr, namelen) == 0 &&
 215                     ncp->nc_name[namelen] == 0) {
 216                         /* Make sure the vp isn't stale. */
 217                         if ((ncp->nc_dvpid != dvp->v_id) ||
 218                             (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) {
 219                                 nchstats.ncs_falsehits++;
 220                                 PURGE(ncp);
 221                                 continue;
 222                         }
 223                         break;
 224                 }
 225         }
 226
 227         /* We failed to find an entry */
 228         if (ncp == 0) {
 229                 nchstats.ncs_miss++;
 230                 return (0);
 231         }
 232
 233         /* We don't want to have an entry, so dump it */
 234         if ((cnp->cn_flags & MAKEENTRY) == 0) {
 235                 nchstats.ncs_badhits++;
 236                 PURGE(ncp);
 237                 return (0);
 238         }
 239
 240         /* We found a "positive" match, return the vnode */
 241         if (ncp->nc_vp) {
 242                 if (ncp->nc_vp->v_flag & (VUINIT|VXLOCK|VTERMINATE|VORECLAIM)) {
 243                     PURGE(ncp);
 244                     return (0);
 245                 }
 246
 247                 nchstats.ncs_goodhits++;
 248                 TOUCH(ncp);
 249                 *vpp = ncp->nc_vp;
 250                 return (-1);
 251         }
 252
 253         /* We found a negative match, and want to create it, so purge */
 254         if (cnp->cn_nameiop == CREATE) {
 255                 nchstats.ncs_badhits++;
 256                 PURGE(ncp);
 257                 return (0);
 258         }
 259
 260         /*
 261          * We found a "negative" match, ENOENT notifies client of this match.
 262          * The nc_vpid field records whether this is a whiteout.
 263          */
 264         nchstats.ncs_neghits++;
 265         TOUCH(ncp);
 266         cnp->cn_flags |= ncp->nc_vpid;
 267         return (ENOENT);
 268 }
 269
 270 /*
 271  * Add an entry to the cache.
 272  */
 273 void
 274 cache_enter(dvp, vp, cnp)
 275         struct vnode *dvp;
 276         struct vnode *vp;
 277         struct componentname *cnp;
 278 {
 279         register struct namecache *ncp;
 280         register struct nchashhead *ncpp;
 281
 282         if (!doingcache)
 283                 return;
 284
 285         /*
 286          * We allocate a new entry if we are less than the maximum
 287          * allowed and the one at the front of the LRU list is in use.
 288          * Otherwise we use the one at the front of the LRU list.
 289          */
 290         if (numcache < desiredvnodes &&
 291             ((ncp = nclruhead.tqh_first) == NULL ||
 292             ncp->nc_hash.le_prev != 0)) {
 293                 /* Add one more entry */
 294                 ncp = (struct namecache *)
 295                         _MALLOC_ZONE((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
 296                 numcache++;
 297         } else if (ncp = nclruhead.tqh_first) {
 298                 /* reuse an old entry */
 299                 TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
 300                 if (ncp->nc_hash.le_prev != 0) {
 301 #if DIAGNOSTIC
 302                         if (ncp->nc_hash.le_next == ncp)
 303                                 panic("cache_enter: le_next");
 304 #endif
 305                         LIST_REMOVE(ncp, nc_hash);
 306                         remove_name(ncp->nc_name);
 307                         ncp->nc_name = NULL;
 308                         ncp->nc_hash.le_prev = 0;
 309                 }
 310         } else {
 311                 /* give up */
 312                 return;
 313         }
 314
 315         /*
 316          * Fill in cache info, if vp is NULL this is a "negative" cache entry.
 317          * For negative entries, we have to record whether it is a whiteout.
 318          * the whiteout flag is stored in the nc_vpid field which is
 319          * otherwise unused.
 320          */
 321         ncp->nc_vp = vp;
 322         if (vp)
 323                 ncp->nc_vpid = vp->v_id;
 324         else
 325                 ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
 326         ncp->nc_dvp = dvp;
 327         ncp->nc_dvpid = dvp->v_id;
 328         ncp->nc_name = add_name(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
 329         TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
 330         ncpp = NCHHASH(dvp, cnp->cn_hash);
 331 #if DIAGNOSTIC
 332         {
 333                 register struct namecache *p;
 334
 335                 for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
 336                         if (p == ncp)
 337                                 panic("cache_enter: duplicate");
 338         }
 339 #endif
 340         LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
 341 }
 342
 343 /*
 344  * Name cache initialization, from vfs_init() when we are booting
 345  */
 346 void
 347 nchinit()
 348 {
 349     static void init_string_table(void);
 350
 351     TAILQ_INIT(&nclruhead);
 352     nchashtbl = hashinit(MAX(4096, desiredvnodes), M_CACHE, &nchash);
 353
 354     init_string_table();
 355 }
 356
 357
 358 int
 359 resize_namecache(u_int newsize)
 360 {
 361     struct nchashhead *new_table;
 362     struct nchashhead *old_table;
 363     struct nchashhead *old_head, *head;
 364     struct namecache  *entry, *next;
 365     uint32_t           i;
 366     u_long             new_mask, old_mask;
 367
 368     // we don't support shrinking yet
 369     if (newsize < nchash) {
 370         return 0;
 371     }
 372
 373     new_table = hashinit(newsize, M_CACHE, &new_mask);
 374     if (new_table == NULL) {
 375         return ENOMEM;
 376     }
 377
 378     // do the switch!
 379     old_table = nchashtbl;
 380     nchashtbl = new_table;
 381     old_mask  = nchash;
 382     nchash    = new_mask;
 383
 384     // walk the old table and insert all the entries into
 385     // the new table
 386     //
 387     for(i=0; i <= old_mask; i++) {
 388         old_head = &old_table[i];
 389         for (entry=old_head->lh_first; entry != NULL; entry=next) {
 390             //
 391             // XXXdbg - Beware: this assumes that hash_string() does
 392             //                  the same thing as what happens in
 393             //                  lookup() over in vfs_lookup.c
 394             head = NCHHASH(entry->nc_dvp, hash_string(entry->nc_name, 0));
 395
 396             next = entry->nc_hash.le_next;
 397             LIST_INSERT_HEAD(head, entry, nc_hash);
 398         }
 399     }
 400
 401     FREE(old_table, M_CACHE);
 402
 403     return 0;
 404 }
 405
 406
 407
 408
 409 /*
 410  * Invalidate a all entries to particular vnode.
 411  *
 412  * We actually just increment the v_id, that will do it. The entries will
 413  * be purged by lookup as they get found. If the v_id wraps around, we
 414  * need to ditch the entire cache, to avoid confusion. No valid vnode will
 415  * ever have (v_id == 0).
 416  */
 417 void
 418 cache_purge(vp)
 419         struct vnode *vp;
 420 {
 421         struct namecache *ncp;
 422         struct nchashhead *ncpp;
 423
 424         vp->v_id = ++nextvnodeid;
 425         if (nextvnodeid != 0)
 426                 return;
 427         for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
 428                 while (ncp = ncpp->lh_first)
 429                         PURGE(ncp);
 430         }
 431         vp->v_id = ++nextvnodeid;
 432 }
 433
 434 /*
 435  * Flush all entries referencing a particular filesystem.
 436  *
 437  * Since we need to check it anyway, we will flush all the invalid
 438  * entriess at the same time.
 439  */
 440 void
 441 cache_purgevfs(mp)
 442         struct mount *mp;
 443 {
 444         struct nchashhead *ncpp;
 445         struct namecache *ncp, *nnp;
 446
 447         /* Scan hash tables for applicable entries */
 448         for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
 449                 for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
 450                         nnp = ncp->nc_hash.le_next;
 451                         if (ncp->nc_dvpid != ncp->nc_dvp->v_id ||
 452                             (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) ||
 453                             ncp->nc_dvp->v_mount == mp) {
 454                                 PURGE(ncp);
 455                         }
 456                 }
 457         }
 458 }
 459
 460
 461
 462 //
 463 // String ref routines
 464 //
 465 static LIST_HEAD(stringhead, string_t) *string_ref_table;
 466 static u_long   string_table_mask;
 467 static uint32_t max_chain_len=0;
 468 static struct stringhead *long_chain_head=NULL;
 469 static uint32_t filled_buckets=0;
 470 static uint32_t num_dups=0;
 471 static uint32_t nstrings=0;
 472
 473 typedef struct string_t {
 474     LIST_ENTRY(string_t)  hash_chain;
 475     unsigned char        *str;
 476     uint32_t              refcount;
 477 } string_t;
 478
 479
 480
 481 static int
 482 resize_string_ref_table()
 483 {
 484     struct stringhead *new_table;
 485     struct stringhead *old_table;
 486     struct stringhead *old_head, *head;
 487     string_t          *entry, *next;
 488     uint32_t           i, hashval;
 489     u_long             new_mask, old_mask;
 490
 491     new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);
 492     if (new_table == NULL) {
 493         return ENOMEM;
 494     }
 495
 496     // do the switch!
 497     old_table         = string_ref_table;
 498     string_ref_table  = new_table;
 499     old_mask          = string_table_mask;
 500     string_table_mask = new_mask;
 501
 502     printf("resize: max chain len %d, new table size %d\n",
 503            max_chain_len, new_mask + 1);
 504     max_chain_len   = 0;
 505     long_chain_head = NULL;
 506     filled_buckets  = 0;
 507
 508     // walk the old table and insert all the entries into
 509     // the new table
 510     //
 511     for(i=0; i <= old_mask; i++) {
 512         old_head = &old_table[i];
 513         for (entry=old_head->lh_first; entry != NULL; entry=next) {
 514             hashval = hash_string(entry->str, 0);
 515             head = &string_ref_table[hashval & string_table_mask];
 516             if (head->lh_first == NULL) {
 517                 filled_buckets++;
 518             }
 519
 520             next = entry->hash_chain.le_next;
 521             LIST_INSERT_HEAD(head, entry, hash_chain);
 522         }
 523     }
 524
 525     FREE(old_table, M_CACHE);
 526
 527     return 0;
 528 }
 529
 530
 531 static void
 532 init_string_table(void)
 533 {
 534     string_ref_table = hashinit(4096, M_CACHE, &string_table_mask);
 535 }
 536
 537
 538 char *
 539 add_name(const char *name, size_t len, u_int hashval, u_int flags)
 540 {
 541     struct stringhead *head;
 542     string_t          *entry;
 543     int                chain_len = 0;
 544
 545     //
 546     // If the table gets more than 3/4 full, resize it
 547     //
 548     if (4*filled_buckets >= ((string_table_mask + 1) * 3)) {
 549                 if (resize_string_ref_table() != 0) {
 550                         printf("failed to resize the hash table.\n");
 551                 }
 552     }
 553
 554     if (hashval == 0) {
 555         hashval = hash_string(name, len);
 556     }
 557
 558     head = &string_ref_table[hashval & string_table_mask];
 559     for (entry=head->lh_first; entry != NULL; chain_len++, entry=entry->hash_chain.le_next) {
 560         if (strncmp(entry->str, name, len) == 0 && entry->str[len] == '\0') {
 561             entry->refcount++;
 562             num_dups++;
 563             break;
 564         }
 565     }
 566
 567     if (entry == NULL) {
 568         // it wasn't already there so add it.
 569         MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);
 570
 571         // have to get "head" again because we could have blocked
 572         // in malloc and thus head could have changed.
 573         //
 574         head = &string_ref_table[hashval & string_table_mask];
 575         if (head->lh_first == NULL) {
 576             filled_buckets++;
 577         }
 578
 579         LIST_INSERT_HEAD(head, entry, hash_chain);
 580         entry->str = (char *)((char *)entry + sizeof(string_t));
 581         strncpy(entry->str, name, len);
 582         entry->str[len] = '\0';
 583         entry->refcount = 1;
 584
 585         if (chain_len > max_chain_len) {
 586             max_chain_len   = chain_len;
 587             long_chain_head = head;
 588         }
 589
 590         nstrings++;
 591     }
 592
 593     return entry->str;
 594 }
 595
 596 int
 597 remove_name(const char *nameref)
 598 {
 599     struct stringhead *head;
 600     string_t          *entry;
 601     uint32_t           hashval;
 602
 603     hashval = hash_string(nameref, 0);
 604     head = &string_ref_table[hashval & string_table_mask];
 605     for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
 606         if (entry->str == (unsigned char *)nameref) {
 607             entry->refcount--;
 608             if (entry->refcount == 0) {
 609                 LIST_REMOVE(entry, hash_chain);
 610                 if (head->lh_first == NULL) {
 611                     filled_buckets--;
 612                 }
 613                 entry->str = NULL;
 614                 nstrings--;
 615
 616                 FREE(entry, M_TEMP);
 617             } else {
 618                 num_dups--;
 619             }
 620
 621             return 0;
 622         }
 623     }
 624
 625     return ENOENT;
 626 }
 627
 628
 629 void
 630 dump_string_table(void)
 631 {
 632     struct stringhead *head;
 633     string_t          *entry;
 634     int                i;
 635
 636     for(i=0; i <= string_table_mask; i++) {
 637         head = &string_ref_table[i];
 638         for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
 639             printf("%6d - %s\n", entry->refcount, entry->str);
 640         }
 641     }
 642 }