bsd/vfs/vfs_cache.c

   1 /*
   2  * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  23 /*
  24  * Copyright (c) 1989, 1993, 1995
  25  *      The Regents of the University of California.  All rights reserved.
  26  *
  27  * This code is derived from software contributed to Berkeley by
  28  * Poul-Henning Kamp of the FreeBSD Project.
  29  *
  30  * Redistribution and use in source and binary forms, with or without
  31  * modification, are permitted provided that the following conditions
  32  * are met:
  33  * 1. Redistributions of source code must retain the above copyright
  34  *    notice, this list of conditions and the following disclaimer.
  35  * 2. Redistributions in binary form must reproduce the above copyright
  36  *    notice, this list of conditions and the following disclaimer in the
  37  *    documentation and/or other materials provided with the distribution.
  38  * 3. All advertising materials mentioning features or use of this software
  39  *    must display the following acknowledgement:
  40  *      This product includes software developed by the University of
  41  *      California, Berkeley and its contributors.
  42  * 4. Neither the name of the University nor the names of its contributors
  43  *    may be used to endorse or promote products derived from this software
  44  *    without specific prior written permission.
  45  *
  46  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  47  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  48  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  49  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  50  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  51  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  52  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  54  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  55  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  56  * SUCH DAMAGE.
  57  *
  58  *
  59  *      @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
  60  */
  61 #include <sys/param.h>
  62 #include <sys/systm.h>
  63 #include <sys/time.h>
  64 #include <sys/mount.h>
  65 #include <sys/vnode.h>
  66 #include <sys/namei.h>
  67 #include <sys/errno.h>
  68 #include <sys/malloc.h>
  69
  70 /*
  71  * Name caching works as follows:
  72  *
  73  * Names found by directory scans are retained in a cache
  74  * for future reference.  It is managed LRU, so frequently
  75  * used names will hang around.  Cache is indexed by hash value
  76  * obtained from (vp, name) where vp refers to the directory
  77  * containing name.
  78  *
  79  * If it is a "negative" entry, (i.e. for a name that is known NOT to
  80  * exist) the vnode pointer will be NULL.
  81  *
  82  * For simplicity (and economy of storage), names longer than
  83  * a maximum length of NCHNAMLEN are not cached; they occur
  84  * infrequently in any case, and are almost never of interest.
  85  *
  86  * Upon reaching the last segment of a path, if the reference
  87  * is for DELETE, or NOCACHE is set (rewrite), and the
  88  * name is located in the cache, it will be dropped.
  89  */
  90
  91 /*
  92  * Structures associated with name cacheing.
  93  */
  94 #define NCHHASH(dvp, hash_val) \
  95         (&nchashtbl[((u_long)(dvp) ^ ((dvp)->v_id ^ (hash_val))) & nchash])
  96 LIST_HEAD(nchashhead, namecache) *nchashtbl;    /* Hash Table */
  97 u_long  nchash;                         /* size of hash table - 1 */
  98 long    numcache;                       /* number of cache entries allocated */
  99 TAILQ_HEAD(, namecache) nclruhead;      /* LRU chain */
 100 struct  nchstats nchstats;              /* cache effectiveness statistics */
 101 u_long nextvnodeid = 0;
 102 int doingcache = 1;                     /* 1 => enable the cache */
 103
 104 /*
 105  * Delete an entry from its hash list and move it to the front
 106  * of the LRU list for immediate reuse.
 107  *
 108  * NOTE: THESE MACROS CAN BLOCK (in the call to remove_name())
 109  *       SO BE CAREFUL IF YOU HOLD POINTERS TO nclruhead OR
 110  *       nchashtbl.
 111  */
 112 #if DIAGNOSTIC
 113 #define PURGE(ncp)  {                                           \
 114         if (ncp->nc_hash.le_prev == 0)                          \
 115                 panic("namecache purge le_prev");               \
 116         if (ncp->nc_hash.le_next == ncp)                        \
 117                 panic("namecache purge le_next");               \
 118         LIST_REMOVE(ncp, nc_hash);                              \
 119         ncp->nc_hash.le_prev = 0;                               \
 120         TAILQ_REMOVE(&nclruhead, ncp, nc_lru);                  \
 121         TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);             \
 122         /* this has to come last because it could block */      \
 123         remove_name(ncp->nc_name);                              \
 124         ncp->nc_name = NULL;                                    \
 125 }
 126 #else
 127 #define PURGE(ncp)  {                                           \
 128         LIST_REMOVE(ncp, nc_hash);                              \
 129         ncp->nc_hash.le_prev = 0;                               \
 130         TAILQ_REMOVE(&nclruhead, ncp, nc_lru);                  \
 131         TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);             \
 132         /* this has to come last because it could block */      \
 133         remove_name(ncp->nc_name);                              \
 134         ncp->nc_name = NULL;                                    \
 135 }
 136 #endif /* DIAGNOSTIC */
 137
 138 /*
 139  * Move an entry that has been used to the tail of the LRU list
 140  * so that it will be preserved for future use.
 141  */
 142 #define TOUCH(ncp)  {                                           \
 143         if (ncp->nc_lru.tqe_next != 0) {                        \
 144                 TAILQ_REMOVE(&nclruhead, ncp, nc_lru);          \
 145                 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);     \
 146         }                                                       \
 147 }
 148
 149
 150 //
 151 // Have to take a len argument because we may only need to
 152 // hash part of a componentname.
 153 //
 154 static unsigned int
 155 hash_string(const char *str, int len)
 156 {
 157     unsigned int i, hashval = 0;
 158
 159     if (len == 0) {
 160         for(i=1; *str != 0; i++, str++) {
 161             hashval += (unsigned char)*str * i;
 162         }
 163     } else {
 164         for(i=len; i > 0; i--, str++) {
 165             hashval += (unsigned char)*str * (len - i + 1);
 166         }
 167     }
 168
 169     return hashval;
 170 }
 171
 172
 173
 174
 175 /*
 176  * Lookup an entry in the cache
 177  *
 178  * We don't do this if the segment name is long, simply so the cache
 179  * can avoid holding long names (which would either waste space, or
 180  * add greatly to the complexity).
 181  *
 182  * Lookup is called with dvp pointing to the directory to search,
 183  * cnp pointing to the name of the entry being sought. If the lookup
 184  * succeeds, the vnode is returned in *vpp, and a status of -1 is
 185  * returned. If the lookup determines that the name does not exist
 186  * (negative cacheing), a status of ENOENT is returned. If the lookup
 187  * fails, a status of zero is returned.
 188  */
 189
 190 int
 191 cache_lookup(dvp, vpp, cnp)
 192         struct vnode *dvp;
 193         struct vnode **vpp;
 194         struct componentname *cnp;
 195 {
 196         register struct namecache *ncp, *nnp;
 197         register struct nchashhead *ncpp;
 198         register long namelen = cnp->cn_namelen;
 199         char *nameptr = cnp->cn_nameptr;
 200
 201         if (!doingcache) {
 202                 cnp->cn_flags &= ~MAKEENTRY;
 203                 return (0);
 204         }
 205
 206         ncpp = NCHHASH(dvp, cnp->cn_hash);
 207         for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
 208                 nnp = ncp->nc_hash.le_next;
 209
 210                 if (ncp->nc_dvp == dvp &&
 211                     strncmp(ncp->nc_name, nameptr, namelen) == 0 &&
 212                     ncp->nc_name[namelen] == 0) {
 213                         /* Make sure the vp isn't stale. */
 214                         if ((ncp->nc_dvpid != dvp->v_id) ||
 215                             (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) {
 216                                 nchstats.ncs_falsehits++;
 217                                 PURGE(ncp);
 218                                 continue;
 219                         }
 220                         break;
 221                 }
 222         }
 223
 224         /* We failed to find an entry */
 225         if (ncp == 0) {
 226                 nchstats.ncs_miss++;
 227                 return (0);
 228         }
 229
 230         /* We don't want to have an entry, so dump it */
 231         if ((cnp->cn_flags & MAKEENTRY) == 0) {
 232                 nchstats.ncs_badhits++;
 233                 PURGE(ncp);
 234                 return (0);
 235         }
 236
 237         /* We found a "positive" match, return the vnode */
 238         if (ncp->nc_vp) {
 239                 if (ncp->nc_vp->v_flag & (VUINIT|VXLOCK|VTERMINATE|VORECLAIM)) {
 240                     PURGE(ncp);
 241                     return (0);
 242                 }
 243
 244                 nchstats.ncs_goodhits++;
 245                 TOUCH(ncp);
 246                 *vpp = ncp->nc_vp;
 247                 return (-1);
 248         }
 249
 250         /* We found a negative match, and want to create it, so purge */
 251         if (cnp->cn_nameiop == CREATE) {
 252                 nchstats.ncs_badhits++;
 253                 PURGE(ncp);
 254                 return (0);
 255         }
 256
 257         /*
 258          * We found a "negative" match, ENOENT notifies client of this match.
 259          * The nc_vpid field records whether this is a whiteout.
 260          */
 261         nchstats.ncs_neghits++;
 262         TOUCH(ncp);
 263         cnp->cn_flags |= ncp->nc_vpid;
 264         return (ENOENT);
 265 }
 266
 267 /*
 268  * Add an entry to the cache.
 269  */
 270 void
 271 cache_enter(dvp, vp, cnp)
 272         struct vnode *dvp;
 273         struct vnode *vp;
 274         struct componentname *cnp;
 275 {
 276         register struct namecache *ncp;
 277         register struct nchashhead *ncpp;
 278
 279         if (!doingcache)
 280                 return;
 281
 282         /*
 283          * We allocate a new entry if we are less than the maximum
 284          * allowed and the one at the front of the LRU list is in use.
 285          * Otherwise we use the one at the front of the LRU list.
 286          */
 287         if (numcache < desiredvnodes &&
 288             ((ncp = nclruhead.tqh_first) == NULL ||
 289             ncp->nc_hash.le_prev != 0)) {
 290                 /* Add one more entry */
 291                 ncp = (struct namecache *)
 292                         _MALLOC_ZONE((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
 293                 numcache++;
 294         } else if (ncp = nclruhead.tqh_first) {
 295                 /* reuse an old entry */
 296                 TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
 297                 if (ncp->nc_hash.le_prev != 0) {
 298 #if DIAGNOSTIC
 299                         if (ncp->nc_hash.le_next == ncp)
 300                                 panic("cache_enter: le_next");
 301 #endif
 302                         LIST_REMOVE(ncp, nc_hash);
 303                         remove_name(ncp->nc_name);
 304                         ncp->nc_name = NULL;
 305                         ncp->nc_hash.le_prev = 0;
 306                 }
 307         } else {
 308                 /* give up */
 309                 return;
 310         }
 311
 312         /*
 313          * Fill in cache info, if vp is NULL this is a "negative" cache entry.
 314          * For negative entries, we have to record whether it is a whiteout.
 315          * the whiteout flag is stored in the nc_vpid field which is
 316          * otherwise unused.
 317          */
 318         ncp->nc_vp = vp;
 319         if (vp)
 320                 ncp->nc_vpid = vp->v_id;
 321         else
 322                 ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
 323         ncp->nc_dvp = dvp;
 324         ncp->nc_dvpid = dvp->v_id;
 325         ncp->nc_name = add_name(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
 326         TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
 327         ncpp = NCHHASH(dvp, cnp->cn_hash);
 328 #if DIAGNOSTIC
 329         {
 330                 register struct namecache *p;
 331
 332                 for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
 333                         if (p == ncp)
 334                                 panic("cache_enter: duplicate");
 335         }
 336 #endif
 337         LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
 338 }
 339
 340 /*
 341  * Name cache initialization, from vfs_init() when we are booting
 342  */
 343 void
 344 nchinit()
 345 {
 346     static void init_string_table(void);
 347
 348     TAILQ_INIT(&nclruhead);
 349     nchashtbl = hashinit(MAX(4096, desiredvnodes), M_CACHE, &nchash);
 350
 351     init_string_table();
 352 }
 353
 354
 355 int
 356 resize_namecache(u_int newsize)
 357 {
 358     struct nchashhead *new_table;
 359     struct nchashhead *old_table;
 360     struct nchashhead *old_head, *head;
 361     struct namecache  *entry, *next;
 362     uint32_t           i;
 363     u_long             new_mask, old_mask;
 364
 365     // we don't support shrinking yet
 366     if (newsize < nchash) {
 367         return 0;
 368     }
 369
 370     new_table = hashinit(newsize, M_CACHE, &new_mask);
 371     if (new_table == NULL) {
 372         return ENOMEM;
 373     }
 374
 375     // do the switch!
 376     old_table = nchashtbl;
 377     nchashtbl = new_table;
 378     old_mask  = nchash;
 379     nchash    = new_mask;
 380
 381     // walk the old table and insert all the entries into
 382     // the new table
 383     //
 384     for(i=0; i <= old_mask; i++) {
 385         old_head = &old_table[i];
 386         for (entry=old_head->lh_first; entry != NULL; entry=next) {
 387             //
 388             // XXXdbg - Beware: this assumes that hash_string() does
 389             //                  the same thing as what happens in
 390             //                  lookup() over in vfs_lookup.c
 391             head = NCHHASH(entry->nc_dvp, hash_string(entry->nc_name, 0));
 392
 393             next = entry->nc_hash.le_next;
 394             LIST_INSERT_HEAD(head, entry, nc_hash);
 395         }
 396     }
 397
 398     FREE(old_table, M_CACHE);
 399
 400     return 0;
 401 }
 402
 403
 404
 405
 406 /*
 407  * Invalidate a all entries to particular vnode.
 408  *
 409  * We actually just increment the v_id, that will do it. The entries will
 410  * be purged by lookup as they get found. If the v_id wraps around, we
 411  * need to ditch the entire cache, to avoid confusion. No valid vnode will
 412  * ever have (v_id == 0).
 413  */
 414 void
 415 cache_purge(vp)
 416         struct vnode *vp;
 417 {
 418         struct namecache *ncp;
 419         struct nchashhead *ncpp;
 420
 421         vp->v_id = ++nextvnodeid;
 422         if (nextvnodeid != 0)
 423                 return;
 424         for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
 425                 while (ncp = ncpp->lh_first)
 426                         PURGE(ncp);
 427         }
 428         vp->v_id = ++nextvnodeid;
 429 }
 430
 431 /*
 432  * Flush all entries referencing a particular filesystem.
 433  *
 434  * Since we need to check it anyway, we will flush all the invalid
 435  * entriess at the same time.
 436  */
 437 void
 438 cache_purgevfs(mp)
 439         struct mount *mp;
 440 {
 441         struct nchashhead *ncpp;
 442         struct namecache *ncp, *nnp;
 443
 444         /* Scan hash tables for applicable entries */
 445         for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
 446                 for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
 447                         nnp = ncp->nc_hash.le_next;
 448                         if (ncp->nc_dvpid != ncp->nc_dvp->v_id ||
 449                             (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) ||
 450                             ncp->nc_dvp->v_mount == mp) {
 451                                 PURGE(ncp);
 452                         }
 453                 }
 454         }
 455 }
 456
 457
 458
 459 //
 460 // String ref routines
 461 //
 462 static LIST_HEAD(stringhead, string_t) *string_ref_table;
 463 static u_long   string_table_mask;
 464 static uint32_t max_chain_len=0;
 465 static struct stringhead *long_chain_head=NULL;
 466 static uint32_t filled_buckets=0;
 467 static uint32_t num_dups=0;
 468 static uint32_t nstrings=0;
 469
 470 typedef struct string_t {
 471     LIST_ENTRY(string_t)  hash_chain;
 472     unsigned char        *str;
 473     uint32_t              refcount;
 474 } string_t;
 475
 476
 477
 478 static int
 479 resize_string_ref_table()
 480 {
 481     struct stringhead *new_table;
 482     struct stringhead *old_table;
 483     struct stringhead *old_head, *head;
 484     string_t          *entry, *next;
 485     uint32_t           i, hashval;
 486     u_long             new_mask, old_mask;
 487
 488     new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);
 489     if (new_table == NULL) {
 490         return ENOMEM;
 491     }
 492
 493     // do the switch!
 494     old_table         = string_ref_table;
 495     string_ref_table  = new_table;
 496     old_mask          = string_table_mask;
 497     string_table_mask = new_mask;
 498
 499     printf("resize: max chain len %d, new table size %d\n",
 500            max_chain_len, new_mask + 1);
 501     max_chain_len   = 0;
 502     long_chain_head = NULL;
 503     filled_buckets  = 0;
 504
 505     // walk the old table and insert all the entries into
 506     // the new table
 507     //
 508     for(i=0; i <= old_mask; i++) {
 509         old_head = &old_table[i];
 510         for (entry=old_head->lh_first; entry != NULL; entry=next) {
 511             hashval = hash_string(entry->str, 0);
 512             head = &string_ref_table[hashval & string_table_mask];
 513             if (head->lh_first == NULL) {
 514                 filled_buckets++;
 515             }
 516
 517             next = entry->hash_chain.le_next;
 518             LIST_INSERT_HEAD(head, entry, hash_chain);
 519         }
 520     }
 521
 522     FREE(old_table, M_CACHE);
 523
 524     return 0;
 525 }
 526
 527
 528 static void
 529 init_string_table(void)
 530 {
 531     string_ref_table = hashinit(4096, M_CACHE, &string_table_mask);
 532 }
 533
 534
 535 char *
 536 add_name(const char *name, size_t len, u_int hashval, u_int flags)
 537 {
 538     struct stringhead *head;
 539     string_t          *entry;
 540     int                chain_len = 0;
 541
 542     //
 543     // If the table gets more than 3/4 full, resize it
 544     //
 545     if (4*filled_buckets >= ((string_table_mask + 1) * 3)) {
 546                 if (resize_string_ref_table() != 0) {
 547                         printf("failed to resize the hash table.\n");
 548                 }
 549     }
 550
 551     if (hashval == 0) {
 552         hashval = hash_string(name, len);
 553     }
 554
 555     head = &string_ref_table[hashval & string_table_mask];
 556     for (entry=head->lh_first; entry != NULL; chain_len++, entry=entry->hash_chain.le_next) {
 557         if (strncmp(entry->str, name, len) == 0 && entry->str[len] == '\0') {
 558             entry->refcount++;
 559             num_dups++;
 560             break;
 561         }
 562     }
 563
 564     if (entry == NULL) {
 565         // it wasn't already there so add it.
 566         MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);
 567
 568         // have to get "head" again because we could have blocked
 569         // in malloc and thus head could have changed.
 570         //
 571         head = &string_ref_table[hashval & string_table_mask];
 572         if (head->lh_first == NULL) {
 573             filled_buckets++;
 574         }
 575
 576         LIST_INSERT_HEAD(head, entry, hash_chain);
 577         entry->str = (char *)((char *)entry + sizeof(string_t));
 578         strncpy(entry->str, name, len);
 579         entry->str[len] = '\0';
 580         entry->refcount = 1;
 581
 582         if (chain_len > max_chain_len) {
 583             max_chain_len   = chain_len;
 584             long_chain_head = head;
 585         }
 586
 587         nstrings++;
 588     }
 589
 590     return entry->str;
 591 }
 592
 593 int
 594 remove_name(const char *nameref)
 595 {
 596     struct stringhead *head;
 597     string_t          *entry;
 598     uint32_t           hashval;
 599
 600     hashval = hash_string(nameref, 0);
 601     head = &string_ref_table[hashval & string_table_mask];
 602     for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
 603         if (entry->str == (unsigned char *)nameref) {
 604             entry->refcount--;
 605             if (entry->refcount == 0) {
 606                 LIST_REMOVE(entry, hash_chain);
 607                 if (head->lh_first == NULL) {
 608                     filled_buckets--;
 609                 }
 610                 entry->str = NULL;
 611                 nstrings--;
 612
 613                 FREE(entry, M_TEMP);
 614             } else {
 615                 num_dups--;
 616             }
 617
 618             return 0;
 619         }
 620     }
 621
 622     return ENOENT;
 623 }
 624
 625
 626 void
 627 dump_string_table(void)
 628 {
 629     struct stringhead *head;
 630     string_t          *entry;
 631     int                i;
 632
 633     for(i=0; i <= string_table_mask; i++) {
 634         head = &string_ref_table[i];
 635         for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
 636             printf("%6d - %s\n", entry->refcount, entry->str);
 637         }
 638     }
 639 }