bsd/vfs/vfs_cache.c

   1 /*
   2  * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1989, 1993, 1995
  31  *      The Regents of the University of California.  All rights reserved.
  32  *
  33  * This code is derived from software contributed to Berkeley by
  34  * Poul-Henning Kamp of the FreeBSD Project.
  35  *
  36  * Redistribution and use in source and binary forms, with or without
  37  * modification, are permitted provided that the following conditions
  38  * are met:
  39  * 1. Redistributions of source code must retain the above copyright
  40  *    notice, this list of conditions and the following disclaimer.
  41  * 2. Redistributions in binary form must reproduce the above copyright
  42  *    notice, this list of conditions and the following disclaimer in the
  43  *    documentation and/or other materials provided with the distribution.
  44  * 3. All advertising materials mentioning features or use of this software
  45  *    must display the following acknowledgement:
  46  *      This product includes software developed by the University of
  47  *      California, Berkeley and its contributors.
  48  * 4. Neither the name of the University nor the names of its contributors
  49  *    may be used to endorse or promote products derived from this software
  50  *    without specific prior written permission.
  51  *
  52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  62  * SUCH DAMAGE.
  63  *
  64  *
  65  *      @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73 #include <sys/param.h>
  74 #include <sys/systm.h>
  75 #include <sys/time.h>
  76 #include <sys/mount_internal.h>
  77 #include <sys/vnode_internal.h>
  78 #include <sys/namei.h>
  79 #include <sys/errno.h>
  80 #include <sys/malloc.h>
  81 #include <sys/kauth.h>
  82 #include <sys/user.h>
  83 #include <sys/paths.h>
  84
  85 #if CONFIG_MACF
  86 #include <security/mac_framework.h>
  87 #endif
  88
  89 /*
  90  * Name caching works as follows:
  91  *
  92  * Names found by directory scans are retained in a cache
  93  * for future reference.  It is managed LRU, so frequently
  94  * used names will hang around.  Cache is indexed by hash value
  95  * obtained from (vp, name) where vp refers to the directory
  96  * containing name.
  97  *
  98  * If it is a "negative" entry, (i.e. for a name that is known NOT to
  99  * exist) the vnode pointer will be NULL.
 100  *
 101  * Upon reaching the last segment of a path, if the reference
 102  * is for DELETE, or NOCACHE is set (rewrite), and the
 103  * name is located in the cache, it will be dropped.
 104  */
 105
  106 /*
  107  * Structures associated with name caching.
       *
       * These are the global state of the name cache: the hash table that
       * NCHHASH() indexes, and the two tail queues that chain cache entries.
  108  */
  109
  110 LIST_HEAD(nchashhead, namecache) *nchashtbl;    /* Hash Table */
  111 u_long  nchashmask;                     /* mask NCHHASH() applies to select a bucket in nchashtbl */
  112 u_long  nchash;                         /* size of hash table - 1 -- NOTE(review): NCHHASH() masks with nchashmask, not nchash; confirm which variable this description belongs to */
  113 long    numcache;                       /* number of cache entries allocated */
  114 int     desiredNodes;                   /* presumably the target number of cache entries -- TODO confirm */
  115 int     desiredNegNodes;                /* presumably the target number of negative entries -- TODO confirm */
  116 int     ncs_negtotal;                   /* presumably the current count of negative entries -- TODO confirm */
  117 int     nc_disabled = 0;                /* presumably non-zero turns name caching off -- TODO confirm */
  118 TAILQ_HEAD(, namecache) nchead;         /* chain of all name cache entries */
  119 TAILQ_HEAD(, namecache) neghead;        /* chain of only negative cache entries */
 120
 121
  122 #if COLLECT_STATS
      /*
       * Statistics build: NCHSTAT(v) bumps the named counter in nchstats with
       * a plain increment, and a "shared" lock request is mapped onto the same
       * name_cache_lock() call used for NAME_CACHE_LOCK().
       * NOTE(review): presumably this is so the unsynchronized counter
       * increments are serialized -- confirm.
       */
  123
  124 struct  nchstats nchstats;              /* cache effectiveness statistics */
  125
  126 #define NCHSTAT(v) {            \
  127         nchstats.v++;           \
  128 }
  129 #define NAME_CACHE_LOCK()               name_cache_lock()
  130 #define NAME_CACHE_UNLOCK()             name_cache_unlock()
  131 #define NAME_CACHE_LOCK_SHARED()        name_cache_lock()
  132
  133 #else
      /*
       * Normal build: NCHSTAT() expands to nothing and a shared lock request
       * goes to name_cache_lock_shared().
       */
  134
  135 #define NCHSTAT(v)
  136 #define NAME_CACHE_LOCK()               name_cache_lock()
  137 #define NAME_CACHE_UNLOCK()             name_cache_unlock()
  138 #define NAME_CACHE_LOCK_SHARED()        name_cache_lock_shared()
  139
  140 #endif
 141
 142
  143 /* vars for name cache list lock */
  144 lck_grp_t * namecache_lck_grp;
  145 lck_grp_attr_t * namecache_lck_grp_attr;
  146 lck_attr_t * namecache_lck_attr;
  147
      /* lock group / attributes for the string cache locks (same trio as above) */
  148 lck_grp_t * strcache_lck_grp;
  149 lck_grp_attr_t * strcache_lck_grp_attr;
  150 lck_attr_t * strcache_lck_attr;
  151
  152 lck_rw_t  * namecache_rw_lock;  /* presumably the lock behind NAME_CACHE_LOCK*() -- TODO confirm */
  153 lck_rw_t  * strtable_rw_lock;   /* presumably guards the string table -- TODO confirm */
  154
  155 #define NUM_STRCACHE_LOCKS 1024 /* number of mutexes in strcache_mtx_locks[] */
  156
      /* NOTE(review): how a string is mapped to one of these mutexes is not visible here -- confirm */
  157 lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];
 158
 159
  160 static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);      /* forward declarations of file-local helpers */
  161 static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
  162 static void init_string_table(void) __attribute__((section("__TEXT, initcode")));        /* placed in the "__TEXT, initcode" section */
  163 static void cache_delete(struct namecache *, int);
  164 static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);
  165
  166 #ifdef DUMP_STRING_TABLE
  167 /*
  168  * Internal dump function used for debugging
  169  */
  170 void dump_string_table(void);
  171 #endif  /* DUMP_STRING_TABLE */
  172
  173 static void init_crc32(void) __attribute__((section("__TEXT, initcode")));       /* placed in the "__TEXT, initcode" section */
  174 static unsigned int crc32tab[256];      /* presumably the CRC-32 lookup table filled in by init_crc32() -- TODO confirm */
 175
 176
 177 #define NCHHASH(dvp, hash_val) \
 178         (&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])
 179
 180
 181
 182 /*
 183  * This function builds the path to a filename in "buff".  The
 184  * length of the buffer *INCLUDING* the trailing zero byte is
 185  * returned in outlen.  NOTE: the length includes the trailing
 186  * zero byte and thus the length is one greater than what strlen
 187  * would return.  This is important and lots of code elsewhere
 188  * in the kernel assumes this behavior.
 189  *
 190  * This function can call vnop in file system if the parent vnode
 191  * does not exist or when called for hardlinks via volfs path.
 192  * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
 193  * in the name cache and does not enter the file system.
 194  *
 195  * passed in vp must have a valid io_count reference
 196  */
 197 int
 198 build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
 199 {
 200         vnode_t vp, tvp;
 201         vnode_t vp_with_iocount;
 202         vnode_t proc_root_dir_vp;
 203         char *end;
 204         const char *str;
 205         int  len;
 206         int  ret = 0;
 207         int  fixhardlink;
 208
 209         if (first_vp == NULLVP)
 210                 return (EINVAL);
 211
 212         /*
 213          * Grab the process fd so we can evaluate fd_rdir.
 214          */
 215         if (vfs_context_proc(ctx)->p_fd)
 216                 proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
 217         else
 218                 proc_root_dir_vp = NULL;
 219
 220         vp_with_iocount = NULLVP;
 221 again:
 222         vp = first_vp;
 223
 224         end = &buff[buflen-1];
 225         *end = '\0';
 226
 227         /*
 228          * holding the NAME_CACHE_LOCK in shared mode is
 229          * sufficient to stabilize both the vp->v_parent chain
 230          * and the 'vp->v_mount->mnt_vnodecovered' chain
 231          *
 232          * if we need to drop this lock, we must first grab the v_id
 233          * from the vnode we're currently working with... if that
 234          * vnode doesn't already have an io_count reference (the vp
 235          * passed in comes with one), we must grab a reference
 236          * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
 237          * deadlocks may result if you call vnode_get while holding
 238          * the NAME_CACHE_LOCK... we lazily release the reference
 239          * we pick up the next time we encounter a need to drop
 240          * the NAME_CACHE_LOCK or before we return from this routine
 241          */
 242         NAME_CACHE_LOCK_SHARED();
 243
 244         /*
 245          * Check if this is the root of a file system.
 246          */
 247         while (vp && vp->v_flag & VROOT) {
 248                 if (vp->v_mount == NULL) {
 249                         ret = EINVAL;
 250                         goto out_unlock;
 251                 }
 252                 if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
 253                         /*
 254                          * It's the root of the root file system, so it's
 255                          * just "/".
 256                          */
 257                         *--end = '/';
 258
 259                         goto out_unlock;
 260                 } else {
 261                         vp = vp->v_mount->mnt_vnodecovered;
 262                 }
 263         }
 264
 265         while ((vp != NULLVP) && (vp->v_parent != vp)) {
 266                 int  vid;
 267
 268                 /*
 269                  * For hardlinks the v_name may be stale, so if its OK
 270                  * to enter a file system, ask the file system for the
 271                  * name and parent (below).
 272                  */
 273                 fixhardlink = (vp->v_flag & VISHARDLINK) &&
 274                               (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
 275                               !(flags & BUILDPATH_NO_FS_ENTER);
 276
 277                 if (!fixhardlink) {
 278                         str = vp->v_name;
 279
 280                         if (str == NULL || *str == '\0') {
 281                                 if (vp->v_parent != NULL)
 282                                         ret = EINVAL;
 283                                 else
 284                                         ret = ENOENT;
 285                                 goto out_unlock;
 286                         }
 287                         len = strlen(str);
 288                         /*
 289                          * Check that there's enough space (including space for the '/')
 290                          */
 291                         if ((end - buff) < (len + 1)) {
 292                                 ret = ENOSPC;
 293                                 goto out_unlock;
 294                         }
 295                         /*
 296                          * Copy the name backwards.
 297                          */
 298                         str += len;
 299
 300                         for (; len > 0; len--)
 301                                *--end = *--str;
 302                         /*
 303                          * Add a path separator.
 304                          */
 305                         *--end = '/';
 306                 }
 307
 308                 /*
 309                  * Walk up the parent chain.
 310                  */
 311                 if (((vp->v_parent != NULLVP) && !fixhardlink) ||
 312                     (flags & BUILDPATH_NO_FS_ENTER)) {
 313                         /*
 314                          * In this if () block we are not allowed to enter the filesystem
 315                          * to conclusively get the most accurate parent identifier.
 316                          * As a result, if 'vp' does not identify '/' and it
 317                          * does not have a valid v_parent, then error out
 318                          * and disallow further path construction
 319                          */
 320                         if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
 321                                 /* Only '/' is allowed to have a NULL parent pointer */
 322                                 ret = EINVAL;
 323
 324                                 /* The code below will exit early if 'tvp = vp' == NULL */
 325                         }
 326
 327                         vp = vp->v_parent;
 328
 329                         /*
 330                          * if the vnode we have in hand isn't a directory and it
 331                          * has a v_parent, then we started with the resource fork
 332                          * so skip up to avoid getting a duplicate copy of the
 333                          * file name in the path.
 334                          */
 335                         if (vp && !vnode_isdir(vp) && vp->v_parent)
 336                                 vp = vp->v_parent;
 337                 } else {
 338                         /*
 339                          * No parent, go get it if supported.
 340                          */
 341                         struct vnode_attr  va;
 342                         vnode_t  dvp;
 343
 344                         /*
 345                          * Make sure file system supports obtaining a path from id.
 346                          */
 347                         if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
 348                                 ret = ENOENT;
 349                                 goto out_unlock;
 350                         }
 351                         vid = vp->v_id;
 352
 353                         NAME_CACHE_UNLOCK();
 354
 355                         if (vp != first_vp && vp != vp_with_iocount) {
 356                                 if (vp_with_iocount) {
 357                                         vnode_put(vp_with_iocount);
 358                                         vp_with_iocount = NULLVP;
 359                                 }
 360                                 if (vnode_getwithvid(vp, vid))
 361                                         goto again;
 362                                 vp_with_iocount = vp;
 363                         }
 364                         VATTR_INIT(&va);
 365                         VATTR_WANTED(&va, va_parentid);
 366
 367                         if (fixhardlink) {
 368                                 VATTR_WANTED(&va, va_name);
 369                                 MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
 370                         } else {
 371                                 va.va_name = NULL;
 372                         }
 373                         /*
 374                          * Ask the file system for its parent id and for its name (optional).
 375                          */
 376                         ret = vnode_getattr(vp, &va, ctx);
 377
 378                         if (fixhardlink) {
 379                                 if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
 380                                         str = va.va_name;
 381                                         vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME);
 382                                 } else if (vp->v_name) {
 383                                         str = vp->v_name;
 384                                         ret = 0;
 385                                 } else {
 386                                         ret = ENOENT;
 387                                         goto bad_news;
 388                                 }
 389                                 len = strlen(str);
 390
 391                                 /*
 392                                  * Check that there's enough space.
 393                                  */
 394                                 if ((end - buff) < (len + 1)) {
 395                                         ret = ENOSPC;
 396                                 } else {
 397                                         /* Copy the name backwards. */
 398                                         str += len;
 399
 400                                         for (; len > 0; len--) {
 401                                                 *--end = *--str;
 402                                         }
 403                                         /*
 404                                          * Add a path separator.
 405                                          */
 406                                         *--end = '/';
 407                                 }
 408 bad_news:
 409                                 FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
 410                         }
 411                         if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
 412                                 ret = ENOENT;
 413                                 goto out;
 414                         }
 415                         /*
 416                          * Ask the file system for the parent vnode.
 417                          */
 418                         if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx)))
 419                                 goto out;
 420
 421                         if (!fixhardlink && (vp->v_parent != dvp))
 422                                 vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);
 423
 424                         if (vp_with_iocount)
 425                                 vnode_put(vp_with_iocount);
 426                         vp = dvp;
 427                         vp_with_iocount = vp;
 428
 429                         NAME_CACHE_LOCK_SHARED();
 430
 431                         /*
 432                          * if the vnode we have in hand isn't a directory and it
 433                          * has a v_parent, then we started with the resource fork
 434                          * so skip up to avoid getting a duplicate copy of the
 435                          * file name in the path.
 436                          */
 437                         if (vp && !vnode_isdir(vp) && vp->v_parent)
 438                                 vp = vp->v_parent;
 439                 }
 440                 /*
 441                  * When a mount point is crossed switch the vp.
 442                  * Continue until we find the root or we find
 443                  * a vnode that's not the root of a mounted
 444                  * file system.
 445                  */
 446                 tvp = vp;
 447
 448                 while (tvp) {
 449                         if (tvp == proc_root_dir_vp)
 450                                 goto out_unlock;        /* encountered the root */
 451
 452                         if (!(tvp->v_flag & VROOT) || !tvp->v_mount)
 453                                 break;                  /* not the root of a mounted FS */
 454                         tvp = tvp->v_mount->mnt_vnodecovered;
 455                 }
 456                 if (tvp == NULLVP)
 457                         goto out_unlock;
 458                 vp = tvp;
 459
 460                 if (vp && (flags & BUILDPATH_CHECKACCESS)) {
 461                         vid = vp->v_id;
 462
 463                         NAME_CACHE_UNLOCK();
 464
 465                         if (vp != first_vp && vp != vp_with_iocount) {
 466                                 if (vp_with_iocount) {
 467                                         vnode_put(vp_with_iocount);
 468                                         vp_with_iocount = NULLVP;
 469                                 }
 470                                 if (vnode_getwithvid(vp, vid))
 471                                         goto again;
 472                                 vp_with_iocount = vp;
 473                         }
 474                         if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx)))
 475                                 goto out;       /* no peeking */
 476
 477                         NAME_CACHE_LOCK_SHARED();
 478                 }
 479         }
 480 out_unlock:
 481         NAME_CACHE_UNLOCK();
 482 out:
 483         if (vp_with_iocount)
 484                 vnode_put(vp_with_iocount);
 485         /*
 486          * Slide the name down to the beginning of the buffer.
 487          */
 488         memmove(buff, end, &buff[buflen] - end);
 489
 490         /*
 491          * length includes the trailing zero byte
 492          */
 493         *outlen = &buff[buflen] - end;
 494
 495         return (ret);
 496 }
 497
 498
  499 /*
  500  * return NULLVP if vp's parent doesn't
  501  * exist, or we can't get a valid iocount
  502  * else return the parent of vp
       *
       * On success the parent is returned with the reference obtained by
       * vnode_getwithvid().
       * NOTE(review): presumably the caller must drop it with vnode_put() --
       * confirm against callers.
  503  */
  504 vnode_t
  505 vnode_getparent(vnode_t vp)
  506 {
  507         vnode_t pvp = NULLVP;
  508         int     pvid;
  509
  510         NAME_CACHE_LOCK_SHARED();
  511         /*
  512          * v_parent is stable behind the name_cache lock
  513          * however, the only thing we can really guarantee
  514          * is that we've grabbed a valid iocount on the
  515          * parent of 'vp' at the time we took the name_cache lock...
  516          * once we drop the lock, vp could get re-parented
  517          */
  518         if ( (pvp = vp->v_parent) != NULLVP ) {
                      /* sample the parent's v_id under the lock so it can be revalidated after unlocking */
  519                 pvid = pvp->v_id;
  520
  521                 NAME_CACHE_UNLOCK();
  522
                      /* the reference is taken only after the name cache lock has been dropped */
  523                 if (vnode_getwithvid(pvp, pvid) != 0)
  524                         pvp = NULL;
  525         } else
  526                 NAME_CACHE_UNLOCK();
  527         return (pvp);
  528 }
  529 /*
       * Return the string handed back by vfs_addname() for vp's current
       * v_name, or NULL if vp has no name.  v_name is read while holding the
       * name cache lock in shared mode.
       * NOTE(review): presumably the result is a counted string reference
       * that must be released with vnode_putname() -- confirm.
       */
  530 const char *
  531 vnode_getname(vnode_t vp)
  532 {
  533         const char *name = NULL;
  534
  535         NAME_CACHE_LOCK_SHARED();
  536
  537         if (vp->v_name)
  538                 name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0);
  539         NAME_CACHE_UNLOCK();
  540
  541         return (name);
  542 }
  543 /*
       * Hand "name" to vfs_removename().
       * NOTE(review): presumably this drops the string reference returned
       * by vnode_getname() -- confirm.
       */
  544 void
  545 vnode_putname(const char *name)
  546 {
  547         vfs_removename(name);
  548 }
 549
 550
  551 /*
  552  * if VNODE_UPDATE_PARENT, and we can take
  553  * a reference on dvp, then update vp with
  554  * its new parent... if vp already has a parent,
  555  * then drop the reference vp held on it
  556  *
  557  * if VNODE_UPDATE_NAME,
  558  * then drop string ref on v_name if it exists, and if name is non-NULL
  559  * then pick up a string reference on name and record it in v_name...
  560  * optionally pass in the length and hashval of name if known
  561  *
  562  * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
       *
       * if VNODE_UPDATE_PURGE, bump the parent's v_nc_generation, delete every
       * entry on vp's v_nclinks and v_ncchildren lists, and reset v_cred and
       * v_authorized_actions
       *
       * All field updates happen under NAME_CACHE_LOCK; releasing the old
       * name string, the old credential and the vnode references is done
       * after the lock has been dropped.
  563  */
  564 void
  565 vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
  566 {
  567         struct  namecache *ncp;
  568         vnode_t old_parentvp = NULLVP;
  569 #if NAMEDSTREAMS
  570         int isstream = (vp->v_flag & VISNAMEDSTREAM);
  571         int kusecountbumped = 0;
  572 #endif
  573         kauth_cred_t tcred = NULL;
  574         const char *vname = NULL;
  575         const char *tname = NULL;
  576
  577         if (flags & VNODE_UPDATE_PARENT) {
                      /* take the reference up front; if that fails, behave as if no dvp was given */
  578                 if (dvp && vnode_ref(dvp) != 0) {
  579                         dvp = NULLVP;
  580                 }
  581 #if NAMEDSTREAMS
  582                 /* Don't count a stream's parent ref during unmounts */
  583                 if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
  584                         vnode_lock_spin(dvp);
  585                         ++dvp->v_kusecount;
  586                         kusecountbumped = 1;
  587                         vnode_unlock(dvp);
  588                 }
  589 #endif
  590         } else {
  591                 dvp = NULLVP;
  592         }
  593         if ( (flags & VNODE_UPDATE_NAME) ) {
  594                 if (name != vp->v_name) {
                              /* a NULL or empty name leaves tname NULL, so v_name ends up cleared below */
  595                         if (name && *name) {
  596                                 if (name_len == 0)
  597                                         name_len = strlen(name);
  598                                 tname = vfs_addname(name, name_len, name_hashval, 0);
  599                         }
  600                 } else
                              /* same pointer as the current v_name: nothing to update */
  601                         flags &= ~VNODE_UPDATE_NAME;
  602         }
  603         if ( (flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME)) ) {
  604
  605                 NAME_CACHE_LOCK();
  606
  607                 if ( (flags & VNODE_UPDATE_PURGE) ) {
  608
  609                         if (vp->v_parent)
  610                                 vp->v_parent->v_nc_generation++;
  611
  612                         while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
  613                                 cache_delete(ncp, 1);
  614
  615                         while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) )
  616                                 cache_delete(ncp, 1);
  617
  618                         /*
  619                          * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
  620                          */
  621                         tcred = vp->v_cred;
  622                         vp->v_cred = NOCRED;
  623                         vp->v_authorized_actions = 0;
  624                 }
  625                 if ( (flags & VNODE_UPDATE_NAME) ) {
                              /* swap in the new string; the old one is released after unlocking */
  626                         vname = vp->v_name;
  627                         vp->v_name = tname;
  628                 }
  629                 if (flags & VNODE_UPDATE_PARENT) {
  630                         if (dvp != vp && dvp != vp->v_parent) {
  631                                 old_parentvp = vp->v_parent;
  632                                 vp->v_parent = dvp;
                                      /* the reference taken above now belongs to vp->v_parent */
  633                                 dvp = NULLVP;
  634
  635                                 if (old_parentvp)
  636                                         flags |= VNODE_UPDATE_CACHE;
  637                         }
  638                 }
  639                 if (flags & VNODE_UPDATE_CACHE) {
  640                         while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
  641                                 cache_delete(ncp, 1);
  642                 }
  643                 NAME_CACHE_UNLOCK();
  644
  645                 if (vname != NULL)
  646                         vfs_removename(vname);
  647
  648                 if (IS_VALID_CRED(tcred))
  649                         kauth_cred_unref(&tcred);
  650         }
              /* dvp is still set only if its reference was not installed as the new parent */
  651         if (dvp != NULLVP) {
  652 #if NAMEDSTREAMS
  653                 /* Back-out the ref we took if we lost a race for vp->v_parent. */
  654                 if (kusecountbumped) {
  655                         vnode_lock_spin(dvp);
  656                         if (dvp->v_kusecount > 0)
  657                                 --dvp->v_kusecount;
  658                         vnode_unlock(dvp);
  659                 }
  660 #endif
  661                 vnode_rele(dvp);
  662         }
  663         if (old_parentvp) {
  664                 struct  uthread *ut;
  665
  666 #if NAMEDSTREAMS
  667                 if (isstream) {
  668                         vnode_lock_spin(old_parentvp);
  669                         if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0))
  670                                 --old_parentvp->v_kusecount;
  671                         vnode_unlock(old_parentvp);
  672                 }
  673 #endif
  674                 ut = get_bsdthread_info(current_thread());
  675
  676                 /*
  677                  * indicate to vnode_rele that it shouldn't do a
  678                  * vnode_reclaim at this time... instead it will
  679                  * chain the vnode to the uu_vreclaims list...
  680                  * we'll be responsible for calling vnode_reclaim
  681                  * on each of the vnodes in this list...
  682                  */
  683                 ut->uu_defer_reclaims = 1;
  684                 ut->uu_vreclaims = NULLVP;
  685
  686                 while ( (vp = old_parentvp) != NULLVP ) {
  687
  688                         vnode_lock_spin(vp);
  689                         vnode_rele_internal(vp, 0, 0, 1);
  690
  691                         /*
  692                          * check to see if the vnode is now in the state
  693                          * that would have triggered a vnode_reclaim in vnode_rele
  694                          * if it is, we save its parent pointer and then NULL
  695                          * out the v_parent field... we'll drop the reference
  696                          * that was held on the next iteration of this loop...
  697                          * this short circuits a potential deep recursion if we
  698                          * have a long chain of parents in this state...
  699                          * we'll sit in this loop until we run into
  700                          * a parent in this chain that is not in this state
  701                          *
  702                          * make our check and the vnode_rele atomic
  703                          * with respect to the current vnode we're working on
  704                          * by holding the vnode lock
  705                          * if vnode_rele deferred the vnode_reclaim and has put
  706                          * this vnode on the list to be reaped by us, then
  707                          * it has left this vnode with an iocount == 1
  708                          */
  709                         if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
  710                              ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
  711                                 /*
  712                                  * vnode_rele wanted to do a vnode_reclaim on this vnode
  713                                  * it should be sitting on the head of the uu_vreclaims chain
  714                                  * pull the parent pointer now so that when we do the
  715                                  * vnode_reclaim for each of the vnodes in the uu_vreclaims
  716                                  * list, we won't recurse back through here
  717                                  *
  718                                  * need to do a convert here in case vnode_rele_internal
  719                                  * returns with the lock held in the spin mode... it
  720                                  * can drop and retake the lock under certain circumstances
  721                                  */
  722                                 vnode_lock_convert(vp);
  723
  724                                 NAME_CACHE_LOCK();
  725                                 old_parentvp = vp->v_parent;
  726                                 vp->v_parent = NULLVP;
  727                                 NAME_CACHE_UNLOCK();
  728                         } else {
  729                                 /*
  730                                  * we're done... we ran into a vnode that isn't
  731                                  * being terminated
  732                                  */
  733                                 old_parentvp = NULLVP;
  734                         }
  735                         vnode_unlock(vp);
  736                 }
  737                 ut->uu_defer_reclaims = 0;
  738
                      /* drain the vnodes that vnode_rele_internal queued for us while deferral was on */
  739                 while ( (vp = ut->uu_vreclaims) != NULLVP) {
  740                         ut->uu_vreclaims = vp->v_defer_reclaimlist;
  741
  742                         /*
  743                          * vnode_put will drive the vnode_reclaim if
  744                          * we are still the only reference on this vnode
  745                          */
  746                         vnode_put(vp);
  747                 }
  748         }
  749 }
 750
 751
 752 /*
 753  * Mark a vnode as having multiple hard links.  HFS makes use of this
 754  * because it keeps track of each link separately, and wants to know
 755  * which link was actually used.
 756  *
 757  * This will cause the name cache to force a VNOP_LOOKUP on the vnode
 758  * so that HFS can post-process the lookup.  Also, volfs will call
 759  * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
 760  */
 761 void vnode_setmultipath(vnode_t vp)
 762 {
 763         vnode_lock_spin(vp);
 764
 765         /*
 766          * In theory, we're changing the vnode's identity as far as the
 767          * name cache is concerned, so we ought to grab the name cache lock
 768          * here.  However, there is already a race, and grabbing the name
 769          * cache lock only makes the race window slightly smaller.
 770          *
 771          * The race happens because the vnode already exists in the name
 772          * cache, and could be found by one thread before another thread
 773          * can set the hard link flag.
 774          */
 775
 776         vp->v_flag |= VISHARDLINK;
 777
 778         vnode_unlock(vp);
 779 }
 780
 781
 782
 783 /*
 784  * backwards compatibility
 785  */
 786 void vnode_uncache_credentials(vnode_t vp)
 787 {
 788         vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
 789 }
 790
 791
 792 /*
 793  * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
 794  * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions
 795  * we use this lock so that we can look at the v_cred and v_authorized_actions
 796  * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
 797  * which is the super-hot path... if we are updating the authorized actions for this
 798  * vnode, we are already in the super-slow and far less frequented path so its not
 799  * that bad that we take the lock exclusive for this case... of course we strive
 800  * to hold it for the minimum amount of time possible
 801  */
 802
 803 void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
 804 {
 805         kauth_cred_t tcred = NOCRED;
 806
 807         NAME_CACHE_LOCK();
 808
 809         vp->v_authorized_actions &= ~action;
 810
 811         if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
 812             IS_VALID_CRED(vp->v_cred)) {
 813                 /*
 814                  * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
 815                  */
 816                 tcred = vp->v_cred;
 817                 vp->v_cred = NOCRED;
 818         }
 819         NAME_CACHE_UNLOCK();
 820
 821         if (tcred != NOCRED)
 822                 kauth_cred_unref(&tcred);
 823 }
 824
 825
 826 extern int bootarg_vnode_cache_defeat;  /* default = 0, from bsd_init.c */
 827
 828 boolean_t
 829 vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
 830 {
 831         kauth_cred_t    ucred;
 832         boolean_t       retval = FALSE;
 833
 834         /* Boot argument to defeat rights caching */
 835         if (bootarg_vnode_cache_defeat)
 836                 return FALSE;
 837
 838         if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
 839                 /*
 840                  * a TTL is enabled on the rights cache... handle it here
 841                  * a TTL of 0 indicates that no rights should be cached
 842                  */
 843                 if (vp->v_mount->mnt_authcache_ttl) {
 844                         if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
 845                                 /*
 846                                  * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
 847                                  * we will only allow a SEARCH right on a directory to be cached...
 848                                  * that cached right always has a default TTL associated with it
 849                                  */
 850                                 if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR)
 851                                         vp = NULLVP;
 852                         }
 853                         if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
 854                                 vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
 855                                 vp = NULLVP;
 856                         }
 857                 } else
 858                         vp = NULLVP;
 859         }
 860         if (vp != NULLVP) {
 861                 ucred = vfs_context_ucred(ctx);
 862
 863                 NAME_CACHE_LOCK_SHARED();
 864
 865                 if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action)
 866                         retval = TRUE;
 867
 868                 NAME_CACHE_UNLOCK();
 869         }
 870         return retval;
 871 }
 872
 873
 874 void vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
 875 {
 876         kauth_cred_t tcred = NOCRED;
 877         kauth_cred_t ucred;
 878         struct timeval tv;
 879         boolean_t ttl_active = FALSE;
 880
 881         ucred = vfs_context_ucred(ctx);
 882
 883         if (!IS_VALID_CRED(ucred) || action == 0)
 884                 return;
 885
 886         if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
 887                 /*
 888                  * a TTL is enabled on the rights cache... handle it here
 889                  * a TTL of 0 indicates that no rights should be cached
 890                  */
 891                 if (vp->v_mount->mnt_authcache_ttl == 0)
 892                         return;
 893
 894                 if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
 895                         /*
 896                          * only cache SEARCH action for filesystems marked
 897                          * MNTK_AUTH_OPAQUE on VDIRs...
 898                          * the lookup_path code will time these out
 899                          */
 900                         if ( (action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR )
 901                                 return;
 902                 }
 903                 ttl_active = TRUE;
 904
 905                 microuptime(&tv);
 906         }
 907         NAME_CACHE_LOCK();
 908
 909         if (vp->v_cred != ucred) {
 910                 kauth_cred_ref(ucred);
 911                 /*
 912                  * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
 913                  */
 914                 tcred = vp->v_cred;
 915                 vp->v_cred = ucred;
 916                 vp->v_authorized_actions = 0;
 917         }
 918         if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
 919                 /*
 920                  * only reset the timestamnp on the
 921                  * first authorization cached after the previous
 922                  * timer has expired or we're switching creds...
 923                  * 'vnode_cache_is_authorized' will clear the
 924                  * authorized actions if the TTL is active and
 925                  * it has expired
 926                  */
 927                 vp->v_cred_timestamp = tv.tv_sec;
 928         }
 929         vp->v_authorized_actions |= action;
 930
 931         NAME_CACHE_UNLOCK();
 932
 933         if (IS_VALID_CRED(tcred))
 934                 kauth_cred_unref(&tcred);
 935 }
 936
 937
 938 boolean_t vnode_cache_is_stale(vnode_t vp)
 939 {
 940         struct timeval  tv;
 941         boolean_t       retval;
 942
 943         microuptime(&tv);
 944
 945         if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl)
 946                 retval = TRUE;
 947         else
 948                 retval = FALSE;
 949
 950         return retval;
 951 }
 952
 953
 954
/*
 * Fast-path pathname walk driven entirely from the name cache.
 *
 * Starting at directory 'dp', consume successive components of the path at
 * cnp->cn_nameptr for as long as each one can be resolved from the name cache
 * and the cached rights on the directory allow it to be searched.  The whole
 * walk runs with the name cache lock held shared; the loop is left (via
 * 'break') at the first component that has to be handed to the caller.
 *
 * Parameters:
 *      ndp             nameidata being resolved; ni_pathlen, ni_next and
 *                      ni_flag are updated as components are parsed, and
 *                      ni_dvp / ni_vp receive the results.
 *      cnp             component being parsed; cn_nameptr, cn_namelen,
 *                      cn_hash and cn_flags are updated for each component.
 *      dp              directory to start from.
 *      ctx             context supplying the credential that is compared
 *                      against the cached rights.
 *      dp_authorized   out: set to 1 when the cached rights show that the
 *                      directory of the current component may be searched,
 *                      0 otherwise.
 *      last_dp         directory vnode the caller already holds an io
 *                      reference on from a previous VNOP_LOOKUP iteration
 *                      (may be NULLVP); see the comment at 'errorout'.
 *
 * Returns:     0                       Success
 *              ERECYCLE                vnode was recycled from underneath us.  Force lookup to be re-driven from namei.
 *                                              This errno value should not be seen by anyone outside of the kernel.
 *              other                   error returned by mac_vnode_check_lookup (CONFIG_MACF) or
 *                                              by vnode_trigger_resolve (CONFIG_TRIGGERS)
 */
int
cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
                vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
{
        char            *cp;            /* pointer into pathname argument */
        int             vid;
        int             vvid = 0;       /* protected by vp != NULLVP */
        vnode_t         vp = NULLVP;
        vnode_t         tdp = NULLVP;
        kauth_cred_t    ucred;
        boolean_t       ttl_enabled = FALSE;
        struct timeval  tv;
        mount_t         mp;
        unsigned int    hash;
        int             error = 0;

#if CONFIG_TRIGGERS
        vnode_t         trigger_vp;
#endif /* CONFIG_TRIGGERS */

        ucred = vfs_context_ucred(ctx);
        ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);

        NAME_CACHE_LOCK_SHARED();

        /*
         * whether a TTL applies is decided once, from the mount of the
         * starting directory... 'tv' is only valid when ttl_enabled is TRUE
         */
        if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
                ttl_enabled = TRUE;
                microuptime(&tv);
        }
        for (;;) {
                /*
                 * Search a directory.
                 *
                 * The cn_hash value is for use by cache_lookup
                 * The last component of the filename is left accessible via
                 * cnp->cn_nameptr for callers that need the name.
                 */
                hash = 0;
                cp = cnp->cn_nameptr;

                while (*cp && (*cp != '/')) {
                        hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
                }
                /*
                 * the crc generator can legitimately generate
                 * a 0... however, 0 for us means that we
                 * haven't computed a hash, so use 1 instead
                 */
                if (hash == 0)
                        hash = 1;
                cnp->cn_hash = hash;
                cnp->cn_namelen = cp - cnp->cn_nameptr;

                ndp->ni_pathlen -= cnp->cn_namelen;
                ndp->ni_next = cp;

                /*
                 * Replace multiple slashes by a single slash and trailing slashes
                 * by a null.  This must be done before VNOP_LOOKUP() because some
                 * fs's don't know about trailing slashes.  Remember if there were
                 * trailing slashes to handle symlinks, existing non-directories
                 * and non-existing files that won't be directories specially later.
                 */
                while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
                        cp++;
                        ndp->ni_pathlen--;

                        if (*cp == '\0') {
                                ndp->ni_flag |= NAMEI_TRAILINGSLASH;
                                *ndp->ni_next = '\0';
                        }
                }
                ndp->ni_next = cp;

                /* recompute the per-component flags from scratch */
                cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);

                if (*cp == '\0')
                        cnp->cn_flags |= ISLASTCN;

                if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
                        cnp->cn_flags |= ISDOTDOT;

                /* assume not authorized until the cached rights say otherwise */
                *dp_authorized = 0;
#if NAMEDRSRCFORK
                /*
                 * Process a request for a file's resource fork.
                 *
                 * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
                 */
                if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
                    (cp[1] == '.' && cp[2] == '.') &&
                    bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
                        /* Skip volfs file systems that don't support native streams. */
                        if ((dp->v_mount != NULL) &&
                            (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
                            (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
                                goto skiprsrcfork;
                        }
                        /*
                         * truncate the path at the suffix so that the current
                         * component becomes the last one
                         */
                        cnp->cn_flags |= CN_WANTSRSRCFORK;
                        cnp->cn_flags |= ISLASTCN;
                        ndp->ni_next[0] = '\0';
                        ndp->ni_pathlen = 1;
                }
skiprsrcfork:
#endif

#if CONFIG_MACF

                /*
                 * Name cache provides authorization caching (see below)
                 * that will short circuit MAC checks in lookup().
                 * We must perform MAC check here.  On denial
                 * dp_authorized will remain 0 and second check will
                 * be performed in lookup().
                 */
                if (!(cnp->cn_flags & DONOTAUTH)) {
                        error = mac_vnode_check_lookup(ctx, dp, cnp);
                        if (error) {
                                NAME_CACHE_UNLOCK();
                                goto errorout;
                        }
                }
#endif /* CONFIG_MACF */
                /*
                 * the rights cached on this directory have outlived the
                 * TTL... stop here with *dp_authorized still 0
                 */
                if (ttl_enabled && ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))
                        break;

                /*
                 * NAME_CACHE_LOCK holds these fields stable
                 *
                 * we may continue only if SEARCH is cached for our
                 * credential, or KAUTH_VNODE_SEARCHBYANYONE is cached
                 */
                if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
                    !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE))
                        break;

                /*
                 * indicate that we're allowed to traverse this directory...
                 * even if we fail the cache lookup or decide to bail for
                 * some other reason, this information is valid and is used
                 * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
                 */
                *dp_authorized = 1;

                if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
                        /*
                         * for the last component or "..", only a plain
                         * LOOKUP without LOCKPARENT or NOCACHE is
                         * resolved from the cache
                         */
                        if (cnp->cn_nameiop != LOOKUP)
                                break;
                        if (cnp->cn_flags & LOCKPARENT)
                                break;
                        if (cnp->cn_flags & NOCACHE)
                                break;
                        if (cnp->cn_flags & ISDOTDOT) {
                                /*
                                 * Force directory hardlinks to go to
                                 * file system for ".." requests.
                                 */
                                if (dp && (dp->v_flag & VISHARDLINK)) {
                                        break;
                                }
                                /*
                                 * Quit here only if we can't use
                                 * the parent directory pointer or
                                 * don't have one.  Otherwise, we'll
                                 * use it below.
                                 */
                                if ((dp->v_flag & VROOT)  ||
                                    dp == ndp->ni_rootdir ||
                                    dp->v_parent == NULLVP)
                                        break;
                        }
                }

                /*
                 * "." and ".." aren't supposed to be cached, so check
                 * for them before checking the cache.
                 */
                if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
                        vp = dp;
                else if ( (cnp->cn_flags & ISDOTDOT) )
                        vp = dp->v_parent;
                else {
                        if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP)
                                break;

                        if ( (vp->v_flag & VISHARDLINK) ) {
                                /*
                                 * The file system wants a VNOP_LOOKUP on this vnode
                                 */
                                vp = NULL;
                                break;
                        }
                }
                /* last component resolved... leave with vp set */
                if ( (cnp->cn_flags & ISLASTCN) )
                        break;

                /*
                 * more path follows, so we can only keep going through a
                 * directory... a symlink is handed back to the caller in vp,
                 * anything else is dropped
                 */
                if (vp->v_type != VDIR) {
                        if (vp->v_type != VLNK)
                                vp = NULL;
                        break;
                }

                if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {

                        /*
                         * cross the mount point only if the cached root
                         * vnode of the mounted filesystem is present and
                         * its recorded generation and vid still match
                         */
                        if (mp->mnt_realrootvp == NULLVP || mp->mnt_generation != mount_generation ||
                                mp->mnt_realrootvp_vid != mp->mnt_realrootvp->v_id)
                                break;
                        vp = mp->mnt_realrootvp;
                }

#if CONFIG_TRIGGERS
                /*
                 * After traversing all mountpoints stacked here, if we have a
                 * trigger in hand, resolve it.  Note that we don't need to
                 * leave the fast path if the mount has already happened.
                 */
                if ((vp->v_resolve != NULL) &&
                                (vp->v_resolve->vr_resolve_func != NULL)) {
                        break;
                }
#endif /* CONFIG_TRIGGERS */


                /*
                 * step into the directory we just found and move on
                 * to the next component, skipping the separating slashes
                 */
                dp = vp;
                vp = NULLVP;

                cnp->cn_nameptr = ndp->ni_next + 1;
                ndp->ni_pathlen--;
                while (*cnp->cn_nameptr == '/') {
                        cnp->cn_nameptr++;
                        ndp->ni_pathlen--;
                }
        }
        /*
         * snapshot the vnode ids while still behind the name cache lock...
         * they are handed to vnode_getwithvid_drainok below
         */
        if (vp != NULLVP)
                vvid = vp->v_id;
        vid = dp->v_id;

        NAME_CACHE_UNLOCK();

        if ((vp != NULLVP) && (vp->v_type != VLNK) &&
            ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
                /*
                 * if we've got a child and it's the last component, and
                 * the lookup doesn't need to return the parent then we
                 * can skip grabbing an iocount on the parent, since all
                 * we're going to do with it is a vnode_put just before
                 * we return from 'lookup'.  If it's a symbolic link,
                 * we need the parent in case the link happens to be
                 * a relative pathname.
                 */
                tdp = dp;
                dp = NULLVP;
        } else {
need_dp:
                /*
                 * return the last directory we looked at
                 * with an io reference held. If it was the one passed
                 * in as a result of the last iteration of VNOP_LOOKUP,
                 * it should already hold an io ref. No need to increase ref.
                 */
                if (last_dp != dp){

                        if (dp == ndp->ni_usedvp) {
                                /*
                                 * if this vnode matches the one passed in via USEDVP
                                 * then this context already holds an io_count... just
                                 * use vnode_get to get an extra ref for lookup to play
                                 * with... can't use the getwithvid variant here because
                                 * it will block behind a vnode_drain which would result
                                 * in a deadlock (since we already own an io_count that the
                                 * vnode_drain is waiting on)... vnode_get grabs the io_count
                                 * immediately w/o waiting... it always succeeds
                                 */
                                vnode_get(dp);
                        } else if ( (vnode_getwithvid_drainok(dp, vid)) ) {
                                /*
                                 * failure indicates the vnode
                                 * changed identity or is being
                                 * TERMINATED... in either case
                                 * punt this lookup.
                                 *
                                 * don't necessarily return ENOENT, though, because
                                 * we really want to go back to disk and make sure it's
                                 * there or not if someone else is changing this
                                 * vnode.
                                 */
                                error = ERECYCLE;
                                goto errorout;
                        }
                }
        }
        if (vp != NULLVP) {
                if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
                        vp = NULLVP;

                        /*
                         * can't get reference on the vp we'd like
                         * to return... if we didn't grab a reference
                         * on the directory (due to fast path bypass),
                         * then we need to do it now... we can't return
                         * with both ni_dvp and ni_vp NULL, and no
                         * error condition
                         *
                         * vp is NULLVP at this point, so after the jump
                         * back to need_dp this block is not re-entered
                         */
                        if (dp == NULLVP) {
                                dp = tdp;
                                goto need_dp;
                        }
                }
        }

        ndp->ni_dvp = dp;
        ndp->ni_vp  = vp;

#if CONFIG_TRIGGERS
        trigger_vp = vp ? vp : dp;
        if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
                error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
                if (error) {
                        /* give up what is referenced by vp and dp before failing */
                        if (vp)
                                vnode_put(vp);
                        if (dp)
                                vnode_put(dp);
                        goto errorout;
                }
        }
#endif /* CONFIG_TRIGGERS */

errorout:
        /*
         * If we came into cache_lookup_path after an iteration of the lookup loop that
         * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with a io ref
         * on it.  It is now the job of cache_lookup_path to drop the ref on this vnode
         * when it is no longer needed.  If we get to this point, and last_dp is not NULL
         * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
         * the case that we got to a subsequent path component and this previous vnode is
         * no longer needed.  We can then drop the io ref on it.
         */
        if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)){
                vnode_put(last_dp);
        }

        //initialized to 0, should be the same if no error cases occurred.
        return error;
}
1300
1301
1302 static vnode_t
1303 cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
1304 {
1305         struct namecache *ncp;
1306         struct nchashhead *ncpp;
1307         long namelen = cnp->cn_namelen;
1308         unsigned int hashval = (cnp->cn_hash & NCHASHMASK);
1309
1310         if (nc_disabled) {
1311                 return NULL;
1312         }
1313
1314         ncpp = NCHHASH(dvp, cnp->cn_hash);
1315         LIST_FOREACH(ncp, ncpp, nc_hash) {
1316                 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
1317                         if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
1318                                 break;
1319                 }
1320         }
1321         if (ncp == 0) {
1322                 /*
1323                  * We failed to find an entry
1324                  */
1325                 NCHSTAT(ncs_miss);
1326                 return (NULL);
1327         }
1328         NCHSTAT(ncs_goodhits);
1329
1330         return (ncp->nc_vp);
1331 }
1332
1333
1334 //
1335 // Have to take a len argument because we may only need to
1336 // hash part of a componentname.
1337 //
1338 static unsigned int
1339 hash_string(const char *cp, int len)
1340 {
1341     unsigned hash = 0;
1342
1343     if (len) {
1344             while (len--) {
1345                     hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
1346             }
1347     } else {
1348             while (*cp != '\0') {
1349                     hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
1350             }
1351     }
1352     /*
1353      * the crc generator can legitimately generate
1354      * a 0... however, 0 for us means that we
1355      * haven't computed a hash, so use 1 instead
1356      */
1357     if (hash == 0)
1358             hash = 1;
1359     return hash;
1360 }
1361
1362
1363 /*
1364  * Lookup an entry in the cache
1365  *
1366  * We don't do this if the segment name is long, simply so the cache
1367  * can avoid holding long names (which would either waste space, or
1368  * add greatly to the complexity).
1369  *
1370  * Lookup is called with dvp pointing to the directory to search,
1371  * cnp pointing to the name of the entry being sought. If the lookup
1372  * succeeds, the vnode is returned in *vpp, and a status of -1 is
1373  * returned. If the lookup determines that the name does not exist
1374  * (negative cacheing), a status of ENOENT is returned. If the lookup
1375  * fails, a status of zero is returned.
1376  */
1377
1378 int
1379 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1380 {
1381         struct namecache *ncp;
1382         struct nchashhead *ncpp;
1383         long namelen = cnp->cn_namelen;
1384         unsigned int hashval;
1385         boolean_t       have_exclusive = FALSE;
1386         uint32_t vid;
1387         vnode_t  vp;
1388
1389         if (cnp->cn_hash == 0)
1390                 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1391         hashval = (cnp->cn_hash & NCHASHMASK);
1392
1393         if (nc_disabled) {
1394                 return 0;
1395         }
1396
1397         NAME_CACHE_LOCK_SHARED();
1398
1399 relook:
1400         ncpp = NCHHASH(dvp, cnp->cn_hash);
1401         LIST_FOREACH(ncp, ncpp, nc_hash) {
1402                 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
1403                         if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
1404                                 break;
1405                 }
1406         }
1407         /* We failed to find an entry */
1408         if (ncp == 0) {
1409                 NCHSTAT(ncs_miss);
1410                 NAME_CACHE_UNLOCK();
1411                 return (0);
1412         }
1413
1414         /* We don't want to have an entry, so dump it */
1415         if ((cnp->cn_flags & MAKEENTRY) == 0) {
1416                 if (have_exclusive == TRUE) {
1417                         NCHSTAT(ncs_badhits);
1418                         cache_delete(ncp, 1);
1419                         NAME_CACHE_UNLOCK();
1420                         return (0);
1421                 }
1422                 NAME_CACHE_UNLOCK();
1423                 NAME_CACHE_LOCK();
1424                 have_exclusive = TRUE;
1425                 goto relook;
1426         }
1427         vp = ncp->nc_vp;
1428
1429         /* We found a "positive" match, return the vnode */
1430         if (vp) {
1431                 NCHSTAT(ncs_goodhits);
1432
1433                 vid = vp->v_id;
1434                 NAME_CACHE_UNLOCK();
1435
1436                 if (vnode_getwithvid(vp, vid)) {
1437 #if COLLECT_STATS
1438                         NAME_CACHE_LOCK();
1439                         NCHSTAT(ncs_badvid);
1440                         NAME_CACHE_UNLOCK();
1441 #endif
1442                         return (0);
1443                 }
1444                 *vpp = vp;
1445                 return (-1);
1446         }
1447
1448         /* We found a negative match, and want to create it, so purge */
1449         if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
1450                 if (have_exclusive == TRUE) {
1451                         NCHSTAT(ncs_badhits);
1452                         cache_delete(ncp, 1);
1453                         NAME_CACHE_UNLOCK();
1454                         return (0);
1455                 }
1456                 NAME_CACHE_UNLOCK();
1457                 NAME_CACHE_LOCK();
1458                 have_exclusive = TRUE;
1459                 goto relook;
1460         }
1461
1462         /*
1463          * We found a "negative" match, ENOENT notifies client of this match.
1464          * The nc_whiteout field records whether this is a whiteout.
1465          */
1466         NCHSTAT(ncs_neghits);
1467
1468         if (ncp->nc_whiteout)
1469                 cnp->cn_flags |= ISWHITEOUT;
1470         NAME_CACHE_UNLOCK();
1471         return (ENOENT);
1472 }
1473
1474 const char *
1475 cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
1476 {
1477         const char *strname;
1478
1479         if (cnp->cn_hash == 0)
1480                 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1481
1482         /*
1483          * grab 2 references on the string entered
1484          * one for the cache_enter_locked to consume
1485          * and the second to be consumed by v_name (vnode_create call point)
1486          */
1487         strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);
1488
1489         NAME_CACHE_LOCK();
1490
1491         cache_enter_locked(dvp, vp, cnp, strname);
1492
1493         NAME_CACHE_UNLOCK();
1494
1495         return (strname);
1496 }
1497
1498
1499 /*
1500  * Add an entry to the cache...
1501  * but first check to see if the directory
1502  * that this entry is to be associated with has
1503  * had any cache_purges applied since we took
1504  * our identity snapshot... this check needs to
1505  * be done behind the name cache lock
1506  */
1507 void
1508 cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
1509 {
1510
1511         if (cnp->cn_hash == 0)
1512                 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1513
1514         NAME_CACHE_LOCK();
1515
1516         if (dvp->v_nc_generation == gen)
1517                 (void)cache_enter_locked(dvp, vp, cnp, NULL);
1518
1519         NAME_CACHE_UNLOCK();
1520 }
1521
1522
1523 /*
1524  * Add an entry to the cache.
1525  */
1526 void
1527 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
1528 {
1529         const char *strname;
1530
1531         if (cnp->cn_hash == 0)
1532                 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1533
1534         /*
1535          * grab 1 reference on the string entered
1536          * for the cache_enter_locked to consume
1537          */
1538         strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
1539
1540         NAME_CACHE_LOCK();
1541
1542         cache_enter_locked(dvp, vp, cnp, strname);
1543
1544         NAME_CACHE_UNLOCK();
1545 }
1546
1547
1548 static void
1549 cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
1550 {
1551         struct namecache *ncp, *negp;
1552         struct nchashhead *ncpp;
1553
1554         if (nc_disabled)
1555                 return;
1556
1557         /*
1558          * if the entry is for -ve caching vp is null
1559          */
1560         if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
1561                 /*
1562                  * someone beat us to the punch..
1563                  * this vnode is already in the cache
1564                  */
1565                 if (strname != NULL)
1566                         vfs_removename(strname);
1567                 return;
1568         }
1569         /*
1570          * We allocate a new entry if we are less than the maximum
1571          * allowed and the one at the front of the list is in use.
1572          * Otherwise we use the one at the front of the list.
1573          */
1574         if (numcache < desiredNodes &&
1575             ((ncp = nchead.tqh_first) == NULL ||
1576               ncp->nc_hash.le_prev != 0)) {
1577                 /*
1578                  * Allocate one more entry
1579                  */
1580                 ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK);
1581                 numcache++;
1582         } else {
1583                 /*
1584                  * reuse an old entry
1585                  */
1586                 ncp = TAILQ_FIRST(&nchead);
1587                 TAILQ_REMOVE(&nchead, ncp, nc_entry);
1588
1589                 if (ncp->nc_hash.le_prev != 0) {
1590                        /*
1591                         * still in use... we need to
1592                         * delete it before re-using it
1593                         */
1594                         NCHSTAT(ncs_stolen);
1595                         cache_delete(ncp, 0);
1596                 }
1597         }
1598         NCHSTAT(ncs_enters);
1599
1600         /*
1601          * Fill in cache info, if vp is NULL this is a "negative" cache entry.
1602          */
1603         ncp->nc_vp = vp;
1604         ncp->nc_dvp = dvp;
1605         ncp->nc_hashval = cnp->cn_hash;
1606         ncp->nc_whiteout = FALSE;
1607
1608         if (strname == NULL)
1609                 ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
1610         else
1611                 ncp->nc_name = strname;
1612         /*
1613          * make us the newest entry in the cache
1614          * i.e. we'll be the last to be stolen
1615          */
1616         TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);
1617
1618         ncpp = NCHHASH(dvp, cnp->cn_hash);
1619 #if DIAGNOSTIC
1620         {
1621                 struct namecache *p;
1622
1623                 for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
1624                         if (p == ncp)
1625                                 panic("cache_enter: duplicate");
1626         }
1627 #endif
1628         /*
1629          * make us available to be found via lookup
1630          */
1631         LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
1632
1633         if (vp) {
1634                /*
1635                 * add to the list of name cache entries
1636                 * that point at vp
1637                 */
1638                 LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
1639         } else {
1640                 /*
1641                  * this is a negative cache entry (vp == NULL)
1642                  * stick it on the negative cache list
1643                  * and record the whiteout state
1644                  */
1645                 TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);
1646
1647                 if (cnp->cn_flags & ISWHITEOUT)
1648                         ncp->nc_whiteout = TRUE;
1649                 ncs_negtotal++;
1650
1651                 if (ncs_negtotal > desiredNegNodes) {
1652                        /*
1653                         * if we've reached our desired limit
1654                         * of negative cache entries, delete
1655                         * the oldest
1656                         */
1657                         negp = TAILQ_FIRST(&neghead);
1658                         cache_delete(negp, 1);
1659                 }
1660         }
1661         /*
1662          * add us to the list of name cache entries that
1663          * are children of dvp
1664          */
1665         LIST_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
1666 }
1667
1668
1669 /*
1670  * Initialize CRC-32 remainder table.
1671  */
1672 static void init_crc32(void)
1673 {
1674         /*
1675          * the CRC-32 generator polynomial is:
1676          *   x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10
1677          *        + x^8  + x^7  + x^5  + x^4  + x^2  + x + 1
1678          */
1679         unsigned int crc32_polynomial = 0x04c11db7;
1680         unsigned int i,j;
1681
1682         /*
1683          * pre-calculate the CRC-32 remainder for each possible octet encoding
1684          */
1685         for (i = 0;  i < 256;  i++) {
1686                 unsigned int crc_rem = i << 24;
1687
1688                 for (j = 0;  j < 8;  j++) {
1689                         if (crc_rem & 0x80000000)
1690                                 crc_rem = (crc_rem << 1) ^ crc32_polynomial;
1691                         else
1692                                 crc_rem = (crc_rem << 1);
1693                 }
1694                 crc32tab[i] = crc_rem;
1695         }
1696 }
1697
1698
1699 /*
1700  * Name cache initialization, from vfs_init() when we are booting
1701  */
1702 void
1703 nchinit(void)
1704 {
1705         int     i;
1706
1707         desiredNegNodes = (desiredvnodes / 10);
1708         desiredNodes = desiredvnodes + desiredNegNodes;
1709
1710         TAILQ_INIT(&nchead);
1711         TAILQ_INIT(&neghead);
1712
1713         init_crc32();
1714
1715         nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 *desiredNodes)), M_CACHE, &nchash);
1716         nchashmask = nchash;
1717         nchash++;
1718
1719         init_string_table();
1720
1721         /* Allocate name cache lock group attribute and group */
1722         namecache_lck_grp_attr= lck_grp_attr_alloc_init();
1723
1724         namecache_lck_grp = lck_grp_alloc_init("Name Cache",  namecache_lck_grp_attr);
1725
1726         /* Allocate name cache lock attribute */
1727         namecache_lck_attr = lck_attr_alloc_init();
1728
1729         /* Allocate name cache lock */
1730         namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);
1731
1732
1733         /* Allocate string cache lock group attribute and group */
1734         strcache_lck_grp_attr= lck_grp_attr_alloc_init();
1735
1736         strcache_lck_grp = lck_grp_alloc_init("String Cache",  strcache_lck_grp_attr);
1737
1738         /* Allocate string cache lock attribute */
1739         strcache_lck_attr = lck_attr_alloc_init();
1740
1741         /* Allocate string cache lock */
1742         strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);
1743
1744         for (i = 0; i < NUM_STRCACHE_LOCKS; i++)
1745                 lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
1746 }
1747
1748 void
1749 name_cache_lock_shared(void)
1750 {
1751         lck_rw_lock_shared(namecache_rw_lock);
1752 }
1753
1754 void
1755 name_cache_lock(void)
1756 {
1757         lck_rw_lock_exclusive(namecache_rw_lock);
1758 }
1759
1760 void
1761 name_cache_unlock(void)
1762 {
1763         lck_rw_done(namecache_rw_lock);
1764 }
1765
1766
1767 int
1768 resize_namecache(u_int newsize)
1769 {
1770     struct nchashhead   *new_table;
1771     struct nchashhead   *old_table;
1772     struct nchashhead   *old_head, *head;
1773     struct namecache    *entry, *next;
1774     uint32_t            i, hashval;
1775     int                 dNodes, dNegNodes;
1776     u_long              new_size, old_size;
1777
1778     dNegNodes = (newsize / 10);
1779     dNodes = newsize + dNegNodes;
1780
1781     // we don't support shrinking yet
1782     if (dNodes <= desiredNodes) {
1783         return 0;
1784     }
1785     new_table = hashinit(2 * dNodes, M_CACHE, &nchashmask);
1786     new_size  = nchashmask + 1;
1787
1788     if (new_table == NULL) {
1789         return ENOMEM;
1790     }
1791
1792     NAME_CACHE_LOCK();
1793     // do the switch!
1794     old_table = nchashtbl;
1795     nchashtbl = new_table;
1796     old_size  = nchash;
1797     nchash    = new_size;
1798
1799     // walk the old table and insert all the entries into
1800     // the new table
1801     //
1802     for(i=0; i < old_size; i++) {
1803         old_head = &old_table[i];
1804         for (entry=old_head->lh_first; entry != NULL; entry=next) {
1805             //
1806             // XXXdbg - Beware: this assumes that hash_string() does
1807             //                  the same thing as what happens in
1808             //                  lookup() over in vfs_lookup.c
1809             hashval = hash_string(entry->nc_name, 0);
1810             entry->nc_hashval = hashval;
1811             head = NCHHASH(entry->nc_dvp, hashval);
1812
1813             next = entry->nc_hash.le_next;
1814             LIST_INSERT_HEAD(head, entry, nc_hash);
1815         }
1816     }
1817     desiredNodes = dNodes;
1818     desiredNegNodes = dNegNodes;
1819
1820     NAME_CACHE_UNLOCK();
1821     FREE(old_table, M_CACHE);
1822
1823     return 0;
1824 }
1825
1826 static void
1827 cache_delete(struct namecache *ncp, int age_entry)
1828 {
1829         NCHSTAT(ncs_deletes);
1830
1831         if (ncp->nc_vp) {
1832                 LIST_REMOVE(ncp, nc_un.nc_link);
1833         } else {
1834                 TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
1835                 ncs_negtotal--;
1836         }
1837         LIST_REMOVE(ncp, nc_child);
1838
1839         LIST_REMOVE(ncp, nc_hash);
1840         /*
1841          * this field is used to indicate
1842          * that the entry is in use and
1843          * must be deleted before it can
1844          * be reused...
1845          */
1846         ncp->nc_hash.le_prev = NULL;
1847
1848         if (age_entry) {
1849                 /*
1850                  * make it the next one available
1851                  * for cache_enter's use
1852                  */
1853                 TAILQ_REMOVE(&nchead, ncp, nc_entry);
1854                 TAILQ_INSERT_HEAD(&nchead, ncp, nc_entry);
1855         }
1856         vfs_removename(ncp->nc_name);
1857         ncp->nc_name = NULL;
1858 }
1859
1860
1861 /*
1862  * purge the entry associated with the
1863  * specified vnode from the name cache
1864  */
1865 void
1866 cache_purge(vnode_t vp)
1867 {
1868         struct namecache *ncp;
1869         kauth_cred_t tcred = NULL;
1870
1871         if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
1872                         (LIST_FIRST(&vp->v_ncchildren) == NULL) &&
1873                         (vp->v_cred == NOCRED) &&
1874                         (vp->v_parent == NULLVP))
1875                 return;
1876
1877         NAME_CACHE_LOCK();
1878
1879         if (vp->v_parent)
1880                 vp->v_parent->v_nc_generation++;
1881
1882         while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
1883                 cache_delete(ncp, 1);
1884
1885         while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) )
1886                 cache_delete(ncp, 1);
1887
1888         /*
1889          * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1890          */
1891         tcred = vp->v_cred;
1892         vp->v_cred = NOCRED;
1893         vp->v_authorized_actions = 0;
1894
1895         NAME_CACHE_UNLOCK();
1896
1897         if (IS_VALID_CRED(tcred))
1898                 kauth_cred_unref(&tcred);
1899 }
1900
1901 /*
1902  * Purge all negative cache entries that are children of the
1903  * given vnode.  A case-insensitive file system (or any file
1904  * system that has multiple equivalent names for the same
1905  * directory entry) can use this when creating or renaming
1906  * to remove negative entries that may no longer apply.
1907  */
1908 void
1909 cache_purge_negatives(vnode_t vp)
1910 {
1911         struct namecache *ncp, *next_ncp;
1912
1913         NAME_CACHE_LOCK();
1914
1915         LIST_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp)
1916                 if (ncp->nc_vp == NULL)
1917                         cache_delete(ncp , 1);
1918
1919         NAME_CACHE_UNLOCK();
1920 }
1921
1922 /*
1923  * Flush all entries referencing a particular filesystem.
1924  *
1925  * Since we need to check it anyway, we will flush all the invalid
1926  * entries at the same time.
1927  */
1928 void
1929 cache_purgevfs(struct mount *mp)
1930 {
1931         struct nchashhead *ncpp;
1932         struct namecache *ncp;
1933
1934         NAME_CACHE_LOCK();
1935         /* Scan hash tables for applicable entries */
1936         for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
1937 restart:
1938                 for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
1939                         if (ncp->nc_dvp->v_mount == mp) {
1940                                 cache_delete(ncp, 0);
1941                                 goto restart;
1942                         }
1943                 }
1944         }
1945         NAME_CACHE_UNLOCK();
1946 }
1947
1948
1949
1950 //
1951 // String ref routines
1952 //
1953 static LIST_HEAD(stringhead, string_t) *string_ref_table;
1954 static u_long   string_table_mask;
1955 static uint32_t filled_buckets=0;
1956
1957
1958 typedef struct string_t {
1959     LIST_ENTRY(string_t)  hash_chain;
1960     const char *str;
1961     uint32_t              refcount;
1962 } string_t;
1963
1964
1965 static void
1966 resize_string_ref_table(void)
1967 {
1968         struct stringhead *new_table;
1969         struct stringhead *old_table;
1970         struct stringhead *old_head, *head;
1971         string_t          *entry, *next;
1972         uint32_t           i, hashval;
1973         u_long             new_mask, old_mask;
1974
1975         /*
1976          * need to hold the table lock exclusively
1977          * in order to grow the table... need to recheck
1978          * the need to resize again after we've taken
1979          * the lock exclusively in case some other thread
1980          * beat us to the punch
1981          */
1982         lck_rw_lock_exclusive(strtable_rw_lock);
1983
1984         if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
1985                 lck_rw_done(strtable_rw_lock);
1986                 return;
1987         }
1988         new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);
1989
1990         if (new_table == NULL) {
1991                 printf("failed to resize the hash table.\n");
1992                 lck_rw_done(strtable_rw_lock);
1993                 return;
1994         }
1995
1996         // do the switch!
1997         old_table         = string_ref_table;
1998         string_ref_table  = new_table;
1999         old_mask          = string_table_mask;
2000         string_table_mask = new_mask;
2001         filled_buckets    = 0;
2002
2003         // walk the old table and insert all the entries into
2004         // the new table
2005         //
2006         for (i = 0; i <= old_mask; i++) {
2007                 old_head = &old_table[i];
2008                 for (entry = old_head->lh_first; entry != NULL; entry = next) {
2009                         hashval = hash_string((const char *)entry->str, 0);
2010                         head = &string_ref_table[hashval & string_table_mask];
2011                         if (head->lh_first == NULL) {
2012                                 filled_buckets++;
2013                         }
2014                         next = entry->hash_chain.le_next;
2015                         LIST_INSERT_HEAD(head, entry, hash_chain);
2016                 }
2017         }
2018         lck_rw_done(strtable_rw_lock);
2019
2020         FREE(old_table, M_CACHE);
2021 }
2022
2023
2024 static void
2025 init_string_table(void)
2026 {
2027         string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
2028 }
2029
2030
2031 const char *
2032 vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
2033 {
2034         return (add_name_internal(name, len, hashval, FALSE, flags));
2035 }
2036
2037
2038 static const char *
2039 add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
2040 {
2041         struct stringhead *head;
2042         string_t          *entry;
2043         uint32_t          chain_len = 0;
2044         uint32_t          hash_index;
2045         uint32_t          lock_index;
2046         char              *ptr;
2047
2048         /*
2049          * if the length already accounts for the null-byte, then
2050          * subtract one so later on we don't index past the end
2051          * of the string.
2052          */
2053         if (len > 0 && name[len-1] == '\0') {
2054                 len--;
2055         }
2056         if (hashval == 0) {
2057                 hashval = hash_string(name, len);
2058         }
2059
2060         /*
2061          * take this lock 'shared' to keep the hash stable
2062          * if someone else decides to grow the pool they
2063          * will take this lock exclusively
2064          */
2065         lck_rw_lock_shared(strtable_rw_lock);
2066
2067         /*
2068          * If the table gets more than 3/4 full, resize it
2069          */
2070         if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
2071                 lck_rw_done(strtable_rw_lock);
2072
2073                 resize_string_ref_table();
2074
2075                 lck_rw_lock_shared(strtable_rw_lock);
2076         }
2077         hash_index = hashval & string_table_mask;
2078         lock_index = hash_index % NUM_STRCACHE_LOCKS;
2079
2080         head = &string_ref_table[hash_index];
2081
2082         lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
2083
2084         for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
2085                 if (memcmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
2086                         entry->refcount++;
2087                         break;
2088                 }
2089         }
2090         if (entry == NULL) {
2091                 lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
2092                 /*
2093                  * it wasn't already there so add it.
2094                  */
2095                 MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);
2096
2097                 if (head->lh_first == NULL) {
2098                         OSAddAtomic(1, &filled_buckets);
2099                 }
2100                 ptr = (char *)((char *)entry + sizeof(string_t));
2101                 strncpy(ptr, name, len);
2102                 ptr[len] = '\0';
2103                 entry->str = ptr;
2104                 entry->refcount = 1;
2105                 LIST_INSERT_HEAD(head, entry, hash_chain);
2106         }
2107         if (need_extra_ref == TRUE)
2108                 entry->refcount++;
2109
2110         lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
2111         lck_rw_done(strtable_rw_lock);
2112
2113         return (const char *)entry->str;
2114 }
2115
2116
2117 int
2118 vfs_removename(const char *nameref)
2119 {
2120         struct stringhead *head;
2121         string_t          *entry;
2122         uint32_t           hashval;
2123         uint32_t           hash_index;
2124         uint32_t           lock_index;
2125         int                retval = ENOENT;
2126
2127         hashval = hash_string(nameref, 0);
2128
2129         /*
2130          * take this lock 'shared' to keep the hash stable
2131          * if someone else decides to grow the pool they
2132          * will take this lock exclusively
2133          */
2134         lck_rw_lock_shared(strtable_rw_lock);
2135         /*
2136          * must compute the head behind the table lock
2137          * since the size and location of the table
2138          * can change on the fly
2139          */
2140         hash_index = hashval & string_table_mask;
2141         lock_index = hash_index % NUM_STRCACHE_LOCKS;
2142
2143         head = &string_ref_table[hash_index];
2144
2145         lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
2146
2147         for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
2148                 if (entry->str == nameref) {
2149                         entry->refcount--;
2150
2151                         if (entry->refcount == 0) {
2152                                 LIST_REMOVE(entry, hash_chain);
2153
2154                                 if (head->lh_first == NULL) {
2155                                         OSAddAtomic(-1, &filled_buckets);
2156                                 }
2157                         } else {
2158                                 entry = NULL;
2159                         }
2160                         retval = 0;
2161                         break;
2162                 }
2163         }
2164         lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
2165         lck_rw_done(strtable_rw_lock);
2166
2167         if (entry != NULL)
2168                 FREE(entry, M_TEMP);
2169
2170         return retval;
2171 }
2172
2173
#ifdef DUMP_STRING_TABLE
//
// Debugging aid: print the reference count and text of every string
// in the string table, bucket by bucket, with the table lock held
// shared.
//
void
dump_string_table(void)
{
    struct stringhead *bucket;
    string_t          *cur;
    u_long            idx;

    lck_rw_lock_shared(strtable_rw_lock);

    for (idx = 0; idx <= string_table_mask; idx++) {
        bucket = &string_ref_table[idx];
        cur = bucket->lh_first;

        while (cur != NULL) {
            printf("%6d - %s\n", cur->refcount, cur->str);
            cur = cur->hash_chain.le_next;
        }
    }
    lck_rw_done(strtable_rw_lock);
}
#endif  /* DUMP_STRING_TABLE */