/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/kauth.h>
#include <sys/user.h>
#include <sys/paths.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference. It is managed LRU, so frequently
 * used names will hang around. Cache is indexed by hash value
 * obtained from (vp, name) where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry, (i.e. for a name that is known NOT to
 * exist) the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Structures associated with name caching.
 */

LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
u_long	nchashmask;
u_long	nchash;			/* size of hash table - 1 */
long	numcache;		/* number of cache entries allocated */
int	desiredNodes;
int	desiredNegNodes;
int	ncs_negtotal;
int	nc_disabled = 0;
TAILQ_HEAD(, namecache) nchead;	/* chain of all name cache entries */
TAILQ_HEAD(, namecache) neghead;	/* chain of only negative cache entries */


#if COLLECT_STATS

struct nchstats nchstats;	/* cache effectiveness statistics */

#define NCHSTAT(v) {	\
	nchstats.v++;	\
}
#define NAME_CACHE_LOCK()		name_cache_lock()
#define NAME_CACHE_UNLOCK()		name_cache_unlock()
#define NAME_CACHE_LOCK_SHARED()	name_cache_lock()

#else

#define NCHSTAT(v)
#define NAME_CACHE_LOCK()		name_cache_lock()
#define NAME_CACHE_UNLOCK()		name_cache_unlock()
#define NAME_CACHE_LOCK_SHARED()	name_cache_lock_shared()

#endif


/* vars for name cache list lock */
lck_grp_t * namecache_lck_grp;
lck_grp_attr_t * namecache_lck_grp_attr;
lck_attr_t * namecache_lck_attr;

lck_grp_t * strcache_lck_grp;
lck_grp_attr_t * strcache_lck_grp_attr;
lck_attr_t * strcache_lck_attr;

lck_rw_t * namecache_rw_lock;
lck_rw_t * strtable_rw_lock;

#define NUM_STRCACHE_LOCKS 1024

lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];


static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
static void init_string_table(void) __attribute__((section("__TEXT, initcode")));
static void cache_delete(struct namecache *, int);
static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);

#ifdef DUMP_STRING_TABLE
/*
 * Internal dump function used for debugging
 */
void dump_string_table(void);
#endif	/* DUMP_STRING_TABLE */

static void init_crc32(void) __attribute__((section("__TEXT, initcode")));
static unsigned int crc32tab[256];


#define NCHHASH(dvp, hash_val) \
	(&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])

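/*
 * Illustrative sketch (not from the original source): a lookup of the
 * component "etc" under a directory vnode 'dvp' hashes the name first,
 * then walks the chain that NCHHASH selects -- a simplified version of
 * what cache_lookup_locked() does below:
 *
 *	unsigned int hash = hash_string("etc", 3);
 *	struct nchashhead *ncpp = NCHHASH(dvp, hash);
 *	struct namecache *ncp;
 *
 *	LIST_FOREACH(ncp, ncpp, nc_hash) {
 *		if (ncp->nc_dvp == dvp)
 *			break;	// the real lookup also compares the name
 *	}
 */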


/*
 * This function builds the path to a filename in "buff". The
 * length of the buffer *INCLUDING* the trailing zero byte is
 * returned in outlen. NOTE: the length includes the trailing
 * zero byte and thus the length is one greater than what strlen
 * would return. This is important and lots of code elsewhere
 * in the kernel assumes this behavior.
 *
 * This function can call a VNOP in the file system if the parent vnode
 * does not exist, or when called for hardlinks via a volfs path.
 * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
 * in the name cache and does not enter the file system.
 *
 * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
 * we encounter ENOENT during path reconstruction. ENOENT means that
 * one of the parents moved while we were building the path. The
 * caller can handle this case specially by calling build_path again.
 *
 * The passed-in vp must have a valid io_count reference.
 */
int
build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
{
	vnode_t vp, tvp;
	vnode_t vp_with_iocount;
	vnode_t proc_root_dir_vp;
	char *end;
	const char *str;
	int  len;
	int  ret = 0;
	int  fixhardlink;

	if (first_vp == NULLVP)
		return (EINVAL);

	/*
	 * Grab the process fd so we can evaluate fd_rdir.
	 */
	if (vfs_context_proc(ctx)->p_fd)
		proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
	else
		proc_root_dir_vp = NULL;

	vp_with_iocount = NULLVP;
again:
	vp = first_vp;

	end = &buff[buflen-1];
	*end = '\0';

	/*
	 * holding the NAME_CACHE_LOCK in shared mode is
	 * sufficient to stabilize both the vp->v_parent chain
	 * and the 'vp->v_mount->mnt_vnodecovered' chain
	 *
	 * if we need to drop this lock, we must first grab the v_id
	 * from the vnode we're currently working with... if that
	 * vnode doesn't already have an io_count reference (the vp
	 * passed in comes with one), we must grab a reference
	 * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
	 * deadlocks may result if you call vnode_get while holding
	 * the NAME_CACHE_LOCK... we lazily release the reference
	 * we pick up the next time we encounter a need to drop
	 * the NAME_CACHE_LOCK or before we return from this routine
	 */
	NAME_CACHE_LOCK_SHARED();

	/*
	 * Check if this is the root of a file system.
	 */
	while (vp && vp->v_flag & VROOT) {
		if (vp->v_mount == NULL) {
			ret = EINVAL;
			goto out_unlock;
		}
		if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
			/*
			 * It's the root of the root file system, so it's
			 * just "/".
			 */
			*--end = '/';

			goto out_unlock;
		} else {
			vp = vp->v_mount->mnt_vnodecovered;
		}
	}

	while ((vp != NULLVP) && (vp->v_parent != vp)) {
		int vid;

		/*
		 * For hardlinks the v_name may be stale, so if it's OK
		 * to enter a file system, ask the file system for the
		 * name and parent (below).
		 */
		fixhardlink = (vp->v_flag & VISHARDLINK) &&
		    (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
		    !(flags & BUILDPATH_NO_FS_ENTER);

		if (!fixhardlink) {
			str = vp->v_name;

			if (str == NULL || *str == '\0') {
				if (vp->v_parent != NULL)
					ret = EINVAL;
				else
					ret = ENOENT;
				goto out_unlock;
			}
			len = strlen(str);
			/*
			 * Check that there's enough space (including space for the '/')
			 */
			if ((end - buff) < (len + 1)) {
				ret = ENOSPC;
				goto out_unlock;
			}
			/*
			 * Copy the name backwards.
			 */
			str += len;

			for (; len > 0; len--)
				*--end = *--str;
			/*
			 * Add a path separator.
			 */
			*--end = '/';
		}

		/*
		 * Walk up the parent chain.
		 */
		if (((vp->v_parent != NULLVP) && !fixhardlink) ||
		    (flags & BUILDPATH_NO_FS_ENTER)) {

			/*
			 * In this if () block we are not allowed to enter the filesystem
			 * to conclusively get the most accurate parent identifier.
			 * As a result, if 'vp' does not identify '/' and it
			 * does not have a valid v_parent, then error out
			 * and disallow further path construction
			 */
			if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
				/* Only '/' is allowed to have a NULL parent pointer */
				ret = EINVAL;

				/* The code below will exit early if 'tvp = vp' == NULL */
			}
			vp = vp->v_parent;

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent) {
				vp = vp->v_parent;
			}
		} else {
			/*
			 * No parent, go get it if supported.
			 */
			struct vnode_attr va;
			vnode_t dvp;

			/*
			 * Make sure file system supports obtaining a path from id.
			 */
			if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
				ret = ENOENT;
				goto out_unlock;
			}
			vid = vp->v_id;

			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				if (vnode_getwithvid(vp, vid))
					goto again;
				vp_with_iocount = vp;
			}
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_parentid);

			if (fixhardlink) {
				VATTR_WANTED(&va, va_name);
				MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
			} else {
				va.va_name = NULL;
			}
			/*
			 * Ask the file system for its parent id and for its name (optional).
			 */
			ret = vnode_getattr(vp, &va, ctx);

			if (fixhardlink) {
				if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
					str = va.va_name;
					vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME);
				} else if (vp->v_name) {
					str = vp->v_name;
					ret = 0;
				} else {
					ret = ENOENT;
					goto bad_news;
				}
				len = strlen(str);

				/*
				 * Check that there's enough space.
				 */
				if ((end - buff) < (len + 1)) {
					ret = ENOSPC;
				} else {
					/* Copy the name backwards. */
					str += len;

					for (; len > 0; len--) {
						*--end = *--str;
					}
					/*
					 * Add a path separator.
					 */
					*--end = '/';
				}
bad_news:
				FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
			}
			if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
				ret = ENOENT;
				goto out;
			}
			/*
			 * Ask the file system for the parent vnode.
			 */
			if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx)))
				goto out;

			if (!fixhardlink && (vp->v_parent != dvp))
				vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);

			if (vp_with_iocount)
				vnode_put(vp_with_iocount);
			vp = dvp;
			vp_with_iocount = vp;

			NAME_CACHE_LOCK_SHARED();

			/*
			 * if the vnode we have in hand isn't a directory and it
			 * has a v_parent, then we started with the resource fork
			 * so skip up to avoid getting a duplicate copy of the
			 * file name in the path.
			 */
			if (vp && !vnode_isdir(vp) && vp->v_parent)
				vp = vp->v_parent;
		}
		/*
		 * When a mount point is crossed switch the vp.
		 * Continue until we find the root or we find
		 * a vnode that's not the root of a mounted
		 * file system.
		 */
		tvp = vp;

		while (tvp) {
			if (tvp == proc_root_dir_vp)
				goto out_unlock;	/* encountered the root */

			if (!(tvp->v_flag & VROOT) || !tvp->v_mount)
				break;			/* not the root of a mounted FS */
			tvp = tvp->v_mount->mnt_vnodecovered;
		}
		if (tvp == NULLVP)
			goto out_unlock;
		vp = tvp;

		if (vp && (flags & BUILDPATH_CHECKACCESS)) {
			vid = vp->v_id;

			NAME_CACHE_UNLOCK();

			if (vp != first_vp && vp != vp_with_iocount) {
				if (vp_with_iocount) {
					vnode_put(vp_with_iocount);
					vp_with_iocount = NULLVP;
				}
				if (vnode_getwithvid(vp, vid))
					goto again;
				vp_with_iocount = vp;
			}
			if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx)))
				goto out;	/* no peeking */

			NAME_CACHE_LOCK_SHARED();
		}
	}
out_unlock:
	NAME_CACHE_UNLOCK();
out:
	if (vp_with_iocount)
		vnode_put(vp_with_iocount);
	/*
	 * Slide the name down to the beginning of the buffer.
	 */
	memmove(buff, end, &buff[buflen] - end);

	/*
	 * length includes the trailing zero byte
	 */
	*outlen = &buff[buflen] - end;

	/*
	 * One of the parents was moved during path reconstruction.
	 * The caller is interested in knowing whether any of the
	 * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
	 */
	if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
		ret = EAGAIN;
	}

	return (ret);
}
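
/*
 * Usage sketch (illustrative only, not part of the original source):
 * a caller that already holds a valid io_count reference on 'vp' and
 * has a vfs_context_t 'ctx' might obtain the vnode's path like this:
 *
 *	char *path;
 *	int  pathlen;
 *
 *	MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
 *	if (build_path(vp, path, MAXPATHLEN, &pathlen, 0, ctx) == 0) {
 *		// pathlen counts the trailing NUL, i.e. strlen(path) + 1
 *		printf("path: %s\n", path);
 *	}
 *	FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
 */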


/*
 * return NULLVP if vp's parent doesn't
 * exist, or if we can't get a valid iocount on it;
 * otherwise return the parent of vp
 */
vnode_t
vnode_getparent(vnode_t vp)
{
	vnode_t pvp = NULLVP;
	int	pvid;

	NAME_CACHE_LOCK_SHARED();
	/*
	 * v_parent is stable behind the name_cache lock
	 * however, the only thing we can really guarantee
	 * is that we've grabbed a valid iocount on the
	 * parent of 'vp' at the time we took the name_cache lock...
	 * once we drop the lock, vp could get re-parented
	 */
	if ( (pvp = vp->v_parent) != NULLVP ) {
		pvid = pvp->v_id;

		NAME_CACHE_UNLOCK();

		if (vnode_getwithvid(pvp, pvid) != 0)
			pvp = NULL;
	} else
		NAME_CACHE_UNLOCK();
	return (pvp);
}

const char *
vnode_getname(vnode_t vp)
{
	const char *name = NULL;

	NAME_CACHE_LOCK_SHARED();

	if (vp->v_name)
		name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0);
	NAME_CACHE_UNLOCK();

	return (name);
}

void
vnode_putname(const char *name)
{
	vfs_removename(name);
}
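
/*
 * Usage sketch (illustrative only): vnode_getname() returns a referenced
 * string that must be balanced with vnode_putname():
 *
 *	const char *name = vnode_getname(vp);
 *
 *	if (name != NULL) {
 *		printf("vnode name: %s\n", name);
 *		vnode_putname(name);
 *	}
 */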


/*
 * if VNODE_UPDATE_PARENT, and we can take
 * a reference on dvp, then update vp with
 * its new parent... if vp already has a parent,
 * then drop the reference vp held on it
 *
 * if VNODE_UPDATE_NAME,
 * then drop the string ref on v_name if it exists, and if name is non-NULL
 * then pick up a string reference on name and record it in v_name...
 * optionally pass in the length and hashval of name if known
 *
 * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
 */
void
vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
{
	struct namecache *ncp;
	vnode_t	old_parentvp = NULLVP;
#if NAMEDSTREAMS
	int isstream = (vp->v_flag & VISNAMEDSTREAM);
	int kusecountbumped = 0;
#endif
	kauth_cred_t tcred = NULL;
	const char *vname = NULL;
	const char *tname = NULL;

	if (flags & VNODE_UPDATE_PARENT) {
		if (dvp && vnode_ref(dvp) != 0) {
			dvp = NULLVP;
		}
#if NAMEDSTREAMS
		/* Don't count a stream's parent ref during unmounts */
		if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
			vnode_lock_spin(dvp);
			++dvp->v_kusecount;
			kusecountbumped = 1;
			vnode_unlock(dvp);
		}
#endif
	} else {
		dvp = NULLVP;
	}
	if ( (flags & VNODE_UPDATE_NAME) ) {
		if (name != vp->v_name) {
			if (name && *name) {
				if (name_len == 0)
					name_len = strlen(name);
				tname = vfs_addname(name, name_len, name_hashval, 0);
			}
		} else
			flags &= ~VNODE_UPDATE_NAME;
	}
	if ( (flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME)) ) {

		NAME_CACHE_LOCK();

		if ( (flags & VNODE_UPDATE_PURGE) ) {

			if (vp->v_parent)
				vp->v_parent->v_nc_generation++;

			while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
				cache_delete(ncp, 1);

			while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) )
				cache_delete(ncp, 1);

			/*
			 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
			 */
			tcred = vp->v_cred;
			vp->v_cred = NOCRED;
			vp->v_authorized_actions = 0;
		}
		if ( (flags & VNODE_UPDATE_NAME) ) {
			vname = vp->v_name;
			vp->v_name = tname;
		}
		if (flags & VNODE_UPDATE_PARENT) {
			if (dvp != vp && dvp != vp->v_parent) {
				old_parentvp = vp->v_parent;
				vp->v_parent = dvp;
				dvp = NULLVP;

				if (old_parentvp)
					flags |= VNODE_UPDATE_CACHE;
			}
		}
		if (flags & VNODE_UPDATE_CACHE) {
			while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
				cache_delete(ncp, 1);
		}
		NAME_CACHE_UNLOCK();

		if (vname != NULL)
			vfs_removename(vname);

		if (IS_VALID_CRED(tcred))
			kauth_cred_unref(&tcred);
	}
	if (dvp != NULLVP) {
#if NAMEDSTREAMS
		/* Back-out the ref we took if we lost a race for vp->v_parent. */
		if (kusecountbumped) {
			vnode_lock_spin(dvp);
			if (dvp->v_kusecount > 0)
				--dvp->v_kusecount;
			vnode_unlock(dvp);
		}
#endif
		vnode_rele(dvp);
	}
	if (old_parentvp) {
		struct uthread *ut;

#if NAMEDSTREAMS
		if (isstream) {
			vnode_lock_spin(old_parentvp);
			if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0))
				--old_parentvp->v_kusecount;
			vnode_unlock(old_parentvp);
		}
#endif
		ut = get_bsdthread_info(current_thread());

		/*
		 * indicate to vnode_rele that it shouldn't do a
		 * vnode_reclaim at this time... instead it will
		 * chain the vnode to the uu_vreclaims list...
		 * we'll be responsible for calling vnode_reclaim
		 * on each of the vnodes in this list...
		 */
		ut->uu_defer_reclaims = 1;
		ut->uu_vreclaims = NULLVP;

		while ( (vp = old_parentvp) != NULLVP ) {

			vnode_lock_spin(vp);
			vnode_rele_internal(vp, 0, 0, 1);

			/*
			 * check to see if the vnode is now in the state
			 * that would have triggered a vnode_reclaim in vnode_rele
			 * if it is, we save its parent pointer and then NULL
			 * out the v_parent field... we'll drop the reference
			 * that was held on the next iteration of this loop...
			 * this short circuits a potential deep recursion if we
			 * have a long chain of parents in this state...
			 * we'll sit in this loop until we run into
			 * a parent in this chain that is not in this state
			 *
			 * make our check and the vnode_rele atomic
			 * with respect to the current vnode we're working on
			 * by holding the vnode lock
			 * if vnode_rele deferred the vnode_reclaim and has put
			 * this vnode on the list to be reaped by us, then
			 * it has left this vnode with an iocount == 1
			 */
			if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
			     ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
				/*
				 * vnode_rele wanted to do a vnode_reclaim on this vnode
				 * it should be sitting on the head of the uu_vreclaims chain
				 * pull the parent pointer now so that when we do the
				 * vnode_reclaim for each of the vnodes in the uu_vreclaims
				 * list, we won't recurse back through here
				 *
				 * need to do a convert here in case vnode_rele_internal
				 * returns with the lock held in the spin mode... it
				 * can drop and retake the lock under certain circumstances
				 */
				vnode_lock_convert(vp);

				NAME_CACHE_LOCK();
				old_parentvp = vp->v_parent;
				vp->v_parent = NULLVP;
				NAME_CACHE_UNLOCK();
			} else {
				/*
				 * we're done... we ran into a vnode that isn't
				 * being terminated
				 */
				old_parentvp = NULLVP;
			}
			vnode_unlock(vp);
		}
		ut->uu_defer_reclaims = 0;

		while ( (vp = ut->uu_vreclaims) != NULLVP) {
			ut->uu_vreclaims = vp->v_defer_reclaimlist;

			/*
			 * vnode_put will drive the vnode_reclaim if
			 * we are still the only reference on this vnode
			 */
			vnode_put(vp);
		}
	}
}


/*
 * Mark a vnode as having multiple hard links. HFS makes use of this
 * because it keeps track of each link separately, and wants to know
 * which link was actually used.
 *
 * This will cause the name cache to force a VNOP_LOOKUP on the vnode
 * so that HFS can post-process the lookup. Also, volfs will call
 * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
 */
void vnode_setmultipath(vnode_t vp)
{
	vnode_lock_spin(vp);

	/*
	 * In theory, we're changing the vnode's identity as far as the
	 * name cache is concerned, so we ought to grab the name cache lock
	 * here. However, there is already a race, and grabbing the name
	 * cache lock only makes the race window slightly smaller.
	 *
	 * The race happens because the vnode already exists in the name
	 * cache, and could be found by one thread before another thread
	 * can set the hard link flag.
	 */

	vp->v_flag |= VISHARDLINK;

	vnode_unlock(vp);
}



/*
 * backwards compatibility
 */
void vnode_uncache_credentials(vnode_t vp)
{
	vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
}


/*
 * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
 * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions.
 * we use this lock so that we can look at the v_cred and v_authorized_actions
 * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
 * which is the super-hot path... if we are updating the authorized actions for this
 * vnode, we are already in the super-slow and far less frequented path so it's not
 * that bad that we take the lock exclusive for this case... of course we strive
 * to hold it for the minimum amount of time possible
 */

void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
{
	kauth_cred_t tcred = NOCRED;

	NAME_CACHE_LOCK();

	vp->v_authorized_actions &= ~action;

	if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
	    IS_VALID_CRED(vp->v_cred)) {
		/*
		 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
		 */
		tcred = vp->v_cred;
		vp->v_cred = NOCRED;
	}
	NAME_CACHE_UNLOCK();

	if (tcred != NOCRED)
		kauth_cred_unref(&tcred);
}


extern int bootarg_vnode_cache_defeat;	/* default = 0, from bsd_init.c */

boolean_t
vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t	ucred;
	boolean_t	retval = FALSE;

	/* Boot argument to defeat rights caching */
	if (bootarg_vnode_cache_defeat)
		return FALSE;

	if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		/*
		 * a TTL is enabled on the rights cache... handle it here
		 * a TTL of 0 indicates that no rights should be cached
		 */
		if (vp->v_mount->mnt_authcache_ttl) {
			if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
				/*
				 * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
				 * we will only allow a SEARCH right on a directory to be cached...
				 * that cached right always has a default TTL associated with it
				 */
				if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR)
					vp = NULLVP;
			}
			if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
				vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
				vp = NULLVP;
			}
		} else
			vp = NULLVP;
	}
	if (vp != NULLVP) {
		ucred = vfs_context_ucred(ctx);

		NAME_CACHE_LOCK_SHARED();

		if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action)
			retval = TRUE;

		NAME_CACHE_UNLOCK();
	}
	return retval;
}


void vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
	kauth_cred_t tcred = NOCRED;
	kauth_cred_t ucred;
	struct timeval tv;
	boolean_t ttl_active = FALSE;

	ucred = vfs_context_ucred(ctx);

	if (!IS_VALID_CRED(ucred) || action == 0)
		return;

	if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		/*
		 * a TTL is enabled on the rights cache... handle it here
		 * a TTL of 0 indicates that no rights should be cached
		 */
		if (vp->v_mount->mnt_authcache_ttl == 0)
			return;

		if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
			/*
			 * only cache SEARCH action for filesystems marked
			 * MNTK_AUTH_OPAQUE on VDIRs...
			 * the lookup_path code will time these out
			 */
			if ( (action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR )
				return;
		}
		ttl_active = TRUE;

		microuptime(&tv);
	}
	NAME_CACHE_LOCK();

	if (vp->v_cred != ucred) {
		kauth_cred_ref(ucred);
		/*
		 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
		 */
		tcred = vp->v_cred;
		vp->v_cred = ucred;
		vp->v_authorized_actions = 0;
	}
	if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
		/*
		 * only reset the timestamp on the
		 * first authorization cached after the previous
		 * timer has expired or we're switching creds...
		 * 'vnode_cache_is_authorized' will clear the
		 * authorized actions if the TTL is active and
		 * it has expired
		 */
		vp->v_cred_timestamp = tv.tv_sec;
	}
	vp->v_authorized_actions |= action;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred))
		kauth_cred_unref(&tcred);
}
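
/*
 * Illustrative sketch (not part of the original source) of how the two
 * routines above pair up around a full authorization check:
 *
 *	if (vnode_cache_is_authorized(vp, ctx, KAUTH_VNODE_SEARCH))
 *		return (0);	// cached grant, skip the full check
 *
 *	error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
 *	if (error == 0)
 *		vnode_cache_authorized_action(vp, ctx, KAUTH_VNODE_SEARCH);
 */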


boolean_t vnode_cache_is_stale(vnode_t vp)
{
	struct timeval	tv;
	boolean_t	retval;

	microuptime(&tv);

	if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl)
		retval = TRUE;
	else
		retval = FALSE;

	return retval;
}



/*
 * Returns:	0		Success
 *		ERECYCLE	vnode was recycled from underneath us. Force lookup to be re-driven from namei.
 *				This errno value should not be seen by anyone outside of the kernel.
 */
int
cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
    vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
{
	char		*cp;		/* pointer into pathname argument */
	int		vid;
	int		vvid = 0;	/* protected by vp != NULLVP */
	vnode_t		vp = NULLVP;
	vnode_t		tdp = NULLVP;
	kauth_cred_t	ucred;
	boolean_t	ttl_enabled = FALSE;
	struct timeval	tv;
	mount_t		mp;
	unsigned int	hash;
	int		error = 0;

#if CONFIG_TRIGGERS
	vnode_t		trigger_vp;
#endif /* CONFIG_TRIGGERS */

	ucred = vfs_context_ucred(ctx);
	ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);

	NAME_CACHE_LOCK_SHARED();

	if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
		ttl_enabled = TRUE;
		microuptime(&tv);
	}
	for (;;) {
		/*
		 * Search a directory.
		 *
		 * The cn_hash value is for use by cache_lookup
		 * The last component of the filename is left accessible via
		 * cnp->cn_nameptr for callers that need the name.
		 */
		hash = 0;
		cp = cnp->cn_nameptr;

		while (*cp && (*cp != '/')) {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
		/*
		 * the crc generator can legitimately generate
		 * a 0... however, 0 for us means that we
		 * haven't computed a hash, so use 1 instead
		 */
		if (hash == 0)
			hash = 1;
		cnp->cn_hash = hash;
		cnp->cn_namelen = cp - cnp->cn_nameptr;

		ndp->ni_pathlen -= cnp->cn_namelen;
		ndp->ni_next = cp;

		/*
		 * Replace multiple slashes by a single slash and trailing slashes
		 * by a null. This must be done before VNOP_LOOKUP() because some
		 * fs's don't know about trailing slashes. Remember if there were
		 * trailing slashes to handle symlinks, existing non-directories
		 * and non-existing files that won't be directories specially later.
		 */
		while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
			cp++;
			ndp->ni_pathlen--;

			if (*cp == '\0') {
				ndp->ni_flag |= NAMEI_TRAILINGSLASH;
				*ndp->ni_next = '\0';
			}
		}
		ndp->ni_next = cp;

		cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);

		if (*cp == '\0')
			cnp->cn_flags |= ISLASTCN;

		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
			cnp->cn_flags |= ISDOTDOT;

		*dp_authorized = 0;
#if NAMEDRSRCFORK
		/*
		 * Process a request for a file's resource fork.
		 *
		 * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
		 */
		if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
		    (cp[1] == '.' && cp[2] == '.') &&
		    bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
			/* Skip volfs file systems that don't support native streams. */
			if ((dp->v_mount != NULL) &&
			    (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
			    (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
				goto skiprsrcfork;
			}
			cnp->cn_flags |= CN_WANTSRSRCFORK;
			cnp->cn_flags |= ISLASTCN;
			ndp->ni_next[0] = '\0';
			ndp->ni_pathlen = 1;
		}
skiprsrcfork:
#endif

#if CONFIG_MACF

		/*
		 * Name cache provides authorization caching (see below)
		 * that will short circuit MAC checks in lookup().
		 * We must perform the MAC check here. On denial
		 * dp_authorized will remain 0 and a second check will
		 * be performed in lookup().
		 */
		if (!(cnp->cn_flags & DONOTAUTH)) {
			error = mac_vnode_check_lookup(ctx, dp, cnp);
			if (error) {
				NAME_CACHE_UNLOCK();
				goto errorout;
			}
		}
#endif /* MAC */
		if (ttl_enabled && ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))
			break;

		/*
		 * NAME_CACHE_LOCK holds these fields stable
		 */
		if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
		    !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE))
			break;

		/*
		 * indicate that we're allowed to traverse this directory...
		 * even if we fail the cache lookup or decide to bail for
		 * some other reason, this information is valid and is used
		 * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
		 */
		*dp_authorized = 1;

		if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
			if (cnp->cn_nameiop != LOOKUP)
				break;
			if (cnp->cn_flags & LOCKPARENT)
				break;
			if (cnp->cn_flags & NOCACHE)
				break;
			if (cnp->cn_flags & ISDOTDOT) {
				/*
				 * Force directory hardlinks to go to
				 * file system for ".." requests.
				 */
				if (dp && (dp->v_flag & VISHARDLINK)) {
					break;
				}
				/*
				 * Quit here only if we can't use
				 * the parent directory pointer or
				 * don't have one. Otherwise, we'll
				 * use it below.
				 */
				if ((dp->v_flag & VROOT) ||
				    dp == ndp->ni_rootdir ||
				    dp->v_parent == NULLVP)
					break;
			}
		}

		/*
		 * "." and ".." aren't supposed to be cached, so check
		 * for them before checking the cache.
		 */
		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
			vp = dp;
		else if ( (cnp->cn_flags & ISDOTDOT) )
			vp = dp->v_parent;
		else {
			if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP)
				break;

			if ( (vp->v_flag & VISHARDLINK) ) {
				/*
				 * The file system wants a VNOP_LOOKUP on this vnode
				 */
				vp = NULL;
				break;
			}
		}
		if ( (cnp->cn_flags & ISLASTCN) )
			break;

		if (vp->v_type != VDIR) {
			if (vp->v_type != VLNK)
				vp = NULL;
			break;
		}

		if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {

			if (mp->mnt_realrootvp == NULLVP || mp->mnt_generation != mount_generation ||
			    mp->mnt_realrootvp_vid != mp->mnt_realrootvp->v_id)
				break;
			vp = mp->mnt_realrootvp;
		}

#if CONFIG_TRIGGERS
		/*
		 * After traversing all mountpoints stacked here, if we have a
		 * trigger in hand, resolve it. Note that we don't need to
		 * leave the fast path if the mount has already happened.
		 */
		if ((vp->v_resolve != NULL) &&
		    (vp->v_resolve->vr_resolve_func != NULL)) {
			break;
		}
#endif /* CONFIG_TRIGGERS */


		dp = vp;
		vp = NULLVP;

		cnp->cn_nameptr = ndp->ni_next + 1;
		ndp->ni_pathlen--;
		while (*cnp->cn_nameptr == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
	}
	if (vp != NULLVP)
		vvid = vp->v_id;
	vid = dp->v_id;

	NAME_CACHE_UNLOCK();

	if ((vp != NULLVP) && (vp->v_type != VLNK) &&
	    ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
		/*
		 * if we've got a child and it's the last component, and
		 * the lookup doesn't need to return the parent then we
		 * can skip grabbing an iocount on the parent, since all
		 * we're going to do with it is a vnode_put just before
		 * we return from 'lookup'. If it's a symbolic link,
		 * we need the parent in case the link happens to be
		 * a relative pathname.
		 */
		tdp = dp;
		dp = NULLVP;
	} else {
need_dp:
		/*
		 * return the last directory we looked at
		 * with an io reference held. If it was the one passed
		 * in as a result of the last iteration of VNOP_LOOKUP,
		 * it should already hold an io ref. No need to increase ref.
		 */
		if (last_dp != dp) {

			if (dp == ndp->ni_usedvp) {
				/*
				 * if this vnode matches the one passed in via USEDVP
				 * then this context already holds an io_count... just
				 * use vnode_get to get an extra ref for lookup to play
				 * with... can't use the getwithvid variant here because
				 * it will block behind a vnode_drain which would result
				 * in a deadlock (since we already own an io_count that the
				 * vnode_drain is waiting on)... vnode_get grabs the io_count
				 * immediately w/o waiting... it always succeeds
				 */
				vnode_get(dp);
			} else if ( (vnode_getwithvid_drainok(dp, vid)) ) {
				/*
				 * failure indicates the vnode
				 * changed identity or is being
				 * TERMINATED... in either case
				 * punt this lookup.
				 *
				 * don't necessarily return ENOENT, though, because
				 * we really want to go back to disk and make sure it's
				 * there or not if someone else is changing this
				 * vnode.
				 */
				error = ERECYCLE;
				goto errorout;
			}
		}
	}
	if (vp != NULLVP) {
		if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
			vp = NULLVP;

			/*
			 * can't get reference on the vp we'd like
			 * to return... if we didn't grab a reference
			 * on the directory (due to fast path bypass),
			 * then we need to do it now... we can't return
			 * with both ni_dvp and ni_vp NULL, and no
			 * error condition
			 */
			if (dp == NULLVP) {
				dp = tdp;
				goto need_dp;
			}
		}
	}

	ndp->ni_dvp = dp;
	ndp->ni_vp  = vp;

#if CONFIG_TRIGGERS
	trigger_vp = vp ? vp : dp;
	if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
		error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
		if (error) {
			if (vp)
				vnode_put(vp);
			if (dp)
				vnode_put(dp);
			goto errorout;
		}
	}
#endif /* CONFIG_TRIGGERS */

errorout:
	/*
	 * If we came into cache_lookup_path after an iteration of the lookup loop that
	 * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with a io ref
	 * on it. It is now the job of cache_lookup_path to drop the ref on this vnode
	 * when it is no longer needed. If we get to this point, and last_dp is not NULL
	 * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
	 * the case that we got to a subsequent path component and this previous vnode is
	 * no longer needed. We can then drop the io ref on it.
	 */
	if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)) {
		vnode_put(last_dp);
	}

	// initialized to 0, should be the same if no error cases occurred
	return error;
}


static vnode_t
cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	long namelen = cnp->cn_namelen;
	unsigned int hashval = (cnp->cn_hash & NCHASHMASK);

	if (nc_disabled) {
		return NULL;
	}

	ncpp = NCHHASH(dvp, cnp->cn_hash);
	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
			if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
				break;
		}
	}
	if (ncp == 0) {
		/*
		 * We failed to find an entry
		 */
		NCHSTAT(ncs_miss);
		return (NULL);
	}
	NCHSTAT(ncs_goodhits);

	return (ncp->nc_vp);
}


//
// Have to take a len argument because we may only need to
// hash part of a componentname.
//
static unsigned int
hash_string(const char *cp, int len)
{
	unsigned hash = 0;

	if (len) {
		while (len--) {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
	} else {
		while (*cp != '\0') {
			hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
		}
	}
	/*
	 * the crc generator can legitimately generate
	 * a 0... however, 0 for us means that we
	 * haven't computed a hash, so use 1 instead
	 */
	if (hash == 0)
		hash = 1;
	return hash;
}
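
/*
 * Example (illustrative only): both calling conventions hash the same
 * component name to the same non-zero value:
 *
 *	unsigned int h1 = hash_string("Makefile", 8);	// explicit length
 *	unsigned int h2 = hash_string("Makefile", 0);	// NUL-terminated form
 *	// h1 == h2, and neither is ever 0
 */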


/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought. If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned. If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned. If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	long namelen = cnp->cn_namelen;
	unsigned int hashval;
	boolean_t have_exclusive = FALSE;
	uint32_t vid;
	vnode_t	 vp;

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
	hashval = (cnp->cn_hash & NCHASHMASK);

	if (nc_disabled) {
		return 0;
	}

	NAME_CACHE_LOCK_SHARED();

relook:
	ncpp = NCHHASH(dvp, cnp->cn_hash);
	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
			if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
				break;
		}
	}
	/* We failed to find an entry */
	if (ncp == 0) {
		NCHSTAT(ncs_miss);
		NAME_CACHE_UNLOCK();
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		if (have_exclusive == TRUE) {
			NCHSTAT(ncs_badhits);
			cache_delete(ncp, 1);
			NAME_CACHE_UNLOCK();
			return (0);
		}
		NAME_CACHE_UNLOCK();
		NAME_CACHE_LOCK();
		have_exclusive = TRUE;
		goto relook;
	}
	vp = ncp->nc_vp;

	/* We found a "positive" match, return the vnode */
	if (vp) {
		NCHSTAT(ncs_goodhits);

		vid = vp->v_id;
		NAME_CACHE_UNLOCK();

		if (vnode_getwithvid(vp, vid)) {
#if COLLECT_STATS
			NAME_CACHE_LOCK();
			NCHSTAT(ncs_badvid);
			NAME_CACHE_UNLOCK();
#endif
			return (0);
		}
		*vpp = vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
		if (have_exclusive == TRUE) {
			NCHSTAT(ncs_badhits);
			cache_delete(ncp, 1);
			NAME_CACHE_UNLOCK();
			return (0);
		}
		NAME_CACHE_UNLOCK();
		NAME_CACHE_LOCK();
		have_exclusive = TRUE;
		goto relook;
	}

	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_whiteout field records whether this is a whiteout.
	 */
	NCHSTAT(ncs_neghits);

	if (ncp->nc_whiteout)
		cnp->cn_flags |= ISWHITEOUT;
	NAME_CACHE_UNLOCK();
	return (ENOENT);
}
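
/*
 * Usage sketch (illustrative only) of cache_lookup()'s three-way
 * return convention documented above:
 *
 *	vnode_t vp;
 *
 *	switch (cache_lookup(dvp, &vp, cnp)) {
 *	case -1:	// positive hit: vp holds an iocount, vnode_put when done
 *		break;
 *	case ENOENT:	// negative hit: the name is known not to exist
 *		break;
 *	case 0:		// miss: fall through to VNOP_LOOKUP
 *		break;
 *	}
 */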

const char *
cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
{
	const char *strname;

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

	/*
	 * grab 2 references on the string entered
	 * one for the cache_enter_locked to consume
	 * and the second to be consumed by v_name (vnode_create call point)
	 */
	strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);

	NAME_CACHE_LOCK();

	cache_enter_locked(dvp, vp, cnp, strname);

	NAME_CACHE_UNLOCK();

	return (strname);
}


/*
 * Add an entry to the cache...
 * but first check to see if the directory
 * that this entry is to be associated with has
 * had any cache_purges applied since we took
 * our identity snapshot... this check needs to
 * be done behind the name cache lock
 */
void
cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
{

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

	NAME_CACHE_LOCK();

	if (dvp->v_nc_generation == gen)
		(void)cache_enter_locked(dvp, vp, cnp, NULL);

	NAME_CACHE_UNLOCK();
}


/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	const char *strname;

	if (cnp->cn_hash == 0)
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

	/*
	 * grab 1 reference on the string entered
	 * for the cache_enter_locked to consume
	 */
	strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);

	NAME_CACHE_LOCK();

	cache_enter_locked(dvp, vp, cnp, strname);

	NAME_CACHE_UNLOCK();
}


static void
cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
{
	struct namecache *ncp, *negp;
	struct nchashhead *ncpp;

	if (nc_disabled)
		return;

	/*
	 * if the entry is for -ve caching vp is null
	 */
	if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
		/*
		 * someone beat us to the punch..
		 * this vnode is already in the cache
		 */
		if (strname != NULL)
			vfs_removename(strname);
		return;
	}
	/*
	 * We allocate a new entry if we are less than the maximum
	 * allowed and the one at the front of the list is in use.
	 * Otherwise we use the one at the front of the list.
	 */
	if (numcache < desiredNodes &&
	    ((ncp = nchead.tqh_first) == NULL ||
	     ncp->nc_hash.le_prev != 0)) {
		/*
		 * Allocate one more entry
		 */
		ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK);
		numcache++;
	} else {
		/*
		 * reuse an old entry
		 */
		ncp = TAILQ_FIRST(&nchead);
		TAILQ_REMOVE(&nchead, ncp, nc_entry);

		if (ncp->nc_hash.le_prev != 0) {
			/*
			 * still in use... we need to
			 * delete it before re-using it
			 */
			NCHSTAT(ncs_stolen);
			cache_delete(ncp, 0);
		}
	}
	NCHSTAT(ncs_enters);

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	ncp->nc_hashval = cnp->cn_hash;
	ncp->nc_whiteout = FALSE;

	if (strname == NULL)
		ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
	else
		ncp->nc_name = strname;
	/*
	 * make us the newest entry in the cache
	 * i.e. we'll be the last to be stolen
	 */
	TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);

	ncpp = NCHHASH(dvp, cnp->cn_hash);
#if DIAGNOSTIC
	{
		struct namecache *p;

		for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
			if (p == ncp)
				panic("cache_enter: duplicate");
	}
#endif
	/*
	 * make us available to be found via lookup
	 */
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);

	if (vp) {
		/*
		 * add to the list of name cache entries
		 * that point at vp
		 */
		LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
	} else {
		/*
		 * this is a negative cache entry (vp == NULL)
		 * stick it on the negative cache list
		 * and record the whiteout state
		 */
		TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);

		if (cnp->cn_flags & ISWHITEOUT)
			ncp->nc_whiteout = TRUE;
		ncs_negtotal++;

		if (ncs_negtotal > desiredNegNodes) {
			/*
			 * if we've reached our desired limit
			 * of negative cache entries, delete
			 * the oldest
			 */
			negp = TAILQ_FIRST(&neghead);
			cache_delete(negp, 1);
		}
	}
	/*
	 * add us to the list of name cache entries that
	 * are children of dvp
	 */
	LIST_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
}


/*
 * Initialize CRC-32 remainder table.
 */
static void init_crc32(void)
{
	/*
	 * the CRC-32 generator polynomial is:
	 *	x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10
	 *	+ x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
	 */
	unsigned int crc32_polynomial = 0x04c11db7;
	unsigned int i, j;

	/*
	 * pre-calculate the CRC-32 remainder for each possible octet encoding
	 */
	for (i = 0; i < 256; i++) {
		unsigned int crc_rem = i << 24;

		for (j = 0; j < 8; j++) {
			if (crc_rem & 0x80000000)
				crc_rem = (crc_rem << 1) ^ crc32_polynomial;
			else
				crc_rem = (crc_rem << 1);
		}
		crc32tab[i] = crc_rem;
	}
}


/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit(void)
{
	int i;

	desiredNegNodes = (desiredvnodes / 10);
	desiredNodes = desiredvnodes + desiredNegNodes;

	TAILQ_INIT(&nchead);
	TAILQ_INIT(&neghead);

	init_crc32();

	nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 * desiredNodes)), M_CACHE, &nchash);
	nchashmask = nchash;
	nchash++;

	init_string_table();

	/* Allocate name cache lock group attribute and group */
	namecache_lck_grp_attr = lck_grp_attr_alloc_init();

	namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr);

	/* Allocate name cache lock attribute */
	namecache_lck_attr = lck_attr_alloc_init();

	/* Allocate name cache lock */
	namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);


	/* Allocate string cache lock group attribute and group */
	strcache_lck_grp_attr = lck_grp_attr_alloc_init();

	strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr);

	/* Allocate string cache lock attribute */
	strcache_lck_attr = lck_attr_alloc_init();

	/* Allocate string cache lock */
	strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);

	for (i = 0; i < NUM_STRCACHE_LOCKS; i++)
		lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
}

void
name_cache_lock_shared(void)
{
	lck_rw_lock_shared(namecache_rw_lock);
}

void
name_cache_lock(void)
{
	lck_rw_lock_exclusive(namecache_rw_lock);
}

void
name_cache_unlock(void)
{
	lck_rw_done(namecache_rw_lock);
}


int
resize_namecache(u_int newsize)
{
	struct nchashhead	*new_table;
	struct nchashhead	*old_table;
	struct nchashhead	*old_head, *head;
	struct namecache	*entry, *next;
	uint32_t		i, hashval;
	int			dNodes, dNegNodes;
	u_long			new_size, old_size;

	dNegNodes = (newsize / 10);
	dNodes = newsize + dNegNodes;

	// we don't support shrinking yet
	if (dNodes <= desiredNodes) {
		return 0;
	}
	new_table = hashinit(2 * dNodes, M_CACHE, &nchashmask);
	new_size = nchashmask + 1;

	if (new_table == NULL) {
		return ENOMEM;
	}

	NAME_CACHE_LOCK();
	// do the switch!
	old_table = nchashtbl;
	nchashtbl = new_table;
	old_size = nchash;
	nchash = new_size;

	// walk the old table and insert all the entries into
	// the new table
	//
	for (i = 0; i < old_size; i++) {
		old_head = &old_table[i];
		for (entry = old_head->lh_first; entry != NULL; entry = next) {
			//
			// XXXdbg - Beware: this assumes that hash_string() does
			//          the same thing as what happens in
			//          lookup() over in vfs_lookup.c
			hashval = hash_string(entry->nc_name, 0);
			entry->nc_hashval = hashval;
			head = NCHHASH(entry->nc_dvp, hashval);

			next = entry->nc_hash.le_next;
			LIST_INSERT_HEAD(head, entry, nc_hash);
		}
	}
	desiredNodes = dNodes;
	desiredNegNodes = dNegNodes;

	NAME_CACHE_UNLOCK();
	FREE(old_table, M_CACHE);

	return 0;
}

static void
cache_delete(struct namecache *ncp, int age_entry)
{
	NCHSTAT(ncs_deletes);

	if (ncp->nc_vp) {
		LIST_REMOVE(ncp, nc_un.nc_link);
	} else {
		TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
		ncs_negtotal--;
	}
	LIST_REMOVE(ncp, nc_child);

	LIST_REMOVE(ncp, nc_hash);
	/*
	 * this field is used to indicate
	 * that the entry is in use and
	 * must be deleted before it can
	 * be reused...
	 */
	ncp->nc_hash.le_prev = NULL;

	if (age_entry) {
		/*
		 * make it the next one available
		 * for cache_enter's use
		 */
		TAILQ_REMOVE(&nchead, ncp, nc_entry);
		TAILQ_INSERT_HEAD(&nchead, ncp, nc_entry);
	}
	vfs_removename(ncp->nc_name);
	ncp->nc_name = NULL;
}


/*
 * purge the entry associated with the
 * specified vnode from the name cache
 */
void
cache_purge(vnode_t vp)
{
	struct namecache *ncp;
	kauth_cred_t tcred = NULL;

	if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
	    (LIST_FIRST(&vp->v_ncchildren) == NULL) &&
	    (vp->v_cred == NOCRED) &&
	    (vp->v_parent == NULLVP))
		return;

	NAME_CACHE_LOCK();

	if (vp->v_parent)
		vp->v_parent->v_nc_generation++;

	while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
		cache_delete(ncp, 1);

	while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) )
		cache_delete(ncp, 1);

	/*
	 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
	 */
	tcred = vp->v_cred;
	vp->v_cred = NOCRED;
	vp->v_authorized_actions = 0;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred))
		kauth_cred_unref(&tcred);
}

/*
 * Purge all negative cache entries that are children of the
 * given vnode. A case-insensitive file system (or any file
 * system that has multiple equivalent names for the same
 * directory entry) can use this when creating or renaming
 * to remove negative entries that may no longer apply.
 */
void
cache_purge_negatives(vnode_t vp)
{
	struct namecache *ncp, *next_ncp;

	NAME_CACHE_LOCK();

	LIST_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp)
		if (ncp->nc_vp == NULL)
			cache_delete(ncp, 1);

	NAME_CACHE_UNLOCK();
}
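
/*
 * Usage sketch (illustrative only): after a successful create on a
 * case-insensitive volume, purging negatives under the parent keeps a
 * stale entry for an equivalent spelling (e.g. "FOO" vs "foo") from
 * shadowing the new file:
 *
 *	error = VNOP_CREATE(dvp, &vp, cnp, &va, ctx);
 *	if (error == 0)
 *		cache_purge_negatives(dvp);
 */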

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	NAME_CACHE_LOCK();
	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
restart:
		for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
			if (ncp->nc_dvp->v_mount == mp) {
				cache_delete(ncp, 0);
				goto restart;
			}
		}
	}
	NAME_CACHE_UNLOCK();
}



//
// String ref routines
//
static LIST_HEAD(stringhead, string_t) *string_ref_table;
static u_long   string_table_mask;
static uint32_t filled_buckets = 0;


typedef struct string_t {
	LIST_ENTRY(string_t) hash_chain;
	const char *str;
	uint32_t   refcount;
} string_t;


static void
resize_string_ref_table(void)
{
	struct stringhead *new_table;
	struct stringhead *old_table;
	struct stringhead *old_head, *head;
	string_t          *entry, *next;
	uint32_t          i, hashval;
	u_long            new_mask, old_mask;

	/*
	 * need to hold the table lock exclusively
	 * in order to grow the table... need to recheck
	 * the need to resize again after we've taken
	 * the lock exclusively in case some other thread
	 * beat us to the punch
	 */
	lck_rw_lock_exclusive(strtable_rw_lock);

	if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);
		return;
	}
	new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);

	if (new_table == NULL) {
		printf("failed to resize the hash table.\n");
		lck_rw_done(strtable_rw_lock);
		return;
	}

	// do the switch!
	old_table = string_ref_table;
	string_ref_table = new_table;
	old_mask = string_table_mask;
	string_table_mask = new_mask;
	filled_buckets = 0;

	// walk the old table and insert all the entries into
	// the new table
	//
	for (i = 0; i <= old_mask; i++) {
		old_head = &old_table[i];
		for (entry = old_head->lh_first; entry != NULL; entry = next) {
			hashval = hash_string((const char *)entry->str, 0);
			head = &string_ref_table[hashval & string_table_mask];
			if (head->lh_first == NULL) {
				filled_buckets++;
			}
			next = entry->hash_chain.le_next;
			LIST_INSERT_HEAD(head, entry, hash_chain);
		}
	}
	lck_rw_done(strtable_rw_lock);

	FREE(old_table, M_CACHE);
}


static void
init_string_table(void)
{
	string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
}


const char *
vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
{
	return (add_name_internal(name, len, hashval, FALSE, flags));
}


static const char *
add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
{
	struct stringhead *head;
	string_t          *entry;
	uint32_t          chain_len = 0;
	uint32_t          hash_index;
	uint32_t          lock_index;
	char              *ptr;

	/*
	 * if the length already accounts for the null-byte, then
	 * subtract one so later on we don't index past the end
	 * of the string.
	 */
	if (len > 0 && name[len-1] == '\0') {
		len--;
	}
	if (hashval == 0) {
		hashval = hash_string(name, len);
	}

	/*
	 * take this lock 'shared' to keep the hash stable
	 * if someone else decides to grow the pool they
	 * will take this lock exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);

	/*
	 * If the table gets more than 3/4 full, resize it
	 */
	if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);

		resize_string_ref_table();

		lck_rw_lock_shared(strtable_rw_lock);
	}
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
		if (memcmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
			entry->refcount++;
			break;
		}
	}
	if (entry == NULL) {
		lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
		/*
		 * it wasn't already there so add it.
		 */
		MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);

		if (head->lh_first == NULL) {
			OSAddAtomic(1, &filled_buckets);
		}
		ptr = (char *)((char *)entry + sizeof(string_t));
		strncpy(ptr, name, len);
		ptr[len] = '\0';
		entry->str = ptr;
		entry->refcount = 1;
		LIST_INSERT_HEAD(head, entry, hash_chain);
	}
	if (need_extra_ref == TRUE)
		entry->refcount++;

	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	return (const char *)entry->str;
}


int
vfs_removename(const char *nameref)
{
	struct stringhead *head;
	string_t          *entry;
	uint32_t          hashval;
	uint32_t          hash_index;
	uint32_t          lock_index;
	int               retval = ENOENT;

	hashval = hash_string(nameref, 0);

	/*
	 * take this lock 'shared' to keep the hash stable
	 * if someone else decides to grow the pool they
	 * will take this lock exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);
	/*
	 * must compute the head behind the table lock
	 * since the size and location of the table
	 * can change on the fly
	 */
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
		if (entry->str == nameref) {
			entry->refcount--;

			if (entry->refcount == 0) {
				LIST_REMOVE(entry, hash_chain);

				if (head->lh_first == NULL) {
					OSAddAtomic(-1, &filled_buckets);
				}
			} else {
				entry = NULL;
			}
			retval = 0;
			break;
		}
	}
	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	if (entry != NULL)
		FREE(entry, M_TEMP);

	return retval;
}
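
/*
 * Usage sketch (illustrative only): vfs_addname() returns a reference-
 * counted interned string; each reference must eventually be dropped
 * with vfs_removename():
 *
 *	const char *s = vfs_addname("foo", 3, 0, 0);	// hash computed for us
 *	// ... use 's' as a stable copy of the name ...
 *	vfs_removename(s);	// drops the ref; entry is freed at zero
 */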


#ifdef DUMP_STRING_TABLE
void
dump_string_table(void)
{
	struct stringhead *head;
	string_t          *entry;
	u_long            i;

	lck_rw_lock_shared(strtable_rw_lock);

	for (i = 0; i <= string_table_mask; i++) {
		head = &string_ref_table[i];
		for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
			printf("%6d - %s\n", entry->refcount, entry->str);
		}
	}
	lck_rw_done(strtable_rw_lock);
}
#endif	/* DUMP_STRING_TABLE */