1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1993, 1995
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Poul-Henning Kamp of the FreeBSD Project.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *
65 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/time.h>
76 #include <sys/mount_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <miscfs/specfs/specdev.h>
79 #include <sys/namei.h>
80 #include <sys/errno.h>
81 #include <kern/kalloc.h>
82 #include <sys/kauth.h>
83 #include <sys/user.h>
84 #include <sys/paths.h>
85 #include <os/overflow.h>
86
87 #if CONFIG_MACF
88 #include <security/mac_framework.h>
89 #endif
90
91 /*
92 * Name caching works as follows:
93 *
94 * Names found by directory scans are retained in a cache
95 * for future reference. It is managed LRU, so frequently
96 * used names will hang around. Cache is indexed by hash value
97 * obtained from (vp, name) where vp refers to the directory
98 * containing name.
99 *
100 * If it is a "negative" entry, (i.e. for a name that is known NOT to
101 * exist) the vnode pointer will be NULL.
102 *
103 * Upon reaching the last segment of a path, if the reference
104 * is for DELETE, or NOCACHE is set (rewrite), and the
105 * name is located in the cache, it will be dropped.
106 */
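
/*
 * A minimal sketch of a positive lookup (illustrative only; it mirrors
 * cache_lookup_locked() below, with locking and statistics elided):
 *
 *	hashval = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
 *	ncpp = NCHHASH(dvp, hashval);	// (dvp->v_id ^ hashval) & nchashmask
 *	LIST_FOREACH(ncp, ncpp, nc_hash) {
 *		if (ncp->nc_dvp == dvp && ncp->nc_hashval == hashval &&
 *		    strncmp(ncp->nc_name, cnp->cn_nameptr, cnp->cn_namelen) == 0 &&
 *		    ncp->nc_name[cnp->cn_namelen] == '\0')
 *			return ncp->nc_vp;	// NULL for a negative entry
 *	}
 */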
107
108 /*
109 * Structures associated with name caching.
110 */
111
112 ZONE_DECLARE(namecache_zone, "namecache", sizeof(struct namecache), ZC_NONE);
113
114 LIST_HEAD(nchashhead, namecache) * nchashtbl; /* Hash Table */
115 u_long nchashmask;
116 u_long nchash; /* size of hash table - 1 */
117 long numcache; /* number of cache entries allocated */
118 int desiredNodes;
119 int desiredNegNodes;
120 int ncs_negtotal;
121 int nc_disabled = 0;
122 TAILQ_HEAD(, namecache) nchead; /* chain of all name cache entries */
123 TAILQ_HEAD(, namecache) neghead; /* chain of only negative cache entries */
124
125
126 #if COLLECT_STATS
127
128 struct nchstats nchstats; /* cache effectiveness statistics */
129
130 #define NCHSTAT(v) { \
131 nchstats.v++; \
132 }
133 #define NAME_CACHE_LOCK() name_cache_lock()
134 #define NAME_CACHE_UNLOCK() name_cache_unlock()
135 #define NAME_CACHE_LOCK_SHARED() name_cache_lock()
136
137 #else
138
139 #define NCHSTAT(v)
140 #define NAME_CACHE_LOCK() name_cache_lock()
141 #define NAME_CACHE_UNLOCK() name_cache_unlock()
142 #define NAME_CACHE_LOCK_SHARED() name_cache_lock_shared()
143
144 #endif
145
146
147 /* vars for name cache list lock */
148 lck_grp_t * namecache_lck_grp;
149 lck_grp_attr_t * namecache_lck_grp_attr;
150 lck_attr_t * namecache_lck_attr;
151
152 lck_grp_t * strcache_lck_grp;
153 lck_grp_attr_t * strcache_lck_grp_attr;
154 lck_attr_t * strcache_lck_attr;
155
156 lck_grp_t * rootvnode_lck_grp;
157 lck_grp_attr_t * rootvnode_lck_grp_attr;
158 lck_attr_t * rootvnode_lck_attr;
159
160 lck_rw_t * namecache_rw_lock;
161 lck_rw_t * strtable_rw_lock;
162 lck_rw_t * rootvnode_rw_lock;
163
164 #define NUM_STRCACHE_LOCKS 1024
165
166 lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];
167
168
169 static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
170 static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
171 static void init_string_table(void);
172 static void cache_delete(struct namecache *, int);
173 static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);
174 static void cache_purge_locked(vnode_t vp, kauth_cred_t *credp);
175
176 #ifdef DUMP_STRING_TABLE
177 /*
178 * Internal dump function used for debugging
179 */
180 void dump_string_table(void);
181 #endif /* DUMP_STRING_TABLE */
182
183 static void init_crc32(void);
184 static unsigned int crc32tab[256];
185
186
187 #define NCHHASH(dvp, hash_val) \
188 (&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])
189
190 /*
191 * This function tries to check if a directory vp is a subdirectory of dvp
192 * only from valid v_parent pointers. It is called with the name cache lock
193 * held and does not drop the lock anytime inside the function.
194 *
195 * It returns a boolean that indicates whether or not it was able to
196 * successfully infer the parent/descendant relationship via the v_parent
197 * pointers, or if it could not infer such a relationship and the decision
198 * must be delegated to the owning filesystem.
199 *
200 * If it does not defer the decision, i.e. it was successfully able to determine
201 * the parent/descendant relationship, *is_subdir tells the caller if vp is a
202 * subdirectory of dvp.
203 *
204 * If the decision is deferred, *next_vp is where it stopped i.e. *next_vp
205 * is the vnode whose parent is to be determined from the filesystem.
206 * *is_subdir, in this case, is not indicative of anything and should be
207 * ignored.
208 *
209 * The return value and output args should be used as follows :
210 *
211 * defer = cache_check_vnode_issubdir(vp, dvp, is_subdir, next_vp);
212 * if (!defer) {
213 * if (*is_subdir)
214 * vp is subdirectory;
215 * else
216 * vp is not a subdirectory;
217 * } else {
218 * if (*next_vp)
219 * check this vnode's parent from the filesystem
220 * else
221 * error (likely because of forced unmount).
222 * }
223 *
224 */
225 static boolean_t
226 cache_check_vnode_issubdir(vnode_t vp, vnode_t dvp, boolean_t *is_subdir,
227 vnode_t *next_vp)
228 {
229 vnode_t tvp = vp;
230 int defer = FALSE;
231
232 *is_subdir = FALSE;
233 *next_vp = NULLVP;
234 while (1) {
235 mount_t tmp;
236
237 if (tvp == dvp) {
238 *is_subdir = TRUE;
239 break;
240 } else if (tvp == rootvnode) {
241 /* *is_subdir = FALSE */
242 break;
243 }
244
245 tmp = tvp->v_mount;
246 while ((tvp->v_flag & VROOT) && tmp && tmp->mnt_vnodecovered &&
247 tvp != dvp && tvp != rootvnode) {
248 tvp = tmp->mnt_vnodecovered;
249 tmp = tvp->v_mount;
250 }
251
252 /*
253 * If dvp is not at the top of a mount "stack" then
254 * vp is not a subdirectory of dvp either.
255 */
256 if (tvp == dvp || tvp == rootvnode) {
257 /* *is_subdir = FALSE */
258 break;
259 }
260
261 if (!tmp) {
262 defer = TRUE;
263 *next_vp = NULLVP;
264 break;
265 }
266
267 if ((tvp->v_flag & VISHARDLINK) || !(tvp->v_parent)) {
268 defer = TRUE;
269 *next_vp = tvp;
270 break;
271 }
272
273 tvp = tvp->v_parent;
274 }
275
276 return defer;
277 }
278
279 /* maximum number of retries for potentially transient errors in vnode_issubdir */
280 #define MAX_ERROR_RETRY 3
281
282 /*
283 * This function checks if a given directory (vp) is a subdirectory of dvp.
284 * It walks backwards from vp and if it hits dvp in its parent chain,
285 * it is a subdirectory. If it encounters the root directory, it is not
286 * a subdirectory.
287 *
288 * This function returns an error if it is unsuccessful and 0 on success.
289 *
290 * On entry (and exit) vp has an iocount and if this function has to take
291 * any iocounts on other vnodes in the parent chain traversal, it releases them.
292 */
293 int
294 vnode_issubdir(vnode_t vp, vnode_t dvp, int *is_subdir, vfs_context_t ctx)
295 {
296 vnode_t start_vp, tvp;
297 vnode_t vp_with_iocount;
298 int error = 0;
299 char dotdotbuf[] = "..";
300 int error_retry_count = 0; /* retry count for potentially transient
301 * errors */
302
303 *is_subdir = FALSE;
304 tvp = start_vp = vp;
305 /*
306 * Anytime we acquire an iocount in this function, we save the vnode
307 * in this variable and release it before exiting.
308 */
309 vp_with_iocount = NULLVP;
310
311 while (1) {
312 boolean_t defer;
313 vnode_t pvp;
314 uint32_t vid;
315 struct componentname cn;
316 boolean_t is_subdir_locked = FALSE;
317
318 if (tvp == dvp) {
319 *is_subdir = TRUE;
320 break;
321 } else if (tvp == rootvnode) {
322 /* *is_subdir = FALSE */
323 break;
324 }
325
326 NAME_CACHE_LOCK_SHARED();
327
328 defer = cache_check_vnode_issubdir(tvp, dvp, &is_subdir_locked,
329 &tvp);
330
331 if (defer && tvp) {
332 vid = vnode_vid(tvp);
333 }
334
335 NAME_CACHE_UNLOCK();
336
337 if (!defer) {
338 *is_subdir = is_subdir_locked;
339 break;
340 }
341
342 if (!tvp) {
343 if (error_retry_count++ < MAX_ERROR_RETRY) {
344 tvp = vp;
345 continue;
346 }
347 error = ENOENT;
348 break;
349 }
350
351 if (tvp != start_vp) {
352 if (vp_with_iocount) {
353 vnode_put(vp_with_iocount);
354 vp_with_iocount = NULLVP;
355 }
356
357 error = vnode_getwithvid(tvp, vid);
358 if (error) {
359 if (error_retry_count++ < MAX_ERROR_RETRY) {
360 tvp = vp;
361 error = 0;
362 continue;
363 }
364 break;
365 }
366
367 vp_with_iocount = tvp;
368 }
369
370 bzero(&cn, sizeof(cn));
371 cn.cn_nameiop = LOOKUP;
372 cn.cn_flags = ISLASTCN | ISDOTDOT;
373 cn.cn_context = ctx;
374 cn.cn_pnbuf = &dotdotbuf[0];
375 cn.cn_pnlen = sizeof(dotdotbuf);
376 cn.cn_nameptr = cn.cn_pnbuf;
377 cn.cn_namelen = 2;
378
379 pvp = NULLVP;
380 if ((error = VNOP_LOOKUP(tvp, &pvp, &cn, ctx))) {
381 break;
382 }
383
384 if (!(tvp->v_flag & VISHARDLINK) && tvp->v_parent != pvp) {
385 (void)vnode_update_identity(tvp, pvp, NULL, 0, 0,
386 VNODE_UPDATE_PARENT);
387 }
388
389 if (vp_with_iocount) {
390 vnode_put(vp_with_iocount);
391 }
392
393 vp_with_iocount = tvp = pvp;
394 }
395
396 if (vp_with_iocount) {
397 vnode_put(vp_with_iocount);
398 }
399
400 return error;
401 }
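
/*
 * Caller sketch (illustrative; 'candidate_vp' is a hypothetical name):
 * a typical use is refusing an operation that would move a directory
 * under one of its own descendants.
 *
 *	int is_subdir = 0;
 *	error = vnode_issubdir(candidate_vp, dvp, &is_subdir, ctx);
 *	if (error == 0 && is_subdir)
 *		...reject the request...
 */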
402
403 /*
404 * This function builds the path in "buff" from the supplied vnode.
405 * The length of the buffer *INCLUDING* the trailing zero byte is
406 * returned in outlen. NOTE: the length includes the trailing zero
407 * byte and thus the length is one greater than what strlen would
408 * return. This is important and lots of code elsewhere in the kernel
409 * assumes this behavior.
410 *
411 * This function can call VNOPs in the file system if the parent vnode
412 * does not exist, or when called for hardlinks via the volfs path.
413 * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
414 * in the name cache and does not enter the file system.
415 *
416 * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
417 * we encounter ENOENT during path reconstruction. ENOENT means that
418 * one of the parents moved while we were building the path. The
419 * caller can handle this case specially by calling build_path again.
420 *
421 * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return a path
422 * that is relative to the nearest mount point, i.e. we do not
423 * cross over mount points while building the path.
424 *
425 * passed in vp must have a valid io_count reference
426 *
427 * If the parent vnode is non-NULL it must also have an io count. This
428 * allows build_path_with_parent to be safely called for operations like
429 * unlink, rmdir and rename that already have io counts on the target
430 * and the directory. In this way build_path_with_parent does not have
431 * to try to obtain an additional io count on the parent. Taking an
432 * io count on the parent can lead to deadlock if a forced unmount
433 * occurs at the right moment. For a fuller explanation of how this
434 * can occur, see the comment for vn_getpath_with_parent.
435 *
436 */
437 int
438 build_path_with_parent(vnode_t first_vp, vnode_t parent_vp, char *buff, int buflen,
439 int *outlen, size_t *mntpt_outlen, int flags, vfs_context_t ctx)
440 {
441 vnode_t vp, tvp;
442 vnode_t vp_with_iocount;
443 vnode_t proc_root_dir_vp;
444 char *end;
445 char *mntpt_end;
446 const char *str;
447 unsigned int len;
448 int ret = 0;
449 int fixhardlink;
450
451 if (first_vp == NULLVP) {
452 return EINVAL;
453 }
454
455 if (buflen <= 1) {
456 return ENOSPC;
457 }
458
459 /*
460 * Grab the process fd so we can evaluate fd_rdir.
461 */
462 if (vfs_context_proc(ctx)->p_fd && !(flags & BUILDPATH_NO_PROCROOT)) {
463 proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
464 } else {
465 proc_root_dir_vp = NULL;
466 }
467
468 vp_with_iocount = NULLVP;
469 again:
470 vp = first_vp;
471
472 end = &buff[buflen - 1];
473 *end = '\0';
474 mntpt_end = NULL;
475
476 /*
477 * Catch a special corner case here: chroot to /full/path/to/dir, chdir to
478 * it, then open it. Without this check, the path to it will be
479 * /full/path/to/dir instead of "/".
480 */
481 if (proc_root_dir_vp == first_vp) {
482 *--end = '/';
483 goto out;
484 }
485
486 /*
487 * holding the NAME_CACHE_LOCK in shared mode is
488 * sufficient to stabilize both the vp->v_parent chain
489 * and the 'vp->v_mount->mnt_vnodecovered' chain
490 *
491 * if we need to drop this lock, we must first grab the v_id
492 * from the vnode we're currently working with... if that
493 * vnode doesn't already have an io_count reference (the vp
494 * passed in comes with one), we must grab a reference
495 * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
496 * deadlocks may result if you call vnode_get while holding
497 * the NAME_CACHE_LOCK... we lazily release the reference
498 * we pick up the next time we encounter a need to drop
499 * the NAME_CACHE_LOCK or before we return from this routine
500 */
501 NAME_CACHE_LOCK_SHARED();
502
503 #if CONFIG_FIRMLINKS
504 if (!(flags & BUILDPATH_NO_FIRMLINK) &&
505 (vp->v_flag & VFMLINKTARGET) && vp->v_fmlink) {
506 vp = vp->v_fmlink;
507 }
508 #endif
509
510 /*
511 * Check if this is the root of a file system.
512 */
513 while (vp && vp->v_flag & VROOT) {
514 if (vp->v_mount == NULL) {
515 ret = EINVAL;
516 goto out_unlock;
517 }
518 if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
519 /*
520 * It's the root of the root file system, so it's
521 * just "/".
522 */
523 *--end = '/';
524
525 goto out_unlock;
526 } else {
527 /*
528 * This is the root of the volume and the caller does not
529 * want to cross mount points. Therefore just return
530 * '/' as the relative path.
531 */
532 #if CONFIG_FIRMLINKS
533 if (!(flags & BUILDPATH_NO_FIRMLINK) &&
534 (vp->v_flag & VFMLINKTARGET) && vp->v_fmlink) {
535 vp = vp->v_fmlink;
536 } else
537 #endif
538 if (flags & BUILDPATH_VOLUME_RELATIVE) {
539 *--end = '/';
540 goto out_unlock;
541 } else {
542 vp = vp->v_mount->mnt_vnodecovered;
543 if (!mntpt_end && vp) {
544 mntpt_end = end;
545 }
546 }
547 }
548 }
549
550 while ((vp != NULLVP) && (vp->v_parent != vp)) {
551 int vid;
552
553 /*
554 * For hardlinks the v_name may be stale, so if it's OK
555 * to enter a file system, ask the file system for the
556 * name and parent (below).
557 */
558 fixhardlink = (vp->v_flag & VISHARDLINK) &&
559 (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
560 !(flags & BUILDPATH_NO_FS_ENTER);
561
562 if (!fixhardlink) {
563 str = vp->v_name;
564
565 if (str == NULL || *str == '\0') {
566 if (vp->v_parent != NULL) {
567 ret = EINVAL;
568 } else {
569 ret = ENOENT;
570 }
571 goto out_unlock;
572 }
573 len = (unsigned int)strlen(str);
574 /*
575 * Check that there's enough space (including space for the '/')
576 */
577 if ((unsigned int)(end - buff) < (len + 1)) {
578 ret = ENOSPC;
579 goto out_unlock;
580 }
581 /*
582 * Copy the name backwards.
583 */
584 str += len;
585
586 for (; len > 0; len--) {
587 *--end = *--str;
588 }
589 /*
590 * Add a path separator.
591 */
592 *--end = '/';
593 }
594
595 /*
596 * Walk up the parent chain.
597 */
598 if (((vp->v_parent != NULLVP) && !fixhardlink) ||
599 (flags & BUILDPATH_NO_FS_ENTER)) {
600 /*
601 * In this if () block we are not allowed to enter the filesystem
602 * to conclusively get the most accurate parent identifier.
603 * As a result, if 'vp' does not identify '/' and it
604 * does not have a valid v_parent, then error out
605 * and disallow further path construction
606 */
607 if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
608 /*
609 * Only '/' is allowed to have a NULL parent
610 * pointer. Upper level callers should ideally
611 * re-drive name lookup on receiving a ENOENT.
612 */
613 ret = ENOENT;
614
615 /* The code below will exit early if 'tvp = vp' == NULL */
616 }
617 vp = vp->v_parent;
618
619 /*
620 * if the vnode we have in hand isn't a directory and it
621 * has a v_parent, then we started with the resource fork
622 * so skip up to avoid getting a duplicate copy of the
623 * file name in the path.
624 */
625 if (vp && !vnode_isdir(vp) && vp->v_parent) {
626 vp = vp->v_parent;
627 }
628 } else {
629 /*
630 * No parent, go get it if supported.
631 */
632 struct vnode_attr va;
633 vnode_t dvp;
634
635 /*
636 * Make sure file system supports obtaining a path from id.
637 */
638 if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
639 ret = ENOENT;
640 goto out_unlock;
641 }
642 vid = vp->v_id;
643
644 NAME_CACHE_UNLOCK();
645
646 if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
647 if (vp_with_iocount) {
648 vnode_put(vp_with_iocount);
649 vp_with_iocount = NULLVP;
650 }
651 if (vnode_getwithvid(vp, vid)) {
652 goto again;
653 }
654 vp_with_iocount = vp;
655 }
656 VATTR_INIT(&va);
657 VATTR_WANTED(&va, va_parentid);
658
659 if (fixhardlink) {
660 VATTR_WANTED(&va, va_name);
661 va.va_name = zalloc(ZV_NAMEI);
662 } else {
663 va.va_name = NULL;
664 }
665 /*
666 * Ask the file system for its parent id and for its name (optional).
667 */
668 ret = vnode_getattr(vp, &va, ctx);
669
670 if (fixhardlink) {
671 if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
672 str = va.va_name;
673 vnode_update_identity(vp, NULL, str, (unsigned int)strlen(str), 0, VNODE_UPDATE_NAME);
674 } else if (vp->v_name) {
675 str = vp->v_name;
676 ret = 0;
677 } else {
678 ret = ENOENT;
679 goto bad_news;
680 }
681 len = (unsigned int)strlen(str);
682
683 /*
684 * Check that there's enough space.
685 */
686 if ((unsigned int)(end - buff) < (len + 1)) {
687 ret = ENOSPC;
688 } else {
689 /* Copy the name backwards. */
690 str += len;
691
692 for (; len > 0; len--) {
693 *--end = *--str;
694 }
695 /*
696 * Add a path separator.
697 */
698 *--end = '/';
699 }
700 bad_news:
701 zfree(ZV_NAMEI, va.va_name);
702 }
703 if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
704 ret = ENOENT;
705 goto out;
706 }
707 /*
708 * Ask the file system for the parent vnode.
709 */
710 if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx))) {
711 goto out;
712 }
713
714 if (!fixhardlink && (vp->v_parent != dvp)) {
715 vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);
716 }
717
718 if (vp_with_iocount) {
719 vnode_put(vp_with_iocount);
720 }
721 vp = dvp;
722 vp_with_iocount = vp;
723
724 NAME_CACHE_LOCK_SHARED();
725
726 /*
727 * if the vnode we have in hand isn't a directory and it
728 * has a v_parent, then we started with the resource fork
729 * so skip up to avoid getting a duplicate copy of the
730 * file name in the path.
731 */
732 if (vp && !vnode_isdir(vp) && vp->v_parent) {
733 vp = vp->v_parent;
734 }
735 }
736
737 if (vp && (flags & BUILDPATH_CHECKACCESS)) {
738 vid = vp->v_id;
739
740 NAME_CACHE_UNLOCK();
741
742 if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
743 if (vp_with_iocount) {
744 vnode_put(vp_with_iocount);
745 vp_with_iocount = NULLVP;
746 }
747 if (vnode_getwithvid(vp, vid)) {
748 goto again;
749 }
750 vp_with_iocount = vp;
751 }
752 if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx))) {
753 goto out; /* no peeking */
754 }
755 NAME_CACHE_LOCK_SHARED();
756 }
757
758 /*
759 * When a mount point is crossed switch the vp.
760 * Continue until we find the root or we find
761 * a vnode that's not the root of a mounted
762 * file system.
763 */
764 tvp = vp;
765
766 while (tvp) {
767 if (tvp == proc_root_dir_vp) {
768 goto out_unlock; /* encountered the root */
769 }
770
771 #if CONFIG_FIRMLINKS
772 if (!(flags & BUILDPATH_NO_FIRMLINK) &&
773 (tvp->v_flag & VFMLINKTARGET) && tvp->v_fmlink) {
774 tvp = tvp->v_fmlink;
775 break;
776 }
777 #endif
778
779 if (!(tvp->v_flag & VROOT) || !tvp->v_mount) {
780 break; /* not the root of a mounted FS */
781 }
782 if (flags & BUILDPATH_VOLUME_RELATIVE) {
783 /* Do not cross over mount points */
784 tvp = NULL;
785 } else {
786 tvp = tvp->v_mount->mnt_vnodecovered;
787 if (!mntpt_end && tvp) {
788 mntpt_end = end;
789 }
790 }
791 }
792 if (tvp == NULLVP) {
793 goto out_unlock;
794 }
795 vp = tvp;
796 }
797 out_unlock:
798 NAME_CACHE_UNLOCK();
799 out:
800 if (vp_with_iocount) {
801 vnode_put(vp_with_iocount);
802 }
803 /*
804 * Slide the name down to the beginning of the buffer.
805 */
806 memmove(buff, end, &buff[buflen] - end);
807
808 /*
809 * length includes the trailing zero byte
810 */
811 *outlen = (int)(&buff[buflen] - end);
812 if (mntpt_outlen && mntpt_end) {
813 *mntpt_outlen = (size_t)*outlen - (size_t)(&buff[buflen] - mntpt_end);
814 }
815
816 /* One of the parents was moved during path reconstruction and
817 * the caller asked, via BUILDPATH_CHECK_MOVED, to be told when
818 * any of the parents move, so return EAGAIN.
819 */
820 if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
821 ret = EAGAIN;
822 }
823
824 return ret;
825 }
826
827 int
828 build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
829 {
830 return build_path_with_parent(first_vp, NULL, buff, buflen, outlen, NULL, flags, ctx);
831 }
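
/*
 * Usage sketch (illustrative; in-kernel callers normally reach this
 * through the vn_getpath* wrappers). Note that the returned length
 * counts the trailing NUL byte:
 *
 *	char *path = zalloc(ZV_NAMEI);	// MAXPATHLEN-sized buffer
 *	int len = 0;
 *	if (build_path(vp, path, MAXPATHLEN, &len, 0, ctx) == 0)
 *		printf("%s (strlen %d)\n", path, len - 1);
 *	zfree(ZV_NAMEI, path);
 */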
832
833 /*
834 * return NULLVP if vp's parent doesn't
835 * exist, or we can't get a valid iocount on it;
836 * otherwise return the parent of vp
837 */
838 vnode_t
839 vnode_getparent(vnode_t vp)
840 {
841 vnode_t pvp = NULLVP;
842 int pvid;
843
844 NAME_CACHE_LOCK_SHARED();
845
846 pvp = vp->v_parent;
847
848 /*
849 * v_parent is stable behind the name_cache lock
850 * however, the only thing we can really guarantee
851 * is that we've grabbed a valid iocount on the
852 * parent of 'vp' at the time we took the name_cache lock...
853 * once we drop the lock, vp could get re-parented
854 */
855 if (pvp != NULLVP) {
856 pvid = pvp->v_id;
857
858 NAME_CACHE_UNLOCK();
859
860 if (vnode_getwithvid(pvp, pvid) != 0) {
861 pvp = NULL;
862 }
863 } else {
864 NAME_CACHE_UNLOCK();
865 }
866 return pvp;
867 }
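
/*
 * Usage sketch (illustrative): the parent, when returned, carries an
 * iocount that the caller must drop with vnode_put():
 *
 *	vnode_t pvp = vnode_getparent(vp);
 *	if (pvp != NULLVP) {
 *		...use pvp...
 *		vnode_put(pvp);
 *	}
 */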
868
869 const char *
870 vnode_getname(vnode_t vp)
871 {
872 const char *name = NULL;
873
874 NAME_CACHE_LOCK_SHARED();
875
876 if (vp->v_name) {
877 name = vfs_addname(vp->v_name, (unsigned int)strlen(vp->v_name), 0, 0);
878 }
879 NAME_CACHE_UNLOCK();
880
881 return name;
882 }
883
884 void
885 vnode_putname(const char *name)
886 {
887 vfs_removename(name);
888 }
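
/*
 * Usage sketch (illustrative): vnode_getname() takes a reference on the
 * string-table entry, so every successful call must be paired with
 * vnode_putname():
 *
 *	const char *name = vnode_getname(vp);
 *	if (name != NULL) {
 *		printf("operating on %s\n", name);
 *		vnode_putname(name);
 *	}
 */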
889
890 static const char unknown_vnodename[] = "(unknown vnode name)";
891
892 const char *
893 vnode_getname_printable(vnode_t vp)
894 {
895 const char *name = vnode_getname(vp);
896 if (name != NULL) {
897 return name;
898 }
899
900 switch (vp->v_type) {
901 case VCHR:
902 case VBLK:
903 {
904 /*
905 * Create an artificial dev name from
906 * major and minor device number
907 */
908 char dev_name[64];
909 (void) snprintf(dev_name, sizeof(dev_name),
910 "%c(%u, %u)", VCHR == vp->v_type ? 'c':'b',
911 major(vp->v_rdev), minor(vp->v_rdev));
912 /*
913 * Add the newly created dev name to the name
914 * cache to allow easier cleanup. Also,
915 * vfs_addname allocates memory for the new name
916 * and returns it.
917 */
918 NAME_CACHE_LOCK_SHARED();
919 name = vfs_addname(dev_name, (unsigned int)strlen(dev_name), 0, 0);
920 NAME_CACHE_UNLOCK();
921 return name;
922 }
923 default:
924 return unknown_vnodename;
925 }
926 }
927
928 void
929 vnode_putname_printable(const char *name)
930 {
931 if (name == unknown_vnodename) {
932 return;
933 }
934 vnode_putname(name);
935 }
936
937
938 /*
939 * if VNODE_UPDATE_PARENT, and we can take
940 * a reference on dvp, then update vp with
941 * its new parent... if vp already has a parent,
942 * then drop the reference vp held on it
943 *
944 * if VNODE_UPDATE_NAME,
945 * then drop string ref on v_name if it exists, and if name is non-NULL
946 * then pick up a string reference on name and record it in v_name...
947 * optionally pass in the length and hashval of name if known
948 *
949 * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
950 */
951 void
952 vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
953 {
954 struct namecache *ncp;
955 vnode_t old_parentvp = NULLVP;
956 int isstream = (vp->v_flag & VISNAMEDSTREAM);
957 int kusecountbumped = 0;
958 kauth_cred_t tcred = NULL;
959 const char *vname = NULL;
960 const char *tname = NULL;
961
962 if (name_len < 0) {
963 return;
964 }
965
966 if (flags & VNODE_UPDATE_PARENT) {
967 if (dvp && vnode_ref(dvp) != 0) {
968 dvp = NULLVP;
969 }
970 /* Don't count a stream's parent ref during unmounts */
971 if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
972 vnode_lock_spin(dvp);
973 ++dvp->v_kusecount;
974 kusecountbumped = 1;
975 vnode_unlock(dvp);
976 }
977 } else {
978 dvp = NULLVP;
979 }
980 if ((flags & VNODE_UPDATE_NAME)) {
981 if (name != vp->v_name) {
982 if (name && *name) {
983 if (name_len == 0) {
984 name_len = (int)strlen(name);
985 }
986 tname = vfs_addname(name, name_len, name_hashval, 0);
987 }
988 } else {
989 flags &= ~VNODE_UPDATE_NAME;
990 }
991 }
992 if ((flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME | VNODE_UPDATE_PURGEFIRMLINK))) {
993 NAME_CACHE_LOCK();
994
995 #if CONFIG_FIRMLINKS
996 if (flags & VNODE_UPDATE_PURGEFIRMLINK) {
997 vnode_t old_fvp = vp->v_fmlink;
998 if (old_fvp) {
999 vnode_lock_spin(vp);
1000 vp->v_flag &= ~VFMLINKTARGET;
1001 vp->v_fmlink = NULLVP;
1002 vnode_unlock(vp);
1003 NAME_CACHE_UNLOCK();
1004
1005 /*
1006 * vnode_rele can result in cascading series of
1007 * usecount releases. The combination of calling
1008 * vnode_recycle and dont_reenter (3rd arg to
1009 * vnode_rele_internal) ensures we don't have
1010 * that issue.
1011 */
1012 vnode_recycle(old_fvp);
1013 vnode_rele_internal(old_fvp, O_EVTONLY, 1, 0);
1014
1015 NAME_CACHE_LOCK();
1016 }
1017 }
1018 #endif
1019
1020 if ((flags & VNODE_UPDATE_PURGE)) {
1021 if (vp->v_parent) {
1022 vp->v_parent->v_nc_generation++;
1023 }
1024
1025 while ((ncp = LIST_FIRST(&vp->v_nclinks))) {
1026 cache_delete(ncp, 1);
1027 }
1028
1029 while ((ncp = TAILQ_FIRST(&vp->v_ncchildren))) {
1030 cache_delete(ncp, 1);
1031 }
1032
1033 /*
1034 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1035 */
1036 tcred = vp->v_cred;
1037 vp->v_cred = NOCRED;
1038 vp->v_authorized_actions = 0;
1039 vp->v_cred_timestamp = 0;
1040 }
1041 if ((flags & VNODE_UPDATE_NAME)) {
1042 vname = vp->v_name;
1043 vp->v_name = tname;
1044 }
1045 if (flags & VNODE_UPDATE_PARENT) {
1046 if (dvp != vp && dvp != vp->v_parent) {
1047 old_parentvp = vp->v_parent;
1048 vp->v_parent = dvp;
1049 dvp = NULLVP;
1050
1051 if (old_parentvp) {
1052 flags |= VNODE_UPDATE_CACHE;
1053 }
1054 }
1055 }
1056 if (flags & VNODE_UPDATE_CACHE) {
1057 while ((ncp = LIST_FIRST(&vp->v_nclinks))) {
1058 cache_delete(ncp, 1);
1059 }
1060 }
1061 NAME_CACHE_UNLOCK();
1062
1063 if (vname != NULL) {
1064 vfs_removename(vname);
1065 }
1066
1067 if (IS_VALID_CRED(tcred)) {
1068 kauth_cred_unref(&tcred);
1069 }
1070 }
1071 if (dvp != NULLVP) {
1072 /* Back-out the ref we took if we lost a race for vp->v_parent. */
1073 if (kusecountbumped) {
1074 vnode_lock_spin(dvp);
1075 if (dvp->v_kusecount > 0) {
1076 --dvp->v_kusecount;
1077 }
1078 vnode_unlock(dvp);
1079 }
1080 vnode_rele(dvp);
1081 }
1082 if (old_parentvp) {
1083 struct uthread *ut;
1084
1085 if (isstream) {
1086 vnode_lock_spin(old_parentvp);
1087 if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0)) {
1088 --old_parentvp->v_kusecount;
1089 }
1090 vnode_unlock(old_parentvp);
1091 }
1092 ut = get_bsdthread_info(current_thread());
1093
1094 /*
1095 * indicate to vnode_rele that it shouldn't do a
1096 * vnode_reclaim at this time... instead it will
1097 * chain the vnode to the uu_vreclaims list...
1098 * we'll be responsible for calling vnode_reclaim
1099 * on each of the vnodes in this list...
1100 */
1101 ut->uu_defer_reclaims = 1;
1102 ut->uu_vreclaims = NULLVP;
1103
1104 while ((vp = old_parentvp) != NULLVP) {
1105 vnode_lock_spin(vp);
1106 vnode_rele_internal(vp, 0, 0, 1);
1107
1108 /*
1109 * check to see if the vnode is now in the state
1110 * that would have triggered a vnode_reclaim in vnode_rele
1111 * if it is, we save its parent pointer and then NULL
1112 * out the v_parent field... we'll drop the reference
1113 * that was held on the next iteration of this loop...
1114 * this short circuits a potential deep recursion if we
1115 * have a long chain of parents in this state...
1116 * we'll sit in this loop until we run into
1117 * a parent in this chain that is not in this state
1118 *
1119 * make our check and the vnode_rele atomic
1120 * with respect to the current vnode we're working on
1121 * by holding the vnode lock
1122 * if vnode_rele deferred the vnode_reclaim and has put
1123 * this vnode on the list to be reaped by us, then
1124 * it has left this vnode with an iocount == 1
1125 */
1126 if ((vp->v_iocount == 1) && (vp->v_usecount == 0) &&
1127 ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
1128 /*
1129 * vnode_rele wanted to do a vnode_reclaim on this vnode
1130 * it should be sitting on the head of the uu_vreclaims chain
1131 * pull the parent pointer now so that when we do the
1132 * vnode_reclaim for each of the vnodes in the uu_vreclaims
1133 * list, we won't recurse back through here
1134 *
1135 * need to do a convert here in case vnode_rele_internal
1136 * returns with the lock held in the spin mode... it
1137 * can drop and retake the lock under certain circumstances
1138 */
1139 vnode_lock_convert(vp);
1140
1141 NAME_CACHE_LOCK();
1142 old_parentvp = vp->v_parent;
1143 vp->v_parent = NULLVP;
1144 NAME_CACHE_UNLOCK();
1145 } else {
1146 /*
1147 * we're done... we ran into a vnode that isn't
1148 * being terminated
1149 */
1150 old_parentvp = NULLVP;
1151 }
1152 vnode_unlock(vp);
1153 }
1154 ut->uu_defer_reclaims = 0;
1155
1156 while ((vp = ut->uu_vreclaims) != NULLVP) {
1157 ut->uu_vreclaims = vp->v_defer_reclaimlist;
1158
1159 /*
1160 * vnode_put will drive the vnode_reclaim if
1161 * we are still the only reference on this vnode
1162 */
1163 vnode_put(vp);
1164 }
1165 }
1166 }
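
/*
 * Example (illustrative; 'new_dvp' and 'new_name' are hypothetical
 * names): a rename-style caller can re-parent a vnode, install its new
 * name and flush stale cache entries in a single call. Passing 0 for
 * the hash value lets the string table compute it:
 *
 *	vnode_update_identity(vp, new_dvp, new_name, (int)strlen(new_name),
 *	    0, VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME | VNODE_UPDATE_CACHE);
 */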
1167
1168 #if CONFIG_FIRMLINKS
1169 errno_t
1170 vnode_setasfirmlink(vnode_t vp, vnode_t target_vp)
1171 {
1172 int error = 0;
1173 vnode_t old_target_vp = NULLVP;
1174 vnode_t old_target_vp_v_fmlink = NULLVP;
1175 kauth_cred_t target_vp_cred = NULL;
1176 kauth_cred_t old_target_vp_cred = NULL;
1177
1178 if (!vp) {
1179 return EINVAL;
1180 }
1181
1182 if (target_vp) {
1183 if (vp->v_fmlink == target_vp) { /* Will be checked again under the name cache lock */
1184 return 0;
1185 }
1186
1187 /*
1188 * Firmlink source and target will take both a usecount
1189 * and kusecount on each other.
1190 */
1191 if ((error = vnode_ref_ext(target_vp, O_EVTONLY, VNODE_REF_FORCE))) {
1192 return error;
1193 }
1194
1195 if ((error = vnode_ref_ext(vp, O_EVTONLY, VNODE_REF_FORCE))) {
1196 vnode_rele_ext(target_vp, O_EVTONLY, 1);
1197 return error;
1198 }
1199 }
1200
1201 NAME_CACHE_LOCK();
1202
1203 old_target_vp = vp->v_fmlink;
1204 if (target_vp && (target_vp == old_target_vp)) {
1205 NAME_CACHE_UNLOCK();
1206 return 0;
1207 }
1208 vp->v_fmlink = target_vp;
1209
1210 vnode_lock_spin(vp);
1211 vp->v_flag &= ~VFMLINKTARGET;
1212 vnode_unlock(vp);
1213
1214 if (target_vp) {
1215 target_vp->v_fmlink = vp;
1216 vnode_lock_spin(target_vp);
1217 target_vp->v_flag |= VFMLINKTARGET;
1218 vnode_unlock(target_vp);
1219 cache_purge_locked(vp, &target_vp_cred);
1220 }
1221
1222 if (old_target_vp) {
1223 old_target_vp_v_fmlink = old_target_vp->v_fmlink;
1224 old_target_vp->v_fmlink = NULLVP;
1225 vnode_lock_spin(old_target_vp);
1226 old_target_vp->v_flag &= ~VFMLINKTARGET;
1227 vnode_unlock(old_target_vp);
1228 cache_purge_locked(vp, &old_target_vp_cred);
1229 }
1230
1231 NAME_CACHE_UNLOCK();
1232
1233 if (target_vp_cred && IS_VALID_CRED(target_vp_cred)) {
1234 kauth_cred_unref(&target_vp_cred);
1235 }
1236
1237 if (old_target_vp) {
1238 if (old_target_vp_cred && IS_VALID_CRED(old_target_vp_cred)) {
1239 kauth_cred_unref(&old_target_vp_cred);
1240 }
1241
1242 vnode_rele_ext(old_target_vp, O_EVTONLY, 1);
1243 if (old_target_vp_v_fmlink) {
1244 vnode_rele_ext(old_target_vp_v_fmlink, O_EVTONLY, 1);
1245 }
1246 }
1247
1248 return 0;
1249 }
1250
1251 errno_t
1252 vnode_getfirmlink(vnode_t vp, vnode_t *target_vp)
1253 {
1254 int error;
1255
1256 if (!vp->v_fmlink) {
1257 return ENODEV;
1258 }
1259
1260 NAME_CACHE_LOCK_SHARED();
1261 if (vp->v_fmlink && !(vp->v_flag & VFMLINKTARGET) &&
1262 (vnode_get(vp->v_fmlink) == 0)) {
1263 vnode_t tvp = vp->v_fmlink;
1264
1265 vnode_lock_spin(tvp);
1266 if (tvp->v_lflag & (VL_TERMINATE | VL_DEAD)) {
1267 vnode_unlock(tvp);
1268 NAME_CACHE_UNLOCK();
1269 vnode_put(tvp);
1270 return ENOENT;
1271 }
1272 if (!(tvp->v_flag & VFMLINKTARGET)) {
1273 panic("firmlink target for vnode %p does not have flag set", vp);
1274 }
1275 vnode_unlock(tvp);
1276 *target_vp = tvp;
1277 error = 0;
1278 } else {
1279 *target_vp = NULLVP;
1280 error = ENODEV;
1281 }
1282 NAME_CACHE_UNLOCK();
1283 return error;
1284 }
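
/*
 * Usage sketch (illustrative): resolving a firmlink source to its
 * target returns the target with an iocount held, which the caller
 * must drop:
 *
 *	vnode_t tvp = NULLVP;
 *	if (vnode_getfirmlink(vp, &tvp) == 0) {
 *		...use tvp...
 *		vnode_put(tvp);
 *	}
 */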
1285
1286 #else /* CONFIG_FIRMLINKS */
1287
1288 errno_t
1289 vnode_setasfirmlink(__unused vnode_t vp, __unused vnode_t src_vp)
1290 {
1291 return ENOTSUP;
1292 }
1293
1294 errno_t
1295 vnode_getfirmlink(__unused vnode_t vp, __unused vnode_t *target_vp)
1296 {
1297 return ENOTSUP;
1298 }
1299
1300 #endif
1301
1302 /*
1303 * Mark a vnode as having multiple hard links. HFS makes use of this
1304 * because it keeps track of each link separately, and wants to know
1305 * which link was actually used.
1306 *
1307 * This will cause the name cache to force a VNOP_LOOKUP on the vnode
1308 * so that HFS can post-process the lookup. Also, volfs will call
1309 * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
1310 */
1311 void
1312 vnode_setmultipath(vnode_t vp)
1313 {
1314 vnode_lock_spin(vp);
1315
1316 /*
1317 * In theory, we're changing the vnode's identity as far as the
1318 * name cache is concerned, so we ought to grab the name cache lock
1319 * here. However, there is already a race, and grabbing the name
1320 * cache lock only makes the race window slightly smaller.
1321 *
1322 * The race happens because the vnode already exists in the name
1323 * cache, and could be found by one thread before another thread
1324 * can set the hard link flag.
1325 */
1326
1327 vp->v_flag |= VISHARDLINK;
1328
1329 vnode_unlock(vp);
1330 }
1331
1332
1333
1334 /*
1335 * backwards compatibility
1336 */
1337 void
1338 vnode_uncache_credentials(vnode_t vp)
1339 {
1340 vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
1341 }
1342
1343
1344 /*
1345 * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
1346 * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions
1347 * we use this lock so that we can look at the v_cred and v_authorized_actions
1348 * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
1349 * which is the super-hot path... if we are updating the authorized actions for this
1350 * vnode, we are already in the super-slow and far less frequented path so it's not
1351 * that bad that we take the lock exclusive for this case... of course we strive
1352 * to hold it for the minimum amount of time possible
1353 */
1354
1355 void
1356 vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
1357 {
1358 kauth_cred_t tcred = NOCRED;
1359
1360 NAME_CACHE_LOCK();
1361
1362 vp->v_authorized_actions &= ~action;
1363
1364 if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
1365 IS_VALID_CRED(vp->v_cred)) {
1366 /*
1367 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1368 */
1369 tcred = vp->v_cred;
1370 vp->v_cred = NOCRED;
1371 }
1372 NAME_CACHE_UNLOCK();
1373
1374 if (tcred != NOCRED) {
1375 kauth_cred_unref(&tcred);
1376 }
1377 }
1378
1379
1380 extern int bootarg_vnode_cache_defeat; /* default = 0, from bsd_init.c */
1381
1382 boolean_t
1383 vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
1384 {
1385 kauth_cred_t ucred;
1386 boolean_t retval = FALSE;
1387
1388 /* Boot argument to defeat rights caching */
1389 if (bootarg_vnode_cache_defeat) {
1390 return FALSE;
1391 }
1392
1393 if ((vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL))) {
1394 /*
1395 * a TTL is enabled on the rights cache... handle it here
1396 * a TTL of 0 indicates that no rights should be cached
1397 */
1398 if (vp->v_mount->mnt_authcache_ttl) {
1399 if (!(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL)) {
1400 /*
1401 * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
1402 * we will only allow a SEARCH right on a directory to be cached...
1403 * that cached right always has a default TTL associated with it
1404 */
1405 if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR) {
1406 vp = NULLVP;
1407 }
1408 }
1409 if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
1410 vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
1411 vp = NULLVP;
1412 }
1413 } else {
1414 vp = NULLVP;
1415 }
1416 }
1417 if (vp != NULLVP) {
1418 ucred = vfs_context_ucred(ctx);
1419
1420 NAME_CACHE_LOCK_SHARED();
1421
1422 if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action) {
1423 retval = TRUE;
1424 }
1425
1426 NAME_CACHE_UNLOCK();
1427 }
1428 return retval;
1429 }
1430
1431
1432 void
1433 vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
1434 {
1435 kauth_cred_t tcred = NOCRED;
1436 kauth_cred_t ucred;
1437 struct timeval tv;
1438 boolean_t ttl_active = FALSE;
1439
1440 ucred = vfs_context_ucred(ctx);
1441
1442 if (!IS_VALID_CRED(ucred) || action == 0) {
1443 return;
1444 }
1445
1446 if ((vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL))) {
1447 /*
1448 * a TTL is enabled on the rights cache... handle it here
1449 * a TTL of 0 indicates that no rights should be cached
1450 */
1451 if (vp->v_mount->mnt_authcache_ttl == 0) {
1452 return;
1453 }
1454
1455 if (!(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL)) {
1456 /*
1457 * only cache SEARCH action for filesystems marked
1458 * MNTK_AUTH_OPAQUE on VDIRs...
1459 * the lookup_path code will time these out
1460 */
1461 if ((action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR) {
1462 return;
1463 }
1464 }
1465 ttl_active = TRUE;
1466
1467 microuptime(&tv);
1468 }
1469 NAME_CACHE_LOCK();
1470
1471 if (vp->v_cred != ucred) {
1472 kauth_cred_ref(ucred);
1473 /*
1474 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1475 */
1476 tcred = vp->v_cred;
1477 vp->v_cred = ucred;
1478 vp->v_authorized_actions = 0;
1479 }
1480 if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
1481 /*
1482 * only reset the timestamp on the
1483 * first authorization cached after the previous
1484 * timer has expired or we're switching creds...
1485 * 'vnode_cache_is_authorized' will clear the
1486 * authorized actions if the TTL is active and
1487 * it has expired
1488 */
1489 vp->v_cred_timestamp = (int)tv.tv_sec;
1490 }
1491 vp->v_authorized_actions |= action;
1492
1493 NAME_CACHE_UNLOCK();
1494
1495 if (IS_VALID_CRED(tcred)) {
1496 kauth_cred_unref(&tcred);
1497 }
1498 }
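
/*
 * Sketch of the caching protocol (illustrative): callers consult the
 * cache first and record a full authorization only on a miss:
 *
 *	if (!vnode_cache_is_authorized(vp, ctx, KAUTH_VNODE_SEARCH)) {
 *		error = ...full vnode_authorize() check...;
 *		if (error == 0)
 *			vnode_cache_authorized_action(vp, ctx,
 *			    KAUTH_VNODE_SEARCH);
 *	}
 */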
1499
1500
1501 boolean_t
1502 vnode_cache_is_stale(vnode_t vp)
1503 {
1504 struct timeval tv;
1505 boolean_t retval;
1506
1507 microuptime(&tv);
1508
1509 if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl) {
1510 retval = TRUE;
1511 } else {
1512 retval = FALSE;
1513 }
1514
1515 return retval;
1516 }
1517
1518
1519
1520 /*
1521 * Returns: 0 Success
1522 * ERECYCLE vnode was recycled from underneath us. Force lookup to be re-driven from namei.
1523 * This errno value should not be seen by anyone outside of the kernel.
1524 */
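/*
 * Caller sketch (illustrative): per the contract above, ERECYCLE never
 * escapes the kernel; the lookup is simply re-driven:
 *
 *	error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp);
 *	if (error == ERECYCLE)
 *		...restart the translation from namei()...
 */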
1525 int
1526 cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
1527 vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
1528 {
1529 char *cp; /* pointer into pathname argument */
1530 int vid;
1531 int vvid = 0; /* protected by vp != NULLVP */
1532 vnode_t vp = NULLVP;
1533 vnode_t tdp = NULLVP;
1534 kauth_cred_t ucred;
1535 boolean_t ttl_enabled = FALSE;
1536 struct timeval tv;
1537 mount_t mp;
1538 unsigned int hash;
1539 int error = 0;
1540 boolean_t dotdotchecked = FALSE;
1541
1542 #if CONFIG_TRIGGERS
1543 vnode_t trigger_vp;
1544 #endif /* CONFIG_TRIGGERS */
1545
1546 ucred = vfs_context_ucred(ctx);
1547 ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
1548
1549 NAME_CACHE_LOCK_SHARED();
1550
1551 if (dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL))) {
1552 ttl_enabled = TRUE;
1553 microuptime(&tv);
1554 }
1555 for (;;) {
1556 /*
1557 * Search a directory.
1558 *
1559 * The cn_hash value is for use by cache_lookup
1560 * The last component of the filename is left accessible via
1561 * cnp->cn_nameptr for callers that need the name.
1562 */
1563 hash = 0;
1564 cp = cnp->cn_nameptr;
1565
1566 while (*cp && (*cp != '/')) {
1567 hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
1568 }
1569 /*
1570 * the crc generator can legitimately generate
1571 * a 0... however, 0 for us means that we
1572 * haven't computed a hash, so use 1 instead
1573 */
1574 if (hash == 0) {
1575 hash = 1;
1576 }
1577 cnp->cn_hash = hash;
1578 cnp->cn_namelen = (int)(cp - cnp->cn_nameptr);
1579
1580 ndp->ni_pathlen -= cnp->cn_namelen;
1581 ndp->ni_next = cp;
1582
1583 /*
1584 * Replace multiple slashes by a single slash and trailing slashes
1585 * by a null. This must be done before VNOP_LOOKUP() because some
1586 * fs's don't know about trailing slashes. Remember if there were
1587 * trailing slashes to handle symlinks, existing non-directories
1588 * and non-existing files that won't be directories specially later.
1589 */
1590 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
1591 cp++;
1592 ndp->ni_pathlen--;
1593
1594 if (*cp == '\0') {
1595 ndp->ni_flag |= NAMEI_TRAILINGSLASH;
1596 *ndp->ni_next = '\0';
1597 }
1598 }
1599 ndp->ni_next = cp;
1600
1601 cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);
1602
1603 if (*cp == '\0') {
1604 cnp->cn_flags |= ISLASTCN;
1605 }
1606
1607 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') {
1608 cnp->cn_flags |= ISDOTDOT;
1609 }
1610
1611 *dp_authorized = 0;
1612 #if NAMEDRSRCFORK
1613 /*
1614 * Process a request for a file's resource fork.
1615 *
1616 * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
1617 */
1618 if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
1619 (cp[1] == '.' && cp[2] == '.') &&
1620 bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
1621 /* Skip volfs file systems that don't support native streams. */
1622 if ((dp->v_mount != NULL) &&
1623 (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
1624 (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
1625 goto skiprsrcfork;
1626 }
1627 cnp->cn_flags |= CN_WANTSRSRCFORK;
1628 cnp->cn_flags |= ISLASTCN;
1629 ndp->ni_next[0] = '\0';
1630 ndp->ni_pathlen = 1;
1631 }
1632 skiprsrcfork:
1633 #endif
1634
1635 #if CONFIG_MACF
1636
1637 /*
1638 * Name cache provides authorization caching (see below)
1639 * that will short circuit MAC checks in lookup().
1640 * We must perform the MAC check here. On denial,
1641 * dp_authorized will remain 0 and a second check will
1642 * be performed in lookup().
1643 */
1644 if (!(cnp->cn_flags & DONOTAUTH)) {
1645 error = mac_vnode_check_lookup(ctx, dp, cnp);
1646 if (error) {
1647 NAME_CACHE_UNLOCK();
1648 goto errorout;
1649 }
1650 }
1651 #endif /* MAC */
1652 if (ttl_enabled &&
1653 (dp->v_mount->mnt_authcache_ttl == 0 ||
1654 ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))) {
1655 break;
1656 }
1657
1658 /*
1659 * NAME_CACHE_LOCK holds these fields stable
1660 *
1661 * We can't cache KAUTH_VNODE_SEARCHBYANYONE for root correctly
1662 * so we make an ugly check for root here. root is always
1663 * allowed and breaking out of here only to find out that it is
1664 * authorized by virtue of being root is very very expensive.
1665 * However, the check for not root is valid only for filesystems
1666 * which use local authorization.
1667 *
1668 * XXX: Remove the check for root when we can reliably set
1669 * KAUTH_VNODE_SEARCHBYANYONE as root.
1670 */
1671 if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
1672 !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE) &&
1673 (ttl_enabled || !vfs_context_issuser(ctx))) {
1674 break;
1675 }
1676
1677 /*
1678 * indicate that we're allowed to traverse this directory...
1679 * even if we fail the cache lookup or decide to bail for
1680 * some other reason, this information is valid and is used
1681 * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
1682 */
1683 *dp_authorized = 1;
1684
1685 if ((cnp->cn_flags & (ISLASTCN | ISDOTDOT))) {
1686 /*
1687 * Moving the firmlinks section to be first to catch a corner case:
1688 * When using DOTDOT to get a parent of a firmlink, we want the
1689 * firmlink source to be resolved even if cn_nameiop != LOOKUP.
1690 * This is because lookup() traverses DOTDOT by calling VNOP_LOOKUP
1691 * and has no notion of firmlinks
1692 */
1693 #if CONFIG_FIRMLINKS
1694 if (cnp->cn_flags & ISDOTDOT && dp->v_fmlink && (dp->v_flag & VFMLINKTARGET)) {
1695 dp = dp->v_fmlink;
1696 }
1697 #endif
1698 if (cnp->cn_nameiop != LOOKUP) {
1699 break;
1700 }
1701 if (cnp->cn_flags & LOCKPARENT) {
1702 break;
1703 }
1704 if (cnp->cn_flags & NOCACHE) {
1705 break;
1706 }
1707
1708 if (cnp->cn_flags & ISDOTDOT) {
1709 /*
1710 * Force directory hardlinks to go to
1711 * file system for ".." requests.
1712 */
1713 if ((dp->v_flag & VISHARDLINK)) {
1714 break;
1715 }
1716 /*
1717 * Quit here only if we can't use
1718 * the parent directory pointer or
1719 * don't have one. Otherwise, we'll
1720 * use it below.
1721 */
1722 if ((dp->v_flag & VROOT) ||
1723 dp == ndp->ni_rootdir ||
1724 dp->v_parent == NULLVP) {
1725 break;
1726 }
1727 }
1728 }
1729
1730 if ((cnp->cn_flags & CN_SKIPNAMECACHE)) {
1731 /*
1732 * Force lookup to go to the filesystem with
1733 * all cnp fields set up.
1734 */
1735 break;
1736 }
1737
1738 /*
1739 * "." and ".." aren't supposed to be cached, so check
1740 * for them before checking the cache.
1741 */
1742 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
1743 vp = dp;
1744 } else if ((cnp->cn_flags & ISDOTDOT)) {
1745 /*
1746 * If this is a chrooted process, we need to check if
1747 * the process is trying to break out of its chrooted
1748 * jail. We do that by trying to determine if dp is
1749 * a subdirectory of ndp->ni_rootdir. If we aren't
1750 * able to determine that by the v_parent pointers, we
1751 * will leave the fast path.
1752 *
1753 * Since this function may see dotdot components
1754 * many times and it has the name cache lock held for
1755 * the entire duration, we optimise this by doing this
1756 * check only once per cache_lookup_path call.
1757 * If dotdotchecked is set, it means we've done this
1758 * check once already and don't need to do it again.
1759 */
1760 if (!dotdotchecked && (ndp->ni_rootdir != rootvnode)) {
1761 vnode_t tvp = dp;
1762 boolean_t defer = FALSE;
1763 boolean_t is_subdir = FALSE;
1764
1765 defer = cache_check_vnode_issubdir(tvp,
1766 ndp->ni_rootdir, &is_subdir, &tvp);
1767
1768 if (defer) {
1769 /* defer to Filesystem */
1770 break;
1771 } else if (!is_subdir) {
1772 /*
1773 * This process is trying to break out
1774 * of its chrooted jail, so all its
1775 * dotdot accesses will be translated to
1776 * its root directory.
1777 */
1778 vp = ndp->ni_rootdir;
1779 } else {
1780 /*
1781 * All good, let this dotdot access
1782 * proceed normally
1783 */
1784 vp = dp->v_parent;
1785 }
1786 dotdotchecked = TRUE;
1787 } else {
1788 vp = dp->v_parent;
1789 }
1790 } else {
1791 if ((vp = cache_lookup_locked(dp, cnp)) == NULLVP) {
1792 break;
1793 }
1794
1795 if ((vp->v_flag & VISHARDLINK)) {
1796 /*
1797 * The file system wants a VNOP_LOOKUP on this vnode
1798 */
1799 vp = NULL;
1800 break;
1801 }
1802 }
1803 if ((cnp->cn_flags & ISLASTCN)) {
1804 break;
1805 }
1806
1807 if (vp->v_type != VDIR) {
1808 if (vp->v_type != VLNK) {
1809 vp = NULL;
1810 }
1811 break;
1812 }
1813
1814 if ((mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
1815 vnode_t tmp_vp = mp->mnt_realrootvp;
1816 if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation ||
1817 mp->mnt_realrootvp_vid != tmp_vp->v_id) {
1818 break;
1819 }
1820 vp = tmp_vp;
1821 }
1822
1823 #if CONFIG_TRIGGERS
1824 /*
1825 * After traversing all mountpoints stacked here, if we have a
1826 * trigger in hand, resolve it. Note that we don't need to
1827 * leave the fast path if the mount has already happened.
1828 */
1829 if (vp->v_resolve) {
1830 break;
1831 }
1832 #endif /* CONFIG_TRIGGERS */
1833
1834
1835 dp = vp;
1836 vp = NULLVP;
1837
1838 cnp->cn_nameptr = ndp->ni_next + 1;
1839 ndp->ni_pathlen--;
1840 while (*cnp->cn_nameptr == '/') {
1841 cnp->cn_nameptr++;
1842 ndp->ni_pathlen--;
1843 }
1844 }
1845 if (vp != NULLVP) {
1846 vvid = vp->v_id;
1847 }
1848 vid = dp->v_id;
1849
1850 NAME_CACHE_UNLOCK();
1851
1852 if ((vp != NULLVP) && (vp->v_type != VLNK) &&
1853 ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
1854 /*
1855 * if we've got a child and it's the last component, and
1856 * the lookup doesn't need to return the parent then we
1857 * can skip grabbing an iocount on the parent, since all
1858 * we're going to do with it is a vnode_put just before
1859 * we return from 'lookup'. If it's a symbolic link,
1860 * we need the parent in case the link happens to be
1861 * a relative pathname.
1862 */
1863 tdp = dp;
1864 dp = NULLVP;
1865 } else {
1866 need_dp:
1867 /*
1868 * return the last directory we looked at
1869 * with an io reference held. If it was the one passed
1870 * in as a result of the last iteration of VNOP_LOOKUP,
1871 * it should already hold an io ref. No need to increase ref.
1872 */
1873 if (last_dp != dp) {
1874 if (dp == ndp->ni_usedvp) {
1875 /*
1876 * if this vnode matches the one passed in via USEDVP
1877 * then this context already holds an io_count... just
1878 * use vnode_get to get an extra ref for lookup to play
1879 * with... can't use the getwithvid variant here because
1880 * it will block behind a vnode_drain which would result
1881 * in a deadlock (since we already own an io_count that the
1882 * vnode_drain is waiting on)... vnode_get grabs the io_count
1883 * immediately w/o waiting... it always succeeds
1884 */
1885 vnode_get(dp);
1886 } else if ((error = vnode_getwithvid_drainok(dp, vid))) {
1887 /*
1888 * failure indicates the vnode
1889 * changed identity or is being
1890 * TERMINATED... in either case
1891 * punt this lookup.
1892 *
1893 * don't necessarily return ENOENT, though, because
1894 * we really want to go back to disk and make sure it's
1895 * there or not if someone else is changing this
1896 * vnode. That being said, the one case where we do want
1897 * to return ENOENT is when the vnode's mount point is
1898 * in the process of unmounting and we might cause a deadlock
1899 * in our attempt to take an iocount. An ENODEV error return
1900 * from vnode_get* is an indication of this, but we change it to
1901 * ENOENT for upper layers.
1902 */
1903 if (error == ENODEV) {
1904 error = ENOENT;
1905 } else {
1906 error = ERECYCLE;
1907 }
1908 goto errorout;
1909 }
1910 }
1911 }
1912 if (vp != NULLVP) {
1913 if ((vnode_getwithvid_drainok(vp, vvid))) {
1914 vp = NULLVP;
1915
1916 /*
1917 * can't get a reference on the vp we'd like
1918 * to return... if we didn't grab a reference
1919 * on the directory (due to fast path bypass),
1920 * then we need to do it now... we can't return
1921 * with both ni_dvp and ni_vp NULL, and no
1922 * error condition
1923 */
1924 if (dp == NULLVP) {
1925 dp = tdp;
1926 goto need_dp;
1927 }
1928 }
1929 }
1930
1931 ndp->ni_dvp = dp;
1932 ndp->ni_vp = vp;
1933
1934 #if CONFIG_TRIGGERS
1935 trigger_vp = vp ? vp : dp;
1936 if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
1937 error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
1938 if (error) {
1939 if (vp) {
1940 vnode_put(vp);
1941 }
1942 if (dp) {
1943 vnode_put(dp);
1944 }
1945 goto errorout;
1946 }
1947 }
1948 #endif /* CONFIG_TRIGGERS */
1949
1950 errorout:
1951 /*
1952 * If we came into cache_lookup_path after an iteration of the lookup loop that
1953 * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with a io ref
1954 * on it. It is now the job of cache_lookup_path to drop the ref on this vnode
1955 * when it is no longer needed. If we get to this point, and last_dp is not NULL
1956 * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
1957 * the case that we got to a subsequent path component and this previous vnode is
1958 * no longer needed. We can then drop the io ref on it.
1959 */
1960 if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)) {
1961 vnode_put(last_dp);
1962 }
1963
1964 // error is initialized to 0 and remains 0 if no error cases occurred.
1965 return error;
1966 }
1967
1968
1969 static vnode_t
1970 cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
1971 {
1972 struct namecache *ncp;
1973 struct nchashhead *ncpp;
1974 long namelen = cnp->cn_namelen;
1975 unsigned int hashval = cnp->cn_hash;
1976
1977 if (nc_disabled) {
1978 return NULL;
1979 }
1980
1981 ncpp = NCHHASH(dvp, cnp->cn_hash);
1982 LIST_FOREACH(ncp, ncpp, nc_hash) {
1983 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
1984 if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) {
1985 break;
1986 }
1987 }
1988 }
1989 if (ncp == 0) {
1990 /*
1991 * We failed to find an entry
1992 */
1993 NCHSTAT(ncs_miss);
1994 return NULL;
1995 }
1996 NCHSTAT(ncs_goodhits);
1997
1998 return ncp->nc_vp;
1999 }
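
/*
 * Editorial note: unlike cache_lookup() below, this locked variant
 * assumes the caller already holds the name cache lock; it returns the
 * vnode without taking an iocount or validating its identity (v_id).
 */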
2000
2001
2002 unsigned int hash_string(const char *cp, int len);
2003 //
2004 // Have to take a len argument because we may only need to
2005 // hash part of a componentname.
2006 //
2007 unsigned int
2008 hash_string(const char *cp, int len)
2009 {
2010 unsigned hash = 0;
2011
2012 if (len) {
2013 while (len--) {
2014 hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
2015 }
2016 } else {
2017 while (*cp != '\0') {
2018 hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
2019 }
2020 }
2021 /*
2022 * the crc generator can legitimately generate
2023 * a 0... however, 0 for us means that we
2024 * haven't computed a hash, so use 1 instead
2025 */
2026 if (hash == 0) {
2027 hash = 1;
2028 }
2029 return hash;
2030 }
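
/*
 * Editorial sketch of the two calling conventions used in this file.
 * With a non-zero len only that many bytes are hashed; with len == 0
 * the string is hashed up to its terminating NUL:
 *
 *	hashval = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
 *	hashval = hash_string(entry->nc_name, 0);
 */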
2031
2032
2033 /*
2034 * Lookup an entry in the cache
2035 *
2036 * We don't do this if the segment name is long, simply so the cache
2037 * can avoid holding long names (which would either waste space, or
2038 * add greatly to the complexity).
2039 *
2040 * Lookup is called with dvp pointing to the directory to search,
2041 * cnp pointing to the name of the entry being sought. If the lookup
2042 * succeeds, the vnode is returned in *vpp, and a status of -1 is
2043 * returned. If the lookup determines that the name does not exist
2044 * (negative caching), a status of ENOENT is returned. If the lookup
2045 * fails, a status of zero is returned.
2046 */
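
/*
 * Editorial sketch of a hypothetical caller, illustrating the three
 * return values described above:
 *
 *	switch (cache_lookup(dvp, &vp, cnp)) {
 *	case -1:	// positive hit: vp is valid and holds an iocount
 *		break;
 *	case ENOENT:	// negative hit: name is known not to exist
 *		break;
 *	case 0:		// miss: fall through to the file system
 *		break;
 *	}
 */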
2047
2048 int
2049 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
2050 {
2051 struct namecache *ncp;
2052 struct nchashhead *ncpp;
2053 long namelen = cnp->cn_namelen;
2054 unsigned int hashval;
2055 boolean_t have_exclusive = FALSE;
2056 uint32_t vid;
2057 vnode_t vp;
2058
2059 if (cnp->cn_hash == 0) {
2060 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
2061 }
2062 hashval = cnp->cn_hash;
2063
2064 if (nc_disabled) {
2065 return 0;
2066 }
2067
2068 NAME_CACHE_LOCK_SHARED();
2069
2070 relook:
2071 ncpp = NCHHASH(dvp, cnp->cn_hash);
2072 LIST_FOREACH(ncp, ncpp, nc_hash) {
2073 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
2074 if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) {
2075 break;
2076 }
2077 }
2078 }
2079 /* We failed to find an entry */
2080 if (ncp == 0) {
2081 NCHSTAT(ncs_miss);
2082 NAME_CACHE_UNLOCK();
2083 return 0;
2084 }
2085
2086 /* The caller didn't set MAKEENTRY, so it doesn't want the entry cached; dump it */
2087 if ((cnp->cn_flags & MAKEENTRY) == 0) {
2088 if (have_exclusive == TRUE) {
2089 NCHSTAT(ncs_badhits);
2090 cache_delete(ncp, 1);
2091 NAME_CACHE_UNLOCK();
2092 return 0;
2093 }
2094 NAME_CACHE_UNLOCK();
2095 NAME_CACHE_LOCK();
2096 have_exclusive = TRUE;
2097 goto relook;
2098 }
2099 vp = ncp->nc_vp;
2100
2101 /* We found a "positive" match, return the vnode */
2102 if (vp) {
2103 NCHSTAT(ncs_goodhits);
2104
2105 vid = vp->v_id;
2106 NAME_CACHE_UNLOCK();
2107
2108 if (vnode_getwithvid(vp, vid)) {
2109 #if COLLECT_STATS
2110 NAME_CACHE_LOCK();
2111 NCHSTAT(ncs_badvid);
2112 NAME_CACHE_UNLOCK();
2113 #endif
2114 return 0;
2115 }
2116 *vpp = vp;
2117 return -1;
2118 }
2119
2120 /* We found a negative match, and the caller intends to create this name, so purge */
2121 if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
2122 if (have_exclusive == TRUE) {
2123 NCHSTAT(ncs_badhits);
2124 cache_delete(ncp, 1);
2125 NAME_CACHE_UNLOCK();
2126 return 0;
2127 }
2128 NAME_CACHE_UNLOCK();
2129 NAME_CACHE_LOCK();
2130 have_exclusive = TRUE;
2131 goto relook;
2132 }
2133
2134 /*
2135 * We found a "negative" match; returning ENOENT notifies the caller of it.
2136 */
2137 NCHSTAT(ncs_neghits);
2138
2139 NAME_CACHE_UNLOCK();
2140 return ENOENT;
2141 }
2142
2143 const char *
2144 cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
2145 {
2146 const char *strname;
2147
2148 if (cnp->cn_hash == 0) {
2149 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
2150 }
2151
2152 /*
2153 * grab 2 references on the string entered
2154 * one for the cache_enter_locked to consume
2155 * and the second to be consumed by v_name (vnode_create call point)
2156 */
2157 strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);
2158
2159 NAME_CACHE_LOCK();
2160
2161 cache_enter_locked(dvp, vp, cnp, strname);
2162
2163 NAME_CACHE_UNLOCK();
2164
2165 return strname;
2166 }
2167
2168
2169 /*
2170 * Add an entry to the cache...
2171 * but first check to see if the directory
2172 * that this entry is to be associated with has
2173 * had any cache_purges applied since we took
2174 * our identity snapshot... this check needs to
2175 * be done behind the name cache lock
2176 */
2177 void
2178 cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
2179 {
2180 if (cnp->cn_hash == 0) {
2181 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
2182 }
2183
2184 NAME_CACHE_LOCK();
2185
2186 if (dvp->v_nc_generation == gen) {
2187 (void)cache_enter_locked(dvp, vp, cnp, NULL);
2188 }
2189
2190 NAME_CACHE_UNLOCK();
2191 }
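
/*
 * Editorial usage sketch: a caller snapshots dvp->v_nc_generation, does
 * its lookup work unlocked, then lets this routine drop the entry on
 * the floor if a cache_purge() bumped the generation in the meantime:
 *
 *	int gen = dvp->v_nc_generation;
 *	// ... unlocked work that produces vp ...
 *	cache_enter_with_gen(dvp, vp, cnp, gen);
 */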
2192
2193
2194 /*
2195 * Add an entry to the cache.
2196 */
2197 void
2198 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2199 {
2200 const char *strname;
2201
2202 if (cnp->cn_hash == 0) {
2203 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
2204 }
2205
2206 /*
2207 * grab 1 reference on the string entered
2208 * for the cache_enter_locked to consume
2209 */
2210 strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
2211
2212 NAME_CACHE_LOCK();
2213
2214 cache_enter_locked(dvp, vp, cnp, strname);
2215
2216 NAME_CACHE_UNLOCK();
2217 }
2218
2219
2220 static void
2221 cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
2222 {
2223 struct namecache *ncp, *negp;
2224 struct nchashhead *ncpp;
2225
2226 if (nc_disabled) {
2227 return;
2228 }
2229
2230 /*
2231 * if the entry is for negative caching, vp is null
2232 */
2233 if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
2234 /*
2235 * someone beat us to the punch..
2236 * this vnode is already in the cache
2237 */
2238 if (strname != NULL) {
2239 vfs_removename(strname);
2240 }
2241 return;
2242 }
2243 /*
2244 * We allocate a new entry if we are less than the maximum
2245 * allowed and the one at the front of the list is in use.
2246 * Otherwise we use the one at the front of the list.
2247 */
2248 if (numcache < desiredNodes &&
2249 ((ncp = nchead.tqh_first) == NULL ||
2250 ncp->nc_hash.le_prev != 0)) {
2251 /*
2252 * Allocate one more entry
2253 */
2254 ncp = zalloc(namecache_zone);
2255 numcache++;
2256 } else {
2257 /*
2258 * reuse an old entry
2259 */
2260 ncp = TAILQ_FIRST(&nchead);
2261 TAILQ_REMOVE(&nchead, ncp, nc_entry);
2262
2263 if (ncp->nc_hash.le_prev != 0) {
2264 /*
2265 * still in use... we need to
2266 * delete it before re-using it
2267 */
2268 NCHSTAT(ncs_stolen);
2269 cache_delete(ncp, 0);
2270 }
2271 }
2272 NCHSTAT(ncs_enters);
2273
2274 /*
2275 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
2276 */
2277 ncp->nc_vp = vp;
2278 ncp->nc_dvp = dvp;
2279 ncp->nc_hashval = cnp->cn_hash;
2280
2281 if (strname == NULL) {
2282 ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
2283 } else {
2284 ncp->nc_name = strname;
2285 }
2286
2287 //
2288 // If the bytes of the name associated with the vnode differ,
2289 // use the name associated with the vnode since the file system
2290 // may have set that explicitly in the case of a lookup on a
2291 // case-insensitive file system where the case of the looked up
2292 // name differs from what is on disk. For more details, see:
2293 // <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
2294 //
2295 const char *vn_name = vp ? vp->v_name : NULL;
2296 unsigned int len = vn_name ? (unsigned int)strlen(vn_name) : 0;
2297 if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) {
2298 unsigned int hash = hash_string(vn_name, len);
2299
2300 vfs_removename(ncp->nc_name);
2301 ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0);
2302 ncp->nc_hashval = hash;
2303 }
2304
2305 /*
2306 * make us the newest entry in the cache
2307 * i.e. we'll be the last to be stolen
2308 */
2309 TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);
2310
2311 ncpp = NCHHASH(dvp, cnp->cn_hash);
2312 #if DIAGNOSTIC
2313 {
2314 struct namecache *p;
2315
2316 for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next) {
2317 if (p == ncp) {
2318 panic("cache_enter: duplicate");
2319 }
2320 }
2321 }
2322 #endif
2323 /*
2324 * make us available to be found via lookup
2325 */
2326 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
2327
2328 if (vp) {
2329 /*
2330 * add to the list of name cache entries
2331 * that point at vp
2332 */
2333 LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
2334 } else {
2335 /*
2336 * this is a negative cache entry (vp == NULL)
2337 * stick it on the negative cache list.
2338 */
2339 TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);
2340
2341 ncs_negtotal++;
2342
2343 if (ncs_negtotal > desiredNegNodes) {
2344 /*
2345 * if we've reached our desired limit
2346 * of negative cache entries, delete
2347 * the oldest
2348 */
2349 negp = TAILQ_FIRST(&neghead);
2350 cache_delete(negp, 1);
2351 }
2352 }
2353 /*
2354 * add us to the list of name cache entries that are children of dvp...
2355 * negatives go at the head so cache_purge_negatives() can stop early
2356 */
2357 if (vp) {
2358 TAILQ_INSERT_TAIL(&dvp->v_ncchildren, ncp, nc_child);
2359 } else {
2360 TAILQ_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
2361 }
2362 }
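
/*
 * Editorial recap of the linkage established above: each entry ends up
 * on (1) the LRU list nchead, (2) its hash chain NCHHASH(dvp, hash),
 * (3) either vp->v_nclinks (positive) or neghead (negative), and
 * (4) dvp->v_ncchildren.  cache_delete() below undoes (2)-(4) and,
 * when freeing the entry, (1) as well.
 */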
2363
2364
2365 /*
2366 * Initialize CRC-32 remainder table.
2367 */
2368 static void
2369 init_crc32(void)
2370 {
2371 /*
2372 * the CRC-32 generator polynomial is:
2373 * x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11
2374 * + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
2375 */
2376 unsigned int crc32_polynomial = 0x04c11db7;
2377 unsigned int i, j;
2378
2379 /*
2380 * pre-calculate the CRC-32 remainder for each possible octet encoding
2381 */
2382 for (i = 0; i < 256; i++) {
2383 unsigned int crc_rem = i << 24;
2384
2385 for (j = 0; j < 8; j++) {
2386 if (crc_rem & 0x80000000) {
2387 crc_rem = (crc_rem << 1) ^ crc32_polynomial;
2388 } else {
2389 crc_rem = (crc_rem << 1);
2390 }
2391 }
2392 crc32tab[i] = crc_rem;
2393 }
2394 }
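
/*
 * Editorial note: with the table above, one input byte is folded into a
 * running 32-bit value as
 *
 *	hash = crc32tab[(hash >> 24) ^ byte] ^ (hash << 8);
 *
 * which is exactly the update step hash_string() uses.
 */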
2395
2396
2397 /*
2398 * Name cache initialization, from vfs_init() when we are booting
2399 */
2400 void
2401 nchinit(void)
2402 {
2403 int i;
2404
2405 desiredNegNodes = (desiredvnodes / 10);
2406 desiredNodes = desiredvnodes + desiredNegNodes;
2407
2408 TAILQ_INIT(&nchead);
2409 TAILQ_INIT(&neghead);
2410
2411 init_crc32();
2412
2413 nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 * desiredNodes)), M_CACHE, &nchash);
2414 nchashmask = nchash;
2415 nchash++;
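	/*
	 * editorial note: hashinit() returns the bucket mask through its
	 * third argument, so nchashmask holds the mask and nchash (mask + 1)
	 * is the number of buckets
	 */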
2416
2417 init_string_table();
2418
2419 /* Allocate name cache lock group attribute and group */
2420 namecache_lck_grp_attr = lck_grp_attr_alloc_init();
2421
2422 namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr);
2423
2424 /* Allocate name cache lock attribute */
2425 namecache_lck_attr = lck_attr_alloc_init();
2426
2427 /* Allocate name cache lock */
2428 namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);
2429
2430
2431 /* Allocate string cache lock group attribute and group */
2432 strcache_lck_grp_attr = lck_grp_attr_alloc_init();
2433
2434 strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr);
2435
2436 /* Allocate string cache lock attribute */
2437 strcache_lck_attr = lck_attr_alloc_init();
2438
2439 /* Allocate string cache lock */
2440 strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);
2441
2442 for (i = 0; i < NUM_STRCACHE_LOCKS; i++) {
2443 lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
2444 }
2445
2446 /* Allocate root vnode lock group attribute and group */
2447 rootvnode_lck_grp_attr = lck_grp_attr_alloc_init();
2448
2449 rootvnode_lck_grp = lck_grp_alloc_init("rootvnode", rootvnode_lck_grp_attr);
2450
2451 /* Allocate rootvnode lock attribute */
2452 rootvnode_lck_attr = lck_attr_alloc_init();
2453
2454 /* Allocate rootvnode lock */
2455 rootvnode_rw_lock = lck_rw_alloc_init(rootvnode_lck_grp, rootvnode_lck_attr);
2456 }
2457
2458 void
2459 name_cache_lock_shared(void)
2460 {
2461 lck_rw_lock_shared(namecache_rw_lock);
2462 }
2463
2464 void
2465 name_cache_lock(void)
2466 {
2467 lck_rw_lock_exclusive(namecache_rw_lock);
2468 }
2469
2470 void
2471 name_cache_unlock(void)
2472 {
2473 lck_rw_done(namecache_rw_lock);
2474 }
2475
2476
2477 int
2478 resize_namecache(int newsize)
2479 {
2480 struct nchashhead *new_table;
2481 struct nchashhead *old_table;
2482 struct nchashhead *old_head, *head;
2483 struct namecache *entry, *next;
2484 uint32_t i, hashval;
2485 int dNodes, dNegNodes, nelements;
2486 u_long new_size, old_size;
2487
2488 if (newsize < 0) {
2489 return EINVAL;
2490 }
2491
2492 dNegNodes = (newsize / 10);
2493 dNodes = newsize + dNegNodes;
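	// editorial example: newsize == 100000 gives dNegNodes == 10000 and
	// dNodes == 110000, mirroring the 10:1 split nchinit() sets up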
2494 // we don't support shrinking yet
2495 if (dNodes <= desiredNodes) {
2496 return 0;
2497 }
2498
2499 if (os_mul_overflow(dNodes, 2, &nelements)) {
2500 return EINVAL;
2501 }
2502
2503 new_table = hashinit(nelements, M_CACHE, &nchashmask);
2504 new_size = nchashmask + 1;
2505
2506 if (new_table == NULL) {
2507 return ENOMEM;
2508 }
2509
2510 NAME_CACHE_LOCK();
2511 // do the switch!
2512 old_table = nchashtbl;
2513 nchashtbl = new_table;
2514 old_size = nchash;
2515 nchash = new_size;
2516
2517 // walk the old table and insert all the entries into
2518 // the new table
2519 //
2520 for (i = 0; i < old_size; i++) {
2521 old_head = &old_table[i];
2522 for (entry = old_head->lh_first; entry != NULL; entry = next) {
2523 //
2524 // XXXdbg - Beware: this assumes that hash_string() does
2525 // the same thing as what happens in
2526 // lookup() over in vfs_lookup.c
2527 hashval = hash_string(entry->nc_name, 0);
2528 entry->nc_hashval = hashval;
2529 head = NCHHASH(entry->nc_dvp, hashval);
2530
2531 next = entry->nc_hash.le_next;
2532 LIST_INSERT_HEAD(head, entry, nc_hash);
2533 }
2534 }
2535 desiredNodes = dNodes;
2536 desiredNegNodes = dNegNodes;
2537
2538 NAME_CACHE_UNLOCK();
2539 FREE(old_table, M_CACHE);
2540
2541 return 0;
2542 }
2543
2544 static void
2545 cache_delete(struct namecache *ncp, int free_entry)
2546 {
2547 NCHSTAT(ncs_deletes);
2548
2549 if (ncp->nc_vp) {
2550 LIST_REMOVE(ncp, nc_un.nc_link);
2551 } else {
2552 TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
2553 ncs_negtotal--;
2554 }
2555 TAILQ_REMOVE(&(ncp->nc_dvp->v_ncchildren), ncp, nc_child);
2556
2557 LIST_REMOVE(ncp, nc_hash);
2558 /*
2559 * this field is used to indicate
2560 * that the entry is in use and
2561 * must be deleted before it can
2562 * be reused...
2563 */
2564 ncp->nc_hash.le_prev = NULL;
2565
2566 vfs_removename(ncp->nc_name);
2567 ncp->nc_name = NULL;
2568 if (free_entry) {
2569 TAILQ_REMOVE(&nchead, ncp, nc_entry);
2570 zfree(namecache_zone, ncp);
2571 numcache--;
2572 }
2573 }
2574
2575
2576 /*
2577 * purge the entry associated with the
2578 * specified vnode from the name cache
2579 */
2580 static void
2581 cache_purge_locked(vnode_t vp, kauth_cred_t *credp)
2582 {
2583 struct namecache *ncp;
2584
2585 *credp = NULL;
2586 if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
2587 (TAILQ_FIRST(&vp->v_ncchildren) == NULL) &&
2588 (vp->v_cred == NOCRED) &&
2589 (vp->v_parent == NULLVP)) {
2590 return;
2591 }
2592
2593 if (vp->v_parent) {
2594 vp->v_parent->v_nc_generation++;
2595 }
2596
2597 while ((ncp = LIST_FIRST(&vp->v_nclinks))) {
2598 cache_delete(ncp, 1);
2599 }
2600
2601 while ((ncp = TAILQ_FIRST(&vp->v_ncchildren))) {
2602 cache_delete(ncp, 1);
2603 }
2604
2605 /*
2606 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
2607 */
2608 *credp = vp->v_cred;
2609 vp->v_cred = NOCRED;
2610 vp->v_authorized_actions = 0;
2611 }
2612
2613 void
2614 cache_purge(vnode_t vp)
2615 {
2616 kauth_cred_t tcred = NULL;
2617
2618 if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
2619 (TAILQ_FIRST(&vp->v_ncchildren) == NULL) &&
2620 (vp->v_cred == NOCRED) &&
2621 (vp->v_parent == NULLVP)) {
2622 return;
2623 }
2624
2625 NAME_CACHE_LOCK();
2626
2627 cache_purge_locked(vp, &tcred);
2628
2629 NAME_CACHE_UNLOCK();
2630
2631 if (tcred && IS_VALID_CRED(tcred)) {
2632 kauth_cred_unref(&tcred);
2633 }
2634 }
2635
2636 /*
2637 * Purge all negative cache entries that are children of the
2638 * given vnode. A case-insensitive file system (or any file
2639 * system that has multiple equivalent names for the same
2640 * directory entry) can use this when creating or renaming
2641 * to remove negative entries that may no longer apply.
2642 */
2643 void
2644 cache_purge_negatives(vnode_t vp)
2645 {
2646 struct namecache *ncp, *next_ncp;
2647
2648 NAME_CACHE_LOCK();
2649
2650 TAILQ_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp) {
2651 if (ncp->nc_vp) {
2652 break;
2653 }
2654
2655 cache_delete(ncp, 1);
2656 }
2657
2658 NAME_CACHE_UNLOCK();
2659 }
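
/*
 * Editorial note: the early break above is safe because
 * cache_enter_locked() inserts negative entries at the head of
 * v_ncchildren, so the negatives form a prefix of the list.  A
 * case-insensitive file system would simply call
 *
 *	cache_purge_negatives(dvp);
 *
 * after a successful create or rename in dvp.
 */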
2660
2661 /*
2662 * Flush all entries referencing a particular filesystem.
2663 *
2664 * Since we need to check it anyway, we will flush all the invalid
2665 * entries at the same time.
2666 */
2667 void
2668 cache_purgevfs(struct mount *mp)
2669 {
2670 struct nchashhead *ncpp;
2671 struct namecache *ncp;
2672
2673 NAME_CACHE_LOCK();
2674 /* Scan hash tables for applicable entries */
2675 for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
2676 restart:
2677 for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
2678 if (ncp->nc_dvp->v_mount == mp) {
2679 cache_delete(ncp, 0);
2680 goto restart;
2681 }
2682 }
2683 }
2684 NAME_CACHE_UNLOCK();
2685 }
2686
2687
2688
2689 //
2690 // String ref routines
2691 //
2692 static LIST_HEAD(stringhead, string_t) * string_ref_table;
2693 static u_long string_table_mask;
2694 static uint32_t filled_buckets = 0;
2695
2696
2697 typedef struct string_t {
2698 LIST_ENTRY(string_t) hash_chain;
2699 const char *str;
2700 uint32_t refcount;
2701 } string_t;
2702
2703
2704 static void
2705 resize_string_ref_table(void)
2706 {
2707 struct stringhead *new_table;
2708 struct stringhead *old_table;
2709 struct stringhead *old_head, *head;
2710 string_t *entry, *next;
2711 uint32_t i, hashval;
2712 u_long new_mask, old_mask;
2713
2714 /*
2715 * need to hold the table lock exclusively
2716 * in order to grow the table... need to recheck
2717 * the need to resize again after we've taken
2718 * the lock exclusively in case some other thread
2719 * beat us to the punch
2720 */
2721 lck_rw_lock_exclusive(strtable_rw_lock);
2722
2723 if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
2724 lck_rw_done(strtable_rw_lock);
2725 return;
2726 }
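	/*
	 * editorial example: the test above reads "fewer than 3/4 of the
	 * buckets are filled"; with string_table_mask == 4095 (4096 buckets)
	 * the resize proceeds only once filled_buckets reaches 3072
	 */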
2727 assert(string_table_mask < INT32_MAX);
2728 new_table = hashinit((int)(string_table_mask + 1) * 2, M_CACHE, &new_mask);
2729
2730 if (new_table == NULL) {
2731 printf("failed to resize the hash table.\n");
2732 lck_rw_done(strtable_rw_lock);
2733 return;
2734 }
2735
2736 // do the switch!
2737 old_table = string_ref_table;
2738 string_ref_table = new_table;
2739 old_mask = string_table_mask;
2740 string_table_mask = new_mask;
2741 filled_buckets = 0;
2742
2743 // walk the old table and insert all the entries into
2744 // the new table
2745 //
2746 for (i = 0; i <= old_mask; i++) {
2747 old_head = &old_table[i];
2748 for (entry = old_head->lh_first; entry != NULL; entry = next) {
2749 hashval = hash_string((const char *)entry->str, 0);
2750 head = &string_ref_table[hashval & string_table_mask];
2751 if (head->lh_first == NULL) {
2752 filled_buckets++;
2753 }
2754 next = entry->hash_chain.le_next;
2755 LIST_INSERT_HEAD(head, entry, hash_chain);
2756 }
2757 }
2758 lck_rw_done(strtable_rw_lock);
2759
2760 FREE(old_table, M_CACHE);
2761 }
2762
2763
2764 static void
2765 init_string_table(void)
2766 {
2767 string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
2768 }
2769
2770
2771 const char *
2772 vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
2773 {
2774 return add_name_internal(name, len, hashval, FALSE, flags);
2775 }
2776
2777
2778 static const char *
2779 add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
2780 {
2781 struct stringhead *head;
2782 string_t *entry;
2783 uint32_t chain_len = 0;
2784 uint32_t hash_index;
2785 uint32_t lock_index;
2786 char *ptr;
2787
2788 if (len > MAXPATHLEN) {
2789 len = MAXPATHLEN;
2790 }
2791
2792 /*
2793 * if the length already accounts for the null-byte, then
2794 * subtract one so later on we don't index past the end
2795 * of the string.
2796 */
2797 if (len > 0 && name[len - 1] == '\0') {
2798 len--;
2799 }
2800 if (hashval == 0) {
2801 hashval = hash_string(name, len);
2802 }
2803
2804 /*
2805 * take this lock 'shared' to keep the hash stable
2806 * if someone else decides to grow the pool they
2807 * will take this lock exclusively
2808 */
2809 lck_rw_lock_shared(strtable_rw_lock);
2810
2811 /*
2812 * If the table gets more than 3/4 full, resize it
2813 */
2814 if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
2815 lck_rw_done(strtable_rw_lock);
2816
2817 resize_string_ref_table();
2818
2819 lck_rw_lock_shared(strtable_rw_lock);
2820 }
2821 hash_index = hashval & string_table_mask;
2822 lock_index = hash_index % NUM_STRCACHE_LOCKS;
2823
2824 head = &string_ref_table[hash_index];
2825
2826 lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
2827
2828 for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
2829 if (strncmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
2830 entry->refcount++;
2831 break;
2832 }
2833 }
2834 if (entry == NULL) {
2835 lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
2836 /*
2837 * it wasn't already there so add it.
2838 */
2839 entry = kheap_alloc(KHEAP_DEFAULT, sizeof(string_t) + len + 1, Z_WAITOK);
2840
2841 if (head->lh_first == NULL) {
2842 OSAddAtomic(1, &filled_buckets);
2843 }
2844 ptr = (char *)((char *)entry + sizeof(string_t));
2845 strncpy(ptr, name, len);
2846 ptr[len] = '\0';
2847 entry->str = ptr;
2848 entry->refcount = 1;
2849 LIST_INSERT_HEAD(head, entry, hash_chain);
2850 }
2851 if (need_extra_ref == TRUE) {
2852 entry->refcount++;
2853 }
2854
2855 lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
2856 lck_rw_done(strtable_rw_lock);
2857
2858 return (const char *)entry->str;
2859 }
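
/*
 * Editorial usage sketch: vfs_addname()/vfs_removename() implement a
 * refcounted string intern table, so equal names share one allocation:
 *
 *	const char *nm = vfs_addname("foo", 3, 0, 0);	// ref 1
 *	const char *nm2 = vfs_addname("foo", 3, 0, 0);	// nm2 == nm, ref 2
 *	vfs_removename(nm2);				// back to ref 1
 *	vfs_removename(nm);				// freed
 */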
2860
2861
2862 int
2863 vfs_removename(const char *nameref)
2864 {
2865 struct stringhead *head;
2866 string_t *entry;
2867 uint32_t hashval;
2868 uint32_t hash_index;
2869 uint32_t lock_index;
2870 int retval = ENOENT;
2871
2872 hashval = hash_string(nameref, 0);
2873
2874 /*
2875 * take this lock 'shared' to keep the hash stable
2876 * if someone else decides to grow the pool they
2877 * will take this lock exclusively
2878 */
2879 lck_rw_lock_shared(strtable_rw_lock);
2880 /*
2881 * must compute the head behind the table lock
2882 * since the size and location of the table
2883 * can change on the fly
2884 */
2885 hash_index = hashval & string_table_mask;
2886 lock_index = hash_index % NUM_STRCACHE_LOCKS;
2887
2888 head = &string_ref_table[hash_index];
2889
2890 lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
2891
2892 for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
2893 if (entry->str == nameref) {
2894 entry->refcount--;
2895
2896 if (entry->refcount == 0) {
2897 LIST_REMOVE(entry, hash_chain);
2898
2899 if (head->lh_first == NULL) {
2900 OSAddAtomic(-1, &filled_buckets);
2901 }
2902 } else {
2903 entry = NULL;
2904 }
2905 retval = 0;
2906 break;
2907 }
2908 }
2909 lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
2910 lck_rw_done(strtable_rw_lock);
2911
2912 kheap_free_addr(KHEAP_DEFAULT, entry);
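	// editorial note: entry is non-NULL here only when the final
	// reference was dropped above; the free is assumed to be a no-op
	// when entry is NULL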
2913
2914 return retval;
2915 }
2916
2917
2918 #ifdef DUMP_STRING_TABLE
2919 void
2920 dump_string_table(void)
2921 {
2922 struct stringhead *head;
2923 string_t *entry;
2924 u_long i;
2925
2926 lck_rw_lock_shared(strtable_rw_lock);
2927
2928 for (i = 0; i <= string_table_mask; i++) {
2929 head = &string_ref_table[i];
2930 for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
2931 printf("%6u - %s\n", entry->refcount, entry->str);
2932 }
2933 }
2934 lck_rw_done(strtable_rw_lock);
2935 }
2936 #endif /* DUMP_STRING_TABLE */