bsd/vfs/vfs_cache.c

   1 /*
   2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1989, 1993, 1995
  31  *      The Regents of the University of California.  All rights reserved.
  32  *
  33  * This code is derived from software contributed to Berkeley by
  34  * Poul-Henning Kamp of the FreeBSD Project.
  35  *
  36  * Redistribution and use in source and binary forms, with or without
  37  * modification, are permitted provided that the following conditions
  38  * are met:
  39  * 1. Redistributions of source code must retain the above copyright
  40  *    notice, this list of conditions and the following disclaimer.
  41  * 2. Redistributions in binary form must reproduce the above copyright
  42  *    notice, this list of conditions and the following disclaimer in the
  43  *    documentation and/or other materials provided with the distribution.
  44  * 3. All advertising materials mentioning features or use of this software
  45  *    must display the following acknowledgement:
  46  *      This product includes software developed by the University of
  47  *      California, Berkeley and its contributors.
  48  * 4. Neither the name of the University nor the names of its contributors
  49  *    may be used to endorse or promote products derived from this software
  50  *    without specific prior written permission.
  51  *
  52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  62  * SUCH DAMAGE.
  63  *
  64  *
  65  *      @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73 #include <sys/param.h>
  74 #include <sys/systm.h>
  75 #include <sys/time.h>
  76 #include <sys/mount_internal.h>
  77 #include <sys/vnode_internal.h>
  78 #include <miscfs/specfs/specdev.h>
  79 #include <sys/namei.h>
  80 #include <sys/errno.h>
  81 #include <sys/malloc.h>
  82 #include <sys/kauth.h>
  83 #include <sys/user.h>
  84 #include <sys/paths.h>
  85
  86 #if CONFIG_MACF
  87 #include <security/mac_framework.h>
  88 #endif
  89
  90 /*
  91  * Name caching works as follows:
  92  *
  93  * Names found by directory scans are retained in a cache
  94  * for future reference.  It is managed LRU, so frequently
  95  * used names will hang around.  Cache is indexed by hash value
  96  * obtained from (vp, name) where vp refers to the directory
  97  * containing name.
  98  *
  99  * If it is a "negative" entry, (i.e. for a name that is known NOT to
 100  * exist) the vnode pointer will be NULL.
 101  *
 102  * Upon reaching the last segment of a path, if the reference
 103  * is for DELETE, or NOCACHE is set (rewrite), and the
 104  * name is located in the cache, it will be dropped.
 105  */
 106
 107 /*
 108  * Structures associated with name cacheing.
 109  */
 110
 111 LIST_HEAD(nchashhead, namecache) *nchashtbl;    /* Hash Table */
 112 u_long  nchashmask;
 113 u_long  nchash;                         /* size of hash table - 1 */
 114 long    numcache;                       /* number of cache entries allocated */
 115 int     desiredNodes;
 116 int     desiredNegNodes;
 117 int     ncs_negtotal;
 118 int     nc_disabled = 0;
 119 TAILQ_HEAD(, namecache) nchead;         /* chain of all name cache entries */
 120 TAILQ_HEAD(, namecache) neghead;        /* chain of only negative cache entries */
 121
 122
 123 #if COLLECT_STATS
 124
 125 struct  nchstats nchstats;              /* cache effectiveness statistics */
 126
 127 #define NCHSTAT(v) {            \
 128         nchstats.v++;           \
 129 }
 130 #define NAME_CACHE_LOCK()               name_cache_lock()
 131 #define NAME_CACHE_UNLOCK()             name_cache_unlock()
 132 #define NAME_CACHE_LOCK_SHARED()        name_cache_lock()
 133
 134 #else
 135
 136 #define NCHSTAT(v)
 137 #define NAME_CACHE_LOCK()               name_cache_lock()
 138 #define NAME_CACHE_UNLOCK()             name_cache_unlock()
 139 #define NAME_CACHE_LOCK_SHARED()        name_cache_lock_shared()
 140
 141 #endif
 142
 143
 144 /* vars for name cache list lock */
 145 lck_grp_t * namecache_lck_grp;
 146 lck_grp_attr_t * namecache_lck_grp_attr;
 147 lck_attr_t * namecache_lck_attr;
 148
 149 lck_grp_t * strcache_lck_grp;
 150 lck_grp_attr_t * strcache_lck_grp_attr;
 151 lck_attr_t * strcache_lck_attr;
 152
 153 lck_rw_t  * namecache_rw_lock;
 154 lck_rw_t  * strtable_rw_lock;
 155
 156 #define NUM_STRCACHE_LOCKS 1024
 157
 158 lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];
 159
 160
 161 static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
 162 static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
 163 static void init_string_table(void);
 164 static void cache_delete(struct namecache *, int);
 165 static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);
 166
 167 #ifdef DUMP_STRING_TABLE
 168 /*
 169  * Internal dump function used for debugging
 170  */
 171 void dump_string_table(void);
 172 #endif  /* DUMP_STRING_TABLE */
 173
 174 static void init_crc32(void);
 175 static unsigned int crc32tab[256];
 176
 177
 178 #define NCHHASH(dvp, hash_val) \
 179         (&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])
 180
 181 /*
 182  * This function tries to check if a directory vp is a subdirectory of dvp
 183  * only from valid v_parent pointers. It is called with the name cache lock
 184  * held and does not drop the lock anytime inside the function.
 185  *
 186  * It returns a boolean that indicates whether or not it was able to
 187  * successfully infer the parent/descendent relationship via the v_parent
 188  * pointers, or if it could not infer such relationship and that the decision
 189  * must be delegated to the owning filesystem.
 190  *
 191  * If it does not defer the decision, i.e. it was successfuly able to determine
 192  * the parent/descendent relationship,  *is_subdir tells the caller if vp is a
 193  * subdirectory of dvp.
 194  *
 195  * If the decision is deferred, *next_vp is where it stopped i.e. *next_vp
 196  * is the vnode whose parent is to be determined from the filesystem.
 197  * *is_subdir, in this case, is not indicative of anything and should be
 198  * ignored.
 199  *
 200  * The return value and output args should be used as follows :
 201  *
 202  * defer = cache_check_vnode_issubdir(vp, dvp, is_subdir, next_vp);
 203  * if (!defer) {
 204  *      if (*is_subdir)
 205  *              vp is subdirectory;
 206  *      else
 207  *              vp is not a subdirectory;
 208  * } else {
 209  *      if (*next_vp)
 210  *              check this vnode's parent from the filesystem
 211  *      else
 212  *              error (likely because of forced unmount).
 213  * }
 214  *
 215  */
 216 static boolean_t
 217 cache_check_vnode_issubdir(vnode_t vp, vnode_t dvp, boolean_t *is_subdir,
 218     vnode_t *next_vp)
 219 {
 220         vnode_t tvp = vp;
 221         int defer = FALSE;
 222
 223         *is_subdir = FALSE;
 224         *next_vp = NULLVP;
 225         while (1) {
 226                 mount_t tmp;
 227
 228                 if (tvp == dvp) {
 229                         *is_subdir = TRUE;
 230                         break;
 231                 } else if (tvp == rootvnode) {
 232                         /* *is_subdir = FALSE */
 233                         break;
 234                 }
 235
 236                 tmp = tvp->v_mount;
 237                 while ((tvp->v_flag & VROOT) && tmp && tmp->mnt_vnodecovered &&
 238                     tvp != dvp && tvp != rootvnode) {
 239                         tvp = tmp->mnt_vnodecovered;
 240                         tmp = tvp->v_mount;
 241                 }
 242
 243                 /*
 244                  * If dvp is not at the top of a mount "stack" then
 245                  * vp is not a subdirectory of dvp either.
 246                  */
 247                 if (tvp == dvp || tvp == rootvnode) {
 248                         /* *is_subdir = FALSE */
 249                         break;
 250                 }
 251
 252                 if (!tmp) {
 253                         defer = TRUE;
 254                         *next_vp = NULLVP;
 255                         break;
 256                 }
 257
 258                 if ((tvp->v_flag & VISHARDLINK) || !(tvp->v_parent)) {
 259                         defer = TRUE;
 260                         *next_vp = tvp;
 261                         break;
 262                 }
 263
 264                 tvp = tvp->v_parent;
 265         }
 266
 267         return (defer);
 268 }
 269
 270 /* maximum times retry from potentially transient errors in vnode_issubdir */
 271 #define MAX_ERROR_RETRY 3
 272
 273 /*
 274  * This function checks if a given directory (vp) is a subdirectory of dvp.
 275  * It walks backwards from vp and if it hits dvp in its parent chain,
 276  * it is a subdirectory. If it encounters the root directory, it is not
 277  * a subdirectory.
 278  *
 279  * This function returns an error if it is unsuccessful and 0 on success.
 280  *
 281  * On entry (and exit) vp has an iocount and if this function has to take
 282  * any iocounts on other vnodes in the parent chain traversal, it releases them.
 283  */
 284 int
 285 vnode_issubdir(vnode_t vp, vnode_t dvp, int *is_subdir, vfs_context_t ctx)
 286 {
 287         vnode_t start_vp, tvp;
 288         vnode_t vp_with_iocount;
 289         int error = 0;
 290         char dotdotbuf[] = "..";
 291         int error_retry_count = 0; /* retry count for potentially transient
 292                                       errors */
 293
 294         *is_subdir = FALSE;
 295         tvp = start_vp = vp;
 296         /*
 297          * Anytime we acquire an iocount in this function, we save the vnode
 298          * in this variable and release it before exiting.
 299          */
 300         vp_with_iocount = NULLVP;
 301
 302         while (1) {
 303                 boolean_t defer;
 304                 vnode_t pvp;
 305                 uint32_t vid;
 306                 struct componentname cn;
 307                 boolean_t is_subdir_locked = FALSE;
 308
 309                 if (tvp == dvp) {
 310                         *is_subdir = TRUE;
 311                         break;
 312                 } else if (tvp == rootvnode) {
 313                         /* *is_subdir = FALSE */
 314                         break;
 315                 }
 316
 317                 NAME_CACHE_LOCK_SHARED();
 318
 319                 defer = cache_check_vnode_issubdir(tvp, dvp, &is_subdir_locked,
 320                     &tvp);
 321
 322                 if (defer && tvp)
 323                         vid = vnode_vid(tvp);
 324
 325                 NAME_CACHE_UNLOCK();
 326
 327                 if (!defer) {
 328                         *is_subdir = is_subdir_locked;
 329                         break;
 330                 }
 331
 332                 if (!tvp) {
 333                         if (error_retry_count++ < MAX_ERROR_RETRY) {
 334                                 tvp = vp;
 335                                 continue;
 336                         }
 337                         error = ENOENT;
 338                         break;
 339                 }
 340
 341                 if (tvp != start_vp) {
 342                         if (vp_with_iocount) {
 343                                 vnode_put(vp_with_iocount);
 344                                 vp_with_iocount = NULLVP;
 345                         }
 346
 347                         error = vnode_getwithvid(tvp, vid);
 348                         if (error) {
 349                                 if (error_retry_count++ < MAX_ERROR_RETRY) {
 350                                         tvp = vp;
 351                                         error = 0;
 352                                         continue;
 353                                 }
 354                                 break;
 355                         }
 356
 357                         vp_with_iocount = tvp;
 358                 }
 359
 360                 bzero(&cn, sizeof(cn));
 361                 cn.cn_nameiop = LOOKUP;
 362                 cn.cn_flags = ISLASTCN | ISDOTDOT;
 363                 cn.cn_context = ctx;
 364                 cn.cn_pnbuf = &dotdotbuf[0];
 365                 cn.cn_pnlen = sizeof(dotdotbuf);
 366                 cn.cn_nameptr = cn.cn_pnbuf;
 367                 cn.cn_namelen = 2;
 368
 369                 pvp = NULLVP;
 370                 if ((error = VNOP_LOOKUP(tvp, &pvp, &cn, ctx)))
 371                         break;
 372
 373                 if (!(tvp->v_flag & VISHARDLINK) && tvp->v_parent != pvp) {
 374                         (void)vnode_update_identity(tvp, pvp, NULL, 0, 0,
 375                             VNODE_UPDATE_PARENT);
 376                 }
 377
 378                 if (vp_with_iocount)
 379                         vnode_put(vp_with_iocount);
 380
 381                 vp_with_iocount = tvp = pvp;
 382         }
 383
 384         if (vp_with_iocount)
 385                 vnode_put(vp_with_iocount);
 386
 387         return (error);
 388 }
 389
 390 /*
 391  * This function builds the path to a filename in "buff".  The
 392  * length of the buffer *INCLUDING* the trailing zero byte is
 393  * returned in outlen.  NOTE: the length includes the trailing
 394  * zero byte and thus the length is one greater than what strlen
 395  * would return.  This is important and lots of code elsewhere
 396  * in the kernel assumes this behavior.
 397  *
 398  * This function can call vnop in file system if the parent vnode
 399  * does not exist or when called for hardlinks via volfs path.
 400  * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
 401  * in the name cache and does not enter the file system.
 402  *
 403  * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
 404  * we encounter ENOENT during path reconstruction.  ENOENT means that
 405  * one of the parents moved while we were building the path.  The
 406  * caller can special handle this case by calling build_path again.
 407  *
 408  * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return path
 409  * that is relative to the nearest mount point, i.e. do not
 410  * cross over mount points during building the path.
 411  *
 412  * passed in vp must have a valid io_count reference
 413  */
 414 int
 415 build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
 416 {
 417         vnode_t vp, tvp;
 418         vnode_t vp_with_iocount;
 419         vnode_t proc_root_dir_vp;
 420         char *end;
 421         const char *str;
 422         int  len;
 423         int  ret = 0;
 424         int  fixhardlink;
 425
 426         if (first_vp == NULLVP)
 427                 return (EINVAL);
 428
 429         if (buflen <= 1)
 430                 return (ENOSPC);
 431
 432         /*
 433          * Grab the process fd so we can evaluate fd_rdir.
 434          */
 435         if (vfs_context_proc(ctx)->p_fd)
 436                 proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
 437         else
 438                 proc_root_dir_vp = NULL;
 439
 440         vp_with_iocount = NULLVP;
 441 again:
 442         vp = first_vp;
 443
 444         end = &buff[buflen-1];
 445         *end = '\0';
 446
 447         /*
 448          * holding the NAME_CACHE_LOCK in shared mode is
 449          * sufficient to stabilize both the vp->v_parent chain
 450          * and the 'vp->v_mount->mnt_vnodecovered' chain
 451          *
 452          * if we need to drop this lock, we must first grab the v_id
 453          * from the vnode we're currently working with... if that
 454          * vnode doesn't already have an io_count reference (the vp
 455          * passed in comes with one), we must grab a reference
 456          * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
 457          * deadlocks may result if you call vnode_get while holding
 458          * the NAME_CACHE_LOCK... we lazily release the reference
 459          * we pick up the next time we encounter a need to drop
 460          * the NAME_CACHE_LOCK or before we return from this routine
 461          */
 462         NAME_CACHE_LOCK_SHARED();
 463
 464         /*
 465          * Check if this is the root of a file system.
 466          */
 467         while (vp && vp->v_flag & VROOT) {
 468                 if (vp->v_mount == NULL) {
 469                         ret = EINVAL;
 470                         goto out_unlock;
 471                 }
 472                 if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
 473                         /*
 474                          * It's the root of the root file system, so it's
 475                          * just "/".
 476                          */
 477                         *--end = '/';
 478
 479                         goto out_unlock;
 480                 } else {
 481                         /*
 482                          * This the root of the volume and the caller does not
 483                          * want to cross mount points.  Therefore just return
 484                          * '/' as the relative path.
 485                          */
 486                         if (flags & BUILDPATH_VOLUME_RELATIVE) {
 487                                 *--end = '/';
 488                                 goto out_unlock;
 489                         } else {
 490                                 vp = vp->v_mount->mnt_vnodecovered;
 491                         }
 492                 }
 493         }
 494
 495         while ((vp != NULLVP) && (vp->v_parent != vp)) {
 496                 int  vid;
 497
 498                 /*
 499                  * For hardlinks the v_name may be stale, so if its OK
 500                  * to enter a file system, ask the file system for the
 501                  * name and parent (below).
 502                  */
 503                 fixhardlink = (vp->v_flag & VISHARDLINK) &&
 504                               (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
 505                               !(flags & BUILDPATH_NO_FS_ENTER);
 506
 507                 if (!fixhardlink) {
 508                         str = vp->v_name;
 509
 510                         if (str == NULL || *str == '\0') {
 511                                 if (vp->v_parent != NULL)
 512                                         ret = EINVAL;
 513                                 else
 514                                         ret = ENOENT;
 515                                 goto out_unlock;
 516                         }
 517                         len = strlen(str);
 518                         /*
 519                          * Check that there's enough space (including space for the '/')
 520                          */
 521                         if ((end - buff) < (len + 1)) {
 522                                 ret = ENOSPC;
 523                                 goto out_unlock;
 524                         }
 525                         /*
 526                          * Copy the name backwards.
 527                          */
 528                         str += len;
 529
 530                         for (; len > 0; len--)
 531                                *--end = *--str;
 532                         /*
 533                          * Add a path separator.
 534                          */
 535                         *--end = '/';
 536                 }
 537
 538                 /*
 539                  * Walk up the parent chain.
 540                  */
 541                 if (((vp->v_parent != NULLVP) && !fixhardlink) ||
 542                                 (flags & BUILDPATH_NO_FS_ENTER)) {
 543
 544                         /*
 545                          * In this if () block we are not allowed to enter the filesystem
 546                          * to conclusively get the most accurate parent identifier.
 547                          * As a result, if 'vp' does not identify '/' and it
 548                          * does not have a valid v_parent, then error out
 549                          * and disallow further path construction
 550                          */
 551                         if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
 552                                 /*
 553                                  * Only '/' is allowed to have a NULL parent
 554                                  * pointer. Upper level callers should ideally
 555                                  * re-drive name lookup on receiving a ENOENT.
 556                                  */
 557                                 ret = ENOENT;
 558
 559                                 /* The code below will exit early if 'tvp = vp' == NULL */
 560                         }
 561                         vp = vp->v_parent;
 562
 563                         /*
 564                          * if the vnode we have in hand isn't a directory and it
 565                          * has a v_parent, then we started with the resource fork
 566                          * so skip up to avoid getting a duplicate copy of the
 567                          * file name in the path.
 568                          */
 569                         if (vp && !vnode_isdir(vp) && vp->v_parent) {
 570                                 vp = vp->v_parent;
 571                         }
 572                 } else {
 573                         /*
 574                          * No parent, go get it if supported.
 575                          */
 576                         struct vnode_attr  va;
 577                         vnode_t  dvp;
 578
 579                         /*
 580                          * Make sure file system supports obtaining a path from id.
 581                          */
 582                         if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
 583                                 ret = ENOENT;
 584                                 goto out_unlock;
 585                         }
 586                         vid = vp->v_id;
 587
 588                         NAME_CACHE_UNLOCK();
 589
 590                         if (vp != first_vp && vp != vp_with_iocount) {
 591                                 if (vp_with_iocount) {
 592                                         vnode_put(vp_with_iocount);
 593                                         vp_with_iocount = NULLVP;
 594                                 }
 595                                 if (vnode_getwithvid(vp, vid))
 596                                         goto again;
 597                                 vp_with_iocount = vp;
 598                         }
 599                         VATTR_INIT(&va);
 600                         VATTR_WANTED(&va, va_parentid);
 601
 602                         if (fixhardlink) {
 603                                 VATTR_WANTED(&va, va_name);
 604                                 MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
 605                         } else {
 606                                 va.va_name = NULL;
 607                         }
 608                         /*
 609                          * Ask the file system for its parent id and for its name (optional).
 610                          */
 611                         ret = vnode_getattr(vp, &va, ctx);
 612
 613                         if (fixhardlink) {
 614                                 if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
 615                                         str = va.va_name;
 616                                         vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME);
 617                                 } else if (vp->v_name) {
 618                                         str = vp->v_name;
 619                                         ret = 0;
 620                                 } else {
 621                                         ret = ENOENT;
 622                                         goto bad_news;
 623                                 }
 624                                 len = strlen(str);
 625
 626                                 /*
 627                                  * Check that there's enough space.
 628                                  */
 629                                 if ((end - buff) < (len + 1)) {
 630                                         ret = ENOSPC;
 631                                 } else {
 632                                         /* Copy the name backwards. */
 633                                         str += len;
 634
 635                                         for (; len > 0; len--) {
 636                                                 *--end = *--str;
 637                                         }
 638                                         /*
 639                                          * Add a path separator.
 640                                          */
 641                                         *--end = '/';
 642                                 }
 643 bad_news:
 644                                 FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
 645                         }
 646                         if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
 647                                 ret = ENOENT;
 648                                 goto out;
 649                         }
 650                         /*
 651                          * Ask the file system for the parent vnode.
 652                          */
 653                         if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx)))
 654                                 goto out;
 655
 656                         if (!fixhardlink && (vp->v_parent != dvp))
 657                                 vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);
 658
 659                         if (vp_with_iocount)
 660                                 vnode_put(vp_with_iocount);
 661                         vp = dvp;
 662                         vp_with_iocount = vp;
 663
 664                         NAME_CACHE_LOCK_SHARED();
 665
 666                         /*
 667                          * if the vnode we have in hand isn't a directory and it
 668                          * has a v_parent, then we started with the resource fork
 669                          * so skip up to avoid getting a duplicate copy of the
 670                          * file name in the path.
 671                          */
 672                         if (vp && !vnode_isdir(vp) && vp->v_parent)
 673                                 vp = vp->v_parent;
 674                 }
 675
 676                 if (vp && (flags & BUILDPATH_CHECKACCESS)) {
 677                         vid = vp->v_id;
 678
 679                         NAME_CACHE_UNLOCK();
 680
 681                         if (vp != first_vp && vp != vp_with_iocount) {
 682                                 if (vp_with_iocount) {
 683                                         vnode_put(vp_with_iocount);
 684                                         vp_with_iocount = NULLVP;
 685                                 }
 686                                 if (vnode_getwithvid(vp, vid))
 687                                         goto again;
 688                                 vp_with_iocount = vp;
 689                         }
 690                         if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx)))
 691                                 goto out;       /* no peeking */
 692
 693                         NAME_CACHE_LOCK_SHARED();
 694                 }
 695
 696                 /*
 697                  * When a mount point is crossed switch the vp.
 698                  * Continue until we find the root or we find
 699                  * a vnode that's not the root of a mounted
 700                  * file system.
 701                  */
 702                 tvp = vp;
 703
 704                 while (tvp) {
 705                         if (tvp == proc_root_dir_vp)
 706                                 goto out_unlock;        /* encountered the root */
 707
 708                         if (!(tvp->v_flag & VROOT) || !tvp->v_mount)
 709                                 break;                  /* not the root of a mounted FS */
 710
 711                         if (flags & BUILDPATH_VOLUME_RELATIVE) {
 712                                 /* Do not cross over mount points */
 713                                 tvp = NULL;
 714                         } else {
 715                                 tvp = tvp->v_mount->mnt_vnodecovered;
 716                         }
 717                 }
 718                 if (tvp == NULLVP)
 719                         goto out_unlock;
 720                 vp = tvp;
 721         }
 722 out_unlock:
 723         NAME_CACHE_UNLOCK();
 724 out:
 725         if (vp_with_iocount)
 726                 vnode_put(vp_with_iocount);
 727         /*
 728          * Slide the name down to the beginning of the buffer.
 729          */
 730         memmove(buff, end, &buff[buflen] - end);
 731
 732         /*
 733          * length includes the trailing zero byte
 734          */
 735         *outlen = &buff[buflen] - end;
 736
 737         /* One of the parents was moved during path reconstruction.
 738          * The caller is interested in knowing whether any of the
 739          * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
 740          */
 741         if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
 742                 ret = EAGAIN;
 743         }
 744
 745         return (ret);
 746 }
 747
 748
 749 /*
 750  * return NULLVP if vp's parent doesn't
 751  * exist, or we can't get a valid iocount
 752  * else return the parent of vp
 753  */
 754 vnode_t
 755 vnode_getparent(vnode_t vp)
 756 {
 757         vnode_t pvp = NULLVP;
 758         int     pvid;
 759
 760         NAME_CACHE_LOCK_SHARED();
 761         /*
 762          * v_parent is stable behind the name_cache lock
 763          * however, the only thing we can really guarantee
 764          * is that we've grabbed a valid iocount on the
 765          * parent of 'vp' at the time we took the name_cache lock...
 766          * once we drop the lock, vp could get re-parented
 767          */
 768         if ( (pvp = vp->v_parent) != NULLVP ) {
 769                 pvid = pvp->v_id;
 770
 771                 NAME_CACHE_UNLOCK();
 772
 773                 if (vnode_getwithvid(pvp, pvid) != 0)
 774                         pvp = NULL;
 775         } else
 776                 NAME_CACHE_UNLOCK();
 777         return (pvp);
 778 }
 779
 780 const char *
 781 vnode_getname(vnode_t vp)
 782 {
 783         const char *name = NULL;
 784
 785         NAME_CACHE_LOCK_SHARED();
 786
 787         if (vp->v_name)
 788                 name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0);
 789         NAME_CACHE_UNLOCK();
 790
 791         return (name);
 792 }
 793
 794 void
 795 vnode_putname(const char *name)
 796 {
 797         vfs_removename(name);
 798 }
 799
 800 static const char unknown_vnodename[] = "(unknown vnode name)";
 801
 802 const char *
 803 vnode_getname_printable(vnode_t vp)
 804 {
 805         const char *name = vnode_getname(vp);
 806         if (name != NULL)
 807                 return name;
 808
 809         switch (vp->v_type) {
 810                 case VCHR:
 811                 case VBLK:
 812                         {
 813                         /*
 814                          * Create an artificial dev name from
 815                          * major and minor device number
 816                          */
 817                         char dev_name[64];
 818                         (void) snprintf(dev_name, sizeof(dev_name),
 819                                         "%c(%u, %u)", VCHR == vp->v_type ? 'c':'b',
 820                                         major(vp->v_rdev), minor(vp->v_rdev));
 821                         /*
 822                          * Add the newly created dev name to the name
 823                          * cache to allow easier cleanup. Also,
 824                          * vfs_addname allocates memory for the new name
 825                          * and returns it.
 826                          */
 827                         NAME_CACHE_LOCK_SHARED();
 828                         name = vfs_addname(dev_name, strlen(dev_name), 0, 0);
 829                         NAME_CACHE_UNLOCK();
 830                         return name;
 831                         }
 832                 default:
 833                         return unknown_vnodename;
 834         }
 835 }
 836
 837 void
 838 vnode_putname_printable(const char *name)
 839 {
 840         if (name == unknown_vnodename)
 841                 return;
 842         vnode_putname(name);
 843 }
 844
 845
 846 /*
 847  * if VNODE_UPDATE_PARENT, and we can take
 848  * a reference on dvp, then update vp with
 849  * it's new parent... if vp already has a parent,
 850  * then drop the reference vp held on it
 851  *
 852  * if VNODE_UPDATE_NAME,
 853  * then drop string ref on v_name if it exists, and if name is non-NULL
 854  * then pick up a string reference on name and record it in v_name...
 855  * optionally pass in the length and hashval of name if known
 856  *
 857  * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
 858  */
 859 void
 860 vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
 861 {
 862         struct  namecache *ncp;
 863         vnode_t old_parentvp = NULLVP;
 864 #if NAMEDSTREAMS
 865         int isstream = (vp->v_flag & VISNAMEDSTREAM);
 866         int kusecountbumped = 0;
 867 #endif
 868         kauth_cred_t tcred = NULL;
 869         const char *vname = NULL;
 870         const char *tname = NULL;
 871
 872         if (flags & VNODE_UPDATE_PARENT) {
 873                 if (dvp && vnode_ref(dvp) != 0) {
 874                         dvp = NULLVP;
 875                 }
 876 #if NAMEDSTREAMS
 877                 /* Don't count a stream's parent ref during unmounts */
 878                 if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
 879                         vnode_lock_spin(dvp);
 880                         ++dvp->v_kusecount;
 881                         kusecountbumped = 1;
 882                         vnode_unlock(dvp);
 883                 }
 884 #endif
 885         } else {
 886                 dvp = NULLVP;
 887         }
 888         if ( (flags & VNODE_UPDATE_NAME) ) {
 889                 if (name != vp->v_name) {
 890                         if (name && *name) {
 891                                 if (name_len == 0)
 892                                         name_len = strlen(name);
 893                                 tname = vfs_addname(name, name_len, name_hashval, 0);
 894                         }
 895                 } else
 896                         flags &= ~VNODE_UPDATE_NAME;
 897         }
 898         if ( (flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME)) ) {
 899
 900                 NAME_CACHE_LOCK();
 901
 902                 if ( (flags & VNODE_UPDATE_PURGE) ) {
 903
 904                         if (vp->v_parent)
 905                                 vp->v_parent->v_nc_generation++;
 906
 907                         while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
 908                                 cache_delete(ncp, 1);
 909
 910                         while ( (ncp = TAILQ_FIRST(&vp->v_ncchildren)) )
 911                                 cache_delete(ncp, 1);
 912
 913                         /*
 914                          * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
 915                          */
 916                         tcred = vp->v_cred;
 917                         vp->v_cred = NOCRED;
 918                         vp->v_authorized_actions = 0;
 919                 }
 920                 if ( (flags & VNODE_UPDATE_NAME) ) {
 921                         vname = vp->v_name;
 922                         vp->v_name = tname;
 923                 }
 924                 if (flags & VNODE_UPDATE_PARENT) {
 925                         if (dvp != vp && dvp != vp->v_parent) {
 926                                 old_parentvp = vp->v_parent;
 927                                 vp->v_parent = dvp;
 928                                 dvp = NULLVP;
 929
 930                                 if (old_parentvp)
 931                                         flags |= VNODE_UPDATE_CACHE;
 932                         }
 933                 }
 934                 if (flags & VNODE_UPDATE_CACHE) {
 935                         while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
 936                                 cache_delete(ncp, 1);
 937                 }
 938                 NAME_CACHE_UNLOCK();
 939
 940                 if (vname != NULL)
 941                         vfs_removename(vname);
 942
 943                 if (IS_VALID_CRED(tcred))
 944                         kauth_cred_unref(&tcred);
 945         }
 946         if (dvp != NULLVP) {
 947 #if NAMEDSTREAMS
 948                 /* Back-out the ref we took if we lost a race for vp->v_parent. */
 949                 if (kusecountbumped) {
 950                         vnode_lock_spin(dvp);
 951                         if (dvp->v_kusecount > 0)
 952                                 --dvp->v_kusecount;
 953                         vnode_unlock(dvp);
 954                 }
 955 #endif
 956                 vnode_rele(dvp);
 957         }
 958         if (old_parentvp) {
 959                 struct  uthread *ut;
 960
 961 #if NAMEDSTREAMS
 962                 if (isstream) {
 963                         vnode_lock_spin(old_parentvp);
 964                         if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0))
 965                                 --old_parentvp->v_kusecount;
 966                         vnode_unlock(old_parentvp);
 967                 }
 968 #endif
 969                 ut = get_bsdthread_info(current_thread());
 970
 971                 /*
 972                  * indicated to vnode_rele that it shouldn't do a
 973                  * vnode_reclaim at this time... instead it will
 974                  * chain the vnode to the uu_vreclaims list...
 975                  * we'll be responsible for calling vnode_reclaim
 976                  * on each of the vnodes in this list...
 977                  */
 978                 ut->uu_defer_reclaims = 1;
 979                 ut->uu_vreclaims = NULLVP;
 980
 981                 while ( (vp = old_parentvp) != NULLVP ) {
 982
 983                         vnode_lock_spin(vp);
 984                         vnode_rele_internal(vp, 0, 0, 1);
 985
 986                         /*
 987                          * check to see if the vnode is now in the state
 988                          * that would have triggered a vnode_reclaim in vnode_rele
 989                          * if it is, we save it's parent pointer and then NULL
 990                          * out the v_parent field... we'll drop the reference
 991                          * that was held on the next iteration of this loop...
 992                          * this short circuits a potential deep recursion if we
 993                          * have a long chain of parents in this state...
 994                          * we'll sit in this loop until we run into
 995                          * a parent in this chain that is not in this state
 996                          *
 997                          * make our check and the vnode_rele atomic
 998                          * with respect to the current vnode we're working on
 999                          * by holding the vnode lock
1000                          * if vnode_rele deferred the vnode_reclaim and has put
1001                          * this vnode on the list to be reaped by us, than
1002                          * it has left this vnode with an iocount == 1
1003                          */
1004                         if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
1005                              ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
1006                                 /*
1007                                  * vnode_rele wanted to do a vnode_reclaim on this vnode
1008                                  * it should be sitting on the head of the uu_vreclaims chain
1009                                  * pull the parent pointer now so that when we do the
1010                                  * vnode_reclaim for each of the vnodes in the uu_vreclaims
1011                                  * list, we won't recurse back through here
1012                                  *
1013                                  * need to do a convert here in case vnode_rele_internal
1014                                  * returns with the lock held in the spin mode... it
1015                                  * can drop and retake the lock under certain circumstances
1016                                  */
1017                                 vnode_lock_convert(vp);
1018
1019                                 NAME_CACHE_LOCK();
1020                                 old_parentvp = vp->v_parent;
1021                                 vp->v_parent = NULLVP;
1022                                 NAME_CACHE_UNLOCK();
1023                         } else {
1024                                 /*
1025                                  * we're done... we ran into a vnode that isn't
1026                                  * being terminated
1027                                  */
1028                                 old_parentvp = NULLVP;
1029                         }
1030                         vnode_unlock(vp);
1031                 }
1032                 ut->uu_defer_reclaims = 0;
1033
1034                 while ( (vp = ut->uu_vreclaims) != NULLVP) {
1035                         ut->uu_vreclaims = vp->v_defer_reclaimlist;
1036
1037                         /*
1038                          * vnode_put will drive the vnode_reclaim if
1039                          * we are still the only reference on this vnode
1040                          */
1041                         vnode_put(vp);
1042                 }
1043         }
1044 }
1045
1046
1047 /*
1048  * Mark a vnode as having multiple hard links.  HFS makes use of this
1049  * because it keeps track of each link separately, and wants to know
1050  * which link was actually used.
1051  *
1052  * This will cause the name cache to force a VNOP_LOOKUP on the vnode
1053  * so that HFS can post-process the lookup.  Also, volfs will call
1054  * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
1055  */
1056 void vnode_setmultipath(vnode_t vp)
1057 {
1058         vnode_lock_spin(vp);
1059
1060         /*
1061          * In theory, we're changing the vnode's identity as far as the
1062          * name cache is concerned, so we ought to grab the name cache lock
1063          * here.  However, there is already a race, and grabbing the name
1064          * cache lock only makes the race window slightly smaller.
1065          *
1066          * The race happens because the vnode already exists in the name
1067          * cache, and could be found by one thread before another thread
1068          * can set the hard link flag.
1069          */
1070
1071         vp->v_flag |= VISHARDLINK;
1072
1073         vnode_unlock(vp);
1074 }
1075
1076
1077
1078 /*
1079  * backwards compatibility
1080  */
1081 void vnode_uncache_credentials(vnode_t vp)
1082 {
1083         vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
1084 }
1085
1086
1087 /*
1088  * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
1089  * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions
1090  * we use this lock so that we can look at the v_cred and v_authorized_actions
1091  * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
1092  * which is the super-hot path... if we are updating the authorized actions for this
1093  * vnode, we are already in the super-slow and far less frequented path so its not
1094  * that bad that we take the lock exclusive for this case... of course we strive
1095  * to hold it for the minimum amount of time possible
1096  */
1097
1098 void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
1099 {
1100         kauth_cred_t tcred = NOCRED;
1101
1102         NAME_CACHE_LOCK();
1103
1104         vp->v_authorized_actions &= ~action;
1105
1106         if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
1107             IS_VALID_CRED(vp->v_cred)) {
1108                 /*
1109                  * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1110                  */
1111                 tcred = vp->v_cred;
1112                 vp->v_cred = NOCRED;
1113         }
1114         NAME_CACHE_UNLOCK();
1115
1116         if (tcred != NOCRED)
1117                 kauth_cred_unref(&tcred);
1118 }
1119
1120
1121 extern int bootarg_vnode_cache_defeat;  /* default = 0, from bsd_init.c */
1122
1123 boolean_t
1124 vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
1125 {
1126         kauth_cred_t    ucred;
1127         boolean_t       retval = FALSE;
1128
1129         /* Boot argument to defeat rights caching */
1130         if (bootarg_vnode_cache_defeat)
1131                 return FALSE;
1132
1133         if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
1134                 /*
1135                  * a TTL is enabled on the rights cache... handle it here
1136                  * a TTL of 0 indicates that no rights should be cached
1137                  */
1138                 if (vp->v_mount->mnt_authcache_ttl) {
1139                         if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
1140                                 /*
1141                                  * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
1142                                  * we will only allow a SEARCH right on a directory to be cached...
1143                                  * that cached right always has a default TTL associated with it
1144                                  */
1145                                 if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR)
1146                                         vp = NULLVP;
1147                         }
1148                         if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
1149                                 vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
1150                                 vp = NULLVP;
1151                         }
1152                 } else
1153                         vp = NULLVP;
1154         }
1155         if (vp != NULLVP) {
1156                 ucred = vfs_context_ucred(ctx);
1157
1158                 NAME_CACHE_LOCK_SHARED();
1159
1160                 if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action)
1161                         retval = TRUE;
1162
1163                 NAME_CACHE_UNLOCK();
1164         }
1165         return retval;
1166 }
1167
1168
1169 void vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
1170 {
1171         kauth_cred_t tcred = NOCRED;
1172         kauth_cred_t ucred;
1173         struct timeval tv;
1174         boolean_t ttl_active = FALSE;
1175
1176         ucred = vfs_context_ucred(ctx);
1177
1178         if (!IS_VALID_CRED(ucred) || action == 0)
1179                 return;
1180
1181         if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
1182                 /*
1183                  * a TTL is enabled on the rights cache... handle it here
1184                  * a TTL of 0 indicates that no rights should be cached
1185                  */
1186                 if (vp->v_mount->mnt_authcache_ttl == 0)
1187                         return;
1188
1189                 if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
1190                         /*
1191                          * only cache SEARCH action for filesystems marked
1192                          * MNTK_AUTH_OPAQUE on VDIRs...
1193                          * the lookup_path code will time these out
1194                          */
1195                         if ( (action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR )
1196                                 return;
1197                 }
1198                 ttl_active = TRUE;
1199
1200                 microuptime(&tv);
1201         }
1202         NAME_CACHE_LOCK();
1203
1204         if (vp->v_cred != ucred) {
1205                 kauth_cred_ref(ucred);
1206                 /*
1207                  * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1208                  */
1209                 tcred = vp->v_cred;
1210                 vp->v_cred = ucred;
1211                 vp->v_authorized_actions = 0;
1212         }
1213         if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
1214                 /*
1215                  * only reset the timestamnp on the
1216                  * first authorization cached after the previous
1217                  * timer has expired or we're switching creds...
1218                  * 'vnode_cache_is_authorized' will clear the
1219                  * authorized actions if the TTL is active and
1220                  * it has expired
1221                  */
1222                 vp->v_cred_timestamp = tv.tv_sec;
1223         }
1224         vp->v_authorized_actions |= action;
1225
1226         NAME_CACHE_UNLOCK();
1227
1228         if (IS_VALID_CRED(tcred))
1229                 kauth_cred_unref(&tcred);
1230 }
1231
1232
1233 boolean_t vnode_cache_is_stale(vnode_t vp)
1234 {
1235         struct timeval  tv;
1236         boolean_t       retval;
1237
1238         microuptime(&tv);
1239
1240         if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl)
1241                 retval = TRUE;
1242         else
1243                 retval = FALSE;
1244
1245         return retval;
1246 }
1247
1248
1249
1250 /*
1251  * Returns:     0                       Success
1252  *              ERECYCLE                vnode was recycled from underneath us.  Force lookup to be re-driven from namei.
1253  *                                              This errno value should not be seen by anyone outside of the kernel.
1254  */
1255 int
1256 cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
1257                 vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
1258 {
1259         char            *cp;            /* pointer into pathname argument */
1260         int             vid;
1261         int             vvid = 0;       /* protected by vp != NULLVP */
1262         vnode_t         vp = NULLVP;
1263         vnode_t         tdp = NULLVP;
1264         kauth_cred_t    ucred;
1265         boolean_t       ttl_enabled = FALSE;
1266         struct timeval  tv;
1267         mount_t         mp;
1268         unsigned int    hash;
1269         int             error = 0;
1270         boolean_t       dotdotchecked = FALSE;
1271
1272 #if CONFIG_TRIGGERS
1273         vnode_t         trigger_vp;
1274 #endif /* CONFIG_TRIGGERS */
1275
1276         ucred = vfs_context_ucred(ctx);
1277         ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
1278
1279         NAME_CACHE_LOCK_SHARED();
1280
1281         if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
1282                 ttl_enabled = TRUE;
1283                 microuptime(&tv);
1284         }
1285         for (;;) {
1286                 /*
1287                  * Search a directory.
1288                  *
1289                  * The cn_hash value is for use by cache_lookup
1290                  * The last component of the filename is left accessible via
1291                  * cnp->cn_nameptr for callers that need the name.
1292                  */
1293                 hash = 0;
1294                 cp = cnp->cn_nameptr;
1295
1296                 while (*cp && (*cp != '/')) {
1297                         hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
1298                 }
1299                 /*
1300                  * the crc generator can legitimately generate
1301                  * a 0... however, 0 for us means that we
1302                  * haven't computed a hash, so use 1 instead
1303                  */
1304                 if (hash == 0)
1305                         hash = 1;
1306                 cnp->cn_hash = hash;
1307                 cnp->cn_namelen = cp - cnp->cn_nameptr;
1308
1309                 ndp->ni_pathlen -= cnp->cn_namelen;
1310                 ndp->ni_next = cp;
1311
1312                 /*
1313                  * Replace multiple slashes by a single slash and trailing slashes
1314                  * by a null.  This must be done before VNOP_LOOKUP() because some
1315                  * fs's don't know about trailing slashes.  Remember if there were
1316                  * trailing slashes to handle symlinks, existing non-directories
1317                  * and non-existing files that won't be directories specially later.
1318                  */
1319                 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
1320                         cp++;
1321                         ndp->ni_pathlen--;
1322
1323                         if (*cp == '\0') {
1324                                 ndp->ni_flag |= NAMEI_TRAILINGSLASH;
1325                                 *ndp->ni_next = '\0';
1326                         }
1327                 }
1328                 ndp->ni_next = cp;
1329
1330                 cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);
1331
1332                 if (*cp == '\0')
1333                         cnp->cn_flags |= ISLASTCN;
1334
1335                 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
1336                         cnp->cn_flags |= ISDOTDOT;
1337
1338                 *dp_authorized = 0;
1339 #if NAMEDRSRCFORK
1340                 /*
1341                  * Process a request for a file's resource fork.
1342                  *
1343                  * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
1344                  */
1345                 if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
1346                     (cp[1] == '.' && cp[2] == '.') &&
1347                     bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
1348                         /* Skip volfs file systems that don't support native streams. */
1349                         if ((dp->v_mount != NULL) &&
1350                             (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
1351                             (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
1352                                 goto skiprsrcfork;
1353                         }
1354                         cnp->cn_flags |= CN_WANTSRSRCFORK;
1355                         cnp->cn_flags |= ISLASTCN;
1356                         ndp->ni_next[0] = '\0';
1357                         ndp->ni_pathlen = 1;
1358                 }
1359 skiprsrcfork:
1360 #endif
1361
1362 #if CONFIG_MACF
1363
1364                 /*
1365                  * Name cache provides authorization caching (see below)
1366                  * that will short circuit MAC checks in lookup().
1367                  * We must perform MAC check here.  On denial
1368                  * dp_authorized will remain 0 and second check will
1369                  * be performed in lookup().
1370                  */
1371                 if (!(cnp->cn_flags & DONOTAUTH)) {
1372                         error = mac_vnode_check_lookup(ctx, dp, cnp);
1373                         if (error) {
1374                                 NAME_CACHE_UNLOCK();
1375                                 goto errorout;
1376                         }
1377                 }
1378 #endif /* MAC */
1379                 if (ttl_enabled && ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))
1380                         break;
1381
1382                 /*
1383                  * NAME_CACHE_LOCK holds these fields stable
1384                  *
1385                  * We can't cache KAUTH_VNODE_SEARCHBYANYONE for root correctly
1386                  * so we make an ugly check for root here. root is always
1387                  * allowed and breaking out of here only to find out that is
1388                  * authorized by virtue of being root is very very expensive.
1389                  */
1390                 if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
1391                     !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE) &&
1392                     !vfs_context_issuser(ctx))
1393                         break;
1394
1395                 /*
1396                  * indicate that we're allowed to traverse this directory...
1397                  * even if we fail the cache lookup or decide to bail for
1398                  * some other reason, this information is valid and is used
1399                  * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
1400                  */
1401                 *dp_authorized = 1;
1402
1403                 if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
1404                         if (cnp->cn_nameiop != LOOKUP)
1405                                 break;
1406                         if (cnp->cn_flags & LOCKPARENT)
1407                                 break;
1408                         if (cnp->cn_flags & NOCACHE)
1409                                 break;
1410                         if (cnp->cn_flags & ISDOTDOT) {
1411                                 /*
1412                                  * Force directory hardlinks to go to
1413                                  * file system for ".." requests.
1414                                  */
1415                                 if (dp && (dp->v_flag & VISHARDLINK)) {
1416                                         break;
1417                                 }
1418                                 /*
1419                                  * Quit here only if we can't use
1420                                  * the parent directory pointer or
1421                                  * don't have one.  Otherwise, we'll
1422                                  * use it below.
1423                                  */
1424                                 if ((dp->v_flag & VROOT)  ||
1425                                     dp == ndp->ni_rootdir ||
1426                                     dp->v_parent == NULLVP)
1427                                         break;
1428                         }
1429                 }
1430
1431                 if ((cnp->cn_flags & CN_SKIPNAMECACHE)) {
1432                         /*
1433                          * Force lookup to go to the filesystem with
1434                          * all cnp fields set up.
1435                          */
1436                         break;
1437                 }
1438
1439                 /*
1440                  * "." and ".." aren't supposed to be cached, so check
1441                  * for them before checking the cache.
1442                  */
1443                 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
1444                         vp = dp;
1445                 else if ( (cnp->cn_flags & ISDOTDOT) ) {
1446                         /*
1447                          * If this is a chrooted process, we need to check if
1448                          * the process is trying to break out of its chrooted
1449                          * jail. We do that by trying to determine if dp is
1450                          * a subdirectory of ndp->ni_rootdir. If we aren't
1451                          * able to determine that by the v_parent pointers, we
1452                          * will leave the fast path.
1453                          *
1454                          * Since this function may see dotdot components
1455                          * many times and it has the name cache lock held for
1456                          * the entire duration, we optimise this by doing this
1457                          * check only once per cache_lookup_path call.
1458                          * If dotdotchecked is set, it means we've done this
1459                          * check once already and don't need to do it again.
1460                          */
1461                         if (!dotdotchecked && (ndp->ni_rootdir != rootvnode)) {
1462                                 vnode_t tvp = dp;
1463                                 boolean_t defer = FALSE;
1464                                 boolean_t is_subdir = FALSE;
1465
1466                                 defer = cache_check_vnode_issubdir(tvp,
1467                                     ndp->ni_rootdir, &is_subdir, &tvp);
1468
1469                                 if (defer) {
1470                                         /* defer to Filesystem */
1471                                         break;
1472                                 } else if (!is_subdir) {
1473                                         /*
1474                                          * This process is trying to break  out
1475                                          * of its chrooted jail, so all its
1476                                          * dotdot accesses will be translated to
1477                                          * its root directory.
1478                                          */
1479                                         vp = ndp->ni_rootdir;
1480                                 } else {
1481                                         /*
1482                                          * All good, let this dotdot access
1483                                          * proceed normally
1484                                          */
1485                                         vp = dp->v_parent;
1486                                 }
1487                                 dotdotchecked = TRUE;
1488                         } else {
1489                                 vp = dp->v_parent;
1490                         }
1491                 } else {
1492                         if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP)
1493                                 break;
1494
1495                         if ( (vp->v_flag & VISHARDLINK) ) {
1496                                 /*
1497                                  * The file system wants a VNOP_LOOKUP on this vnode
1498                                  */
1499                                 vp = NULL;
1500                                 break;
1501                         }
1502                 }
1503                 if ( (cnp->cn_flags & ISLASTCN) )
1504                         break;
1505
1506                 if (vp->v_type != VDIR) {
1507                         if (vp->v_type != VLNK)
1508                                 vp = NULL;
1509                         break;
1510                 }
1511
1512                 if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
1513                         vnode_t tmp_vp = mp->mnt_realrootvp;
1514                         if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation ||
1515                                 mp->mnt_realrootvp_vid != tmp_vp->v_id)
1516                                 break;
1517                         vp = tmp_vp;
1518                 }
1519
1520 #if CONFIG_TRIGGERS
1521                 /*
1522                  * After traversing all mountpoints stacked here, if we have a
1523                  * trigger in hand, resolve it.  Note that we don't need to
1524                  * leave the fast path if the mount has already happened.
1525                  */
1526                 if (vp->v_resolve)
1527                         break;
1528 #endif /* CONFIG_TRIGGERS */
1529
1530
1531                 dp = vp;
1532                 vp = NULLVP;
1533
1534                 cnp->cn_nameptr = ndp->ni_next + 1;
1535                 ndp->ni_pathlen--;
1536                 while (*cnp->cn_nameptr == '/') {
1537                         cnp->cn_nameptr++;
1538                         ndp->ni_pathlen--;
1539                 }
1540         }
1541         if (vp != NULLVP)
1542                 vvid = vp->v_id;
1543         vid = dp->v_id;
1544
1545         NAME_CACHE_UNLOCK();
1546
1547         if ((vp != NULLVP) && (vp->v_type != VLNK) &&
1548             ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
1549                 /*
1550                  * if we've got a child and it's the last component, and
1551                  * the lookup doesn't need to return the parent then we
1552                  * can skip grabbing an iocount on the parent, since all
1553                  * we're going to do with it is a vnode_put just before
1554                  * we return from 'lookup'.  If it's a symbolic link,
1555                  * we need the parent in case the link happens to be
1556                  * a relative pathname.
1557                  */
1558                 tdp = dp;
1559                 dp = NULLVP;
1560         } else {
1561 need_dp:
1562                 /*
1563                  * return the last directory we looked at
1564                  * with an io reference held. If it was the one passed
1565                  * in as a result of the last iteration of VNOP_LOOKUP,
1566                  * it should already hold an io ref. No need to increase ref.
1567                  */
1568                 if (last_dp != dp){
1569
1570                         if (dp == ndp->ni_usedvp) {
1571                                 /*
1572                                  * if this vnode matches the one passed in via USEDVP
1573                                  * than this context already holds an io_count... just
1574                                  * use vnode_get to get an extra ref for lookup to play
1575                                  * with... can't use the getwithvid variant here because
1576                                  * it will block behind a vnode_drain which would result
1577                                  * in a deadlock (since we already own an io_count that the
1578                                  * vnode_drain is waiting on)... vnode_get grabs the io_count
1579                                  * immediately w/o waiting... it always succeeds
1580                                  */
1581                                 vnode_get(dp);
1582                         } else if ((error = vnode_getwithvid_drainok(dp, vid))) {
1583                                 /*
1584                                  * failure indicates the vnode
1585                                  * changed identity or is being
1586                                  * TERMINATED... in either case
1587                                  * punt this lookup.
1588                                  *
1589                                  * don't necessarily return ENOENT, though, because
1590                                  * we really want to go back to disk and make sure it's
1591                                  * there or not if someone else is changing this
1592                                  * vnode. That being said, the one case where we do want
1593                                  * to return ENOENT is when the vnode's mount point is
1594                                  * in the process of unmounting and we might cause a deadlock
1595                                  * in our attempt to take an iocount. An ENODEV error return
1596                                  * is from vnode_get* is an indication this but we change that
1597                                  * ENOENT for upper layers.
1598                                  */
1599                                 if (error == ENODEV) {
1600                                         error = ENOENT;
1601                                 } else {
1602                                         error = ERECYCLE;
1603                                 }
1604                                 goto errorout;
1605                         }
1606                 }
1607         }
1608         if (vp != NULLVP) {
1609                 if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
1610                         vp = NULLVP;
1611
1612                         /*
1613                          * can't get reference on the vp we'd like
1614                          * to return... if we didn't grab a reference
1615                          * on the directory (due to fast path bypass),
1616                          * then we need to do it now... we can't return
1617                          * with both ni_dvp and ni_vp NULL, and no
1618                          * error condition
1619                          */
1620                         if (dp == NULLVP) {
1621                                 dp = tdp;
1622                                 goto need_dp;
1623                         }
1624                 }
1625         }
1626
1627         ndp->ni_dvp = dp;
1628         ndp->ni_vp  = vp;
1629
1630 #if CONFIG_TRIGGERS
1631         trigger_vp = vp ? vp : dp;
1632         if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
1633                 error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
1634                 if (error) {
1635                         if (vp)
1636                                 vnode_put(vp);
1637                         if (dp)
1638                                 vnode_put(dp);
1639                         goto errorout;
1640                 }
1641         }
1642 #endif /* CONFIG_TRIGGERS */
1643
1644 errorout:
1645         /*
1646          * If we came into cache_lookup_path after an iteration of the lookup loop that
1647          * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with a io ref
1648          * on it.  It is now the job of cache_lookup_path to drop the ref on this vnode
1649          * when it is no longer needed.  If we get to this point, and last_dp is not NULL
1650          * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
1651          * the case that we got to a subsequent path component and this previous vnode is
1652          * no longer needed.  We can then drop the io ref on it.
1653          */
1654         if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)){
1655                 vnode_put(last_dp);
1656         }
1657
1658         //initialized to 0, should be the same if no error cases occurred.
1659         return error;
1660 }
1661
1662
1663 static vnode_t
1664 cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
1665 {
1666         struct namecache *ncp;
1667         struct nchashhead *ncpp;
1668         long namelen = cnp->cn_namelen;
1669         unsigned int hashval = cnp->cn_hash;
1670
1671         if (nc_disabled) {
1672                 return NULL;
1673         }
1674
1675         ncpp = NCHHASH(dvp, cnp->cn_hash);
1676         LIST_FOREACH(ncp, ncpp, nc_hash) {
1677                 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
1678                         if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
1679                                 break;
1680                 }
1681         }
1682         if (ncp == 0) {
1683                 /*
1684                  * We failed to find an entry
1685                  */
1686                 NCHSTAT(ncs_miss);
1687                 return (NULL);
1688         }
1689         NCHSTAT(ncs_goodhits);
1690
1691         return (ncp->nc_vp);
1692 }
1693
1694
1695 unsigned int hash_string(const char *cp, int len);
1696 //
1697 // Have to take a len argument because we may only need to
1698 // hash part of a componentname.
1699 //
1700 unsigned int
1701 hash_string(const char *cp, int len)
1702 {
1703     unsigned hash = 0;
1704
1705     if (len) {
1706             while (len--) {
1707                     hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
1708             }
1709     } else {
1710             while (*cp != '\0') {
1711                     hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
1712             }
1713     }
1714     /*
1715      * the crc generator can legitimately generate
1716      * a 0... however, 0 for us means that we
1717      * haven't computed a hash, so use 1 instead
1718      */
1719     if (hash == 0)
1720             hash = 1;
1721     return hash;
1722 }
1723
1724
1725 /*
1726  * Lookup an entry in the cache
1727  *
1728  * We don't do this if the segment name is long, simply so the cache
1729  * can avoid holding long names (which would either waste space, or
1730  * add greatly to the complexity).
1731  *
1732  * Lookup is called with dvp pointing to the directory to search,
1733  * cnp pointing to the name of the entry being sought. If the lookup
1734  * succeeds, the vnode is returned in *vpp, and a status of -1 is
1735  * returned. If the lookup determines that the name does not exist
1736  * (negative cacheing), a status of ENOENT is returned. If the lookup
1737  * fails, a status of zero is returned.
1738  */
1739
1740 int
1741 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1742 {
1743         struct namecache *ncp;
1744         struct nchashhead *ncpp;
1745         long namelen = cnp->cn_namelen;
1746         unsigned int hashval;
1747         boolean_t       have_exclusive = FALSE;
1748         uint32_t vid;
1749         vnode_t  vp;
1750
1751         if (cnp->cn_hash == 0)
1752                 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1753         hashval = cnp->cn_hash;
1754
1755         if (nc_disabled) {
1756                 return 0;
1757         }
1758
1759         NAME_CACHE_LOCK_SHARED();
1760
1761 relook:
1762         ncpp = NCHHASH(dvp, cnp->cn_hash);
1763         LIST_FOREACH(ncp, ncpp, nc_hash) {
1764                 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
1765                         if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
1766                                 break;
1767                 }
1768         }
1769         /* We failed to find an entry */
1770         if (ncp == 0) {
1771                 NCHSTAT(ncs_miss);
1772                 NAME_CACHE_UNLOCK();
1773                 return (0);
1774         }
1775
1776         /* We don't want to have an entry, so dump it */
1777         if ((cnp->cn_flags & MAKEENTRY) == 0) {
1778                 if (have_exclusive == TRUE) {
1779                         NCHSTAT(ncs_badhits);
1780                         cache_delete(ncp, 1);
1781                         NAME_CACHE_UNLOCK();
1782                         return (0);
1783                 }
1784                 NAME_CACHE_UNLOCK();
1785                 NAME_CACHE_LOCK();
1786                 have_exclusive = TRUE;
1787                 goto relook;
1788         }
1789         vp = ncp->nc_vp;
1790
1791         /* We found a "positive" match, return the vnode */
1792         if (vp) {
1793                 NCHSTAT(ncs_goodhits);
1794
1795                 vid = vp->v_id;
1796                 NAME_CACHE_UNLOCK();
1797
1798                 if (vnode_getwithvid(vp, vid)) {
1799 #if COLLECT_STATS
1800                         NAME_CACHE_LOCK();
1801                         NCHSTAT(ncs_badvid);
1802                         NAME_CACHE_UNLOCK();
1803 #endif
1804                         return (0);
1805                 }
1806                 *vpp = vp;
1807                 return (-1);
1808         }
1809
1810         /* We found a negative match, and want to create it, so purge */
1811         if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
1812                 if (have_exclusive == TRUE) {
1813                         NCHSTAT(ncs_badhits);
1814                         cache_delete(ncp, 1);
1815                         NAME_CACHE_UNLOCK();
1816                         return (0);
1817                 }
1818                 NAME_CACHE_UNLOCK();
1819                 NAME_CACHE_LOCK();
1820                 have_exclusive = TRUE;
1821                 goto relook;
1822         }
1823
1824         /*
1825          * We found a "negative" match, ENOENT notifies client of this match.
1826          */
1827         NCHSTAT(ncs_neghits);
1828
1829         NAME_CACHE_UNLOCK();
1830         return (ENOENT);
1831 }
1832
1833 const char *
1834 cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
1835 {
1836         const char *strname;
1837
1838         if (cnp->cn_hash == 0)
1839                 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1840
1841         /*
1842          * grab 2 references on the string entered
1843          * one for the cache_enter_locked to consume
1844          * and the second to be consumed by v_name (vnode_create call point)
1845          */
1846         strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);
1847
1848         NAME_CACHE_LOCK();
1849
1850         cache_enter_locked(dvp, vp, cnp, strname);
1851
1852         NAME_CACHE_UNLOCK();
1853
1854         return (strname);
1855 }
1856
1857
1858 /*
1859  * Add an entry to the cache...
1860  * but first check to see if the directory
1861  * that this entry is to be associated with has
1862  * had any cache_purges applied since we took
1863  * our identity snapshot... this check needs to
1864  * be done behind the name cache lock
1865  */
1866 void
1867 cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
1868 {
1869
1870         if (cnp->cn_hash == 0)
1871                 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1872
1873         NAME_CACHE_LOCK();
1874
1875         if (dvp->v_nc_generation == gen)
1876                 (void)cache_enter_locked(dvp, vp, cnp, NULL);
1877
1878         NAME_CACHE_UNLOCK();
1879 }
1880
1881
1882 /*
1883  * Add an entry to the cache.
1884  */
1885 void
1886 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
1887 {
1888         const char *strname;
1889
1890         if (cnp->cn_hash == 0)
1891                 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1892
1893         /*
1894          * grab 1 reference on the string entered
1895          * for the cache_enter_locked to consume
1896          */
1897         strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
1898
1899         NAME_CACHE_LOCK();
1900
1901         cache_enter_locked(dvp, vp, cnp, strname);
1902
1903         NAME_CACHE_UNLOCK();
1904 }
1905
1906
1907 static void
1908 cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
1909 {
1910         struct namecache *ncp, *negp;
1911         struct nchashhead *ncpp;
1912
1913         if (nc_disabled)
1914                 return;
1915
1916         /*
1917          * if the entry is for -ve caching vp is null
1918          */
1919         if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
1920                 /*
1921                  * someone beat us to the punch..
1922                  * this vnode is already in the cache
1923                  */
1924                 if (strname != NULL)
1925                         vfs_removename(strname);
1926                 return;
1927         }
1928         /*
1929          * We allocate a new entry if we are less than the maximum
1930          * allowed and the one at the front of the list is in use.
1931          * Otherwise we use the one at the front of the list.
1932          */
1933         if (numcache < desiredNodes &&
1934             ((ncp = nchead.tqh_first) == NULL ||
1935               ncp->nc_hash.le_prev != 0)) {
1936                 /*
1937                  * Allocate one more entry
1938                  */
1939                 ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK);
1940                 numcache++;
1941         } else {
1942                 /*
1943                  * reuse an old entry
1944                  */
1945                 ncp = TAILQ_FIRST(&nchead);
1946                 TAILQ_REMOVE(&nchead, ncp, nc_entry);
1947
1948                 if (ncp->nc_hash.le_prev != 0) {
1949                        /*
1950                         * still in use... we need to
1951                         * delete it before re-using it
1952                         */
1953                         NCHSTAT(ncs_stolen);
1954                         cache_delete(ncp, 0);
1955                 }
1956         }
1957         NCHSTAT(ncs_enters);
1958
1959         /*
1960          * Fill in cache info, if vp is NULL this is a "negative" cache entry.
1961          */
1962         ncp->nc_vp = vp;
1963         ncp->nc_dvp = dvp;
1964         ncp->nc_hashval = cnp->cn_hash;
1965
1966         if (strname == NULL)
1967                 ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
1968         else
1969                 ncp->nc_name = strname;
1970
1971         //
1972         // If the bytes of the name associated with the vnode differ,
1973         // use the name associated with the vnode since the file system
1974         // may have set that explicitly in the case of a lookup on a
1975         // case-insensitive file system where the case of the looked up
1976         // name differs from what is on disk.  For more details, see:
1977         //   <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
1978         //
1979         const char *vn_name = vp ? vp->v_name : NULL;
1980         unsigned int len = vn_name ? strlen(vn_name) : 0;
1981         if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) {
1982                 unsigned int hash = hash_string(vn_name, len);
1983
1984                 vfs_removename(ncp->nc_name);
1985                 ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0);
1986                 ncp->nc_hashval = hash;
1987         }
1988
1989         /*
1990          * make us the newest entry in the cache
1991          * i.e. we'll be the last to be stolen
1992          */
1993         TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);
1994
1995         ncpp = NCHHASH(dvp, cnp->cn_hash);
1996 #if DIAGNOSTIC
1997         {
1998                 struct namecache *p;
1999
2000                 for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
2001                         if (p == ncp)
2002                                 panic("cache_enter: duplicate");
2003         }
2004 #endif
2005         /*
2006          * make us available to be found via lookup
2007          */
2008         LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
2009
2010         if (vp) {
2011                /*
2012                 * add to the list of name cache entries
2013                 * that point at vp
2014                 */
2015                 LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
2016         } else {
2017                 /*
2018                  * this is a negative cache entry (vp == NULL)
2019                  * stick it on the negative cache list.
2020                  */
2021                 TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);
2022
2023                 ncs_negtotal++;
2024
2025                 if (ncs_negtotal > desiredNegNodes) {
2026                        /*
2027                         * if we've reached our desired limit
2028                         * of negative cache entries, delete
2029                         * the oldest
2030                         */
2031                         negp = TAILQ_FIRST(&neghead);
2032                         cache_delete(negp, 1);
2033                 }
2034         }
2035         /*
2036          * add us to the list of name cache entries that
2037          * are children of dvp
2038          */
2039         if (vp)
2040                 TAILQ_INSERT_TAIL(&dvp->v_ncchildren, ncp, nc_child);
2041         else
2042                 TAILQ_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
2043 }
2044
2045
2046 /*
2047  * Initialize CRC-32 remainder table.
2048  */
2049 static void init_crc32(void)
2050 {
2051         /*
2052          * the CRC-32 generator polynomial is:
2053          *   x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10
2054          *        + x^8  + x^7  + x^5  + x^4  + x^2  + x + 1
2055          */
2056         unsigned int crc32_polynomial = 0x04c11db7;
2057         unsigned int i,j;
2058
2059         /*
2060          * pre-calculate the CRC-32 remainder for each possible octet encoding
2061          */
2062         for (i = 0;  i < 256;  i++) {
2063                 unsigned int crc_rem = i << 24;
2064
2065                 for (j = 0;  j < 8;  j++) {
2066                         if (crc_rem & 0x80000000)
2067                                 crc_rem = (crc_rem << 1) ^ crc32_polynomial;
2068                         else
2069                                 crc_rem = (crc_rem << 1);
2070                 }
2071                 crc32tab[i] = crc_rem;
2072         }
2073 }
2074
2075
2076 /*
2077  * Name cache initialization, from vfs_init() when we are booting
2078  */
2079 void
2080 nchinit(void)
2081 {
2082         int     i;
2083
2084         desiredNegNodes = (desiredvnodes / 10);
2085         desiredNodes = desiredvnodes + desiredNegNodes;
2086
2087         TAILQ_INIT(&nchead);
2088         TAILQ_INIT(&neghead);
2089
2090         init_crc32();
2091
2092         nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 *desiredNodes)), M_CACHE, &nchash);
2093         nchashmask = nchash;
2094         nchash++;
2095
2096         init_string_table();
2097
2098         /* Allocate name cache lock group attribute and group */
2099         namecache_lck_grp_attr= lck_grp_attr_alloc_init();
2100
2101         namecache_lck_grp = lck_grp_alloc_init("Name Cache",  namecache_lck_grp_attr);
2102
2103         /* Allocate name cache lock attribute */
2104         namecache_lck_attr = lck_attr_alloc_init();
2105
2106         /* Allocate name cache lock */
2107         namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);
2108
2109
2110         /* Allocate string cache lock group attribute and group */
2111         strcache_lck_grp_attr= lck_grp_attr_alloc_init();
2112
2113         strcache_lck_grp = lck_grp_alloc_init("String Cache",  strcache_lck_grp_attr);
2114
2115         /* Allocate string cache lock attribute */
2116         strcache_lck_attr = lck_attr_alloc_init();
2117
2118         /* Allocate string cache lock */
2119         strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);
2120
2121         for (i = 0; i < NUM_STRCACHE_LOCKS; i++)
2122                 lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
2123 }
2124
2125 void
2126 name_cache_lock_shared(void)
2127 {
2128         lck_rw_lock_shared(namecache_rw_lock);
2129 }
2130
2131 void
2132 name_cache_lock(void)
2133 {
2134         lck_rw_lock_exclusive(namecache_rw_lock);
2135 }
2136
2137 void
2138 name_cache_unlock(void)
2139 {
2140         lck_rw_done(namecache_rw_lock);
2141 }
2142
2143
2144 int
2145 resize_namecache(u_int newsize)
2146 {
2147     struct nchashhead   *new_table;
2148     struct nchashhead   *old_table;
2149     struct nchashhead   *old_head, *head;
2150     struct namecache    *entry, *next;
2151     uint32_t            i, hashval;
2152     int                 dNodes, dNegNodes;
2153     u_long              new_size, old_size;
2154
2155     dNegNodes = (newsize / 10);
2156     dNodes = newsize + dNegNodes;
2157
2158     // we don't support shrinking yet
2159     if (dNodes <= desiredNodes) {
2160         return 0;
2161     }
2162     new_table = hashinit(2 * dNodes, M_CACHE, &nchashmask);
2163     new_size  = nchashmask + 1;
2164
2165     if (new_table == NULL) {
2166         return ENOMEM;
2167     }
2168
2169     NAME_CACHE_LOCK();
2170     // do the switch!
2171     old_table = nchashtbl;
2172     nchashtbl = new_table;
2173     old_size  = nchash;
2174     nchash    = new_size;
2175
2176     // walk the old table and insert all the entries into
2177     // the new table
2178     //
2179     for(i=0; i < old_size; i++) {
2180         old_head = &old_table[i];
2181         for (entry=old_head->lh_first; entry != NULL; entry=next) {
2182             //
2183             // XXXdbg - Beware: this assumes that hash_string() does
2184             //                  the same thing as what happens in
2185             //                  lookup() over in vfs_lookup.c
2186             hashval = hash_string(entry->nc_name, 0);
2187             entry->nc_hashval = hashval;
2188             head = NCHHASH(entry->nc_dvp, hashval);
2189
2190             next = entry->nc_hash.le_next;
2191             LIST_INSERT_HEAD(head, entry, nc_hash);
2192         }
2193     }
2194     desiredNodes = dNodes;
2195     desiredNegNodes = dNegNodes;
2196
2197     NAME_CACHE_UNLOCK();
2198     FREE(old_table, M_CACHE);
2199
2200     return 0;
2201 }
2202
2203 static void
2204 cache_delete(struct namecache *ncp, int age_entry)
2205 {
2206         NCHSTAT(ncs_deletes);
2207
2208         if (ncp->nc_vp) {
2209                 LIST_REMOVE(ncp, nc_un.nc_link);
2210         } else {
2211                 TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
2212                 ncs_negtotal--;
2213         }
2214         TAILQ_REMOVE(&(ncp->nc_dvp->v_ncchildren), ncp, nc_child);
2215
2216         LIST_REMOVE(ncp, nc_hash);
2217         /*
2218          * this field is used to indicate
2219          * that the entry is in use and
2220          * must be deleted before it can
2221          * be reused...
2222          */
2223         ncp->nc_hash.le_prev = NULL;
2224
2225         if (age_entry) {
2226                 /*
2227                  * make it the next one available
2228                  * for cache_enter's use
2229                  */
2230                 TAILQ_REMOVE(&nchead, ncp, nc_entry);
2231                 TAILQ_INSERT_HEAD(&nchead, ncp, nc_entry);
2232         }
2233         vfs_removename(ncp->nc_name);
2234         ncp->nc_name = NULL;
2235 }
2236
2237
2238 /*
2239  * purge the entry associated with the
2240  * specified vnode from the name cache
2241  */
2242 void
2243 cache_purge(vnode_t vp)
2244 {
2245         struct namecache *ncp;
2246         kauth_cred_t tcred = NULL;
2247
2248         if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
2249                         (TAILQ_FIRST(&vp->v_ncchildren) == NULL) &&
2250                         (vp->v_cred == NOCRED) &&
2251                         (vp->v_parent == NULLVP))
2252                 return;
2253
2254         NAME_CACHE_LOCK();
2255
2256         if (vp->v_parent)
2257                 vp->v_parent->v_nc_generation++;
2258
2259         while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
2260                 cache_delete(ncp, 1);
2261
2262         while ( (ncp = TAILQ_FIRST(&vp->v_ncchildren)) )
2263                 cache_delete(ncp, 1);
2264
2265         /*
2266          * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
2267          */
2268         tcred = vp->v_cred;
2269         vp->v_cred = NOCRED;
2270         vp->v_authorized_actions = 0;
2271
2272         NAME_CACHE_UNLOCK();
2273
2274         if (IS_VALID_CRED(tcred))
2275                 kauth_cred_unref(&tcred);
2276 }
2277
2278 /*
2279  * Purge all negative cache entries that are children of the
2280  * given vnode.  A case-insensitive file system (or any file
2281  * system that has multiple equivalent names for the same
2282  * directory entry) can use this when creating or renaming
2283  * to remove negative entries that may no longer apply.
2284  */
2285 void
2286 cache_purge_negatives(vnode_t vp)
2287 {
2288         struct namecache *ncp, *next_ncp;
2289
2290         NAME_CACHE_LOCK();
2291
2292         TAILQ_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp) {
2293                 if (ncp->nc_vp)
2294                         break;
2295
2296                 cache_delete(ncp, 1);
2297         }
2298
2299         NAME_CACHE_UNLOCK();
2300 }
2301
2302 /*
2303  * Flush all entries referencing a particular filesystem.
2304  *
2305  * Since we need to check it anyway, we will flush all the invalid
2306  * entries at the same time.
2307  */
2308 void
2309 cache_purgevfs(struct mount *mp)
2310 {
2311         struct nchashhead *ncpp;
2312         struct namecache *ncp;
2313
2314         NAME_CACHE_LOCK();
2315         /* Scan hash tables for applicable entries */
2316         for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
2317 restart:
2318                 for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
2319                         if (ncp->nc_dvp->v_mount == mp) {
2320                                 cache_delete(ncp, 0);
2321                                 goto restart;
2322                         }
2323                 }
2324         }
2325         NAME_CACHE_UNLOCK();
2326 }
2327
2328
2329
//
// String ref routines
//
// A hash table of reference counted, NUL-terminated strings.
// add_name_internal() looks a string up (adding it if needed) and
// vfs_removename() drops a reference, freeing the entry when the
// count reaches zero.
//

// array of bucket heads, allocated by hashinit(); indexed with
// (hash value & string_table_mask)
static LIST_HEAD(stringhead, string_t) *string_ref_table;
// mask handed back by hashinit(); valid bucket indices are 0..mask
static u_long   string_table_mask;
// number of buckets that currently hold at least one entry; compared
// against 3/4 of the bucket count to decide when to resize
static uint32_t filled_buckets=0;


// one interned string
typedef struct string_t {
    LIST_ENTRY(string_t)  hash_chain;   // links the entries of one bucket
    const char *str;                    // the text; stored directly after this struct by add_name_internal()
    uint32_t              refcount;     // references outstanding; entry is freed when it drops to 0
} string_t;
2343
2344
2345 static void
2346 resize_string_ref_table(void)
2347 {
2348         struct stringhead *new_table;
2349         struct stringhead *old_table;
2350         struct stringhead *old_head, *head;
2351         string_t          *entry, *next;
2352         uint32_t           i, hashval;
2353         u_long             new_mask, old_mask;
2354
2355         /*
2356          * need to hold the table lock exclusively
2357          * in order to grow the table... need to recheck
2358          * the need to resize again after we've taken
2359          * the lock exclusively in case some other thread
2360          * beat us to the punch
2361          */
2362         lck_rw_lock_exclusive(strtable_rw_lock);
2363
2364         if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
2365                 lck_rw_done(strtable_rw_lock);
2366                 return;
2367         }
2368         new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);
2369
2370         if (new_table == NULL) {
2371                 printf("failed to resize the hash table.\n");
2372                 lck_rw_done(strtable_rw_lock);
2373                 return;
2374         }
2375
2376         // do the switch!
2377         old_table         = string_ref_table;
2378         string_ref_table  = new_table;
2379         old_mask          = string_table_mask;
2380         string_table_mask = new_mask;
2381         filled_buckets    = 0;
2382
2383         // walk the old table and insert all the entries into
2384         // the new table
2385         //
2386         for (i = 0; i <= old_mask; i++) {
2387                 old_head = &old_table[i];
2388                 for (entry = old_head->lh_first; entry != NULL; entry = next) {
2389                         hashval = hash_string((const char *)entry->str, 0);
2390                         head = &string_ref_table[hashval & string_table_mask];
2391                         if (head->lh_first == NULL) {
2392                                 filled_buckets++;
2393                         }
2394                         next = entry->hash_chain.le_next;
2395                         LIST_INSERT_HEAD(head, entry, hash_chain);
2396                 }
2397         }
2398         lck_rw_done(strtable_rw_lock);
2399
2400         FREE(old_table, M_CACHE);
2401 }
2402
2403
2404 static void
2405 init_string_table(void)
2406 {
2407         string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
2408 }
2409
2410
2411 const char *
2412 vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
2413 {
2414         return (add_name_internal(name, len, hashval, FALSE, flags));
2415 }
2416
2417
2418 static const char *
2419 add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
2420 {
2421         struct stringhead *head;
2422         string_t          *entry;
2423         uint32_t          chain_len = 0;
2424         uint32_t          hash_index;
2425         uint32_t          lock_index;
2426         char              *ptr;
2427
2428         if (len > MAXPATHLEN)
2429                 len = MAXPATHLEN;
2430
2431         /*
2432          * if the length already accounts for the null-byte, then
2433          * subtract one so later on we don't index past the end
2434          * of the string.
2435          */
2436         if (len > 0 && name[len-1] == '\0') {
2437                 len--;
2438         }
2439         if (hashval == 0) {
2440                 hashval = hash_string(name, len);
2441         }
2442
2443         /*
2444          * take this lock 'shared' to keep the hash stable
2445          * if someone else decides to grow the pool they
2446          * will take this lock exclusively
2447          */
2448         lck_rw_lock_shared(strtable_rw_lock);
2449
2450         /*
2451          * If the table gets more than 3/4 full, resize it
2452          */
2453         if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
2454                 lck_rw_done(strtable_rw_lock);
2455
2456                 resize_string_ref_table();
2457
2458                 lck_rw_lock_shared(strtable_rw_lock);
2459         }
2460         hash_index = hashval & string_table_mask;
2461         lock_index = hash_index % NUM_STRCACHE_LOCKS;
2462
2463         head = &string_ref_table[hash_index];
2464
2465         lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
2466
2467         for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
2468                 if (memcmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
2469                         entry->refcount++;
2470                         break;
2471                 }
2472         }
2473         if (entry == NULL) {
2474                 lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
2475                 /*
2476                  * it wasn't already there so add it.
2477                  */
2478                 MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);
2479
2480                 if (head->lh_first == NULL) {
2481                         OSAddAtomic(1, &filled_buckets);
2482                 }
2483                 ptr = (char *)((char *)entry + sizeof(string_t));
2484                 strncpy(ptr, name, len);
2485                 ptr[len] = '\0';
2486                 entry->str = ptr;
2487                 entry->refcount = 1;
2488                 LIST_INSERT_HEAD(head, entry, hash_chain);
2489         }
2490         if (need_extra_ref == TRUE)
2491                 entry->refcount++;
2492
2493         lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
2494         lck_rw_done(strtable_rw_lock);
2495
2496         return (const char *)entry->str;
2497 }
2498
2499
2500 int
2501 vfs_removename(const char *nameref)
2502 {
2503         struct stringhead *head;
2504         string_t          *entry;
2505         uint32_t           hashval;
2506         uint32_t           hash_index;
2507         uint32_t           lock_index;
2508         int                retval = ENOENT;
2509
2510         hashval = hash_string(nameref, 0);
2511
2512         /*
2513          * take this lock 'shared' to keep the hash stable
2514          * if someone else decides to grow the pool they
2515          * will take this lock exclusively
2516          */
2517         lck_rw_lock_shared(strtable_rw_lock);
2518         /*
2519          * must compute the head behind the table lock
2520          * since the size and location of the table
2521          * can change on the fly
2522          */
2523         hash_index = hashval & string_table_mask;
2524         lock_index = hash_index % NUM_STRCACHE_LOCKS;
2525
2526         head = &string_ref_table[hash_index];
2527
2528         lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
2529
2530         for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
2531                 if (entry->str == nameref) {
2532                         entry->refcount--;
2533
2534                         if (entry->refcount == 0) {
2535                                 LIST_REMOVE(entry, hash_chain);
2536
2537                                 if (head->lh_first == NULL) {
2538                                         OSAddAtomic(-1, &filled_buckets);
2539                                 }
2540                         } else {
2541                                 entry = NULL;
2542                         }
2543                         retval = 0;
2544                         break;
2545                 }
2546         }
2547         lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
2548         lck_rw_done(strtable_rw_lock);
2549
2550         if (entry != NULL)
2551                 FREE(entry, M_TEMP);
2552
2553         return retval;
2554 }
2555
2556
#ifdef DUMP_STRING_TABLE
/*
 * Debugging aid: print the refcount and text of every entry in the
 * string ref table, one per line, walking the buckets in index order.
 *
 * The table lock is held 'shared' for the duration of the walk; the
 * per-bucket mutexes are not taken.
 */
void
dump_string_table(void)
{
    struct stringhead *head;
    string_t          *entry;
    u_long            i;

    lck_rw_lock_shared(strtable_rw_lock);

    for (i = 0; i <= string_table_mask; i++) {
        head = &string_ref_table[i];
        for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
            /* refcount is an unsigned 32-bit value, so print it with %u */
            printf("%6u - %s\n", entry->refcount, entry->str);
        }
    }
    lck_rw_done(strtable_rw_lock);
}
#endif  /* DUMP_STRING_TABLE */