]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/vfs/vfs_cache.c
xnu-3248.30.4.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_cache.c
index ba73d95a49ecd2d0a4df1bff4f38f57b43bcc44b..36b1d24e6c88b01ab3ff35949855980cb2d05c31 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -75,6 +75,7 @@
 #include <sys/time.h>
 #include <sys/mount_internal.h>
 #include <sys/vnode_internal.h>
+#include <miscfs/specfs/specdev.h>
 #include <sys/namei.h>
 #include <sys/errno.h>
 #include <sys/malloc.h>
@@ -114,6 +115,7 @@ long        numcache;                       /* number of cache entries allocated */
 int    desiredNodes;
 int    desiredNegNodes;
 int    ncs_negtotal;
+int    nc_disabled = 0;
 TAILQ_HEAD(, namecache) nchead;                /* chain of all name cache entries */
 TAILQ_HEAD(, namecache) neghead;       /* chain of only negative cache entries */
 
@@ -158,7 +160,7 @@ lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];
 
 static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
 static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
-static void init_string_table(void) __attribute__((section("__TEXT, initcode")));
+static void init_string_table(void);
 static void cache_delete(struct namecache *, int);
 static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);
 
@@ -169,7 +171,7 @@ static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cn
 void dump_string_table(void);
 #endif /* DUMP_STRING_TABLE */
 
-static void init_crc32(void) __attribute__((section("__TEXT, initcode")));
+static void init_crc32(void);
 static unsigned int crc32tab[256];
 
 
@@ -191,6 +193,15 @@ static unsigned int crc32tab[256];
  * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
  * in the name cache and does not enter the file system.
  *
+ * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when 
+ * we encounter ENOENT during path reconstruction.  ENOENT means that 
+ * one of the parents moved while we were building the path.  The 
+ * caller can handle this case specially by calling build_path again.
+ *
+ * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return a path
+ * that is relative to the nearest mount point, i.e. we do not
+ * cross over mount points while building the path.
+ *
  * passed in vp must have a valid io_count reference
  */
 int
@@ -207,6 +218,9 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs
 
        if (first_vp == NULLVP)
                return (EINVAL);
+               
+       if (buflen <= 1)
+               return (ENOSPC);
 
        /*
         * Grab the process fd so we can evaluate fd_rdir.
@@ -257,7 +271,17 @@ again:
 
                        goto out_unlock;
                } else {
-                       vp = vp->v_mount->mnt_vnodecovered;
+                       /* 
+                        * This is the root of the volume and the caller does not
+                        * want to cross mount points.  Therefore just return 
+                        * '/' as the relative path. 
+                        */
+                       if (flags & BUILDPATH_VOLUME_RELATIVE) {
+                               *--end = '/';
+                               goto out_unlock;
+                       } else {
+                               vp = vp->v_mount->mnt_vnodecovered;
+                       }
                }
        }
 
@@ -308,7 +332,25 @@ again:
                 * Walk up the parent chain.
                 */
                if (((vp->v_parent != NULLVP) && !fixhardlink) ||
-                   (flags & BUILDPATH_NO_FS_ENTER)) {
+                               (flags & BUILDPATH_NO_FS_ENTER)) {
+
+                       /*
+                        * In this if () block we are not allowed to enter the filesystem
+                        * to conclusively get the most accurate parent identifier.
+                        * As a result, if 'vp' does not identify '/' and it
+                        * does not have a valid v_parent, then error out
+                        * and disallow further path construction
+                        */
+                       if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
+                               /*
+                                * Only '/' is allowed to have a NULL parent
+                                * pointer. Upper level callers should ideally
+                                * re-drive name lookup on receiving an ENOENT.
+                                */
+                               ret = ENOENT;
+
+                               /* The code below will exit early if 'tvp = vp' == NULL */
+                       }
                        vp = vp->v_parent;
 
                        /*
@@ -317,8 +359,9 @@ again:
                         * so skip up to avoid getting a duplicate copy of the
                         * file name in the path.
                         */
-                       if (vp && !vnode_isdir(vp) && vp->v_parent)
+                       if (vp && !vnode_isdir(vp) && vp->v_parent) {
                                vp = vp->v_parent;
+                       }
                } else {
                        /*
                         * No parent, go get it if supported.
@@ -422,6 +465,7 @@ bad_news:
                        if (vp && !vnode_isdir(vp) && vp->v_parent)
                                vp = vp->v_parent;
                }
+
                /*
                 * When a mount point is crossed switch the vp.
                 * Continue until we find the root or we find
@@ -436,7 +480,13 @@ bad_news:
 
                        if (!(tvp->v_flag & VROOT) || !tvp->v_mount)
                                break;                  /* not the root of a mounted FS */
-                       tvp = tvp->v_mount->mnt_vnodecovered;
+
+                       if (flags & BUILDPATH_VOLUME_RELATIVE) {
+                               /* Do not cross over mount points */
+                               tvp = NULL;
+                       } else {
+                               tvp = tvp->v_mount->mnt_vnodecovered;
+                       }
                }
                if (tvp == NULLVP)
                        goto out_unlock;
@@ -477,6 +527,14 @@ out:
         */
        *outlen = &buff[buflen] - end;
  
+       /* One of the parents was moved during path reconstruction. 
+        * The caller is interested in knowing whether any of the 
+        * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
+        */
+       if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
+               ret = EAGAIN;
+       }
+
        return (ret);
 }
 
@@ -532,6 +590,51 @@ vnode_putname(const char *name)
        vfs_removename(name);
 }
 
+static const char unknown_vnodename[] = "(unknown vnode name)";
+
+const char *
+vnode_getname_printable(vnode_t vp)
+{
+       const char *name = vnode_getname(vp);
+       if (name != NULL)
+               return name;
+       
+       switch (vp->v_type) {
+               case VCHR:
+               case VBLK:
+                       {
+                       /*
+                        * Create an artificial dev name from
+                        * major and minor device number
+                        */
+                       char dev_name[64];
+                       (void) snprintf(dev_name, sizeof(dev_name),
+                                       "%c(%u, %u)", VCHR == vp->v_type ? 'c':'b',
+                                       major(vp->v_rdev), minor(vp->v_rdev));
+                       /*
+                        * Add the newly created dev name to the name
+                        * cache to allow easier cleanup. Also,
+                        * vfs_addname allocates memory for the new name
+                        * and returns it.
+                        */
+                       NAME_CACHE_LOCK_SHARED();
+                       name = vfs_addname(dev_name, strlen(dev_name), 0, 0);
+                       NAME_CACHE_UNLOCK();
+                       return name;
+                       }
+               default:
+                       return unknown_vnodename;
+       }
+}
+
+void 
+vnode_putname_printable(const char *name)
+{
+       if (name == unknown_vnodename)
+               return;
+       vnode_putname(name);
+}
+               
 
 /*
  * if VNODE_UPDATE_PARENT, and we can take
@@ -808,11 +911,18 @@ void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
 }
 
 
-boolean_t vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
+extern int bootarg_vnode_cache_defeat; /* default = 0, from bsd_init.c */
+
+boolean_t
+vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
 {
        kauth_cred_t    ucred;
        boolean_t       retval = FALSE;
 
+       /* Boot argument to defeat rights caching */
+       if (bootarg_vnode_cache_defeat)
+               return FALSE;
+
        if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
                /*
                 * a TTL is enabled on the rights cache... handle it here
@@ -937,7 +1047,7 @@ boolean_t vnode_cache_is_stale(vnode_t vp)
  */
 int 
 cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, 
-               vfs_context_t ctx, int *trailing_slash, int *dp_authorized, vnode_t last_dp)
+               vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
 {
        char            *cp;            /* pointer into pathname argument */
        int             vid;
@@ -951,8 +1061,12 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
        unsigned int    hash;
        int             error = 0;
 
+#if CONFIG_TRIGGERS
+       vnode_t         trigger_vp;
+#endif /* CONFIG_TRIGGERS */
+
        ucred = vfs_context_ucred(ctx);
-       *trailing_slash = 0;
+       ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
 
        NAME_CACHE_LOCK_SHARED();
 
@@ -961,7 +1075,7 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
                microuptime(&tv);
        }
        for (;;) {
-               /*
+               /*
                 * Search a directory.
                 *
                 * The cn_hash value is for use by cache_lookup
@@ -999,7 +1113,7 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
                        ndp->ni_pathlen--;
 
                        if (*cp == '\0') {
-                               *trailing_slash = 1;
+                               ndp->ni_flag |= NAMEI_TRAILINGSLASH;
                                *ndp->ni_next = '\0';
                        }
                }
@@ -1073,10 +1187,12 @@ skiprsrcfork:
                *dp_authorized = 1;
 
                if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
-                       if (cnp->cn_nameiop != LOOKUP)
-                               break;
-                       if (cnp->cn_flags & (LOCKPARENT | NOCACHE))
-                               break;
+                       if (cnp->cn_nameiop != LOOKUP)
+                               break;
+                       if (cnp->cn_flags & LOCKPARENT) 
+                               break;
+                       if (cnp->cn_flags & NOCACHE)
+                               break;
                        if (cnp->cn_flags & ISDOTDOT) {
                                /*
                                 * Force directory hardlinks to go to
@@ -1098,6 +1214,14 @@ skiprsrcfork:
                        }
                }
 
+               if ((cnp->cn_flags & CN_SKIPNAMECACHE)) {
+                       /*
+                        * Force lookup to go to the filesystem with
+                        * all cnp fields set up.
+                        */
+                       break;
+               }
+
                /*
                 * "." and ".." aren't supposed to be cached, so check
                 * for them before checking the cache.
@@ -1126,13 +1250,26 @@ skiprsrcfork:
                                vp = NULL;
                        break;
                }
-               if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
 
-                       if (mp->mnt_realrootvp == NULLVP || mp->mnt_generation != mount_generation ||
-                               mp->mnt_realrootvp_vid != mp->mnt_realrootvp->v_id)
-                               break;
-                       vp = mp->mnt_realrootvp;
+               if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
+                       vnode_t tmp_vp = mp->mnt_realrootvp;
+                       if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation ||
+                               mp->mnt_realrootvp_vid != tmp_vp->v_id)
+                               break;
+                       vp = tmp_vp;
                }
+
+#if CONFIG_TRIGGERS
+               /*
+                * After traversing all mountpoints stacked here, if we have a
+                * trigger in hand, resolve it.  Note that we don't need to 
+                * leave the fast path if the mount has already happened.
+                */
+               if (vp->v_resolve)
+                       break;
+#endif /* CONFIG_TRIGGERS */
+
+
                dp = vp;
                vp = NULLVP;
 
@@ -1184,7 +1321,7 @@ need_dp:
                                 * immediately w/o waiting... it always succeeds
                                 */
                                vnode_get(dp);
-                       } else if ( (vnode_getwithvid(dp, vid)) ) {
+                       } else if ((error = vnode_getwithvid_drainok(dp, vid))) {
                                /*
                                 * failure indicates the vnode
                                 * changed identity or is being
@@ -1194,15 +1331,24 @@ need_dp:
                                 * don't necessarily return ENOENT, though, because
                                 * we really want to go back to disk and make sure it's
                                 * there or not if someone else is changing this
-                                * vnode.
+                                * vnode. That being said, the one case where we do want
+                                * to return ENOENT is when the vnode's mount point is
+                                * in the process of unmounting and we might cause a deadlock
+                                * in our attempt to take an iocount. An ENODEV error return
+                                * from vnode_get* is an indication of this, but we change that
+                                * to ENOENT for upper layers.
                                 */
-                               error = ERECYCLE;
+                               if (error == ENODEV) {
+                                       error = ENOENT;
+                               } else {
+                                       error = ERECYCLE;
+                               }
                                goto errorout;
                        }
                }
        }
        if (vp != NULLVP) {
-               if ( (vnode_getwithvid(vp, vvid)) ) {
+               if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
                        vp = NULLVP;
 
                        /*
@@ -1219,9 +1365,24 @@ need_dp:
                        }
                }
        }
+
        ndp->ni_dvp = dp;
        ndp->ni_vp  = vp;
 
+#if CONFIG_TRIGGERS
+       trigger_vp = vp ? vp : dp;
+       if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
+               error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
+               if (error) {
+                       if (vp)
+                               vnode_put(vp);
+                       if (dp) 
+                               vnode_put(dp);
+                       goto errorout;
+               }
+       } 
+#endif /* CONFIG_TRIGGERS */
+
 errorout:
        /* 
         * If we came into cache_lookup_path after an iteration of the lookup loop that
@@ -1247,8 +1408,12 @@ cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
        struct namecache *ncp;
        struct nchashhead *ncpp;
        long namelen = cnp->cn_namelen;
-       unsigned int hashval = (cnp->cn_hash & NCHASHMASK);
+       unsigned int hashval = cnp->cn_hash;
        
+       if (nc_disabled) {
+               return NULL;
+       }
+
        ncpp = NCHHASH(dvp, cnp->cn_hash);
        LIST_FOREACH(ncp, ncpp, nc_hash) {
                if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
@@ -1269,11 +1434,12 @@ cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
 }
 
 
+unsigned int hash_string(const char *cp, int len);
 //
 // Have to take a len argument because we may only need to
 // hash part of a componentname.
 //
-static unsigned int
+unsigned int
 hash_string(const char *cp, int len)
 {
     unsigned hash = 0;
@@ -1326,7 +1492,11 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
 
        if (cnp->cn_hash == 0)
                cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
-       hashval = (cnp->cn_hash & NCHASHMASK);
+       hashval = cnp->cn_hash;
+
+       if (nc_disabled) {
+               return 0;
+       }
 
        NAME_CACHE_LOCK_SHARED();
 
@@ -1395,12 +1565,9 @@ relook:
 
        /*
         * We found a "negative" match, ENOENT notifies client of this match.
-        * The nc_whiteout field records whether this is a whiteout.
         */
        NCHSTAT(ncs_neghits);
 
-       if (ncp->nc_whiteout)
-               cnp->cn_flags |= ISWHITEOUT;
        NAME_CACHE_UNLOCK();
        return (ENOENT);
 }
@@ -1485,6 +1652,9 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn
         struct namecache *ncp, *negp;
        struct nchashhead *ncpp;
 
+       if (nc_disabled) 
+               return;
+
        /*
         * if the entry is for -ve caching vp is null
         */
@@ -1534,12 +1704,30 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn
        ncp->nc_vp = vp;
        ncp->nc_dvp = dvp;
        ncp->nc_hashval = cnp->cn_hash;
-       ncp->nc_whiteout = FALSE;
 
        if (strname == NULL)
                ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
        else
                ncp->nc_name = strname;
+
+       //
+       // If the bytes of the name associated with the vnode differ,
+       // use the name associated with the vnode since the file system
+       // may have set that explicitly in the case of a lookup on a
+       // case-insensitive file system where the case of the looked up
+       // name differs from what is on disk.  For more details, see:
+       //   <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
+       // 
+       const char *vn_name = vp ? vp->v_name : NULL;
+       unsigned int len = vn_name ? strlen(vn_name) : 0;
+       if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) {
+               unsigned int hash = hash_string(vn_name, len);
+               
+               vfs_removename(ncp->nc_name);
+               ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0);
+               ncp->nc_hashval = hash;
+       }
+
        /*
         * make us the newest entry in the cache
         * i.e. we'll be the last to be stolen
@@ -1570,13 +1758,10 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn
        } else {
                /*
                 * this is a negative cache entry (vp == NULL)
-                * stick it on the negative cache list
-                * and record the whiteout state
+                * stick it on the negative cache list.
                 */
                TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);
          
-               if (cnp->cn_flags & ISWHITEOUT)
-                       ncp->nc_whiteout = TRUE;
                ncs_negtotal++;
 
                if (ncs_negtotal > desiredNegNodes) {
@@ -1799,7 +1984,10 @@ cache_purge(vnode_t vp)
         struct namecache *ncp;
        kauth_cred_t tcred = NULL;
 
-       if ((LIST_FIRST(&vp->v_nclinks) == NULL) && (LIST_FIRST(&vp->v_ncchildren) == NULL) && (vp->v_cred == NOCRED))
+       if ((LIST_FIRST(&vp->v_nclinks) == NULL) && 
+                       (LIST_FIRST(&vp->v_ncchildren) == NULL) && 
+                       (vp->v_cred == NOCRED) &&
+                       (vp->v_parent == NULLVP))
                return;
 
        NAME_CACHE_LOCK();
@@ -1973,9 +2161,6 @@ add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_
         uint32_t         lock_index;
        char              *ptr;
     
-       if (hashval == 0) {
-               hashval = hash_string(name, 0);
-       }
        /*
         * if the length already accounts for the null-byte, then
         * subtract one so later on we don't index past the end
@@ -1984,6 +2169,10 @@ add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_
        if (len > 0 && name[len-1] == '\0') {
                len--;
        }
+       if (hashval == 0) {
+               hashval = hash_string(name, len);
+       }
+
        /*
         * take this lock 'shared' to keep the hash stable
         * if someone else decides to grow the pool they