]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/vfs/vfs_lookup.c
xnu-4570.61.1.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_lookup.c
index 784c835fcd8075e2b1c635c8f943c446d3abf8ae..55b86f9e65efb6d812f20172cd9d96266c57bfeb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -90,7 +90,7 @@
 #include <sys/kauth.h>
 #include <kern/kalloc.h>
 #include <security/audit/audit.h>
-
+#include <sys/dtrace.h>        /* to get the prototype for strstr() in sys/dtrace_glue.h */
 #if CONFIG_MACF
 #include <security/mac_framework.h>
 #endif
@@ -112,17 +112,18 @@ static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, v
 #define MAX_VOLFS_RESTARTS 5
 #endif
 
-boolean_t      lookup_continue_ok(struct nameidata *ndp);
-int            lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx);
-int            lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx);
-int            lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx);
-void           lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation);
-int            lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx);
-int            lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, 
+static int             lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx);
+static int             lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx);
+static int             lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx);
+static void            lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation);
+static int             lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, 
                        int vbusyflags, int *keep_going, int nc_generation,
                        int wantparent, int atroot, vfs_context_t ctx);
-int            lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent);
+static int             lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent);
 
+#if NAMEDRSRCFORK
+static int             lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx);
+#endif
 
 
 
@@ -181,6 +182,7 @@ namei(struct nameidata *ndp)
 #if CONFIG_VOLFS
        int volfs_restarts = 0;
 #endif
+       size_t bytes_copied = 0;
 
        fdp = p->p_fd;
 
@@ -243,10 +245,10 @@ vnode_recycled:
 retry_copy:
        if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
                error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
-                           cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen);
+                           cnp->cn_pnlen, &bytes_copied);
        } else {
                error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf,
-                           cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen);
+                           cnp->cn_pnlen, &bytes_copied);
        }
        if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) {
                MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
@@ -257,11 +259,14 @@ retry_copy:
 
                cnp->cn_flags |= HASBUF;
                cnp->cn_pnlen = MAXPATHLEN;
+               bytes_copied = 0;
                
                goto retry_copy;
        }
        if (error)
                goto error_out;
+       ndp->ni_pathlen = bytes_copied;
+       bytes_copied = 0;
 
        /*
         * Since the name cache may contain positive entries of
@@ -365,11 +370,27 @@ retry_copy:
        ndp->ni_vp  = NULLVP;
 
        for (;;) {
+#if CONFIG_MACF
+               /*
+                * Give MACF policies a chance to reject the lookup
+                * before performing any filesystem operations.
+                * This hook is called before resolving the path and
+                * again each time a symlink is encountered.
+                * NB: the path information handed to policies is as
+                *     supplied by the caller and thus cannot be trusted.
+                */
+               error = mac_vnode_check_lookup_preflight(ctx, dp, cnp->cn_nameptr, cnp->cn_namelen);
+               if (error) {
+                       goto error_out;
+               }
+#endif
+
                ndp->ni_startdir = dp;
 
                if ( (error = lookup(ndp)) ) {
                        goto error_out;
                }
+
                /*
                 * Check for symbolic link
                 */
@@ -456,7 +477,8 @@ namei_compound_available(vnode_t dp, struct nameidata *ndp)
 
        return 0;
 }
-int
+
+static int
 lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx)
 {
 #if !CONFIG_MACF
@@ -479,7 +501,7 @@ lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized
        return 0;
 }
 
-void 
+static void 
 lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation) 
 {
        int isdot_or_dotdot;
@@ -524,11 +546,12 @@ lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp,
  * data fork if requested.  On failure, returns with iocount data fork (always) and its parent directory 
  * (if one was provided).
  */
-int
+static int
 lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx)
 {
        vnode_t svp = NULLVP;
        enum nsoperation nsop;
+       int nsflags;
        int error;
 
        if (dp->v_type != VREG) {
@@ -565,8 +588,13 @@ lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname
                        error = EPERM;
                        goto out;
        }
+
+       nsflags = 0;
+       if (cnp->cn_flags & CN_RAW_ENCRYPTED)
+               nsflags |= NS_GETRAWENCRYPTED;
+
        /* Ask the file system for the resource fork. */
-       error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, 0, ctx);
+       error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, nsflags, ctx);
 
        /* During a create, it OK for stream vnode to be missing. */
        if (error == ENOATTR || error == ENOENT) {
@@ -612,7 +640,7 @@ out:
  *     --In the event of an error, may return with ni_dvp NULL'ed out (in which case, iocount
  *     was dropped).
  */
-int            
+static int             
 lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, 
                int vbusyflags, int *keep_going, int nc_generation,
                int wantparent, int atroot, vfs_context_t ctx)
@@ -632,15 +660,6 @@ lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int
                goto nextname;
        }
 
-#if CONFIG_TRIGGERS
-       if (dp->v_resolve) {
-               error = vnode_trigger_resolve(dp, ndp, ctx);
-               if (error) {
-                       goto out;
-               }
-       }
-#endif /* CONFIG_TRIGGERS */
-
        /*
         * Take into account any additional components consumed by
         * the underlying filesystem.
@@ -797,7 +816,7 @@ out:
 /*
  * Comes in iocount on ni_vp.  May overwrite ni_dvp, but doesn't interpret incoming value.
  */
-int 
+static int 
 lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent)
 {
        vnode_t dp;
@@ -924,9 +943,8 @@ lookup(struct nameidata *ndp)
        /*
         * Setup: break out flag bits into variables.
         */
-       if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) {
-               if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE))
-                       docache = 0;
+       if (cnp->cn_flags & NOCACHE) {
+               docache = 0;
        }
        wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
        rdonly = cnp->cn_flags & RDONLY;
@@ -998,6 +1016,40 @@ dirloop:
         *    .. in the other file system.
         */
        if ( (cnp->cn_flags & ISDOTDOT) ) {
+               /*
+                * If this is a chroot'ed process, check whether the current
+                * directory is still a subdirectory of the process's
+                * root directory.
+                */
+               if (ndp->ni_rootdir && (ndp->ni_rootdir != rootvnode) &&
+                   dp !=  ndp->ni_rootdir) {
+                       int sdir_error;
+                       int is_subdir = FALSE;
+
+                       sdir_error = vnode_issubdir(dp, ndp->ni_rootdir,
+                           &is_subdir, vfs_context_kernel());
+
+                       /*
+                        * If we couldn't determine if dp is a subdirectory of
+                        * ndp->ni_rootdir (sdir_error != 0), we let the request
+                        * proceed.
+                        */
+                       if (!sdir_error && !is_subdir) {
+                               vnode_put(dp);
+                               dp = ndp->ni_rootdir;
+                               /*
+                                * There's a ref on the process's root directory
+                                * but we can't use vnode_getwithref here as
+                                * there is nothing preventing that ref being
+                                * released by another thread.
+                                */
+                               if (vnode_get(dp)) {
+                                       error = ENOENT;
+                                       goto bad;
+                               }
+                       }
+               }
+
                for (;;) {
                        if (dp == ndp->ni_rootdir || dp == rootvnode) {
                                ndp->ni_dvp = dp;
@@ -1305,104 +1357,101 @@ lookup_validate_creation_path(struct nameidata *ndp)
 /*
  * Modifies only ni_vp.  Always returns with ni_vp still valid (iocount held).
  */
-int
+static int
 lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, 
                int vbusyflags, vfs_context_t ctx)
 {
        mount_t mp;
        vnode_t tdp;
        int error = 0;
-       uthread_t uth;
        uint32_t depth = 0;
-       int dont_cache_mp = 0;
        vnode_t mounted_on_dp;
        int current_mount_generation = 0;
+#if CONFIG_TRIGGERS
+       vnode_t triggered_dp = NULLVP;
+       int retry_cnt = 0;
+#define MAX_TRIGGER_RETRIES 1
+#endif
        
-       mounted_on_dp = dp;
-       current_mount_generation = mount_generation;
-
-       while ((dp->v_type == VDIR) && dp->v_mountedhere &&
-                       ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
+       if (dp->v_type != VDIR || cnp->cn_flags & NOCROSSMOUNT)
+               return 0;
 
-               if (dp->v_mountedhere->mnt_lflag & MNT_LFORCE) {
-                       break;  // don't traverse into a forced unmount
-               }
+       mounted_on_dp = dp;
 #if CONFIG_TRIGGERS
-               /*
-                * For a trigger vnode, call its resolver when crossing its mount (if requested)
-                */
-               if (dp->v_resolve) {
-                       (void) vnode_trigger_resolve(dp, ndp, ctx);
-               }
+restart:
 #endif
-               vnode_lock(dp);
-
-               if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) {
+       current_mount_generation = mount_generation;
 
+       while (dp->v_mountedhere) {
+               vnode_lock_spin(dp);
+               if ((mp = dp->v_mountedhere)) {
                        mp->mnt_crossref++;
                        vnode_unlock(dp);
+               } else {
+                       vnode_unlock(dp);
+                       break;
+               }
 
+               if (ISSET(mp->mnt_lflag, MNT_LFORCE)) {
+                       mount_dropcrossref(mp, dp, 0);
+                       break;  // don't traverse into a forced unmount
+               }
 
-                       if (vfs_busy(mp, vbusyflags)) {
-                               mount_dropcrossref(mp, dp, 0);
-                               if (vbusyflags == LK_NOWAIT) {
-                                       error = ENOENT;
-                                       goto out;
-                               }
-
-                               continue;
-                       }
-
-
-                       /*
-                        * XXX - if this is the last component of the
-                        * pathname, and it's either not a lookup operation
-                        * or the NOTRIGGER flag is set for the operation,
-                        * set a uthread flag to let VFS_ROOT() for autofs
-                        * know it shouldn't trigger a mount.
-                        */
-                       uth = (struct uthread *)get_bsdthread_info(current_thread());
-                       if ((cnp->cn_flags & ISLASTCN) &&
-                                       (cnp->cn_nameiop != LOOKUP ||
-                                        (cnp->cn_flags & NOTRIGGER))) {
-                               uth->uu_notrigger = 1;
-                               dont_cache_mp = 1;
-                       }
-
-                       error = VFS_ROOT(mp, &tdp, ctx);
-                       /* XXX - clear the uthread flag */
-                       uth->uu_notrigger = 0;
 
+               if (vfs_busy(mp, vbusyflags)) {
                        mount_dropcrossref(mp, dp, 0);
-                       vfs_unbusy(mp);
-
-                       if (error) {
+                       if (vbusyflags == LK_NOWAIT) {
+                               error = ENOENT;
                                goto out;
                        }
 
-                       vnode_put(dp);
-                       ndp->ni_vp = dp = tdp;
-                       depth++;
+                       continue;
+               }
 
-#if CONFIG_TRIGGERS
-                       /*
-                        * Check if root dir is a trigger vnode
-                        */
-                       if (dp->v_resolve) {
-                               error = vnode_trigger_resolve(dp, ndp, ctx);
-                               if (error) {
-                                       goto out;
-                               }
-                       }
-#endif                 
+               error = VFS_ROOT(mp, &tdp, ctx);
 
-               } else { 
-                       vnode_unlock(dp);
+               mount_dropcrossref(mp, dp, 0);
+               vfs_unbusy(mp);
+
+               if (error) {
+                       goto out;
+               }
+
+               vnode_put(dp);
+               ndp->ni_vp = dp = tdp;
+               if (dp->v_type != VDIR) {
+#if DEVELOPMENT || DEBUG
+                       panic("%s : Root of filesystem not a directory\n",
+                           __FUNCTION__);
+#else
                        break;
+#endif
                }
+               depth++;
        }
 
-       if (depth && !dont_cache_mp) {
+#if CONFIG_TRIGGERS
+       /*
+        * The triggered_dp check here is required but is susceptible to an
+        * (unlikely) race in which a trigger mount is done from here and is
+        * unmounted before we get past vfs_busy above. We retry to deal with
+        * that case but it has the side effect of unwanted retries for
+        * "special" processes which don't want to trigger mounts.
+        */
+       if (dp->v_resolve && retry_cnt < MAX_TRIGGER_RETRIES) {
+               error = vnode_trigger_resolve(dp, ndp, ctx);
+               if (error)
+                       goto out;
+               if (dp == triggered_dp)
+                       retry_cnt += 1;
+               else
+                       retry_cnt = 0;
+               triggered_dp = dp;
+               goto restart;
+       }
+#endif /* CONFIG_TRIGGERS */
+
+       if (depth) {
                mp = mounted_on_dp->v_mountedhere;
 
                if (mp) {
@@ -1424,13 +1473,20 @@ out:
  * Takes ni_vp and ni_dvp non-NULL.  Returns with *new_dp set to the location
  * at which to start a lookup with a resolved path, and all other iocounts dropped.
  */
-int 
+static int 
 lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
 {
        int error;
        char *cp;               /* pointer into pathname argument */
        uio_t auio;
-       char uio_buf[ UIO_SIZEOF(1) ];
+       union {
+               union {
+                       struct user_iovec s_uiovec;
+                       struct kern_iovec s_kiovec;
+               } u_iovec;
+               struct uio s_uio;
+               char uio_buf[ UIO_SIZEOF(1) ];
+       } u_uio_buf; /* union only for aligning uio_buf correctly */
        int need_newpathbuf;
        u_int linklen;
        struct componentname *cnp = &ndp->ni_cnd;
@@ -1457,7 +1513,8 @@ lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
        } else {
                cp = cnp->cn_pnbuf;
        }
-       auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+       auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
+           &u_uio_buf.uio_buf[0], sizeof(u_uio_buf.uio_buf));
 
        uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN);
 
@@ -1699,14 +1756,14 @@ kdebug_lookup_gen_events(long *dbg_parms, int dbg_namelen, void *dp, boolean_t l
         * entries, we must mark the start of the path's string and the end.
         */
        if (lookup == TRUE)
-               code = (FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START;
+               code = VFS_LOOKUP | DBG_FUNC_START;
        else
-               code = (FSDBG_CODE(DBG_FSRW,39)) | DBG_FUNC_START;
+               code = VFS_LOOKUP_DONE | DBG_FUNC_START;
 
        if (dbg_namelen <= (int)(3 * sizeof(long)))
                code |= DBG_FUNC_END;
 
-       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, VM_KERNEL_ADDRPERM(dp), dbg_parms[0], dbg_parms[1], dbg_parms[2], 0);
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, kdebug_vnode(dp), dbg_parms[0], dbg_parms[1], dbg_parms[2], 0);
 
        code &= ~DBG_FUNC_START;