X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c910b4d9d2451126ae3917b931cd4390c11e1d52..e8c3f78193f1895ea514044358b93b1add9322f3:/bsd/vfs/vfs_lookup.c diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index 0c5299ae6..ccee2e1c5 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -88,9 +88,9 @@ #include /* For _PC_NAME_MAX */ #include #include - -#include - +#include +#include +#include /* to get the prototype for strstr() in sys/dtrace_glue.h */ #if CONFIG_MACF #include #endif @@ -105,12 +105,26 @@ #define VOLFS_MIN_PATH_LEN 9 -static void kdebug_lookup(struct vnode *dp, struct componentname *cnp); - #if CONFIG_VOLFS static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx); +#define MAX_VOLFS_RESTARTS 5 +#endif + +static int lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx); +static int lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx); +static int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx); +static void lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation); +static int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, + int vbusyflags, int *keep_going, int nc_generation, + int wantparent, int atroot, vfs_context_t ctx); +static int lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent); + +#if NAMEDRSRCFORK +static int lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx); #endif + + /* * Convert a pathname into a pointer to a locked inode. * @@ -150,20 +164,25 @@ int namei(struct nameidata *ndp) { struct filedesc *fdp; /* pointer to file descriptor state */ - char *cp; /* pointer into pathname argument */ struct vnode *dp; /* the directory we are searching */ struct vnode *usedvp = ndp->ni_dvp; /* store pointer to vp in case we must loop due to heavy vnode pressure */ u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */ - uio_t auio; int error; struct componentname *cnp = &ndp->ni_cnd; vfs_context_t ctx = cnp->cn_context; proc_t p = vfs_context_proc(ctx); +#if CONFIG_AUDIT /* XXX ut should be from context */ uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread()); - char *tmppn; - char uio_buf[ UIO_SIZEOF(1) ]; +#endif + +#if CONFIG_VOLFS + int volfs_restarts = 0; +#endif + size_t bytes_copied = 0; + + fdp = p->p_fd; #if DIAGNOSTIC if (!vfs_context_ucred(ctx) || !p) @@ -173,7 +192,35 @@ namei(struct nameidata *ndp) if (cnp->cn_flags & OPMASK) panic ("namei: flags contaminated with nameiops"); #endif - fdp = p->p_fd; + + /* + * A compound VNOP found something that needs further processing: + * either a trigger vnode, a covered directory, or a symlink. + */ + if (ndp->ni_flag & NAMEI_CONTLOOKUP) { + int rdonly, vbusyflags, keep_going, wantparent; + + rdonly = cnp->cn_flags & RDONLY; + vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0; + keep_going = 0; + wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); + + ndp->ni_flag &= ~(NAMEI_CONTLOOKUP); + + error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags, + &keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx); + if (error) + goto out_drop; + if (keep_going) { + if ((cnp->cn_flags & ISSYMLINK) == 0) { + panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)\n", ndp->ni_vp->v_type, ndp->ni_vp->v_tag); + } + goto continue_symlink; + } + + return 0; + + } vnode_recycled: @@ -186,7 +233,9 @@ vnode_recycled: cnp->cn_pnlen = PATHBUFLEN; } #if LP64_DEBUG - if (IS_VALID_UIO_SEGFLG(ndp->ni_segflg) == 0) { + if ((UIO_SEG_IS_USER_SPACE(ndp->ni_segflg) == 0) + && (ndp->ni_segflg != UIO_SYSSPACE) + && (ndp->ni_segflg != UIO_SYSSPACE32)) { panic("%s :%d - invalid ni_segflg\n", __FILE__, __LINE__); } #endif /* LP64_DEBUG */ @@ -194,10 +243,10 @@ vnode_recycled: retry_copy: if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) { error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, - cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen); + cnp->cn_pnlen, &bytes_copied); } else { error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf, - cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen); + cnp->cn_pnlen, &bytes_copied); } if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) { MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); @@ -208,11 +257,24 @@ retry_copy: cnp->cn_flags |= HASBUF; cnp->cn_pnlen = MAXPATHLEN; + bytes_copied = 0; goto retry_copy; } if (error) goto error_out; + ndp->ni_pathlen = bytes_copied; + bytes_copied = 0; + + /* + * Since the name cache may contain positive entries of + * the incorrect case, force lookup() to bypass the cache + * and call directly into the filesystem for each path + * component. Note: the FS may still consult the cache, + * but can apply rules to validate the results. + */ + if (proc_is_forcing_hfs_case_sensitivity(p)) + cnp->cn_flags |= CN_SKIPNAMECACHE; #if CONFIG_VOLFS /* @@ -233,9 +295,15 @@ retry_copy: /* Attempt to resolve a legacy volfs style pathname. */ MALLOC_ZONE(realpath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); if (realpath) { + /* + * We only error out on the ENAMETOOLONG cases where we know that + * vfs_getrealpath translation succeeded but the path could not fit into + * MAXPATHLEN characters. In other failure cases, we may be dealing with a path + * that legitimately looks like /.vol/1234/567 and is not meant to be translated + */ if ((realpath_err= vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, MAXPATHLEN, ctx))) { FREE_ZONE(realpath, MAXPATHLEN, M_NAMEI); - if (realpath_err == ENOSPC){ + if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG){ error = ENAMETOOLONG; goto error_out; } @@ -250,13 +318,15 @@ retry_copy: } } } - #endif /* CONFIG_VOLFS */ +#endif /* CONFIG_VOLFS */ +#if CONFIG_AUDIT /* If we are auditing the kernel pathname, save the user pathname */ if (cnp->cn_flags & AUDITVNPATH1) AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1); if (cnp->cn_flags & AUDITVNPATH2) AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2); +#endif /* CONFIG_AUDIT */ /* * Do not allow empty pathnames @@ -298,141 +368,501 @@ retry_copy: ndp->ni_vp = NULLVP; for (;;) { - int need_newpathbuf; - int linklen; +#if CONFIG_MACF + /* + * Give MACF policies a chance to reject the lookup + * before performing any filesystem operations. + * This hook is called before resolving the path and + * again each time a symlink is encountered. + * NB: policies receive path information as supplied + * by the caller and thus cannot be trusted. + */ + error = mac_vnode_check_lookup_preflight(ctx, dp, cnp->cn_nameptr, cnp->cn_namelen); + if (error) { + goto error_out; + } +#endif ndp->ni_startdir = dp; if ( (error = lookup(ndp)) ) { goto error_out; } + /* * Check for symbolic link */ if ((cnp->cn_flags & ISSYMLINK) == 0) { return (0); } - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } - if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { - error = ELOOP; + +continue_symlink: + /* Gives us a new path to process, and a starting dir */ + error = lookup_handle_symlink(ndp, &dp, ctx); + if (error != 0) { break; } -#if CONFIG_MACF - if ((error = mac_vnode_check_readlink(ctx, ndp->ni_vp)) != 0) - break; -#endif /* MAC */ - if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF)) - need_newpathbuf = 1; - else - need_newpathbuf = 0; + } + /* + * only come here if we fail to handle a SYMLINK... + * if either ni_dvp or ni_vp is non-NULL, then + * we need to drop the iocount that was picked + * up in the lookup routine + */ +out_drop: + if (ndp->ni_dvp) + vnode_put(ndp->ni_dvp); + if (ndp->ni_vp) + vnode_put(ndp->ni_vp); + error_out: + if ( (cnp->cn_flags & HASBUF) ) { + cnp->cn_flags &= ~HASBUF; + FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI); + } + cnp->cn_pnbuf = NULL; + ndp->ni_vp = NULLVP; + ndp->ni_dvp = NULLVP; - if (need_newpathbuf) { - MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - if (cp == NULL) { - error = ENOMEM; - break; - } - } else { - cp = cnp->cn_pnbuf; +#if CONFIG_VOLFS + /* + * Deal with volfs fallout. + * + * At this point, if we were originally given a volfs path that + * looks like /.vol/123/456, then we would have had to convert it into + * a full path. Assuming that part worked properly, we will now attempt + * to conduct a lookup of the item in the namespace. Under normal + * circumstances, if a user looked up /tmp/foo and it was not there, it + * would be permissible to return ENOENT. + * + * However, we may not want to do that here. Specifically, the volfs path + * uniquely identifies a certain item in the namespace regardless of where it + * lives. If the item has moved in between the time we constructed the + * path and now, when we're trying to do a lookup/authorization on the full + * path, we may have gotten an ENOENT. + * + * At this point we can no longer tell if the path no longer exists + * or if the item in question no longer exists. It could have been renamed + * away, in which case the /.vol identifier is still valid. + * + * Do this dance a maximum of MAX_VOLFS_RESTARTS times. + */ + if ((error == ENOENT) && (ndp->ni_cnd.cn_flags & CN_VOLFSPATH)) { + if (volfs_restarts < MAX_VOLFS_RESTARTS) { + volfs_restarts++; + goto vnode_recycled; } - auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + } +#endif - uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN); + if (error == ERECYCLE){ + /* vnode was recycled underneath us. re-drive lookup to start at + the beginning again, since recycling invalidated last lookup*/ + ndp->ni_cnd.cn_flags = cnpflags; + ndp->ni_dvp = usedvp; + goto vnode_recycled; + } - error = VNOP_READLINK(ndp->ni_vp, auio, ctx); - if (error) { - if (need_newpathbuf) - FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); - break; + + return (error); +} + +int +namei_compound_available(vnode_t dp, struct nameidata *ndp) +{ + if ((ndp->ni_flag & NAMEI_COMPOUNDOPEN) != 0) { + return vnode_compound_open_available(dp); + } + + return 0; +} + +static int +lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx) +{ +#if !CONFIG_MACF +#pragma unused(cnp) +#endif + + int error; + + if (!dp_authorized_in_cache) { + error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx); + if (error) + return error; + } +#if CONFIG_MACF + error = mac_vnode_check_lookup(ctx, dp, cnp); + if (error) + return error; +#endif /* CONFIG_MACF */ + + return 0; +} + +static void +lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation) +{ + int isdot_or_dotdot; + isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT); + + if (vp->v_name == NULL || vp->v_parent == NULLVP) { + int update_flags = 0; + + if (isdot_or_dotdot == 0) { + if (vp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (dvp != NULLVP && vp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + vnode_update_identity(vp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags); } - // LP64todo - fix this - linklen = MAXPATHLEN - uio_resid(auio); - if (linklen + ndp->ni_pathlen > MAXPATHLEN) { - if (need_newpathbuf) - FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); + } + if ( (cnp->cn_flags & MAKEENTRY) && (vp->v_flag & VNCACHEABLE) && LIST_FIRST(&vp->v_nclinks) == NULL) { + /* + * missing from name cache, but should + * be in it... this can happen if volfs + * causes the vnode to be created or the + * name cache entry got recycled but the + * vnode didn't... + * check to make sure that ni_dvp is valid + * cache_lookup_path may return a NULL + * do a quick check to see if the generation of the + * directory matches our snapshot... this will get + * rechecked behind the name cache lock, but if it + * already fails to match, no need to go any further + */ + if (dvp != NULLVP && (nc_generation == dvp->v_nc_generation) && (!isdot_or_dotdot)) + cache_enter_with_gen(dvp, vp, cnp, nc_generation); + } - error = ENAMETOOLONG; +} + +#if NAMEDRSRCFORK +/* + * Can change ni_dvp and ni_vp. On success, returns with iocounts on stream vnode (always) and + * data fork if requested. On failure, returns with iocount data fork (always) and its parent directory + * (if one was provided). + */ +static int +lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx) +{ + vnode_t svp = NULLVP; + enum nsoperation nsop; + int nsflags; + int error; + + if (dp->v_type != VREG) { + error = ENOENT; + goto out; + } + switch (cnp->cn_nameiop) { + case DELETE: + if (cnp->cn_flags & CN_ALLOWRSRCFORK) { + nsop = NS_DELETE; + } else { + error = EPERM; + goto out; + } + break; + case CREATE: + if (cnp->cn_flags & CN_ALLOWRSRCFORK) { + nsop = NS_CREATE; + } else { + error = EPERM; + goto out; + } break; + case LOOKUP: + /* Make sure our lookup of "/..namedfork/rsrc" is allowed. */ + if (cnp->cn_flags & CN_ALLOWRSRCFORK) { + nsop = NS_OPEN; + } else { + error = EPERM; + goto out; + } + break; + default: + error = EPERM; + goto out; + } + + nsflags = 0; + if (cnp->cn_flags & CN_RAW_ENCRYPTED) + nsflags |= NS_GETRAWENCRYPTED; + + /* Ask the file system for the resource fork. */ + error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, nsflags, ctx); + + /* During a create, it OK for stream vnode to be missing. */ + if (error == ENOATTR || error == ENOENT) { + error = (nsop == NS_CREATE) ? 0 : ENOENT; + } + if (error) { + goto out; + } + /* The "parent" of the stream is the file. */ + if (wantparent) { + if (ndp->ni_dvp) { + vnode_put(ndp->ni_dvp); } - if (need_newpathbuf) { - long len = cnp->cn_pnlen; + ndp->ni_dvp = dp; + } else { + vnode_put(dp); + } + ndp->ni_vp = svp; /* on create this may be null */ + + /* Restore the truncated pathname buffer (for audits). */ + if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') { + ndp->ni_next[0] = '/'; + } + cnp->cn_flags &= ~MAKEENTRY; + + return 0; +out: + return error; +} +#endif /* NAMEDRSRCFORK */ + +/* + * iocounts in: + * --One on ni_vp. One on ni_dvp if there is more path, or we didn't come through the + * cache, or we came through the cache and the caller doesn't want the parent. + * + * iocounts out: + * --Leaves us in the correct state for the next step, whatever that might be. + * --If we find a symlink, returns with iocounts on both ni_vp and ni_dvp. + * --If we are to look up another component, then we have an iocount on ni_vp and + * nothing else. + * --If we are done, returns an iocount on ni_vp, and possibly on ni_dvp depending on nameidata flags. + * --In the event of an error, may return with ni_dvp NULL'ed out (in which case, iocount + * was dropped). + */ +static int +lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, + int vbusyflags, int *keep_going, int nc_generation, + int wantparent, int atroot, vfs_context_t ctx) +{ + vnode_t dp; + int error; + char *cp; - tmppn = cnp->cn_pnbuf; - bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); - cnp->cn_pnbuf = cp; - cnp->cn_pnlen = MAXPATHLEN; + dp = ndp->ni_vp; + *keep_going = 0; - if ( (cnp->cn_flags & HASBUF) ) - FREE_ZONE(tmppn, len, M_NAMEI); - else - cnp->cn_flags |= HASBUF; - } else - cnp->cn_pnbuf[linklen] = '\0'; + if (ndp->ni_vp == NULLVP) { + panic("NULL ni_vp in %s\n", __FUNCTION__); + } - ndp->ni_pathlen += linklen; - cnp->cn_nameptr = cnp->cn_pnbuf; + if (atroot) { + goto nextname; + } - /* - * starting point for 'relative' - * symbolic link path - */ - dp = ndp->ni_dvp; - /* - * get rid of references returned via 'lookup' - */ - vnode_put(ndp->ni_vp); - vnode_put(ndp->ni_dvp); + /* + * Take into account any additional components consumed by + * the underlying filesystem. + */ + if (cnp->cn_consume > 0) { + cnp->cn_nameptr += cnp->cn_consume; + ndp->ni_next += cnp->cn_consume; + ndp->ni_pathlen -= cnp->cn_consume; + cnp->cn_consume = 0; + } else { + lookup_consider_update_cache(ndp->ni_dvp, dp, cnp, nc_generation); + } + + /* + * Check to see if the vnode has been mounted on... + * if so find the root of the mounted file system. + * Updates ndp->ni_vp. + */ + error = lookup_traverse_mountpoints(ndp, cnp, dp, vbusyflags, ctx); + dp = ndp->ni_vp; + if (error) { + goto out; + } + +#if CONFIG_MACF + if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) { + error = vnode_label(vnode_mount(dp), NULL, dp, NULL, 0, ctx); + if (error) + goto out; + } +#endif - ndp->ni_vp = NULLVP; + /* + * Check for symbolic link + */ + if ((dp->v_type == VLNK) && + ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) { + cnp->cn_flags |= ISSYMLINK; + *keep_going = 1; + return (0); + } + + /* + * Check for bogus trailing slashes. + */ + if ((ndp->ni_flag & NAMEI_TRAILINGSLASH)) { + if (dp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH); + } + +#if NAMEDSTREAMS + /* + * Deny namei/lookup requests to resolve paths that point to shadow files. + * Access to shadow files must be conducted by explicit calls to VNOP_LOOKUP + * directly, and not use lookup/namei + */ + if (vnode_isshadow (dp)) { + error = ENOENT; + goto out; + } +#endif + +nextname: + /* + * Not a symbolic link. If more pathname, + * continue at next component, else return. + * + * Definitely have a dvp if there's another slash + */ + if (*ndp->ni_next == '/') { + cnp->cn_nameptr = ndp->ni_next + 1; + ndp->ni_pathlen--; + while (*cnp->cn_nameptr == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } + + cp = cnp->cn_nameptr; + vnode_put(ndp->ni_dvp); ndp->ni_dvp = NULLVP; - /* - * Check if symbolic link restarts us at the root + if (*cp == '\0') { + goto emptyname; + } + + *keep_going = 1; + return 0; + } + + /* + * Disallow directory write attempts on read-only file systems. + */ + if (rdonly && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { + error = EROFS; + goto out; + } + + /* If SAVESTART is set, we should have a dvp */ + if (cnp->cn_flags & SAVESTART) { + /* + * note that we already hold a reference + * on both dp and ni_dvp, but for some reason + * can't get another one... in this case we + * need to do vnode_put on dp in 'bad2' */ - if (*(cnp->cn_nameptr) == '/') { - while (*(cnp->cn_nameptr) == '/') { - cnp->cn_nameptr++; - ndp->ni_pathlen--; - } - if ((dp = ndp->ni_rootdir) == NULLVP) { - error = ENOENT; - goto error_out; - } + if ( (vnode_get(ndp->ni_dvp)) ) { + error = ENOENT; + goto out; } + ndp->ni_startdir = ndp->ni_dvp; } + if (!wantparent && ndp->ni_dvp) { + vnode_put(ndp->ni_dvp); + ndp->ni_dvp = NULLVP; + } + + if (cnp->cn_flags & AUDITVNPATH1) + AUDIT_ARG(vnpath, dp, ARG_VNODE1); + else if (cnp->cn_flags & AUDITVNPATH2) + AUDIT_ARG(vnpath, dp, ARG_VNODE2); + +#if NAMEDRSRCFORK + /* + * Caller wants the resource fork. + */ + if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) { + error = lookup_handle_rsrc_fork(dp, ndp, cnp, wantparent, ctx); + if (error != 0) + goto out; + + dp = ndp->ni_vp; + } +#endif + if (kdebug_enable) + kdebug_lookup(ndp->ni_vp, cnp); + + return 0; + +emptyname: + error = lookup_handle_emptyname(ndp, cnp, wantparent); + if (error != 0) + goto out; + + return 0; +out: + return error; + +} + +/* + * Comes in iocount on ni_vp. May overwrite ni_dvp, but doesn't interpret incoming value. + */ +static int +lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent) +{ + vnode_t dp; + int error = 0; + + dp = ndp->ni_vp; + cnp->cn_namelen = 0; /* - * only come here if we fail to handle a SYMLINK... - * if either ni_dvp or ni_vp is non-NULL, then - * we need to drop the iocount that was picked - * up in the lookup routine + * A degenerate name (e.g. / or "") which is a way of + * talking about a directory, e.g. like "/." or ".". */ - if (ndp->ni_dvp) - vnode_put(ndp->ni_dvp); - if (ndp->ni_vp) - vnode_put(ndp->ni_vp); - error_out: - if ( (cnp->cn_flags & HASBUF) ) { - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI); + if (dp->v_type != VDIR) { + error = ENOTDIR; + goto out; } - cnp->cn_pnbuf = NULL; - ndp->ni_vp = NULLVP; - if (error == ERECYCLE){ - /* vnode was recycled underneath us. re-drive lookup to start at - the beginning again, since recycling invalidated last lookup*/ - ndp->ni_cnd.cn_flags = cnpflags; - ndp->ni_dvp = usedvp; - goto vnode_recycled; + if (cnp->cn_nameiop != LOOKUP) { + error = EISDIR; + goto out; + } + if (wantparent) { + /* + * note that we already hold a reference + * on dp, but for some reason can't + * get another one... in this case we + * need to do vnode_put on dp in 'bad' + */ + if ( (vnode_get(dp)) ) { + error = ENOENT; + goto out; + } + ndp->ni_dvp = dp; } + cnp->cn_flags &= ~ISDOTDOT; + cnp->cn_flags |= ISLASTCN; + ndp->ni_next = cnp->cn_nameptr; + ndp->ni_vp = dp; + if (cnp->cn_flags & AUDITVNPATH1) + AUDIT_ARG(vnpath, dp, ARG_VNODE1); + else if (cnp->cn_flags & AUDITVNPATH2) + AUDIT_ARG(vnpath, dp, ARG_VNODE2); + if (cnp->cn_flags & SAVESTART) + panic("lookup: SAVESTART"); - return (error); + return 0; +out: + return error; } - - /* * Search a pathname. * This is a very central and rather complicated routine. @@ -495,29 +925,24 @@ lookup(struct nameidata *ndp) char *cp; /* pointer into pathname argument */ vnode_t tdp; /* saved dp */ vnode_t dp; /* the directory we are searching */ - mount_t mp; /* mount table entry */ int docache = 1; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ - int trailing_slash = 0; int dp_authorized = 0; int error = 0; struct componentname *cnp = &ndp->ni_cnd; vfs_context_t ctx = cnp->cn_context; - int mounted_on_depth = 0; - int dont_cache_mp = 0; - vnode_t mounted_on_dp = NULLVP; - int current_mount_generation = 0; int vbusyflags = 0; int nc_generation = 0; vnode_t last_dp = NULLVP; + int keep_going; + int atroot; /* * Setup: break out flag bits into variables. */ - if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) { - if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE)) - docache = 0; + if (cnp->cn_flags & NOCACHE) { + docache = 0; } wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); rdonly = cnp->cn_flags & RDONLY; @@ -537,12 +962,19 @@ lookup(struct nameidata *ndp) error = ENOENT; goto bad; } - goto emptyname; + ndp->ni_vp = dp; + error = lookup_handle_emptyname(ndp, cnp, wantparent); + if (error) { + goto bad; + } + + return 0; } dirloop: + atroot = 0; ndp->ni_vp = NULLVP; - if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized, last_dp)) ) { + if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp)) ) { dp = NULLVP; goto bad; } @@ -582,6 +1014,40 @@ dirloop: * .. in the other file system. */ if ( (cnp->cn_flags & ISDOTDOT) ) { + /* + * if this is a chroot'ed process, check if the current + * directory is still a subdirectory of the process's + * root directory. + */ + if (ndp->ni_rootdir && (ndp->ni_rootdir != rootvnode) && + dp != ndp->ni_rootdir) { + int sdir_error; + int is_subdir = FALSE; + + sdir_error = vnode_issubdir(dp, ndp->ni_rootdir, + &is_subdir, vfs_context_kernel()); + + /* + * If we couldn't determine if dp is a subdirectory of + * ndp->ni_rootdir (sdir_error != 0), we let the request + * proceed. + */ + if (!sdir_error && !is_subdir) { + vnode_put(dp); + dp = ndp->ni_rootdir; + /* + * There's a ref on the process's root directory + * but we can't use vnode_getwithref here as + * there is nothing preventing that ref being + * released by another thread. + */ + if (vnode_get(dp)) { + error = ENOENT; + goto bad; + } + } + } + for (;;) { if (dp == ndp->ni_rootdir || dp == rootvnode) { ndp->ni_dvp = dp; @@ -598,7 +1064,8 @@ dirloop: error = ENOENT; goto bad; } - goto nextname; + atroot = 1; + goto returned_from_lookup_path; } if ((dp->v_flag & VROOT) == 0 || (cnp->cn_flags & NOCROSSMOUNT)) @@ -633,39 +1100,66 @@ unionlookup: goto lookup_error; } if ( (cnp->cn_flags & DONOTAUTH) != DONOTAUTH ) { - if (!dp_authorized) { - error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx); - if (error) - goto lookup_error; - } -#if CONFIG_MACF - error = mac_vnode_check_lookup(ctx, dp, cnp); - if (error) + error = lookup_authorize_search(dp, cnp, dp_authorized, ctx); + if (error) { goto lookup_error; -#endif /* CONFIG_MACF */ + } + } + + /* + * Now that we've authorized a lookup, can bail out if the filesystem + * will be doing a batched operation. Return an iocount on dvp. + */ +#if NAMEDRSRCFORK + if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) { +#else + if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) { +#endif /* NAMEDRSRCFORK */ + ndp->ni_flag |= NAMEI_UNFINISHED; + ndp->ni_ncgeneration = dp->v_nc_generation; + return 0; } nc_generation = dp->v_nc_generation; - if ( (error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx)) ) { + /* + * Note: + * Filesystems that support hardlinks may want to call vnode_update_identity + * if the lookup operation below will modify the in-core vnode to belong to a new point + * in the namespace. VFS cannot infer whether or not the look up operation makes the vnode + * name change or change parents. Without this, the lookup may make update + * filesystem-specific in-core metadata but fail to update the v_parent or v_name + * fields in the vnode. If VFS were to do this, it would be necessary to call + * vnode_update_identity on every lookup operation -- expensive! + * + * However, even with this in place, multiple lookups may occur in between this lookup + * and the subsequent vnop, so, at best, we could only guarantee that you would get a + * valid path back, and not necessarily the one that you wanted. + * + * Example: + * /tmp/a == /foo/b + * + * If you are now looking up /foo/b and the vnode for this link represents /tmp/a, + * vnode_update_identity will fix the parentage so that you can get /foo/b back + * through the v_parent chain (preventing you from getting /tmp/b back). It would + * not fix whether or not you should or should not get /tmp/a vs. /foo/b. + */ + + error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx); + + if ( error ) { lookup_error: if ((error == ENOENT) && - (dp->v_flag & VROOT) && (dp->v_mount != NULL) && + (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) { - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(dp, NULL); - } tdp = dp; - dp = tdp->v_mount->mnt_vnodecovered; - + error = lookup_traverse_union(tdp, &dp, ctx); vnode_put(tdp); - - if ( (vnode_getwithref(dp)) ) { + if (error) { dp = NULLVP; - error = ENOENT; goto bad; } + ndp->ni_dvp = dp; dp_authorized = 0; goto unionlookup; @@ -677,18 +1171,28 @@ lookup_error: if (ndp->ni_vp != NULLVP) panic("leaf should be empty"); - /* - * If creating and at end of pathname, then can consider - * allowing file to be created. +#if NAMEDRSRCFORK + /* + * At this point, error should be EJUSTRETURN. + * + * If CN_WANTSRSRCFORK is set, that implies that the + * underlying filesystem could not find the "parent" of the + * resource fork (the data fork), and we are doing a lookup + * for a CREATE event. + * + * However, this should be converted to an error, as the + * failure to find this parent should disallow further + * progress to try and acquire a resource fork vnode. */ - if (rdonly) { - error = EROFS; - goto bad; - } - if ((cnp->cn_flags & ISLASTCN) && trailing_slash && !(cnp->cn_flags & WILLBEDIR)) { + if (cnp->cn_flags & CN_WANTSRSRCFORK) { error = ENOENT; goto bad; } +#endif + + error = lookup_validate_creation_path(ndp); + if (error) + goto bad; /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the @@ -709,365 +1213,377 @@ lookup_error: return (0); } returned_from_lookup_path: - dp = ndp->ni_vp; + /* We'll always have an iocount on ni_vp when this finishes. */ + error = lookup_handle_found_vnode(ndp, cnp, rdonly, vbusyflags, &keep_going, nc_generation, wantparent, atroot, ctx); + if (error != 0) { + goto bad2; + } - /* - * Take into account any additional components consumed by - * the underlying filesystem. - */ - if (cnp->cn_consume > 0) { - cnp->cn_nameptr += cnp->cn_consume; - ndp->ni_next += cnp->cn_consume; - ndp->ni_pathlen -= cnp->cn_consume; - cnp->cn_consume = 0; - } else { - if (dp->v_name == NULL || dp->v_parent == NULLVP) { - int isdot_or_dotdot; - int update_flags = 0; - - isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT); - - if (isdot_or_dotdot == 0) { - if (dp->v_name == NULL) - update_flags |= VNODE_UPDATE_NAME; - if (ndp->ni_dvp != NULLVP && dp->v_parent == NULLVP) - update_flags |= VNODE_UPDATE_PARENT; - - if (update_flags) - vnode_update_identity(dp, ndp->ni_dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags); - } - } - if ( (cnp->cn_flags & MAKEENTRY) && (dp->v_flag & VNCACHEABLE) && LIST_FIRST(&dp->v_nclinks) == NULL) { - /* - * missing from name cache, but should - * be in it... this can happen if volfs - * causes the vnode to be created or the - * name cache entry got recycled but the - * vnode didn't... - * check to make sure that ni_dvp is valid - * cache_lookup_path may return a NULL - * do a quick check to see if the generation of the - * directory matches our snapshot... this will get - * rechecked behind the name cache lock, but if it - * already fails to match, no need to go any further - */ - if (ndp->ni_dvp != NULLVP && (nc_generation == ndp->ni_dvp->v_nc_generation)) - cache_enter_with_gen(ndp->ni_dvp, dp, cnp, nc_generation); + if (keep_going) { + dp = ndp->ni_vp; + + /* namei() will handle symlinks */ + if ((dp->v_type == VLNK) && + ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) { + return 0; } - } - mounted_on_dp = dp; - mounted_on_depth = 0; - dont_cache_mp = 0; - current_mount_generation = mount_generation; - /* - * Check to see if the vnode has been mounted on... - * if so find the root of the mounted file system. - */ -check_mounted_on: - if ((dp->v_type == VDIR) && dp->v_mountedhere && - ((cnp->cn_flags & NOCROSSMOUNT) == 0)) { - - vnode_lock(dp); + /* + * Otherwise, there's more path to process. + * cache_lookup_path is now responsible for dropping io ref on dp + * when it is called again in the dirloop. This ensures we hold + * a ref on dp until we complete the next round of lookup. + */ + last_dp = dp; - if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) { - struct uthread *uth = (struct uthread *)get_bsdthread_info(current_thread()); + goto dirloop; + } - mp->mnt_crossref++; - vnode_unlock(dp); + return (0); +bad2: + if (ndp->ni_dvp) + vnode_put(ndp->ni_dvp); - - if (vfs_busy(mp, vbusyflags)) { - mount_dropcrossref(mp, dp, 0); - if (vbusyflags == LK_NOWAIT) { - error = ENOENT; - goto bad2; - } - goto check_mounted_on; - } + vnode_put(ndp->ni_vp); + ndp->ni_vp = NULLVP; - /* - * XXX - if this is the last component of the - * pathname, and it's either not a lookup operation - * or the NOTRIGGER flag is set for the operation, - * set a uthread flag to let VFS_ROOT() for autofs - * know it shouldn't trigger a mount. - */ - if ((cnp->cn_flags & ISLASTCN) && - (cnp->cn_nameiop != LOOKUP || - (cnp->cn_flags & NOTRIGGER))) { - uth->uu_notrigger = 1; - dont_cache_mp = 1; - } - error = VFS_ROOT(mp, &tdp, ctx); - /* XXX - clear the uthread flag */ - uth->uu_notrigger = 0; - /* - * mount_dropcrossref does a vnode_put - * on dp if the 3rd arg is non-zero - */ - mount_dropcrossref(mp, dp, 1); - dp = NULL; - vfs_unbusy(mp); + if (kdebug_enable) + kdebug_lookup(dp, cnp); + return (error); - if (error) { - goto bad2; - } - ndp->ni_vp = dp = tdp; - mounted_on_depth++; - - goto check_mounted_on; - } - vnode_unlock(dp); - } +bad: + if (dp) + vnode_put(dp); + ndp->ni_vp = NULLVP; -#if CONFIG_MACF - if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) { - error = vnode_label(vnode_mount(dp), NULL, dp, NULL, - VNODE_LABEL_NEEDREF, ctx); - if (error) - goto bad2; - } -#endif + if (kdebug_enable) + kdebug_lookup(dp, cnp); + return (error); +} - if (mounted_on_depth && !dont_cache_mp) { - mp = mounted_on_dp->v_mountedhere; +/* + * Given a vnode in a union mount, traverse to the equivalent + * vnode in the underlying mount. + */ +int +lookup_traverse_union(vnode_t dvp, vnode_t *new_dvp, vfs_context_t ctx) +{ + char *path = NULL, *pp; + const char *name, *np; + int len; + int error = 0; + struct nameidata nd; + vnode_t vp = dvp; - if (mp) { - mount_lock(mp); - mp->mnt_realrootvp_vid = dp->v_id; - mp->mnt_realrootvp = dp; - mp->mnt_generation = current_mount_generation; - mount_unlock(mp); - } - } + *new_dvp = NULL; - /* - * Check for symbolic link - */ - if ((dp->v_type == VLNK) && - ((cnp->cn_flags & FOLLOW) || trailing_slash || *ndp->ni_next == '/')) { - cnp->cn_flags |= ISSYMLINK; - return (0); + if (vp && vp->v_flag & VROOT) { + *new_dvp = vp->v_mount->mnt_vnodecovered; + if (vnode_getwithref(*new_dvp)) + return ENOENT; + return 0; } - /* - * Check for bogus trailing slashes. - */ - if (trailing_slash) { - if (dp->v_type != VDIR) { - error = ENOTDIR; - goto bad2; - } - trailing_slash = 0; + path = (char *) kalloc(MAXPATHLEN); + if (path == NULL) { + error = ENOMEM; + goto done; } -nextname: /* - * Not a symbolic link. If more pathname, - * continue at next component, else return. + * Walk back up to the mountpoint following the + * v_parent chain and build a slash-separated path. + * Then lookup that path starting with the covered vnode. */ - if (*ndp->ni_next == '/') { - cnp->cn_nameptr = ndp->ni_next + 1; - ndp->ni_pathlen--; - while (*cnp->cn_nameptr == '/') { - cnp->cn_nameptr++; - ndp->ni_pathlen--; + pp = path + (MAXPATHLEN - 1); + *pp = '\0'; + + while (1) { + name = vnode_getname(vp); + if (name == NULL) { + printf("lookup_traverse_union: null parent name: .%s\n", pp); + error = ENOENT; + goto done; } - vnode_put(ndp->ni_dvp); + len = strlen(name); + if ((len + 1) > (pp - path)) { // Enough space for this name ? + error = ENAMETOOLONG; + vnode_putname(name); + goto done; + } + for (np = name + len; len > 0; len--) // Copy name backwards + *--pp = *--np; + vnode_putname(name); + vp = vp->v_parent; + if (vp == NULLVP || vp->v_flag & VROOT) + break; + *--pp = '/'; + } - cp = cnp->cn_nameptr; + /* Evaluate the path in the underlying mount */ + NDINIT(&nd, LOOKUP, OP_LOOKUP, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(pp), ctx); + nd.ni_dvp = dvp->v_mount->mnt_vnodecovered; + error = namei(&nd); + if (error == 0) + *new_dvp = nd.ni_vp; + nameidone(&nd); +done: + if (path) + kfree(path, MAXPATHLEN); + return error; +} - if (*cp == '\0') - goto emptyname; +int +lookup_validate_creation_path(struct nameidata *ndp) +{ + struct componentname *cnp = &ndp->ni_cnd; - /* - * cache_lookup_path is now responsible for dropping io ref on dp - * when it is called again in the dirloop. This ensures we hold - * a ref on dp until we complete the next round of lookup. - */ - last_dp = dp; - goto dirloop; - } - /* - * Disallow directory write attempts on read-only file systems. + * If creating and at end of pathname, then can consider + * allowing file to be created. */ - if (rdonly && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { - error = EROFS; - goto bad2; - } - if (cnp->cn_flags & SAVESTART) { - /* - * note that we already hold a reference - * on both dp and ni_dvp, but for some reason - * can't get another one... in this case we - * need to do vnode_put on dp in 'bad2' - */ - if ( (vnode_get(ndp->ni_dvp)) ) { - error = ENOENT; - goto bad2; - } - ndp->ni_startdir = ndp->ni_dvp; + if (cnp->cn_flags & RDONLY) { + return EROFS; } - if (!wantparent && ndp->ni_dvp) { - vnode_put(ndp->ni_dvp); - ndp->ni_dvp = NULLVP; + if ((cnp->cn_flags & ISLASTCN) && (ndp->ni_flag & NAMEI_TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) { + return ENOENT; } + + return 0; +} + +/* + * Modifies only ni_vp. Always returns with ni_vp still valid (iocount held). + */ +static int +lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, + int vbusyflags, vfs_context_t ctx) +{ + mount_t mp; + vnode_t tdp; + int error = 0; + uint32_t depth = 0; + vnode_t mounted_on_dp; + int current_mount_generation = 0; +#if CONFIG_TRIGGERS + vnode_t triggered_dp = NULLVP; + int retry_cnt = 0; +#define MAX_TRIGGER_RETRIES 1 +#endif + + if (dp->v_type != VDIR || cnp->cn_flags & NOCROSSMOUNT) + return 0; - if (cnp->cn_flags & AUDITVNPATH1) - AUDIT_ARG(vnpath, dp, ARG_VNODE1); - else if (cnp->cn_flags & AUDITVNPATH2) - AUDIT_ARG(vnpath, dp, ARG_VNODE2); + mounted_on_dp = dp; +#if CONFIG_TRIGGERS +restart: +#endif + current_mount_generation = mount_generation; -#if NAMEDRSRCFORK - /* - * Caller wants the resource fork. - */ - if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) { - vnode_t svp = NULLVP; - enum nsoperation nsop; + while (dp->v_mountedhere) { + vnode_lock_spin(dp); + if ((mp = dp->v_mountedhere)) { + mp->mnt_crossref++; + vnode_unlock(dp); + } else { + vnode_unlock(dp); + break; + } - if (dp->v_type != VREG) { - error = ENOENT; - goto bad2; + if (ISSET(mp->mnt_lflag, MNT_LFORCE)) { + mount_dropcrossref(mp, dp, 0); + break; // don't traverse into a forced unmount } - switch (cnp->cn_nameiop) { - case DELETE: - if (cnp->cn_flags & CN_ALLOWRSRCFORK) { - nsop = NS_DELETE; - } - else { - error = EPERM; - goto bad; - } - break; - case CREATE: - if (cnp->cn_flags & CN_ALLOWRSRCFORK) { - nsop = NS_CREATE; - } - else { - error = EPERM; - goto bad; - } - break; - case LOOKUP: - /* Make sure our lookup of "/..namedfork/rsrc" is allowed. */ - if (cnp->cn_flags & CN_ALLOWRSRCFORK) { - nsop = NS_OPEN; - } else { - error = EPERM; - goto bad2; + + + if (vfs_busy(mp, vbusyflags)) { + mount_dropcrossref(mp, dp, 0); + if (vbusyflags == LK_NOWAIT) { + error = ENOENT; + goto out; } - break; - default: - error = EPERM; - goto bad2; + + continue; } - /* Ask the file system for the resource fork. */ - error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, 0, ctx); - /* During a create, it OK for stream vnode to be missing. */ - if (error == ENOATTR || error == ENOENT) { - error = (nsop == NS_CREATE) ? 0 : ENOENT; - } + error = VFS_ROOT(mp, &tdp, ctx); + + mount_dropcrossref(mp, dp, 0); + vfs_unbusy(mp); + if (error) { - goto bad2; - } - /* The "parent" of the stream is the file. */ - if (wantparent) { - if (ndp->ni_dvp) { - if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) { - ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } - vnode_put(ndp->ni_dvp); - } - ndp->ni_dvp = dp; - } else { - vnode_put(dp); + goto out; } - ndp->ni_vp = dp = svp; /* on create this may be null */ - /* Restore the truncated pathname buffer (for audits). */ - if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') { - ndp->ni_next[0] = '/'; + vnode_put(dp); + ndp->ni_vp = dp = tdp; + if (dp->v_type != VDIR) { +#if DEVELOPMENT || DEBUG + panic("%s : Root of filesystem not a directory\n", + __FUNCTION__); +#else + break; +#endif } - cnp->cn_flags &= ~MAKEENTRY; + depth++; } -#endif - if (kdebug_enable) - kdebug_lookup(dp, cnp); - return (0); -emptyname: - cnp->cn_namelen = 0; +#if CONFIG_TRIGGERS /* - * A degenerate name (e.g. / or "") which is a way of - * talking about a directory, e.g. like "/." or ".". + * The triggered_dp check here is required but is susceptible to a + * (unlikely) race in which trigger mount is done from here and is + * unmounted before we get past vfs_busy above. We retry to deal with + * that case but it has the side effect of unwanted retries for + * "special" processes which don't want to trigger mounts. */ - if (dp->v_type != VDIR) { - error = ENOTDIR; - goto bad; + if (dp->v_resolve && retry_cnt < MAX_TRIGGER_RETRIES) { + error = vnode_trigger_resolve(dp, ndp, ctx); + if (error) + goto out; + if (dp == triggered_dp) + retry_cnt += 1; + else + retry_cnt = 0; + triggered_dp = dp; + goto restart; } - if (cnp->cn_nameiop != LOOKUP) { - error = EISDIR; - goto bad; +#endif /* CONFIG_TRIGGERS */ + + if (depth) { + mp = mounted_on_dp->v_mountedhere; + + if (mp) { + mount_lock_spin(mp); + mp->mnt_realrootvp_vid = dp->v_id; + mp->mnt_realrootvp = dp; + mp->mnt_generation = current_mount_generation; + mount_unlock(mp); + } } - if (wantparent) { - /* - * note that we already hold a reference - * on dp, but for some reason can't - * get another one... in this case we - * need to do vnode_put on dp in 'bad' - */ - if ( (vnode_get(dp)) ) { - error = ENOENT; - goto bad; + + return 0; + +out: + return error; +} + +/* + * Takes ni_vp and ni_dvp non-NULL. Returns with *new_dp set to the location + * at which to start a lookup with a resolved path, and all other iocounts dropped. + */ +static int +lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx) +{ + int error; + char *cp; /* pointer into pathname argument */ + uio_t auio; + union { + union { + struct user_iovec s_uiovec; + struct kern_iovec s_kiovec; + } u_iovec; + struct uio s_uio; + char uio_buf[ UIO_SIZEOF(1) ]; + } u_uio_buf; /* union only for aligning uio_buf correctly */ + int need_newpathbuf; + u_int linklen; + struct componentname *cnp = &ndp->ni_cnd; + vnode_t dp; + char *tmppn; + + if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { + return ELOOP; + } +#if CONFIG_MACF + if ((error = mac_vnode_check_readlink(ctx, ndp->ni_vp)) != 0) + return error; +#endif /* MAC */ + if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF)) + need_newpathbuf = 1; + else + need_newpathbuf = 0; + + if (need_newpathbuf) { + MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (cp == NULL) { + return ENOMEM; } - ndp->ni_dvp = dp; + } else { + cp = cnp->cn_pnbuf; } - cnp->cn_flags &= ~ISDOTDOT; - cnp->cn_flags |= ISLASTCN; - ndp->ni_next = cp; - ndp->ni_vp = dp; + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &u_uio_buf.uio_buf[0], sizeof(u_uio_buf.uio_buf)); - if (cnp->cn_flags & AUDITVNPATH1) - AUDIT_ARG(vnpath, dp, ARG_VNODE1); - else if (cnp->cn_flags & AUDITVNPATH2) - AUDIT_ARG(vnpath, dp, ARG_VNODE2); - if (cnp->cn_flags & SAVESTART) - panic("lookup: SAVESTART"); - return (0); + uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN); -bad2: - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); + error = VNOP_READLINK(ndp->ni_vp, auio, ctx); + if (error) { + if (need_newpathbuf) + FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); + return error; } - if (ndp->ni_dvp) - vnode_put(ndp->ni_dvp); - if (dp) - vnode_put(dp); - ndp->ni_vp = NULLVP; - if (kdebug_enable) - kdebug_lookup(dp, cnp); - return (error); + /* + * Safe to set unsigned with a [larger] signed type here + * because 0 <= uio_resid <= MAXPATHLEN and MAXPATHLEN + * is only 1024. + */ + linklen = MAXPATHLEN - (u_int)uio_resid(auio); + if (linklen + ndp->ni_pathlen > MAXPATHLEN) { + if (need_newpathbuf) + FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); + + return ENAMETOOLONG; + } + if (need_newpathbuf) { + long len = cnp->cn_pnlen; + + tmppn = cnp->cn_pnbuf; + bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); + cnp->cn_pnbuf = cp; + cnp->cn_pnlen = MAXPATHLEN; + + if ( (cnp->cn_flags & HASBUF) ) + FREE_ZONE(tmppn, len, M_NAMEI); + else + cnp->cn_flags |= HASBUF; + } else + cnp->cn_pnbuf[linklen] = '\0'; + + ndp->ni_pathlen += linklen; + cnp->cn_nameptr = cnp->cn_pnbuf; + + /* + * starting point for 'relative' + * symbolic link path + */ + dp = ndp->ni_dvp; + + /* + * get rid of references returned via 'lookup' + */ + vnode_put(ndp->ni_vp); + vnode_put(ndp->ni_dvp); /* ALWAYS have a dvp for a symlink */ -bad: - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } - if (dp) - vnode_put(dp); ndp->ni_vp = NULLVP; + ndp->ni_dvp = NULLVP; - if (kdebug_enable) - kdebug_lookup(dp, cnp); - return (error); + /* + * Check if symbolic link restarts us at the root + */ + if (*(cnp->cn_nameptr) == '/') { + while (*(cnp->cn_nameptr) == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } + if ((dp = ndp->ni_rootdir) == NULLVP) { + return ENOENT; + } + } + + *new_dp = dp; + + return 0; } /* @@ -1186,10 +1702,6 @@ bad: void nameidone(struct nameidata *ndp) { - if ((ndp->ni_cnd.cn_flags & FSNODELOCKHELD)) { - ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } if (ndp->ni_cnd.cn_flags & HASBUF) { char *tmp = ndp->ni_cnd.cn_pnbuf; @@ -1200,8 +1712,6 @@ nameidone(struct nameidata *ndp) } -#define NUMPARMS 23 - /* * Log (part of) a pathname using the KERNEL_DEBUG_CONSTANT mechanism, as used * by fs_usage. The path up to and including the current component name are @@ -1231,11 +1741,64 @@ nameidone(struct nameidata *ndp) * fails because /foo_bar_baz is not found will only log "/foo_bar_baz", with * no '>' padding. But /foo_bar/spam would log "/foo_bar>>>>". */ -static void -kdebug_lookup(struct vnode *dp, struct componentname *cnp) +#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) + +void +kdebug_vfs_lookup(long *dbg_parms, int dbg_namelen, void *dp, uint32_t flags) { - unsigned int i; int code; + unsigned int i; + bool lookup = flags & KDBG_VFS_LOOKUP_FLAG_LOOKUP; + bool noprocfilt = flags & KDBG_VFS_LOOKUP_FLAG_NOPROCFILT; + + /* + * In the event that we collect multiple, consecutive pathname + * entries, we must mark the start of the path's string and the end. + */ + if (lookup) { + code = VFS_LOOKUP | DBG_FUNC_START; + } else { + code = VFS_LOOKUP_DONE | DBG_FUNC_START; + } + + if (dbg_namelen <= (int)(3 * sizeof(long))) + code |= DBG_FUNC_END; + + if (noprocfilt) { + KDBG_RELEASE_NOPROCFILT(code, kdebug_vnode(dp), dbg_parms[0], + dbg_parms[1], dbg_parms[2]); + } else { + KDBG_RELEASE(code, kdebug_vnode(dp), dbg_parms[0], dbg_parms[1], + dbg_parms[2]); + } + + code &= ~DBG_FUNC_START; + + for (i=3, dbg_namelen -= (3 * sizeof(long)); dbg_namelen > 0; i+=4, dbg_namelen -= (4 * sizeof(long))) { + if (dbg_namelen <= (int)(4 * sizeof(long))) + code |= DBG_FUNC_END; + + if (noprocfilt) { + KDBG_RELEASE_NOPROCFILT(code, dbg_parms[i], dbg_parms[i + 1], + dbg_parms[i + 2], dbg_parms[i + 3]); + } else { + KDBG_RELEASE(code, dbg_parms[i], dbg_parms[i + 1], dbg_parms[i + 2], + dbg_parms[i + 3]); + } + } +} + +void +kdebug_lookup_gen_events(long *dbg_parms, int dbg_namelen, void *dp, + boolean_t lookup) +{ + kdebug_vfs_lookup(dbg_parms, dbg_namelen, dp, + lookup ? KDBG_VFS_LOOKUP_FLAG_LOOKUP : 0); +} + +void +kdebug_lookup(vnode_t dp, struct componentname *cnp) +{ int dbg_namelen; char *dbg_nameptr; long dbg_parms[NUMPARMS]; @@ -1257,28 +1820,45 @@ kdebug_lookup(struct vnode *dp, struct componentname *cnp) *(cnp->cn_nameptr + cnp->cn_namelen) ? '>' : 0, sizeof(dbg_parms) - dbg_namelen); } - - /* - * In the event that we collect multiple, consecutive pathname - * entries, we must mark the start of the path's string and the end. - */ - code = (FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START; + kdebug_vfs_lookup(dbg_parms, dbg_namelen, (void *)dp, + KDBG_VFS_LOOKUP_FLAG_LOOKUP); +} - if (dbg_namelen <= 12) - code |= DBG_FUNC_END; +#else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */ - KERNEL_DEBUG_CONSTANT(code, (unsigned int)dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0); +void +kdebug_vfs_lookup(long *dbg_parms __unused, int dbg_namelen __unused, + void *dp __unused, __unused uint32_t flags) +{ +} - code &= ~DBG_FUNC_START; +static void +kdebug_lookup(struct vnode *dp __unused, struct componentname *cnp __unused) +{ +} +#endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */ - for (i=3, dbg_namelen -= 12; dbg_namelen > 0; i+=4, dbg_namelen -= 16) { - if (dbg_namelen <= 16) - code |= DBG_FUNC_END; +int +vfs_getbyid(fsid_t *fsid, ino64_t ino, vnode_t *vpp, vfs_context_t ctx) +{ + mount_t mp; + int error; + + mp = mount_lookupby_volfsid(fsid->val[0], 1); + if (mp == NULL) { + return EINVAL; + } - KERNEL_DEBUG_CONSTANT(code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0); + /* Get the target vnode. */ + if (ino == 2) { + error = VFS_ROOT(mp, vpp, ctx); + } else { + error = VFS_VGET(mp, ino, vpp, ctx); } -} + vfs_unbusy(mp); + return error; +} /* * Obtain the real path from a legacy volfs style path. * @@ -1303,7 +1883,7 @@ vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_ struct mount *mp = NULL; char *str; char ch; - unsigned long id; + uint32_t id; ino64_t ino; int error; int length; @@ -1357,3 +1937,59 @@ out: return (error); } #endif + +void +lookup_compound_vnop_post_hook(int error, vnode_t dvp, vnode_t vp, struct nameidata *ndp, int did_create) +{ + if (error == 0 && vp == NULLVP) { + panic("NULL vp with error == 0.\n"); + } + + /* + * We don't want to do any of this if we didn't use the compound vnop + * to perform the lookup... i.e. if we're allowing and using the legacy pattern, + * where we did a full lookup. + */ + if ((ndp->ni_flag & NAMEI_COMPOUND_OP_MASK) == 0) { + return; + } + + /* + * If we're going to continue the lookup, we'll handle + * all lookup-related updates at that time. + */ + if (error == EKEEPLOOKING) { + return; + } + + /* + * Only audit or update cache for *found* vnodes. For creation + * neither would happen in the non-compound-vnop case. + */ + if ((vp != NULLVP) && !did_create) { + /* + * If MAKEENTRY isn't set, and we've done a successful compound VNOP, + * then we certainly don't want to update cache or identity. + */ + if ((error != 0) || (ndp->ni_cnd.cn_flags & MAKEENTRY)) { + lookup_consider_update_cache(dvp, vp, &ndp->ni_cnd, ndp->ni_ncgeneration); + } + if (ndp->ni_cnd.cn_flags & AUDITVNPATH1) + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + else if (ndp->ni_cnd.cn_flags & AUDITVNPATH2) + AUDIT_ARG(vnpath, vp, ARG_VNODE2); + } + + /* + * If you created (whether you opened or not), cut a lookup tracepoint + * for the parent dir (as would happen without a compound vnop). Note: we may need + * a vnode despite failure in this case! + * + * If you did not create: + * Found child (succeeded or not): cut a tracepoint for the child. + * Did not find child: cut a tracepoint with the parent. + */ + if (kdebug_enable) { + kdebug_lookup(vp ? vp : dvp, &ndp->ni_cnd); + } +}