X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/21362eb3e66fd2c787aee132bce100a44d71a99c..6d2010ae8f7a6078e10b361c6962983bab233e0f:/bsd/vfs/vfs_lookup.c diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index f0366b9c3..10b885d51 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -65,6 +65,12 @@ * * @(#)vfs_lookup.c 8.10 (Berkeley) 5/27/95 */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ #include #include @@ -83,15 +89,39 @@ #include #include -#include +#include + +#if CONFIG_MACF +#include +#endif -#if KTRACE -#include +#if NAMEDRSRCFORK +#include #endif +/* + * The minimum volfs-style pathname is 9. + * Example: "/.vol/1/2" + */ +#define VOLFS_MIN_PATH_LEN 9 static void kdebug_lookup(struct vnode *dp, struct componentname *cnp); +#if CONFIG_VOLFS +static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx); +#endif + +boolean_t lookup_continue_ok(struct nameidata *ndp); +int lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx); +int lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx); +int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx); +void lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation); +int lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx); +int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, + int vbusyflags, int *keep_going, int nc_generation, + int wantparent, int atroot, vfs_context_t ctx); +int lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent); + /* * Convert a pathname into a pointer to a locked inode. * @@ -111,22 +141,40 @@ static void kdebug_lookup(struct vnode *dp, struct componentname *cnp); * call lookup to search path. * if symbolic link, massage name in buffer and continue * } + * + * Returns: 0 Success + * ENOENT No such file or directory + * ELOOP Too many levels of symbolic links + * ENAMETOOLONG Filename too long + * copyinstr:EFAULT Bad address + * copyinstr:ENAMETOOLONG Filename too long + * lookup:EBADF Bad file descriptor + * lookup:EROFS + * lookup:EACCES + * lookup:EPERM + * lookup:ERECYCLE vnode was recycled from underneath us in lookup. + * This means we should re-drive lookup from this point. + * lookup: ??? + * VNOP_READLINK:??? */ - int -namei(ndp) - register struct nameidata *ndp; +namei(struct nameidata *ndp) { - register struct filedesc *fdp; /* pointer to file descriptor state */ - register char *cp; /* pointer into pathname argument */ - register struct vnode *dp; /* the directory we are searching */ - uio_t auio; + struct filedesc *fdp; /* pointer to file descriptor state */ + struct vnode *dp; /* the directory we are searching */ + struct vnode *usedvp = ndp->ni_dvp; /* store pointer to vp in case we must loop due to + heavy vnode pressure */ + u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */ int error; struct componentname *cnp = &ndp->ni_cnd; vfs_context_t ctx = cnp->cn_context; - struct proc *p = vfs_context_proc(ctx); - char *tmppn; - char uio_buf[ UIO_SIZEOF(1) ]; + proc_t p = vfs_context_proc(ctx); +#if CONFIG_AUDIT +/* XXX ut should be from context */ + uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread()); +#endif + + fdp = p->p_fd; #if DIAGNOSTIC if (!vfs_context_ucred(ctx) || !p) @@ -136,33 +184,68 @@ namei(ndp) if (cnp->cn_flags & OPMASK) panic ("namei: flags contaminated with nameiops"); #endif - fdp = p->p_fd; + + /* + * A compound VNOP found something that needs further processing: + * either a trigger vnode, a covered directory, or a symlink. + */ + if (ndp->ni_flag & NAMEI_CONTLOOKUP) { + int rdonly, vbusyflags, keep_going, wantparent; + + rdonly = cnp->cn_flags & RDONLY; + vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0; + keep_going = 0; + wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); + + ndp->ni_flag &= ~(NAMEI_CONTLOOKUP); + + error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags, + &keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx); + if (error) + goto out_drop; + if (keep_going) { + if ((cnp->cn_flags & ISSYMLINK) == 0) { + panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)\n", ndp->ni_vp->v_type, ndp->ni_vp->v_tag); + } + goto continue_symlink; + } + + return 0; + + } + +vnode_recycled: /* * Get a buffer for the name to be translated, and copy the * name into the buffer. */ if ((cnp->cn_flags & HASBUF) == 0) { - cnp->cn_pnbuf = &ndp->ni_pathbuf; + cnp->cn_pnbuf = ndp->ni_pathbuf; cnp->cn_pnlen = PATHBUFLEN; } #if LP64_DEBUG - if (IS_VALID_UIO_SEGFLG(ndp->ni_segflg) == 0) { + if ((UIO_SEG_IS_USER_SPACE(ndp->ni_segflg) == 0) + && (ndp->ni_segflg != UIO_SYSSPACE) + && (ndp->ni_segflg != UIO_SYSSPACE32)) { panic("%s :%d - invalid ni_segflg\n", __FILE__, __LINE__); } #endif /* LP64_DEBUG */ retry_copy: - if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) + if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) { error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen); - else + } else { error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf, cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen); - + } if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) { - MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, - MAXPATHLEN, M_NAMEI, M_WAITOK); + MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (cnp->cn_pnbuf == NULL) { + error = ENOMEM; + goto error_out; + } cnp->cn_flags |= HASBUF; cnp->cn_pnlen = MAXPATHLEN; @@ -172,24 +255,66 @@ retry_copy: if (error) goto error_out; +#if CONFIG_VOLFS + /* + * Check for legacy volfs style pathnames. + * + * For compatibility reasons we currently allow these paths, + * but future versions of the OS may not support them. + */ + if (ndp->ni_pathlen >= VOLFS_MIN_PATH_LEN && + cnp->cn_pnbuf[0] == '/' && + cnp->cn_pnbuf[1] == '.' && + cnp->cn_pnbuf[2] == 'v' && + cnp->cn_pnbuf[3] == 'o' && + cnp->cn_pnbuf[4] == 'l' && + cnp->cn_pnbuf[5] == '/' ) { + char * realpath; + int realpath_err; + /* Attempt to resolve a legacy volfs style pathname. */ + MALLOC_ZONE(realpath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (realpath) { + /* + * We only error out on the ENAMETOOLONG cases where we know that + * vfs_getrealpath translation succeeded but the path could not fit into + * MAXPATHLEN characters. In other failure cases, we may be dealing with a path + * that legitimately looks like /.vol/1234/567 and is not meant to be translated + */ + if ((realpath_err= vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, MAXPATHLEN, ctx))) { + FREE_ZONE(realpath, MAXPATHLEN, M_NAMEI); + if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG){ + error = ENAMETOOLONG; + goto error_out; + } + } else { + if (cnp->cn_flags & HASBUF) { + FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI); + } + cnp->cn_pnbuf = realpath; + cnp->cn_pnlen = MAXPATHLEN; + ndp->ni_pathlen = strlen(realpath) + 1; + cnp->cn_flags |= HASBUF | CN_VOLFSPATH; + } + } + } +#endif /* CONFIG_VOLFS */ + +#if CONFIG_AUDIT /* If we are auditing the kernel pathname, save the user pathname */ if (cnp->cn_flags & AUDITVNPATH1) - AUDIT_ARG(upath, p, cnp->cn_pnbuf, ARG_UPATH1); + AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1); if (cnp->cn_flags & AUDITVNPATH2) - AUDIT_ARG(upath, p, cnp->cn_pnbuf, ARG_UPATH2); + AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2); +#endif /* CONFIG_AUDIT */ /* * Do not allow empty pathnames */ if (*cnp->cn_pnbuf == '\0') { error = ENOENT; - goto error_out; + goto error_out; } ndp->ni_loopcnt = 0; -#if KTRACE - if (KTRPOINT(p, KTR_NAMEI)) - ktrnamei(p->p_tracep, cnp->cn_pnbuf); -#endif /* * determine the starting point for the translation. @@ -212,9 +337,9 @@ retry_copy: dp = ndp->ni_dvp; ndp->ni_usedvp = dp; } else - dp = fdp->fd_cdir; + dp = vfs_context_cwd(ctx); - if (dp == NULLVP) { + if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) { error = ENOENT; goto error_out; } @@ -222,9 +347,6 @@ retry_copy: ndp->ni_vp = NULLVP; for (;;) { - int need_newpathbuf; - int linklen; - ndp->ni_startdir = dp; if ( (error = lookup(ndp)) ) { @@ -236,88 +358,13 @@ retry_copy: if ((cnp->cn_flags & ISSYMLINK) == 0) { return (0); } - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } - if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { - error = ELOOP; - break; - } - if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF)) - need_newpathbuf = 1; - else - need_newpathbuf = 0; - - if (need_newpathbuf) { - MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - } else { - cp = cnp->cn_pnbuf; - } - auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); - - uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN); - - error = VNOP_READLINK(ndp->ni_vp, auio, ctx); - if (error) { - if (need_newpathbuf) - FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); - break; - } - // LP64todo - fix this - linklen = MAXPATHLEN - uio_resid(auio); - if (linklen + ndp->ni_pathlen > MAXPATHLEN) { - if (need_newpathbuf) - FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); - error = ENAMETOOLONG; +continue_symlink: + /* Gives us a new path to process, and a starting dir */ + error = lookup_handle_symlink(ndp, &dp, ctx); + if (error != 0) { break; } - if (need_newpathbuf) { - long len = cnp->cn_pnlen; - - tmppn = cnp->cn_pnbuf; - bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); - cnp->cn_pnbuf = cp; - cnp->cn_pnlen = MAXPATHLEN; - - if ( (cnp->cn_flags & HASBUF) ) - FREE_ZONE(tmppn, len, M_NAMEI); - else - cnp->cn_flags |= HASBUF; - } else - cnp->cn_pnbuf[linklen] = '\0'; - - ndp->ni_pathlen += linklen; - cnp->cn_nameptr = cnp->cn_pnbuf; - - /* - * starting point for 'relative' - * symbolic link path - */ - dp = ndp->ni_dvp; - /* - * get rid of references returned via 'lookup' - */ - vnode_put(ndp->ni_vp); - vnode_put(ndp->ni_dvp); - - ndp->ni_vp = NULLVP; - ndp->ni_dvp = NULLVP; - - /* - * Check if symbolic link restarts us at the root - */ - if (*(cnp->cn_nameptr) == '/') { - while (*(cnp->cn_nameptr) == '/') { - cnp->cn_nameptr++; - ndp->ni_pathlen--; - } - if ((dp = ndp->ni_rootdir) == NULLVP) { - error = ENOENT; - goto error_out; - } - } } /* * only come here if we fail to handle a SYMLINK... @@ -325,254 +372,226 @@ retry_copy: * we need to drop the iocount that was picked * up in the lookup routine */ +out_drop: if (ndp->ni_dvp) vnode_put(ndp->ni_dvp); if (ndp->ni_vp) vnode_put(ndp->ni_vp); error_out: if ( (cnp->cn_flags & HASBUF) ) { - cnp->cn_flags &= ~HASBUF; + cnp->cn_flags &= ~HASBUF; FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI); } cnp->cn_pnbuf = NULL; ndp->ni_vp = NULLVP; + ndp->ni_dvp = NULLVP; + if (error == ERECYCLE){ + /* vnode was recycled underneath us. re-drive lookup to start at + the beginning again, since recycling invalidated last lookup*/ + ndp->ni_cnd.cn_flags = cnpflags; + ndp->ni_dvp = usedvp; + goto vnode_recycled; + } + return (error); } +int +namei_compound_available(vnode_t dp, struct nameidata *ndp) +{ + if ((ndp->ni_flag & NAMEI_COMPOUNDOPEN) != 0) { + return vnode_compound_open_available(dp); + } -/* - * Search a pathname. - * This is a very central and rather complicated routine. - * - * The pathname is pointed to by ni_ptr and is of length ni_pathlen. - * The starting directory is taken from ni_startdir. The pathname is - * descended until done, or a symbolic link is encountered. The variable - * ni_more is clear if the path is completed; it is set to one if a - * symbolic link needing interpretation is encountered. - * - * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on - * whether the name is to be looked up, created, renamed, or deleted. - * When CREATE, RENAME, or DELETE is specified, information usable in - * creating, renaming, or deleting a directory entry may be calculated. - * If flag has LOCKPARENT or'ed into it, the parent directory is returned - * locked. If flag has WANTPARENT or'ed into it, the parent directory is - * returned unlocked. Otherwise the parent directory is not returned. If - * the target of the pathname exists and LOCKLEAF is or'ed into the flag - * the target is returned locked, otherwise it is returned unlocked. - * When creating or renaming and LOCKPARENT is specified, the target may not - * be ".". When deleting and LOCKPARENT is specified, the target may be ".". - * - * Overall outline of lookup: - * - * dirloop: - * identify next component of name at ndp->ni_ptr - * handle degenerate case where name is null string - * if .. and crossing mount points and on mounted filesys, find parent - * call VNOP_LOOKUP routine for next component name - * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set - * component vnode returned in ni_vp (if it exists), locked. - * if result vnode is mounted on and crossing mount points, - * find mounted on vnode - * if more components of name, do next level at dirloop - * return the answer in ni_vp, locked if LOCKLEAF set - * if LOCKPARENT set, return locked parent in ni_dvp - * if WANTPARENT set, return unlocked parent in ni_dvp - */ + return 0; +} int -lookup(ndp) - register struct nameidata *ndp; +lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx) { - register char *cp; /* pointer into pathname argument */ - vnode_t tdp; /* saved dp */ - vnode_t dp; /* the directory we are searching */ - mount_t mp; /* mount table entry */ - int docache = 1; /* == 0 do not cache last component */ - int wantparent; /* 1 => wantparent or lockparent flag */ - int rdonly; /* lookup read-only flag bit */ - int trailing_slash = 0; - int dp_authorized = 0; - int error = 0; - struct componentname *cnp = &ndp->ni_cnd; - vfs_context_t ctx = cnp->cn_context; + int error; - /* - * Setup: break out flag bits into variables. - */ - if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) { - if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE)) - docache = 0; + if (!dp_authorized_in_cache) { + error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx); + if (error) + return error; } - wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); - rdonly = cnp->cn_flags & RDONLY; - cnp->cn_flags &= ~ISSYMLINK; - cnp->cn_consume = 0; +#if CONFIG_MACF + error = mac_vnode_check_lookup(ctx, dp, cnp); + if (error) + return error; +#endif /* CONFIG_MACF */ - dp = ndp->ni_startdir; - ndp->ni_startdir = NULLVP; + return 0; +} - cp = cnp->cn_nameptr; +void +lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation) +{ + int isdot_or_dotdot; + isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT); - if (*cp == '\0') { - if ( (vnode_getwithref(dp)) ) { - dp = NULLVP; - error = ENOENT; - goto bad; + if (vp->v_name == NULL || vp->v_parent == NULLVP) { + int update_flags = 0; + + if (isdot_or_dotdot == 0) { + if (vp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (dvp != NULLVP && vp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + vnode_update_identity(vp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags); } - goto emptyname; } -dirloop: - ndp->ni_vp = NULLVP; - - if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized)) ) { - dp = NULLVP; - goto bad; + if ( (cnp->cn_flags & MAKEENTRY) && (vp->v_flag & VNCACHEABLE) && LIST_FIRST(&vp->v_nclinks) == NULL) { + /* + * missing from name cache, but should + * be in it... this can happen if volfs + * causes the vnode to be created or the + * name cache entry got recycled but the + * vnode didn't... + * check to make sure that ni_dvp is valid + * cache_lookup_path may return a NULL + * do a quick check to see if the generation of the + * directory matches our snapshot... this will get + * rechecked behind the name cache lock, but if it + * already fails to match, no need to go any further + */ + if (dvp != NULLVP && (nc_generation == dvp->v_nc_generation) && (!isdot_or_dotdot)) + cache_enter_with_gen(dvp, vp, cnp, nc_generation); } - if ((cnp->cn_flags & ISLASTCN)) { - if (docache) - cnp->cn_flags |= MAKEENTRY; - } else - cnp->cn_flags |= MAKEENTRY; - dp = ndp->ni_dvp; +} - if (ndp->ni_vp != NULLVP) { - /* - * cache_lookup_path returned a non-NULL ni_vp then, - * we're guaranteed that the dp is a VDIR, it's - * been authorized, and vp is not ".." - */ - goto returned_from_lookup_path; - } +#if NAMEDRSRCFORK +/* + * Can change ni_dvp and ni_vp. On success, returns with iocounts on stream vnode (always) and + * data fork if requested. On failure, returns with iocount data fork (always) and its parent directory + * (if one was provided). + */ +int +lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx) +{ + vnode_t svp = NULLVP; + enum nsoperation nsop; + int error; - /* - * Handle "..": two special cases. - * 1. If at root directory (e.g. after chroot) - * or at absolute root directory - * then ignore it so can't get out. - * 2. If this vnode is the root of a mounted - * filesystem, then replace it with the - * vnode which was mounted on so we take the - * .. in the other file system. - */ - if ( (cnp->cn_flags & ISDOTDOT) ) { - for (;;) { - if (dp == ndp->ni_rootdir || dp == rootvnode) { - ndp->ni_dvp = dp; - ndp->ni_vp = dp; - /* - * we're pinned at the root - * we've already got one reference on 'dp' - * courtesy of cache_lookup_path... take - * another one for the ".." - * if we fail to get the new reference, we'll - * drop our original down in 'bad' - */ - if ( (vnode_get(dp)) ) { - error = ENOENT; - goto bad; - } - goto nextname; + if (dp->v_type != VREG) { + error = ENOENT; + goto out; + } + switch (cnp->cn_nameiop) { + case DELETE: + if (cnp->cn_flags & CN_ALLOWRSRCFORK) { + nsop = NS_DELETE; + } else { + error = EPERM; + goto out; } - if ((dp->v_flag & VROOT) == 0 || - (cnp->cn_flags & NOCROSSMOUNT)) - break; - if (dp->v_mount == NULL) { /* forced umount */ - error = EBADF; - goto bad; + break; + case CREATE: + if (cnp->cn_flags & CN_ALLOWRSRCFORK) { + nsop = NS_CREATE; + } else { + error = EPERM; + goto out; } - tdp = dp; - dp = tdp->v_mount->mnt_vnodecovered; - - vnode_put(tdp); - - if ( (vnode_getwithref(dp)) ) { - dp = NULLVP; - error = ENOENT; - goto bad; + break; + case LOOKUP: + /* Make sure our lookup of "/..namedfork/rsrc" is allowed. */ + if (cnp->cn_flags & CN_ALLOWRSRCFORK) { + nsop = NS_OPEN; + } else { + error = EPERM; + goto out; } - ndp->ni_dvp = dp; - dp_authorized = 0; + break; + default: + error = EPERM; + goto out; + } + /* Ask the file system for the resource fork. */ + error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, 0, ctx); + + /* During a create, it OK for stream vnode to be missing. */ + if (error == ENOATTR || error == ENOENT) { + error = (nsop == NS_CREATE) ? 0 : ENOENT; + } + if (error) { + goto out; + } + /* The "parent" of the stream is the file. */ + if (wantparent) { + if (ndp->ni_dvp) { +#ifndef __LP64__ + if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) { + ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(ndp->ni_dvp, NULL); + } +#endif /* __LP64__ */ + vnode_put(ndp->ni_dvp); } + ndp->ni_dvp = dp; + } else { + vnode_put(dp); } + ndp->ni_vp = svp; /* on create this may be null */ - /* - * We now have a segment name to search for, and a directory to search. - */ -unionlookup: - ndp->ni_vp = NULLVP; - - if (dp->v_type != VDIR) { - error = ENOTDIR; - goto lookup_error; + /* Restore the truncated pathname buffer (for audits). */ + if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') { + ndp->ni_next[0] = '/'; } - if ( !(dp_authorized || (cnp->cn_flags & DONOTAUTH)) ) { - if ( (error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx)) ) - goto lookup_error; - } - if ( (error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx)) ) { -lookup_error: - if ((error == ENOENT) && - (dp->v_flag & VROOT) && (dp->v_mount != NULL) && - (dp->v_mount->mnt_flag & MNT_UNION)) { - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(dp, NULL); - } - tdp = dp; - dp = tdp->v_mount->mnt_vnodecovered; + cnp->cn_flags &= ~MAKEENTRY; - vnode_put(tdp); + return 0; +out: + return error; +} +#endif /* NAMEDRSRCFORK */ - if ( (vnode_getwithref(dp)) ) { - dp = NULLVP; - error = ENOENT; - goto bad; - } - ndp->ni_dvp = dp; - dp_authorized = 0; - goto unionlookup; - } +/* + * iocounts in: + * --One on ni_vp. One on ni_dvp if there is more path, or we didn't come through the + * cache, or we came through the cache and the caller doesn't want the parent. + * + * iocounts out: + * --Leaves us in the correct state for the next step, whatever that might be. + * --If we find a symlink, returns with iocounts on both ni_vp and ni_dvp. + * --If we are to look up another component, then we have an iocount on ni_vp and + * nothing else. + * --If we are done, returns an iocount on ni_vp, and possibly on ni_dvp depending on nameidata flags. + * --In the event of an error, may return with ni_dvp NULL'ed out (in which case, iocount + * was dropped). + */ +int +lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, + int vbusyflags, int *keep_going, int nc_generation, + int wantparent, int atroot, vfs_context_t ctx) +{ + vnode_t dp; + int error; + char *cp; - if (error != EJUSTRETURN) - goto bad; + dp = ndp->ni_vp; + *keep_going = 0; - if (ndp->ni_vp != NULLVP) - panic("leaf should be empty"); + if (ndp->ni_vp == NULLVP) { + panic("NULL ni_vp in %s\n", __FUNCTION__); + } - /* - * If creating and at end of pathname, then can consider - * allowing file to be created. - */ - if (rdonly) { - error = EROFS; - goto bad; - } - if ((cnp->cn_flags & ISLASTCN) && trailing_slash && !(cnp->cn_flags & WILLBEDIR)) { - error = ENOENT; - goto bad; - } - /* - * We return with ni_vp NULL to indicate that the entry - * doesn't currently exist, leaving a pointer to the - * referenced directory vnode in ndp->ni_dvp. - */ - if (cnp->cn_flags & SAVESTART) { - if ( (vnode_get(ndp->ni_dvp)) ) { - error = ENOENT; - goto bad; - } - ndp->ni_startdir = ndp->ni_dvp; - } - if (!wantparent) - vnode_put(ndp->ni_dvp); + if (atroot) { + goto nextname; + } - if (kdebug_enable) - kdebug_lookup(ndp->ni_dvp, cnp); - return (0); +#if CONFIG_TRIGGERS + if (dp->v_resolve) { + error = vnode_trigger_resolve(dp, ndp, ctx); + if (error) { + goto out; + } } -returned_from_lookup_path: - dp = ndp->ni_vp; +#endif /* CONFIG_TRIGGERS */ /* * Take into account any additional components consumed by @@ -584,100 +603,55 @@ returned_from_lookup_path: ndp->ni_pathlen -= cnp->cn_consume; cnp->cn_consume = 0; } else { - if (dp->v_name == NULL || dp->v_parent == NULLVP) { - int isdot_or_dotdot; - int update_flags = 0; - - isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT); - - if (isdot_or_dotdot == 0) { - if (dp->v_name == NULL) - update_flags |= VNODE_UPDATE_NAME; - if (ndp->ni_dvp != NULLVP && dp->v_parent == NULLVP) - update_flags |= VNODE_UPDATE_PARENT; - - if (update_flags) - vnode_update_identity(dp, ndp->ni_dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags); - } - } - - if ( (cnp->cn_flags & MAKEENTRY) && (dp->v_flag & VNCACHEABLE) && LIST_FIRST(&dp->v_nclinks) == NULL) { - /* - * missing from name cache, but should - * be in it... this can happen if volfs - * causes the vnode to be created or the - * name cache entry got recycled but the - * vnode didn't... - * check to make sure that ni_dvp is valid - * cache_lookup_path may return a NULL - */ - if (ndp->ni_dvp != NULL) - cache_enter(ndp->ni_dvp, dp, cnp); - } + lookup_consider_update_cache(ndp->ni_dvp, dp, cnp, nc_generation); } /* * Check to see if the vnode has been mounted on... * if so find the root of the mounted file system. + * Updates ndp->ni_vp. */ -check_mounted_on: - if ((dp->v_type == VDIR) && dp->v_mountedhere && - ((cnp->cn_flags & NOCROSSMOUNT) == 0)) { - - vnode_lock(dp); - - if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) { - - mp->mnt_crossref++; - vnode_unlock(dp); - - if (vfs_busy(mp, 0)) { - mount_dropcrossref(mp, dp, 0); - goto check_mounted_on; - } - error = VFS_ROOT(mp, &tdp, ctx); - /* - * mount_dropcrossref does a vnode_put - * on dp if the 3rd arg is non-zero - */ - mount_dropcrossref(mp, dp, 1); - dp = NULL; - vfs_unbusy(mp); + error = lookup_traverse_mountpoints(ndp, cnp, dp, vbusyflags, ctx); + dp = ndp->ni_vp; + if (error) { + goto out; + } - if (error) { - goto bad2; - } - ndp->ni_vp = dp = tdp; - - goto check_mounted_on; - } - vnode_unlock(dp); +#if CONFIG_MACF + if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) { + error = vnode_label(vnode_mount(dp), NULL, dp, NULL, 0, ctx); + if (error) + goto out; } +#endif /* * Check for symbolic link */ if ((dp->v_type == VLNK) && - ((cnp->cn_flags & FOLLOW) || trailing_slash || *ndp->ni_next == '/')) { + ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) { cnp->cn_flags |= ISSYMLINK; + *keep_going = 1; return (0); } /* * Check for bogus trailing slashes. */ - if (trailing_slash) { + if ((ndp->ni_flag & NAMEI_TRAILINGSLASH)) { if (dp->v_type != VDIR) { error = ENOTDIR; - goto bad2; + goto out; } - trailing_slash = 0; - } - + ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH); + } + nextname: /* * Not a symbolic link. If more pathname, * continue at next component, else return. + * + * Definitely have a dvp if there's another slash */ if (*ndp->ni_next == '/') { cnp->cn_nameptr = ndp->ni_next + 1; @@ -686,15 +660,17 @@ nextname: cnp->cn_nameptr++; ndp->ni_pathlen--; } - vnode_put(ndp->ni_dvp); cp = cnp->cn_nameptr; + vnode_put(ndp->ni_dvp); + ndp->ni_dvp = NULLVP; - if (*cp == '\0') + if (*cp == '\0') { goto emptyname; + } - vnode_put(dp); - goto dirloop; + *keep_going = 1; + return 0; } /* @@ -703,8 +679,10 @@ nextname: if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EROFS; - goto bad2; + goto out; } + + /* If SAVESTART is set, we should have a dvp */ if (cnp->cn_flags & SAVESTART) { /* * note that we already hold a reference @@ -714,71 +692,422 @@ nextname: */ if ( (vnode_get(ndp->ni_dvp)) ) { error = ENOENT; - goto bad2; + goto out; } ndp->ni_startdir = ndp->ni_dvp; } - if (!wantparent && ndp->ni_dvp) - vnode_put(ndp->ni_dvp); + if (!wantparent && ndp->ni_dvp) { + vnode_put(ndp->ni_dvp); + ndp->ni_dvp = NULLVP; + } + + if (cnp->cn_flags & AUDITVNPATH1) + AUDIT_ARG(vnpath, dp, ARG_VNODE1); + else if (cnp->cn_flags & AUDITVNPATH2) + AUDIT_ARG(vnpath, dp, ARG_VNODE2); + +#if NAMEDRSRCFORK + /* + * Caller wants the resource fork. + */ + if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) { + error = lookup_handle_rsrc_fork(dp, ndp, cnp, wantparent, ctx); + if (error != 0) + goto out; + + dp = ndp->ni_vp; + } +#endif + if (kdebug_enable) + kdebug_lookup(dp, cnp); + + return 0; + +emptyname: + error = lookup_handle_emptyname(ndp, cnp, wantparent); + if (error != 0) + goto out; + + return 0; +out: + return error; + +} + +/* + * Comes in iocount on ni_vp. May overwrite ni_dvp, but doesn't interpret incoming value. + */ +int +lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent) +{ + vnode_t dp; + int error = 0; + + dp = ndp->ni_vp; + cnp->cn_namelen = 0; + /* + * A degenerate name (e.g. / or "") which is a way of + * talking about a directory, e.g. like "/." or ".". + */ + if (dp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + if (cnp->cn_nameiop != LOOKUP) { + error = EISDIR; + goto out; + } + if (wantparent) { + /* + * note that we already hold a reference + * on dp, but for some reason can't + * get another one... in this case we + * need to do vnode_put on dp in 'bad' + */ + if ( (vnode_get(dp)) ) { + error = ENOENT; + goto out; + } + ndp->ni_dvp = dp; + } + cnp->cn_flags &= ~ISDOTDOT; + cnp->cn_flags |= ISLASTCN; + ndp->ni_next = cnp->cn_nameptr; + ndp->ni_vp = dp; + + if (cnp->cn_flags & AUDITVNPATH1) + AUDIT_ARG(vnpath, dp, ARG_VNODE1); + else if (cnp->cn_flags & AUDITVNPATH2) + AUDIT_ARG(vnpath, dp, ARG_VNODE2); + if (cnp->cn_flags & SAVESTART) + panic("lookup: SAVESTART"); + + return 0; +out: + return error; +} +/* + * Search a pathname. + * This is a very central and rather complicated routine. + * + * The pathname is pointed to by ni_ptr and is of length ni_pathlen. + * The starting directory is taken from ni_startdir. The pathname is + * descended until done, or a symbolic link is encountered. The variable + * ni_more is clear if the path is completed; it is set to one if a + * symbolic link needing interpretation is encountered. + * + * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on + * whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it, the parent directory is returned + * locked. If flag has WANTPARENT or'ed into it, the parent directory is + * returned unlocked. Otherwise the parent directory is not returned. If + * the target of the pathname exists and LOCKLEAF is or'ed into the flag + * the target is returned locked, otherwise it is returned unlocked. + * When creating or renaming and LOCKPARENT is specified, the target may not + * be ".". When deleting and LOCKPARENT is specified, the target may be ".". + * + * Overall outline of lookup: + * + * dirloop: + * identify next component of name at ndp->ni_ptr + * handle degenerate case where name is null string + * if .. and crossing mount points and on mounted filesys, find parent + * call VNOP_LOOKUP routine for next component name + * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set + * component vnode returned in ni_vp (if it exists), locked. + * if result vnode is mounted on and crossing mount points, + * find mounted on vnode + * if more components of name, do next level at dirloop + * return the answer in ni_vp, locked if LOCKLEAF set + * if LOCKPARENT set, return locked parent in ni_dvp + * if WANTPARENT set, return unlocked parent in ni_dvp + * + * Returns: 0 Success + * ENOENT No such file or directory + * EBADF Bad file descriptor + * ENOTDIR Not a directory + * EROFS Read-only file system [CREATE] + * EISDIR Is a directory [CREATE] + * cache_lookup_path:ERECYCLE (vnode was recycled from underneath us, redrive lookup again) + * vnode_authorize:EROFS + * vnode_authorize:EACCES + * vnode_authorize:EPERM + * vnode_authorize:??? + * VNOP_LOOKUP:ENOENT No such file or directory + * VNOP_LOOKUP:EJUSTRETURN Restart system call (INTERNAL) + * VNOP_LOOKUP:??? + * VFS_ROOT:ENOTSUP + * VFS_ROOT:ENOENT + * VFS_ROOT:??? + */ +int +lookup(struct nameidata *ndp) +{ + char *cp; /* pointer into pathname argument */ + vnode_t tdp; /* saved dp */ + vnode_t dp; /* the directory we are searching */ + int docache = 1; /* == 0 do not cache last component */ + int wantparent; /* 1 => wantparent or lockparent flag */ + int rdonly; /* lookup read-only flag bit */ + int dp_authorized = 0; + int error = 0; + struct componentname *cnp = &ndp->ni_cnd; + vfs_context_t ctx = cnp->cn_context; + int vbusyflags = 0; + int nc_generation = 0; + vnode_t last_dp = NULLVP; + int keep_going; + int atroot; + + /* + * Setup: break out flag bits into variables. + */ + if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) { + if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE)) + docache = 0; + } + wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); + rdonly = cnp->cn_flags & RDONLY; + cnp->cn_flags &= ~ISSYMLINK; + cnp->cn_consume = 0; + + dp = ndp->ni_startdir; + ndp->ni_startdir = NULLVP; + + if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) + vbusyflags = LK_NOWAIT; + cp = cnp->cn_nameptr; + + if (*cp == '\0') { + if ( (vnode_getwithref(dp)) ) { + dp = NULLVP; + error = ENOENT; + goto bad; + } + ndp->ni_vp = dp; + error = lookup_handle_emptyname(ndp, cnp, wantparent); + if (error) { + goto bad; + } + + return 0; + } +dirloop: + atroot = 0; + ndp->ni_vp = NULLVP; + + if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp)) ) { + dp = NULLVP; + goto bad; + } + if ((cnp->cn_flags & ISLASTCN)) { + if (docache) + cnp->cn_flags |= MAKEENTRY; + } else + cnp->cn_flags |= MAKEENTRY; + + dp = ndp->ni_dvp; + + if (ndp->ni_vp != NULLVP) { + /* + * cache_lookup_path returned a non-NULL ni_vp then, + * we're guaranteed that the dp is a VDIR, it's + * been authorized, and vp is not ".." + * + * make sure we don't try to enter the name back into + * the cache if this vp is purged before we get to that + * check since we won't have serialized behind whatever + * activity is occurring in the FS that caused the purge + */ + if (dp != NULLVP) + nc_generation = dp->v_nc_generation - 1; + + goto returned_from_lookup_path; + } + + /* + * Handle "..": two special cases. + * 1. If at root directory (e.g. after chroot) + * or at absolute root directory + * then ignore it so can't get out. + * 2. If this vnode is the root of a mounted + * filesystem, then replace it with the + * vnode which was mounted on so we take the + * .. in the other file system. + */ + if ( (cnp->cn_flags & ISDOTDOT) ) { + for (;;) { + if (dp == ndp->ni_rootdir || dp == rootvnode) { + ndp->ni_dvp = dp; + ndp->ni_vp = dp; + /* + * we're pinned at the root + * we've already got one reference on 'dp' + * courtesy of cache_lookup_path... take + * another one for the ".." + * if we fail to get the new reference, we'll + * drop our original down in 'bad' + */ + if ( (vnode_get(dp)) ) { + error = ENOENT; + goto bad; + } + atroot = 1; + goto returned_from_lookup_path; + } + if ((dp->v_flag & VROOT) == 0 || + (cnp->cn_flags & NOCROSSMOUNT)) + break; + if (dp->v_mount == NULL) { /* forced umount */ + error = EBADF; + goto bad; + } + tdp = dp; + dp = tdp->v_mount->mnt_vnodecovered; + + vnode_put(tdp); + + if ( (vnode_getwithref(dp)) ) { + dp = NULLVP; + error = ENOENT; + goto bad; + } + ndp->ni_dvp = dp; + dp_authorized = 0; + } + } + + /* + * We now have a segment name to search for, and a directory to search. + */ +unionlookup: + ndp->ni_vp = NULLVP; + + if (dp->v_type != VDIR) { + error = ENOTDIR; + goto lookup_error; + } + if ( (cnp->cn_flags & DONOTAUTH) != DONOTAUTH ) { + error = lookup_authorize_search(dp, cnp, dp_authorized, ctx); + if (error) { + goto lookup_error; + } + } + + /* + * Now that we've authorized a lookup, can bail out if the filesystem + * will be doing a batched operation. Return an iocount on dvp. + */ +#if NAMEDRSRCFORK + if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) { +#else + if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) { +#endif /* NAMEDRSRCFORK */ + ndp->ni_flag |= NAMEI_UNFINISHED; + ndp->ni_ncgeneration = dp->v_nc_generation; + return 0; + } + + nc_generation = dp->v_nc_generation; + + error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx); + + + if ( error ) { +lookup_error: + if ((error == ENOENT) && + (dp->v_flag & VROOT) && (dp->v_mount != NULL) && + (dp->v_mount->mnt_flag & MNT_UNION)) { +#ifndef __LP64__ + if ((cnp->cn_flags & FSNODELOCKHELD)) { + cnp->cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(dp, NULL); + } +#endif /* __LP64__ */ + tdp = dp; + dp = tdp->v_mount->mnt_vnodecovered; + + vnode_put(tdp); + + if ( (vnode_getwithref(dp)) ) { + dp = NULLVP; + error = ENOENT; + goto bad; + } + ndp->ni_dvp = dp; + dp_authorized = 0; + goto unionlookup; + } + + if (error != EJUSTRETURN) + goto bad; + + if (ndp->ni_vp != NULLVP) + panic("leaf should be empty"); + + error = lookup_validate_creation_path(ndp); + if (error) + goto bad; + /* + * We return with ni_vp NULL to indicate that the entry + * doesn't currently exist, leaving a pointer to the + * referenced directory vnode in ndp->ni_dvp. + */ + if (cnp->cn_flags & SAVESTART) { + if ( (vnode_get(ndp->ni_dvp)) ) { + error = ENOENT; + goto bad; + } + ndp->ni_startdir = ndp->ni_dvp; + } + if (!wantparent) + vnode_put(ndp->ni_dvp); + + if (kdebug_enable) + kdebug_lookup(ndp->ni_dvp, cnp); + return (0); + } +returned_from_lookup_path: + /* We'll always have an iocount on ni_vp when this finishes. */ + error = lookup_handle_found_vnode(ndp, cnp, rdonly, vbusyflags, &keep_going, nc_generation, wantparent, atroot, ctx); + if (error != 0) { + goto bad2; + } - if (cnp->cn_flags & AUDITVNPATH1) - AUDIT_ARG(vnpath, dp, ARG_VNODE1); - else if (cnp->cn_flags & AUDITVNPATH2) - AUDIT_ARG(vnpath, dp, ARG_VNODE2); + if (keep_going) { + dp = ndp->ni_vp; - if (kdebug_enable) - kdebug_lookup(dp, cnp); - return (0); + /* namei() will handle symlinks */ + if ((dp->v_type == VLNK) && + ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) { + return 0; + } -emptyname: - cnp->cn_namelen = 0; - /* - * A degenerate name (e.g. / or "") which is a way of - * talking about a directory, e.g. like "/." or ".". - */ - if (dp->v_type != VDIR) { - error = ENOTDIR; - goto bad; - } - if (cnp->cn_nameiop != LOOKUP) { - error = EISDIR; - goto bad; - } - if (wantparent) { - /* - * note that we already hold a reference - * on dp, but for some reason can't - * get another one... in this case we - * need to do vnode_put on dp in 'bad' + /* + * Otherwise, there's more path to process. + * cache_lookup_path is now responsible for dropping io ref on dp + * when it is called again in the dirloop. This ensures we hold + * a ref on dp until we complete the next round of lookup. */ - if ( (vnode_get(dp)) ) { - error = ENOENT; - goto bad; - } - ndp->ni_dvp = dp; + last_dp = dp; + + goto dirloop; } - cnp->cn_flags &= ~ISDOTDOT; - cnp->cn_flags |= ISLASTCN; - ndp->ni_next = cp; - ndp->ni_vp = dp; - if (cnp->cn_flags & AUDITVNPATH1) - AUDIT_ARG(vnpath, dp, ARG_VNODE1); - else if (cnp->cn_flags & AUDITVNPATH2) - AUDIT_ARG(vnpath, dp, ARG_VNODE2); - if (cnp->cn_flags & SAVESTART) - panic("lookup: SAVESTART"); return (0); - bad2: +#ifndef __LP64__ if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } +#endif /* __LP64__ */ if (ndp->ni_dvp) - vnode_put(ndp->ni_dvp); - if (dp) - vnode_put(dp); + vnode_put(ndp->ni_dvp); + + vnode_put(ndp->ni_vp); ndp->ni_vp = NULLVP; if (kdebug_enable) @@ -786,10 +1115,12 @@ bad2: return (error); bad: +#ifndef __LP64__ if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } +#endif /* __LP64__ */ if (dp) vnode_put(dp); ndp->ni_vp = NULLVP; @@ -799,16 +1130,265 @@ bad: return (error); } +int +lookup_validate_creation_path(struct nameidata *ndp) +{ + struct componentname *cnp = &ndp->ni_cnd; + + /* + * If creating and at end of pathname, then can consider + * allowing file to be created. + */ + if (cnp->cn_flags & RDONLY) { + return EROFS; + } + if ((cnp->cn_flags & ISLASTCN) && (ndp->ni_flag & NAMEI_TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) { + return ENOENT; + } + + return 0; +} + +/* + * Modifies only ni_vp. Always returns with ni_vp still valid (iocount held). + */ +int +lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, + int vbusyflags, vfs_context_t ctx) +{ + mount_t mp; + vnode_t tdp; + int error = 0; + uthread_t uth; + uint32_t depth = 0; + int dont_cache_mp = 0; + vnode_t mounted_on_dp; + int current_mount_generation = 0; + + mounted_on_dp = dp; + current_mount_generation = mount_generation; + + while ((dp->v_type == VDIR) && dp->v_mountedhere && + ((cnp->cn_flags & NOCROSSMOUNT) == 0)) { +#if CONFIG_TRIGGERS + /* + * For a trigger vnode, call its resolver when crossing its mount (if requested) + */ + if (dp->v_resolve) { + (void) vnode_trigger_resolve(dp, ndp, ctx); + } +#endif + vnode_lock(dp); + + if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) { + + mp->mnt_crossref++; + vnode_unlock(dp); + + + if (vfs_busy(mp, vbusyflags)) { + mount_dropcrossref(mp, dp, 0); + if (vbusyflags == LK_NOWAIT) { + error = ENOENT; + goto out; + } + + continue; + } + + + /* + * XXX - if this is the last component of the + * pathname, and it's either not a lookup operation + * or the NOTRIGGER flag is set for the operation, + * set a uthread flag to let VFS_ROOT() for autofs + * know it shouldn't trigger a mount. + */ + uth = (struct uthread *)get_bsdthread_info(current_thread()); + if ((cnp->cn_flags & ISLASTCN) && + (cnp->cn_nameiop != LOOKUP || + (cnp->cn_flags & NOTRIGGER))) { + uth->uu_notrigger = 1; + dont_cache_mp = 1; + } + + error = VFS_ROOT(mp, &tdp, ctx); + /* XXX - clear the uthread flag */ + uth->uu_notrigger = 0; + + mount_dropcrossref(mp, dp, 0); + vfs_unbusy(mp); + + if (error) { + goto out; + } + + vnode_put(dp); + ndp->ni_vp = dp = tdp; + depth++; + +#if CONFIG_TRIGGERS + /* + * Check if root dir is a trigger vnode + */ + if (dp->v_resolve) { + error = vnode_trigger_resolve(dp, ndp, ctx); + if (error) { + goto out; + } + } +#endif + + } else { + vnode_unlock(dp); + break; + } + } + + if (depth && !dont_cache_mp) { + mp = mounted_on_dp->v_mountedhere; + + if (mp) { + mount_lock_spin(mp); + mp->mnt_realrootvp_vid = dp->v_id; + mp->mnt_realrootvp = dp; + mp->mnt_generation = current_mount_generation; + mount_unlock(mp); + } + } + + return 0; + +out: + return error; +} + +/* + * Takes ni_vp and ni_dvp non-NULL. Returns with *new_dp set to the location + * at which to start a lookup with a resolved path, and all other iocounts dropped. + */ +int +lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx) +{ + int error; + char *cp; /* pointer into pathname argument */ + uio_t auio; + char uio_buf[ UIO_SIZEOF(1) ]; + int need_newpathbuf; + u_int linklen; + struct componentname *cnp = &ndp->ni_cnd; + vnode_t dp; + char *tmppn; + +#ifndef __LP64__ + if ((cnp->cn_flags & FSNODELOCKHELD)) { + cnp->cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(ndp->ni_dvp, NULL); + } +#endif /* __LP64__ */ + + if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { + return ELOOP; + } +#if CONFIG_MACF + if ((error = mac_vnode_check_readlink(ctx, ndp->ni_vp)) != 0) + return error; +#endif /* MAC */ + if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF)) + need_newpathbuf = 1; + else + need_newpathbuf = 0; + + if (need_newpathbuf) { + MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (cp == NULL) { + return ENOMEM; + } + } else { + cp = cnp->cn_pnbuf; + } + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + + uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN); + + error = VNOP_READLINK(ndp->ni_vp, auio, ctx); + if (error) { + if (need_newpathbuf) + FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); + return error; + } + + /* + * Safe to set unsigned with a [larger] signed type here + * because 0 <= uio_resid <= MAXPATHLEN and MAXPATHLEN + * is only 1024. + */ + linklen = MAXPATHLEN - (u_int)uio_resid(auio); + if (linklen + ndp->ni_pathlen > MAXPATHLEN) { + if (need_newpathbuf) + FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); + + return ENAMETOOLONG; + } + if (need_newpathbuf) { + long len = cnp->cn_pnlen; + + tmppn = cnp->cn_pnbuf; + bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); + cnp->cn_pnbuf = cp; + cnp->cn_pnlen = MAXPATHLEN; + + if ( (cnp->cn_flags & HASBUF) ) + FREE_ZONE(tmppn, len, M_NAMEI); + else + cnp->cn_flags |= HASBUF; + } else + cnp->cn_pnbuf[linklen] = '\0'; + + ndp->ni_pathlen += linklen; + cnp->cn_nameptr = cnp->cn_pnbuf; + + /* + * starting point for 'relative' + * symbolic link path + */ + dp = ndp->ni_dvp; + + /* + * get rid of references returned via 'lookup' + */ + vnode_put(ndp->ni_vp); + vnode_put(ndp->ni_dvp); /* ALWAYS have a dvp for a symlink */ + + ndp->ni_vp = NULLVP; + ndp->ni_dvp = NULLVP; + + /* + * Check if symbolic link restarts us at the root + */ + if (*(cnp->cn_nameptr) == '/') { + while (*(cnp->cn_nameptr) == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } + if ((dp = ndp->ni_rootdir) == NULLVP) { + return ENOENT; + } + } + + *new_dp = dp; + + return 0; +} + /* * relookup - lookup a path name component * Used by lookup to re-aquire things. */ int -relookup(dvp, vpp, cnp) - struct vnode *dvp, **vpp; - struct componentname *cnp; +relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { - struct vnode *dp = 0; /* the directory we are searching */ + struct vnode *dp = NULL; /* the directory we are searching */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int error = 0; @@ -911,16 +1491,27 @@ bad: return (error); } -/* - * Free pathname buffer - */ void -nameidone(struct nameidata *ndp) +namei_unlock_fsnode(struct nameidata *ndp) { +#ifndef __LP64__ if ((ndp->ni_cnd.cn_flags & FSNODELOCKHELD)) { ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } +#else + (void)ndp; +#endif /* __LP64__ */ +} + +/* + * Free pathname buffer + */ +void +nameidone(struct nameidata *ndp) +{ + namei_unlock_fsnode(ndp); + if (ndp->ni_cnd.cn_flags & HASBUF) { char *tmp = ndp->ni_cnd.cn_pnbuf; @@ -933,68 +1524,242 @@ nameidone(struct nameidata *ndp) #define NUMPARMS 23 +/* + * Log (part of) a pathname using the KERNEL_DEBUG_CONSTANT mechanism, as used + * by fs_usage. The path up to and including the current component name are + * logged. Up to NUMPARMS*4 bytes of pathname will be logged. If the path + * to be logged is longer than that, then the last NUMPARMS*4 bytes are logged. + * That is, the truncation removes the leading portion of the path. + * + * The logging is done via multiple KERNEL_DEBUG_CONSTANT calls. The first one + * is marked with DBG_FUNC_START. The last one is marked with DBG_FUNC_END + * (in addition to DBG_FUNC_START if it is also the first). There may be + * intermediate ones with neither DBG_FUNC_START nor DBG_FUNC_END. + * + * The first KERNEL_DEBUG_CONSTANT passes the vnode pointer and 12 bytes of + * pathname. The remaining KERNEL_DEBUG_CONSTANT calls add 16 bytes of pathname + * each. The minimum number of KERNEL_DEBUG_CONSTANT calls required to pass + * the path are used. Any excess padding in the final KERNEL_DEBUG_CONSTANT + * (because not all of the 12 or 16 bytes are needed for the remainder of the + * path) is set to zero bytes, or '>' if there is more path beyond the + * current component name (usually because an intermediate component was not + * found). + * + * NOTE: If the path length is greater than NUMPARMS*4, or is not of the form + * 12+N*16, there will be no padding. + * + * TODO: If there is more path beyond the current component name, should we + * force some padding? For example, a lookup for /foo_bar_baz/spam that + * fails because /foo_bar_baz is not found will only log "/foo_bar_baz", with + * no '>' padding. But /foo_bar/spam would log "/foo_bar>>>>". + */ +#if !defined(NO_KDEBUG) static void -kdebug_lookup(dp, cnp) - struct vnode *dp; - struct componentname *cnp; +kdebug_lookup(struct vnode *dp, struct componentname *cnp) { - register unsigned int i, n; - register int dbg_namelen; - register int save_dbg_namelen; - register char *dbg_nameptr; + unsigned int i; + int code; + int dbg_namelen; + char *dbg_nameptr; long dbg_parms[NUMPARMS]; - char dbg_buf[4]; - static char *dbg_filler = ">>>>"; /* Collect the pathname for tracing */ dbg_namelen = (cnp->cn_nameptr - cnp->cn_pnbuf) + cnp->cn_namelen; dbg_nameptr = cnp->cn_nameptr + cnp->cn_namelen; - if (dbg_namelen > sizeof(dbg_parms)) - dbg_namelen = sizeof(dbg_parms); + if (dbg_namelen > (int)sizeof(dbg_parms)) + dbg_namelen = sizeof(dbg_parms); dbg_nameptr -= dbg_namelen; - save_dbg_namelen = dbg_namelen; + + /* Copy the (possibly truncated) path itself */ + memcpy(dbg_parms, dbg_nameptr, dbg_namelen); + + /* Pad with '\0' or '>' */ + if (dbg_namelen < (int)sizeof(dbg_parms)) { + memset((char *)dbg_parms + dbg_namelen, + *(cnp->cn_nameptr + cnp->cn_namelen) ? '>' : 0, + sizeof(dbg_parms) - dbg_namelen); + } + + /* + * In the event that we collect multiple, consecutive pathname + * entries, we must mark the start of the path's string and the end. + */ + code = (FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START; + + if (dbg_namelen <= 12) + code |= DBG_FUNC_END; + + KERNEL_DEBUG_CONSTANT(code, dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0); + + code &= ~DBG_FUNC_START; + + for (i=3, dbg_namelen -= 12; dbg_namelen > 0; i+=4, dbg_namelen -= 16) { + if (dbg_namelen <= 16) + code |= DBG_FUNC_END; + + KERNEL_DEBUG_CONSTANT(code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0); + } +} +#else /* NO_KDEBUG */ +static void +kdebug_lookup(struct vnode *dp __unused, struct componentname *cnp __unused) +{ +} +#endif /* NO_KDEBUG */ + +int +vfs_getbyid(fsid_t *fsid, ino64_t ino, vnode_t *vpp, vfs_context_t ctx) +{ + mount_t mp; + int error; + + mp = mount_lookupby_volfsid(fsid->val[0], 1); + if (mp == NULL) { + return EINVAL; + } + + /* Get the target vnode. */ + if (ino == 2) { + error = VFS_ROOT(mp, vpp, ctx); + } else { + error = VFS_VGET(mp, ino, vpp, ctx); + } + + vfs_unbusy(mp); + return error; +} +/* + * Obtain the real path from a legacy volfs style path. + * + * Valid formats of input path: + * + * "555/@" + * "555/2" + * "555/123456" + * "555/123456/foobar" + * + * Where: + * 555 represents the volfs file system id + * '@' and '2' are aliases to the root of a file system + * 123456 represents a file id + * "foobar" represents a file name + */ +#if CONFIG_VOLFS +static int +vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx) +{ + vnode_t vp; + struct mount *mp = NULL; + char *str; + char ch; + uint32_t id; + ino64_t ino; + int error; + int length; + + /* Get file system id and move str to next component. */ + id = strtoul(path, &str, 10); + if (id == 0 || str[0] != '/') { + return (EINVAL); + } + while (*str == '/') { + str++; + } + ch = *str; + + mp = mount_lookupby_volfsid(id, 1); + if (mp == NULL) { + return (EINVAL); /* unexpected failure */ + } + /* Check for an alias to a file system root. */ + if (ch == '@' && str[1] == '\0') { + ino = 2; + str++; + } else { + /* Get file id and move str to next component. */ + ino = strtouq(str, &str, 10); + } + + /* Get the target vnode. */ + if (ino == 2) { + error = VFS_ROOT(mp, &vp, ctx); + } else { + error = VFS_VGET(mp, ino, &vp, ctx); + } + vfs_unbusy(mp); + if (error) { + goto out; + } + realpath[0] = '\0'; - i = 0; + /* Get the absolute path to this vnode. */ + error = build_path(vp, realpath, bufsize, &length, 0, ctx); + vnode_put(vp); - while (dbg_namelen > 0) { - if (dbg_namelen >= 4) { - dbg_parms[i++] = *(long *)dbg_nameptr; - dbg_nameptr += sizeof(long); - dbg_namelen -= sizeof(long); - } else { - for (n = 0; n < dbg_namelen; n++) - dbg_buf[n] = *dbg_nameptr++; - while (n <= 3) { - if (*dbg_nameptr) - dbg_buf[n++] = '>'; - else - dbg_buf[n++] = 0; + if (error == 0 && *str != '\0') { + int attempt = strlcat(realpath, str, MAXPATHLEN); + if (attempt > MAXPATHLEN){ + error = ENAMETOOLONG; } - dbg_parms[i++] = *(long *)&dbg_buf[0]; + } +out: + return (error); +} +#endif + +void +lookup_compound_vnop_post_hook(int error, vnode_t dvp, vnode_t vp, struct nameidata *ndp, int did_create) +{ + if (error == 0 && vp == NULLVP) { + panic("NULL vp with error == 0.\n"); + } - break; - } + /* + * We don't want to do any of this if we didn't use the compound vnop + * to perform the lookup... i.e. if we're allowing and using the legacy pattern, + * where we did a full lookup. + */ + if ((ndp->ni_flag & NAMEI_COMPOUND_OP_MASK) == 0) { + return; } - while (i < NUMPARMS) { - if (*dbg_nameptr) - dbg_parms[i++] = *(long *)dbg_filler; - else - dbg_parms[i++] = 0; + + /* + * If we're going to continue the lookup, we'll handle + * all lookup-related updates at that time. + */ + if (error == EKEEPLOOKING) { + return; } /* - In the event that we collect multiple, consecutive pathname - entries, we must mark the start of the path's string. - */ - KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START, - (unsigned int)dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0); - - for (dbg_namelen = save_dbg_namelen-12, i=3; - dbg_namelen > 0; - dbg_namelen -=(4 * sizeof(long)), i+= 4) - { - KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_NONE, - dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0); - } + * Only audit or update cache for *found* vnodes. For creation + * neither would happen in the non-compound-vnop case. + */ + if ((vp != NULLVP) && !did_create) { + /* + * If MAKEENTRY isn't set, and we've done a successful compound VNOP, + * then we certainly don't want to update cache or identity. + */ + if ((error != 0) || (ndp->ni_cnd.cn_flags & MAKEENTRY)) { + lookup_consider_update_cache(dvp, vp, &ndp->ni_cnd, ndp->ni_ncgeneration); + } + if (ndp->ni_cnd.cn_flags & AUDITVNPATH1) + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + else if (ndp->ni_cnd.cn_flags & AUDITVNPATH2) + AUDIT_ARG(vnpath, vp, ARG_VNODE2); + } + + /* + * If you created (whether you opened or not), cut a lookup tracepoint + * for the parent dir (as would happen without a compound vnop). Note: we may need + * a vnode despite failure in this case! + * + * If you did not create: + * Found child (succeeded or not): cut a tracepoint for the child. + * Did not find child: cut a tracepoint with the parent. + */ + if (kdebug_enable) { + kdebug_lookup(vp ? vp : dvp, &ndp->ni_cnd); + } }