X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/378393581903b274cb7a4d18e0d978071a6b592d..13f56ec4e58bf8687e2a68032c093c0213dd519b:/bsd/hfs/hfs_cnode.c diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c index 132b17cc2..016df24e0 100644 --- a/bsd/hfs/hfs_cnode.c +++ b/bsd/hfs/hfs_cnode.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2008 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include #include @@ -30,6 +36,7 @@ #include #include #include +#include #include @@ -40,6 +47,7 @@ #include #include #include +#include extern int prtactive; @@ -47,246 +55,553 @@ extern lck_attr_t * hfs_lock_attr; extern lck_grp_t * hfs_mutex_group; extern lck_grp_t * hfs_rwlock_group; -static int hfs_filedone(struct vnode *vp, vfs_context_t context); - static void hfs_reclaim_cnode(struct cnode *); +static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim); +static int hfs_isordered(struct cnode *, struct cnode *); -static int hfs_valid_cnode(struct hfsmount *, struct vnode *, struct componentname *, cnid_t); +__inline__ int hfs_checkdeleted (struct cnode *cp) { + return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0); +} -static int hfs_isordered(struct cnode *, struct cnode *); -int hfs_vnop_inactive(struct vnop_inactive_args *); +/* + * Function used by a special fcntl() that decorates a cnode/vnode that + * indicates it is backing another filesystem, like a disk image. + * + * the argument 'val' indicates whether or not to set the bit in the cnode flags + * + * Returns non-zero on failure. 
0 on success + */ +int hfs_set_backingstore (struct vnode *vp, int val) { + struct cnode *cp = NULL; + int err = 0; + + cp = VTOC(vp); + if (!vnode_isreg(vp) && !vnode_isdir(vp)) { + return EINVAL; + } -int hfs_vnop_reclaim(struct vnop_reclaim_args *); + /* lock the cnode */ + err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK); + if (err) { + return err; + } + + if (val) { + cp->c_flag |= C_BACKINGSTORE; + } + else { + cp->c_flag &= ~C_BACKINGSTORE; + } + /* unlock everything */ + hfs_unlock (cp); + + return err; +} /* - * Last reference to an cnode. If necessary, write or delete it. + * Function used by a special fcntl() that check to see if a cnode/vnode + * indicates it is backing another filesystem, like a disk image. + * + * the argument 'val' is an output argument for whether or not the bit is set + * + * Returns non-zero on failure. 0 on success */ -__private_extern__ -int -hfs_vnop_inactive(struct vnop_inactive_args *ap) -{ - struct vnode *vp = ap->a_vp; - struct cnode *cp; - struct hfsmount *hfsmp = VTOHFS(vp); - struct proc *p = vfs_context_proc(ap->a_context); - int error = 0; - int recycle = 0; - int forkcount = 0; - int truncated = 0; - int started_tr = 0; - int took_trunc_lock = 0; - cat_cookie_t cookie; - int cat_reserve = 0; - int lockflags; - enum vtype v_type; - v_type = vnode_vtype(vp); - cp = VTOC(vp); +int hfs_is_backingstore (struct vnode *vp, int *val) { + struct cnode *cp = NULL; + int err = 0; - if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) || - (hfsmp->hfs_freezing_proc == p)) { - return (0); + if (!vnode_isreg(vp) && !vnode_isdir(vp)) { + *val = 0; + return 0; } - /* - * Ignore nodes related to stale file handles. 
- */ - if (cp->c_mode == 0) { - vnode_recycle(vp); - return (0); - } + cp = VTOC(vp); - if ((v_type == VREG) && - (ISSET(cp->c_flag, C_DELETED) || VTOF(vp)->ff_blocks)) { - hfs_lock_truncate(cp, TRUE); - took_trunc_lock = 1; + /* lock the cnode */ + err = hfs_lock (cp, HFS_SHARED_LOCK); + if (err) { + return err; } - /* - * We do the ubc_setsize before we take the cnode - * lock and before the hfs_truncate (since we'll - * be inside a transaction). - */ - if ((v_type == VREG || v_type == VLNK) && - (cp->c_flag & C_DELETED) && - (VTOF(vp)->ff_blocks != 0)) { - ubc_setsize(vp, 0); + if (cp->c_flag & C_BACKINGSTORE) { + *val = 1; + } + else { + *val = 0; } - (void) hfs_lock(cp, HFS_FORCE_LOCK); + /* unlock everything */ + hfs_unlock (cp); - if (v_type == VREG && !ISSET(cp->c_flag, C_DELETED) && VTOF(vp)->ff_blocks) { - hfs_filedone(vp, ap->a_context); + return err; +} + + +/* + * hfs_cnode_teardown + * + * This is an internal function that is invoked from both hfs_vnop_inactive + * and hfs_vnop_reclaim. As VNOP_INACTIVE is not necessarily called from vnodes + * being recycled and reclaimed, it is important that we do any post-processing + * necessary for the cnode in both places. Important tasks include things such as + * releasing the blocks from an open-unlinked file when all references to it have dropped, + * and handling resource forks separately from data forks. + * + * Note that we take only the vnode as an argument here (rather than the cnode). + * Recall that each cnode supports two forks (rsrc/data), and we can always get the right + * cnode from either of the vnodes, but the reverse is not true -- we can't determine which + * vnode we need to reclaim if only the cnode is supplied. + * + * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim + * if both are invoked right after the other. 
In the second call, most of this function's if() + * conditions will fail, since they apply generally to cnodes still marked with C_DELETED. + * As a quick check to see if this function is necessary, determine if the cnode is already + * marked C_NOEXISTS. If it is, then it is safe to skip this function. The only tasks that + * remain for cnodes marked in such a fashion is to teardown their fork references and + * release all directory hints and hardlink origins. However, both of those are done + * in hfs_vnop_reclaim. hfs_update, by definition, is not necessary if the cnode's catalog + * entry is no longer there. + * + * 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim. If we are + * invoked from hfs_vnop_reclaim, we can not call functions that cluster_push since the UBC info + * is totally gone by that point. + * + * Assumes that both truncate and cnode locks for 'cp' are held. + */ +static +int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { + + int forkcount = 0; + enum vtype v_type; + struct cnode *cp; + int error = 0; + int started_tr = 0; + struct hfsmount *hfsmp = VTOHFS(vp); + struct proc *p = vfs_context_proc(ctx); + int truncated = 0; + cat_cookie_t cookie; + int cat_reserve = 0; + int lockflags; + int ea_error = 0; + + v_type = vnode_vtype(vp); + cp = VTOC(vp); + + if (cp->c_datafork) { + ++forkcount; + } + if (cp->c_rsrcfork) { + ++forkcount; + } + + + /* + * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim. + * The dirty regions would have already been synced to disk, so informing UBC + * that they can toss the pages doesn't help anyone at this point. + * + * Note that this is a performance problem if the vnode goes straight to reclaim + * (and skips inactive), since there would be no way for anyone to notify the UBC + * that all pages in this file are basically useless. 
+ */ + if (reclaim == 0) { + /* + * Check whether we are tearing down a cnode with only one remaining fork. + * If there are blocks in its filefork, then we need to unlock the cnode + * before calling ubc_setsize. The cluster layer may re-enter the filesystem + * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock + * panic. + */ + + if ((v_type == VREG || v_type == VLNK) && + (cp->c_flag & C_DELETED) && + (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) { + hfs_unlock(cp); + /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */ + ubc_setsize(vp, 0); + (void) hfs_lock(cp, HFS_FORCE_LOCK); + } + } + + /* + * Push file data out for normal files that haven't been evicted from + * the namespace. We only do this if this function was not called from reclaim, + * because by that point the UBC information has been totally torn down. + * + * There should also be no way that a normal file that has NOT been deleted from + * the namespace to skip INACTIVE and go straight to RECLAIM. That race only happens + * when the file becomes open-unlinked. + */ + if ((v_type == VREG) && + (!ISSET(cp->c_flag, C_DELETED)) && + (!ISSET(cp->c_flag, C_NOEXISTS)) && + (VTOF(vp)->ff_blocks) && + (reclaim == 0)) { + hfs_filedone(vp, ctx); } /* - * Remove any directory hints + * Remove any directory hints or cached origins */ - if (v_type == VDIR) + if (v_type == VDIR) { hfs_reldirhints(cp, 0); - - if (cp->c_datafork) - ++forkcount; - if (cp->c_rsrcfork) - ++forkcount; - - /* If needed, get rid of any fork's data for a deleted file */ - if ((v_type == VREG || v_type == VLNK) && (cp->c_flag & C_DELETED)) { + } + if (cp->c_flag & C_HARDLINK) { + hfs_relorigins(cp); + } + + /* + * This check is slightly complicated. We should only truncate data + * in very specific cases for open-unlinked files. This is because + * we want to ensure that the resource fork continues to be available + * if the caller has the data fork open. 
However, this is not symmetric; + * someone who has the resource fork open need not be able to access the data + * fork once the data fork has gone inactive. + * + * If we're the last fork, then we have cleaning up to do. + * + * A) last fork, and vp == c_vp + * Truncate away own fork data. If rsrc fork is not in core, truncate it too. + * + * B) last fork, and vp == c_rsrc_vp + * Truncate ourselves, assume data fork has been cleaned due to C). + * + * If we're not the last fork, then things are a little different: + * + * C) not the last fork, vp == c_vp + * Truncate ourselves. Once the file has gone out of the namespace, + * it cannot be further opened. Further access to the rsrc fork may + * continue, however. + * + * D) not the last fork, vp == c_rsrc_vp + * Don't enter the block below, just clean up vnode and push it out of core. + */ + + if ((v_type == VREG || v_type == VLNK) && + (cp->c_flag & C_DELETED) && + ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) { + + /* Truncate away our own fork data. (Case A, B, C above) */ if (VTOF(vp)->ff_blocks != 0) { - // start the transaction out here so that - // the truncate and the removal of the file - // are all in one transaction. otherwise - // because this cnode is marked for deletion - // the truncate won't cause the catalog entry - // to get updated which means that we could - // free blocks but still keep a reference to - // them in the catalog entry and then double - // free them later. - // -// if (hfs_start_transaction(hfsmp) != 0) { -// error = EINVAL; -// goto out; -// } -// started_tr = 1; - /* * Since we're already inside a transaction, * tell hfs_truncate to skip the ubc_setsize. + * + * This truncate call (and the one below) is fine from VNOP_RECLAIM's + * context because we're only removing blocks, not zero-filling new + * ones. The C_DELETED check above makes things much simpler. 
*/ - error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ap->a_context); - if (error) + error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 0, ctx); + if (error) { goto out; + } truncated = 1; } - recycle = 1; + + /* + * Truncate away the resource fork, if we represent the data fork and + * it is the last fork. That means, by definition, the rsrc fork is not in + * core. So we bring it into core, and then truncate it away. + * + * This is invoked via case A above only. + */ + if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) { + struct vnode *rvp = NULLVP; + + /* + * It is safe for us to pass FALSE to the argument can_drop_lock + * on this call to hfs_vgetrsrc. We know that the resource fork does not + * exist in core, so we'll have to go to the catalog to retrieve its + * information. That will attach the resource fork vnode to our cnode. + */ + error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE, FALSE); + if (error) { + goto out; + } + /* + * Defer the vnode_put and ubc_setsize on rvp until hfs_unlock(). + * + * By bringing the vnode into core above, we may force hfs_vnop_reclaim + * to only partially finish if that's what called us. Bringing the + * resource fork into core results in a new rsrc vnode that will get + * immediately marked for termination below. It will get recycled/reclaimed + * as soon as possible, but that could cause another round of inactive and reclaim. + */ + cp->c_flag |= C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE; + error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, 0, ctx); + if (error) { + goto out; + } + + /* + * Note that the following call to vnode_recycle is safe from within the + * context of hfs_vnop_inactive or hfs_vnop_reclaim. It is being invoked + * on the RSRC fork vp (which is not our current vnode) As such, we hold + * an iocount on it and vnode_recycle will just add the MARKTERM bit at this + * point. + */ + vnode_recycle(rvp); /* all done with this vnode */ + } } - + /* - * Check for a postponed deletion. 
- * (only delete cnode when the last fork goes inactive) + * If we represent the last fork (or none in the case of a dir), + * and the cnode has become open-unlinked, + * AND it has EA's, then we need to get rid of them. + * + * Note that this must happen outside of any other transactions + * because it starts/ends its own transactions and grabs its + * own locks. This is to prevent a file with a lot of attributes + * from creating a transaction that is too large (which panics). */ - if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) { - /* - * Mark cnode in transit so that no one can get this - * cnode from cnode hash. - */ - hfs_chash_mark_in_transit(cp); - - cp->c_flag &= ~C_DELETED; - cp->c_flag |= C_NOEXISTS; // XXXdbg - cp->c_rdev = 0; - - if (started_tr == 0) { - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; - goto out; - } - started_tr = 1; - } + if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 && + (cp->c_flag & C_DELETED) && + (forkcount <= 1)) { + + ea_error = hfs_removeallattr(hfsmp, cp->c_fileid); + } + + + /* + * If the cnode represented an open-unlinked file, then now + * actually remove the cnode's catalog entry and release all blocks + * it may have been using. + */ + if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) { + /* + * Mark cnode in transit so that no one can get this + * cnode from cnode hash. + */ + // hfs_chash_mark_in_transit(hfsmp, cp); + // XXXdbg - remove the cnode from the hash table since it's deleted + // otherwise someone could go to sleep on the cnode and not + // be woken up until this vnode gets recycled which could be + // a very long time... + hfs_chashremove(hfsmp, cp); + + cp->c_flag |= C_NOEXISTS; // XXXdbg + cp->c_rdev = 0; + + if (started_tr == 0) { + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + started_tr = 1; + } + + /* + * Reserve some space in the Catalog file. 
+ */ + if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { + goto out; + } + cat_reserve = 1; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + if (cp->c_blocks > 0) { + printf("hfs_inactive: deleting non-empty%sfile %d, " + "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ", + (int)cp->c_fileid, (int)cp->c_blocks); + } - /* - * Reserve some space in the Catalog file. - */ - if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { - goto out; - } - cat_reserve = 1; - - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); - - if (cp->c_blocks > 0) - printf("hfs_inactive: attempting to delete a non-empty file!"); - - - // - // release the name pointer in the descriptor so that - // cat_delete() will use the file-id to do the deletion. - // in the case of hard links this is imperative (in the - // case of regular files the fileid and cnid are the - // same so it doesn't matter). // - cat_releasedesc(&cp->c_desc); + // release the name pointer in the descriptor so that + // cat_delete() will use the file-id to do the deletion. + // in the case of hard links this is imperative (in the + // case of regular files the fileid and cnid are the + // same so it doesn't matter). + // + cat_releasedesc(&cp->c_desc); - /* - * The descriptor name may be zero, - * in which case the fileid is used. - */ - error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); + /* + * The descriptor name may be zero, + * in which case the fileid is used. 
+ */ + error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); - if (error && truncated && (error != ENXIO)) - printf("hfs_inactive: couldn't delete a truncated file!"); - - /* Update HFS Private Data dir */ - if (error == 0) { - hfsmp->hfs_privdir_attr.ca_entries--; - (void)cat_update(hfsmp, &hfsmp->hfs_privdir_desc, - &hfsmp->hfs_privdir_attr, NULL, NULL); - } - - if (error == 0) { - /* Delete any attributes, ignore errors */ - (void) hfs_removeallattr(hfsmp, cp->c_fileid); - } - - hfs_systemfile_unlock(hfsmp, lockflags); - - if (error) + if (error && truncated && (error != ENXIO)) + printf("hfs_inactive: couldn't delete a truncated file!"); + + /* Update HFS Private Data dir */ + if (error == 0) { + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; + if (vnode_isdir(vp)) { + DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { goto out; - + } + #if QUOTA - (void)hfs_chkiq(cp, -1, NOCRED, 0); + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_chkiq(cp, -1, NOCRED, 0); #endif /* QUOTA */ - - cp->c_mode = 0; - cp->c_flag |= C_NOEXISTS; - cp->c_touch_chgtime = TRUE; - cp->c_touch_modtime = TRUE; - - if (error == 0) - hfs_volupdate(hfsmp, VOL_RMFILE, 0); - } - - if ((cp->c_flag & C_MODIFIED) || - cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) { - hfs_update(vp, 0); - } + + /* Already set C_NOEXISTS at the beginning of this block */ + cp->c_flag &= ~C_DELETED; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + + if (error == 0) + hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0); + } + + /* + * A file may have had delayed allocations, in which case hfs_update + * would not have updated the catalog record (cat_update). We need + * to do that now, before we lose our fork data. 
We also need to + * force the update, or hfs_update will again skip the cat_update. + * + * If the file has C_NOEXISTS set, then we can skip the hfs_update call + * because the catalog entry has already been removed. There would be no point + * to looking up the entry in the catalog to modify it when we already know it's gone + */ + if ((!ISSET(cp->c_flag, C_NOEXISTS)) && + ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime || + cp->c_touch_chgtime || cp->c_touch_modtime)) { + + if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){ + cp->c_flag |= C_FORCEUPDATE; + } + hfs_update(vp, 0); + } + out: - if (cat_reserve) - cat_postflight(hfsmp, &cookie, p); + if (cat_reserve) + cat_postflight(hfsmp, &cookie, p); + + // XXXdbg - have to do this because a goto could have come here + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = 0; + } + + + return error; +} - // XXXdbg - have to do this because a goto could have come here - if (started_tr) { - hfs_end_transaction(hfsmp); - started_tr = 0; - } - hfs_unlock(cp); - if (took_trunc_lock) - hfs_unlock_truncate(cp); +/* + * hfs_vnop_inactive + * + * The last usecount on the vnode has gone away, so we need to tear down + * any remaining data still residing in the cnode. If necessary, write out + * remaining blocks or delete the cnode's entry in the catalog. + */ +int +hfs_vnop_inactive(struct vnop_inactive_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp; + struct hfsmount *hfsmp = VTOHFS(vp); + struct proc *p = vfs_context_proc(ap->a_context); + int error = 0; + int took_trunc_lock = 0; + enum vtype v_type; + + v_type = vnode_vtype(vp); + cp = VTOC(vp); + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) || + (hfsmp->hfs_freezing_proc == p)) { + error = 0; + goto inactive_done; + } + /* - * If we are done with the vnode, reclaim it - * so that it can be reused immediately. + * For safety, do NOT call vnode_recycle from inside this function. 
This can cause + * problems in the following scenario: + * + * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE + * + * If we're being invoked as a result of a reclaim that was already in-flight, then we + * cannot call vnode_recycle again. Being in reclaim means that there are no usecounts or + * iocounts by definition. As a result, if we were to call vnode_recycle, it would immediately + * try to re-enter reclaim again and panic. + * + * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called. + * 1) last usecount goes away on the vnode (vnode_rele) + * 2) last iocount goes away on a vnode that previously had usecounts but didn't have + * vnode_recycle called (vnode_put) + * 3) vclean by way of reclaim + * + * In this function we would generally want to call vnode_recycle to speed things + * along to ensure that we don't leak blocks due to open-unlinked files. However, by + * virtue of being in this function already, we can call hfs_cnode_teardown, which + * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that + * there's no entry in the catalog and no backing store anymore. If that's the case, + * then we really don't care all that much when the vnode actually goes through reclaim. + * Further, the HFS VNOPs that manipulated the namespace in order to create the open- + * unlinked file in the first place should have already called vnode_recycle on the vnode + * to guarantee that it would go through reclaim in a speedy way. */ - if (cp->c_mode == 0 || recycle) - vnode_recycle(vp); - - return (error); + + if (cp->c_flag & C_NOEXISTS) { + /* + * If the cnode has already had its cat entry removed, then + * just skip to the end. We don't need to do anything here. 
+ */ + error = 0; + goto inactive_done; + } + + if ((v_type == VREG || v_type == VLNK)) { + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + took_trunc_lock = 1; + } + + (void) hfs_lock(cp, HFS_FORCE_LOCK); + + /* + * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked + * files' blocks from being in use, and move the cnode from C_DELETED to C_NOEXISTS. + */ + error = hfs_cnode_teardown (vp, ap->a_context, 0); + + /* + * Drop the truncate lock before unlocking the cnode + * (which can potentially perform a vnode_put and + * recycle the vnode which in turn might require the + * truncate lock) + */ + if (took_trunc_lock) { + hfs_unlock_truncate(cp, 0); + } + + hfs_unlock(cp); + +inactive_done: + + return error; } + /* * File clean-up (zero fill and shrink peof). */ -static int + +int hfs_filedone(struct vnode *vp, vfs_context_t context) { struct cnode *cp; struct filefork *fp; struct hfsmount *hfsmp; + struct rl_entry *invalid_range; off_t leof; - u_long blks, blocksize; + u_int32_t blks, blocksize; + int cluster_flags = IO_CLOSE; + int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE; cp = VTOC(vp); fp = VTOF(vp); @@ -296,16 +611,25 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0)) return (0); + /* + * If we are being invoked from F_SWAPDATAEXTENTS, then we + * need to issue synchronous IO; Unless we are sure that all + * of the data has been written to the disk, we won't know + * that all of the blocks have been allocated properly. + */ + if (cp->c_flag & C_SWAPINPROGRESS) { + cluster_flags |= IO_SYNC; + } + hfs_unlock(cp); - (void) cluster_push(vp, IO_CLOSE); + (void) cluster_push(vp, cluster_flags); hfs_lock(cp, HFS_FORCE_LOCK); /* * Explicitly zero out the areas of file * that are currently marked invalid. 
*/ - while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) { - struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges); + while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) { off_t start = invalid_range->rl_start; off_t end = invalid_range->rl_end; @@ -317,8 +641,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) hfs_unlock(cp); (void) cluster_write(vp, (struct uio *) 0, - leof, end + 1, start, (off_t)0, - IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); + leof, end + 1, start, (off_t)0, cluster_zero_flags); hfs_lock(cp, HFS_FORCE_LOCK); cp->c_flag |= C_MODIFIED; } @@ -332,9 +655,9 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) * Shrink the peof to the smallest size neccessary to contain the leof. */ if (blks < fp->ff_blocks) - (void) hfs_truncate(vp, leof, IO_NDELAY, 0, context); + (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context); hfs_unlock(cp); - (void) cluster_push(vp, IO_CLOSE); + (void) cluster_push(vp, cluster_flags); hfs_lock(cp, HFS_FORCE_LOCK); /* @@ -352,7 +675,6 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) /* * Reclaim a cnode so that it can be used for other purposes. */ -__private_extern__ int hfs_vnop_reclaim(struct vnop_reclaim_args *ap) { @@ -360,17 +682,41 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) struct cnode *cp; struct filefork *fp = NULL; struct filefork *altfp = NULL; + struct hfsmount *hfsmp = VTOHFS(vp); + vfs_context_t ctx = ap->a_context; int reclaim_cnode = 0; - - (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK); + int err = 0; + enum vtype v_type; + + v_type = vnode_vtype(vp); cp = VTOC(vp); + + /* + * We don't take the truncate lock since by the time reclaim comes along, + * all dirty pages have been synced and nobody should be competing + * with us for this thread. + */ + (void) hfs_lock (cp, HFS_FORCE_LOCK); + + /* + * Sync to disk any remaining data in the cnode/vnode. This includes + * a call to hfs_update if the cnode has outbound data. 
+ * + * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do + * because the catalog entry for this cnode is already gone. + */ + if (!ISSET(cp->c_flag, C_NOEXISTS)) { + err = hfs_cnode_teardown(vp, ctx, 1); + } /* * Keep track of an inactive hot file. */ - if (!vnode_isdir(vp) && !vnode_issystem(vp)) + if (!vnode_isdir(vp) && + !vnode_issystem(vp) && + !(cp->c_flag & (C_DELETED | C_NOEXISTS)) ) { (void) hfs_addhotfile(vp); - + } vnode_removefsref(vp); /* @@ -390,14 +736,14 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) cp->c_rsrcfork = NULL; cp->c_rsrc_vp = NULL; } else { - panic("hfs_vnop_reclaim: vp points to wrong cnode\n"); + panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp); } /* * On the last fork, remove the cnode from its hash chain. */ if (altfp == NULL) { /* If we can't remove it then the cnode must persist! */ - if (hfs_chashremove(cp) == 0) + if (hfs_chashremove(hfsmp, cp) == 0) reclaim_cnode = 1; /* * Remove any directory hints @@ -405,6 +751,10 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) if (vnode_isdir(vp)) { hfs_reldirhints(cp, 0); } + + if(cp->c_flag & C_HARDLINK) { + hfs_relorigins(cp); + } } /* Release the file fork and related data */ if (fp) { @@ -419,9 +769,14 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) * If there was only one active fork then we can release the cnode. */ if (reclaim_cnode) { - hfs_chashwakeup(cp, H_ALLOC | H_TRANSIT); + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT); hfs_reclaim_cnode(cp); - } else /* cnode in use */ { + } + else { + /* + * cnode in use. If it is a directory, it could have + * no live forks. Just release the lock. 
+ */ hfs_unlock(cp); } @@ -431,25 +786,28 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) extern int (**hfs_vnodeop_p) (void *); +extern int (**hfs_std_vnodeop_p) (void *); extern int (**hfs_specop_p) (void *); +#if FIFO extern int (**hfs_fifoop_p) (void *); +#endif /* * hfs_getnewvnode - get new default vnode * * The vnode is returned with an iocount and the cnode locked */ -__private_extern__ int hfs_getnewvnode( struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, struct cat_desc *descp, - int wantrsrc, + int flags, struct cat_attr *attrp, struct cat_fork *forkp, - struct vnode **vpp) + struct vnode **vpp, + int *out_flags) { struct mount *mp = HFSTOVFS(hfsmp); struct vnode *vp = NULL; @@ -457,11 +815,18 @@ hfs_getnewvnode( struct vnode *tvp = NULLVP; struct cnode *cp = NULL; struct filefork *fp = NULL; - int i; + int hfs_standard = 0; int retval; int issystemfile; + int wantrsrc; + int hflags = 0; struct vnode_fsparam vfsp; enum vtype vtype; +#if QUOTA + int i; +#endif /* QUOTA */ + + hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD); if (attrp->ca_fileid == 0) { *vpp = NULL; @@ -473,36 +838,152 @@ hfs_getnewvnode( *vpp = NULL; return (ENOTSUP); } -#endif +#endif /* !FIFO */ vtype = IFTOVT(attrp->ca_mode); issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG); + wantrsrc = flags & GNV_WANTRSRC; + + /* Zero out the out_flags */ + *out_flags = 0; + +#ifdef HFS_CHECK_LOCK_ORDER + /* + * The only case were its permissible to hold the parent cnode + * lock is during a create operation (hfs_makenode) or when + * we don't need the cnode lock (GNV_SKIPLOCK). 
+ */ + if ((dvp != NULL) && + (flags & (GNV_CREATE | GNV_SKIPLOCK)) == 0 && + VTOC(dvp)->c_lockowner == current_thread()) { + panic("hfs_getnewvnode: unexpected hold of parent cnode %p", VTOC(dvp)); + } +#endif /* HFS_CHECK_LOCK_ORDER */ /* * Get a cnode (new or existing) - * skip getting the cnode lock if we are getting resource fork (wantrsrc == 2) */ - cp = hfs_chash_getcnode(hfsmp->hfs_raw_dev, attrp->ca_fileid, vpp, wantrsrc, (wantrsrc == 2)); + cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc, + (flags & GNV_SKIPLOCK), out_flags, &hflags); - /* Hardlinks may need an updated catalog descriptor */ - if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) { - replace_desc(cp, descp); + /* + * If the id is no longer valid for lookups we'll get back a NULL cp. + */ + if (cp == NULL) { + return (ENOENT); } + + /* + * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the + * descriptor in the cnode as needed if the cnode represents a hardlink. + * We want the caller to get the most up-to-date copy of the descriptor + * as possible. However, we only do anything here if there was a valid vnode. + * If there isn't a vnode, then the cnode is brand new and needs to be initialized + * as it doesn't have a descriptor or cat_attr yet. + * + * If we are about to replace the descriptor with the user-supplied one, then validate + * that the descriptor correctly acknowledges this item is a hardlink. We could be + * subject to a race where the calling thread invoked cat_lookup, got a valid lookup + * result but the file was not yet a hardlink. With sufficient delay between there + * and here, we might accidentally copy in the raw inode ID into the descriptor in the + * call below. If the descriptor's CNID is the same as the fileID then it must + * not yet have been a hardlink when the lookup occurred. 
+ */ + + if (!(hfs_checkdeleted(cp))) { + if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) { + /* If cnode is uninitialized, its c_attr will be zeroed out; cnids wont match. */ + if ((descp->cd_cnid == cp->c_attr.ca_fileid) && + (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){ + if ((flags & GNV_SKIPLOCK) == 0) { + /* + * Then we took the lock. Drop it before calling + * vnode_put, which may invoke hfs_vnop_inactive and need to take + * the cnode lock again. + */ + hfs_unlock(cp); + } + + /* + * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to + * force a re-drive in the lookup routine. + * Drop the iocount on the vnode obtained from + * chash_getcnode if needed. + */ + if (*vpp != NULL) { + vnode_put (*vpp); + *vpp = NULL; + } + + /* + * If we raced with VNOP_RECLAIM for this vnode, the hash code could + * have observed it after the c_vp or c_rsrc_vp fields had been torn down; + * the hash code peeks at those fields without holding the cnode lock because + * it needs to be fast. As a result, we may have set H_ATTACH in the chash + * call above. Since we're bailing out, unset whatever flags we just set, and + * wake up all waiters for this cnode. + */ + if (hflags) { + hfs_chashwakeup(hfsmp, cp, hflags); + } + + *out_flags = GNV_CAT_ATTRCHANGED; + return ERECYCLE; + } + else { + /* Otherwise, CNID != fileid. Go ahead and copy in the new descriptor */ + replace_desc(cp, descp); + } + } + } + + /* Check if we found a matching vnode */ - if (*vpp != NULL) + if (*vpp != NULL) { return (0); + } /* * If this is a new cnode then initialize it. */ if (ISSET(cp->c_hflag, H_ALLOC)) { lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr); +#if HFS_COMPRESSION + cp->c_decmp = NULL; +#endif /* Make sure its still valid (ie exists on disk). */ - if (!hfs_valid_cnode(hfsmp, dvp, (wantrsrc ? 
NULL : cnp), cp->c_fileid)) { - hfs_chash_abort(cp); - hfs_reclaim_cnode(cp); - *vpp = NULL; - return (ENOENT); + if (!(flags & GNV_CREATE)) { + int error = 0; + if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) { + hfs_chash_abort(hfsmp, cp); + hfs_reclaim_cnode(cp); + *vpp = NULL; + /* + * If we hit this case, that means that the entry was there in the catalog when + * we did a cat_lookup earlier. Think hfs_lookup. However, in between the time + * that we checked the catalog and the time we went to get a vnode/cnode for it, + * it had been removed from the namespace and the vnode totally reclaimed. As a result, + * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out + * an ENOENT. To indicate to the caller that they should really double-check the + * entry (it could have been renamed over and gotten a new fileid), we mark a bit + * in the output flags. + */ + if (error == ENOENT) { + *out_flags = GNV_CAT_DELETED; + return ENOENT; + } + + /* + * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into + * this function as an argument because the catalog may have changed w.r.t hardlink + * link counts and the firstlink field. If that validation check fails, then let + * lookup re-drive itself to get valid/consistent data with the same failure condition below. + */ + if (error == ERECYCLE) { + *out_flags = GNV_CAT_ATTRCHANGED; + return (ERECYCLE); + } + } } bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr)); bcopy(descp, &cp->c_desc, sizeof(struct cat_desc)); @@ -513,23 +994,50 @@ hfs_getnewvnode( descp->cd_flags &= ~CD_HASBUF; /* Tag hardlinks */ - if (IFTOVT(cp->c_mode) == VREG && - (descp->cd_cnid != attrp->ca_fileid)) { + if ((vtype == VREG || vtype == VDIR) && + ((descp->cd_cnid != attrp->ca_fileid) || + (attrp->ca_recflags & kHFSHasLinkChainMask))) { cp->c_flag |= C_HARDLINK; } + /* + * Fix-up dir link counts. 
+ * + * Earlier versions of Leopard used ca_linkcount for posix + * nlink support (effectively the sub-directory count + 2). + * That is now accomplished using the ca_dircount field with + * the corresponding kHFSHasFolderCountMask flag. + * + * For directories the ca_linkcount is the true link count, + * tracking the number of actual hardlinks to a directory. + * + * We only do this if the mount has HFS_FOLDERCOUNT set; + * at the moment, we only set that for HFSX volumes. + */ + if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) && + (vtype == VDIR) && + !(attrp->ca_recflags & kHFSHasFolderCountMask) && + (cp->c_attr.ca_linkcount > 1)) { + if (cp->c_attr.ca_entries == 0) + cp->c_attr.ca_dircount = 0; + else + cp->c_attr.ca_dircount = cp->c_attr.ca_linkcount - 2; - /* Take one dev reference for each non-directory cnode */ - if (IFTOVT(cp->c_mode) != VDIR) { - cp->c_devvp = hfsmp->hfs_devvp; - vnode_ref(cp->c_devvp); + cp->c_attr.ca_linkcount = 1; + cp->c_attr.ca_recflags |= kHFSHasFolderCountMask; + if ( !(hfsmp->hfs_flags & HFS_READ_ONLY) ) + cp->c_flag |= C_MODIFIED; } #if QUOTA - for (i = 0; i < MAXQUOTAS; i++) - cp->c_dquot[i] = NODQUOT; + if (hfsmp->hfs_flags & HFS_QUOTAS) { + for (i = 0; i < MAXQUOTAS; i++) + cp->c_dquot[i] = NODQUOT; + } #endif /* QUOTA */ + /* Mark the output flag that we're vending a new cnode */ + *out_flags |= GNV_NEW_CNODE; } - if (IFTOVT(cp->c_mode) == VDIR) { + if (vtype == VDIR) { if (cp->c_vp != NULL) panic("hfs_getnewvnode: orphaned vnode (data)"); cvpp = &cp->c_vp; @@ -597,16 +1105,32 @@ hfs_getnewvnode( vfsp.vnfs_mp = mp; vfsp.vnfs_vtype = vtype; vfsp.vnfs_str = "hfs"; - vfsp.vnfs_dvp = dvp; + if ((cp->c_flag & C_HARDLINK) && (vtype == VDIR)) { + vfsp.vnfs_dvp = NULL; /* no parent for me! */ + vfsp.vnfs_cnp = NULL; /* no name for me! 
*/ + } else { + vfsp.vnfs_dvp = dvp; + vfsp.vnfs_cnp = cnp; + } vfsp.vnfs_fsnode = cp; - vfsp.vnfs_cnp = cnp; - if (vtype == VFIFO ) + + /* + * Special Case HFS Standard VNOPs from HFS+, since + * HFS standard is readonly/deprecated as of 10.6 + */ + +#if FIFO + if (vtype == VFIFO ) vfsp.vnfs_vops = hfs_fifoop_p; - else if (vtype == VBLK || vtype == VCHR) - vfsp.vnfs_vops = hfs_specop_p; else +#endif + if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_vops = hfs_specop_p; + else if (hfs_standard) + vfsp.vnfs_vops = hfs_std_vnodeop_p; + else vfsp.vnfs_vops = hfs_vnodeop_p; - + if (vtype == VBLK || vtype == VCHR) vfsp.vnfs_rdev = attrp->ca_rdev; else @@ -617,10 +1141,9 @@ hfs_getnewvnode( else vfsp.vnfs_filesize = 0; - if (dvp && cnp && (cnp->cn_flags & MAKEENTRY)) - vfsp.vnfs_flags = 0; - else - vfsp.vnfs_flags = VNFS_NOCACHE; + vfsp.vnfs_flags = VNFS_ADDFSREF; + if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE)) + vfsp.vnfs_flags |= VNFS_NOCACHE; /* Tag system files */ vfsp.vnfs_marksystem = issystemfile; @@ -645,27 +1168,51 @@ hfs_getnewvnode( * occurred during the attachment, then cleanup the cnode. */ if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) { - hfs_chash_abort(cp); + hfs_chash_abort(hfsmp, cp); hfs_reclaim_cnode(cp); - } else { - hfs_chashwakeup(cp, H_ALLOC | H_ATTACH); - hfs_unlock(cp); + } + else { + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH); + if ((flags & GNV_SKIPLOCK) == 0){ + hfs_unlock(cp); + } } *vpp = NULL; return (retval); } vp = *cvpp; - vnode_addfsref(vp); vnode_settag(vp, VT_HFS); - if (cp->c_flag & C_HARDLINK) - vnode_set_hard_link(vp); - hfs_chashwakeup(cp, H_ALLOC | H_ATTACH); + if (cp->c_flag & C_HARDLINK) { + vnode_setmultipath(vp); + } + /* + * Tag resource fork vnodes as needing an VNOP_INACTIVE + * so that any deferred removes (open unlinked files) + * have the chance to process the resource fork. 
+ */ + if (VNODE_IS_RSRC(vp)) { + int err; + KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 37)), cp->c_vp, cp->c_rsrc_vp, 0, 0, 0); + + /* Force VL_NEEDINACTIVE on this vnode */ + err = vnode_ref(vp); + if (err == 0) { + vnode_rele(vp); + } + } + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH); /* * Stop tracking an active hot file. */ - if (!vnode_isdir(vp) && !vnode_issystem(vp)) + if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) { (void) hfs_removehotfile(vp); + } + +#if CONFIG_PROTECT + if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) + cp_entry_init(cp, mp); +#endif *vpp = vp; return (0); @@ -686,35 +1233,57 @@ hfs_reclaim_cnode(struct cnode *cp) } #endif /* QUOTA */ - if (cp->c_devvp) { - struct vnode *tmp_vp = cp->c_devvp; - - cp->c_devvp = NULL; - vnode_rele(tmp_vp); - } - /* * If the descriptor has a name then release it */ - if (cp->c_desc.cd_flags & CD_HASBUF) { - char *nameptr; + if ((cp->c_desc.cd_flags & CD_HASBUF) && (cp->c_desc.cd_nameptr != 0)) { + const char *nameptr; - nameptr = cp->c_desc.cd_nameptr; + nameptr = (const char *) cp->c_desc.cd_nameptr; cp->c_desc.cd_nameptr = 0; cp->c_desc.cd_flags &= ~CD_HASBUF; cp->c_desc.cd_namelen = 0; vfs_removename(nameptr); } - + + /* + * We only call this function if we are in hfs_vnop_reclaim and + * attempting to reclaim a cnode with only one live fork. Because the vnode + * went through reclaim, any future attempts to use this item will have to + * go through lookup again, which will need to create a new vnode. Thus, + * destroying the locks below (while they were still held during our parent + * function hfs_vnop_reclaim) is safe. 
+ */ + lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group); lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group); +#if HFS_COMPRESSION + if (cp->c_decmp) { + decmpfs_cnode_destroy(cp->c_decmp); + FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE); + } +#endif +#if CONFIG_PROTECT + cp_entry_destroy(cp); +#endif + + bzero(cp, sizeof(struct cnode)); FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE); } -static int -hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, cnid_t cnid) +/* + * hfs_valid_cnode + * + * This function is used to validate data that is stored in-core against what is contained + * in the catalog. Common uses include validating that the parent-child relationship still exists + * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at + * the point of the check. + */ +int +hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, + cnid_t cnid, struct cat_attr *cattr, int *error) { struct cat_attr attr; struct cat_desc cndesc; @@ -722,34 +1291,181 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname int lockflags; /* System files are always valid */ - if (cnid < kHFSFirstUserCatalogNodeID) + if (cnid < kHFSFirstUserCatalogNodeID) { + *error = 0; return (1); + } /* XXX optimization: check write count in dvp */ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); if (dvp && cnp) { + int lookup = 0; + struct cat_fork fork; + bzero(&cndesc, sizeof(cndesc)); - cndesc.cd_nameptr = cnp->cn_nameptr; + cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; cndesc.cd_namelen = cnp->cn_namelen; - cndesc.cd_parentcnid = VTOC(dvp)->c_cnid; + cndesc.cd_parentcnid = VTOC(dvp)->c_fileid; cndesc.cd_hint = VTOC(dvp)->c_childhint; - if ((cat_lookup(hfsmp, &cndesc, 0, NULL, &attr, NULL, NULL) == 0) && - (cnid == attr.ca_fileid)) { + /* + * We have to be careful when calling cat_lookup. 
The result argument + * 'attr' may get different results based on whether or not you ask + * for the filefork to be supplied as output. This is because cat_lookupbykey + * will attempt to do basic validation/smoke tests against the resident + * extents if there are no overflow extent records, but it needs someplace + * in memory to store the on-disk fork structures. + * + * Since hfs_lookup calls cat_lookup with a filefork argument, we should + * do the same here, to verify that block count differences are not + * due to calling the function with different styles. cat_lookupbykey + * will request the volume be fsck'd if there is true on-disk corruption + * where the number of blocks does not match the number generated by + * summing the number of blocks in the resident extents. + */ + + lookup = cat_lookup (hfsmp, &cndesc, 0, NULL, &attr, &fork, NULL); + if ((lookup == 0) && (cnid == attr.ca_fileid)) { stillvalid = 1; + *error = 0; + } + else { + *error = ENOENT; + } + + /* + * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation + * race. Specifically, if there is no vnode/cnode pair for the directory entry + * being looked up, we have to go to the catalog. But since we don't hold any locks (aside + * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record + * changing in between the time we do the cat_lookup there and the time we re-grab the + * catalog lock above to do another cat_lookup. + * + * However, we need to check more than just the CNID and parent-child name relationships above. + * Hardlinks can suffer the same race in the following scenario: Suppose we do a + * cat_lookup, and find a leaf record and a raw inode for a hardlink. Now, we have + * the cat_attr in hand (passed in above). But in between then and now, the vnode was + * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get + * a chance to do anything. 
This is possible if there are a lot of threads thrashing around + * with the cnode hash. In this case, if we don't check/validate the cat_attr in-hand, we will + * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is + * on disk. So validate the cat_attr below, if required. This race cannot happen if the cnode/vnode + * already exists, as it does in the case of rename and delete. + */ + if (stillvalid && cattr != NULL) { + if (cattr->ca_linkcount != attr.ca_linkcount) { + stillvalid = 0; + *error = ERECYCLE; + goto notvalid; + } + + if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) { + stillvalid = 0; + *error = ERECYCLE; + goto notvalid; + } + + if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) { + stillvalid = 0; + *error = ERECYCLE; + goto notvalid; + } + + if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) { + stillvalid = 0; + *error = ERECYCLE; + goto notvalid; + } } } else { - if (cat_idlookup(hfsmp, cnid, NULL, NULL, NULL) == 0) { + if (cat_idlookup(hfsmp, cnid, 0, NULL, NULL, NULL) == 0) { stillvalid = 1; + *error = 0; + } + else { + *error = ENOENT; } } +notvalid: hfs_systemfile_unlock(hfsmp, lockflags); return (stillvalid); } +/* + * Per HI and Finder requirements, HFS should add in the + * date/time that a particular directory entry was added + * to the containing directory. + * This is stored in the extended Finder Info for the + * item in question. + * + * Note that this field is also set explicitly in the hfs_vnop_setxattr code. + * We must ignore user attempts to set this part of the finderinfo, and + * so we need to save a local copy of the date added, write in the user + * finderinfo, then stuff the value back in. 
+ */ +void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) { + u_int8_t *finfo = NULL; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = (u_int8_t*)attrp->ca_finderinfo; + finfo = finfo + 16; + + /* + * Make sure to write it out as big endian, since that's how + * finder info is defined. + * + * NOTE: This is a Unix-epoch timestamp, not a HFS/Traditional Mac timestamp. + */ + if (S_ISREG(attrp->ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + extinfo->date_added = OSSwapHostToBigInt32(dateadded); + attrp->ca_recflags |= kHFSHasDateAddedMask; + } + else if (S_ISDIR(attrp->ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + extinfo->date_added = OSSwapHostToBigInt32(dateadded); + attrp->ca_recflags |= kHFSHasDateAddedMask; + } + + /* If it were neither directory/file, then we'd bail out */ + return; +} + +u_int32_t hfs_get_dateadded (struct cnode *cp) { + u_int8_t *finfo = NULL; + u_int32_t dateadded = 0; + + if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) { + /* Date added was never set. Return 0. */ + return dateadded; + } + + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = (u_int8_t*)cp->c_finderinfo; + finfo = finfo + 16; + + /* + * FinderInfo is written out in big endian... make sure to convert it to host + * native before we use it. 
+ */ + if (S_ISREG(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + dateadded = OSSwapBigToHostInt32 (extinfo->date_added); + } + else if (S_ISDIR(cp->c_attr.ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + dateadded = OSSwapBigToHostInt32 (extinfo->date_added); + } + + return dateadded; +} + + + /* * Touch cnode times based on c_touch_xxx flags * @@ -757,16 +1473,41 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname * * This will also update the volume modify time */ -__private_extern__ void hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) { + vfs_context_t ctx; + /* don't modify times if volume is read-only */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + cp->c_touch_acctime = FALSE; + cp->c_touch_chgtime = FALSE; + cp->c_touch_modtime = FALSE; + return; + } + else if (hfsmp->hfs_flags & HFS_STANDARD) { /* HFS Standard doesn't support access times */ - if (hfsmp->hfs_flags & HFS_STANDARD) { cp->c_touch_acctime = FALSE; } - if (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) { + ctx = vfs_context_current(); + /* + * Skip access time updates if: + * . MNT_NOATIME is set + * . a file system freeze is in progress + * . a file system resize is in progress + * . 
the vnode associated with this cnode is marked for rapid aging + */ + if (cp->c_touch_acctime) { + if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) || + (hfsmp->hfs_freezing_proc != NULL) || + (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) || + (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) { + + cp->c_touch_acctime = FALSE; + } + } + if (cp->c_touch_acctime || cp->c_touch_chgtime || + cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) { struct timeval tv; int touchvol = 0; @@ -805,10 +1546,18 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) cp->c_flag |= C_MODIFIED; touchvol = 1; } + + if (cp->c_flag & C_NEEDS_DATEADDED) { + hfs_write_dateadded (&(cp->c_attr), tv.tv_sec); + cp->c_flag |= C_MODIFIED; + /* untwiddle the bit */ + cp->c_flag &= ~C_NEEDS_DATEADDED; + touchvol = 1; + } /* Touch the volume modtime if needed */ if (touchvol) { - HFSTOVCB(hfsmp)->vcbFlags |= 0xFF00; + MarkVCBDirty(hfsmp); HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec; } } @@ -817,41 +1566,73 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) /* * Lock a cnode. */ -__private_extern__ int hfs_lock(struct cnode *cp, enum hfslocktype locktype) { void * thread = current_thread(); - /* System files need to keep track of owner */ - if ((cp->c_fileid < kHFSFirstUserCatalogNodeID) && - (cp->c_fileid > kHFSRootFolderID) && - (locktype != HFS_SHARED_LOCK)) { - + if (cp->c_lockowner == thread) { /* - * The extents and bitmap file locks support - * recursion and are always taken exclusive. + * Only the extents and bitmap file's support lock recursion. 
*/ - if (cp->c_fileid == kHFSExtentsFileID || - cp->c_fileid == kHFSAllocationFileID) { - if (cp->c_lockowner == thread) { - cp->c_syslockcount++; - } else { - lck_rw_lock_exclusive(&cp->c_rwlock); - cp->c_lockowner = thread; - cp->c_syslockcount = 1; - } + if ((cp->c_fileid == kHFSExtentsFileID) || + (cp->c_fileid == kHFSAllocationFileID)) { + cp->c_syslockcount++; } else { - lck_rw_lock_exclusive(&cp->c_rwlock); - cp->c_lockowner = thread; + panic("hfs_lock: locking against myself!"); } } else if (locktype == HFS_SHARED_LOCK) { lck_rw_lock_shared(&cp->c_rwlock); cp->c_lockowner = HFS_SHARED_OWNER; - } else { + + } else /* HFS_EXCLUSIVE_LOCK */ { lck_rw_lock_exclusive(&cp->c_rwlock); cp->c_lockowner = thread; + + /* + * Only the extents and bitmap files support lock recursion. + */ + if ((cp->c_fileid == kHFSExtentsFileID) || + (cp->c_fileid == kHFSAllocationFileID)) { + cp->c_syslockcount = 1; + } } + +#ifdef HFS_CHECK_LOCK_ORDER + /* + * Regular cnodes (non-system files) cannot be locked + * while holding the journal lock or a system file lock. + */ + if (!(cp->c_desc.cd_flags & CD_ISMETA) && + ((cp->c_fileid > kHFSFirstUserCatalogNodeID) || (cp->c_fileid == kHFSRootFolderID))) { + vnode_t vp = NULLVP; + + /* Find corresponding vnode. */ + if (cp->c_vp != NULLVP && VTOC(cp->c_vp) == cp) { + vp = cp->c_vp; + } else if (cp->c_rsrc_vp != NULLVP && VTOC(cp->c_rsrc_vp) == cp) { + vp = cp->c_rsrc_vp; + } + if (vp != NULLVP) { + struct hfsmount *hfsmp = VTOHFS(vp); + + if (hfsmp->jnl && (journal_owner(hfsmp->jnl) == thread)) { + /* This will eventually be a panic here. 
*/ + printf("hfs_lock: bad lock order (cnode after journal)\n"); + } + if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) { + panic("hfs_lock: bad lock order (cnode after catalog)"); + } + if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) { + panic("hfs_lock: bad lock order (cnode after attribute)"); + } + if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) { + panic("hfs_lock: bad lock order (cnode after extents)"); + } + } + } +#endif /* HFS_CHECK_LOCK_ORDER */ + /* * Skip cnodes that no longer exist (were deleted). */ @@ -867,7 +1648,6 @@ hfs_lock(struct cnode *cp, enum hfslocktype locktype) /* * Lock a pair of cnodes. */ -__private_extern__ int hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype) { @@ -882,13 +1662,9 @@ hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype) } /* - * Lock in cnode parent-child order (if there is a relationship); - * otherwise lock in cnode address order. + * Lock in cnode address order. */ - if ((IFTOVT(cp1->c_mode) == VDIR) && (cp1->c_fileid == cp2->c_parentcnid)) { - first = cp1; - last = cp2; - } else if (cp1 < cp2) { + if (cp1 < cp2) { first = cp1; last = cp2; } else { @@ -918,26 +1694,22 @@ hfs_isordered(struct cnode *cp1, struct cnode *cp2) return (1); if (cp2 == NULL || cp1 == (struct cnode *)0xffffffff) return (0); - if (cp1->c_fileid == cp2->c_parentcnid) - return (1); /* cp1 is the parent and should go first */ - if (cp2->c_fileid == cp1->c_parentcnid) - return (0); /* cp1 is the child and should go last */ - - return (cp1 < cp2); /* fall-back is to use address order */ + /* + * Locking order is cnode address order. + */ + return (cp1 < cp2); } /* * Acquire 4 cnode locks. - * - locked in cnode parent-child order (if there is a relationship) - * otherwise lock in cnode address order (lesser address first). + * - locked in cnode address order (lesser address first). 
* - all or none of the locks are taken * - only one lock taken per cnode (dup cnodes are skipped) * - some of the cnode pointers may be null */ -__private_extern__ int hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, - struct cnode *cp4, enum hfslocktype locktype) + struct cnode *cp4, enum hfslocktype locktype, struct cnode **error_cnode) { struct cnode * a[3]; struct cnode * b[3]; @@ -945,6 +1717,9 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode * tmp; int i, j, k; int error; + if (error_cnode) { + *error_cnode = NULL; + } if (hfs_isordered(cp1, cp2)) { a[0] = cp1; a[1] = cp2; @@ -975,6 +1750,10 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, for (i = 0; i < k; ++i) { if (list[i]) if ((error = hfs_lock(list[i], locktype))) { + /* Only stuff error_cnode if requested */ + if (error_cnode) { + *error_cnode = list[i]; + } /* Drop any locks we acquired. */ while (--i >= 0) { if (list[i]) @@ -990,49 +1769,59 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, /* * Unlock a cnode. */ -__private_extern__ void hfs_unlock(struct cnode *cp) { vnode_t rvp = NULLVP; - vnode_t dvp = NULLVP; + vnode_t vp = NULLVP; + u_int32_t c_flag; + void *lockowner; - /* System files need to keep track of owner */ - if ((cp->c_fileid < kHFSFirstUserCatalogNodeID) && - (cp->c_fileid > kHFSRootFolderID) && - (cp->c_datafork != NULL)) { - /* - * The extents and bitmap file locks support - * recursion and are always taken exclusive. - */ - if (cp->c_fileid == kHFSExtentsFileID || - cp->c_fileid == kHFSAllocationFileID) { - if (--cp->c_syslockcount > 0) { - return; - } + /* + * Only the extents and bitmap file's support lock recursion. 
+ */ + if ((cp->c_fileid == kHFSExtentsFileID) || + (cp->c_fileid == kHFSAllocationFileID)) { + if (--cp->c_syslockcount > 0) { + return; } } - if (cp->c_flag & C_NEED_DVNODE_PUT) - dvp = cp->c_vp; + c_flag = cp->c_flag; + cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT | C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE); - if (cp->c_flag & C_NEED_RVNODE_PUT) + if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) { + vp = cp->c_vp; + } + if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) { rvp = cp->c_rsrc_vp; + } - cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT); - - cp-> c_lockowner = NULL; - lck_rw_done(&cp->c_rwlock); + lockowner = cp->c_lockowner; + if (lockowner == current_thread()) { + cp->c_lockowner = NULL; + lck_rw_unlock_exclusive(&cp->c_rwlock); + } else { + lck_rw_unlock_shared(&cp->c_rwlock); + } - if (dvp) - vnode_put(dvp); - if (rvp) - vnode_put(rvp); + /* Perform any vnode post processing after cnode lock is dropped. */ + if (vp) { + if (c_flag & C_NEED_DATA_SETSIZE) + ubc_setsize(vp, 0); + if (c_flag & C_NEED_DVNODE_PUT) + vnode_put(vp); + } + if (rvp) { + if (c_flag & C_NEED_RSRC_SETSIZE) + ubc_setsize(rvp, 0); + if (c_flag & C_NEED_RVNODE_PUT) + vnode_put(rvp); + } } /* * Unlock a pair of cnodes. */ -__private_extern__ void hfs_unlockpair(struct cnode *cp1, struct cnode *cp2) { @@ -1044,7 +1833,6 @@ hfs_unlockpair(struct cnode *cp1, struct cnode *cp2) /* * Unlock a group of cnodes. */ -__private_extern__ void hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4) { @@ -1091,28 +1879,119 @@ skip2: * * The process doing a truncation must take the lock * exclusive. The read/write processes can take it - * non-exclusive. + * shared. The locktype argument is the same as supplied to + * hfs_lock. 
*/ -__private_extern__ void -hfs_lock_truncate(struct cnode *cp, int exclusive) +hfs_lock_truncate(struct cnode *cp, enum hfslocktype locktype) { - if (cp->c_lockowner == current_thread()) - panic("hfs_lock_truncate: cnode 0x%08x locked!", cp); + void * thread = current_thread(); - if (exclusive) - lck_rw_lock_exclusive(&cp->c_truncatelock); - else + if (cp->c_truncatelockowner == thread) { + /* + * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse. + * + * This is needed on the hfs_vnop_pagein path where we need to ensure + * the file does not change sizes while we are paging in. However, + * we may already hold the lock exclusive due to another + * VNOP from earlier in the call stack. So if we already hold + * the truncate lock exclusive, allow it to proceed, but ONLY if + * it's in the recursive case. + */ + if (locktype != HFS_RECURSE_TRUNCLOCK) { + panic("hfs_lock_truncate: cnode %p locked!", cp); + } + } + /* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */ + else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) { lck_rw_lock_shared(&cp->c_truncatelock); + cp->c_truncatelockowner = HFS_SHARED_OWNER; + } + else { /* must be an HFS_EXCLUSIVE_LOCK */ + lck_rw_lock_exclusive(&cp->c_truncatelock); + cp->c_truncatelockowner = thread; + } } -__private_extern__ -void -hfs_unlock_truncate(struct cnode *cp) -{ - lck_rw_done(&cp->c_truncatelock); + +/* + * Attempt to get the truncate lock. If it cannot be acquired, error out. + * This function is needed in the degenerate hfs_vnop_pagein during force unmount + * case. To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will + * temporarily need to disable V2 semantics. + */ +int hfs_try_trunclock (struct cnode *cp, enum hfslocktype locktype) { + void * thread = current_thread(); + boolean_t didlock = false; + + if (cp->c_truncatelockowner == thread) { + /* + * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse. 
+ * + * This is needed on the hfs_vnop_pagein path where we need to ensure + * the file does not change sizes while we are paging in. However, + * we may already hold the lock exclusive due to another + * VNOP from earlier in the call stack. So if we already hold + * the truncate lock exclusive, allow it to proceed, but ONLY if + * it's in the recursive case. + */ + if (locktype != HFS_RECURSE_TRUNCLOCK) { + panic("hfs_lock_truncate: cnode %p locked!", cp); + } + } + /* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */ + else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) { + didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED); + if (didlock) { + cp->c_truncatelockowner = HFS_SHARED_OWNER; + } + } + else { /* must be an HFS_EXCLUSIVE_LOCK */ + didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE); + if (didlock) { + cp->c_truncatelockowner = thread; + } + } + + return didlock; } +/* + * Unlock the truncate lock, which protects against size changes. + * + * The been_recursed argument is used when we may need to return + * from this function without actually unlocking the truncate lock. + */ +void +hfs_unlock_truncate(struct cnode *cp, int been_recursed) +{ + void *thread = current_thread(); + /* + * If been_recursed is nonzero AND the current lock owner of the + * truncate lock is our current thread, then we must have recursively + * taken the lock earlier on. If the lock were unlocked, + * HFS_RECURSE_TRUNCLOCK took a shared lock and it would fall through + * to the SHARED case below. + * + * If been_recursed is zero (most of the time) then we check the + * lockowner field to infer whether the lock was taken exclusively or + * shared in order to know what underlying lock routine to call. 
+ */ + if (been_recursed) { + if (cp->c_truncatelockowner == thread) { + return; + } + } + /* HFS_LOCK_EXCLUSIVE */ + if (thread == cp->c_truncatelockowner) { + cp->c_truncatelockowner = NULL; + lck_rw_unlock_exclusive(&cp->c_truncatelock); + } + /* HFS_LOCK_SHARED */ + else { + lck_rw_unlock_shared(&cp->c_truncatelock); + } +}