diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c
index 439e6b270..668cc7870 100644
--- a/bsd/hfs/hfs_cnode.c
+++ b/bsd/hfs/hfs_cnode.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
@@ -49,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 extern int prtactive;
 
@@ -56,7 +59,7 @@ extern lck_attr_t * hfs_lock_attr;
 extern lck_grp_t * hfs_mutex_group;
 extern lck_grp_t * hfs_rwlock_group;
 
-static void hfs_reclaim_cnode(struct cnode *);
+static void hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *);
 static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
 static int hfs_isordered(struct cnode *, struct cnode *);
 
@@ -175,13 +178,13 @@ int hfs_is_backingstore (struct vnode *vp, int *val) {
 * Assumes that both truncate and cnode locks for 'cp' are held.
 */
 static 
-int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
-
+int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim)
+{
 int forkcount = 0;
 enum vtype v_type;
 struct cnode *cp;
 int error = 0;
- int started_tr = 0;
+ bool started_tr = false;
 struct hfsmount *hfsmp = VTOHFS(vp);
 struct proc *p = vfs_context_proc(ctx);
 int truncated = 0;
@@ -199,36 +202,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
 if (cp->c_rsrcfork) {
 ++forkcount;
 }
-
-
- /*
- * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
- * The dirty regions would have already been synced to disk, so informing UBC
- * that they can toss the pages doesn't help anyone at this point.
- *
- * Note that this is a performance problem if the vnode goes straight to reclaim
- * (and skips inactive), since there would be no way for anyone to notify the UBC
- * that all pages in this file are basically useless.
- */
- if (reclaim == 0) {
- /*
- * Check whether we are tearing down a cnode with only one remaining fork.
- * If there are blocks in its filefork, then we need to unlock the cnode
- * before calling ubc_setsize.  The cluster layer may re-enter the filesystem
- * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock
- * panic.
- */
-
- if ((v_type == VREG || v_type == VLNK) &&
- (cp->c_flag & C_DELETED) &&
- (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) {
- hfs_unlock(cp);
- /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */
- ubc_setsize(vp, 0);
- (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
- }
- }
-
+
 /*
 * Push file data out for normal files that haven't been evicted from
 * the namespace.  We only do this if this function was not called from reclaim,
@@ -244,32 +218,14 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
 (VTOF(vp)->ff_blocks) &&
 (reclaim == 0)) {
 /*
- * Note that if content protection is enabled, then this is where we will
- * attempt to issue IOs for all dirty regions of this file.
- *
- * If we're called from hfs_vnop_inactive, all this means is at the time
+ * If we're called from hfs_vnop_inactive, all this means is at the time
 * the logic for deciding to call this function, there were not any lingering
 * mmap/fd references for this file.
However, there is nothing preventing the system * from creating a new reference in between the time that logic was checked * and we entered hfs_vnop_inactive. As a result, the only time we can guarantee * that there aren't any references is during vnop_reclaim. */ - hfs_filedone(vp, ctx); - } - - /* - * We're holding the cnode lock now. Stall behind any shadow BPs that may - * be involved with this vnode if it is a symlink. We don't want to allow - * the blocks that we're about to release to be put back into the pool if there - * is pending I/O to them. - */ - if (v_type == VLNK) { - /* - * This will block if the asynchronous journal flush is in progress. - * If this symlink is not being renamed over and doesn't have any open FDs, - * then we'll remove it from the journal's bufs below in kill_block. - */ - buf_wait_for_shadow_io (vp, 0); + hfs_filedone(vp, ctx, 0); } /* @@ -281,362 +237,341 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { if (cp->c_flag & C_HARDLINK) { hfs_relorigins(cp); } - + /* - * This check is slightly complicated. We should only truncate data - * in very specific cases for open-unlinked files. This is because - * we want to ensure that the resource fork continues to be available - * if the caller has the data fork open. However, this is not symmetric; - * someone who has the resource fork open need not be able to access the data - * fork once the data fork has gone inactive. - * - * If we're the last fork, then we have cleaning up to do. - * - * A) last fork, and vp == c_vp - * Truncate away own fork data. If rsrc fork is not in core, truncate it too. - * - * B) last fork, and vp == c_rsrc_vp - * Truncate ourselves, assume data fork has been cleaned due to C). - * - * If we're not the last fork, then things are a little different: + * -- Handle open unlinked files -- * - * C) not the last fork, vp == c_vp - * Truncate ourselves. Once the file has gone out of the namespace, - * it cannot be further opened. Further access to the rsrc fork may - * continue, however. - * - * D) not the last fork, vp == c_rsrc_vp - * Don't enter the block below, just clean up vnode and push it out of core. + * If the vnode is in use, it means a force unmount is in progress + * in which case we defer cleaning up until either we come back + * through here via hfs_vnop_reclaim, at which point the UBC + * information will have been torn down and the vnode might no + * longer be in use, or if it's still in use, it will get cleaned + * up when next remounted. */ + if (ISSET(cp->c_flag, C_DELETED) && !vnode_isinuse(vp, 0)) { + /* + * This check is slightly complicated. We should only truncate data + * in very specific cases for open-unlinked files. This is because + * we want to ensure that the resource fork continues to be available + * if the caller has the data fork open. However, this is not symmetric; + * someone who has the resource fork open need not be able to access the data + * fork once the data fork has gone inactive. + * + * If we're the last fork, then we have cleaning up to do. + * + * A) last fork, and vp == c_vp + * Truncate away own fork data. If rsrc fork is not in core, truncate it too. + * + * B) last fork, and vp == c_rsrc_vp + * Truncate ourselves, assume data fork has been cleaned due to C). + * + * If we're not the last fork, then things are a little different: + * + * C) not the last fork, vp == c_vp + * Truncate ourselves. Once the file has gone out of the namespace, + * it cannot be further opened. 
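A side note on the vnode_isinuse() gate this hunk introduces: a busy vnode here means a force unmount is racing us, so the open-unlinked cleanup is skipped rather than performed under an active user, and it is retried at reclaim time or on the next mount. A minimal userspace restatement of that predicate, with illustrative names (should_teardown_now is not an HFS function):

#include <assert.h>
#include <stdbool.h>

/* Stand-ins for ISSET(cp->c_flag, C_DELETED) and vnode_isinuse(vp, 0). */
static bool should_teardown_now(bool deleted, int usecount)
{
	/* Busy + deleted => forced unmount in progress: defer the catalog
	 * delete to hfs_vnop_reclaim, or to cleanup on the next mount. */
	return deleted && usecount == 0;
}

int main(void)
{
	assert(should_teardown_now(true, 0));   /* ordinary open-unlinked file */
	assert(!should_teardown_now(true, 2));  /* forced unmount: defer       */
	assert(!should_teardown_now(false, 0)); /* not deleted: nothing to do  */
	return 0;
}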
Further access to the rsrc fork may + * continue, however. + * + * D) not the last fork, vp == c_rsrc_vp + * Don't enter the block below, just clean up vnode and push it out of core. + */ - if ((v_type == VREG || v_type == VLNK) && - (cp->c_flag & C_DELETED) && - ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) { - - /* Truncate away our own fork data. (Case A, B, C above) */ - if (VTOF(vp)->ff_blocks != 0) { + if ((v_type == VREG || v_type == VLNK) && + ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) { + + /* Truncate away our own fork data. (Case A, B, C above) */ + if (VTOF(vp)->ff_blocks != 0) { + /* + * SYMLINKS only: + * + * Encapsulate the entire change (including truncating the link) in + * nested transactions if we are modifying a symlink, because we know that its + * file length will be at most 4k, and we can fit both the truncation and + * any relevant bitmap changes into a single journal transaction. We also want + * the kill_block code to execute in the same transaction so that any dirty symlink + * blocks will not be written. Otherwise, rely on + * hfs_truncate doing its own transactions to ensure that we don't blow up + * the journal. + */ + if (!started_tr && (v_type == VLNK)) { + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + else { + started_tr = true; + } + } - /* - * SYMLINKS only: - * - * Encapsulate the entire change (including truncating the link) in - * nested transactions if we are modifying a symlink, because we know that its - * file length will be at most 4k, and we can fit both the truncation and - * any relevant bitmap changes into a single journal transaction. We also want - * the kill_block code to execute in the same transaction so that any dirty symlink - * blocks will not be written. Otherwise, rely on - * hfs_truncate doing its own transactions to ensure that we don't blow up - * the journal. - */ - if ((started_tr == 0) && (v_type == VLNK)) { - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; + /* + * At this point, we have decided that this cnode is + * suitable for full removal. We are about to deallocate + * its blocks and remove its entry from the catalog. + * If it was a symlink, then it's possible that the operation + * which created it is still in the current transaction group + * due to coalescing. Take action here to kill the data blocks + * of the symlink out of the journal before moving to + * deallocate the blocks. We need to be in the middle of + * a transaction before calling buf_iterate like this. + * + * Note: we have to kill any potential symlink buffers out of + * the journal prior to deallocating their blocks. This is so + * that we don't race with another thread that may be doing an + * an allocation concurrently and pick up these blocks. It could + * generate I/O against them which could go out ahead of our journal + * transaction. + */ + + if (hfsmp->jnl && vnode_islnk(vp)) { + buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); + } + + + /* + * This truncate call (and the one below) is fine from VNOP_RECLAIM's + * context because we're only removing blocks, not zero-filling new + * ones. The C_DELETED check above makes things much simpler. 
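The four cases A-D spelled out in this comment collapse into a small decision table keyed on whether this is the last fork and whether the vnode is the resource fork. A standalone restatement (the enum and function names are illustrative, not kernel API):

#include <stdbool.h>
#include <stdio.h>

enum teardown_action {
	TRUNC_SELF_AND_RSRC,  /* A: last fork, vp == c_vp                   */
	TRUNC_SELF,           /* B: last fork, rsrc vp; C: data fork, !last */
	CLEAN_VNODE_ONLY      /* D: rsrc vp, other fork still reachable     */
};

static enum teardown_action pick_action(bool vp_is_rsrc, int forkcount)
{
	if (forkcount == 1)
		return vp_is_rsrc ? TRUNC_SELF            /* case B */
		                  : TRUNC_SELF_AND_RSRC;  /* case A */
	return vp_is_rsrc ? CLEAN_VNODE_ONLY          /* case D */
	                  : TRUNC_SELF;               /* case C */
}

int main(void)
{
	printf("A=%d B=%d C=%d D=%d\n",
	       pick_action(false, 1), pick_action(true, 1),
	       pick_action(false, 2), pick_action(true, 2));
	return 0;
}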
+ */ + error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, ctx); + if (error) { goto out; } - else { - started_tr = 1; + truncated = 1; + + /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */ + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = false; } - } - /* - * At this point, we have decided that this cnode is - * suitable for full removal. We are about to deallocate - * its blocks and remove its entry from the catalog. - * If it was a symlink, then it's possible that the operation - * which created it is still in the current transaction group - * due to coalescing. Take action here to kill the data blocks - * of the symlink out of the journal before moving to - * deallocate the blocks. We need to be in the middle of - * a transaction before calling buf_iterate like this. + } + + /* + * Truncate away the resource fork, if we represent the data fork and + * it is the last fork. That means, by definition, the rsrc fork is not in + * core. To avoid bringing a vnode into core for the sole purpose of deleting the + * data in the resource fork, we call cat_lookup directly, then hfs_release_storage + * to get rid of the resource fork's data. Note that because we are holding the + * cnode lock, it is impossible for a competing thread to create the resource fork + * vnode from underneath us while we do this. * - * Note: we have to kill any potential symlink buffers out of - * the journal prior to deallocating their blocks. This is so - * that we don't race with another thread that may be doing an - * an allocation concurrently and pick up these blocks. It could - * generate I/O against them which could go out ahead of our journal - * transaction. + * This is invoked via case A above only. */ + if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) { + struct cat_lookup_buffer *lookup_rsrc = NULL; + struct cat_desc *desc_ptr = NULL; + lockflags = 0; + + MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK); + if (lookup_rsrc == NULL) { + printf("hfs_cnode_teardown: ENOMEM from MALLOC\n"); + error = ENOMEM; + goto out; + } + else { + bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer)); + } - if (hfsmp->jnl && vnode_islnk(vp)) { - buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); - } + if (cp->c_desc.cd_namelen == 0) { + /* Initialize the rsrc descriptor for lookup if necessary*/ + MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid); + + lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name; + lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name); + lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid; + + desc_ptr = &lookup_rsrc->lookup_desc; + } + else { + desc_ptr = &cp->c_desc; + } + lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - /* - * This truncate call (and the one below) is fine from VNOP_RECLAIM's - * context because we're only removing blocks, not zero-filling new - * ones. The C_DELETED check above makes things much simpler. 
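The started_tr bookkeeping visible in this hunk is a bracket pattern: open the journal transaction at most once, record that in a local flag, and close it exactly once on every exit path, which is what lets a symlink's block kill, truncation, and bitmap changes share a single transaction. A condensed sketch with stubbed journal calls (jnl_start/jnl_end stand in for hfs_start_transaction/hfs_end_transaction):

#include <errno.h>
#include <stdbool.h>

static int  jnl_start(void) { return 0; }  /* stub: 0 on success */
static void jnl_end(void)   { }

static int truncate_symlink_one_txn(void)
{
	bool started_tr = false;
	int error = 0;

	if (jnl_start() != 0) {
		error = EINVAL;
		goto out;
	}
	started_tr = true;

	/* ... kill cached symlink buffers, truncate the (<= 4 KiB) link,
	 * and apply bitmap changes -- small enough for one transaction ... */

out:
	if (started_tr)   /* every exit path closes the bracket exactly once */
		jnl_end();
	return error;
}

int main(void) { return truncate_symlink_one_txn(); }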
- */ - error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, 0, ctx); - if (error) { - goto out; - } - truncated = 1; + error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL, + (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL); - /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */ - if (started_tr) { - hfs_end_transaction(hfsmp); - started_tr = 0; - } + hfs_systemfile_unlock (hfsmp, lockflags); + + if (error) { + FREE (lookup_rsrc, M_TEMP); + goto out; + } - } - - /* - * Truncate away the resource fork, if we represent the data fork and - * it is the last fork. That means, by definition, the rsrc fork is not in - * core. To avoid bringing a vnode into core for the sole purpose of deleting the - * data in the resource fork, we call cat_lookup directly, then hfs_release_storage - * to get rid of the resource fork's data. Note that because we are holding the - * cnode lock, it is impossible for a competing thread to create the resource fork - * vnode from underneath us while we do this. - * - * This is invoked via case A above only. - */ - if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) { - struct cat_lookup_buffer *lookup_rsrc = NULL; - struct cat_desc *desc_ptr = NULL; - lockflags = 0; - - MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK); - if (lookup_rsrc == NULL) { - printf("hfs_cnode_teardown: ENOMEM from MALLOC\n"); - error = ENOMEM; - goto out; - } - else { - bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer)); - } + /* + * Make the filefork in our temporary struct look like a real + * filefork. Fill in the cp, sysfileinfo and rangelist fields.. + */ + rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges); + lookup_rsrc->lookup_fork.ff_cp = cp; - if (cp->c_desc.cd_namelen == 0) { - /* Initialize the rsrc descriptor for lookup if necessary*/ - MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid); - - lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name; - lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name); - lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; - lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid; - - desc_ptr = &lookup_rsrc->lookup_desc; - } - else { - desc_ptr = &cp->c_desc; - } + /* + * If there were no errors, then we have the catalog's fork information + * for the resource fork in question. Go ahead and delete the data in it now. + */ - lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid); + FREE(lookup_rsrc, M_TEMP); - error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL, - (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL); + if (error) { + goto out; + } - hfs_systemfile_unlock (hfsmp, lockflags); - - if (error) { - FREE (lookup_rsrc, M_TEMP); - goto out; + /* + * This fileid's resource fork extents have now been fully deleted on-disk + * and this CNID is no longer valid. At this point, we should be able to + * zero out cp->c_blocks to indicate there is no data left in this file. + */ + cp->c_blocks = 0; } + } + /* + * If we represent the last fork (or none in the case of a dir), + * and the cnode has become open-unlinked... 
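Deleting resource-fork data without faulting a vnode into core hinges on building a throwaway catalog descriptor; when the cnode's own name is already gone, MAKE_DELETED_NAME synthesizes one from the file ID. Assuming the convention is a "temp" prefix followed by the decimal file ID (an assumption -- check MAKE_DELETED_NAME in the HFS headers), the construction looks roughly like:

#include <stdint.h>
#include <stdio.h>

#define TEMPLOOKUP_NAMELEN 32  /* mirrors HFS_TEMPLOOKUP_NAMELEN */

/* Assumed "temp<fileid>" naming for deleted-but-open files parked in
 * the private metadata directory. */
static void make_deleted_name(char name[TEMPLOOKUP_NAMELEN], uint32_t fileid)
{
	snprintf(name, TEMPLOOKUP_NAMELEN, "temp%u", fileid);
}

int main(void)
{
	char name[TEMPLOOKUP_NAMELEN];
	make_deleted_name(name, 123456);
	printf("%s\n", name);  /* temp123456 */
	return 0;
}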
+ * + * We check c_blocks here because it is possible in the force + * unmount case for the data fork to be in use but the resource + * fork to not be in use in which case we will truncate the + * resource fork, but not the data fork. It will get cleaned + * up upon next mount. + */ + if (forkcount <= 1 && !cp->c_blocks) { /* - * Make the filefork in our temporary struct look like a real - * filefork. Fill in the cp, sysfileinfo and rangelist fields.. + * If it has EA's, then we need to get rid of them. + * + * Note that this must happen outside of any other transactions + * because it starts/ends its own transactions and grabs its + * own locks. This is to prevent a file with a lot of attributes + * from creating a transaction that is too large (which panics). */ - rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges); - lookup_rsrc->lookup_fork.ff_cp = cp; + if (ISSET(cp->c_attr.ca_recflags, kHFSHasAttributesMask)) + ea_error = hfs_removeallattr(hfsmp, cp->c_fileid, &started_tr); - /* - * If there were no errors, then we have the catalog's fork information - * for the resource fork in question. Go ahead and delete the data in it now. + /* + * Remove the cnode's catalog entry and release all blocks it + * may have been using. */ - error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid); - FREE(lookup_rsrc, M_TEMP); - - if (error) { + /* + * Mark cnode in transit so that no one can get this + * cnode from cnode hash. + */ + // hfs_chash_mark_in_transit(hfsmp, cp); + // XXXdbg - remove the cnode from the hash table since it's deleted + // otherwise someone could go to sleep on the cnode and not + // be woken up until this vnode gets recycled which could be + // a very long time... + hfs_chashremove(hfsmp, cp); + + cp->c_flag |= C_NOEXISTS; // XXXdbg + cp->c_rdev = 0; + + if (!started_tr) { + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + started_tr = true; + } + + /* + * Reserve some space in the Catalog file. + */ + if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { goto out; } - + cat_reserve = 1; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + if (cp->c_blocks > 0) { + printf("hfs_inactive: deleting non-empty%sfile %d, " + "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ", + (int)cp->c_fileid, (int)cp->c_blocks); + } + + // + // release the name pointer in the descriptor so that + // cat_delete() will use the file-id to do the deletion. + // in the case of hard links this is imperative (in the + // case of regular files the fileid and cnid are the + // same so it doesn't matter). + // + cat_releasedesc(&cp->c_desc); + /* - * This fileid's resource fork extents have now been fully deleted on-disk - * and this CNID is no longer valid. At this point, we should be able to - * zero out cp->c_blocks to indicate there is no data left in this file. + * The descriptor name may be zero, + * in which case the fileid is used. */ - cp->c_blocks = 0; - } - } - - /* - * If we represent the last fork (or none in the case of a dir), - * and the cnode has become open-unlinked, - * AND it has EA's, then we need to get rid of them. - * - * Note that this must happen outside of any other transactions - * because it starts/ends its own transactions and grabs its - * own locks. This is to prevent a file with a lot of attributes - * from creating a transaction that is too large (which panics). 
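The reason hfs_removeallattr must run outside the caller's transaction, as this comment says, is transaction sizing: deleting every xattr of an attribute-heavy file in one transaction could overflow the journal and panic. The fix is one small, bounded transaction per deletion, roughly:

#include <stdbool.h>

static bool attr_remaining(void)  { static int n = 3; return n-- > 0; }
static void txn_begin(void)       { }  /* stubs for the journal API */
static void txn_end(void)         { }
static void delete_one_attr(void) { }

static void remove_all_attrs(void)
{
	while (attr_remaining()) {
		txn_begin();       /* bounded: the journal only ever sees */
		delete_one_attr(); /* one attribute's worth of changes    */
		txn_end();
	}
}

int main(void) { remove_all_attrs(); return 0; }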
- */ - if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 && - (cp->c_flag & C_DELETED) && - (forkcount <= 1)) { - - ea_error = hfs_removeallattr(hfsmp, cp->c_fileid); - } - - - /* - * If the cnode represented an open-unlinked file, then now - * actually remove the cnode's catalog entry and release all blocks - * it may have been using. - */ - if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) { - /* - * Mark cnode in transit so that no one can get this - * cnode from cnode hash. - */ - // hfs_chash_mark_in_transit(hfsmp, cp); - // XXXdbg - remove the cnode from the hash table since it's deleted - // otherwise someone could go to sleep on the cnode and not - // be woken up until this vnode gets recycled which could be - // a very long time... - hfs_chashremove(hfsmp, cp); - - cp->c_flag |= C_NOEXISTS; // XXXdbg - cp->c_rdev = 0; - - if (started_tr == 0) { - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; + error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); + + if (error && truncated && (error != ENXIO)) { + printf("hfs_inactive: couldn't delete a truncated file!"); + } + + /* Update HFS Private Data dir */ + if (error == 0) { + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; + if (vnode_isdir(vp)) { + DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { goto out; - } - started_tr = 1; - } - - /* - * Reserve some space in the Catalog file. - */ - if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { - goto out; - } - cat_reserve = 1; - - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); - - if (cp->c_blocks > 0) { - printf("hfs_inactive: deleting non-empty%sfile %d, " - "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ", - (int)cp->c_fileid, (int)cp->c_blocks); - } - - // - // release the name pointer in the descriptor so that - // cat_delete() will use the file-id to do the deletion. - // in the case of hard links this is imperative (in the - // case of regular files the fileid and cnid are the - // same so it doesn't matter). - // - cat_releasedesc(&cp->c_desc); - - /* - * The descriptor name may be zero, - * in which case the fileid is used. - */ - error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); - - if (error && truncated && (error != ENXIO)) { - printf("hfs_inactive: couldn't delete a truncated file!"); - } - - /* Update HFS Private Data dir */ - if (error == 0) { - hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; - if (vnode_isdir(vp)) { - DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); - } - (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], - &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); - } - - hfs_systemfile_unlock(hfsmp, lockflags); - - if (error) { - goto out; + } + + #if QUOTA + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_chkiq(cp, -1, NOCRED, 0); + #endif /* QUOTA */ + + /* Already set C_NOEXISTS at the beginning of this block */ + cp->c_flag &= ~C_DELETED; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + + if (error == 0) + hfs_volupdate(hfsmp, (v_type == VDIR) ? 
VOL_RMDIR : VOL_RMFILE, 0); } - -#if QUOTA - if (hfsmp->hfs_flags & HFS_QUOTAS) - (void)hfs_chkiq(cp, -1, NOCRED, 0); -#endif /* QUOTA */ - - /* Already set C_NOEXISTS at the beginning of this block */ - cp->c_flag &= ~C_DELETED; - cp->c_touch_chgtime = TRUE; - cp->c_touch_modtime = TRUE; - - if (error == 0) - hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0); - } - + } // if + + hfs_update(vp, reclaim ? HFS_UPDATE_FORCE : 0); + /* - * A file may have had delayed allocations, in which case hfs_update - * would not have updated the catalog record (cat_update). We need - * to do that now, before we lose our fork data. We also need to - * force the update, or hfs_update will again skip the cat_update. + * Since we are about to finish what might be an inactive call, propagate + * any remaining modified or touch bits from the cnode to the vnode. This + * serves as a hint to vnode recycling that we shouldn't recycle this vnode + * synchronously. * - * If the file has C_NOEXISTS set, then we can skip the hfs_update call - * because the catalog entry has already been removed. There would be no point - * to looking up the entry in the catalog to modify it when we already know it's gone + * For now, if the node *only* has a dirty atime, we don't mark + * the vnode as dirty. VFS's asynchronous recycling can actually + * lead to worse performance than having it synchronous. When VFS + * is fixed to be more performant, we can be more honest about + * marking vnodes as dirty when it's only the atime that's dirty. */ - if ((!ISSET(cp->c_flag, C_NOEXISTS)) && - ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime || - cp->c_touch_chgtime || cp->c_touch_modtime)) { - - if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){ - cp->c_flag |= C_FORCEUPDATE; - } - hfs_update(vp, 0); - } - + if (hfs_is_dirty(cp) == HFS_DIRTY || ISSET(cp->c_flag, C_DELETED)) { + vnode_setdirty(vp); + } else { + vnode_cleardirty(vp); + } + out: if (cat_reserve) cat_postflight(hfsmp, &cookie, p); - // XXXdbg - have to do this because a goto could have come here if (started_tr) { hfs_end_transaction(hfsmp); - started_tr = 0; + started_tr = false; } -#if 0 -#if CONFIG_PROTECT - /* - * cnode truncate lock and cnode lock are both held exclusive here. - * - * Go ahead and flush the keys out if this cnode is the last fork - * and it is not class F. Class F keys should not be purged because they only - * exist in memory and have no persistent keys. Only do this - * if we haven't already done it yet (maybe a vnode skipped inactive - * and went straight to reclaim). This function gets called from both reclaim and - * inactive, so it will happen first in inactive if possible. - * - * We need to be mindful that all pending IO for this file has already been - * issued and completed before we bzero out the key. This is because - * if it isn't, tossing the key here could result in garbage IO being - * written (by using the bzero'd key) if the writes are happening asynchronously. - * - * In addition, class A files may have already been purged due to the - * lock event occurring. 
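The CONFIG_PROTECT block being deleted in this hunk (its body follows just below) wiped cached key material at most once, behind a flag, and only after all I/O that might still encrypt with the key had drained -- otherwise an asynchronous write could go out using a zeroed key and corrupt the file. The shape of that idempotent wipe, with an illustrative struct rather than the real cprotect:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct cp_entry {          /* illustrative; not struct cprotect */
	bool    key_flushed;
	uint8_t cache_key[32];
};

static void flush_key_once(struct cp_entry *e)
{
	if (e->key_flushed)
		return;            /* inactive may already have wiped it */
	e->key_flushed = true;
	/* all pending writes must have completed before this point */
	memset(e->cache_key, 0, sizeof e->cache_key);
}

int main(void)
{
	struct cp_entry e = { false, {0} };
	flush_key_once(&e);
	flush_key_once(&e);    /* second call is a no-op */
	return e.key_flushed ? 0 : 1;
}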
- */ - if (forkcount == 1) { - struct cprotect *entry = cp->c_cpentry; - if ((entry) && (entry->cp_pclass != PROTECTION_CLASS_F)) { - if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) { - cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED; - bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len); - bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx)); - } - } - } -#endif -#endif - - return error; + return error; } @@ -726,7 +661,7 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) if (took_trunc_lock) { hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } - + hfs_unlock(cp); inactive_done: @@ -740,17 +675,14 @@ inactive_done: */ int -hfs_filedone(struct vnode *vp, vfs_context_t context) +hfs_filedone(struct vnode *vp, vfs_context_t context, + hfs_file_done_opts_t opts) { struct cnode *cp; struct filefork *fp; struct hfsmount *hfsmp; - struct rl_entry *invalid_range; off_t leof; u_int32_t blks, blocksize; - /* flags for zero-filling sparse ranges */ - int cluster_flags = IO_CLOSE; - int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE; cp = VTOC(vp); fp = VTOF(vp); @@ -760,61 +692,8 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0)) return (0); -#if CONFIG_PROTECT - /* - * Figure out if we need to do synchronous IO. - * - * If the file represents a content-protected file, we may need - * to issue synchronous IO when we dispatch to the cluster layer. - * If we didn't, then the IO would go out to the disk asynchronously. - * If the vnode hits the end of inactive before getting reclaimed, the - * content protection keys would be wiped/bzeroed out, and we'd end up - * trying to issue the IO with an invalid key. This will lead to file - * corruption. IO_SYNC will force the cluster_push to wait until all IOs - * have completed (though they may be in the track cache). - */ - if (cp_fs_protected(VTOVFS(vp))) { - cluster_flags |= IO_SYNC; - cluster_zero_flags |= IO_SYNC; - } -#endif - - /* - * If we are being invoked from F_SWAPDATAEXTENTS, then we - * need to issue synchronous IO; Unless we are sure that all - * of the data has been written to the disk, we won't know - * that all of the blocks have been allocated properly. - */ - if (cp->c_flag & C_SWAPINPROGRESS) { - cluster_flags |= IO_SYNC; - } - - hfs_unlock(cp); - (void) cluster_push(vp, cluster_flags); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - - /* - * Explicitly zero out the areas of file - * that are currently marked invalid. - */ - while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) { - off_t start = invalid_range->rl_start; - off_t end = invalid_range->rl_end; - - /* The range about to be written must be validated - * first, so that VNOP_BLOCKMAP() will return the - * appropriate mapping for the cluster code: - */ - rl_remove(start, end, &fp->ff_invalidranges); + hfs_flush_invalid_ranges(vp); - hfs_unlock(cp); - (void) cluster_write(vp, (struct uio *) 0, - leof, end + 1, start, (off_t)0, cluster_zero_flags); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cp->c_flag |= C_MODIFIED; - } - cp->c_flag &= ~C_ZFWANTSYNC; - cp->c_zftimeout = 0; blocksize = VTOVCB(vp)->blockSize; blks = leof / blocksize; if (((off_t)blks * (off_t)blocksize) != leof) @@ -823,21 +702,22 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) * Shrink the peof to the smallest size neccessary to contain the leof. 
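The peof computation just above is a round-up division: the smallest whole number of allocation blocks that still covers the logical EOF. Isolated:

#include <stdint.h>
#include <stdio.h>

static uint32_t blocks_for_leof(int64_t leof, uint32_t blocksize)
{
	uint32_t blks = (uint32_t)(leof / blocksize);
	if ((int64_t)blks * blocksize != leof)  /* partial tail block?  */
		blks++;                             /* round up to cover it */
	return blks;
}

int main(void)
{
	/* 4097 bytes needs 2 x 4096-byte blocks; 8192 needs exactly 2 */
	printf("%u %u\n", blocks_for_leof(4097, 4096),
	       blocks_for_leof(8192, 4096));
	return 0;
}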
*/ if (blks < fp->ff_blocks) { - (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context); + (void) hfs_truncate(vp, leof, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES, context); } - hfs_unlock(cp); - (void) cluster_push(vp, cluster_flags); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - - /* - * If the hfs_truncate didn't happen to flush the vnode's - * information out to disk, force it to be updated now that - * all invalid ranges have been zero-filled and validated: - */ - if (cp->c_flag & C_MODIFIED) { + if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) { + hfs_unlock(cp); + cluster_push(vp, IO_CLOSE); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + /* + * If the hfs_truncate didn't happen to flush the vnode's + * information out to disk, force it to be updated now that + * all invalid ranges have been zero-filled and validated: + */ hfs_update(vp, 0); } + return (0); } @@ -880,11 +760,13 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) } /* - * Keep track of an inactive hot file. + * Keep track of an inactive hot file. Don't bother on ssd's since + * the tracking is done differently (it's done at read() time) */ if (!vnode_isdir(vp) && !vnode_issystem(vp) && - !(cp->c_flag & (C_DELETED | C_NOEXISTS)) ) { + !(cp->c_flag & (C_DELETED | C_NOEXISTS)) && + !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { (void) hfs_addhotfile(vp); } vnode_removefsref(vp); @@ -931,7 +813,8 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) /* Dump cached symlink data */ if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) { FREE(fp->ff_symlinkptr, M_TEMP); - } + } + rl_remove_all(&fp->ff_invalidranges); FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK); } @@ -941,7 +824,7 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) if (reclaim_cnode) { hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT); hfs_unlock(cp); - hfs_reclaim_cnode(cp); + hfs_reclaim_cnode(hfsmp, cp); } else { /* @@ -969,7 +852,39 @@ extern int (**hfs_std_vnodeop_p) (void *); /* * hfs_getnewvnode - get new default vnode * - * The vnode is returned with an iocount and the cnode locked + * The vnode is returned with an iocount and the cnode locked. + * The cnode of the parent vnode 'dvp' may or may not be locked, depending on + * the circumstances. The cnode in question (if acquiring the resource fork), + * may also already be locked at the time we enter this function. + * + * Note that there are both input and output flag arguments to this function. + * If one of the input flags (specifically, GNV_USE_VP), is set, then + * hfs_getnewvnode will use the parameter *vpp, which is traditionally only + * an output parameter, as both an input and output parameter. It will use + * the vnode provided in the output, and pass it to vnode_create with the + * proper flavor so that a new vnode is _NOT_ created on our behalf when + * we dispatch to VFS. This may be important in various HFS vnode creation + * routines, such a create or get-resource-fork, because we risk deadlock if + * jetsam is involved. + * + * Deadlock potential exists if jetsam is synchronously invoked while we are waiting + * for a vnode to be recycled in order to give it the identity we want. If jetsam + * happens to target a process for termination that is blocked in-kernel, waiting to + * acquire the cnode lock on our parent 'dvp', while our current thread has it locked, + * neither side will make forward progress and the watchdog timer will eventually fire. 
+ * To prevent this, a caller of hfs_getnewvnode may choose to proactively force + * any necessary vnode reclamation/recycling while it is not holding any locks and + * thus not prone to deadlock. If this is the case, GNV_USE_VP will be set and + * the parameter will be used as described above. + * + * !!! !!!! + * In circumstances when GNV_USE_VP is set, this function _MUST_ clean up and either consume + * or dispose of the provided vnode. We funnel all errors to a single return value so that + * if provided_vp is still non-NULL, then we will dispose of the vnode. This will occur in + * all error cases of this function -- anywhere we zero/NULL out the *vpp parameter. It may + * also occur if the current thread raced with another to create the same vnode, and we + * find the entry already present in the cnode hash. + * !!! !!! */ int hfs_getnewvnode( @@ -990,27 +905,43 @@ hfs_getnewvnode( struct cnode *cp = NULL; struct filefork *fp = NULL; int hfs_standard = 0; - int retval; + int retval = 0; int issystemfile; int wantrsrc; int hflags = 0; + int need_update_identity = 0; struct vnode_fsparam vfsp; enum vtype vtype; + + struct vnode *provided_vp = NULL; + + #if QUOTA int i; #endif /* QUOTA */ hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD); + if (flags & GNV_USE_VP) { + /* Store the provided VP for later use */ + provided_vp = *vpp; + } + + /* Zero out the vpp regardless of provided input */ + *vpp = NULL; + + /* Zero out the out_flags */ + *out_flags = 0; + if (attrp->ca_fileid == 0) { - *vpp = NULL; - return (ENOENT); + retval = ENOENT; + goto gnv_exit; } #if !FIFO if (IFTOVT(attrp->ca_mode) == VFIFO) { - *vpp = NULL; - return (ENOTSUP); + retval = ENOTSUP; + goto gnv_exit; } #endif /* !FIFO */ vtype = IFTOVT(attrp->ca_mode); @@ -1020,16 +951,14 @@ hfs_getnewvnode( /* Sanity check the vtype and mode */ if (vtype == VBAD) { /* Mark the FS as corrupt and bail out */ - hfs_mark_volume_inconsistent(hfsmp); - return EINVAL; + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); + retval = EINVAL; + goto gnv_exit; } - - /* Zero out the out_flags */ - *out_flags = 0; - + #ifdef HFS_CHECK_LOCK_ORDER /* - * The only case were its permissible to hold the parent cnode + * The only case where it's permissible to hold the parent cnode * lock is during a create operation (hfs_makenode) or when * we don't need the cnode lock (GNV_SKIPLOCK). */ @@ -1050,8 +979,18 @@ hfs_getnewvnode( * If the id is no longer valid for lookups we'll get back a NULL cp. */ if (cp == NULL) { - return (ENOENT); + retval = ENOENT; + goto gnv_exit; } + /* + * We may have been provided a vnode via + * GNV_USE_VP. In this case, we have raced with + * a 2nd thread to create the target vnode. The provided + * vnode that was passed in will be dealt with at the + * end of the function, as we don't zero out the field + * until we're ready to pass responsibility to VFS. + */ + /* * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the @@ -1071,10 +1010,28 @@ hfs_getnewvnode( */ if (!(hfs_checkdeleted(cp))) { + // + // If the bytes of the filename in the descp do not match the bytes in the + // cnp (and we're not looking up the resource fork), then we want to update + // the vnode identity to contain the bytes that HFS stores so that when an + // fsevent gets generated, it has the correct filename. otherwise daemons + // that match filenames produced by fsevents with filenames they have stored + // elsewhere (e.g. bladerunner, backupd, mds), the filenames will not match. 
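The name check this hunk performs is a plain byte comparison, and the reason it can fail for "the same name" is Unicode normalization: HFS stores decomposed (NFD) names, so an e-acute comes back from disk as 0x65 0xCC 0x81 while a caller may pass the precomposed 0xC3 0xA9. A demonstration of exactly that mismatch:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char precomposed[] = "caf\xC3\xA9";   /* NFC: U+00E9       */
	const char decomposed[]  = "cafe\xCC\x81";  /* NFD: 'e' + U+0301 */

	/* the same strncmp-style check hfs_getnewvnode uses */
	if (strncmp(precomposed, decomposed, strlen(decomposed)) != 0)
		printf("bytes differ -> update the vnode identity "
		       "with the on-disk (decomposed) name\n");
	return 0;
}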
+ // See: FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // for more details. + // +#ifdef CN_WANTSRSRCFORK + if (*vpp && cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) { +#else + if (*vpp && cnp && cnp->cn_nameptr && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) { +#endif + vnode_update_identity (*vpp, dvp, (const char *)descp->cd_nameptr, descp->cd_namelen, 0, VNODE_UPDATE_NAME); + } if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) { /* If cnode is uninitialized, its c_attr will be zeroed out; cnids wont match. */ if ((descp->cd_cnid == cp->c_attr.ca_fileid) && (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){ + if ((flags & GNV_SKIPLOCK) == 0) { /* * Then we took the lock. Drop it before calling @@ -1088,7 +1045,7 @@ hfs_getnewvnode( * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to * force a re-drive in the lookup routine. * Drop the iocount on the vnode obtained from - * chash_getcnode if needed. + * chash_getcnode if needed. */ if (*vpp != NULL) { vnode_put (*vpp); @@ -1108,7 +1065,8 @@ hfs_getnewvnode( } *out_flags = GNV_CAT_ATTRCHANGED; - return ERECYCLE; + retval = ERECYCLE; + goto gnv_exit; } else { /* @@ -1128,18 +1086,37 @@ hfs_getnewvnode( * that the new link lived in the same directory as the alternative name for * this item. */ - if ((*vpp != NULL) && (cnp)) { + if ((*vpp != NULL) && (cnp || cp->c_desc.cd_nameptr)) { /* we could be requesting the rsrc of a hardlink file... */ - vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, - (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); +#ifdef CN_WANTSRSRCFORK + if (cp->c_desc.cd_nameptr && (cnp == NULL || !(cnp->cn_flags & CN_WANTSRSRCFORK))) { +#else + if (cp->c_desc.cd_nameptr) { +#endif + // + // Update the identity with what we have stored on disk as + // the name of this file. This is related to: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // + vnode_update_identity (*vpp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0, + (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + } else if (cnp) { + vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, + (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + } } } } } - /* Check if we found a matching vnode */ + /* + * At this point, we have performed hardlink and open-unlinked checks + * above. We have now validated the state of the vnode that was given back + * to us from the cnode hash code and find it safe to return. 
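ERECYCLE here is never reported to userspace; it instructs the lookup path to re-drive itself because cached hardlink state went stale underneath us. A caller-side retry loop captures the contract (ERECYCLE_SKETCH is a local stand-in; the real ERECYCLE is a kernel-private errno whose value I have not verified):

#include <stdio.h>

#define ERECYCLE_SKETCH (-5)  /* stand-in value; do not rely on it */

static int lookup_once(int attempt)
{
	/* pretend the first pass hits a cnode with a stale link count */
	return attempt == 0 ? ERECYCLE_SKETCH : 0;
}

int main(void)
{
	int error, attempt = 0;
	do {
		/* a real re-drive re-reads the catalog record first, so the
		 * retry observes the updated descriptor and link count */
		error = lookup_once(attempt++);
	} while (error == ERECYCLE_SKETCH);
	printf("resolved after %d attempt(s)\n", attempt);
	return error;
}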
+ */ if (*vpp != NULL) { - return (0); + retval = 0; + goto gnv_exit; } /* @@ -1159,7 +1136,7 @@ hfs_getnewvnode( if ((flags & GNV_SKIPLOCK) == 0) { hfs_unlock(cp); } - hfs_reclaim_cnode(cp); + hfs_reclaim_cnode(hfsmp, cp); *vpp = NULL; /* * If we hit this case, that means that the entry was there in the catalog when @@ -1173,7 +1150,8 @@ hfs_getnewvnode( */ if (error == ENOENT) { *out_flags = GNV_CAT_DELETED; - return ENOENT; + retval = ENOENT; + goto gnv_exit; } /* @@ -1184,7 +1162,8 @@ hfs_getnewvnode( */ if (error == ERECYCLE) { *out_flags = GNV_CAT_ATTRCHANGED; - return (ERECYCLE); + retval = ERECYCLE; + goto gnv_exit; } } } @@ -1197,9 +1176,10 @@ hfs_getnewvnode( descp->cd_flags &= ~CD_HASBUF; /* Tag hardlinks */ - if ((vtype == VREG || vtype == VDIR) && - ((descp->cd_cnid != attrp->ca_fileid) || - (attrp->ca_recflags & kHFSHasLinkChainMask))) { + if ((vtype == VREG || vtype == VDIR + || vtype == VSOCK || vtype == VFIFO) + && (descp->cd_cnid != attrp->ca_fileid + || ISSET(attrp->ca_recflags, kHFSHasLinkChainMask))) { cp->c_flag |= C_HARDLINK; } /* @@ -1315,6 +1295,7 @@ hfs_getnewvnode( vfsp.vnfs_dvp = dvp; vfsp.vnfs_cnp = cnp; } + vfsp.vnfs_fsnode = cp; /* @@ -1347,8 +1328,23 @@ hfs_getnewvnode( vfsp.vnfs_filesize = 0; vfsp.vnfs_flags = VNFS_ADDFSREF; - if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE)) +#ifdef CN_WANTSRSRCFORK + if (cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) { +#else + if (cnp && cnp->cn_nameptr && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) { +#endif + // + // We don't want VFS to add an entry for this vnode because the name in the + // cnp does not match the bytes stored on disk for this file. Instead we'll + // update the identity later after the vnode is created and we'll do so with + // the correct bytes for this filename. For more details, see: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // vfsp.vnfs_flags |= VNFS_NOCACHE; + need_update_identity = 1; + } else if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE)) { + vfsp.vnfs_flags |= VNFS_NOCACHE; + } /* Tag system files */ vfsp.vnfs_marksystem = issystemfile; @@ -1358,15 +1354,45 @@ hfs_getnewvnode( vfsp.vnfs_markroot = 1; else vfsp.vnfs_markroot = 0; + + /* + * If provided_vp was non-NULL, then it is an already-allocated (but not + * initialized) vnode. We simply need to initialize it to this identity. + * If it was NULL, then assume that we need to call vnode_create with the + * normal arguments/types. + */ + if (provided_vp) { + vp = provided_vp; + /* + * After we assign the value of provided_vp into 'vp' (so that it can be + * mutated safely by vnode_initialize), we can NULL it out. At this point, the disposal + * and handling of the provided vnode will be the responsibility of VFS, which will + * clean it up and vnode_put it properly if vnode_initialize fails. 
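The consume-or-dispose contract described in this comment has a compact control-flow shape: NULL the output early, hand the provided vnode off exactly once on success, and funnel every failure through one exit label that disposes of it if it is still held. A userspace skeleton (vnode_t and the helpers here are stand-ins, not the kernel types):

#include <stdlib.h>

typedef struct vnode { int id; } vnode_t;

static void put_vnode(vnode_t *vp) { free(vp); }

/* use_vp mirrors GNV_USE_VP; fail forces the error path for the demo. */
static int getnewvnode_sketch(vnode_t **vpp, int use_vp, int fail)
{
	vnode_t *provided_vp = NULL;
	int retval = 0;

	if (use_vp)
		provided_vp = *vpp;  /* caller allocated this while unlocked */
	*vpp = NULL;             /* output is NULL on every error path   */

	if (fail) {
		retval = -1;
		goto gnv_exit;
	}

	*vpp = provided_vp;      /* success: identity handed to caller   */
	provided_vp = NULL;

gnv_exit:
	if (provided_vp)         /* error, or lost a creation race       */
		put_vnode(provided_vp);
	return retval;
}

int main(void)
{
	vnode_t *vp = malloc(sizeof *vp);        /* 1: allocate, no locks */
	if (getnewvnode_sketch(&vp, 1, 0) == 0)  /* 2: hand it in         */
		put_vnode(vp);                       /* 3: demo cleanup       */
	return 0;
}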
+ */ + provided_vp = NULL; + + retval = vnode_initialize (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp); + /* See error handling below for resolving provided_vp */ + } + else { + /* Do a standard vnode_create */ + retval = vnode_create (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp); + } - if ((retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, cvpp))) { - if (fp) { + /* + * We used a local variable to hold the result of vnode_create/vnode_initialize so that + * on error cases in vnode_create we won't accidentally harm the cnode's fields + */ + + if (retval) { + /* Clean up if we encountered an error */ + if (fp) { if (fp == cp->c_datafork) - cp->c_datafork = NULL; + cp->c_datafork = NULL; else - cp->c_rsrcfork = NULL; + cp->c_rsrcfork = NULL; - FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK); + FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK); } /* * If this is a newly created cnode or a vnode reclaim @@ -1374,7 +1400,7 @@ hfs_getnewvnode( */ if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) { hfs_chash_abort(hfsmp, cp); - hfs_reclaim_cnode(cp); + hfs_reclaim_cnode(hfsmp, cp); } else { hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH); @@ -1383,13 +1409,38 @@ hfs_getnewvnode( } } *vpp = NULL; - return (retval); + goto gnv_exit; } - vp = *cvpp; + + /* If no error, then assign the value into the cnode's fields */ + *cvpp = vp; + vnode_settag(vp, VT_HFS); if (cp->c_flag & C_HARDLINK) { vnode_setmultipath(vp); } + + if (cp->c_attr.ca_recflags & kHFSFastDevCandidateMask) { + vnode_setfastdevicecandidate(vp); + } + + if (cp->c_attr.ca_recflags & kHFSAutoCandidateMask) { + vnode_setautocandidate(vp); + } + + + + + if (vp && need_update_identity) { + // + // As above, update the name of the vnode if the bytes stored in hfs do not match + // the bytes in the cnp. See this radar: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // for more details. + // + vnode_update_identity (vp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0, VNODE_UPDATE_NAME); + } + /* * Tag resource fork vnodes as needing an VNOP_INACTIVE * so that any deferred removes (open unlinked files) @@ -1397,7 +1448,8 @@ hfs_getnewvnode( */ if (VNODE_IS_RSRC(vp)) { int err; - KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 37)), cp->c_vp, cp->c_rsrc_vp, 0, 0, 0); + + KERNEL_DEBUG_CONSTANT(HFSDBG_GETNEWVNODE, VM_KERNEL_ADDRPERM(cp->c_vp), VM_KERNEL_ADDRPERM(cp->c_rsrc_vp), 0, 0, 0); /* Force VL_NEEDINACTIVE on this vnode */ err = vnode_ref(vp); @@ -1410,7 +1462,7 @@ hfs_getnewvnode( /* * Stop tracking an active hot file. 
*/ - if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) { + if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile && !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { (void) hfs_removehotfile(vp); } @@ -1422,12 +1474,19 @@ hfs_getnewvnode( #endif *vpp = vp; - return (0); + retval = 0; + +gnv_exit: + if (provided_vp) { + /* Release our empty vnode if it was not used */ + vnode_put (provided_vp); + } + return retval; } static void -hfs_reclaim_cnode(struct cnode *cp) +hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *cp) { #if QUOTA int i; @@ -1470,11 +1529,12 @@ hfs_reclaim_cnode(struct cnode *cp) } #endif #if CONFIG_PROTECT - cp_entry_destroy(cp->c_cpentry); + cp_entry_destroy(hfsmp, cp->c_cpentry); cp->c_cpentry = NULL; +#else + (void)hfsmp; // Prevent compiler warning #endif - - + bzero(cp, sizeof(struct cnode)); FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE); } @@ -1641,47 +1701,91 @@ void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) { return; } - -u_int32_t hfs_get_dateadded (struct cnode *cp) { - u_int8_t *finfo = NULL; +static u_int32_t +hfs_get_dateadded_internal(const uint8_t *finderinfo, mode_t mode) +{ + const uint8_t *finfo = NULL; u_int32_t dateadded = 0; - if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) { - /* Date added was never set. Return 0. */ - return dateadded; - } /* overlay the FinderInfo to the correct pointer, and advance */ - finfo = (u_int8_t*)cp->c_finderinfo; - finfo = finfo + 16; + finfo = finderinfo + 16; /* * FinderInfo is written out in big endian... make sure to convert it to host * native before we use it. */ - if (S_ISREG(cp->c_attr.ca_mode)) { - struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + if (S_ISREG(mode)) { + const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo; dateadded = OSSwapBigToHostInt32 (extinfo->date_added); } - else if (S_ISDIR(cp->c_attr.ca_mode)) { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + else if (S_ISDIR(mode)) { + const struct FndrExtendedDirInfo *extinfo = (const struct FndrExtendedDirInfo *)finfo; dateadded = OSSwapBigToHostInt32 (extinfo->date_added); } return dateadded; } +u_int32_t +hfs_get_dateadded(struct cnode *cp) +{ + if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) { + /* Date added was never set. Return 0. */ + return (0); + } + + return (hfs_get_dateadded_internal((u_int8_t*)cp->c_finderinfo, + cp->c_attr.ca_mode)); +} + +u_int32_t +hfs_get_dateadded_from_blob(const uint8_t *finderinfo, mode_t mode) +{ + return (hfs_get_dateadded_internal(finderinfo, mode)); +} + /* - * Per HI and Finder requirements, HFS maintains a "write/generation count" - * for each file that is incremented on any write & pageout. It should start - * at 1 to reserve "0" as a special value. If it should ever wrap around, - * it will skip using 0. + * Per HI and Finder requirements, HFS maintains a "write/generation + * count" for each file that is incremented on any write & pageout. + * It should start at 1 to reserve "0" as a special value. If it + * should ever wrap around, it will skip using 0. * - * Note that this field is also set explicitly in the hfs_vnop_setxattr code. - * We must ignore user attempts to set this part of the finderinfo, and - * so we need to save a local copy of the date added, write in the user - * finderinfo, then stuff the value back in. 
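hfs_get_dateadded_internal above reads a big-endian 32-bit value out of the second half of the 32-byte Finder info blob. A standalone extractor for the same layout -- the byte offset (16 for the extended info, plus 4 for date_added after document_id) is my reading of the structs and should be checked against hfs_format.h:

#include <arpa/inet.h>  /* ntohl: big-endian (on-disk) -> host order */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t dateadded_from_blob(const uint8_t finderinfo[32])
{
	uint32_t be;
	/* memcpy sidesteps the alignment trouble of a raw pointer cast */
	memcpy(&be, finderinfo + 16 + 4, sizeof be);
	return ntohl(be);
}

int main(void)
{
	uint8_t blob[32] = {0};
	blob[20] = 0x55; blob[21] = 0xAA; blob[22] = 0x12; blob[23] = 0x34;
	printf("0x%08X\n", dateadded_from_blob(blob));  /* 0x55AA1234 */
	return 0;
}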
+ * Note that finderinfo is manipulated in hfs_vnop_setxattr and care + * is and should be taken to ignore user attempts to set the part of + * the finderinfo that records the generation counter. + * + * Any change to the generation counter *must* not be visible before + * the change that caused it (for obvious reasons), and given the + * limitations of our current architecture, the change to the + * generation counter may occur some time afterwards (particularly in + * the case where a file is mapped writable---more on that below). + * + * We make no guarantees about the consistency of a file. In other + * words, a reader that is operating concurrently with a writer might + * see some, but not all of writer's changes, and the generation + * counter will *not* necessarily tell you this has happened. To + * enforce consistency, clients must make their own arrangements + * e.g. use file locking. + * + * We treat files that are mapped writable as a special case: when + * that happens, clients requesting the generation count will be told + * it has a generation count of zero and they use that knowledge as a + * hint that the file is changing and it therefore might be prudent to + * wait until it is no longer mapped writable. Clients should *not* + * rely on this behaviour however; we might decide that it's better + * for us to publish the fact that a file is mapped writable via + * alternate means and return the generation counter when it is mapped + * writable as it still has some, albeit limited, use. We reserve the + * right to make this change. + * + * Lastly, it's important to realise that because data and metadata + * take different paths through the system, it's possible upon crash + * or sudden power loss and after a restart, that a change may be + * visible to the rest of the system without a corresponding change to + * the generation counter. The reverse may also be true, but for all + * practical applications this shouldn't be an issue. */ void hfs_write_gencount (struct cat_attr *attrp, uint32_t gencount) { u_int8_t *finfo = NULL; @@ -1705,7 +1809,22 @@ void hfs_write_gencount (struct cat_attr *attrp, uint32_t gencount) { return; } -/* Increase the gen count by 1; if it wraps around to 0, increment by two */ +/* + * Increase the gen count by 1; if it wraps around to 0, increment by + * two. The cnode *must* be locked exclusively by the caller. + * + * You may think holding the lock is unnecessary because we only need + * to change the counter, but consider this sequence of events: thread + * A calls hfs_incr_gencount and the generation counter is 2 upon + * entry. A context switch occurs and thread B increments the counter + * to 3, thread C now gets the generation counter (for whatever + * purpose), and then another thread makes another change and the + * generation counter is incremented again---it's now 4. Now thread A + * continues and it sets the generation counter back to 3. So you can + * see, thread C would miss the change that caused the generation + * counter to increment to 4 and for this reason the cnode *must* + * always be locked exclusively. + */ uint32_t hfs_incr_gencount (struct cnode *cp) { u_int8_t *finfo = NULL; u_int32_t gcount = 0; @@ -1717,8 +1836,12 @@ uint32_t hfs_incr_gencount (struct cnode *cp) { /* * FinderInfo is written out in big endian... make sure to convert it to host * native before we use it. 
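Because 0 is reserved as the "mapped writable / unknown" sentinel described above, the increment must step from 0xFFFFFFFF straight to 1. The arithmetic in isolation:

#include <assert.h>
#include <stdint.h>

static uint32_t incr_gencount(uint32_t g)
{
	g++;
	if (g == 0)  /* wrapped: skip the reserved value */
		g++;
	return g;
}

int main(void)
{
	assert(incr_gencount(1) == 2);
	assert(incr_gencount(UINT32_MAX) == 1);  /* wrap skips 0 */
	return 0;
}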
+ * + * NOTE: the write_gen_counter is stored in the same location in both the + * FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the + * last 32-bit word) so it is safe to have one code path here. */ - if (S_ISREG(cp->c_attr.ca_mode)) { + if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode)) { struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter); @@ -1735,6 +1858,8 @@ uint32_t hfs_incr_gencount (struct cnode *cp) { gcount++; } extinfo->write_gen_counter = OSSwapHostToBigInt32 (gcount); + + SET(cp->c_flag, C_MINOR_MOD); } else { gcount = 0; @@ -1743,21 +1868,31 @@ uint32_t hfs_incr_gencount (struct cnode *cp) { return gcount; } -/* Getter for the gen count */ -u_int32_t hfs_get_gencount (struct cnode *cp) { - u_int8_t *finfo = NULL; +/* + * There is no need for any locks here (other than an iocount on an + * associated vnode) because reading and writing an aligned 32 bit + * integer should be atomic on all platforms we support. + */ +static u_int32_t +hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode) +{ + const uint8_t *finfo = NULL; u_int32_t gcount = 0; /* overlay the FinderInfo to the correct pointer, and advance */ - finfo = (u_int8_t*)cp->c_finderinfo; + finfo = finderinfo; finfo = finfo + 16; /* * FinderInfo is written out in big endian... make sure to convert it to host * native before we use it. + * + * NOTE: the write_gen_counter is stored in the same location in both the + * FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the + * last 32-bit word) so it is safe to have one code path here. */ - if (S_ISREG(cp->c_attr.ca_mode)) { - struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + if (S_ISDIR(mode) || S_ISREG(mode)) { + const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo; gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter); /* @@ -1769,13 +1904,31 @@ u_int32_t hfs_get_gencount (struct cnode *cp) { gcount++; } } - else { - gcount = 0; - } return gcount; } +/* Getter for the gen count */ +u_int32_t hfs_get_gencount (struct cnode *cp) { + return hfs_get_gencount_internal(cp->c_finderinfo, cp->c_attr.ca_mode); +} + +/* Getter for the gen count from a buffer (currently pointer to finderinfo)*/ +u_int32_t hfs_get_gencount_from_blob (const uint8_t *finfoblob, mode_t mode) { + return hfs_get_gencount_internal(finfoblob, mode); +} + +void hfs_clear_might_be_dirty_flag(cnode_t *cp) +{ + /* + * If we're about to touch both mtime and ctime, we can clear the + * C_MIGHT_BE_DIRTY_FROM_MAPPING since we can guarantee that + * subsequent page-outs can only be for data made dirty before + * now. 
+ */ + CLR(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING); +} + /* * Touch cnode times based on c_touch_xxx flags * @@ -1787,11 +1940,12 @@ void hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) { vfs_context_t ctx; - /* don't modify times if volume is read-only */ - if (hfsmp->hfs_flags & HFS_READ_ONLY) { + + if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY) || ISSET(cp->c_flag, C_NOEXISTS)) { cp->c_touch_acctime = FALSE; cp->c_touch_chgtime = FALSE; cp->c_touch_modtime = FALSE; + CLR(cp->c_flag, C_NEEDS_DATEADDED); return; } #if CONFIG_HFS_STD @@ -1811,7 +1965,7 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) */ if (cp->c_touch_acctime) { if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) || - (hfsmp->hfs_freezing_proc != NULL) || + hfsmp->hfs_freeze_state != HFS_THAWED || (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) || (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) { @@ -1823,45 +1977,51 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) struct timeval tv; int touchvol = 0; + if (cp->c_touch_modtime && cp->c_touch_chgtime) + hfs_clear_might_be_dirty_flag(cp); + microtime(&tv); if (cp->c_touch_acctime) { - cp->c_atime = tv.tv_sec; /* - * When the access time is the only thing changing - * then make sure its sufficiently newer before - * committing it to disk. + * When the access time is the only thing changing, we + * won't necessarily write it to disk immediately. We + * only do the atime update at vnode recycle time, when + * fsync is called or when there's another reason to write + * to the metadata. */ - if ((((u_int32_t)cp->c_atime - (u_int32_t)(cp)->c_attr.ca_atimeondisk) > - ATIME_ONDISK_ACCURACY)) { - cp->c_flag |= C_MODIFIED; - } + cp->c_atime = tv.tv_sec; cp->c_touch_acctime = FALSE; } if (cp->c_touch_modtime) { - cp->c_mtime = tv.tv_sec; cp->c_touch_modtime = FALSE; - cp->c_flag |= C_MODIFIED; - touchvol = 1; + time_t new_time = tv.tv_sec; #if CONFIG_HFS_STD /* * HFS dates that WE set must be adjusted for DST */ if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) { - cp->c_mtime += 3600; + new_time += 3600; } #endif + if (cp->c_mtime != new_time) { + cp->c_mtime = new_time; + cp->c_flag |= C_MINOR_MOD; + touchvol = 1; + } } if (cp->c_touch_chgtime) { - cp->c_ctime = tv.tv_sec; cp->c_touch_chgtime = FALSE; - cp->c_flag |= C_MODIFIED; - touchvol = 1; + if (cp->c_ctime != tv.tv_sec) { + cp->c_ctime = tv.tv_sec; + cp->c_flag |= C_MINOR_MOD; + touchvol = 1; + } } if (cp->c_flag & C_NEEDS_DATEADDED) { hfs_write_dateadded (&(cp->c_attr), tv.tv_sec); - cp->c_flag |= C_MODIFIED; + cp->c_flag |= C_MINOR_MOD; /* untwiddle the bit */ cp->c_flag &= ~C_NEEDS_DATEADDED; touchvol = 1; @@ -1869,22 +2029,34 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) /* Touch the volume modtime if needed */ if (touchvol) { - MarkVCBDirty(hfsmp); + hfs_note_header_minor_change(hfsmp); HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec; } } } +// Use this if you don't want to check the return code +void hfs_lock_always(cnode_t *cp, enum hfs_locktype locktype) +{ + hfs_lock(cp, locktype, HFS_LOCK_ALWAYS); +} + /* * Lock a cnode. + * N.B. If you add any failure cases, *make* sure hfs_lock_always works */ int hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags) { - void * thread = current_thread(); + thread_t thread = current_thread(); if (cp->c_lockowner == thread) { - /* Only the extents and bitmap files support lock recursion. */ + /* + * Only the extents and bitmap files support lock recursion + * here. 
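The recursion rule being stated here is the classic owner-recorded lock: recursion is permitted only when the current thread already owns the lock, in which case only a counter moves. A pthreads analogue of the c_lockowner/c_syslockcount pair (names are illustrative):

#include <assert.h>
#include <pthread.h>

typedef struct {
	pthread_mutex_t lock;
	pthread_t       owner;      /* valid only while owned != 0 */
	int             owned;
	int             recursion;  /* c_syslockcount analogue     */
} syslock_t;

static void syslock_lock(syslock_t *l)
{
	/* Like the c_lockowner check: only the owning thread can ever see
	 * its own id here, so this unlocked read is safe. */
	if (l->owned && pthread_equal(l->owner, pthread_self())) {
		l->recursion++;
		return;
	}
	pthread_mutex_lock(&l->lock);
	l->owner = pthread_self();
	l->owned = 1;
	l->recursion = 1;
}

static void syslock_unlock(syslock_t *l)
{
	assert(l->owned && pthread_equal(l->owner, pthread_self()));
	if (--l->recursion == 0) {
		l->owned = 0;
		pthread_mutex_unlock(&l->lock);
	}
}

int main(void)
{
	syslock_t l = { .lock = PTHREAD_MUTEX_INITIALIZER };
	syslock_lock(&l);
	syslock_lock(&l);    /* recursive acquire: count goes to 2 */
	syslock_unlock(&l);
	syslock_unlock(&l);  /* final release drops the mutex      */
	return 0;
}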
+		 * hfs_systemfile_lock.  Eventually, we should change to
+		 * handle recursion solely in hfs_systemfile_lock.
+		 */
 		if ((cp->c_fileid == kHFSExtentsFileID) ||
 		    (cp->c_fileid == kHFSAllocationFileID)) {
 			cp->c_syslockcount++;
@@ -1954,6 +2126,15 @@ hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
 	return (0);
 }
 
+bool hfs_lock_upgrade(cnode_t *cp)
+{
+	if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock)) {
+		cp->c_lockowner = current_thread();
+		return true;
+	} else
+		return false;
+}
+
 /*
  * Lock a pair of cnodes.
 */
@@ -2081,10 +2262,9 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
 void
 hfs_unlock(struct cnode *cp)
 {
-	vnode_t rvp = NULLVP;
-	vnode_t vp = NULLVP;
-	u_int32_t c_flag;
-	void *lockowner;
+	vnode_t rvp = NULLVP;
+	vnode_t vp = NULLVP;
+	u_int32_t c_flag;
 
 	/*
 	 * Only the extents and bitmap files support lock recursion.
@@ -2095,18 +2275,36 @@ hfs_unlock(struct cnode *cp)
 			return;
 		}
 	}
-	c_flag = cp->c_flag;
-	cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT | C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE);
-	if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) {
+
+	const thread_t thread = current_thread();
+
+	if (cp->c_lockowner == thread) {
+		c_flag = cp->c_flag;
+
+		// If we have the truncate lock, we must defer the puts
+		if (cp->c_truncatelockowner == thread) {
+			if (ISSET(c_flag, C_NEED_DVNODE_PUT)
+			    && !cp->c_need_dvnode_put_after_truncate_unlock) {
+				CLR(c_flag, C_NEED_DVNODE_PUT);
+				cp->c_need_dvnode_put_after_truncate_unlock = true;
+			}
+			if (ISSET(c_flag, C_NEED_RVNODE_PUT)
+			    && !cp->c_need_rvnode_put_after_truncate_unlock) {
+				CLR(c_flag, C_NEED_RVNODE_PUT);
+				cp->c_need_rvnode_put_after_truncate_unlock = true;
+			}
+		}
+
+		CLR(cp->c_flag, (C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE
+				 | C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT));
+
+		if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) {
 			vp = cp->c_vp;
-	}
-	if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) {
+		}
+		if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) {
 			rvp = cp->c_rsrc_vp;
-	}
+		}
 
-	lockowner = cp->c_lockowner;
-	if (lockowner == current_thread()) {
 		cp->c_lockowner = NULL;
 		lck_rw_unlock_exclusive(&cp->c_rwlock);
 	} else {
@@ -2115,14 +2313,29 @@ hfs_unlock(struct cnode *cp)
 
 	/* Perform any vnode post processing after cnode lock is dropped. */
 	if (vp) {
-		if (c_flag & C_NEED_DATA_SETSIZE)
-			ubc_setsize(vp, 0);
+		if (c_flag & C_NEED_DATA_SETSIZE) {
+			ubc_setsize(vp, VTOF(vp)->ff_size);
+#if HFS_COMPRESSION
+			/*
+			 * If this is a compressed file, we need to reset the
+			 * compression state.  We will have set the size to zero
+			 * above and it will get fixed up later (in exactly the
+			 * same way that new vnodes are fixed up).  Note that we
+			 * should only be able to get here if the truncate lock is
+			 * held exclusively and so we do the reset when that's
+			 * unlocked.
+			 */
+			decmpfs_cnode *dp = VTOCMP(vp);
+			if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN)
+				cp->c_need_decmpfs_reset = true;
+#endif
+		}
 		if (c_flag & C_NEED_DVNODE_PUT)
 			vnode_put(vp);
 	}
 	if (rvp) {
 		if (c_flag & C_NEED_RSRC_SETSIZE)
-			ubc_setsize(rvp, 0);
+			ubc_setsize(rvp, VTOF(rvp)->ff_size);
 		if (c_flag & C_NEED_RVNODE_PUT)
 			vnode_put(rvp);
 	}
@@ -2194,7 +2407,7 @@ skip2:
 void
 hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
 {
-	void * thread = current_thread();
+	thread_t thread = current_thread();
 
 	if (cp->c_truncatelockowner == thread) {
 		/*
@@ -2220,6 +2433,21 @@ hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockfla
 	}
 }
 
+bool hfs_truncate_lock_upgrade(struct cnode *cp)
+{
+	assert(cp->c_truncatelockowner == HFS_SHARED_OWNER);
+	if (!lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock))
+		return false;
+	cp->c_truncatelockowner = current_thread();
+	return true;
+}
+
+void hfs_truncate_lock_downgrade(struct cnode *cp)
+{
+	assert(cp->c_truncatelockowner == current_thread());
+	lck_rw_lock_exclusive_to_shared(&cp->c_truncatelock);
+	cp->c_truncatelockowner = HFS_SHARED_OWNER;
+}
 
 /*
  * Attempt to get the truncate lock.  If it cannot be acquired, error out.
@@ -2229,7 +2457,7 @@ hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockfla
 */
 int hfs_try_trunclock (struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
 {
-	void * thread = current_thread();
+	thread_t thread = current_thread();
 	boolean_t didlock = false;
 
 	if (cp->c_truncatelockowner == thread) {
@@ -2275,7 +2503,7 @@ int hfs_try_trunclock (struct cnode *cp, enum hfs_locktype locktype, enum hfs_lo
 void
 hfs_unlock_truncate(struct cnode *cp, enum hfs_lockflags flags)
 {
-	void *thread = current_thread();
+	thread_t thread = current_thread();
 
 	/*
 	 * If HFS_LOCK_SKIP_IF_EXCLUSIVE is set in the flags AND the current
@@ -2297,8 +2525,53 @@ hfs_unlock_truncate(struct cnode *cp, enum hfs_lockflags flags)
 
 	/* HFS_LOCK_EXCLUSIVE */
 	if (thread == cp->c_truncatelockowner) {
+		vnode_t vp = NULL, rvp = NULL;
+
+		/*
+		 * If there are pending set sizes, the cnode lock should be dropped
+		 * first.
+		 */
+#if DEBUG
+		assert(!(cp->c_lockowner == thread
+			 && ISSET(cp->c_flag, C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE)));
+#elif DEVELOPMENT
+		if (cp->c_lockowner == thread
+		    && ISSET(cp->c_flag, C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE)) {
+			printf("hfs: hfs_unlock_truncate called with C_NEED_DATA/RSRC_SETSIZE set (caller: 0x%llx)\n",
+			       (uint64_t)VM_KERNEL_UNSLIDE(__builtin_return_address(0)));
+		}
+#endif
+
+		if (cp->c_need_dvnode_put_after_truncate_unlock) {
+			vp = cp->c_vp;
+			cp->c_need_dvnode_put_after_truncate_unlock = false;
+		}
+		if (cp->c_need_rvnode_put_after_truncate_unlock) {
+			rvp = cp->c_rsrc_vp;
+			cp->c_need_rvnode_put_after_truncate_unlock = false;
+		}
+
+#if HFS_COMPRESSION
+		bool reset_decmpfs = cp->c_need_decmpfs_reset;
+		cp->c_need_decmpfs_reset = false;
+#endif
+
 		cp->c_truncatelockowner = NULL;
 		lck_rw_unlock_exclusive(&cp->c_truncatelock);
+
+#if HFS_COMPRESSION
+		if (reset_decmpfs) {
+			decmpfs_cnode *dp = cp->c_decmp;
+			if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN)
+				decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
+		}
+#endif
+
+		// Do the puts now
+		if (vp)
+			vnode_put(vp);
+		if (rvp)
+			vnode_put(rvp);
 	} else { /* HFS_LOCK_SHARED */
 		lck_rw_unlock_shared(&cp->c_truncatelock);
 	}
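
Editor's sketch, appended for illustration and not part of the patch: the gen-count refactor above routes both files and directories through hfs_get_gencount_internal(), and its NOTE pins the layout it depends on: the big-endian write_gen_counter is the last 32-bit word of the 32-byte FinderInfo blob (16 bytes of classic FinderInfo, then the extended info whose final word is the counter, i.e. bytes 28..31). Below is a minimal user-space read of that word, assuming only this layout; gencount_from_finderinfo() is a hypothetical name, and the zero-count normalization performed in the elided hunk is omitted.

	#include <stdint.h>
	#include <stdio.h>

	/* Decode the write generation counter from a raw 32-byte FinderInfo
	 * blob.  The counter is stored big-endian as the last 32-bit word,
	 * so decode bytes 28..31 by hand rather than relying on the
	 * kernel's OSSwapBigToHostInt32(). */
	static uint32_t
	gencount_from_finderinfo(const uint8_t finderinfo[32])
	{
		const uint8_t *p = finderinfo + 16 + 12;  /* extended info, last word */
		return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
		       ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
	}

	int main(void)
	{
		uint8_t fi[32] = { 0 };
		fi[30] = 0x01; fi[31] = 0x02;  /* counter 0x00000102, big endian */
		printf("gen count = %u\n", gencount_from_finderinfo(fi));  /* 258 */
		return 0;
	}

This lockless read is what the diff's atomicity comment justifies: an aligned 32-bit load is atomic on the supported platforms, so hfs_get_gencount() needs only an iocount to keep the cnode from going away.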
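Editor's sketch of the upgrade convention behind the new hfs_lock_upgrade() and hfs_truncate_lock_upgrade(): in XNU, a failed lck_rw_lock_shared_to_exclusive() does not leave the lock held shared, it drops the lock entirely (avoiding deadlock between two would-be upgraders), which is why both helpers return false rather than block. A hypothetical caller shape follows, assuming the kernel environment of this file; upgrade_or_retake() is an illustrative name, not a function in the patch.

	/* Take the truncate lock shared, then try to upgrade in place. */
	static void
	upgrade_or_retake(struct cnode *cp)
	{
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);

		if (!hfs_truncate_lock_upgrade(cp)) {
			/* The failed conversion dropped the lock entirely, so
			 * re-take it exclusively and revalidate anything observed
			 * while it was held shared: other threads ran in between. */
			hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		}

		/* ... exclusive work; hfs_truncate_lock_downgrade() can hand the
		 * lock back to shared without a drop-and-retake window ... */

		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	}

The downgrade direction (lck_rw_lock_exclusive_to_shared) always succeeds, which is why hfs_truncate_lock_downgrade() returns nothing.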
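Editor's note on the deferred puts in hfs_unlock() and hfs_unlock_truncate(): when the unlocking thread still owns the truncate lock, pending vnode_put() calls are parked in c_need_dvnode_put_after_truncate_unlock / c_need_rvnode_put_after_truncate_unlock and issued only after the truncate lock is released, presumably because dropping the last iocount can push the vnode through hfs_vnop_inactive, which itself takes the truncate lock; the decmpfs state reset is deferred the same way. A generic user-space sketch of the shape, with illustrative names (node, do_put, etc.) that are not from the patch:

	#include <pthread.h>
	#include <stdbool.h>

	struct node {
		pthread_mutex_t outer;  /* stands in for the truncate lock */
		pthread_mutex_t inner;  /* stands in for the cnode rwlock */
		bool put_deferred;      /* c_need_*_put_after_truncate_unlock */
	};

	/* Stand-in for vnode_put(): in HFS it may re-enter the file system
	 * and take "outer", so it must never run while "outer" is held. */
	static void do_put(struct node *n) { (void)n; }

	static void inner_unlock(struct node *n, bool need_put, bool outer_held)
	{
		if (need_put && outer_held)
			n->put_deferred = true;  /* park it, as hfs_unlock() does */
		pthread_mutex_unlock(&n->inner);
		if (need_put && !outer_held)
			do_put(n);               /* safe: outer is not held */
	}

	static void outer_unlock(struct node *n)
	{
		bool put = n->put_deferred;  /* latch while outer is still held */
		n->put_deferred = false;
		pthread_mutex_unlock(&n->outer);
		if (put)
			do_put(n);               /* as hfs_unlock_truncate() does */
	}

As in the patch, the flag is read and cleared before the outer lock is released, so exactly one unlocker performs the put.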