X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/7e4a7d3939db04e70062ae6c7bf24b8c8b2f5a7c..22ba694c5857e62b5a553b1505dcf2e509177f28:/bsd/hfs/hfs_vnops.c?ds=sidebyside

diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c
index 9114d0a99..414d6de78 100644
--- a/bsd/hfs/hfs_vnops.c
+++ b/bsd/hfs/hfs_vnops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -45,6 +46,12 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
@@ -72,20 +79,27 @@
 /* Always F_FULLFSYNC? 1=yes,0=no (default due to "various" reasons is 'no') */
 int always_do_fullfsync = 0;
 SYSCTL_DECL(_vfs_generic);
-SYSCTL_INT (_vfs_generic, OID_AUTO, always_do_fullfsync, CTLFLAG_RW, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called");
+SYSCTL_INT (_vfs_generic, OID_AUTO, always_do_fullfsync, CTLFLAG_RW | CTLFLAG_LOCKED, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called");
 
-static int hfs_makenode(struct vnode *dvp, struct vnode **vpp,
+int hfs_makenode(struct vnode *dvp, struct vnode **vpp,
                  struct componentname *cnp, struct vnode_attr *vap,
                  vfs_context_t ctx);
+int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p);
+int hfs_metasync_all(struct hfsmount *hfsmp);
 
-static int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p);
-static int hfs_metasync_all(struct hfsmount *hfsmp);
+int hfs_removedir(struct vnode *, struct vnode *, struct componentname *,
+                  int, int);
+int hfs_removefile(struct vnode *, struct vnode *, struct componentname *,
+                   int, int, int, struct vnode *, int);
 
-static int hfs_removedir(struct vnode *, struct vnode *, struct componentname *,
-                         int);
+/* Used here and in cnode teardown -- for symlinks */
+int hfs_removefile_callback(struct buf *bp, void *hfsmp);
 
-static int hfs_removefile(struct vnode *, struct vnode *, struct componentname *,
-                          int, int, int, struct vnode *);
+int hfs_movedata (struct vnode *, struct vnode*);
+static int hfs_move_fork (struct filefork *srcfork, struct cnode *src,
+                          struct filefork *dstfork, struct cnode *dst);
+
+decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp);
 
 #if FIFO
 static int hfsfifo_read(struct vnop_read_args *);
@@ -95,43 +109,64 @@ static int hfsfifo_close(struct vnop_close_args *);
 extern int (**fifo_vnodeop_p)(void *);
 #endif /* FIFO */
 
-static int hfs_vnop_close(struct vnop_close_args*);
-static int hfs_vnop_create(struct vnop_create_args*);
-static int hfs_vnop_exchange(struct vnop_exchange_args*);
-static int hfs_vnop_fsync(struct vnop_fsync_args*);
-static int hfs_vnop_mkdir(struct vnop_mkdir_args*);
-static int hfs_vnop_mknod(struct vnop_mknod_args*);
-static int hfs_vnop_getattr(struct vnop_getattr_args*);
-static int hfs_vnop_open(struct vnop_open_args*);
-static int hfs_vnop_readdir(struct vnop_readdir_args*);
-static int hfs_vnop_remove(struct vnop_remove_args*);
-static int hfs_vnop_rename(struct vnop_rename_args*);
-static int hfs_vnop_rmdir(struct vnop_rmdir_args*);
-static int hfs_vnop_symlink(struct vnop_symlink_args*);
-static int hfs_vnop_setattr(struct vnop_setattr_args*);
-static int hfs_vnop_readlink(struct vnop_readlink_args *);
-static int hfs_vnop_pathconf(struct vnop_pathconf_args *);
-static int hfs_vnop_whiteout(struct vnop_whiteout_args *);
-static int hfsspec_read(struct vnop_read_args *);
-static int hfsspec_write(struct vnop_write_args *);
-static int hfsspec_close(struct vnop_close_args *);
+int hfs_vnop_close(struct vnop_close_args*);
+int hfs_vnop_create(struct vnop_create_args*);
+int hfs_vnop_exchange(struct vnop_exchange_args*);
+int hfs_vnop_fsync(struct vnop_fsync_args*);
+int hfs_vnop_mkdir(struct vnop_mkdir_args*);
+int hfs_vnop_mknod(struct vnop_mknod_args*);
+int hfs_vnop_getattr(struct vnop_getattr_args*);
+int hfs_vnop_open(struct vnop_open_args*);
+int hfs_vnop_readdir(struct vnop_readdir_args*);
+int hfs_vnop_remove(struct vnop_remove_args*);
+int hfs_vnop_rename(struct vnop_rename_args*);
+int hfs_vnop_rmdir(struct vnop_rmdir_args*);
+int hfs_vnop_symlink(struct vnop_symlink_args*);
+int hfs_vnop_setattr(struct vnop_setattr_args*);
+int hfs_vnop_readlink(struct vnop_readlink_args *);
+int hfs_vnop_pathconf(struct vnop_pathconf_args *);
+int hfs_vnop_whiteout(struct vnop_whiteout_args *);
+int hfs_vnop_mmap(struct vnop_mmap_args *ap);
+int hfsspec_read(struct vnop_read_args *);
+int hfsspec_write(struct vnop_write_args *);
+int hfsspec_close(struct vnop_close_args *);
 
 /* Options for hfs_removedir and hfs_removefile */
 #define HFSRM_SKIP_RESERVE 0x01
-
 /*****************************************************************************
 *
 * Common Operations on vnodes
 *
 *****************************************************************************/
 
+/*
+ * Is the given cnode either the .journal or .journal_info_block file on
+ * a volume with an active journal?  Many VNOPs use this to deny access
+ * to those files.
+ *
+ * Note: the .journal file on a volume with an external journal still
+ * returns true here, even though it does not actually hold the contents
+ * of the volume's journal.
+ */
+static _Bool
+hfs_is_journal_file(struct hfsmount *hfsmp, struct cnode *cp)
+{
+	if (hfsmp->jnl != NULL &&
+	    (cp->c_fileid == hfsmp->hfs_jnlinfoblkid ||
+	     cp->c_fileid == hfsmp->hfs_jnlfileid)) {
+		return true;
+	} else {
+		return false;
+	}
+}
+
 /*
  * Create a regular file.
  */
-static int
+int
 hfs_vnop_create(struct vnop_create_args *ap)
 {
 	int error;
@@ -164,6 +199,7 @@ again:
 	/* Make sure it was file. */
 	if ((error == 0) && !vnode_isreg(*args.a_vpp)) {
 		vnode_put(*args.a_vpp);
+		*args.a_vpp = NULLVP;
 		error = EEXIST;
 	}
 	args.a_cnp->cn_nameiop = CREATE;
@@ -174,7 +210,7 @@
 /*
  * Make device special file.
  */
-static int
+int
 hfs_vnop_mknod(struct vnop_mknod_args *ap)
 {
 	struct vnode_attr *vap = ap->a_vap;
@@ -224,7 +260,7 @@ hfs_ref_data_vp(struct cnode *cp, struct vnode **data_vp, int skiplock)
 	/* maybe we should take the hfs cnode lock here, and if so, use the skiplock parameter to tell us not to */
-	if (!skiplock) hfs_lock(cp, HFS_SHARED_LOCK);
+	if (!skiplock) hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
 	struct vnode *c_vp = cp->c_vp;
 	if (c_vp) {
 		/* we already have a data vnode */
@@ -245,7 +281,7 @@ hfs_ref_data_vp(struct cnode *cp, struct vnode **data_vp, int skiplock)
 		return EINVAL;
 	}
-	if (0 == hfs_vget(VTOHFS(cp->c_rsrc_vp), cp->c_cnid, data_vp, 1) &&
+	if (0 == hfs_vget(VTOHFS(cp->c_rsrc_vp), cp->c_cnid, data_vp, 1, 0) &&
 	    0 != data_vp) {
 		vref = vnode_ref(*data_vp);
 		vnode_put(*data_vp);
@@ -263,9 +299,10 @@ hfs_ref_data_vp(struct cnode *cp, struct vnode **data_vp, int skiplock)
 /*
  * hfs_lazy_init_decmpfs_cnode(): returns the decmpfs_cnode for a cnode,
- * allocating it if necessary; returns NULL if there was an allocation error
+ * allocating it if necessary; returns NULL if there was an allocation error.
+ * function is non-static so that it can be used from the FCNTL handler. */ -static decmpfs_cnode * +decmpfs_cnode * hfs_lazy_init_decmpfs_cnode(struct cnode *cp) { if (!cp->c_decmp) { @@ -298,7 +335,7 @@ hfs_file_is_compressed(struct cnode *cp, int skiplock) int ret = 0; /* fast check to see if file is compressed. If flag is clear, just answer no */ - if (!(cp->c_flags & UF_COMPRESSED)) { + if (!(cp->c_bsdflags & UF_COMPRESSED)) { return 0; } @@ -334,6 +371,8 @@ hfs_file_is_compressed(struct cnode *cp, int skiplock) * if the caller has passed a valid vnode (has a ref count > 0), then hfsmp and fid are not required. * if the caller doesn't have a vnode, pass NULL in vp, and pass valid hfsmp and fid. * files size is returned in size (required) + * if the indicated file is a directory (or something that doesn't have a data fork), then this call + * will return an error and the caller should fall back to treating the item as an uncompressed file */ int hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *vp, cnid_t fid, off_t *size, int skiplock) @@ -349,7 +388,7 @@ hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *v if (!hfsmp || !fid) { /* make sure we have the required parameters */ return EINVAL; } - if (0 != hfs_vget(hfsmp, fid, &vp, skiplock)) { /* vnode is null, use hfs_vget() to get it */ + if (0 != hfs_vget(hfsmp, fid, &vp, skiplock, 0)) { /* vnode is null, use hfs_vget() to get it */ vp = NULL; } else { putaway = 1; /* note that hfs_vget() was used to aquire the vnode */ @@ -359,10 +398,27 @@ hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *v * ensures the cached size is present in case decmpfs hasn't * encountered this node yet. */ - if ( ( NULL != vp ) && hfs_file_is_compressed(VTOC(vp), skiplock) ) { - *size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp)); /* file info will be cached now, so get size */ - } else { - ret = EINVAL; + if (vp) { + if (hfs_file_is_compressed(VTOC(vp), skiplock) ) { + *size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp)); /* file info will be cached now, so get size */ + } else { + if (VTOCMP(vp) && VTOCMP(vp)->cmp_type >= CMP_MAX) { + if (VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) { + // if we don't recognize this type, just use the real data fork size + if (VTOC(vp)->c_datafork) { + *size = VTOC(vp)->c_datafork->ff_size; + ret = 0; + } else { + ret = EINVAL; + } + } else { + *size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp)); /* file info will be cached now, so get size */ + ret = 0; + } + } else { + ret = EINVAL; + } + } } if (putaway) { /* did we use hfs_vget() to get this vnode? */ @@ -393,10 +449,199 @@ hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int skipl } #endif /* HFS_COMPRESSION */ + +// +// This function gets the doc_tombstone structure for the +// current thread. If the thread doesn't have one, the +// structure is allocated. +// +static struct doc_tombstone * +get_uthread_doc_tombstone(void) +{ + struct uthread *ut; + ut = get_bsdthread_info(current_thread()); + + if (ut->t_tombstone == NULL) { + ut->t_tombstone = kalloc(sizeof(struct doc_tombstone)); + if (ut->t_tombstone) { + memset(ut->t_tombstone, 0, sizeof(struct doc_tombstone)); + } + } + + return ut->t_tombstone; +} + +// +// This routine clears out the current tombstone for the +// current thread and if necessary passes the doc-id of +// the tombstone on to the dst_cnode. 
+// +// If the doc-id transfers to dst_cnode, we also generate +// a doc-id changed fsevent. Unlike all the other fsevents, +// doc-id changed events can only be generated here in HFS +// where we have the necessary info. +// +static void +clear_tombstone_docid(struct doc_tombstone *ut, struct hfsmount *hfsmp, struct cnode *dst_cnode) +{ + uint32_t old_id = ut->t_lastop_document_id; + + ut->t_lastop_document_id = 0; + ut->t_lastop_parent = NULL; + ut->t_lastop_parent_vid = 0; + ut->t_lastop_filename[0] = '\0'; + + // + // If the lastop item is still the same and needs to be cleared, + // clear it. + // + if (dst_cnode && old_id && ut->t_lastop_item && vnode_vid(ut->t_lastop_item) == ut->t_lastop_item_vid) { + // + // clear the document_id from the file that used to have it. + // XXXdbg - we need to lock the other vnode and make sure to + // update it on disk. + // + struct cnode *ocp = VTOC(ut->t_lastop_item); + struct FndrExtendedFileInfo *ofip = (struct FndrExtendedFileInfo *)((char *)&ocp->c_attr.ca_finderinfo + 16); + + // printf("clearing doc-id from ino %d\n", ocp->c_desc.cd_cnid); + ofip->document_id = 0; + ocp->c_bsdflags &= ~UF_TRACKED; + ocp->c_flag |= C_MODIFIED | C_FORCEUPDATE; // mark it dirty + /* cat_update(hfsmp, &ocp->c_desc, &ocp->c_attr, NULL, NULL); */ + + } + +#if CONFIG_FSE + if (dst_cnode && old_id) { + struct FndrExtendedFileInfo *fip = (struct FndrExtendedFileInfo *)((char *)&dst_cnode->c_attr.ca_finderinfo + 16); + + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)ut->t_lastop_fileid, // src inode # + FSE_ARG_INO, (ino64_t)dst_cnode->c_fileid, // dst inode # + FSE_ARG_INT32, (uint32_t)fip->document_id, + FSE_ARG_DONE); + } +#endif + // last, clear these now that we're all done + ut->t_lastop_item = NULL; + ut->t_lastop_fileid = 0; + ut->t_lastop_item_vid = 0; +} + + +// +// This function is used to filter out operations on temp +// filenames. We have to filter out operations on certain +// temp filenames to work-around questionable application +// behavior from apps like Autocad that perform unusual +// sequences of file system operations for a "safe save". +static int +is_ignorable_temp_name(const char *nameptr, int len) +{ + if (len == 0) { + len = strlen(nameptr); + } + + if ( strncmp(nameptr, "atmp", 4) == 0 + || (len > 4 && strncmp(nameptr+len-4, ".bak", 4) == 0) + || (len > 4 && strncmp(nameptr+len-4, ".tmp", 4) == 0)) { + return 1; + } + + return 0; +} + +// +// Decide if we need to save a tombstone or not. Normally we always +// save a tombstone - but if there already is one and the name we're +// given is an ignorable name, then we will not save a tombstone. +// +static int +should_save_docid_tombstone(struct doc_tombstone *ut, struct vnode *vp, struct componentname *cnp) +{ + if (cnp->cn_nameptr == NULL) { + return 0; + } + + if (ut->t_lastop_document_id && ut->t_lastop_item == vp && is_ignorable_temp_name(cnp->cn_nameptr, cnp->cn_namelen)) { + return 0; + } + + return 1; +} + + +// +// This function saves a tombstone for the given vnode and name. The +// tombstone represents the parent directory and name where the document +// used to live and the document-id of that file. This info is recorded +// in the doc_tombstone structure hanging off the uthread (which assumes +// that all safe-save operations happen on the same thread). +// +// If later on the same parent/name combo comes back into existence then +// we'll preserve the doc-id from this vnode onto the new vnode. 
+// +static void +save_tombstone(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int for_unlink) +{ + struct cnode *cp = VTOC(vp); + struct doc_tombstone *ut; + ut = get_uthread_doc_tombstone(); + + if (for_unlink && vp->v_type == VREG && cp->c_linkcount > 1) { + // + // a regular file that is being unlinked and that is also + // hardlinked should not clear the UF_TRACKED state or + // mess with the tombstone because somewhere else in the + // file system the file is still alive. + // + return; + } + + ut->t_lastop_parent = dvp; + ut->t_lastop_parent_vid = vnode_vid(dvp); + ut->t_lastop_fileid = cp->c_fileid; + if (for_unlink) { + ut->t_lastop_item = NULL; + ut->t_lastop_item_vid = 0; + } else { + ut->t_lastop_item = vp; + ut->t_lastop_item_vid = vnode_vid(vp); + } + + strlcpy((char *)&ut->t_lastop_filename[0], cnp->cn_nameptr, sizeof(ut->t_lastop_filename)); + + struct FndrExtendedFileInfo *fip = (struct FndrExtendedFileInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); + ut->t_lastop_document_id = fip->document_id; + + if (for_unlink) { + // clear this so it's never returned again + fip->document_id = 0; + cp->c_bsdflags &= ~UF_TRACKED; + + if (ut->t_lastop_document_id) { + (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); + +#if CONFIG_FSE + // this event is more of a "pending-delete" + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode # + FSE_ARG_INO, (ino64_t)0, // dst inode # + FSE_ARG_INT32, ut->t_lastop_document_id, // document id + FSE_ARG_DONE); +#endif + } + } +} + + /* * Open a file/directory. */ -static int +int hfs_vnop_open(struct vnop_open_args *ap) { struct vnode *vp = ap->a_vp; @@ -443,27 +688,17 @@ hfs_vnop_open(struct vnop_open_args *ap) /* * Files marked append-only must be opened for appending. */ - if ((cp->c_flags & APPEND) && !vnode_isdir(vp) && + if ((cp->c_bsdflags & APPEND) && !vnode_isdir(vp) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); if (vnode_isreg(vp) && !UBCINFOEXISTS(vp)) return (EBUSY); /* file is in use by the kernel */ - /* Don't allow journal file to be opened externally. */ - if (cp->c_fileid == hfsmp->hfs_jnlfileid) + /* Don't allow journal to be opened externally. */ + if (hfs_is_journal_file(hfsmp, cp)) return (EPERM); - /* If we're going to write to the file, initialize quotas. */ -#if QUOTA - if ((ap->a_mode & FWRITE) && (hfsmp->hfs_flags & HFS_QUOTAS)) - (void)hfs_getinoquota(cp); -#endif /* QUOTA */ - - /* - * On the first (non-busy) open of a fragmented - * file attempt to de-frag it (if its less than 20MB). - */ if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (hfsmp->jnl == NULL) || #if NAMEDSTREAMS @@ -474,8 +709,19 @@ hfs_vnop_open(struct vnop_open_args *ap) return (0); } - if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); + +#if QUOTA + /* If we're going to write to the file, initialize quotas. */ + if ((ap->a_mode & FWRITE) && (hfsmp->hfs_flags & HFS_QUOTAS)) + (void)hfs_getinoquota(cp); +#endif /* QUOTA */ + + /* + * On the first (non-busy) open of a fragmented + * file attempt to de-frag it (if its less than 20MB). 
+ */ fp = VTOF(vp); if (fp->ff_blocks && fp->ff_extents[7].blockCount != 0 && @@ -507,6 +753,7 @@ hfs_vnop_open(struct vnop_open_args *ap) vfs_context_proc(ap->a_context)); } } + hfs_unlock(cp); return (0); @@ -516,7 +763,7 @@ hfs_vnop_open(struct vnop_open_args *ap) /* * Close a file/directory. */ -static int +int hfs_vnop_close(ap) struct vnop_close_args /* { struct vnode *a_vp; @@ -532,7 +779,7 @@ hfs_vnop_close(ap) int tooktrunclock = 0; int knownrefs = 0; - if ( hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) + if ( hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) return (0); cp = VTOC(vp); hfsmp = VTOHFS(vp); @@ -559,11 +806,11 @@ hfs_vnop_close(ap) // release cnode lock; must acquire truncate lock BEFORE cnode lock hfs_unlock(cp); - hfs_lock_truncate(cp, TRUE); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); tooktrunclock = 1; - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) { - hfs_unlock_truncate(cp, TRUE); + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); // bail out if we can't re-acquire cnode lock return 0; } @@ -585,8 +832,8 @@ hfs_vnop_close(ap) // if we froze the fs and we're exiting, then "thaw" the fs if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) { hfsmp->hfs_freezing_proc = NULL; - hfs_global_exclusive_lock_release(hfsmp); - lck_rw_unlock_exclusive(&hfsmp->hfs_insync); + hfs_unlock_global (hfsmp); + lck_rw_unlock_exclusive(&hfsmp->hfs_insync); } busy = vnode_isinuse(vp, 1); @@ -601,7 +848,7 @@ hfs_vnop_close(ap) } if (tooktrunclock){ - hfs_unlock_truncate(cp, TRUE); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } hfs_unlock(cp); @@ -615,7 +862,7 @@ hfs_vnop_close(ap) /* * Get basic attributes. */ -static int +int hfs_vnop_getattr(struct vnop_getattr_args *ap) { #define VNODE_ATTR_TIMES \ @@ -648,10 +895,16 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) /* if it's a data fork, we need to know if it was compressed so we can report the uncompressed size */ compressed = hfs_file_is_compressed(cp, 0); } - if (compressed && (VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_size))) { - if (0 != hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0)) { - /* failed to get the uncompressed size, we'll check for this later */ - uncompressed_size = -1; + if ((VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_size))) { + // if it's compressed + if (compressed || (!VNODE_IS_RSRC(vp) && cp->c_decmp && cp->c_decmp->cmp_type >= CMP_MAX)) { + if (0 != hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0)) { + /* failed to get the uncompressed size, we'll check for this later */ + uncompressed_size = -1; + } else { + // fake that it's compressed + compressed = 1; + } } } } @@ -670,7 +923,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) vap->va_uid = cp->c_uid; vap->va_gid = cp->c_gid; vap->va_mode = cp->c_mode; - vap->va_flags = cp->c_flags; + vap->va_flags = cp->c_bsdflags; vap->va_supported |= VNODE_ATTR_AUTH & ~VNODE_ATTR_va_acl; if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) { @@ -691,12 +944,12 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) */ if ((vap->va_active & VNODE_ATTR_TIMES) && (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime)) { - if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); hfs_touchtimes(hfsmp, cp); } else { - if ((error = hfs_lock(cp, HFS_SHARED_LOCK))) + if ((error = 
hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) return (error); } @@ -812,8 +1065,18 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) if (cp->c_blocks - VTOF(vp)->ff_blocks) { /* We deal with rsrc fork vnode iocount at the end of the function */ - error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE); + error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE); if (error) { + /* + * Note that we call hfs_vgetrsrc with error_on_unlinked + * set to FALSE. This is because we may be invoked via + * fstat() on an open-unlinked file descriptor and we must + * continue to support access to the rsrc fork until it disappears. + * The code at the end of this function will be + * responsible for releasing the iocount generated by + * hfs_vgetrsrc. This is because we can't drop the iocount + * without unlocking the cnode first. + */ goto out; } @@ -870,12 +1133,22 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) vap->va_backup_time.tv_sec = cp->c_btime; vap->va_backup_time.tv_nsec = 0; + /* See if we need to emit the date added field to the user */ + if (VATTR_IS_ACTIVE(vap, va_addedtime)) { + u_int32_t dateadded = hfs_get_dateadded (cp); + if (dateadded) { + vap->va_addedtime.tv_sec = dateadded; + vap->va_addedtime.tv_nsec = 0; + VATTR_SET_SUPPORTED (vap, va_addedtime); + } + } + /* XXX is this really a good 'optimal I/O size'? */ vap->va_iosize = hfsmp->hfs_logBlockSize; vap->va_uid = cp->c_uid; vap->va_gid = cp->c_gid; vap->va_mode = cp->c_mode; - vap->va_flags = cp->c_flags; + vap->va_flags = cp->c_bsdflags; /* * Exporting file IDs from HFS Plus: @@ -929,7 +1202,29 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) vap->va_data_size = data_size; vap->va_supported |= VNODE_ATTR_va_data_size; #endif - + + if (VATTR_IS_ACTIVE(vap, va_gen)) { + if (UBCINFOEXISTS(vp) && (vp->v_ubcinfo->ui_flags & UI_ISMAPPED)) { + /* While file is mmapped the generation count is invalid. + * However, bump the value so that the write-gen counter + * will be different once the file is unmapped (since, + * when unmapped the pageouts may not yet have happened) + */ + if (vp->v_ubcinfo->ui_flags & UI_MAPPEDWRITE) { + hfs_incr_gencount (cp); + } + vap->va_gen = 0; + } else { + vap->va_gen = hfs_get_gencount(cp); + } + + VATTR_SET_SUPPORTED(vap, va_gen); + } + if (VATTR_IS_ACTIVE(vap, va_document_id)) { + vap->va_document_id = hfs_get_document_id(cp); + VATTR_SET_SUPPORTED(vap, va_document_id); + } + /* Mark them all at once instead of individual VATTR_SET_SUPPORTED calls. */ vap->va_supported |= VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time | VNODE_ATTR_va_change_time| VNODE_ATTR_va_backup_time | @@ -962,11 +1257,12 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) if ((cp->c_flag & C_HARDLINK) && ((cp->c_desc.cd_namelen == 0) || (vap->va_linkid != cp->c_cnid))) { - /* If we have no name and our link ID is the raw inode number, then we may + /* + * If we have no name and our link ID is the raw inode number, then we may * have an open-unlinked file. Go to the next link in this case. 
*/ if ((cp->c_desc.cd_namelen == 0) && (vap->va_linkid == cp->c_fileid)) { - if ((error = hfs_lookuplink(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))){ + if ((error = hfs_lookup_siblinglinks(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))){ goto out; } } @@ -1023,7 +1319,7 @@ out: return (error); } -static int +int hfs_vnop_setattr(ap) struct vnop_setattr_args /* { struct vnode *a_vp; @@ -1040,7 +1336,10 @@ hfs_vnop_setattr(ap) int error = 0; uid_t nuid; gid_t ngid; + time_t orig_ctime; + orig_ctime = VTOC(vp)->c_ctime; + #if HFS_COMPRESSION int decmpfs_reset_state = 0; /* @@ -1051,14 +1350,47 @@ hfs_vnop_setattr(ap) if (error) return error; #endif + // + // if this is not a size-changing setattr and it is not just + // an atime update, then check for a snapshot. + // + if (!VATTR_IS_ACTIVE(vap, va_data_size) && !(vap->va_active == VNODE_ATTR_va_access_time)) { + check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_MOD, NSPACE_REARM_NO_ARG); + } + +#if CONFIG_PROTECT + if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { + return (error); + } +#endif /* CONFIG_PROTECT */ hfsmp = VTOHFS(vp); - /* Don't allow modification of the journal file. */ - if (hfsmp->hfs_jnlfileid == VTOC(vp)->c_fileid) { + /* Don't allow modification of the journal. */ + if (hfs_is_journal_file(hfsmp, VTOC(vp))) { return (EPERM); } + // + // Check if we'll need a document_id and if so, get it before we lock the + // the cnode to avoid any possible deadlock with the root vnode which has + // to get locked to get the document id + // + u_int32_t document_id=0; + if (VATTR_IS_ACTIVE(vap, va_flags) && (vap->va_flags & UF_TRACKED) && !(VTOC(vp)->c_bsdflags & UF_TRACKED)) { + struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&(VTOC(vp)->c_attr.ca_finderinfo) + 16); + // + // If the document_id is not set, get a new one. It will be set + // on the file down below once we hold the cnode lock. + // + if (fip->document_id == 0) { + if (hfs_generate_document_id(hfsmp, &document_id) != 0) { + document_id = 0; + } + } + } + + /* * File size change request. * We are guaranteed that this is not a directory, and that @@ -1084,6 +1416,8 @@ hfs_vnop_setattr(ap) } } + check_for_tracked_file(vp, orig_ctime, vap->va_data_size == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL); + decmpfs_lock_compressed_data(dp, 1); if (hfs_file_is_compressed(VTOC(vp), 1)) { error = decmpfs_decompress_file(vp, dp, -1/*vap->va_data_size*/, 0, 1); @@ -1095,13 +1429,13 @@ hfs_vnop_setattr(ap) #endif /* Take truncate lock before taking cnode lock. */ - hfs_lock_truncate(VTOC(vp), TRUE); + hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); /* Perform the ubc_setsize before taking the cnode lock. 
*/ ubc_setsize(vp, vap->va_data_size); - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { - hfs_unlock_truncate(VTOC(vp), TRUE); + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT); #if HFS_COMPRESSION decmpfs_unlock_compressed_data(dp, 1); #endif @@ -1111,7 +1445,7 @@ hfs_vnop_setattr(ap) error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 1, 0, ap->a_context); - hfs_unlock_truncate(cp, TRUE); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); #if HFS_COMPRESSION decmpfs_unlock_compressed_data(dp, 1); #endif @@ -1119,7 +1453,7 @@ hfs_vnop_setattr(ap) goto out; } if (cp == NULL) { - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); cp = VTOC(vp); } @@ -1171,7 +1505,7 @@ hfs_vnop_setattr(ap) u_int16_t *fdFlags; #if HFS_COMPRESSION - if ((cp->c_flags ^ vap->va_flags) & UF_COMPRESSED) { + if ((cp->c_bsdflags ^ vap->va_flags) & UF_COMPRESSED) { /* * the UF_COMPRESSED was toggled, so reset our cached compressed state * but we don't want to actually do the update until we've released the cnode lock down below @@ -1181,9 +1515,53 @@ hfs_vnop_setattr(ap) decmpfs_reset_state = 1; } #endif + if ((vap->va_flags & UF_TRACKED) && !(cp->c_bsdflags & UF_TRACKED)) { + struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); + + // + // we're marking this item UF_TRACKED. if the document_id is + // not set, get a new one and put it on the file. + // + if (fip->document_id == 0) { + if (document_id != 0) { + // printf("SETATTR: assigning doc-id %d to %s (ino %d)\n", document_id, vp->v_name, cp->c_desc.cd_cnid); + fip->document_id = (uint32_t)document_id; +#if CONFIG_FSE + add_fsevent(FSE_DOCID_CHANGED, ap->a_context, + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # + FSE_ARG_INT32, document_id, + FSE_ARG_DONE); +#endif + } else { + // printf("hfs: could not acquire a new document_id for %s (ino %d)\n", vp->v_name, cp->c_desc.cd_cnid); + } + } + + } else if (!(vap->va_flags & UF_TRACKED) && (cp->c_bsdflags & UF_TRACKED)) { + // + // UF_TRACKED is being cleared so clear the document_id + // + struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); + if (fip->document_id) { + // printf("SETATTR: clearing doc-id %d from %s (ino %d)\n", fip->document_id, vp->v_name, cp->c_desc.cd_cnid); +#if CONFIG_FSE + add_fsevent(FSE_DOCID_CHANGED, ap->a_context, + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode # + FSE_ARG_INO, (ino64_t)0, // dst inode # + FSE_ARG_INT32, fip->document_id, // document id + FSE_ARG_DONE); +#endif + fip->document_id = 0; + cp->c_bsdflags &= ~UF_TRACKED; + } + } - cp->c_flags = vap->va_flags; + cp->c_bsdflags = vap->va_flags; cp->c_touch_chgtime = TRUE; + /* * Mirror the UF_HIDDEN flag to the invisible bit of the Finder Info. @@ -1291,7 +1669,6 @@ out: * Change the mode on a file. * cnode must be locked before calling. 
*/ -__private_extern__ int hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struct proc *p) { @@ -1300,14 +1677,9 @@ hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struc if (VTOVCB(vp)->vcbSigWord != kHFSPlusSigWord) return (0); - // XXXdbg - don't allow modification of the journal or journal_info_block - if (VTOHFS(vp)->jnl && cp && cp->c_datafork) { - struct HFSPlusExtentDescriptor *extd; - - extd = &cp->c_datafork->ff_extents[0]; - if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) { - return EPERM; - } + // Don't allow modification of the journal or journal_info_block + if (hfs_is_journal_file(VTOHFS(vp), cp)) { + return EPERM; } #if OVERRIDE_UNKNOWN_PERMISSIONS @@ -1322,7 +1694,6 @@ hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struc } -__private_extern__ int hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags) { @@ -1347,7 +1718,7 @@ hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean co } /* If immutable bit set, nobody gets to write it. */ - if (considerFlags && (cp->c_flags & IMMUTABLE)) + if (considerFlags && (cp->c_bsdflags & IMMUTABLE)) return (EPERM); /* Otherwise, user id 0 always gets access. */ @@ -1372,7 +1743,6 @@ hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean co * Perform chown operation on cnode cp; * code must be locked prior to call. */ -__private_extern__ int #if !QUOTA hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, __unused kauth_cred_t cred, @@ -1500,13 +1870,26 @@ good: /* - * The hfs_exchange routine swaps the fork data in two files by - * exchanging some of the information in the cnode. It is used - * to preserve the file ID when updating an existing file, in - * case the file is being tracked through its file ID. Typically - * its used after creating a new file during a safe-save. + * hfs_vnop_exchange: + * + * Inputs: + * 'from' vnode/cnode + * 'to' vnode/cnode + * options flag bits + * vfs_context + * + * Discussion: + * hfs_vnop_exchange is used to service the exchangedata(2) system call. + * Per the requirements of that system call, this function "swaps" some + * of the information that lives in one catalog record for some that + * lives in another. Note that not everything is swapped; in particular, + * the extent information stored in each cnode is kept local to that + * cnode. This allows existing file descriptor references to continue + * to operate on the same content, regardless of the location in the + * namespace that the file may have moved to. See inline comments + * in the function for more information. */ -static int +int hfs_vnop_exchange(ap) struct vnop_exchange_args /* { struct vnode *a_fvp; @@ -1526,19 +1909,36 @@ hfs_vnop_exchange(ap) const unsigned char *to_nameptr; char from_iname[32]; char to_iname[32]; - u_int32_t tempflag; + uint32_t to_flag_special; + uint32_t from_flag_special; cnid_t from_parid; cnid_t to_parid; int lockflags; int error = 0, started_tr = 0, got_cookie = 0; cat_cookie_t cookie; + time_t orig_from_ctime, orig_to_ctime; - /* The files must be on the same volume. */ - if (vnode_mount(from_vp) != vnode_mount(to_vp)) - return (EXDEV); + /* + * VFS does the following checks: + * 1. Validate that both are files. + * 2. Validate that both are on the same mount. + * 3. Validate that they're not the same vnode. 
+ */ + + orig_from_ctime = VTOC(from_vp)->c_ctime; + orig_to_ctime = VTOC(to_vp)->c_ctime; - if (from_vp == to_vp) - return (EINVAL); + +#if CONFIG_PROTECT + /* + * Do not allow exchangedata/F_MOVEDATAEXTENTS on data-protected filesystems + * because the EAs will not be swapped. As a result, the persistent keys would not + * match and the files will be garbage. + */ + if (cp_fs_protected (vnode_mount(from_vp))) { + return EINVAL; + } +#endif #if HFS_COMPRESSION if ( hfs_file_is_compressed(VTOC(from_vp), 0) ) { @@ -1554,6 +1954,48 @@ hfs_vnop_exchange(ap) } #endif // HFS_COMPRESSION + /* + * Normally, we want to notify the user handlers about the event, + * except if it's a handler driving the event. + */ + if ((ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) == 0) { + check_for_tracked_file(from_vp, orig_from_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + check_for_tracked_file(to_vp, orig_to_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + } else { + /* + * We're doing a data-swap. + * Take the truncate lock/cnode lock, then verify there are no mmap references. + * Issue a hfs_filedone to flush out all of the remaining state for this file. + * Allow the rest of the codeflow to re-acquire the cnode locks in order. + */ + + hfs_lock_truncate (VTOC(from_vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + + if ((error = hfs_lock(VTOC(from_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + hfs_unlock_truncate (VTOC(from_vp), HFS_LOCK_DEFAULT); + return error; + } + + /* Verify the source file is not in use by anyone besides us (including mmap refs) */ + if (vnode_isinuse(from_vp, 1)) { + error = EBUSY; + hfs_unlock(VTOC(from_vp)); + hfs_unlock_truncate (VTOC(from_vp), HFS_LOCK_DEFAULT); + return error; + } + + /* Flush out the data in the source file */ + VTOC(from_vp)->c_flag |= C_SWAPINPROGRESS; + error = hfs_filedone (from_vp, ap->a_context); + VTOC(from_vp)->c_flag &= ~C_SWAPINPROGRESS; + hfs_unlock(VTOC(from_vp)); + hfs_unlock_truncate(VTOC(from_vp), HFS_LOCK_DEFAULT); + + if (error) { + return error; + } + } + if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK))) return (error); @@ -1561,33 +2003,28 @@ hfs_vnop_exchange(ap) to_cp = VTOC(to_vp); hfsmp = VTOHFS(from_vp); - /* Only normal files can be exchanged. */ - if (!vnode_isreg(from_vp) || !vnode_isreg(to_vp) || - VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) { + /* Resource forks cannot be exchanged. */ + if ( VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) { error = EINVAL; goto exit; } - // XXXdbg - don't allow modification of the journal or journal_info_block - if (hfsmp->jnl) { - struct HFSPlusExtentDescriptor *extd; - - if (from_cp->c_datafork) { - extd = &from_cp->c_datafork->ff_extents[0]; - if (extd->startBlock == VTOVCB(from_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) { - error = EPERM; - goto exit; - } - } - - if (to_cp->c_datafork) { - extd = &to_cp->c_datafork->ff_extents[0]; - if (extd->startBlock == VTOVCB(to_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) { - error = EPERM; - goto exit; - } - } + // Don't allow modification of the journal or journal_info_block + if (hfs_is_journal_file(hfsmp, from_cp) || + hfs_is_journal_file(hfsmp, to_cp)) { + error = EPERM; + goto exit; + } + + /* + * Ok, now that all of the pre-flighting is done, call the underlying + * function if needed. 
+ */ + if (ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) { + error = hfs_movedata(from_vp, to_vp); + goto exit; } + if ((error = hfs_start_transaction(hfsmp)) != 0) { goto exit; @@ -1630,7 +2067,23 @@ hfs_vnop_exchange(ap) to_parid = to_cp->c_parentcnid; } - /* Do the exchange */ + /* + * ExchangeFileIDs swaps the extent information attached to two + * different file IDs. It also swaps the extent information that + * may live in the extents-overflow B-Tree. + * + * We do this in a transaction as this may require a lot of B-Tree nodes + * to do completely, particularly if one of the files in question + * has a lot of extents. + * + * For example, assume "file1" has fileID 50, and "file2" has fileID 52. + * For the on-disk records, which are assumed to be synced, we will + * first swap the resident inline-8 extents as part of the catalog records. + * Then we will swap any extents overflow records for each file. + * + * When this function is done, "file1" will have fileID 52, and "file2" will + * have fileID 50. + */ error = ExchangeFileIDs(hfsmp, from_nameptr, to_nameptr, from_parid, to_parid, from_cp->c_hint, to_cp->c_hint); hfs_systemfile_unlock(hfsmp, lockflags); @@ -1651,48 +2104,134 @@ hfs_vnop_exchange(ap) if (to_vp) cache_purge(to_vp); - /* Save a copy of from attributes before swapping. */ + /* Bump both source and destination write counts before any swaps. */ + { + hfs_incr_gencount (from_cp); + hfs_incr_gencount (to_cp); + } + + + /* Save a copy of "from" attributes before swapping. */ bcopy(&from_cp->c_desc, &tempdesc, sizeof(struct cat_desc)); bcopy(&from_cp->c_attr, &tempattr, sizeof(struct cat_attr)); - tempflag = from_cp->c_flag & (C_HARDLINK | C_HASXATTRS); + + /* Save whether or not each cnode is a hardlink or has EAs */ + from_flag_special = from_cp->c_flag & (C_HARDLINK | C_HASXATTRS); + to_flag_special = to_cp->c_flag & (C_HARDLINK | C_HASXATTRS); + + /* Drop the special bits from each cnode */ + from_cp->c_flag &= ~(C_HARDLINK | C_HASXATTRS); + to_cp->c_flag &= ~(C_HARDLINK | C_HASXATTRS); /* - * Swap the descriptors and all non-fork related attributes. - * (except the modify date) + * Complete the in-memory portion of the copy. + * + * ExchangeFileIDs swaps the on-disk records involved. We complete the + * operation by swapping the in-memory contents of the two files here. + * We swap the cnode descriptors, which contain name, BSD attributes, + * timestamps, etc, about the file. + * + * NOTE: We do *NOT* swap the fileforks of the two cnodes. We have + * already swapped the on-disk extent information. As long as we swap the + * IDs, the in-line resident 8 extents that live in the filefork data + * structure will point to the right data for the new file ID if we leave + * them alone. + * + * As a result, any file descriptor that points to a particular + * vnode (even though it should change names), will continue + * to point to the same content. */ + + /* Copy the "to" -> "from" cnode */ bcopy(&to_cp->c_desc, &from_cp->c_desc, sizeof(struct cat_desc)); from_cp->c_hint = 0; - from_cp->c_fileid = from_cp->c_cnid; + /* + * If 'to' was a hardlink, then we copied over its link ID/CNID/(namespace ID) + * when we bcopy'd the descriptor above. However, the cnode attributes + * are not bcopied. As a result, make sure to swap the file IDs of each item. + * + * Further, other hardlink attributes must be moved along in this swap: + * the linkcount, the linkref, and the firstlink all need to move + * along with the file IDs. See note below regarding the flags and + * what moves vs. 
what does not. + * + * For Reference: + * linkcount == total # of hardlinks. + * linkref == the indirect inode pointer. + * firstlink == the first hardlink in the chain (written to the raw inode). + * These three are tied to the fileID and must move along with the rest of the data. + */ + from_cp->c_fileid = to_cp->c_attr.ca_fileid; + from_cp->c_itime = to_cp->c_itime; from_cp->c_btime = to_cp->c_btime; from_cp->c_atime = to_cp->c_atime; from_cp->c_ctime = to_cp->c_ctime; from_cp->c_gid = to_cp->c_gid; from_cp->c_uid = to_cp->c_uid; - from_cp->c_flags = to_cp->c_flags; + from_cp->c_bsdflags = to_cp->c_bsdflags; from_cp->c_mode = to_cp->c_mode; from_cp->c_linkcount = to_cp->c_linkcount; - from_cp->c_flag = to_cp->c_flag & (C_HARDLINK | C_HASXATTRS); + from_cp->c_attr.ca_linkref = to_cp->c_attr.ca_linkref; + from_cp->c_attr.ca_firstlink = to_cp->c_attr.ca_firstlink; + + /* + * The cnode flags need to stay with the cnode and not get transferred + * over along with everything else because they describe the content; they are + * not attributes that reflect changes specific to the file ID. In general, + * fields that are tied to the file ID are the ones that will move. + * + * This reflects the fact that the file may have borrowed blocks, dirty metadata, + * or other extents, which may not yet have been written to the catalog. If + * they were, they would have been transferred above in the ExchangeFileIDs call above... + * + * The flags that are special are: + * C_HARDLINK, C_HASXATTRS + * + * These flags move with the item and file ID in the namespace since their + * state is tied to that of the file ID. + * + * So to transfer the flags, we have to take the following steps + * 1) Store in a localvar whether or not the special bits are set. + * 2) Drop the special bits from the current flags + * 3) swap the special flag bits to their destination + */ + from_cp->c_flag |= to_flag_special; from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags; bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32); + + /* Copy the "from" -> "to" cnode */ bcopy(&tempdesc, &to_cp->c_desc, sizeof(struct cat_desc)); to_cp->c_hint = 0; - to_cp->c_fileid = to_cp->c_cnid; + /* + * Pull the file ID from the tempattr we copied above. We can't assume + * it is the same as the CNID. + */ + to_cp->c_fileid = tempattr.ca_fileid; to_cp->c_itime = tempattr.ca_itime; to_cp->c_btime = tempattr.ca_btime; to_cp->c_atime = tempattr.ca_atime; to_cp->c_ctime = tempattr.ca_ctime; to_cp->c_gid = tempattr.ca_gid; to_cp->c_uid = tempattr.ca_uid; - to_cp->c_flags = tempattr.ca_flags; + to_cp->c_bsdflags = tempattr.ca_flags; to_cp->c_mode = tempattr.ca_mode; to_cp->c_linkcount = tempattr.ca_linkcount; - to_cp->c_flag = tempflag; + to_cp->c_attr.ca_linkref = tempattr.ca_linkref; + to_cp->c_attr.ca_firstlink = tempattr.ca_firstlink; + + /* + * Only OR in the "from" flags into our cnode flags below. + * Leave the rest of the flags alone. + */ + to_cp->c_flag |= from_flag_special; + to_cp->c_attr.ca_recflags = tempattr.ca_recflags; bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32); + /* Rehash the cnodes using their new file IDs */ hfs_chash_rehash(hfsmp, from_cp, to_cp); @@ -1700,14 +2239,14 @@ hfs_vnop_exchange(ap) * When a file moves out of "Cleanup At Startup" * we can drop its NODUMP status. 
*/ - if ((from_cp->c_flags & UF_NODUMP) && + if ((from_cp->c_bsdflags & UF_NODUMP) && (from_cp->c_parentcnid != to_cp->c_parentcnid)) { - from_cp->c_flags &= ~UF_NODUMP; + from_cp->c_bsdflags &= ~UF_NODUMP; from_cp->c_touch_chgtime = TRUE; } - if ((to_cp->c_flags & UF_NODUMP) && + if ((to_cp->c_bsdflags & UF_NODUMP) && (to_cp->c_parentcnid != from_cp->c_parentcnid)) { - to_cp->c_flags &= ~UF_NODUMP; + to_cp->c_bsdflags &= ~UF_NODUMP; to_cp->c_touch_chgtime = TRUE; } @@ -1723,35 +2262,371 @@ exit: return (error); } - -/* - * cnode must be locked - */ -__private_extern__ int -hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) +hfs_vnop_mmap(struct vnop_mmap_args *ap) { - struct cnode *cp = VTOC(vp); - struct filefork *fp = NULL; - int retval = 0; - struct hfsmount *hfsmp = VTOHFS(vp); - struct rl_entry *invalid_range; - struct timeval tv; - int waitdata; /* attributes necessary for data retrieval */ - int wait; /* all other attributes (e.g. atime, etc.) */ - int lockflag; - int took_trunc_lock = 0; - boolean_t trunc_lock_exclusive = FALSE; - - /* - * Applications which only care about data integrity rather than full - * file integrity may opt out of (delay) expensive metadata update - * operations as a performance optimization. - */ - wait = (waitfor == MNT_WAIT); - waitdata = (waitfor == MNT_DWAIT) | wait; - if (always_do_fullfsync) - fullsync = 1; + struct vnode *vp = ap->a_vp; + int error; + + if (VNODE_IS_RSRC(vp)) { + /* allow pageins of the resource fork */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + time_t orig_ctime = VTOC(vp)->c_ctime; + + if (!compressed && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { + error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); + if (error != 0) { + return error; + } + } + + if (ap->a_fflags & PROT_WRITE) { + check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + + /* even though we're manipulating a cnode field here, we're only monotonically increasing + * the generation counter. The vnode can't be recycled (because we hold a FD in order to cause the + * map to happen). So it's safe to do this without holding the cnode lock. The caller's only + * requirement is that the number has been changed. + */ + struct cnode *cp = VTOC(vp); + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + hfs_incr_gencount(cp); + } + } + } + + // + // NOTE: we return ENOTSUP because we want the cluster layer + // to actually do all the real work. + // + return (ENOTSUP); +} + +/* + * hfs_movedata + * + * This is a non-symmetric variant of exchangedata. In this function, + * the contents of the fork in from_vp are moved to the fork + * specified by to_vp. + * + * The cnodes pointed to by 'from_vp' and 'to_vp' must be locked. + * + * The vnode pointed to by 'to_vp' *must* be empty prior to invoking this function. + * We impose this restriction because we may not be able to fully delete the entire + * file's contents in a single transaction, particularly if it has a lot of extents. + * In the normal file deletion codepath, the file is screened for two conditions: + * 1) bigger than 400MB, and 2) more than 8 extents. If so, the file is relocated to + * the hidden directory and the deletion is broken up into multiple truncates. We can't + * do that here because both files need to exist in the namespace. The main reason this + * is imposed is that we may have to touch a whole lot of bitmap blocks if there are + * many extents. 
+ * + * Any data written to 'from_vp' after this call completes is not guaranteed + * to be moved. + * + * Arguments: + * vnode from_vp: source file + * vnode to_vp: destination file; must be empty + * + * Returns: + * EFBIG - Destination file was not empty + * 0 - success + * + * + */ +int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) { + + struct cnode *from_cp; + struct cnode *to_cp; + struct hfsmount *hfsmp = NULL; + int error = 0; + int started_tr = 0; + int lockflags = 0; + int overflow_blocks; + int rsrc = 0; + + + /* Get the HFS pointers */ + from_cp = VTOC(from_vp); + to_cp = VTOC(to_vp); + hfsmp = VTOHFS(from_vp); + + /* Verify that neither source/dest file is open-unlinked */ + if (from_cp->c_flag & (C_DELETED | C_NOEXISTS)) { + error = EBUSY; + goto movedata_exit; + } + + if (to_cp->c_flag & (C_DELETED | C_NOEXISTS)) { + error = EBUSY; + goto movedata_exit; + } + + /* + * Verify the source file is not in use by anyone besides us. + * + * This function is typically invoked by a namespace handler + * process responding to a temporarily stalled system call. + * The FD that it is working off of is opened O_EVTONLY, so + * it really has no active usecounts (the kusecount from O_EVTONLY + * is subtracted from the total usecounts). + * + * As a result, we shouldn't have any active usecounts against + * this vnode when we go to check it below. + */ + if (vnode_isinuse(from_vp, 0)) { + error = EBUSY; + goto movedata_exit; + } + + if (from_cp->c_rsrc_vp == from_vp) { + rsrc = 1; + } + + /* + * We assume that the destination file is already empty. + * Verify that it is. + */ + if (rsrc) { + if (to_cp->c_rsrcfork->ff_size > 0) { + error = EFBIG; + goto movedata_exit; + } + } + else { + if (to_cp->c_datafork->ff_size > 0) { + error = EFBIG; + goto movedata_exit; + } + } + + /* If the source has the rsrc open, make sure the destination is also the rsrc */ + if (rsrc) { + if (to_vp != to_cp->c_rsrc_vp) { + error = EINVAL; + goto movedata_exit; + } + } + else { + /* Verify that both forks are data forks */ + if (to_vp != to_cp->c_vp) { + error = EINVAL; + goto movedata_exit; + } + } + + /* + * See if the source file has overflow extents. If it doesn't, we don't + * need to call into MoveData, and the catalog will be enough. + */ + if (rsrc) { + overflow_blocks = overflow_extents(from_cp->c_rsrcfork); + } + else { + overflow_blocks = overflow_extents(from_cp->c_datafork); + } + + if ((error = hfs_start_transaction (hfsmp)) != 0) { + goto movedata_exit; + } + started_tr = 1; + + /* Lock the system files: catalog, extents, attributes */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + /* Copy over any catalog allocation data into the new spot. */ + if (rsrc) { + if ((error = hfs_move_fork (from_cp->c_rsrcfork, from_cp, to_cp->c_rsrcfork, to_cp))){ + hfs_systemfile_unlock(hfsmp, lockflags); + goto movedata_exit; + } + } + else { + if ((error = hfs_move_fork (from_cp->c_datafork, from_cp, to_cp->c_datafork, to_cp))) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto movedata_exit; + } + } + + /* + * Note that because all we're doing is moving the extents around, we can + * probably do this in a single transaction: Each extent record (group of 8) + * is 64 bytes. A extent overflow B-Tree node is typically 4k. This means + * each node can hold roughly ~60 extent records == (480 extents). 
+ * + * If a file was massively fragmented and had 20k extents, this means we'd + * roughly touch 20k/480 == 41 to 42 nodes, plus the index nodes, for half + * of the operation. (inserting or deleting). So if we're manipulating 80-100 + * nodes, this is basically 320k of data to write to the journal in + * a bad case. + */ + if (overflow_blocks != 0) { + if (rsrc) { + error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 1); + } + else { + error = MoveData (hfsmp, from_cp->c_cnid, to_cp->c_cnid, 0); + } + } + + if (error) { + /* Reverse the operation. Copy the fork data back into the source */ + if (rsrc) { + hfs_move_fork (to_cp->c_rsrcfork, to_cp, from_cp->c_rsrcfork, from_cp); + } + else { + hfs_move_fork (to_cp->c_datafork, to_cp, from_cp->c_datafork, from_cp); + } + } + else { + struct cat_fork *src_data = NULL; + struct cat_fork *src_rsrc = NULL; + struct cat_fork *dst_data = NULL; + struct cat_fork *dst_rsrc = NULL; + + /* Touch the times*/ + to_cp->c_touch_acctime = TRUE; + to_cp->c_touch_chgtime = TRUE; + to_cp->c_touch_modtime = TRUE; + + from_cp->c_touch_acctime = TRUE; + from_cp->c_touch_chgtime = TRUE; + from_cp->c_touch_modtime = TRUE; + + hfs_touchtimes(hfsmp, to_cp); + hfs_touchtimes(hfsmp, from_cp); + + if (from_cp->c_datafork) { + src_data = &from_cp->c_datafork->ff_data; + } + if (from_cp->c_rsrcfork) { + src_rsrc = &from_cp->c_rsrcfork->ff_data; + } + + if (to_cp->c_datafork) { + dst_data = &to_cp->c_datafork->ff_data; + } + if (to_cp->c_rsrcfork) { + dst_rsrc = &to_cp->c_rsrcfork->ff_data; + } + + /* Update the catalog nodes */ + (void) cat_update(hfsmp, &from_cp->c_desc, &from_cp->c_attr, + src_data, src_rsrc); + + (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, + dst_data, dst_rsrc); + + } + /* unlock the system files */ + hfs_systemfile_unlock(hfsmp, lockflags); + + +movedata_exit: + if (started_tr) { + hfs_end_transaction(hfsmp); + } + + return error; + +} + +/* + * Copy all of the catalog and runtime data in srcfork to dstfork. + * + * This allows us to maintain the invalid ranges across the movedata operation so + * we don't need to force all of the pending IO right now. In addition, we move all + * non overflow-extent extents into the destination here. + */ +static int hfs_move_fork (struct filefork *srcfork, struct cnode *src_cp, + struct filefork *dstfork, struct cnode *dst_cp) { + struct rl_entry *invalid_range; + int size = sizeof(struct HFSPlusExtentDescriptor); + size = size * kHFSPlusExtentDensity; + + /* If the dstfork has any invalid ranges, bail out */ + invalid_range = TAILQ_FIRST(&dstfork->ff_invalidranges); + if (invalid_range != NULL) { + return EFBIG; + } + + if (dstfork->ff_data.cf_size != 0 || dstfork->ff_data.cf_new_size != 0) { + return EFBIG; + } + + /* First copy the invalid ranges */ + while ((invalid_range = TAILQ_FIRST(&srcfork->ff_invalidranges))) { + off_t start = invalid_range->rl_start; + off_t end = invalid_range->rl_end; + + /* Remove it from the srcfork and add it to dstfork */ + rl_remove(start, end, &srcfork->ff_invalidranges); + rl_add(start, end, &dstfork->ff_invalidranges); + } + + /* + * Ignore the ff_union. We don't move symlinks or system files. + * Now copy the in-catalog extent information + */ + dstfork->ff_data.cf_size = srcfork->ff_data.cf_size; + dstfork->ff_data.cf_new_size = srcfork->ff_data.cf_new_size; + dstfork->ff_data.cf_vblocks = srcfork->ff_data.cf_vblocks; + dstfork->ff_data.cf_blocks = srcfork->ff_data.cf_blocks; + + /* just memcpy the whole array of extents to the new location. 
*/ + memcpy (dstfork->ff_data.cf_extents, srcfork->ff_data.cf_extents, size); + + /* + * Copy the cnode attribute data. + * + */ + src_cp->c_blocks -= srcfork->ff_data.cf_vblocks; + src_cp->c_blocks -= srcfork->ff_data.cf_blocks; + + dst_cp->c_blocks += srcfork->ff_data.cf_vblocks; + dst_cp->c_blocks += srcfork->ff_data.cf_blocks; + + /* Now delete the entries in the source fork */ + srcfork->ff_data.cf_size = 0; + srcfork->ff_data.cf_new_size = 0; + srcfork->ff_data.cf_union.cfu_bytesread = 0; + srcfork->ff_data.cf_vblocks = 0; + srcfork->ff_data.cf_blocks = 0; + + /* Zero out the old extents */ + bzero (srcfork->ff_data.cf_extents, size); + return 0; +} + + +/* + * cnode must be locked + */ +int +hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) +{ + struct cnode *cp = VTOC(vp); + struct filefork *fp = NULL; + int retval = 0; + struct hfsmount *hfsmp = VTOHFS(vp); + struct rl_entry *invalid_range; + struct timeval tv; + int waitdata; /* attributes necessary for data retrieval */ + int wait; /* all other attributes (e.g. atime, etc.) */ + int lockflag; + int took_trunc_lock = 0; + int locked_buffers = 0; + + /* + * Applications which only care about data integrity rather than full + * file integrity may opt out of (delay) expensive metadata update + * operations as a performance optimization. + */ + wait = (waitfor == MNT_WAIT); + waitdata = (waitfor == MNT_DWAIT) | wait; + if (always_do_fullfsync) + fullsync = 1; /* HFS directories don't have any data blocks. */ if (vnode_isdir(vp)) @@ -1771,19 +2646,18 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) } } else if (UBCINFOEXISTS(vp)) { hfs_unlock(cp); - hfs_lock_truncate(cp, trunc_lock_exclusive); + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); took_trunc_lock = 1; if (fp->ff_unallocblocks != 0) { - hfs_unlock_truncate(cp, trunc_lock_exclusive); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); - trunc_lock_exclusive = TRUE; - hfs_lock_truncate(cp, trunc_lock_exclusive); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); } /* Don't hold cnode lock when calling into cluster layer. */ (void) cluster_push(vp, waitdata ? 
IO_SYNC : 0); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); } /* * When MNT_WAIT is requested and the zero fill timeout @@ -1794,7 +2668,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) */ if (fp && (((cp->c_flag & C_ALWAYS_ZEROFILL) && !TAILQ_EMPTY(&fp->ff_invalidranges)) || ((wait || (cp->c_flag & C_ZFWANTSYNC)) && - ((cp->c_flags & UF_NODUMP) == 0) && + ((cp->c_bsdflags & UF_NODUMP) == 0) && UBCINFOEXISTS(vp) && (vnode_issystem(vp) ==0) && cp->c_zftimeout != 0))) { @@ -1805,14 +2679,13 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) goto datasync; } if (!TAILQ_EMPTY(&fp->ff_invalidranges)) { - if (!took_trunc_lock || trunc_lock_exclusive == FALSE) { + if (!took_trunc_lock || (cp->c_truncatelockowner == HFS_SHARED_OWNER)) { hfs_unlock(cp); - if (took_trunc_lock) - hfs_unlock_truncate(cp, trunc_lock_exclusive); - - trunc_lock_exclusive = TRUE; - hfs_lock_truncate(cp, trunc_lock_exclusive); - hfs_lock(cp, HFS_FORCE_LOCK); + if (took_trunc_lock) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); took_trunc_lock = 1; } while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) { @@ -1830,19 +2703,19 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) (void) cluster_write(vp, (struct uio *) 0, fp->ff_size, end + 1, start, (off_t)0, IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); cp->c_flag |= C_MODIFIED; } hfs_unlock(cp); (void) cluster_push(vp, waitdata ? IO_SYNC : 0); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); } cp->c_flag &= ~C_ZFWANTSYNC; cp->c_zftimeout = 0; } datasync: if (took_trunc_lock) { - hfs_unlock_truncate(cp, trunc_lock_exclusive); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); took_trunc_lock = 0; } /* @@ -1862,8 +2735,32 @@ datasync: /* * Flush all dirty buffers associated with a vnode. + * Record how many of them were dirty AND locked (if necessary). */ - buf_flushdirtyblks(vp, waitdata, lockflag, "hfs_fsync"); + locked_buffers = buf_flushdirtyblks_skipinfo(vp, waitdata, lockflag, "hfs_fsync"); + if ((lockflag & BUF_SKIP_LOCKED) && (locked_buffers) && (vnode_vtype(vp) == VLNK)) { + /* + * If there are dirty symlink buffers, then we may need to take action + * to prevent issues later on if we are journaled. If we're fsyncing a + * symlink vnode then we are in one of three cases: + * + * 1) automatic sync has fired. In this case, we don't want the behavior to change. + * + * 2) Someone has opened the FD for the symlink (not what it points to) + * and has issued an fsync against it. This should be rare, and we don't + * want the behavior to change. + * + * 3) We are being called by a vclean which is trying to reclaim this + * symlink vnode. If this is the case, then allowing this fsync to + * proceed WITHOUT flushing the journal could result in the vclean + * invalidating the buffer's blocks before the journal transaction is + * written to disk. To prevent this, we force a journal flush + * if the vnode is in the middle of a recycle (VL_TERMINATE or VL_DEAD is set). + */ + if (vnode_isrecycled(vp)) { + fullsync = 1; + } + } metasync: if (vnode_isreg(vp) && vnode_issystem(vp)) { @@ -1893,13 +2790,23 @@ metasync: * changes get to stable storage. 
*/ if (fullsync) { - if (hfsmp->jnl) { - hfs_journal_flush(hfsmp); - } else { - retval = hfs_metasync_all(hfsmp); - /* XXX need to pass context! */ - VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); - } + if (hfsmp->jnl) { + hfs_journal_flush(hfsmp, FALSE); + + if (journal_uses_fua(hfsmp->jnl)) { + /* + * the journal_flush did NOT issue a sync track cache command, + * and the fullsync indicates we are supposed to flush all cached + * data to the media, so issue the sync track cache command + * explicitly + */ + VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); + } + } else { + retval = hfs_metasync_all(hfsmp); + /* XXX need to pass context! */ + VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); + } } } @@ -1908,7 +2815,7 @@ metasync: /* Sync an hfs catalog b-tree node */ -static int +int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p) { vnode_t vp; @@ -1954,7 +2861,7 @@ hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p) * we rely on fsck_hfs to fix that up (which it can do without any loss * of data). */ -static int +int hfs_metasync_all(struct hfsmount *hfsmp) { int lockflags; @@ -1996,7 +2903,6 @@ hfs_btsync_callback(struct buf *bp, __unused void *dummy) } -__private_extern__ int hfs_btsync(struct vnode *vp, int sync_transaction) { @@ -2024,7 +2930,7 @@ hfs_btsync(struct vnode *vp, int sync_transaction) /* * Remove a directory. */ -static int +int hfs_vnop_rmdir(ap) struct vnop_rmdir_args /* { struct vnode *a_dvp; @@ -2038,6 +2944,9 @@ hfs_vnop_rmdir(ap) struct cnode *dcp = VTOC(dvp); struct cnode *cp = VTOC(vp); int error; + time_t orig_ctime; + + orig_ctime = VTOC(vp)->c_ctime; if (!S_ISDIR(cp->c_mode)) { return (ENOTDIR); @@ -2045,6 +2954,10 @@ hfs_vnop_rmdir(ap) if (dvp == vp) { return (EINVAL); } + + check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); + cp = VTOC(vp); + if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { return (error); } @@ -2054,7 +2967,33 @@ hfs_vnop_rmdir(ap) hfs_unlockpair (dcp, cp); return ENOENT; } - error = hfs_removedir(dvp, vp, ap->a_cnp, 0); + + // + // if the item is tracked but doesn't have a document_id, assign one and generate an fsevent for it + // + if ((cp->c_bsdflags & UF_TRACKED) && ((struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16))->document_id == 0) { + uint32_t newid; + + hfs_unlockpair(dcp, cp); + + if (hfs_generate_document_id(VTOHFS(vp), &newid) == 0) { + hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK); + ((struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16))->document_id = newid; +#if CONFIG_FSE + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, VTOHFS(vp)->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # + FSE_ARG_INT32, newid, + FSE_ARG_DONE); +#endif + } else { + // XXXdbg - couldn't get a new docid... what to do? can't really fail the rm... 
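/*
 * [Editor's illustrative sketch, not part of the patch.] The "assign a
 * document_id on first remove/rename of a tracked item" pattern used just
 * above, gathered into one hypothetical helper (the helper name is invented
 * here; everything it calls appears in the surrounding code). The real code
 * additionally drops and re-takes the cnode locks around
 * hfs_generate_document_id(), which is omitted below. Not standalone
 * compilable code.
 */
static void
hfs_assign_docid_if_tracked(struct hfsmount *hfsmp, struct cnode *cp)
{
        struct FndrExtendedDirInfo *extinfo =
            (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16);
        uint32_t newid;

        if ((cp->c_bsdflags & UF_TRACKED) == 0 || extinfo->document_id != 0)
                return;
        if (hfs_generate_document_id(hfsmp, &newid) != 0)
                return;         /* can't fail the remove just for this */

        extinfo->document_id = newid;
#if CONFIG_FSE
        add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(),
            FSE_ARG_DEV, hfsmp->hfs_raw_dev,
            FSE_ARG_INO, (ino64_t)0,                /* src inode # */
            FSE_ARG_INO, (ino64_t)cp->c_fileid,     /* dst inode # */
            FSE_ARG_INT32, newid,
            FSE_ARG_DONE);
#endif
}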
+ hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK); + } + } + + error = hfs_removedir(dvp, vp, ap->a_cnp, 0, 0); hfs_unlockpair(dcp, cp); @@ -2066,9 +3005,9 @@ hfs_vnop_rmdir(ap) * * Both dvp and vp cnodes are locked */ -static int +int hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, - int skip_reserve) + int skip_reserve, int only_unlink) { struct cnode *cp; struct cnode *dcp; @@ -2090,24 +3029,77 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if (cp->c_entries != 0) { return (ENOTEMPTY); } + + /* + * If the directory is open or in use (e.g. opendir() or current working + * directory for some process); wait for inactive/reclaim to actually + * remove cnode from the catalog. Both inactive and reclaim codepaths are capable + * of removing open-unlinked directories from the catalog, as well as getting rid + * of EAs still on the element. So change only_unlink to true, so that it will get + * cleaned up below. + * + * Otherwise, we can get into a weird old mess where the directory has C_DELETED, + * but it really means C_NOEXISTS because the item was actually removed from the + * catalog. Then when we try to remove the entry from the catalog later on, it won't + * really be there anymore. + */ + if (vnode_isinuse(vp, 0)) { + only_unlink = 1; + } - /* Check if we're removing the last link to an empty directory. */ + /* Deal with directory hardlinks */ if (cp->c_flag & C_HARDLINK) { - /* We could also return EBUSY here */ + /* + * Note that if we have a directory which was a hardlink at any point, + * its actual directory data is stored in the directory inode in the hidden + * directory rather than the leaf element(s) present in the namespace. + * + * If there are still other hardlinks to this directory, + * then we'll just eliminate this particular link and the vnode will still exist. + * If this is the last link to an empty directory, then we'll open-unlink the + * directory and it will be only tagged with C_DELETED (as opposed to C_NOEXISTS). + * + * We could also return EBUSY here. + */ + return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve); } /* - * We want to make sure that if the directory has a lot of attributes, we process them - * in separate transactions to ensure we don't panic in the journal with a gigantic - * transaction. This means we'll let hfs_removefile deal with the directory, which generally - * follows the same codepath as open-unlinked files. Note that the last argument to - * hfs_removefile specifies that it is supposed to handle directories for this case. - */ - if ((hfsmp->hfs_attribute_vp != NULL) && - (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) { - - return hfs_removefile(dvp, vp, cnp, 0, 0, 1, NULL); + * In a few cases, we may want to allow the directory to persist in an + * open-unlinked state. If the directory is being open-unlinked (still has usecount + * references), or if it has EAs, or if it was being deleted as part of a rename, + * then we go ahead and move it to the hidden directory. + * + * If the directory is being open-unlinked, then we want to keep the catalog entry + * alive so that future EA calls and fchmod/fstat etc. do not cause issues later. + * + * If the directory had EAs, then we want to use the open-unlink trick so that the + * EA removal is not done in one giant transaction. Otherwise, it could cause a panic + * due to overflowing the journal. + * + * Finally, if it was deleted as part of a rename, we move it to the hidden directory + * in order to maintain rename atomicity. 
+ * + * Note that the allow_dirs argument to hfs_removefile specifies that it is + * supposed to handle directories for this case. + */ + + if (((hfsmp->hfs_attribute_vp != NULL) && + ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0)) || + (only_unlink != 0)) { + + int ret = hfs_removefile(dvp, vp, cnp, 0, 0, 1, NULL, only_unlink); + /* + * Even though hfs_vnop_rename calls vnode_recycle for us on tvp we call + * it here just in case we were invoked by rmdir() on a directory that had + * EAs. To ensure that we start reclaiming the space as soon as possible, + * we call vnode_recycle on the directory. + */ + vnode_recycle(vp); + + return ret; + } dcp->c_flag |= C_DIR_MODIFICATION; @@ -2128,7 +3120,7 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, * the current directory and thus be * non-empty.) */ - if ((dcp->c_flags & APPEND) || (cp->c_flags & (IMMUTABLE | APPEND))) { + if ((dcp->c_bsdflags & APPEND) || (cp->c_bsdflags & (IMMUTABLE | APPEND))) { error = EPERM; goto out; } @@ -2149,7 +3141,7 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, desc.cd_encoding = cp->c_encoding; desc.cd_hint = 0; - if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid)) { + if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid, NULL, &error)) { error = 0; goto out; } @@ -2168,12 +3160,34 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, } error = cat_delete(hfsmp, &desc, &cp->c_attr); - if (error == 0) { + + if (!error) { + // + // if skip_reserve == 1 then we're being called from hfs_vnop_rename() and thus + // we don't need to touch the document_id as it's handled by the rename code. + // otherwise it's a normal remove and we need to save the document id in the + // per thread struct and clear it from the cnode. + // + struct doc_tombstone *ut; + ut = get_uthread_doc_tombstone(); + if (!skip_reserve && (cp->c_bsdflags & UF_TRACKED) && should_save_docid_tombstone(ut, vp, cnp)) { + + if (ut->t_lastop_document_id) { + clear_tombstone_docid(ut, hfsmp, NULL); + } + save_tombstone(hfsmp, dvp, vp, cnp, 1); + + } + /* The parent lost a child */ if (dcp->c_entries > 0) dcp->c_entries--; DEC_FOLDERCOUNT(hfsmp, dcp->c_attr); dcp->c_dirchangecnt++; + { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); + extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); + } dcp->c_touch_chgtime = TRUE; dcp->c_touch_modtime = TRUE; hfs_touchtimes(hfsmp, cp); @@ -2193,16 +3207,8 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, hfs_volupdate(hfsmp, VOL_RMDIR, (dcp->c_cnid == kHFSRootFolderID)); - /* - * directory open or in use (e.g. opendir() or current working - * directory for some process); wait for inactive to actually - * remove catalog entry - */ - if (vnode_isinuse(vp, 0)) { - cp->c_flag |= C_DELETED; - } else { - cp->c_flag |= C_NOEXISTS; - } + /* Mark C_NOEXISTS since the catalog entry is now gone */ + cp->c_flag |= C_NOEXISTS; out: dcp->c_flag &= ~C_DIR_MODIFICATION; wakeup((caddr_t)&dcp->c_flag); @@ -2218,7 +3224,7 @@ out: /* * Remove a file or link. 
*/ -static int +int hfs_vnop_remove(ap) struct vnop_remove_args /* { struct vnode *a_dvp; @@ -2231,56 +3237,101 @@ hfs_vnop_remove(ap) struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; struct cnode *dcp = VTOC(dvp); - struct cnode *cp = VTOC(vp); + struct cnode *cp; struct vnode *rvp = NULL; - struct hfsmount *hfsmp = VTOHFS(vp); int error=0, recycle_rsrc=0; - int drop_rsrc_vnode = 0; - int vref; + int recycle_vnode = 0; + uint32_t rsrc_vid = 0; + time_t orig_ctime; if (dvp == vp) { return (EINVAL); } - /* - * We need to grab the cnode lock on 'cp' before the lockpair() - * to get an iocount on the rsrc fork BEFORE we enter hfs_removefile. - * To prevent other deadlocks, it's best to call hfs_vgetrsrc in a way that - * allows it to drop the cnode lock that it expects to be held coming in. - * If we don't, we could commit a lock order violation, causing a deadlock. - * In order to safely get the rsrc vnode with an iocount, we need to only hold the - * lock on the file temporarily. Unlike hfs_vnop_rename, we don't have to worry - * about one rsrc fork getting recycled for another, but we do want to ensure - * that there are no deadlocks due to lock ordering issues. - * - * Note: this function may be invoked for directory hardlinks, so just skip these - * steps if 'vp' is a directory. - */ + orig_ctime = VTOC(vp)->c_ctime; + if (!vnode_isnamedstream(vp) && ((ap->a_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) == 0)) { + error = check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); + if (error) { + // XXXdbg - decide on a policy for handling namespace handler failures! + // for now we just let them proceed. + } + } + error = 0; + cp = VTOC(vp); - if ((vp->v_type == VLNK) || (vp->v_type == VREG)) { +relock: - if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK))) { - return (error); - } + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); - error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE); - hfs_unlock(cp); - if (error) { - return (error); + if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + if (rvp) { + vnode_put (rvp); + } + return (error); + } + // + // if the item is tracked but doesn't have a document_id, assign one and generate an fsevent for it + // + if ((cp->c_bsdflags & UF_TRACKED) && ((struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16))->document_id == 0) { + uint32_t newid; + + hfs_unlockpair(dcp, cp); + + if (hfs_generate_document_id(VTOHFS(vp), &newid) == 0) { + hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK); + ((struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16))->document_id = newid; +#if CONFIG_FSE + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, VTOHFS(vp)->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # + FSE_ARG_INT32, newid, + FSE_ARG_DONE); +#endif + } else { + // XXXdbg - couldn't get a new docid... what to do? can't really fail the rm... + hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK); } - drop_rsrc_vnode = 1; } - /* Now that we may have an iocount on rvp, do the lock pair */ - hfs_lock_truncate(cp, TRUE); + + /* + * Lazily respond to determining if there is a valid resource fork + * vnode attached to 'cp' if it is a regular file or symlink. + * If the vnode does not exist, then we may proceed without having to + * create it. + * + * If, however, it does exist, then we need to acquire an iocount on the + * vnode after acquiring its vid. 
This ensures that if we have to do I/O + * against it, it can't get recycled from underneath us in the middle + * of this call. + * + * Note: this function may be invoked for directory hardlinks, so just skip these + * steps if 'vp' is a directory. + */ - if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { - hfs_unlock_truncate(cp, TRUE); - /* drop the iocount on rvp if necessary */ - if (drop_rsrc_vnode) { - vnode_put (rvp); + if ((vp->v_type == VLNK) || (vp->v_type == VREG)) { + if ((cp->c_rsrc_vp) && (rvp == NULL)) { + /* We need to acquire the rsrc vnode */ + rvp = cp->c_rsrc_vp; + rsrc_vid = vnode_vid (rvp); + + /* Unlock everything to acquire iocount on the rsrc vnode */ + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); + hfs_unlockpair (dcp, cp); + /* Use the vid to maintain identity on rvp */ + if (vnode_getwithvid(rvp, rsrc_vid)) { + /* + * If this fails, then it was recycled or + * reclaimed in the interim. Reset fields and + * start over. + */ + rvp = NULL; + rsrc_vid = 0; + } + goto relock; } - return (error); } /* @@ -2292,23 +3343,41 @@ hfs_vnop_remove(ap) goto rm_done; } - error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0, rvp); - - // - // If the remove succeeded and it's an open-unlinked file that has - // a resource fork vnode that's not in use, we will want to recycle - // the rvp *after* we're done unlocking everything. Otherwise the - // resource vnode will keep a v_parent reference on this vnode which - // prevents it from going through inactive/reclaim which means that - // the disk space associated with this file won't get free'd until - // something forces the resource vnode to get recycled (and that can - // take a very long time). - // - if (error == 0 && (cp->c_flag & C_DELETED) && - (rvp) && !vnode_isinuse(rvp, 0)) { - recycle_rsrc = 1; + error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0, NULL, 0); + + /* + * If the remove succeeded in deleting the file, then we may need to mark + * the resource fork for recycle so that it is reclaimed as quickly + * as possible. If it were not recycled quickly, then this resource fork + * vnode could keep a v_parent reference on the data fork, which prevents it + * from going through reclaim (by giving it extra usecounts), except in the force- + * unmount case. + * + * However, a caveat: we need to continue to supply resource fork + * access to open-unlinked files even if the resource fork is not open. This is + * a requirement for the compressed files work. Luckily, hfs_vgetrsrc will handle + * this already if the data fork has been re-parented to the hidden directory. + * + * As a result, all we really need to do here is mark the resource fork vnode + * for recycle. If it goes out of core, it can be brought in again if needed. + * If the cnode was instead marked C_NOEXISTS, then there wouldn't be any + * more work. + */ + if (error == 0) { + if (rvp) { + recycle_rsrc = 1; + } + /* + * If the target was actually removed from the catalog schedule it for + * full reclamation/inactivation. 
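/*
 * [Editor's illustrative sketch, not part of the patch.] The revalidation
 * loop described above, condensed: remember the resource fork's vnode id,
 * drop every lock before taking the iocount, and start over if the vnode
 * was recycled in the window. All identifiers appear in the surrounding
 * code; error handling is elided and this is not standalone compilable code.
 */
relock:
        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK);

        if ((vp->v_type == VLNK || vp->v_type == VREG) &&
            cp->c_rsrc_vp != NULL && rvp == NULL) {
                rvp = cp->c_rsrc_vp;
                rsrc_vid = vnode_vid(rvp);              /* capture identity first */
                hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
                hfs_unlockpair(dcp, cp);                /* no locks held for the iocount */
                if (vnode_getwithvid(rvp, rsrc_vid)) {
                        rvp = NULL;                     /* recycled in the interim */
                        rsrc_vid = 0;
                }
                goto relock;                            /* re-take all locks either way */
        }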
We hold an iocount on it so it should just + * get marked with MARKTERM + */ + if (cp->c_flag & C_NOEXISTS) { + recycle_vnode = 1; + } } + /* * Drop the truncate lock before unlocking the cnode * (which can potentially perform a vnode_put and @@ -2316,19 +3385,18 @@ hfs_vnop_remove(ap) * truncate lock) */ rm_done: - hfs_unlock_truncate(cp, TRUE); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); hfs_unlockpair(dcp, cp); if (recycle_rsrc) { - vref = vnode_ref(rvp); - if (vref == 0) { - /* vnode_ref could return an error, only release if we got a ref */ - vnode_rele(rvp); - } + /* inactive or reclaim on rvp will clean up the blocks from the rsrc fork */ vnode_recycle(rvp); } - - if (drop_rsrc_vnode) { + if (recycle_vnode) { + vnode_recycle (vp); + } + + if (rvp) { /* drop iocount on rsrc fork, was obtained at beginning of fxn */ vnode_put(rvp); } @@ -2337,7 +3405,7 @@ rm_done: } -static int +int hfs_removefile_callback(struct buf *bp, void *hfsmp) { if ( !(buf_flags(bp) & B_META)) @@ -2357,32 +3425,37 @@ hfs_removefile_callback(struct buf *bp, void *hfsmp) { * This function may be used to remove directories if they have * lots of EA's -- note the 'allow_dirs' argument. * - * The 'rvp' argument is used to pass in a resource fork vnode with - * an iocount to prevent it from getting recycled during usage. If it - * is NULL, then it is assumed the caller is a VNOP that cannot operate - * on resource forks, like hfs_vnop_symlink or hfs_removedir. Otherwise in - * a VNOP that takes multiple vnodes, we could violate lock order and - * cause a deadlock. + * This function is able to delete blocks & fork data for the resource + * fork even if it does not exist in core (and have a backing vnode). + * It should infer the correct behavior based on the number of blocks + * in the cnode and whether or not the resource fork pointer exists or + * not. As a result, one only need pass in the 'vp' corresponding to the + * data fork of this file (or main vnode in the case of a directory). + * Passing in a resource fork will result in an error. + * + * Because we do not create any vnodes in this function, we are not at + * risk of deadlocking against ourselves by double-locking. * * Requires cnode and truncate locks to be held. */ -static int +int hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, - int flags, int skip_reserve, int allow_dirs, struct vnode *rvp) + int flags, int skip_reserve, int allow_dirs, + __unused struct vnode *rvp, int only_unlink) { struct cnode *cp; struct cnode *dcp; + struct vnode *rsrc_vp = NULL; struct hfsmount *hfsmp; struct cat_desc desc; struct timeval tv; - vfs_context_t ctx = cnp->cn_context; int dataforkbusy = 0; int rsrcforkbusy = 0; - int truncated = 0; int lockflags; int error = 0; int started_tr = 0; int isbigfile = 0, defer_remove=0, isdir=0; + int update_vh = 0; cp = VTOC(vp); dcp = VTOC(dvp); @@ -2393,7 +3466,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, return (0); } - if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid)) { + if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid, NULL, &error)) { return 0; } @@ -2401,11 +3474,37 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if (VNODE_IS_RSRC(vp)) { return (EPERM); } + else { + /* + * We know it's a data fork. + * Probe the cnode to see if we have a valid resource fork + * in hand or not. + */ + rsrc_vp = cp->c_rsrc_vp; + } + /* Don't allow deleting the journal or journal_info_block. 
*/ - if (hfsmp->jnl && - (cp->c_fileid == hfsmp->hfs_jnlfileid || cp->c_fileid == hfsmp->hfs_jnlinfoblkid)) { + if (hfs_is_journal_file(hfsmp, cp)) { return (EPERM); } + + /* + * If removing a symlink, then we need to ensure that the + * data blocks for the symlink are not still in-flight or pending. + * If so, we will unlink the symlink here, making its blocks + * available for re-allocation by a subsequent transaction. That is OK, but + * then the I/O for the data blocks could then go out before the journal + * transaction that created it was flushed, leading to I/O ordering issues. + */ + if (vp->v_type == VLNK) { + /* + * This will block if the asynchronous journal flush is in progress. + * If this symlink is not being renamed over and doesn't have any open FDs, + * then we'll remove it from the journal's bufs below in kill_block. + */ + buf_wait_for_shadow_io (vp, 0); + } + /* * Hard links require special handling. */ @@ -2424,6 +3523,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve); } } + /* Directories should call hfs_rmdir! (unless they have a lot of attributes) */ if (vnode_isdir(vp)) { if (allow_dirs == 0) @@ -2444,23 +3544,30 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, /* Remove our entry from the namei cache. */ cache_purge(vp); - + /* - * We expect the caller, if operating on files, - * will have passed in a resource fork vnode with - * an iocount, even if there was no content. - * We only do the hfs_truncate on the rsrc fork - * if we know that it DID have content, however. - * This has the bonus of not requiring us to defer - * its removal, unless it is in use. + * If the caller was operating on a file (as opposed to a + * directory with EAs), then we need to figure out + * whether or not it has a valid resource fork vnode. + * + * If there was a valid resource fork vnode, then we need + * to use hfs_truncate to eliminate its data. If there is + * no vnode, then we hold the cnode lock which would + * prevent it from being created. As a result, + * we can use the data deletion functions which do not + * require that a cnode/vnode pair exist. */ /* Check if this file is being used. */ if (isdir == 0) { dataforkbusy = vnode_isinuse(vp, 0); - /* Only need to defer resource fork removal if in use and has content */ - if (rvp && (cp->c_blocks - VTOF(vp)->ff_blocks)) { - rsrcforkbusy = vnode_isinuse(rvp, 0); + /* + * At this point, we know that 'vp' points to the + * a data fork because we checked it up front. And if + * there is no rsrc fork, rsrc_vp will be NULL. + */ + if (rsrc_vp && (cp->c_blocks - VTOF(vp)->ff_blocks)) { + rsrcforkbusy = vnode_isinuse(rsrc_vp, 0); } } @@ -2475,6 +3582,11 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) { defer_remove = 1; } + + /* If we are explicitly told to only unlink item and move to hidden dir, then do it */ + if (only_unlink) { + defer_remove = 1; + } /* * Carbon semantics prohibit deleting busy files. @@ -2492,9 +3604,16 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if (hfsmp->hfs_flags & HFS_QUOTAS) (void)hfs_getinoquota(cp); #endif /* QUOTA */ - - /* Check if we need a ubc_setsize. */ - if (isdir == 0 && (!dataforkbusy || !rsrcforkbusy)) { + + /* + * Do a ubc_setsize to indicate we need to wipe contents if: + * 1) item is a regular file. 
+ * 2) Neither fork is busy AND we are not told to unlink this. + * + * We need to check for the defer_remove since it can be set without + * having a busy data or rsrc fork + */ + if (isdir == 0 && (!dataforkbusy || !rsrcforkbusy) && (defer_remove == 0)) { /* * A ubc_setsize can cause a pagein so defer it * until after the cnode lock is dropped. The @@ -2504,7 +3623,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if (!dataforkbusy && cp->c_datafork->ff_blocks && !isbigfile) { cp->c_flag |= C_NEED_DATA_SETSIZE; } - if (!rsrcforkbusy && rvp) { + if (!rsrcforkbusy && rsrc_vp) { cp->c_flag |= C_NEED_RSRC_SETSIZE; } } @@ -2515,40 +3634,50 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, started_tr = 1; // XXXdbg - if we're journaled, kill any dirty symlink buffers - if (hfsmp->jnl && vnode_islnk(vp)) + if (hfsmp->jnl && vnode_islnk(vp) && (defer_remove == 0)) { buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); + } /* - * Truncate any non-busy forks. Busy forks will + * Prepare to truncate any non-busy forks. Busy forks will * get truncated when their vnode goes inactive. * Note that we will only enter this region if we * can avoid creating an open-unlinked file. If * either region is busy, we will have to create an open * unlinked file. - * Since we're already inside a transaction, - * tell hfs_truncate to skip the ubc_setsize. + * + * Since we are deleting the file, we need to stagger the runtime + * modifications to do things in such a way that a crash won't + * result in us getting overlapped extents or any other + * bad inconsistencies. As such, we call prepare_release_storage + * which updates the UBC, updates quota information, and releases + * any loaned blocks that belong to this file. No actual + * truncation or bitmap manipulation is done until *AFTER* + * the catalog record is removed. */ - if (isdir == 0 && (!dataforkbusy && !rsrcforkbusy)) { - /* - * Note that 5th argument to hfs_truncate indicates whether or not - * hfs_update calls should be suppressed in call to do_hfs_truncate - */ + if (isdir == 0 && (!dataforkbusy && !rsrcforkbusy) && (only_unlink == 0)) { + if (!dataforkbusy && !isbigfile && cp->c_datafork->ff_blocks != 0) { - /* skip update in hfs_truncate */ - error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 1, ctx); - if (error) + + error = hfs_prepare_release_storage (hfsmp, vp); + if (error) { goto out; - truncated = 1; + } + update_vh = 1; } - if (!rsrcforkbusy && rvp) { - /* skip update in hfs_truncate */ - error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, 1, ctx); - if (error) + + /* + * If the resource fork vnode does not exist, we can skip this step. 
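/*
 * [Editor's illustrative sketch, not part of the patch.] The staged
 * teardown order for a non-busy, non-deferred file that the comments above
 * describe: storage release is split around the catalog delete so a crash
 * can never leave overlapped extents. All function names appear in the
 * surrounding code; error handling and locking are elided and this is not
 * standalone compilable code.
 */
error = hfs_prepare_release_storage(hfsmp, vp);         /* UBC, quota, loaned blocks */
if (error == 0 && rsrc_vp != NULL)
        error = hfs_prepare_release_storage(hfsmp, rsrc_vp);
if (error == 0)
        error = cat_delete(hfsmp, &desc, &cp->c_attr);  /* catalog record goes first */
if (error == 0)
        error = hfs_release_storage(hfsmp, cp->c_datafork, cp->c_rsrcfork,
            cp->c_fileid);                              /* extents and bitmap last */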
+ */ + if (!rsrcforkbusy && rsrc_vp) { + error = hfs_prepare_release_storage (hfsmp, rsrc_vp); + if (error) { goto out; - truncated = 1; + } + update_vh = 1; } } - + /* * Protect against a race with rename by using the component * name passed in and parent id from dvp (instead of using @@ -2634,6 +3763,10 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, DEC_FOLDERCOUNT(hfsmp, dcp->c_attr); } dcp->c_dirchangecnt++; + { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); + extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); + } dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); @@ -2648,71 +3781,188 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if (error) goto out; - } else /* Not busy */ { + } + else { + /* + * Nobody is using this item; we can safely remove everything. + */ + struct filefork *temp_rsrc_fork = NULL; +#if QUOTA + off_t savedbytes; + int blksize = hfsmp->blockSize; +#endif + u_int32_t fileid = cp->c_fileid; + + /* + * Figure out if we need to read the resource fork data into + * core before wiping out the catalog record. + * + * 1) Must not be a directory + * 2) cnode's c_rsrcfork ptr must be NULL. + * 3) rsrc fork must have actual blocks + */ + if ((isdir == 0) && (cp->c_rsrcfork == NULL) && + (cp->c_blocks - VTOF(vp)->ff_blocks)) { + /* + * The resource fork vnode & filefork did not exist. + * Create a temporary one for use in this function only. + */ + MALLOC_ZONE (temp_rsrc_fork, struct filefork *, sizeof (struct filefork), M_HFSFORK, M_WAITOK); + bzero(temp_rsrc_fork, sizeof(struct filefork)); + temp_rsrc_fork->ff_cp = cp; + rl_init(&temp_rsrc_fork->ff_invalidranges); + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - if (cp->c_blocks > 0) { - printf("hfs_remove: attempting to delete a non-empty file %s\n", - cp->c_desc.cd_nameptr); - error = EBUSY; - goto out; + /* Look up the resource fork first, if necessary */ + if (temp_rsrc_fork) { + error = cat_lookup (hfsmp, &desc, 1, 0, (struct cat_desc*) NULL, + (struct cat_attr*) NULL, &temp_rsrc_fork->ff_data, NULL); + if (error) { + FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK); + hfs_systemfile_unlock (hfsmp, lockflags); + goto out; + } } - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); if (!skip_reserve) { if ((error = cat_preflight(hfsmp, CAT_DELETE, NULL, 0))) { + if (temp_rsrc_fork) { + FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK); + } hfs_systemfile_unlock(hfsmp, lockflags); goto out; } } - + error = cat_delete(hfsmp, &desc, &cp->c_attr); - - if (error && error != ENXIO && error != ENOENT && truncated) { - if ((cp->c_datafork && cp->c_datafork->ff_size != 0) || - (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) { - printf("hfs: remove: couldn't delete a truncated file (%s)" - "(error %d, data sz %lld; rsrc sz %lld)", - cp->c_desc.cd_nameptr, error, cp->c_datafork->ff_size, - cp->c_rsrcfork->ff_size); - hfs_mark_volume_inconsistent(hfsmp); - } else { - printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n", - cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error); - } + + if (error && error != ENXIO && error != ENOENT) { + printf("hfs_removefile: deleting file %s (id=%d) vol=%s err=%d\n", + 
cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, hfsmp->vcbVN, error); } - + if (error == 0) { /* Update the parent directory */ if (dcp->c_entries > 0) dcp->c_entries--; dcp->c_dirchangecnt++; + { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); + extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); + } dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); } hfs_systemfile_unlock(hfsmp, lockflags); - if (error) - goto out; + if (error) { + if (temp_rsrc_fork) { + FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK); + } + goto out; + } + + /* + * Now that we've wiped out the catalog record, the file effectively doesn't + * exist anymore. So update the quota records to reflect the loss of the + * data fork and the resource fork. + */ #if QUOTA - if (hfsmp->hfs_flags & HFS_QUOTAS) + if (cp->c_datafork->ff_blocks > 0) { + savedbytes = ((off_t)cp->c_datafork->ff_blocks * (off_t)blksize); + (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0); + } + + /* + * We may have just deleted the catalog record for a resource fork even + * though it did not exist in core as a vnode. However, just because there + * was a resource fork pointer in the cnode does not mean that it had any blocks. + */ + if (temp_rsrc_fork || cp->c_rsrcfork) { + if (cp->c_rsrcfork) { + if (cp->c_rsrcfork->ff_blocks > 0) { + savedbytes = ((off_t)cp->c_rsrcfork->ff_blocks * (off_t)blksize); + (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0); + } + } + else { + /* we must have used a temporary fork */ + savedbytes = ((off_t)temp_rsrc_fork->ff_blocks * (off_t)blksize); + (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0); + } + } + + if (hfsmp->hfs_flags & HFS_QUOTAS) { (void)hfs_chkiq(cp, -1, NOCRED, 0); -#endif /* QUOTA */ + } +#endif + + /* + * If we didn't get any errors deleting the catalog entry, then go ahead + * and release the backing store now. The filefork pointers are still valid. + */ + if (temp_rsrc_fork) { + error = hfs_release_storage (hfsmp, cp->c_datafork, temp_rsrc_fork, fileid); + } + else { + /* if cp->c_rsrcfork == NULL, hfs_release_storage will skip over it. */ + error = hfs_release_storage (hfsmp, cp->c_datafork, cp->c_rsrcfork, fileid); + } + if (error) { + /* + * If we encountered an error updating the extents and bitmap, + * mark the volume inconsistent. At this point, the catalog record has + * already been deleted, so we can't recover it at this point. We need + * to proceed and update the volume header and mark the cnode C_NOEXISTS. + * The subsequent fsck should be able to recover the free space for us. + */ + hfs_mark_volume_inconsistent(hfsmp); + } + else { + /* reset update_vh to 0, since hfs_release_storage should have done it for us */ + update_vh = 0; + } + + /* Get rid of the temporary rsrc fork */ + if (temp_rsrc_fork) { + FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK); + } cp->c_flag |= C_NOEXISTS; cp->c_flag &= ~C_DELETED; - truncated = 0; // because the catalog entry is gone - + cp->c_touch_chgtime = TRUE; /* XXX needed ? */ --cp->c_linkcount; - + /* * We must never get a directory if we're in this else block. We could * accidentally drop the number of files in the volume header if we did. 
*/ hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID)); + + } + + // + // if skip_reserve == 1 then we're being called from hfs_vnop_rename() and thus + // we don't need to touch the document_id as it's handled by the rename code. + // otherwise it's a normal remove and we need to save the document id in the + // per thread struct and clear it from the cnode. + // + struct doc_tombstone *ut; + ut = get_uthread_doc_tombstone(); + if (!error && !skip_reserve && (cp->c_bsdflags & UF_TRACKED) && should_save_docid_tombstone(ut, vp, cnp)) { + + if (ut->t_lastop_document_id) { + clear_tombstone_docid(ut, hfsmp, NULL); + } + save_tombstone(hfsmp, dvp, vp, cnp, 1); + } + /* * All done with this cnode's descriptor... * @@ -2727,14 +3977,14 @@ out: if (error) { cp->c_flag &= ~C_DELETED; } - - /* Commit the truncation to the catalog record */ - if (truncated) { - cp->c_flag |= C_FORCEUPDATE; - cp->c_touch_chgtime = TRUE; - cp->c_touch_modtime = TRUE; - (void) hfs_update(vp, 0); - } + + if (update_vh) { + /* + * If we bailed out earlier, we may need to update the volume header + * to deal with the borrowed blocks accounting. + */ + hfs_volupdate (hfsmp, VOL_UPDATE, 0); + } if (started_tr) { hfs_end_transaction(hfsmp); @@ -2796,7 +4046,7 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp) * been locked. By taking the rsrc fork vnodes up front we ensure that they * cannot be recycled, and that the situation mentioned above cannot happen. */ -static int +int hfs_vnop_rename(ap) struct vnop_rename_args /* { struct vnode *a_fdvp; @@ -2812,8 +4062,13 @@ hfs_vnop_rename(ap) struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; - struct vnode *fvp_rsrc = NULLVP; - struct vnode *tvp_rsrc = NULLVP; + /* + * Note that we only need locals for the target/destination's + * resource fork vnode (and only if necessary). We don't care if the + * source has a resource fork vnode or not. + */ + struct vnode *tvp_rsrc = NULLVP; + uint32_t tvp_rsrc_vid = 0; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = vfs_context_proc(ap->a_context); @@ -2832,64 +4087,106 @@ hfs_vnop_rename(ap) int took_trunc_lock = 0; int lockflags; int error; - int recycle_rsrc = 0; - + time_t orig_from_ctime, orig_to_ctime; + int emit_rename = 1; + int emit_delete = 1; + int is_tracked = 0; + int unlocked; + + orig_from_ctime = VTOC(fvp)->c_ctime; + if (tvp && VTOC(tvp)) { + orig_to_ctime = VTOC(tvp)->c_ctime; + } else { + orig_to_ctime = ~0; + } + hfsmp = VTOHFS(tdvp); /* - * Before grabbing the four locks, we may need to get an iocount on the resource fork - * vnodes in question, just like hfs_vnop_remove. If fvp and tvp are not - * directories, then go ahead and grab the resource fork vnodes now - * one at a time. We don't actively need the fvp_rsrc to do the rename operation, - * but we need the iocount to prevent the vnode from getting recycled/reclaimed - * during the middle of the VNOP. + * Do special case checks here. If fvp == tvp then we need to check the + * cnode with locks held. */ - - - if ((vnode_isreg(fvp)) || (vnode_islnk(fvp))) { - - if ((error = hfs_lock (VTOC(fvp), HFS_EXCLUSIVE_LOCK))) { - return (error); - } - - error = hfs_vgetrsrc(VTOHFS(fvp), fvp, &fvp_rsrc, TRUE); - hfs_unlock (VTOC(fvp)); - if (error) { + if (fvp == tvp) { + int is_hardlink = 0; + /* + * In this case, we do *NOT* ever emit a DELETE event. 
+ * We may not necessarily emit a RENAME event + */ + emit_delete = 0; + if ((error = hfs_lock(VTOC(fvp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { return error; } - } + /* Check to see if the item is a hardlink or not */ + is_hardlink = (VTOC(fvp)->c_flag & C_HARDLINK); + hfs_unlock (VTOC(fvp)); - if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) { /* - * Lock failure is OK on tvp, since we may race with a remove on the dst. - * But this shouldn't stop rename from proceeding, so only try to - * grab the resource fork if the lock succeeded. + * If the item is not a hardlink, then case sensitivity must be off, otherwise + * two names should not resolve to the same cnode unless they were case variants. */ - if (hfs_lock (VTOC(tvp), HFS_EXCLUSIVE_LOCK) == 0) { - error = hfs_vgetrsrc(VTOHFS(tvp), tvp, &tvp_rsrc, TRUE); - hfs_unlock (VTOC(tvp)); - if (error) { - if (fvp_rsrc) { - vnode_put (fvp_rsrc); + if (is_hardlink) { + emit_rename = 0; + /* + * Hardlinks are a little trickier. We only want to emit a rename event + * if the item is a hardlink, the parent directories are the same, case sensitivity + * is off, and the case folded names are the same. See the fvp == tvp case below for more + * info. + */ + + if ((fdvp == tdvp) && ((hfsmp->hfs_flags & HFS_CASE_SENSITIVE) == 0)) { + if (hfs_namecmp((const u_int8_t *)fcnp->cn_nameptr, fcnp->cn_namelen, + (const u_int8_t *)tcnp->cn_nameptr, tcnp->cn_namelen) == 0) { + /* Then in this case only it is ok to emit a rename */ + emit_rename = 1; } - return error; } } } + if (emit_rename) { + /* c_bsdflags should only be assessed while holding the cnode lock. + * This is not done consistently throughout the code and can result + * in race. This will be fixed via rdar://12181064 + */ + if (VTOC(fvp)->c_bsdflags & UF_TRACKED) { + is_tracked = 1; + } + check_for_tracked_file(fvp, orig_from_ctime, NAMESPACE_HANDLER_RENAME_OP, NULL); + } + if (tvp && VTOC(tvp)) { + if (emit_delete) { + check_for_tracked_file(tvp, orig_to_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); + } + } + +retry: /* When tvp exists, take the truncate lock for hfs_removefile(). */ if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) { - hfs_lock_truncate(VTOC(tvp), TRUE); + hfs_lock_truncate(VTOC(tvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); took_trunc_lock = 1; } - retry: +relock: error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL, HFS_EXCLUSIVE_LOCK, &error_cnode); if (error) { if (took_trunc_lock) { - hfs_unlock_truncate(VTOC(tvp), TRUE); + hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT); took_trunc_lock = 0; } + + /* + * We hit an error path. If we were trying to re-acquire the locks + * after coming through here once, we might have already obtained + * an iocount on tvp's resource fork vnode. Drop that before dealing + * with the failure. Note this is safe -- since we are in an + * error handling path, we can't be holding the cnode locks. + */ + if (tvp_rsrc) { + vnode_put (tvp_rsrc); + tvp_rsrc_vid = 0; + tvp_rsrc = NULL; + } + /* * tvp might no longer exist. If the cause of the lock failure * was tvp, then we can try again with tvp/tcp set to NULL. 
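/*
 * [Editor's illustrative sketch, not part of the patch.] What the error
 * path above amounts to: on a four-way lock failure, drop the truncate lock
 * and any iocount already taken on the target's resource fork (safe, since
 * no cnode locks are held here), and if the failure was caused by the
 * target itself having disappeared, retry the rename with no target. The
 * exact check that identifies the failing cnode via error_cnode is not
 * shown in this hunk. Identifiers appear in the surrounding code; not
 * standalone compilable code.
 */
error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp),
    tvp ? VTOC(tvp) : NULL, HFS_EXCLUSIVE_LOCK, &error_cnode);
if (error) {
        if (took_trunc_lock) {
                hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT);
                took_trunc_lock = 0;
        }
        if (tvp_rsrc) {
                vnode_put(tvp_rsrc);
                tvp_rsrc = NULL;
                tvp_rsrc_vid = 0;
        }
        /* if error_cnode shows tvp caused the failure: tvp = NULL; goto retry; */
}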
@@ -2901,13 +4198,11 @@ hfs_vnop_rename(ap) tvp = NULL; goto retry; } - /* otherwise, drop iocounts on the rsrc forks and bail out */ - if (fvp_rsrc) { - vnode_put (fvp_rsrc); - } - if (tvp_rsrc) { - vnode_put (tvp_rsrc); + + if (emit_rename && is_tracked) { + resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_FAILED_OP | NAMESPACE_HANDLER_TRACK_EVENT); } + return (error); } @@ -2915,7 +4210,108 @@ hfs_vnop_rename(ap) fcp = VTOC(fvp); tdcp = VTOC(tdvp); tcp = tvp ? VTOC(tvp) : NULL; - hfsmp = VTOHFS(tdvp); + + // + // if the item is tracked but doesn't have a document_id, assign one and generate an fsevent for it + // + unlocked = 0; + if ((fcp->c_bsdflags & UF_TRACKED) && ((struct FndrExtendedDirInfo *)((char *)&fcp->c_attr.ca_finderinfo + 16))->document_id == 0) { + uint32_t newid; + + hfs_unlockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL); + unlocked = 1; + + if (hfs_generate_document_id(hfsmp, &newid) == 0) { + hfs_lock(fcp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + ((struct FndrExtendedDirInfo *)((char *)&fcp->c_attr.ca_finderinfo + 16))->document_id = newid; +#if CONFIG_FSE + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)fcp->c_fileid, // dst inode # + FSE_ARG_INT32, newid, + FSE_ARG_DONE); +#endif + hfs_unlock(fcp); + } else { + // XXXdbg - couldn't get a new docid... what to do? can't really fail the rename... + } + + // + // check if we're going to need to fix tcp as well. if we aren't, go back relock + // everything. otherwise continue on and fix up tcp as well before relocking. + // + if (tcp == NULL || !(tcp->c_bsdflags & UF_TRACKED) || ((struct FndrExtendedDirInfo *)((char *)&tcp->c_attr.ca_finderinfo + 16))->document_id != 0) { + goto relock; + } + } + + // + // same thing for tcp if it's set + // + if (tcp && (tcp->c_bsdflags & UF_TRACKED) && ((struct FndrExtendedDirInfo *)((char *)&tcp->c_attr.ca_finderinfo + 16))->document_id == 0) { + uint32_t newid; + + if (!unlocked) { + hfs_unlockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL); + unlocked = 1; + } + + if (hfs_generate_document_id(hfsmp, &newid) == 0) { + hfs_lock(tcp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + ((struct FndrExtendedDirInfo *)((char *)&tcp->c_attr.ca_finderinfo + 16))->document_id = newid; +#if CONFIG_FSE + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)tcp->c_fileid, // dst inode # + FSE_ARG_INT32, newid, + FSE_ARG_DONE); +#endif + hfs_unlock(tcp); + } else { + // XXXdbg - couldn't get a new docid... what to do? can't really fail the rename... + } + + // go back up and relock everything. next time through the if statement won't be true + // and we'll skip over this block of code. + goto relock; + } + + + + /* + * Acquire iocounts on the destination's resource fork vnode + * if necessary. If dst/src are files and the dst has a resource + * fork vnode, then we need to try and acquire an iocount on the rsrc vnode. + * If it does not exist, then we don't care and can skip it. + */ + if ((vnode_isreg(fvp)) || (vnode_islnk(fvp))) { + if ((tvp) && (tcp->c_rsrc_vp) && (tvp_rsrc == NULL)) { + tvp_rsrc = tcp->c_rsrc_vp; + /* + * We can look at the vid here because we're holding the + * cnode lock on the underlying cnode for this rsrc vnode. 
+ */ + tvp_rsrc_vid = vnode_vid (tvp_rsrc); + + /* Unlock everything to acquire iocount on this rsrc vnode */ + if (took_trunc_lock) { + hfs_unlock_truncate (VTOC(tvp), HFS_LOCK_DEFAULT); + took_trunc_lock = 0; + } + hfs_unlockfour(fdcp, fcp, tdcp, tcp); + + if (vnode_getwithvid (tvp_rsrc, tvp_rsrc_vid)) { + /* iocount acquisition failed. Reset fields and start over.. */ + tvp_rsrc_vid = 0; + tvp_rsrc = NULL; + } + goto retry; + } + } + + /* Ensure we didn't race src or dst parent directories with rmdir. */ if (fdcp->c_flag & (C_NOEXISTS | C_DELETED)) { @@ -2933,21 +4329,22 @@ hfs_vnop_rename(ap) * the parent/child relationship with fdcp and tdcp, as well as the * component name of the target cnodes. */ - if ((fcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, fdvp, fcnp, fcp->c_fileid)) { + if ((fcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, fdvp, fcnp, fcp->c_fileid, NULL, &error)) { error = ENOENT; goto out; } - if (tcp && ((tcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, tdvp, tcnp, tcp->c_fileid))) { + if (tcp && ((tcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, tdvp, tcnp, tcp->c_fileid, NULL, &error))) { // // hmm, the destination vnode isn't valid any more. // in this case we can just drop him and pretend he // never existed in the first place. // if (took_trunc_lock) { - hfs_unlock_truncate(VTOC(tvp), TRUE); - took_trunc_lock = 0; + hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT); + took_trunc_lock = 0; } + error = 0; hfs_unlockfour(fdcp, fcp, tdcp, tcp); @@ -3042,7 +4439,7 @@ hfs_vnop_rename(ap) /* * Make sure "from" vnode and its parent are changeable. */ - if ((fcp->c_flags & (IMMUTABLE | APPEND)) || (fdcp->c_flags & APPEND)) { + if ((fcp->c_bsdflags & (IMMUTABLE | APPEND)) || (fdcp->c_bsdflags & APPEND)) { error = EPERM; goto out; } @@ -3064,6 +4461,13 @@ hfs_vnop_rename(ap) goto out; } + /* Don't allow modification of the journal or journal_info_block */ + if (hfs_is_journal_file(hfsmp, fcp) || + (tcp && hfs_is_journal_file(hfsmp, tcp))) { + error = EPERM; + goto out; + } + #if QUOTA if (tvp) (void)hfs_getinoquota(tcp); @@ -3126,7 +4530,7 @@ hfs_vnop_rename(ap) lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - if (cat_lookup(hfsmp, &tmpdesc, 0, NULL, NULL, NULL, &real_cnid) != 0) { + if (cat_lookup(hfsmp, &tmpdesc, 0, 0, NULL, NULL, NULL, &real_cnid) != 0) { hfs_systemfile_unlock(hfsmp, lockflags); goto out; } @@ -3145,51 +4549,229 @@ hfs_vnop_rename(ap) got_cookie = 1; /* - * If the destination exists then it may need to be removed. + * If the destination exists then it may need to be removed. + * + * Due to HFS's locking system, we should always move the + * existing 'tvp' element to the hidden directory in hfs_vnop_rename. + * Because the VNOP_LOOKUP call enters and exits the filesystem independently + * of the actual vnop that it was trying to do (stat, link, readlink), + * we must release the cnode lock of that element during the interim to + * do MAC checking, vnode authorization, and other calls. In that time, + * the item can be deleted (or renamed over). However, only in the rename + * case is it inappropriate to return ENOENT from any of those calls. Either + * the call should return information about the old element (stale), or get + * information about the newer element that we are about to write in its place. + * + * HFS lookup has been modified to detect a rename and re-drive its + * lookup internally. 
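/*
 * [Editor's illustrative sketch, not part of the patch.] The
 * destination-removal flow the comment above describes, condensed: the
 * existing target is only unlinked into the hidden directory
 * (only_unlink = 1), marked C_RENAMED, and then aggressively recycled so
 * inactive/reclaim can free its blocks and EAs; the same is done for its
 * resource fork so it cannot pin the data fork. All identifiers appear in
 * the code further below; not standalone compilable code.
 */
if (vnode_isdir(tvp))
        error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE, 1);
else
        error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, NULL, 1);
if (error == 0) {
        if ((tcp->c_flag & C_DELETED) && tvp_rsrc)
                vnode_recycle(tvp_rsrc);        /* don't let the rsrc fork pin the data fork */
        tvp_deleted = 1;
        tcp->c_flag |= C_RENAMED;               /* deleted because of the rename */
        vnode_recycle(tvp);                     /* reclaim blocks/EAs as soon as possible */
}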
For other calls that have already succeeded in + * their lookup call and are waiting to acquire the cnode lock in order + * to proceed, that cnode lock will not fail due to the cnode being marked + * C_NOEXISTS, because it won't have been marked as such. It will only + * have C_DELETED. Thus, they will simply act on the stale open-unlinked + * element. All future callers will get the new element. + * + * To implement this behavior, we pass the "only_unlink" argument to + * hfs_removefile and hfs_removedir. This will result in the vnode acting + * as though it is open-unlinked. Additionally, when we are done moving the + * element to the hidden directory, we vnode_recycle the target so that it is + * reclaimed as soon as possible. Reclaim and inactive are both + * capable of clearing out unused blocks for an open-unlinked file or dir. */ if (tvp) { + // + // if the destination has a document id, we need to preserve it + // + if (fvp != tvp) { + uint32_t document_id; + struct FndrExtendedDirInfo *ffip = (struct FndrExtendedDirInfo *)((char *)&fcp->c_attr.ca_finderinfo + 16); + struct FndrExtendedDirInfo *tfip = (struct FndrExtendedDirInfo *)((char *)&tcp->c_attr.ca_finderinfo + 16); + + if (ffip->document_id && tfip->document_id) { + // both documents are tracked. only save a tombstone from tcp and do nothing else. + save_tombstone(hfsmp, tdvp, tvp, tcnp, 0); + } else { + struct doc_tombstone *ut; + ut = get_uthread_doc_tombstone(); + + document_id = tfip->document_id; + tfip->document_id = 0; + + if (document_id != 0) { + // clear UF_TRACKED as well since tcp is now no longer tracked + tcp->c_bsdflags &= ~UF_TRACKED; + (void) cat_update(hfsmp, &tcp->c_desc, &tcp->c_attr, NULL, NULL); + } + + if (ffip->document_id == 0 && document_id != 0) { + // printf("RENAME: preserving doc-id %d onto %s (from ino %d, to ino %d)\n", document_id, tcp->c_desc.cd_nameptr, tcp->c_desc.cd_cnid, fcp->c_desc.cd_cnid); + fcp->c_bsdflags |= UF_TRACKED; + ffip->document_id = document_id; + + (void) cat_update(hfsmp, &fcp->c_desc, &fcp->c_attr, NULL, NULL); +#if CONFIG_FSE + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)tcp->c_fileid, // src inode # + FSE_ARG_INO, (ino64_t)fcp->c_fileid, // dst inode # + FSE_ARG_INT32, (uint32_t)ffip->document_id, + FSE_ARG_DONE); +#endif + } else if ((fcp->c_bsdflags & UF_TRACKED) && should_save_docid_tombstone(ut, fvp, fcnp)) { + + if (ut->t_lastop_document_id) { + clear_tombstone_docid(ut, hfsmp, NULL); + } + save_tombstone(hfsmp, fdvp, fvp, fcnp, 0); + + //printf("RENAME: (dest-exists): saving tombstone doc-id %lld @ %s (ino %d)\n", + // ut->t_lastop_document_id, ut->t_lastop_filename, fcp->c_desc.cd_cnid); + } + } + } + /* * When fvp matches tvp they could be case variants * or matching hard links. */ if (fvp == tvp) { if (!(fcp->c_flag & C_HARDLINK)) { + /* + * If they're not hardlinks, then fvp == tvp must mean we + * are using case-insensitive HFS because case-sensitive would + * not use the same vnode for both. In this case we just update + * the catalog for: a -> A + */ goto skip_rm; /* simple case variant */ - } else if ((fdvp != tdvp) || + } + /* For all cases below, we must be using hardlinks */ + else if ((fdvp != tdvp) || (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) { + /* + * If the parent directories are not the same, AND the two items + * are hardlinks, posix says to do nothing: + * dir1/fred <-> dir2/bob and the op was mv dir1/fred -> dir2/bob + * We just return 0 in this case. 
+ * + * If case sensitivity is on, and we are using hardlinks + * then renaming is supposed to do nothing. + * dir1/fred <-> dir2/FRED, and op == mv dir1/fred -> dir2/FRED + */ goto out; /* matching hardlinks, nothing to do */ } else if (hfs_namecmp((const u_int8_t *)fcnp->cn_nameptr, fcnp->cn_namelen, (const u_int8_t *)tcnp->cn_nameptr, tcnp->cn_namelen) == 0) { + /* + * If we get here, then the following must be true: + * a) We are running case-insensitive HFS+. + * b) Both paths 'fvp' and 'tvp' are in the same parent directory. + * c) the two names are case-variants of each other. + * + * In this case, we are really only dealing with a single catalog record + * whose name is being updated. + * + * op is dir1/fred -> dir1/FRED + * + * We need to special case the name matching, because if + * dir1/fred <-> dir1/bob were the two links, and the + * op was dir1/fred -> dir1/bob + * That would fail/do nothing. + */ goto skip_rm; /* case-variant hardlink in the same dir */ } else { goto out; /* matching hardlink, nothing to do */ } } - if (vnode_isdir(tvp)) - error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE); - else { - error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, tvp_rsrc); - - /* - * If the destination file had a rsrc fork vnode, it may have been cleaned up - * in hfs_removefile if it was not busy (had no usecounts). This is possible - * because we grabbed the iocount on the rsrc fork safely at the beginning - * of the function before we did the lockfour. However, we may still need - * to take action to prevent block leaks, so aggressively recycle the vnode - * if possible. The vnode cannot be recycled because we hold an iocount on it. + + if (vnode_isdir(tvp)) { + /* + * hfs_removedir will eventually call hfs_removefile on the directory + * we're working on, because only hfs_removefile does the renaming of the + * item to the hidden directory. The directory will stay around in the + * hidden directory with C_DELETED until it gets an inactive or a reclaim. + * That way, we can destroy all of the EAs as needed and allow new ones to be + * written. */ - - if ((error == 0) && (tcp->c_flag & C_DELETED) && tvp_rsrc && !vnode_isinuse(tvp_rsrc, 0)) { - recycle_rsrc = 1; - } + error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE, 1); + } + else { + error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, NULL, 1); + + /* + * If the destination file had a resource fork vnode, then we need to get rid of + * its blocks when there are no more references to it. Because the call to + * hfs_removefile above always open-unlinks things, we need to force an inactive/reclaim + * on the resource fork vnode, in order to prevent block leaks. Otherwise, + * the resource fork vnode could prevent the data fork vnode from going out of scope + * because it holds a v_parent reference on it. So we mark it for termination + * with a call to vnode_recycle. hfs_vnop_reclaim has been modified so that it + * can clean up the blocks of open-unlinked files and resource forks. + * + * We can safely call vnode_recycle on the resource fork because we took an iocount + * reference on it at the beginning of the function. + */ + + if ((error == 0) && (tcp->c_flag & C_DELETED) && (tvp_rsrc)) { + vnode_recycle(tvp_rsrc); + } } - if (error) + if (error) { goto out; + } + tvp_deleted = 1; + + /* Mark 'tcp' as being deleted due to a rename */ + tcp->c_flag |= C_RENAMED; + + /* + * Aggressively mark tvp/tcp for termination to ensure that we recover all blocks + * as quickly as possible. 
+ */ + vnode_recycle(tvp); + } else { + struct doc_tombstone *ut; + ut = get_uthread_doc_tombstone(); + + // + // There is nothing at the destination. If the file being renamed is + // tracked, save a "tombstone" of the document_id. If the file is + // not a tracked file, then see if it needs to inherit a tombstone. + // + // NOTE: we do not save a tombstone if the file being renamed begins + // with "atmp" which is done to work-around AutoCad's bizarre + // 5-step un-safe save behavior + // + if (fcp->c_bsdflags & UF_TRACKED) { + if (should_save_docid_tombstone(ut, fvp, fcnp)) { + save_tombstone(hfsmp, fdvp, fvp, fcnp, 0); + + //printf("RENAME: (no dest): saving tombstone doc-id %lld @ %s (ino %d)\n", + // ut->t_lastop_document_id, ut->t_lastop_filename, fcp->c_desc.cd_cnid); + } else { + // intentionally do nothing + } + } else if ( ut->t_lastop_document_id != 0 + && tdvp == ut->t_lastop_parent + && vnode_vid(tdvp) == ut->t_lastop_parent_vid + && strcmp((char *)ut->t_lastop_filename, (char *)tcnp->cn_nameptr) == 0) { + + //printf("RENAME: %s (ino %d) inheriting doc-id %lld\n", tcnp->cn_nameptr, fcp->c_desc.cd_cnid, ut->t_lastop_document_id); + struct FndrExtendedFileInfo *fip = (struct FndrExtendedFileInfo *)((char *)&fcp->c_attr.ca_finderinfo + 16); + fcp->c_bsdflags |= UF_TRACKED; + fip->document_id = ut->t_lastop_document_id; + cat_update(hfsmp, &fcp->c_desc, &fcp->c_attr, NULL, NULL); + + clear_tombstone_docid(ut, hfsmp, fcp); // will send the docid-changed fsevent + + } else if (ut->t_lastop_document_id && should_save_docid_tombstone(ut, fvp, fcnp) && should_save_docid_tombstone(ut, tvp, tcnp)) { + // no match, clear the tombstone + //printf("RENAME: clearing the tombstone %lld @ %s\n", ut->t_lastop_document_id, ut->t_lastop_filename); + clear_tombstone_docid(ut, hfsmp, NULL); + } + } skip_rm: /* @@ -3228,6 +4810,11 @@ skip_rm: fcp->c_parentcnid = tdcp->c_fileid; fcp->c_hint = 0; + /* Now indicate this cnode needs to have date-added written to the finderinfo */ + fcp->c_flag |= C_NEEDS_DATEADDED; + (void) hfs_update (fvp, 0); + + hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_RMDIR : VOL_RMFILE, (fdcp->c_cnid == kHFSRootFolderID)); hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_MKDIR : VOL_MKFILE, @@ -3256,6 +4843,10 @@ skip_rm: } tdcp->c_entries++; tdcp->c_dirchangecnt++; + { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)tdcp->c_finderinfo + 16); + extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); + } if (fdcp->c_entries > 0) fdcp->c_entries--; fdcp->c_dirchangecnt++; @@ -3265,12 +4856,52 @@ skip_rm: fdcp->c_flag |= C_FORCEUPDATE; // XXXdbg - force it out! (void) hfs_update(fdvp, 0); } + { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)fdcp->c_finderinfo + 16); + extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); + } + tdcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ tdcp->c_touch_chgtime = TRUE; tdcp->c_touch_modtime = TRUE; tdcp->c_flag |= C_FORCEUPDATE; // XXXdbg - force it out! (void) hfs_update(tdvp, 0); + + /* Update the vnode's name now that the rename has completed. */ + vnode_update_identity(fvp, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, + tcnp->cn_hash, (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + + /* + * At this point, we may have a resource fork vnode attached to the + * 'from' vnode. 
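/*
 * [Editor's illustrative sketch, not part of the patch.] The tombstone
 * "inherit" case just above, spelled out: when an untracked file is renamed
 * onto the exact parent/name recorded in the per-thread tombstone (typical
 * of safe-save rename dances), the saved document_id is re-applied so the
 * document keeps its identity. All identifiers appear in the surrounding
 * code; not standalone compilable code.
 */
struct doc_tombstone *ut = get_uthread_doc_tombstone();
if (ut->t_lastop_document_id != 0 &&
    tdvp == ut->t_lastop_parent &&
    vnode_vid(tdvp) == ut->t_lastop_parent_vid &&
    strcmp((char *)ut->t_lastop_filename, (char *)tcnp->cn_nameptr) == 0) {
        struct FndrExtendedFileInfo *fip =
            (struct FndrExtendedFileInfo *)((char *)&fcp->c_attr.ca_finderinfo + 16);
        fcp->c_bsdflags |= UF_TRACKED;
        fip->document_id = ut->t_lastop_document_id;
        cat_update(hfsmp, &fcp->c_desc, &fcp->c_attr, NULL, NULL);
        clear_tombstone_docid(ut, hfsmp, fcp);  /* emits the docid-changed fsevent */
}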
If it exists, we will want to update its name, because + * it contains the old name + _PATH_RSRCFORKSPEC. ("/..namedfork/rsrc"). + * + * Note that the only thing we need to update here is the name attached to + * the vnode, since a resource fork vnode does not have a separate resource + * cnode -- it's still 'fcp'. + */ + if (fcp->c_rsrc_vp) { + char* rsrc_path = NULL; + int len; + + /* Create a new temporary buffer that's going to hold the new name */ + MALLOC_ZONE (rsrc_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + len = snprintf (rsrc_path, MAXPATHLEN, "%s%s", tcnp->cn_nameptr, _PATH_RSRCFORKSPEC); + len = MIN(len, MAXPATHLEN); + + /* + * vnode_update_identity will do the following for us: + * 1) release reference on the existing rsrc vnode's name. + * 2) copy/insert new name into the name cache + * 3) attach the new name to the resource vnode + * 4) update the vnode's vid + */ + vnode_update_identity (fcp->c_rsrc_vp, fvp, rsrc_path, len, 0, (VNODE_UPDATE_NAME | VNODE_UPDATE_CACHE)); + + /* Free the memory associated with the resource fork's name */ + FREE_ZONE (rsrc_path, MAXPATHLEN, M_NAMEI); + } out: if (got_cookie) { cat_postflight(hfsmp, &cookie, p); @@ -3286,40 +4917,30 @@ out: wakeup((caddr_t)&tdcp->c_flag); } - if (took_trunc_lock) - hfs_unlock_truncate(VTOC(tvp), TRUE); + if (took_trunc_lock) { + hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT); + } hfs_unlockfour(fdcp, fcp, tdcp, tcp); - /* - * Now that we've dropped all of the locks, we need to force an inactive and a recycle - * on the old destination's rsrc fork to prevent a leak of its blocks. Note that - * doing the ref/rele is to twiddle the VL_NEEDINACTIVE bit of the vnode's flags, so that - * on the last vnode_put for this vnode, we will force inactive to get triggered. - * We hold an iocount from the beginning of this function so we know it couldn't have been - * recycled already. - */ - if (recycle_rsrc) { - int vref; - vref = vnode_ref(tvp_rsrc); - if (vref == 0) { - vnode_rele(tvp_rsrc); - } - vnode_recycle(tvp_rsrc); - } - /* Now vnode_put the resource forks vnodes if necessary */ if (tvp_rsrc) { vnode_put(tvp_rsrc); + tvp_rsrc = NULL; } - if (fvp_rsrc) { - vnode_put(fvp_rsrc); + + /* After tvp is removed the only acceptable error is EIO */ + if (error && tvp_deleted) + error = EIO; + + if (emit_rename && is_tracked) { + if (error) { + resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_FAILED_OP | NAMESPACE_HANDLER_TRACK_EVENT); + } else { + resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_SUCCESS_OP | NAMESPACE_HANDLER_TRACK_EVENT); + } } - /* After tvp is removed the only acceptable error is EIO */ - if (error && tvp_deleted) - error = EIO; - return (error); } @@ -3327,7 +4948,7 @@ out: /* * Make a directory. */ -static int +int hfs_vnop_mkdir(struct vnop_mkdir_args *ap) { /***** HACK ALERT ********/ @@ -3339,7 +4960,7 @@ hfs_vnop_mkdir(struct vnop_mkdir_args *ap) /* * Create a symbolic link. 
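 *
 * In outline (a sketch of the flow below, with details omitted): the node
 * is created with hfs_makenode(), the link target is then written into the
 * new file's data fork, and if writing the target fails the half-created
 * node is torn down again with hfs_removefile() while the truncate lock is
 * held.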
*/ -static int +int hfs_vnop_symlink(struct vnop_symlink_args *ap) { struct vnode **vpp = ap->a_vpp; @@ -3376,7 +4997,7 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) goto out; } vp = *vpp; - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { goto out; } cp = VTOC(vp); @@ -3415,17 +5036,17 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) /* hfs_removefile() requires holding the truncate lock */ hfs_unlock(cp); - hfs_lock_truncate(cp, TRUE); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); if (hfs_start_transaction(hfsmp) != 0) { started_tr = 0; - hfs_unlock_truncate(cp, TRUE); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); goto out; } - (void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0, NULL); - hfs_unlock_truncate(cp, TRUE); + (void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0, NULL, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); goto out; } @@ -3511,7 +5132,7 @@ typedef union { * * In fact, the offset used by HFS is essentially an index (26 bits) * with a tag (6 bits). The tag is for associating the next request - * with the current request. This enables us to have multiple threads + * with the current request. This enables us to have multiple threads * reading the directory while the directory is also being modified. * * Each tag/index pair is tied to a unique directory hint. The hint @@ -3521,7 +5142,7 @@ typedef union { * If the directory is marked as deleted-but-in-use (cp->c_flag & C_DELETED), * do NOT synthesize entries for "." and "..". */ -static int +int hfs_vnop_readdir(ap) struct vnop_readdir_args /* { vnode_t a_vp; @@ -3560,12 +5181,24 @@ hfs_vnop_readdir(ap) /* Sanity check the uio data. */ if (uio_iovcnt(uio) > 1) return (EINVAL); - /* Note that the dirhint calls require an exclusive lock. */ - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) - return (error); + + if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) { + int compressed = hfs_file_is_compressed(VTOC(vp), 0); /* 0 == take the cnode lock */ + if (VTOCMP(vp) != NULL && !compressed) { + error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); + if (error) { + return error; + } + } + } + cp = VTOC(vp); hfsmp = VTOHFS(vp); + /* Note that the dirhint calls require an exclusive lock. */ + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) + return (error); + /* Pick up cnid hint (if any). */ if (nfs_cookies) { cnid_hint = (cnid_t)(uio_offset(uio) >> 32); @@ -3700,7 +5333,7 @@ hfs_vnop_readdir(ap) if (index == 0) { dirhint->dh_threadhint = cp->c_dirthreadhint; - } + } else { /* * If we have a non-zero index, there is a possibility that during the last @@ -3722,7 +5355,7 @@ hfs_vnop_readdir(ap) } /* Pack the buffer with dirent entries. */ - error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, extended, &items, &eofflag); + error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, ap->a_flags, &items, &eofflag); if (index == 0 && error == 0) { cp->c_dirthreadhint = dirhint->dh_threadhint; @@ -3757,7 +5390,7 @@ seekoffcalc: } out: - if (hfsmp->jnl && user_start) { + if (user_start) { vsunlock(user_start, user_len, TRUE); } /* If we didn't do anything then go ahead and dump the hint. */ @@ -3781,7 +5414,7 @@ out: /* * Read contents of a symbolic link. 
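 *
 * A minimal user-space sketch of the corresponding call (hypothetical path,
 * for illustration only):
 *
 *     char target[MAXPATHLEN];
 *     ssize_t n = readlink("/Volumes/MyHFS/alias", target, sizeof(target) - 1);
 *     if (n >= 0)
 *         target[n] = '\0';   // readlink(2) does not NUL-terminate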
*/ -static int +int hfs_vnop_readlink(ap) struct vnop_readlink_args /* { struct vnode *a_vp; @@ -3797,14 +5430,13 @@ hfs_vnop_readlink(ap) if (!vnode_islnk(vp)) return (EINVAL); - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); cp = VTOC(vp); fp = VTOF(vp); /* Zero length sym links are not allowed */ if (fp->ff_size == 0 || fp->ff_size > MAXPATHLEN) { - printf("hfs: zero length symlink on fileid %d\n", cp->c_fileid); error = EINVAL; goto exit; } @@ -3866,7 +5498,7 @@ exit: /* * Get configurable pathname variables. */ -static int +int hfs_vnop_pathconf(ap) struct vnop_pathconf_args /* { struct vnode *a_vp; @@ -3875,18 +5507,28 @@ hfs_vnop_pathconf(ap) vfs_context_t a_context; } */ *ap; { + + int std_hfs = (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD); switch (ap->a_name) { case _PC_LINK_MAX: - if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) - *ap->a_retval = 1; - else + if (std_hfs == 0){ *ap->a_retval = HFS_LINK_MAX; + } +#if CONFIG_HFS_STD + else { + *ap->a_retval = 1; + } +#endif break; case _PC_NAME_MAX: - if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) - *ap->a_retval = kHFSMaxFileNameChars; /* 255 */ - else - *ap->a_retval = kHFSPlusMaxFileNameChars; /* 31 */ + if (std_hfs == 0) { + *ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */ + } +#if CONFIG_HFS_STD + else { + *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ + } +#endif break; case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; /* 1024 */ @@ -3901,7 +5543,14 @@ hfs_vnop_pathconf(ap) *ap->a_retval = 200112; /* _POSIX_NO_TRUNC */ break; case _PC_NAME_CHARS_MAX: - *ap->a_retval = kHFSPlusMaxFileNameChars; + if (std_hfs == 0) { + *ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */ + } +#if CONFIG_HFS_STD + else { + *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ + } +#endif break; case _PC_CASE_SENSITIVE: if (VTOHFS(ap->a_vp)->hfs_flags & HFS_CASE_SENSITIVE) @@ -3913,10 +5562,19 @@ hfs_vnop_pathconf(ap) *ap->a_retval = 1; break; case _PC_FILESIZEBITS: - if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) + /* number of bits to store max file size */ + if (std_hfs == 0) { + *ap->a_retval = 64; + } +#if CONFIG_HFS_STD + else { *ap->a_retval = 32; - else - *ap->a_retval = 64; /* number of bits to store max file size */ + } +#endif + break; + case _PC_XATTR_SIZE_BITS: + /* Number of bits to store maximum extended attribute size */ + *ap->a_retval = HFS_XATTR_SIZE_BITS; break; default: return (EINVAL); @@ -3934,7 +5592,6 @@ hfs_vnop_pathconf(ap) * * The cnode must be locked exclusive */ -__private_extern__ int hfs_update(struct vnode *vp, __unused int waitfor) { @@ -3947,6 +5604,7 @@ hfs_update(struct vnode *vp, __unused int waitfor) struct hfsmount *hfsmp; int lockflags; int error; + uint32_t tstate = 0; p = current_proc(); hfsmp = VTOHFS(vp); @@ -3962,7 +5620,21 @@ hfs_update(struct vnode *vp, __unused int waitfor) cp->c_touch_modtime = 0; return (0); } - + if (kdebug_enable) { + if (cp->c_touch_acctime) + tstate |= DBG_HFS_UPDATE_ACCTIME; + if (cp->c_touch_modtime) + tstate |= DBG_HFS_UPDATE_MODTIME; + if (cp->c_touch_chgtime) + tstate |= DBG_HFS_UPDATE_CHGTIME; + + if (cp->c_flag & C_MODIFIED) + tstate |= DBG_HFS_UPDATE_MODIFIED; + if (cp->c_flag & C_FORCEUPDATE) + tstate |= DBG_HFS_UPDATE_FORCE; + if (cp->c_flag & C_NEEDS_DATEADDED) + tstate |= DBG_HFS_UPDATE_DATEADDED; + } hfs_touchtimes(hfsmp, cp); /* Nothing to update. 
*/ @@ -3995,34 +5667,60 @@ hfs_update(struct vnode *vp, __unused int waitfor) return (0); } + KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_START, vp, tstate, 0, 0, 0); + if ((error = hfs_start_transaction(hfsmp)) != 0) { - return error; - } - /* - * For files with invalid ranges (holes) the on-disk - * field representing the size of the file (cf_size) - * must be no larger than the start of the first hole. - */ - if (dataforkp && !TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges)) { - bcopy(dataforkp, &datafork, sizeof(datafork)); - datafork.cf_size = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start; - dataforkp = &datafork; - } else if (dataforkp && (cp->c_datafork->ff_unallocblocks != 0)) { - // always make sure the block count and the size - // of the file match the number of blocks actually - // allocated to the file on disk - bcopy(dataforkp, &datafork, sizeof(datafork)); - // make sure that we don't assign a negative block count - if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) { - panic("hfs: ff_blocks %d is less than unalloc blocks %d\n", - cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks); - } - datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks); - datafork.cf_size = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize; - dataforkp = &datafork; + KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_END, vp, tstate, error, -1, 0); + return error; } + /* + * Modify the values passed to cat_update based on whether or not + * the file has invalid ranges or borrowed blocks. + */ + if (dataforkp) { + off_t numbytes = 0; + + /* copy the datafork into a temporary copy so we don't pollute the cnode's */ + bcopy(dataforkp, &datafork, sizeof(datafork)); + dataforkp = &datafork; + + /* + * If there are borrowed blocks, ensure that they are subtracted + * from the total block count before writing the cnode entry to disk. + * Only extents that have actually been marked allocated in the bitmap + * should be reflected in the total block count for this fork. + */ + if (cp->c_datafork->ff_unallocblocks != 0) { + // make sure that we don't assign a negative block count + if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) { + panic("hfs: ff_blocks %d is less than unalloc blocks %d\n", + cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks); + } + + /* Also cap the LEOF to the total number of bytes that are allocated. */ + datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks); + datafork.cf_size = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize; + } + + /* + * For files with invalid ranges (holes) the on-disk + * field representing the size of the file (cf_size) + * must be no larger than the start of the first hole. + * However, note that if the first invalid range exists + * solely within borrowed blocks, then our LEOF and block + * count should both be zero. As a result, set it to the + * min of the current cf_size and the start of the first + * invalid range, because it may have already been reduced + * to zero by the borrowed blocks check above. 
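+ *
+ * A small worked example with hypothetical numbers: if ff_blocks = 10,
+ * ff_unallocblocks = 4, the allocation block size is 4096 bytes, and the
+ * first invalid range starts at byte offset 8192, then:
+ *
+ *     cf_blocks = 10 - 4 = 6
+ *     cf_size   = 6 * 4096 = 24576
+ *     cf_size   = MIN(8192, 24576) = 8192   // clamped to the first hole
+ *
+ * so the on-disk record never claims bytes that are borrowed or invalid.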
+ */ + if (!TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges)) { + numbytes = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start; + datafork.cf_size = MIN((numbytes), (datafork.cf_size)); + } + } + /* * For resource forks with delayed allocations, make sure * the block count and file size match the number of blocks @@ -4034,6 +5732,18 @@ hfs_update(struct vnode *vp, __unused int waitfor) rsrcfork.cf_size = rsrcfork.cf_blocks * HFSTOVCB(hfsmp)->blockSize; rsrcforkp = &rsrcfork; } + if (kdebug_enable) { + long dbg_parms[NUMPARMS]; + int dbg_namelen; + + dbg_namelen = NUMPARMS * sizeof(long); + vn_getpath(vp, (char *)dbg_parms, &dbg_namelen); + + if (dbg_namelen < (int)sizeof(dbg_parms)) + memset((char *)dbg_parms + dbg_namelen, 0, sizeof(dbg_parms) - dbg_namelen); + + kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE); + } /* * Lock the Catalog b-tree file. @@ -4050,6 +5760,8 @@ hfs_update(struct vnode *vp, __unused int waitfor) hfs_end_transaction(hfsmp); + KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_END, vp, tstate, error, 0, 0); + return (error); } @@ -4057,7 +5769,7 @@ hfs_update(struct vnode *vp, __unused int waitfor) * Allocate a new node * Note - Function does not create and return a vnode for whiteout creation. */ -static int +int hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx) { @@ -4072,15 +5784,37 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int error, started_tr = 0; enum vtype vnodetype; int mode; + int newvnode_flags = 0; + u_int32_t gnv_flags = 0; + int protectable_target = 0; + int nocache = 0; + +#if CONFIG_PROTECT + struct cprotect *entry = NULL; + int32_t cp_class = -1; + if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) { + cp_class = (int32_t)vap->va_dataprotect_class; + } + int protected_mount = 0; +#endif - if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK))) + + if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); /* set the cnode pointer only after successfully acquiring lock */ dcp = VTOC(dvp); + + /* Don't allow creation of new entries in open-unlinked directories */ + if ((error = hfs_checkdeleted(dcp))) { + hfs_unlock(dcp); + return error; + } + dcp->c_flag |= C_DIR_MODIFICATION; - + hfsmp = VTOHFS(dvp); + *vpp = NULL; tvp = NULL; out_desc.cd_flags = 0; @@ -4091,6 +5825,11 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, vnodetype = VREG; mode = MAKEIMODE(vnodetype, vap->va_mode); + if (S_ISDIR (mode) || S_ISREG (mode)) { + protectable_target = 1; + } + + /* Check if were out of usable disk space. 
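 * A non-superuser create fails with ENOSPC once hfs_freeblks() reports no
 * usable blocks; the superuser check below lets root proceed anyway.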
*/ if ((hfs_freeblks(hfsmp, 1) == 0) && (vfs_context_suser(ctx) != 0)) { error = ENOSPC; @@ -4112,9 +5851,11 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, } else { attr.ca_itime = tv.tv_sec; } +#if CONFIG_HFS_STD if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) { attr.ca_itime += 3600; /* Same as what hfs_update does */ } +#endif attr.ca_atime = attr.ca_ctime = attr.ca_mtime = attr.ca_itime; attr.ca_atimeondisk = attr.ca_atime; if (VATTR_IS_ACTIVE(vap, va_flags)) { @@ -4135,6 +5876,36 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, } } +#if CONFIG_PROTECT + if (cp_fs_protected(hfsmp->hfs_mp)) { + protected_mount = 1; + } + /* + * On a content-protected HFS+/HFSX filesystem, files and directories + * cannot be created without atomically setting/creating the EA that + * contains the protection class metadata and keys at the same time, in + * the same transaction. As a result, pre-set the "EAs exist" flag + * on the cat_attr for protectable catalog record creations. This will + * cause the cnode creation routine in hfs_getnewvnode to mark the cnode + * as having EAs. + */ + if ((protected_mount) && (protectable_target)) { + attr.ca_recflags |= kHFSHasAttributesMask; + /* delay entering in the namecache */ + nocache = 1; + } +#endif + + + /* + * Add the date added to the item. See above, as + * all of the dates are set to the itime. + */ + hfs_write_dateadded (&attr, attr.ca_atime); + + /* Initialize the gen counter to 1 */ + hfs_write_gencount(&attr, (uint32_t)1); + attr.ca_uid = vap->va_uid; attr.ca_gid = vap->va_gid; VATTR_SET_SUPPORTED(vap, va_mode); @@ -4173,6 +5944,24 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, in_desc.cd_hint = dcp->c_childhint; in_desc.cd_encoding = 0; +#if CONFIG_PROTECT + /* + * To preserve file creation atomicity with regards to the content protection EA, + * we must create the file in the catalog and then write out its EA in the same + * transaction. + * + * We only denote the target class in this EA; key generation is not completed + * until the file has been inserted into the catalog and will be done + * in a separate transaction. + */ + if ((protected_mount) && (protectable_target)) { + error = cp_setup_newentry(hfsmp, dcp, cp_class, attr.ca_mode, &entry); + if (error) { + goto exit; + } + } +#endif + if ((error = hfs_start_transaction(hfsmp)) != 0) { goto exit; } @@ -4182,24 +5971,73 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, // to check that any fileID it wants to use does not have orphaned // attributes in it. lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + cnid_t new_id; /* Reserve some space in the Catalog file. 
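 *
 * The create sequence under the catalog/attribute b-tree locks is, in
 * outline:
 *
 *     cat_preflight(hfsmp, CAT_CREATE, NULL, 0);      // reserve b-tree space
 *     cat_acquire_cnid(hfsmp, &new_id);               // claim an unused CNID
 *     cat_create(hfsmp, new_id, &in_desc, &attr, &out_desc);
 *     // on protected volumes the CP EA is written in this same transaction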
*/ if ((error = cat_preflight(hfsmp, CAT_CREATE, NULL, 0))) { hfs_systemfile_unlock(hfsmp, lockflags); goto exit; } - error = cat_create(hfsmp, &in_desc, &attr, &out_desc); + + if ((error = cat_acquire_cnid(hfsmp, &new_id))) { + hfs_systemfile_unlock (hfsmp, lockflags); + goto exit; + } + + error = cat_create(hfsmp, new_id, &in_desc, &attr, &out_desc); if (error == 0) { /* Update the parent directory */ dcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ dcp->c_entries++; + { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); + extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); + } if (vnodetype == VDIR) { INC_FOLDERCOUNT(hfsmp, dcp->c_attr); } dcp->c_dirchangecnt++; + { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)dcp->c_finderinfo + 16); + extinfo->write_gen_counter = OSSwapHostToBigInt32(OSSwapBigToHostInt32(extinfo->write_gen_counter) + 1); + } dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + +#if CONFIG_PROTECT + /* + * If we are creating a content protected file, now is when + * we create the EA. We must create it in the same transaction + * that creates the file. We can also guarantee that the file + * MUST exist because we are still holding the catalog lock + * at this point. + */ + if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target)) { + error = cp_setxattr (NULL, entry, hfsmp, attr.ca_fileid, XATTR_CREATE); + + if (error) { + int delete_err; + /* + * If we fail the EA creation, then we need to delete the file. + * Luckily, we are still holding all of the right locks. + */ + delete_err = cat_delete (hfsmp, &out_desc, &attr); + if (delete_err == 0) { + /* Update the parent directory */ + if (dcp->c_entries > 0) + dcp->c_entries--; + dcp->c_dirchangecnt++; + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + } + + /* Emit EINVAL if we fail to create EA*/ + error = EINVAL; + } + } +#endif } hfs_systemfile_unlock(hfsmp, lockflags); if (error) @@ -4231,9 +6069,26 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, started_tr = 0; } +#if CONFIG_PROTECT + /* + * At this point, we must have encountered success with writing the EA. + * Destroy our temporary cprotect (which had no keys). + */ + + if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target)) { + cp_entry_destroy (entry); + entry = NULL; + } +#endif + /* Do not create vnode for whiteouts */ if (S_ISWHT(mode)) { goto exit; + } + + gnv_flags |= GNV_CREATE; + if (nocache) { + gnv_flags |= GNV_NOCACHE; } /* @@ -4249,18 +6104,155 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, * try to create a new vnode, and then end up reclaiming another shadow vnode to * create the new one. However, if everything is working properly, this should * be a non-issue as we would never enter that reclaim codepath. - * + * * The cnode is locked on successful return. 
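 *
 * gnv_flags here is GNV_CREATE, plus GNV_NOCACHE on content-protected
 * volumes, so the new name is only entered into the name cache once the
 * protection keys have been written (see the cache_enter() call in the
 * CONFIG_PROTECT block below).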
*/ - error = hfs_getnewvnode(hfsmp, dvp, cnp, &out_desc, GNV_CREATE, &attr, NULL, &tvp); + error = hfs_getnewvnode(hfsmp, dvp, cnp, &out_desc, gnv_flags, &attr, + NULL, &tvp, &newvnode_flags); if (error) goto exit; cp = VTOC(tvp); + + struct doc_tombstone *ut; + ut = get_uthread_doc_tombstone(); + if ( ut->t_lastop_document_id != 0 + && ut->t_lastop_parent == dvp + && ut->t_lastop_parent_vid == vnode_vid(dvp) + && strcmp((char *)ut->t_lastop_filename, (char *)cp->c_desc.cd_nameptr) == 0) { + struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); + + //printf("CREATE: preserving doc-id %lld on %s\n", ut->t_lastop_document_id, ut->t_lastop_filename); + fip->document_id = (uint32_t)(ut->t_lastop_document_id & 0xffffffff); + + cp->c_bsdflags |= UF_TRACKED; + // mark the cnode dirty + cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; + + if ((error = hfs_start_transaction(hfsmp)) == 0) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); + + hfs_systemfile_unlock (hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + } + + clear_tombstone_docid(ut, hfsmp, cp); // will send the docid-changed fsevent + } else if (ut->t_lastop_document_id != 0) { + int len = cnp->cn_namelen; + if (len == 0) { + len = strlen(cnp->cn_nameptr); + } + + if (is_ignorable_temp_name(cnp->cn_nameptr, cnp->cn_namelen)) { + // printf("CREATE: not clearing tombstone because %s is a temp name.\n", cnp->cn_nameptr); + } else { + // Clear the tombstone because the thread is not recreating the same path + // printf("CREATE: clearing tombstone because %s is NOT a temp name.\n", cnp->cn_nameptr); + clear_tombstone_docid(ut, hfsmp, NULL); + } + } + *vpp = tvp; + +#if CONFIG_PROTECT + /* + * Now that we have a vnode-in-hand, generate keys for this namespace item. + * If we fail to create the keys, then attempt to delete the item from the + * namespace. If we can't delete the item, that's not desirable but also not fatal.. + * All of the places which deal with restoring/unwrapping keys must also be + * prepared to encounter an entry that does not have keys. + */ + if ((protectable_target) && (protected_mount)) { + struct cprotect *keyed_entry = NULL; + + if (cp->c_cpentry == NULL) { + panic ("hfs_makenode: no cpentry for cnode (%p)", cp); + } + + error = cp_generate_keys (hfsmp, cp, cp->c_cpentry->cp_pclass, &keyed_entry); + if (error == 0) { + /* + * Upon success, the keys were generated and written out. + * Update the cp pointer in the cnode. + */ + cp_replace_entry (cp, keyed_entry); + if (nocache) { + cache_enter (dvp, tvp, cnp); + } + } + else { + /* If key creation OR the setxattr failed, emit EPERM to userland */ + error = EPERM; + + /* + * Beware! This slightly violates the lock ordering for the + * cnode/vnode 'tvp'. Ordinarily, you must acquire the truncate lock + * which guards file size changes before acquiring the normal cnode lock + * and calling hfs_removefile on an item. + * + * However, in this case, we are still holding the directory lock so + * 'tvp' is not lookup-able and it was a newly created vnode so it + * cannot have any content yet. The only reason we are initiating + * the removefile is because we could not generate content protection keys + * for this namespace item. Note also that we pass a '1' in the allow_dirs + * argument for hfs_removefile because we may be creating a directory here. 
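+ * (The usual order is hfs_lock_truncate() before hfs_lock(); the error
+ * path in hfs_vnop_symlink above follows that pattern.)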
+ * + * All this to say that while it is technically a violation it is + * impossible to race with another thread for this cnode so it is safe. + */ + int err = hfs_removefile (dvp, tvp, cnp, 0, 0, 1, NULL, 0); + if (err) { + printf("hfs_makenode: removefile failed (%d) for CP entry %p\n", err, tvp); + } + + /* Release the cnode lock and mark the vnode for termination */ + hfs_unlock (cp); + err = vnode_recycle (tvp); + if (err) { + printf("hfs_makenode: vnode_recycle failed (%d) for CP entry %p\n", err, tvp); + } + + /* Drop the iocount on the new vnode to force reclamation/recycling */ + vnode_put (tvp); + cp = NULL; + *vpp = NULL; + } + } +#endif + +#if QUOTA + /* + * Once we create this vnode, we need to initialize its quota data + * structures, if necessary. We know that it is OK to just go ahead and + * initialize because we've already validated earlier (through the hfs_quotacheck + * function) to see if creating this cnode/vnode would cause us to go over quota. + */ + if (hfsmp->hfs_flags & HFS_QUOTAS) { + if (cp) { + /* cp could have been zeroed earlier */ + (void) hfs_getinoquota(cp); + } + } +#endif + exit: cat_releasedesc(&out_desc); +#if CONFIG_PROTECT + /* + * We may have jumped here in error-handling various situations above. + * If we haven't already dumped the temporary CP used to initialize + * the file atomically, then free it now. cp_entry_destroy should null + * out the pointer if it was called already. + */ + if (entry) { + cp_entry_destroy (entry); + entry = NULL; + } +#endif + /* * Make sure we release cnode lock on dcp. */ @@ -4283,24 +6275,52 @@ exit: /* - * Return a referenced vnode for the resource fork - * - * cnode for vnode vp must already be locked. - * - * can_drop_lock is true if its safe to temporarily drop/re-acquire the cnode lock + * hfs_vgetrsrc acquires a resource fork vnode corresponding to the cnode that is + * found in 'vp'. The rsrc fork vnode is returned with the cnode locked and iocount + * on the rsrc vnode. + * + * *rvpp is an output argument for returning the pointer to the resource fork vnode. + * In most cases, the resource fork vnode will not be set if we return an error. + * However, if error_on_unlinked is set, we may have already acquired the resource fork vnode + * before we discover the error (the file has gone open-unlinked). In this case only, + * we may return a vnode in the output argument despite an error. + * + * If can_drop_lock is set, then it is safe for this function to temporarily drop + * and then re-acquire the cnode lock. We may need to do this, for example, in order to + * acquire an iocount or promote our lock. + * + * error_on_unlinked is an argument which indicates that we are to return an error if we + * discover that the cnode has gone into an open-unlinked state ( C_DELETED or C_NOEXISTS) + * is set in the cnode flags. This is only necessary if can_drop_lock is true, otherwise + * there's really no reason to double-check for errors on the cnode. */ -__private_extern__ + int -hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int can_drop_lock) +hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, + int can_drop_lock, int error_on_unlinked) { struct vnode *rvp; struct vnode *dvp = NULLVP; struct cnode *cp = VTOC(vp); int error; int vid; + int delete_status = 0; + + if (vnode_vtype(vp) == VDIR) { + return EINVAL; + } + + /* + * Need to check the status of the cnode to validate it hasn't gone + * open-unlinked on us before we can actually do work with it. 
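+ *
+ * Caller-side, the convention described in the block comment above
+ * amounts to roughly this (illustrative sketch only):
+ *
+ *     struct vnode *rvp = NULLVP;
+ *     error = hfs_vgetrsrc(hfsmp, vp, &rvp, 1, 1);  // can_drop_lock, error_on_unlinked
+ *     if (error) {
+ *         if (rvp) {
+ *             hfs_unlock(cp);   // release the cnode lock first...
+ *             vnode_put(rvp);   // ...then drop the iocount we were handed
+ *         }
+ *         return error;
+ *     }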
+ */ + delete_status = hfs_checkdeleted(cp); + if ((delete_status) && (error_on_unlinked)) { + return delete_status; + } restart: - /* Attempt to use exising vnode */ + /* Attempt to use existing vnode */ if ((rvp = cp->c_rsrc_vp)) { vid = vnode_vid(rvp); @@ -4323,7 +6343,32 @@ restart: error = vnode_getwithvid(rvp, vid); if (can_drop_lock) { - (void) hfs_lock(cp, HFS_FORCE_LOCK); + (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + /* + * When we relinquished our cnode lock, the cnode could have raced + * with a delete and gotten deleted. If the caller did not want + * us to ignore open-unlinked files, then re-check the C_DELETED + * state and see if we need to return an ENOENT here because the item + * got deleted in the intervening time. + */ + if (error_on_unlinked) { + if ((delete_status = hfs_checkdeleted(cp))) { + /* + * If error == 0, this means that we succeeded in acquiring an iocount on the + * rsrc fork vnode. However, if we're in this block of code, that means that we noticed + * that the cnode has gone open-unlinked. In this case, the caller requested that we + * not do any other work and return an errno. The caller will be responsible for + * dropping the iocount we just acquired because we can't do it until we've released + * the cnode lock. + */ + if (error == 0) { + *rvpp = rvp; + } + return delete_status; + } + } + /* * When our lock was relinquished, the resource fork * could have been recycled. Check for this and try @@ -4337,7 +6382,7 @@ restart: if (name) printf("hfs_vgetrsrc: couldn't get resource" - " fork for %s, err %d\n", name, error); + " fork for %s, vol=%s, err=%d\n", name, hfsmp->vcbVN, error); return (error); } } else { @@ -4347,7 +6392,8 @@ restart: struct cat_desc to_desc; char delname[32]; int lockflags; - + int newvnode_flags = 0; + /* * Make sure cnode lock is exclusive, if not upgrade it. * @@ -4359,7 +6405,7 @@ restart: return (EINVAL); } /* - * If the upgrade fails we loose the lock and + * If the upgrade fails we lose the lock and * have to take the exclusive lock on our own. */ if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE) @@ -4372,9 +6418,17 @@ restart: * C_DELETED. This is because we need to continue to provide rsrc * fork access to open-unlinked files. In this case, build a fake descriptor * like in hfs_removefile. If we don't do this, buildkey will fail in - * cat_lookup because this cnode has no name in its descriptor. + * cat_lookup because this cnode has no name in its descriptor. However, + * only do this if the caller did not specify that they wanted us to + * error out upon encountering open-unlinked files. */ + if ((error_on_unlinked) && (can_drop_lock)) { + if ((error = hfs_checkdeleted(cp))) { + return error; + } + } + if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) { bzero (&to_desc, sizeof(to_desc)); bzero (delname, 32); @@ -4394,9 +6448,59 @@ restart: lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + /* + * We call cat_idlookup (instead of cat_lookup) below because we can't + * trust the descriptor in the provided cnode for lookups at this point. + * Between the time of the original lookup of this vnode and now, the + * descriptor could have gotten swapped or replaced. If this occurred, + * the parent/name combo originally desired may not necessarily be provided + * if we use the descriptor. 
Even worse, if the vnode represents + * a hardlink, we could have removed one of the links from the namespace + * but left the descriptor alone, since hfs_unlink does not invalidate + * the descriptor in the cnode if other links still point to the inode. + * + * Consider the following (slightly contrived) scenario: + * /tmp/a <--> /tmp/b (hardlinks). + * 1. Thread A: open rsrc fork on /tmp/b. + * 1a. Thread A: does lookup, goes out to lunch right before calling getnamedstream. + * 2. Thread B does 'mv /foo/b /tmp/b' + * 2. Thread B succeeds. + * 3. Thread A comes back and wants rsrc fork info for /tmp/b. + * + * Even though the hardlink backing /tmp/b is now eliminated, the descriptor + * is not removed/updated during the unlink process. So, if you were to + * do a lookup on /tmp/b, you'd acquire an entirely different record's resource + * fork. + * + * As a result, we use the fileid, which should be invariant for the lifetime + * of the cnode (possibly barring calls to exchangedata). + * + * Addendum: We can't do the above for HFS standard since we aren't guaranteed to + * have thread records for files. They were only required for directories. So + * we need to do the lookup with the catalog name. This is OK since hardlinks were + * never allowed on HFS standard. + */ + /* Get resource fork data */ - error = cat_lookup(hfsmp, descptr, 1, (struct cat_desc *)0, - (struct cat_attr *)0, &rsrcfork, NULL); + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + error = cat_idlookup (hfsmp, cp->c_fileid, 0, 1, NULL, NULL, &rsrcfork); + } +#if CONFIG_HFS_STD + else { + /* + * HFS standard only: + * + * Get the resource fork for this item with a cat_lookup call, but do not + * force a case lookup since HFS standard is case-insensitive only. We + * don't want the descriptor; just the fork data here. If we tried to + * do a ID lookup (via thread record -> catalog record), then we might fail + * prematurely since, as noted above, thread records were not strictly required + * on files in HFS. + */ + error = cat_lookup (hfsmp, descptr, 1, 0, (struct cat_desc*)NULL, + (struct cat_attr*)NULL, &rsrcfork, NULL); + } +#endif hfs_systemfile_unlock(hfsmp, lockflags); if (error) { @@ -4422,7 +6526,7 @@ restart: dvp = vnode_getparent(vp); error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL, descptr, GNV_WANTRSRC | GNV_SKIPLOCK, &cp->c_attr, - &rsrcfork, &rvp); + &rsrcfork, &rvp, &newvnode_flags); if (dvp) vnode_put(dvp); if (cn.cn_pnbuf) @@ -4438,7 +6542,7 @@ restart: /* * Wrapper for special device reads */ -static int +int hfsspec_read(ap) struct vnop_read_args /* { struct vnode *a_vp; @@ -4457,7 +6561,7 @@ hfsspec_read(ap) /* * Wrapper for special device writes */ -static int +int hfsspec_write(ap) struct vnop_write_args /* { struct vnode *a_vp; @@ -4479,7 +6583,7 @@ hfsspec_write(ap) * * Update the times on the cnode then do device close. 
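 * If the device vnode is still in use elsewhere, only the cnode times are
 * refreshed here (under the cnode lock) before the close is handed off to
 * the underlying special-device vnode operations.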
*/ -static int +int hfsspec_close(ap) struct vnop_close_args /* { struct vnode *a_vp; @@ -4491,7 +6595,7 @@ hfsspec_close(ap) struct cnode *cp; if (vnode_isinuse(ap->a_vp, 0)) { - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0) { cp = VTOC(vp); hfs_touchtimes(VTOHFS(vp), cp); hfs_unlock(cp); @@ -4557,7 +6661,7 @@ hfsfifo_close(ap) struct cnode *cp; if (vnode_isinuse(ap->a_vp, 1)) { - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0) { cp = VTOC(vp); hfs_touchtimes(VTOHFS(vp), cp); hfs_unlock(cp); @@ -4569,10 +6673,50 @@ hfsfifo_close(ap) #endif /* FIFO */ +/* + * Getter for the document_id + * the document_id is stored in FndrExtendedFileInfo/FndrExtendedDirInfo + */ +static u_int32_t +hfs_get_document_id_internal(const uint8_t *finderinfo, mode_t mode) +{ + u_int8_t *finfo = NULL; + u_int32_t doc_id = 0; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = ((uint8_t *)finderinfo) + 16; + + if (S_ISDIR(mode) || S_ISREG(mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + doc_id = extinfo->document_id; + } else if (S_ISDIR(mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)finderinfo + 16); + doc_id = extinfo->document_id; + } + + return doc_id; +} + + +/* getter(s) for document id */ +u_int32_t +hfs_get_document_id(struct cnode *cp) +{ + return (hfs_get_document_id_internal((u_int8_t*)cp->c_finderinfo, + cp->c_attr.ca_mode)); +} + +/* If you have finderinfo and mode, you can use this */ +u_int32_t +hfs_get_document_id_from_blob(const uint8_t *finderinfo, mode_t mode) +{ + return (hfs_get_document_id_internal(finderinfo, mode)); +} + /* * Synchronize a file's in-core state with that on disk. */ -static int +int hfs_vnop_fsync(ap) struct vnop_fsync_args /* { struct vnode *a_vp; @@ -4583,11 +6727,26 @@ hfs_vnop_fsync(ap) struct vnode* vp = ap->a_vp; int error; + /* Note: We check hfs flags instead of vfs mount flag because during + * read-write update, hfs marks itself read-write much earlier than + * the vfs, and hence won't result in skipping of certain writes like + * zero'ing out of unused nodes, creation of hotfiles btree, etc. + */ + if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) { + return 0; + } + +#if CONFIG_PROTECT + if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { + return (error); + } +#endif /* CONFIG_PROTECT */ + /* * We need to allow ENOENT lock errors since unlink * systenm call can call VNOP_FSYNC during vclean. 
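 * (That is why a failed hfs_lock() below is swallowed and the VNOP simply
 * returns 0 instead of surfacing the lock error.)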
*/ - error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (error) return (0); @@ -4598,7 +6757,7 @@ hfs_vnop_fsync(ap) } -static int +int hfs_vnop_whiteout(ap) struct vnop_whiteout_args /* { struct vnode *a_dvp; @@ -4666,10 +6825,12 @@ exit: } int (**hfs_vnodeop_p)(void *); -int (**hfs_std_vnodeop_p) (void *); #define VOPFUNC int (*)(void *) + +#if CONFIG_HFS_STD +int (**hfs_std_vnodeop_p) (void *); static int hfs_readonly_op (__unused void* ap) { return (EROFS); } /* @@ -4709,7 +6870,11 @@ struct vnodeopv_entry_desc hfs_standard_vnodeop_entries[] = { { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf }, /* pathconf */ { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ { &vnop_allocate_desc, (VOPFUNC)hfs_readonly_op }, /* allocate (READONLY) */ +#if CONFIG_SEARCHFS { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search }, /* search fs */ +#else + { &vnop_searchfs_desc, (VOPFUNC)err_searchfs }, /* search fs */ +#endif { &vnop_bwrite_desc, (VOPFUNC)hfs_readonly_op }, /* bwrite (READONLY) */ { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* pagein */ { &vnop_pageout_desc,(VOPFUNC) hfs_readonly_op }, /* pageout (READONLY) */ @@ -4732,7 +6897,7 @@ struct vnodeopv_entry_desc hfs_standard_vnodeop_entries[] = { struct vnodeopv_desc hfs_std_vnodeop_opv_desc = { &hfs_std_vnodeop_p, hfs_standard_vnodeop_entries }; - +#endif /* VNOP table for HFS+ */ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { @@ -4750,7 +6915,7 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { { &vnop_select_desc, (VOPFUNC)hfs_vnop_select }, /* select */ { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ { &vnop_exchange_desc, (VOPFUNC)hfs_vnop_exchange }, /* exchange */ - { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_mmap_desc, (VOPFUNC)hfs_vnop_mmap }, /* mmap */ { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync }, /* fsync */ { &vnop_remove_desc, (VOPFUNC)hfs_vnop_remove }, /* remove */ { &vnop_link_desc, (VOPFUNC)hfs_vnop_link }, /* link */ @@ -4767,7 +6932,11 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf }, /* pathconf */ { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ { &vnop_allocate_desc, (VOPFUNC)hfs_vnop_allocate }, /* allocate */ +#if CONFIG_SEARCHFS { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search }, /* search fs */ +#else + { &vnop_searchfs_desc, (VOPFUNC)err_searchfs }, /* search fs */ +#endif { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, /* bwrite */ { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* pagein */ { &vnop_pageout_desc,(VOPFUNC) hfs_vnop_pageout }, /* pageout */ @@ -4829,6 +6998,10 @@ struct vnodeopv_entry_desc hfs_specop_entries[] = { { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, + { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc hfs_specop_opv_desc = @@ -4873,6 +7046,10 @@ struct vnodeopv_entry_desc hfs_fifoop_entries[] = { { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ + { 
&vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, + { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc hfs_fifoop_opv_desc =