X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3a60a9f5b85abb8c2cf24e1926c5c7b3f608a5e2..bd504ef0e0b883cdd7917b73b3574eb9ce669905:/bsd/hfs/hfs_vnops.c diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 59c278f10..fad99d0a4 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -1,41 +1,55 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include +#include #include #include #include #include #include +#include #include +#include #include #include #include +#include #include #include #include #include #include #include +#include +#include +#include +#include #include #include @@ -56,59 +70,68 @@ #include "hfscommon/headers/BTreesInternal.h" #include "hfscommon/headers/FileMgrInternal.h" -#define MAKE_DELETED_NAME(NAME,FID) \ - (void) sprintf((NAME), "%s%d", HFS_DELETE_PREFIX, (FID)) - #define KNDETACH_VNLOCKED 0x00000001 -#define CARBON_TEMP_DIR_NAME "Cleanup At Startup" - - /* Global vfs data structures for hfs */ /* Always F_FULLFSYNC? 1=yes,0=no (default due to "various" reasons is 'no') */ int always_do_fullfsync = 0; -SYSCTL_INT (_kern, OID_AUTO, always_do_fullfsync, CTLFLAG_RW, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called"); - -extern unsigned long strtoul(const char *, char **, int); +SYSCTL_DECL(_vfs_generic); +SYSCTL_INT (_vfs_generic, OID_AUTO, always_do_fullfsync, CTLFLAG_RW | CTLFLAG_LOCKED, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called"); -static int hfs_makenode(struct vnode *dvp, struct vnode **vpp, +int hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx); +int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p); +int hfs_metasync_all(struct hfsmount *hfsmp); + +int hfs_removedir(struct vnode *, struct vnode *, struct componentname *, + int, int); +int hfs_removefile(struct vnode *, struct vnode *, struct componentname *, + int, int, int, struct vnode *, int); + +/* Used here and in cnode teardown -- for symlinks */ +int hfs_removefile_callback(struct buf *bp, void *hfsmp); + +int hfs_movedata (struct vnode *, struct vnode*); +static int hfs_move_fork (struct filefork *srcfork, struct cnode *src, + struct filefork *dstfork, struct cnode *dst); + +#if FIFO +static int hfsfifo_read(struct vnop_read_args *); +static int hfsfifo_write(struct vnop_write_args *); +static int hfsfifo_close(struct vnop_close_args *); -static int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, struct proc *p); - -static int hfs_removedir(struct vnode *, struct vnode *, struct componentname *, - int); - -static int hfs_removefile(struct vnode *, struct vnode *, struct componentname *, - int, int); - -static int hfs_vnop_close(struct vnop_close_args*); -static int hfs_vnop_create(struct vnop_create_args*); -static int hfs_vnop_exchange(struct vnop_exchange_args*); -static int hfs_vnop_fsync(struct vnop_fsync_args*); -static int hfs_vnop_mkdir(struct vnop_mkdir_args*); -static int hfs_vnop_mknod(struct vnop_mknod_args*); -static int hfs_vnop_getattr(struct vnop_getattr_args*); -static int hfs_vnop_open(struct vnop_open_args*); -static int hfs_vnop_readdir(struct vnop_readdir_args*); -static int hfs_vnop_remove(struct vnop_remove_args*); -static int hfs_vnop_rename(struct vnop_rename_args*); -static int hfs_vnop_rmdir(struct vnop_rmdir_args*); -static int hfs_vnop_symlink(struct vnop_symlink_args*); -static int hfs_vnop_setattr(struct vnop_setattr_args*); +extern int (**fifo_vnodeop_p)(void *); +#endif /* FIFO */ + +int hfs_vnop_close(struct vnop_close_args*); +int hfs_vnop_create(struct vnop_create_args*); +int hfs_vnop_exchange(struct vnop_exchange_args*); +int hfs_vnop_fsync(struct vnop_fsync_args*); +int hfs_vnop_mkdir(struct vnop_mkdir_args*); +int hfs_vnop_mknod(struct vnop_mknod_args*); +int hfs_vnop_getattr(struct vnop_getattr_args*); +int hfs_vnop_open(struct vnop_open_args*); +int hfs_vnop_readdir(struct vnop_readdir_args*); +int hfs_vnop_remove(struct vnop_remove_args*); +int hfs_vnop_rename(struct vnop_rename_args*); +int hfs_vnop_rmdir(struct vnop_rmdir_args*); +int hfs_vnop_symlink(struct vnop_symlink_args*); +int hfs_vnop_setattr(struct vnop_setattr_args*); +int hfs_vnop_readlink(struct vnop_readlink_args *); +int hfs_vnop_pathconf(struct vnop_pathconf_args *); +int hfs_vnop_whiteout(struct vnop_whiteout_args *); +int hfs_vnop_mmap(struct vnop_mmap_args *ap); +int hfsspec_read(struct vnop_read_args *); +int hfsspec_write(struct vnop_write_args *); +int hfsspec_close(struct vnop_close_args *); /* Options for hfs_removedir and hfs_removefile */ #define HFSRM_SKIP_RESERVE 0x01 -int hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags); -int hfs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, - struct proc *p); -int hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, - kauth_cred_t cred, struct proc *p); /***************************************************************************** * @@ -116,19 +139,75 @@ int hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, * *****************************************************************************/ +/* + * Is the given cnode either the .journal or .journal_info_block file on + * a volume with an active journal? Many VNOPs use this to deny access + * to those files. + * + * Note: the .journal file on a volume with an external journal still + * returns true here, even though it does not actually hold the contents + * of the volume's journal. + */ +static _Bool +hfs_is_journal_file(struct hfsmount *hfsmp, struct cnode *cp) +{ + if (hfsmp->jnl != NULL && + (cp->c_fileid == hfsmp->hfs_jnlinfoblkid || + cp->c_fileid == hfsmp->hfs_jnlfileid)) { + return true; + } else { + return false; + } +} + /* * Create a regular file. */ -static int +int hfs_vnop_create(struct vnop_create_args *ap) { - return hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context); + int error; + +again: + error = hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context); + + /* + * We speculatively skipped the original lookup of the leaf + * for CREATE. Since it exists, go get it as long as they + * didn't want an exclusive create. + */ + if ((error == EEXIST) && !(ap->a_vap->va_vaflags & VA_EXCLUSIVE)) { + struct vnop_lookup_args args; + + args.a_desc = &vnop_lookup_desc; + args.a_dvp = ap->a_dvp; + args.a_vpp = ap->a_vpp; + args.a_cnp = ap->a_cnp; + args.a_context = ap->a_context; + args.a_cnp->cn_nameiop = LOOKUP; + error = hfs_vnop_lookup(&args); + /* + * We can also race with remove for this file. + */ + if (error == ENOENT) { + goto again; + } + + /* Make sure it was file. */ + if ((error == 0) && !vnode_isreg(*args.a_vpp)) { + vnode_put(*args.a_vpp); + *args.a_vpp = NULLVP; + error = EEXIST; + } + args.a_cnp->cn_nameiop = CREATE; + } + return (error); } /* * Make device special file. */ -static int +int hfs_vnop_mknod(struct vnop_mknod_args *ap) { struct vnode_attr *vap = ap->a_vap; @@ -158,57 +237,331 @@ hfs_vnop_mknod(struct vnop_mknod_args *ap) return (0); } +#if HFS_COMPRESSION +/* + * hfs_ref_data_vp(): returns the data fork vnode for a given cnode. + * In the (hopefully rare) case where the data fork vnode is not + * present, it will use hfs_vget() to create a new vnode for the + * data fork. + * + * NOTE: If successful and a vnode is returned, the caller is responsible + * for releasing the returned vnode with vnode_rele(). + */ +static int +hfs_ref_data_vp(struct cnode *cp, struct vnode **data_vp, int skiplock) +{ + int vref = 0; + + if (!data_vp || !cp) /* sanity check incoming parameters */ + return EINVAL; + + /* maybe we should take the hfs cnode lock here, and if so, use the skiplock parameter to tell us not to */ + + if (!skiplock) hfs_lock(cp, HFS_SHARED_LOCK); + struct vnode *c_vp = cp->c_vp; + if (c_vp) { + /* we already have a data vnode */ + *data_vp = c_vp; + vref = vnode_ref(*data_vp); + if (!skiplock) hfs_unlock(cp); + if (vref == 0) { + return 0; + } + return EINVAL; + } + /* no data fork vnode in the cnode, so ask hfs for one. */ + + if (!cp->c_rsrc_vp) { + /* if we don't have either a c_vp or c_rsrc_vp, we can't really do anything useful */ + *data_vp = NULL; + if (!skiplock) hfs_unlock(cp); + return EINVAL; + } + + if (0 == hfs_vget(VTOHFS(cp->c_rsrc_vp), cp->c_cnid, data_vp, 1, 0) && + 0 != data_vp) { + vref = vnode_ref(*data_vp); + vnode_put(*data_vp); + if (!skiplock) hfs_unlock(cp); + if (vref == 0) { + return 0; + } + return EINVAL; + } + /* there was an error getting the vnode */ + *data_vp = NULL; + if (!skiplock) hfs_unlock(cp); + return EINVAL; +} + +/* + * hfs_lazy_init_decmpfs_cnode(): returns the decmpfs_cnode for a cnode, + * allocating it if necessary; returns NULL if there was an allocation error + */ +static decmpfs_cnode * +hfs_lazy_init_decmpfs_cnode(struct cnode *cp) +{ + if (!cp->c_decmp) { + decmpfs_cnode *dp = NULL; + MALLOC_ZONE(dp, decmpfs_cnode *, sizeof(decmpfs_cnode), M_DECMPFS_CNODE, M_WAITOK); + if (!dp) { + /* error allocating a decmpfs cnode */ + return NULL; + } + decmpfs_cnode_init(dp); + if (!OSCompareAndSwapPtr(NULL, dp, (void * volatile *)&cp->c_decmp)) { + /* another thread got here first, so free the decmpfs_cnode we allocated */ + decmpfs_cnode_destroy(dp); + FREE_ZONE(dp, sizeof(*dp), M_DECMPFS_CNODE); + } + } + + return cp->c_decmp; +} + +/* + * hfs_file_is_compressed(): returns 1 if the file is compressed, and 0 (zero) if not. + * if the file's compressed flag is set, makes sure that the decmpfs_cnode field + * is allocated by calling hfs_lazy_init_decmpfs_cnode(), then makes sure it is populated, + * or else fills it in via the decmpfs_file_is_compressed() function. + */ +int +hfs_file_is_compressed(struct cnode *cp, int skiplock) +{ + int ret = 0; + + /* fast check to see if file is compressed. If flag is clear, just answer no */ + if (!(cp->c_bsdflags & UF_COMPRESSED)) { + return 0; + } + + decmpfs_cnode *dp = hfs_lazy_init_decmpfs_cnode(cp); + if (!dp) { + /* error allocating a decmpfs cnode, treat the file as uncompressed */ + return 0; + } + + /* flag was set, see if the decmpfs_cnode state is valid (zero == invalid) */ + uint32_t decmpfs_state = decmpfs_cnode_get_vnode_state(dp); + switch(decmpfs_state) { + case FILE_IS_COMPRESSED: + case FILE_IS_CONVERTING: /* treat decompressing files as if they are compressed */ + return 1; + case FILE_IS_NOT_COMPRESSED: + return 0; + /* otherwise the state is not cached yet */ + } + + /* decmpfs hasn't seen this file yet, so call decmpfs_file_is_compressed() to init the decmpfs_cnode struct */ + struct vnode *data_vp = NULL; + if (0 == hfs_ref_data_vp(cp, &data_vp, skiplock)) { + if (data_vp) { + ret = decmpfs_file_is_compressed(data_vp, VTOCMP(data_vp)); // fill in decmpfs_cnode + vnode_rele(data_vp); + } + } + return ret; +} + +/* hfs_uncompressed_size_of_compressed_file() - get the uncompressed size of the file. + * if the caller has passed a valid vnode (has a ref count > 0), then hfsmp and fid are not required. + * if the caller doesn't have a vnode, pass NULL in vp, and pass valid hfsmp and fid. + * files size is returned in size (required) + * if the indicated file is a directory (or something that doesn't have a data fork), then this call + * will return an error and the caller should fall back to treating the item as an uncompressed file + */ +int +hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *vp, cnid_t fid, off_t *size, int skiplock) +{ + int ret = 0; + int putaway = 0; /* flag to remember if we used hfs_vget() */ + + if (!size) { + return EINVAL; /* no place to put the file size */ + } + + if (NULL == vp) { + if (!hfsmp || !fid) { /* make sure we have the required parameters */ + return EINVAL; + } + if (0 != hfs_vget(hfsmp, fid, &vp, skiplock, 0)) { /* vnode is null, use hfs_vget() to get it */ + vp = NULL; + } else { + putaway = 1; /* note that hfs_vget() was used to aquire the vnode */ + } + } + /* this double check for compression (hfs_file_is_compressed) + * ensures the cached size is present in case decmpfs hasn't + * encountered this node yet. + */ + if (vp) { + if (hfs_file_is_compressed(VTOC(vp), skiplock) ) { + *size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp)); /* file info will be cached now, so get size */ + } else { + if (VTOCMP(vp) && VTOCMP(vp)->cmp_type >= CMP_MAX) { + if (VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) { + // if we don't recognize this type, just use the real data fork size + if (VTOC(vp)->c_datafork) { + *size = VTOC(vp)->c_datafork->ff_size; + ret = 0; + } else { + ret = EINVAL; + } + } else { + *size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp)); /* file info will be cached now, so get size */ + ret = 0; + } + } else { + ret = EINVAL; + } + } + } + + if (putaway) { /* did we use hfs_vget() to get this vnode? */ + vnode_put(vp); /* if so, release it and set it to null */ + vp = NULL; + } + return ret; +} + +int +hfs_hides_rsrc(vfs_context_t ctx, struct cnode *cp, int skiplock) +{ + if (ctx == decmpfs_ctx) + return 0; + if (!hfs_file_is_compressed(cp, skiplock)) + return 0; + return decmpfs_hides_rsrc(ctx, cp->c_decmp); +} + +int +hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int skiplock) +{ + if (ctx == decmpfs_ctx) + return 0; + if (!hfs_file_is_compressed(cp, skiplock)) + return 0; + return decmpfs_hides_xattr(ctx, cp->c_decmp, name); +} +#endif /* HFS_COMPRESSION */ + /* * Open a file/directory. */ -static int +int hfs_vnop_open(struct vnop_open_args *ap) { struct vnode *vp = ap->a_vp; struct filefork *fp; struct timeval tv; int error; + static int past_bootup = 0; + struct cnode *cp = VTOC(vp); + struct hfsmount *hfsmp = VTOHFS(vp); + +#if HFS_COMPRESSION + if (ap->a_mode & FWRITE) { + /* open for write */ + if ( hfs_file_is_compressed(cp, 1) ) { /* 1 == don't take the cnode lock */ + /* opening a compressed file for write, so convert it to decompressed */ + struct vnode *data_vp = NULL; + error = hfs_ref_data_vp(cp, &data_vp, 1); /* 1 == don't take the cnode lock */ + if (0 == error) { + if (data_vp) { + error = decmpfs_decompress_file(data_vp, VTOCMP(data_vp), -1, 1, 0); + vnode_rele(data_vp); + } else { + error = EINVAL; + } + } + if (error != 0) + return error; + } + } else { + /* open for read */ + if (hfs_file_is_compressed(cp, 1) ) { /* 1 == don't take the cnode lock */ + if (VNODE_IS_RSRC(vp)) { + /* opening the resource fork of a compressed file, so nothing to do */ + } else { + /* opening a compressed file for read, make sure it validates */ + error = decmpfs_validate_compressed_file(vp, VTOCMP(vp)); + if (error != 0) + return error; + } + } + } +#endif /* * Files marked append-only must be opened for appending. */ - if ((VTOC(vp)->c_flags & APPEND) && !vnode_isdir(vp) && + if ((cp->c_bsdflags & APPEND) && !vnode_isdir(vp) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); if (vnode_isreg(vp) && !UBCINFOEXISTS(vp)) return (EBUSY); /* file is in use by the kernel */ - /* Don't allow journal file to be opened externally. */ - if (VTOC(vp)->c_fileid == VTOHFS(vp)->hfs_jnlfileid) + /* Don't allow journal to be opened externally. */ + if (hfs_is_journal_file(hfsmp, cp)) return (EPERM); - /* - * On the first (non-busy) open of a fragmented - * file attempt to de-frag it (if its less than 20MB). - */ - if ((VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) || - (VTOHFS(vp)->jnl == NULL) || + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || + (hfsmp->jnl == NULL) || +#if NAMEDSTREAMS + !vnode_isreg(vp) || vnode_isinuse(vp, 0) || vnode_isnamedstream(vp)) { +#else !vnode_isreg(vp) || vnode_isinuse(vp, 0)) { +#endif return (0); } - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) return (error); + +#if QUOTA + /* If we're going to write to the file, initialize quotas. */ + if ((ap->a_mode & FWRITE) && (hfsmp->hfs_flags & HFS_QUOTAS)) + (void)hfs_getinoquota(cp); +#endif /* QUOTA */ + + /* + * On the first (non-busy) open of a fragmented + * file attempt to de-frag it (if its less than 20MB). + */ fp = VTOF(vp); if (fp->ff_blocks && fp->ff_extents[7].blockCount != 0 && fp->ff_size <= (20 * 1024 * 1024)) { + int no_mods = 0; + struct timeval now; /* * Wait until system bootup is done (3 min). + * And don't relocate a file that's been modified + * within the past minute -- this can lead to + * system thrashing. */ - microuptime(&tv); - if (tv.tv_sec > (60 * 3)) { - (void) hfs_relocate(vp, VTOVCB(vp)->nextAllocation + 4096, - vfs_context_ucred(ap->a_context), - vfs_context_proc(ap->a_context)); + + if (!past_bootup) { + microuptime(&tv); + if (tv.tv_sec > (60*3)) { + past_bootup = 1; + } + } + + microtime(&now); + if ((now.tv_sec - cp->c_mtime) > 60) { + no_mods = 1; + } + + if (past_bootup && no_mods) { + (void) hfs_relocate(vp, hfsmp->nextAllocation + 4096, + vfs_context_ucred(ap->a_context), + vfs_context_proc(ap->a_context)); } } - hfs_unlock(VTOC(vp)); + + hfs_unlock(cp); return (0); } @@ -217,7 +570,7 @@ hfs_vnop_open(struct vnop_open_args *ap) /* * Close a file/directory. */ -static int +int hfs_vnop_close(ap) struct vnop_close_args /* { struct vnode *a_vp; @@ -230,17 +583,64 @@ hfs_vnop_close(ap) struct proc *p = vfs_context_proc(ap->a_context); struct hfsmount *hfsmp; int busy; + int tooktrunclock = 0; + int knownrefs = 0; if ( hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) return (0); cp = VTOC(vp); hfsmp = VTOHFS(vp); + /* + * If the rsrc fork is a named stream, it can cause the data fork to + * stay around, preventing de-allocation of these blocks. + * Do checks for truncation on close. Purge extra extents if they exist. + * Make sure the vp is not a directory, and that it has a resource fork, + * and that resource fork is also a named stream. + */ + + if ((vp->v_type == VREG) && (cp->c_rsrc_vp) + && (vnode_isnamedstream(cp->c_rsrc_vp))) { + uint32_t blks; + + blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize); + /* + * If there are extra blocks and there are only 2 refs on + * this vp (ourselves + rsrc fork holding ref on us), go ahead + * and try to truncate. + */ + if ((blks < VTOF(vp)->ff_blocks) && (!vnode_isinuse(vp, 2))) { + // release cnode lock; must acquire truncate lock BEFORE cnode lock + hfs_unlock(cp); + + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + tooktrunclock = 1; + + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) { + hfs_unlock_truncate(cp, 0); + // bail out if we can't re-acquire cnode lock + return 0; + } + // now re-test to make sure it's still valid + if (cp->c_rsrc_vp) { + knownrefs = 1 + vnode_isnamedstream(cp->c_rsrc_vp); + if (!vnode_isinuse(vp, knownrefs)){ + // now we can truncate the file, if necessary + blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize); + if (blks < VTOF(vp)->ff_blocks){ + (void) hfs_truncate(vp, VTOF(vp)->ff_size, IO_NDELAY, 0, 0, ap->a_context); + } + } + } + } + } + + // if we froze the fs and we're exiting, then "thaw" the fs if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) { hfsmp->hfs_freezing_proc = NULL; - hfs_global_exclusive_lock_release(hfsmp); - lck_rw_unlock_exclusive(&hfsmp->hfs_insync); + hfs_unlock_global (hfsmp); + lck_rw_unlock_exclusive(&hfsmp->hfs_insync); } busy = vnode_isinuse(vp, 1); @@ -254,127 +654,256 @@ hfs_vnop_close(ap) vnode_recycle(vp); } + if (tooktrunclock){ + hfs_unlock_truncate(cp, 0); + } hfs_unlock(cp); + + if (ap->a_fflag & FWASWRITTEN) { + hfs_sync_ejectable(hfsmp); + } + return (0); } /* * Get basic attributes. */ -static int +int hfs_vnop_getattr(struct vnop_getattr_args *ap) { +#define VNODE_ATTR_TIMES \ + (VNODE_ATTR_va_access_time|VNODE_ATTR_va_change_time|VNODE_ATTR_va_modify_time) +#define VNODE_ATTR_AUTH \ + (VNODE_ATTR_va_mode | VNODE_ATTR_va_uid | VNODE_ATTR_va_gid | \ + VNODE_ATTR_va_flags | VNODE_ATTR_va_acl) + struct vnode *vp = ap->a_vp; struct vnode_attr *vap = ap->a_vap; - struct vnode *rvp = NULL; + struct vnode *rvp = NULLVP; struct hfsmount *hfsmp; struct cnode *cp; + uint64_t data_size; enum vtype v_type; int error = 0; + cp = VTOC(vp); - if ((error = hfs_lock(VTOC(vp), HFS_SHARED_LOCK))) { - return (error); +#if HFS_COMPRESSION + /* we need to inspect the decmpfs state of the file before we take the hfs cnode lock */ + int compressed = 0; + int hide_size = 0; + off_t uncompressed_size = -1; + if (VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_alloc) || VATTR_IS_ACTIVE(vap, va_data_alloc) || VATTR_IS_ACTIVE(vap, va_total_size)) { + /* we only care about whether the file is compressed if asked for the uncompressed size */ + if (VNODE_IS_RSRC(vp)) { + /* if it's a resource fork, decmpfs may want us to hide the size */ + hide_size = hfs_hides_rsrc(ap->a_context, cp, 0); + } else { + /* if it's a data fork, we need to know if it was compressed so we can report the uncompressed size */ + compressed = hfs_file_is_compressed(cp, 0); + } + if ((VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_size))) { + // if it's compressed + if (compressed || (!VNODE_IS_RSRC(vp) && cp->c_decmp && cp->c_decmp->cmp_type >= CMP_MAX)) { + if (0 != hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0)) { + /* failed to get the uncompressed size, we'll check for this later */ + uncompressed_size = -1; + } else { + // fake that it's compressed + compressed = 1; + } + } + } } - cp = VTOC(vp); +#endif + + /* + * Shortcut for vnode_authorize path. Each of the attributes + * in this set is updated atomically so we don't need to take + * the cnode lock to access them. + */ + if ((vap->va_active & ~VNODE_ATTR_AUTH) == 0) { + /* Make sure file still exists. */ + if (cp->c_flag & C_NOEXISTS) + return (ENOENT); + + vap->va_uid = cp->c_uid; + vap->va_gid = cp->c_gid; + vap->va_mode = cp->c_mode; + vap->va_flags = cp->c_bsdflags; + vap->va_supported |= VNODE_ATTR_AUTH & ~VNODE_ATTR_va_acl; + + if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) { + vap->va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE; + VATTR_SET_SUPPORTED(vap, va_acl); + } + + return (0); + } + hfsmp = VTOHFS(vp); - hfs_touchtimes(hfsmp, cp); v_type = vnode_vtype(vp); + /* + * If time attributes are requested and we have cnode times + * that require updating, then acquire an exclusive lock on + * the cnode before updating the times. Otherwise we can + * just acquire a shared lock. + */ + if ((vap->va_active & VNODE_ATTR_TIMES) && + (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime)) { + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) + return (error); + hfs_touchtimes(hfsmp, cp); + } + else { + if ((error = hfs_lock(cp, HFS_SHARED_LOCK))) + return (error); + } - VATTR_RETURN(vap, va_rdev, (v_type == VBLK || v_type == VCHR) ? cp->c_rdev : 0); if (v_type == VDIR) { + data_size = (cp->c_entries + 2) * AVERAGE_HFSDIRENTRY_SIZE; + if (VATTR_IS_ACTIVE(vap, va_nlink)) { - int entries; + int nlink; - entries = cp->c_nlink; - if (vnode_isvroot(vp)) { - if (hfsmp->hfs_privdir_desc.cd_cnid != 0) - --entries; /* hide private dir */ - if (hfsmp->jnl) - entries -= 2; /* hide the journal files */ + /* + * For directories, the va_nlink is esentially a count + * of the ".." references to a directory plus the "." + * reference and the directory itself. So for HFS+ this + * becomes the sub-directory count plus two. + * + * In the absence of a sub-directory count we use the + * directory's item count. This will be too high in + * most cases since it also includes files. + */ + if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) && + (cp->c_attr.ca_recflags & kHFSHasFolderCountMask)) + nlink = cp->c_attr.ca_dircount; /* implied ".." entries */ + else + nlink = cp->c_entries; + + /* Account for ourself and our "." entry */ + nlink += 2; + /* Hide our private directories. */ + if (cp->c_cnid == kHFSRootFolderID) { + if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid != 0) { + --nlink; + } + if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid != 0) { + --nlink; + } } - VATTR_RETURN(vap, va_nlink, (uint64_t)entries); - } - + VATTR_RETURN(vap, va_nlink, (u_int64_t)nlink); + } if (VATTR_IS_ACTIVE(vap, va_nchildren)) { int entries; entries = cp->c_entries; - if (vnode_isvroot(vp)) { - if (hfsmp->hfs_privdir_desc.cd_cnid != 0) - --entries; /* hide private dir */ - if (hfsmp->jnl) - entries -= 2; /* hide the journal files */ + /* Hide our private files and directories. */ + if (cp->c_cnid == kHFSRootFolderID) { + if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid != 0) + --entries; + if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid != 0) + --entries; + if (hfsmp->jnl || ((hfsmp->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) + entries -= 2; /* hide the journal files */ } VATTR_RETURN(vap, va_nchildren, entries); } - } else { - VATTR_RETURN(vap, va_nlink, (uint64_t)cp->c_nlink); + /* + * The va_dirlinkcount is the count of real directory hard links. + * (i.e. its not the sum of the implied "." and ".." references) + */ + if (VATTR_IS_ACTIVE(vap, va_dirlinkcount)) { + VATTR_RETURN(vap, va_dirlinkcount, (uint32_t)cp->c_linkcount); + } + } else /* !VDIR */ { + data_size = VCTOF(vp, cp)->ff_size; + + VATTR_RETURN(vap, va_nlink, (u_int64_t)cp->c_linkcount); + if (VATTR_IS_ACTIVE(vap, va_data_alloc)) { + u_int64_t blocks; + +#if HFS_COMPRESSION + if (hide_size) { + VATTR_RETURN(vap, va_data_alloc, 0); + } else if (compressed) { + /* for compressed files, we report all allocated blocks as belonging to the data fork */ + blocks = cp->c_blocks; + VATTR_RETURN(vap, va_data_alloc, blocks * (u_int64_t)hfsmp->blockSize); + } + else +#endif + { + blocks = VCTOF(vp, cp)->ff_blocks; + VATTR_RETURN(vap, va_data_alloc, blocks * (u_int64_t)hfsmp->blockSize); + } + } } /* conditional because 64-bit arithmetic can be expensive */ if (VATTR_IS_ACTIVE(vap, va_total_size)) { if (v_type == VDIR) { - VATTR_RETURN(vap, va_total_size, cp->c_nlink * AVERAGE_HFSDIRENTRY_SIZE); + VATTR_RETURN(vap, va_total_size, (cp->c_entries + 2) * AVERAGE_HFSDIRENTRY_SIZE); } else { - uint64_t total_size = 0; + u_int64_t total_size = ~0ULL; struct cnode *rcp; - - if (cp->c_datafork) { - total_size = cp->c_datafork->ff_size; +#if HFS_COMPRESSION + if (hide_size) { + /* we're hiding the size of this file, so just return 0 */ + total_size = 0; + } else if (compressed) { + if (uncompressed_size == -1) { + /* + * We failed to get the uncompressed size above, + * so we'll fall back to the standard path below + * since total_size is still -1 + */ + } else { + /* use the uncompressed size we fetched above */ + total_size = uncompressed_size; + } } - - if (cp->c_blocks - VTOF(vp)->ff_blocks) { - /* hfs_vgetrsrc does not use struct proc - therefore passing NULL */ - error = hfs_vgetrsrc(hfsmp, vp, &rvp, NULL); - if (error) { - goto out; +#endif + if (total_size == ~0ULL) { + if (cp->c_datafork) { + total_size = cp->c_datafork->ff_size; } - - rcp = VTOC(rvp); - if (rcp && rcp->c_rsrcfork) { - total_size += rcp->c_rsrcfork->ff_size; + + if (cp->c_blocks - VTOF(vp)->ff_blocks) { + /* We deal with rsrc fork vnode iocount at the end of the function */ + error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE); + if (error) { + /* + * Note that we call hfs_vgetrsrc with error_on_unlinked + * set to FALSE. This is because we may be invoked via + * fstat() on an open-unlinked file descriptor and we must + * continue to support access to the rsrc fork until it disappears. + * The code at the end of this function will be + * responsible for releasing the iocount generated by + * hfs_vgetrsrc. This is because we can't drop the iocount + * without unlocking the cnode first. + */ + goto out; + } + + rcp = VTOC(rvp); + if (rcp && rcp->c_rsrcfork) { + total_size += rcp->c_rsrcfork->ff_size; + } } } - + VATTR_RETURN(vap, va_total_size, total_size); - /* Include size of attibute data (extents), if any */ - if (cp->c_attrblks) { - vap->va_total_size += (uint64_t)cp->c_attrblks * (uint64_t)hfsmp->blockSize; - } } } if (VATTR_IS_ACTIVE(vap, va_total_alloc)) { if (v_type == VDIR) { VATTR_RETURN(vap, va_total_alloc, 0); } else { - VATTR_RETURN(vap, va_total_alloc, (uint64_t)cp->c_blocks * (uint64_t)hfsmp->blockSize); - /* Include size of attibute data (extents), if any */ - if (cp->c_attrblks) { - vap->va_total_alloc += (uint64_t)cp->c_attrblks * (uint64_t)hfsmp->blockSize; - } + VATTR_RETURN(vap, va_total_alloc, (u_int64_t)cp->c_blocks * (u_int64_t)hfsmp->blockSize); } } - /* XXX broken... if ask for "data size" of rsrc fork vp you get rsrc fork size! */ - if (v_type == VDIR) { - VATTR_RETURN(vap, va_data_size, cp->c_nlink * AVERAGE_HFSDIRENTRY_SIZE); - } else { - VATTR_RETURN(vap, va_data_size, VTOF(vp)->ff_size); - } - if (VATTR_IS_ACTIVE(vap, va_data_alloc) && (v_type != VDIR)) { - /* XXX do we need to account for ff_unallocblocks ? */ - VATTR_RETURN(vap, va_data_alloc, (uint64_t)VTOF(vp)->ff_blocks * (uint64_t)hfsmp->blockSize); - } - /* XXX is this really a good 'optimal I/O size'? */ - VATTR_RETURN(vap, va_iosize, hfsmp->hfs_logBlockSize); - VATTR_RETURN(vap, va_uid, cp->c_uid); - VATTR_RETURN(vap, va_gid, cp->c_gid); - VATTR_RETURN(vap, va_mode, cp->c_mode); -#if 0 - /* XXX is S_IFXATTR still needed ??? */ - if (VNODE_IS_RSRC(vp)) - vap->va_mode |= S_IFXATTR; -#endif - VATTR_RETURN(vap, va_flags, cp->c_flags); /* * If the VFS wants extended security data, and we know that we @@ -385,16 +914,12 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) */ if (VATTR_IS_ACTIVE(vap, va_acl)) { if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) { - vap->va_acl = KAUTH_FILESEC_NONE; + vap->va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE; VATTR_SET_SUPPORTED(vap, va_acl); } } - vap->va_create_time.tv_sec = cp->c_itime; - vap->va_create_time.tv_nsec = 0; - VATTR_SET_SUPPORTED(vap, va_create_time); - if (VATTR_IS_ACTIVE(vap, va_access_time)) { - /* Access times are lazyily updated, get current time if needed */ + /* Access times are lazily updated, get current time if needed */ if (cp->c_touch_acctime) { struct timeval tv; @@ -406,15 +931,31 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) vap->va_access_time.tv_nsec = 0; VATTR_SET_SUPPORTED(vap, va_access_time); } + vap->va_create_time.tv_sec = cp->c_itime; + vap->va_create_time.tv_nsec = 0; vap->va_modify_time.tv_sec = cp->c_mtime; vap->va_modify_time.tv_nsec = 0; - VATTR_SET_SUPPORTED(vap, va_modify_time); vap->va_change_time.tv_sec = cp->c_ctime; vap->va_change_time.tv_nsec = 0; - VATTR_SET_SUPPORTED(vap, va_change_time); vap->va_backup_time.tv_sec = cp->c_btime; - vap->va_backup_time.tv_nsec = 0; - VATTR_SET_SUPPORTED(vap, va_backup_time); + vap->va_backup_time.tv_nsec = 0; + + /* See if we need to emit the date added field to the user */ + if (VATTR_IS_ACTIVE(vap, va_addedtime)) { + u_int32_t dateadded = hfs_get_dateadded (cp); + if (dateadded) { + vap->va_addedtime.tv_sec = dateadded; + vap->va_addedtime.tv_nsec = 0; + VATTR_SET_SUPPORTED (vap, va_addedtime); + } + } + + /* XXX is this really a good 'optimal I/O size'? */ + vap->va_iosize = hfsmp->hfs_logBlockSize; + vap->va_uid = cp->c_uid; + vap->va_gid = cp->c_gid; + vap->va_mode = cp->c_mode; + vap->va_flags = cp->c_bsdflags; /* * Exporting file IDs from HFS Plus: @@ -427,36 +968,142 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) * The stat call (getattr) uses va_fileid and the Carbon APIs, * which are hardlink-ignorant, will ask for va_linkid. */ - VATTR_RETURN(vap, va_fileid, (uint64_t)cp->c_fileid); - VATTR_RETURN(vap, va_linkid, (uint64_t)cp->c_cnid); - VATTR_RETURN(vap, va_parentid, (uint64_t)cp->c_parentcnid); - VATTR_RETURN(vap, va_fsid, cp->c_dev); - VATTR_RETURN(vap, va_filerev, 0); + vap->va_fileid = (u_int64_t)cp->c_fileid; + /* + * We need to use the origin cache for both hardlinked files + * and directories. Hardlinked directories have multiple cnids + * and parents (one per link). Hardlinked files also have their + * own parents and link IDs separate from the indirect inode number. + * If we don't use the cache, we could end up vending the wrong ID + * because the cnode will only reflect the link that was looked up most recently. + */ + if (cp->c_flag & C_HARDLINK) { + vap->va_linkid = (u_int64_t)hfs_currentcnid(cp); + vap->va_parentid = (u_int64_t)hfs_currentparent(cp); + } else { + vap->va_linkid = (u_int64_t)cp->c_cnid; + vap->va_parentid = (u_int64_t)cp->c_parentcnid; + } + vap->va_fsid = hfsmp->hfs_raw_dev; + vap->va_filerev = 0; + vap->va_encoding = cp->c_encoding; + vap->va_rdev = (v_type == VBLK || v_type == VCHR) ? cp->c_rdev : 0; +#if HFS_COMPRESSION + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + if (hide_size) + vap->va_data_size = 0; + else if (compressed) { + if (uncompressed_size == -1) { + /* failed to get the uncompressed size above, so just return data_size */ + vap->va_data_size = data_size; + } else { + /* use the uncompressed size we fetched above */ + vap->va_data_size = uncompressed_size; + } + } else + vap->va_data_size = data_size; +// vap->va_supported |= VNODE_ATTR_va_data_size; + VATTR_SET_SUPPORTED(vap, va_data_size); + } +#else + vap->va_data_size = data_size; + vap->va_supported |= VNODE_ATTR_va_data_size; +#endif + + /* Mark them all at once instead of individual VATTR_SET_SUPPORTED calls. */ + vap->va_supported |= VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time | + VNODE_ATTR_va_change_time| VNODE_ATTR_va_backup_time | + VNODE_ATTR_va_iosize | VNODE_ATTR_va_uid | + VNODE_ATTR_va_gid | VNODE_ATTR_va_mode | + VNODE_ATTR_va_flags |VNODE_ATTR_va_fileid | + VNODE_ATTR_va_linkid | VNODE_ATTR_va_parentid | + VNODE_ATTR_va_fsid | VNODE_ATTR_va_filerev | + VNODE_ATTR_va_encoding | VNODE_ATTR_va_rdev; + + /* If this is the root, let VFS to find out the mount name, which + * may be different from the real name. Otherwise, we need to take care + * for hardlinked files, which need to be looked up, if necessary + */ + if (VATTR_IS_ACTIVE(vap, va_name) && (cp->c_cnid != kHFSRootFolderID)) { + struct cat_desc linkdesc; + int lockflags; + int uselinkdesc = 0; + cnid_t nextlinkid = 0; + cnid_t prevlinkid = 0; + + /* Get the name for ATTR_CMN_NAME. We need to take special care for hardlinks + * here because the info. for the link ID requested by getattrlist may be + * different than what's currently in the cnode. This is because the cnode + * will be filled in with the information for the most recent link ID that went + * through namei/lookup(). If there are competing lookups for hardlinks that point + * to the same inode, one (or more) getattrlists could be vended incorrect name information. + * Also, we need to beware of open-unlinked files which could have a namelen of 0. + */ - VATTR_RETURN(vap, va_encoding, cp->c_encoding); + if ((cp->c_flag & C_HARDLINK) && + ((cp->c_desc.cd_namelen == 0) || (vap->va_linkid != cp->c_cnid))) { + /* If we have no name and our link ID is the raw inode number, then we may + * have an open-unlinked file. Go to the next link in this case. + */ + if ((cp->c_desc.cd_namelen == 0) && (vap->va_linkid == cp->c_fileid)) { + if ((error = hfs_lookup_siblinglinks(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))){ + goto out; + } + } + else { + /* just use link obtained from vap above */ + nextlinkid = vap->va_linkid; + } - /* if this is the root, let VFS to find out the mount name, which may be different from the real name */ - if (VATTR_IS_ACTIVE(vap, va_name) && !vnode_isvroot(vp)) { - /* Return the name for ATTR_CMN_NAME */ - if (cp->c_desc.cd_namelen == 0) { - error = ENOENT; - goto out; + /* We need to probe the catalog for the descriptor corresponding to the link ID + * stored in nextlinkid. Note that we don't know if we have the exclusive lock + * for the cnode here, so we can't just update the descriptor. Instead, + * we should just store the descriptor's value locally and then use it to pass + * out the name value as needed below. + */ + if (nextlinkid){ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_findname(hfsmp, nextlinkid, &linkdesc); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error == 0) { + uselinkdesc = 1; + } + } + } + + /* By this point, we've either patched up the name above and the c_desc + * points to the correct data, or it already did, in which case we just proceed + * by copying the name into the vap. Note that we will never set va_name to + * supported if nextlinkid is never initialized. This could happen in the degenerate + * case above involving the raw inode number, where it has no nextlinkid. In this case + * we will simply not mark the name bit as supported. + */ + if (uselinkdesc) { + strlcpy(vap->va_name, (const char*) linkdesc.cd_nameptr, MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); + cat_releasedesc(&linkdesc); + } + else if (cp->c_desc.cd_namelen) { + strlcpy(vap->va_name, (const char*) cp->c_desc.cd_nameptr, MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); } - - strncpy(vap->va_name, cp->c_desc.cd_nameptr, MAXPATHLEN); - vap->va_name[MAXPATHLEN-1] = '\0'; - VATTR_SET_SUPPORTED(vap, va_name); } out: hfs_unlock(cp); + /* + * We need to vnode_put the rsrc fork vnode only *after* we've released + * the cnode lock, since vnode_put can trigger an inactive call, which + * will go back into HFS and try to acquire a cnode lock. + */ if (rvp) { - vnode_put(rvp); + vnode_put (rvp); } + return (error); } -static int +int hfs_vnop_setattr(ap) struct vnop_setattr_args /* { struct vnode *a_vp; @@ -473,11 +1120,40 @@ hfs_vnop_setattr(ap) int error = 0; uid_t nuid; gid_t ngid; + time_t orig_ctime; + + orig_ctime = VTOC(vp)->c_ctime; + +#if HFS_COMPRESSION + int decmpfs_reset_state = 0; + /* + we call decmpfs_update_attributes even if the file is not compressed + because we want to update the incoming flags if the xattrs are invalid + */ + error = decmpfs_update_attributes(vp, vap); + if (error) + return error; + + // + // if this is not a size-changing setattr and it is not just + // an atime update, then check for a snapshot. + // + if (!VATTR_IS_ACTIVE(vap, va_data_size) && !(vap->va_active == VNODE_ATTR_va_access_time)) { + check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_MOD, NULL); + } +#endif + + +#if CONFIG_PROTECT + if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { + return (error); + } +#endif /* CONFIG_PROTECT */ hfsmp = VTOHFS(vp); - /* Don't allow modification of the journal file. */ - if (hfsmp->hfs_jnlfileid == VTOC(vp)->c_fileid) { + /* Don't allow modification of the journal. */ + if (hfs_is_journal_file(hfsmp, VTOC(vp))) { return (EPERM); } @@ -485,21 +1161,60 @@ hfs_vnop_setattr(ap) * File size change request. * We are guaranteed that this is not a directory, and that * the filesystem object is writeable. + * + * NOTE: HFS COMPRESSION depends on the data_size being set *before* the bsd flags are updated */ VATTR_SET_SUPPORTED(vap, va_data_size); if (VATTR_IS_ACTIVE(vap, va_data_size) && !vnode_islnk(vp)) { +#if HFS_COMPRESSION + /* keep the compressed state locked until we're done truncating the file */ + decmpfs_cnode *dp = VTOCMP(vp); + if (!dp) { + /* + * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode + * is filled in; we need a decmpfs_cnode to lock out decmpfs state changes + * on this file while it's truncating + */ + dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp)); + if (!dp) { + /* failed to allocate a decmpfs_cnode */ + return ENOMEM; /* what should this be? */ + } + } + + check_for_tracked_file(vp, orig_ctime, vap->va_data_size == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL); + + decmpfs_lock_compressed_data(dp, 1); + if (hfs_file_is_compressed(VTOC(vp), 1)) { + error = decmpfs_decompress_file(vp, dp, -1/*vap->va_data_size*/, 0, 1); + if (error != 0) { + decmpfs_unlock_compressed_data(dp, 1); + return error; + } + } +#endif /* Take truncate lock before taking cnode lock. */ - hfs_lock_truncate(VTOC(vp), TRUE); + hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK); + + /* Perform the ubc_setsize before taking the cnode lock. */ + ubc_setsize(vp, vap->va_data_size); + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { - hfs_unlock_truncate(VTOC(vp)); + hfs_unlock_truncate(VTOC(vp), 0); +#if HFS_COMPRESSION + decmpfs_unlock_compressed_data(dp, 1); +#endif return (error); } cp = VTOC(vp); - error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 0, ap->a_context); + error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 1, 0, ap->a_context); - hfs_unlock_truncate(cp); + hfs_unlock_truncate(cp, 0); +#if HFS_COMPRESSION + decmpfs_unlock_compressed_data(dp, 1); +#endif if (error) goto out; } @@ -509,6 +1224,21 @@ hfs_vnop_setattr(ap) cp = VTOC(vp); } + /* + * If it is just an access time update request by itself + * we know the request is from kernel level code, and we + * can delay it without being as worried about consistency. + * This change speeds up mmaps, in the rare case that they + * get caught behind a sync. + */ + + if (vap->va_active == VNODE_ATTR_va_access_time) { + cp->c_touch_acctime=TRUE; + goto out; + } + + + /* * Owner/group change request. * We are guaranteed that the new owner/group is valid and legal. @@ -525,7 +1255,7 @@ hfs_vnop_setattr(ap) * Mode change request. * We are guaranteed that the mode value is valid and that in * conjunction with the owner and group, this change is legal. - */ + */ VATTR_SET_SUPPORTED(vap, va_mode); if (VATTR_IS_ACTIVE(vap, va_mode) && ((error = hfs_chmod(vp, (int)vap->va_mode, cred, p)) != 0)) @@ -538,31 +1268,35 @@ hfs_vnop_setattr(ap) */ VATTR_SET_SUPPORTED(vap, va_flags); if (VATTR_IS_ACTIVE(vap, va_flags)) { - cp->c_flags = vap->va_flags; - cp->c_touch_chgtime = TRUE; - } + u_int16_t *fdFlags; - /* - * If the file's extended security data is being changed, we - * need to note the change. Note that because we don't store - * the data, we do not set the SUPPORTED bit; this will cause - * the VFS to use a fallback strategy. - */ - if (VATTR_IS_ACTIVE(vap, va_acl)) { - /* Remember if any ACL data was set or cleared. */ - if (vap->va_acl == NULL) { - /* being cleared */ - if (cp->c_attr.ca_recflags & kHFSHasSecurityMask) { - cp->c_attr.ca_recflags &= ~kHFSHasSecurityMask; - cp->c_touch_chgtime = TRUE; - } - } else { - /* being set */ - if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) { - cp->c_attr.ca_recflags |= kHFSHasSecurityMask; - cp->c_touch_chgtime = TRUE; - } +#if HFS_COMPRESSION + if ((cp->c_bsdflags ^ vap->va_flags) & UF_COMPRESSED) { + /* + * the UF_COMPRESSED was toggled, so reset our cached compressed state + * but we don't want to actually do the update until we've released the cnode lock down below + * NOTE: turning the flag off doesn't actually decompress the file, so that we can + * turn off the flag and look at the "raw" file for debugging purposes + */ + decmpfs_reset_state = 1; } +#endif + + cp->c_bsdflags = vap->va_flags; + cp->c_touch_chgtime = TRUE; + + /* + * Mirror the UF_HIDDEN flag to the invisible bit of the Finder Info. + * + * The fdFlags for files and frFlags for folders are both 8 bytes + * into the userInfo (the first 16 bytes of the Finder Info). They + * are both 16-bit fields. + */ + fdFlags = (u_int16_t *) &cp->c_finderinfo[8]; + if (vap->va_flags & UF_HIDDEN) + *fdFlags |= OSSwapHostToBigConstInt16(kFinderInvisibleMask); + else + *fdFlags &= ~OSSwapHostToBigConstInt16(kFinderInvisibleMask); } /* @@ -577,8 +1311,6 @@ hfs_vnop_setattr(ap) VATTR_IS_ACTIVE(vap, va_access_time) || VATTR_IS_ACTIVE(vap, va_modify_time) || VATTR_IS_ACTIVE(vap, va_backup_time)) { - if (vnode_islnk(vp)) - goto done; if (VATTR_IS_ACTIVE(vap, va_create_time)) cp->c_itime = vap->va_create_time.tv_sec; if (VATTR_IS_ACTIVE(vap, va_access_time)) { @@ -616,13 +1348,41 @@ hfs_vnop_setattr(ap) hfs_setencodingbits(hfsmp, cp->c_encoding); } -done: if ((error = hfs_update(vp, TRUE)) != 0) - goto out; - HFS_KNOTE(vp, NOTE_ATTRIB); + goto out; out: - if (cp) + if (cp) { + /* Purge origin cache for cnode, since caller now has correct link ID for it + * We purge it here since it was acquired for us during lookup, and we no longer need it. + */ + if ((cp->c_flag & C_HARDLINK) && (vp->v_type != VDIR)){ + hfs_relorigin(cp, 0); + } + hfs_unlock(cp); +#if HFS_COMPRESSION + if (decmpfs_reset_state) { + /* + * we've changed the UF_COMPRESSED flag, so reset the decmpfs state for this cnode + * but don't do it while holding the hfs cnode lock + */ + decmpfs_cnode *dp = VTOCMP(vp); + if (!dp) { + /* + * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode + * is filled in; we need a decmpfs_cnode to prevent decmpfs state changes + * on this file if it's locked + */ + dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp)); + if (!dp) { + /* failed to allocate a decmpfs_cnode */ + return ENOMEM; /* what should this be? */ + } + } + decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0); + } +#endif + } return (error); } @@ -631,24 +1391,17 @@ out: * Change the mode on a file. * cnode must be locked before calling. */ -__private_extern__ int -hfs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p) +hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struct proc *p) { register struct cnode *cp = VTOC(vp); - int error; if (VTOVCB(vp)->vcbSigWord != kHFSPlusSigWord) return (0); - // XXXdbg - don't allow modification of the journal or journal_info_block - if (VTOHFS(vp)->jnl && cp && cp->c_datafork) { - struct HFSPlusExtentDescriptor *extd; - - extd = &cp->c_datafork->ff_extents[0]; - if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) { - return EPERM; - } + // Don't allow modification of the journal or journal_info_block + if (hfs_is_journal_file(VTOHFS(vp), cp)) { + return EPERM; } #if OVERRIDE_UNKNOWN_PERMISSIONS @@ -663,7 +1416,6 @@ hfs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p) } -__private_extern__ int hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags) { @@ -688,7 +1440,7 @@ hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean co } /* If immutable bit set, nobody gets to write it. */ - if (considerFlags && (cp->c_flags & IMMUTABLE)) + if (considerFlags && (cp->c_bsdflags & IMMUTABLE)) return (EPERM); /* Otherwise, user id 0 always gets access. */ @@ -713,17 +1465,20 @@ hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean co * Perform chown operation on cnode cp; * code must be locked prior to call. */ -__private_extern__ int +#if !QUOTA +hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, __unused kauth_cred_t cred, + __unused struct proc *p) +#else hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, - struct proc *p) + __unused struct proc *p) +#endif { register struct cnode *cp = VTOC(vp); uid_t ouid; gid_t ogid; - int error = 0; - int is_member; #if QUOTA + int error = 0; register int i; int64_t change; #endif /* QUOTA */ @@ -825,7 +1580,12 @@ good: panic("hfs_chown: lost quota"); #endif /* QUOTA */ - if (ouid != uid || ogid != gid) + + /* + According to the SUSv3 Standard, chown() shall mark + for update the st_ctime field of the file. + (No exceptions mentioned) + */ cp->c_touch_chgtime = TRUE; return (0); } @@ -838,7 +1598,7 @@ good: * case the file is being tracked through its file ID. Typically * its used after creating a new file during a safe-save. */ -static int +int hfs_vnop_exchange(ap) struct vnop_exchange_args /* { struct vnode *a_fvp; @@ -854,9 +1614,18 @@ hfs_vnop_exchange(ap) struct hfsmount *hfsmp; struct cat_desc tempdesc; struct cat_attr tempattr; + const unsigned char *from_nameptr; + const unsigned char *to_nameptr; + char from_iname[32]; + char to_iname[32]; + uint32_t to_flag_special; + uint32_t from_flag_special; + cnid_t from_parid; + cnid_t to_parid; int lockflags; int error = 0, started_tr = 0, got_cookie = 0; cat_cookie_t cookie; + time_t orig_from_ctime, orig_to_ctime; /* The files must be on the same volume. */ if (vnode_mount(from_vp) != vnode_mount(to_vp)) @@ -865,6 +1634,77 @@ hfs_vnop_exchange(ap) if (from_vp == to_vp) return (EINVAL); + orig_from_ctime = VTOC(from_vp)->c_ctime; + orig_to_ctime = VTOC(to_vp)->c_ctime; + + +#if CONFIG_PROTECT + /* + * Do not allow exchangedata/F_MOVEDATAEXTENTS on data-protected filesystems + * because the EAs will not be swapped. As a result, the persistent keys would not + * match and the files will be garbage. + */ + if (cp_fs_protected (vnode_mount(from_vp))) { + return EINVAL; + } +#endif + +#if HFS_COMPRESSION + if ( hfs_file_is_compressed(VTOC(from_vp), 0) ) { + if ( 0 != ( error = decmpfs_decompress_file(from_vp, VTOCMP(from_vp), -1, 0, 1) ) ) { + return error; + } + } + + if ( hfs_file_is_compressed(VTOC(to_vp), 0) ) { + if ( 0 != ( error = decmpfs_decompress_file(to_vp, VTOCMP(to_vp), -1, 0, 1) ) ) { + return error; + } + } +#endif // HFS_COMPRESSION + + /* + * Normally, we want to notify the user handlers about the event, + * except if it's a handler driving the event. + */ + if ((ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) == 0) { + check_for_tracked_file(from_vp, orig_from_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + check_for_tracked_file(to_vp, orig_to_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + } else { + /* + * We're doing a data-swap. + * Take the truncate lock/cnode lock, then verify there are no mmap references. + * Issue a hfs_filedone to flush out all of the remaining state for this file. + * Allow the rest of the codeflow to re-acquire the cnode locks in order. + */ + + hfs_lock_truncate (VTOC(from_vp), HFS_SHARED_LOCK); + + if ((error = hfs_lock(VTOC(from_vp), HFS_EXCLUSIVE_LOCK))) { + hfs_unlock_truncate (VTOC(from_vp), 0); + return error; + } + + /* Verify the source file is not in use by anyone besides us (including mmap refs) */ + if (vnode_isinuse(from_vp, 1)) { + error = EBUSY; + hfs_unlock(VTOC(from_vp)); + hfs_unlock_truncate (VTOC(from_vp), 0); + return error; + } + + /* Flush out the data in the source file */ + VTOC(from_vp)->c_flag |= C_SWAPINPROGRESS; + error = hfs_filedone (from_vp, ap->a_context); + VTOC(from_vp)->c_flag &= ~C_SWAPINPROGRESS; + hfs_unlock(VTOC(from_vp)); + hfs_unlock_truncate(VTOC(from_vp), 0); + + if (error) { + return error; + } + } + if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK))) return (error); @@ -872,34 +1712,28 @@ hfs_vnop_exchange(ap) to_cp = VTOC(to_vp); hfsmp = VTOHFS(from_vp); - /* Only normal files can be exchanged. */ - if (!vnode_isreg(from_vp) || !vnode_isreg(to_vp) || - (from_cp->c_flag & C_HARDLINK) || (to_cp->c_flag & C_HARDLINK) || - VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) { + /* Resource forks cannot be exchanged. */ + if (VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) { error = EINVAL; goto exit; } - // XXXdbg - don't allow modification of the journal or journal_info_block - if (hfsmp->jnl) { - struct HFSPlusExtentDescriptor *extd; - - if (from_cp->c_datafork) { - extd = &from_cp->c_datafork->ff_extents[0]; - if (extd->startBlock == VTOVCB(from_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) { - error = EPERM; - goto exit; - } - } - - if (to_cp->c_datafork) { - extd = &to_cp->c_datafork->ff_extents[0]; - if (extd->startBlock == VTOVCB(to_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) { - error = EPERM; - goto exit; - } - } + // Don't allow modification of the journal or journal_info_block + if (hfs_is_journal_file(hfsmp, from_cp) || + hfs_is_journal_file(hfsmp, to_cp)) { + error = EPERM; + goto exit; + } + + /* + * Ok, now that all of the pre-flighting is done, call the underlying + * function if needed. + */ + if (ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) { + error = hfs_movedata(from_vp, to_vp); + goto exit; } + if ((error = hfs_start_transaction(hfsmp)) != 0) { goto exit; @@ -909,26 +1743,42 @@ hfs_vnop_exchange(ap) /* * Reserve some space in the Catalog file. */ - bzero(&cookie, sizeof(cookie)); if ((error = cat_preflight(hfsmp, CAT_EXCHANGE, &cookie, vfs_context_proc(ap->a_context)))) { goto exit; } got_cookie = 1; /* The backend code always tries to delete the virtual - * extent id for exchanging files so we neeed to lock + * extent id for exchanging files so we need to lock * the extents b-tree. */ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + /* Account for the location of the catalog objects. */ + if (from_cp->c_flag & C_HARDLINK) { + MAKE_INODE_NAME(from_iname, sizeof(from_iname), + from_cp->c_attr.ca_linkref); + from_nameptr = (unsigned char *)from_iname; + from_parid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + from_cp->c_hint = 0; + } else { + from_nameptr = from_cp->c_desc.cd_nameptr; + from_parid = from_cp->c_parentcnid; + } + if (to_cp->c_flag & C_HARDLINK) { + MAKE_INODE_NAME(to_iname, sizeof(to_iname), + to_cp->c_attr.ca_linkref); + to_nameptr = (unsigned char *)to_iname; + to_parid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + to_cp->c_hint = 0; + } else { + to_nameptr = to_cp->c_desc.cd_nameptr; + to_parid = to_cp->c_parentcnid; + } + /* Do the exchange */ - error = ExchangeFileIDs(hfsmp, - from_cp->c_desc.cd_nameptr, - to_cp->c_desc.cd_nameptr, - from_cp->c_parentcnid, - to_cp->c_parentcnid, - from_cp->c_hint, - to_cp->c_hint); + error = ExchangeFileIDs(hfsmp, from_nameptr, to_nameptr, from_parid, + to_parid, from_cp->c_hint, to_cp->c_hint); hfs_systemfile_unlock(hfsmp, lockflags); /* @@ -950,6 +1800,14 @@ hfs_vnop_exchange(ap) /* Save a copy of from attributes before swapping. */ bcopy(&from_cp->c_desc, &tempdesc, sizeof(struct cat_desc)); bcopy(&from_cp->c_attr, &tempattr, sizeof(struct cat_attr)); + + /* Save whether or not each cnode is a hardlink or has EAs */ + from_flag_special = from_cp->c_flag & (C_HARDLINK | C_HASXATTRS); + to_flag_special = to_cp->c_flag & (C_HARDLINK | C_HASXATTRS); + + /* Drop the special bits from each cnode */ + from_cp->c_flag &= ~(C_HARDLINK | C_HASXATTRS); + to_cp->c_flag &= ~(C_HARDLINK | C_HASXATTRS); /* * Swap the descriptors and all non-fork related attributes. @@ -958,70 +1816,443 @@ hfs_vnop_exchange(ap) bcopy(&to_cp->c_desc, &from_cp->c_desc, sizeof(struct cat_desc)); from_cp->c_hint = 0; - from_cp->c_fileid = from_cp->c_cnid; + /* + * If 'to' was a hardlink, then we copied over its link ID/CNID/(namespace ID) + * when we bcopy'd the descriptor above. However, we need to be careful + * when setting up the fileID below, because we cannot assume that the + * file ID is the same as the CNID if either one was a hardlink. + * The file ID is stored in the c_attr as the ca_fileid. So it needs + * to be pulled explicitly; we cannot just use the CNID. + */ + from_cp->c_fileid = to_cp->c_attr.ca_fileid; + from_cp->c_itime = to_cp->c_itime; from_cp->c_btime = to_cp->c_btime; from_cp->c_atime = to_cp->c_atime; from_cp->c_ctime = to_cp->c_ctime; from_cp->c_gid = to_cp->c_gid; from_cp->c_uid = to_cp->c_uid; - from_cp->c_flags = to_cp->c_flags; + from_cp->c_bsdflags = to_cp->c_bsdflags; from_cp->c_mode = to_cp->c_mode; + from_cp->c_linkcount = to_cp->c_linkcount; + from_cp->c_attr.ca_linkref = to_cp->c_attr.ca_linkref; + from_cp->c_attr.ca_firstlink = to_cp->c_attr.ca_firstlink; + + /* + * The cnode flags need to stay with the cnode and not get transferred + * over along with everything else because they describe the content; they are + * not attributes that reflect changes specific to the file ID. In general, + * fields that are tied to the file ID are the ones that will move. + * + * This reflects the fact that the file may have borrowed blocks, dirty metadata, + * or other extents, which may not yet have been written to the catalog. If + * they were, they would have been transferred above in the ExchangeFileIDs call above... + * + * The flags that are special are: + * C_HARDLINK, C_HASXATTRS + * + * These flags move with the item and file ID in the namespace since their + * state is tied to that of the file ID. + * + * So to transfer the flags, we have to take the following steps + * 1) Store in a localvar whether or not the special bits are set. + * 2) Drop the special bits from the current flags + * 3) swap the special flag bits to their destination + */ + from_cp->c_flag |= to_flag_special; + from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags; bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32); bcopy(&tempdesc, &to_cp->c_desc, sizeof(struct cat_desc)); to_cp->c_hint = 0; - to_cp->c_fileid = to_cp->c_cnid; + /* + * Pull the file ID from the tempattr we copied above. We can't assume + * it is the same as the CNID. + */ + to_cp->c_fileid = tempattr.ca_fileid; to_cp->c_itime = tempattr.ca_itime; to_cp->c_btime = tempattr.ca_btime; to_cp->c_atime = tempattr.ca_atime; to_cp->c_ctime = tempattr.ca_ctime; to_cp->c_gid = tempattr.ca_gid; to_cp->c_uid = tempattr.ca_uid; - to_cp->c_flags = tempattr.ca_flags; + to_cp->c_bsdflags = tempattr.ca_flags; to_cp->c_mode = tempattr.ca_mode; + to_cp->c_linkcount = tempattr.ca_linkcount; + to_cp->c_attr.ca_linkref = tempattr.ca_linkref; + to_cp->c_attr.ca_firstlink = tempattr.ca_firstlink; + + /* + * Only OR in the "from" flags into our cnode flags below. + * Leave the rest of the flags alone. + */ + to_cp->c_flag |= from_flag_special; + to_cp->c_attr.ca_recflags = tempattr.ca_recflags; bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32); /* Rehash the cnodes using their new file IDs */ - hfs_chash_rehash(from_cp, to_cp); + hfs_chash_rehash(hfsmp, from_cp, to_cp); /* * When a file moves out of "Cleanup At Startup" * we can drop its NODUMP status. */ - if ((from_cp->c_flags & UF_NODUMP) && + if ((from_cp->c_bsdflags & UF_NODUMP) && (from_cp->c_parentcnid != to_cp->c_parentcnid)) { - from_cp->c_flags &= ~UF_NODUMP; + from_cp->c_bsdflags &= ~UF_NODUMP; from_cp->c_touch_chgtime = TRUE; } - if ((to_cp->c_flags & UF_NODUMP) && + if ((to_cp->c_bsdflags & UF_NODUMP) && (to_cp->c_parentcnid != from_cp->c_parentcnid)) { - to_cp->c_flags &= ~UF_NODUMP; + to_cp->c_bsdflags &= ~UF_NODUMP; to_cp->c_touch_chgtime = TRUE; } - HFS_KNOTE(from_vp, NOTE_ATTRIB); - HFS_KNOTE(to_vp, NOTE_ATTRIB); - exit: if (got_cookie) { cat_postflight(hfsmp, &cookie, vfs_context_proc(ap->a_context)); } - if (started_tr) { - hfs_end_transaction(hfsmp); + if (started_tr) { + hfs_end_transaction(hfsmp); + } + + hfs_unlockpair(from_cp, to_cp); + return (error); +} + +int +hfs_vnop_mmap(struct vnop_mmap_args *ap) +{ + struct vnode *vp = ap->a_vp; + int error; + + if (VNODE_IS_RSRC(vp)) { + /* allow pageins of the resource fork */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + time_t orig_ctime = VTOC(vp)->c_ctime; + + if (!compressed && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { + error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); + if (error != 0) { + return error; + } + } + + if (ap->a_fflags & PROT_WRITE) { + check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + } + } + + // + // NOTE: we return ENOTSUP because we want the cluster layer + // to actually do all the real work. + // + return (ENOTSUP); +} + +/* + * hfs_movedata + * + * This is a non-symmetric variant of exchangedata. In this function, + * the contents of the fork in from_vp are moved to the fork + * specified by to_vp. + * + * The cnodes pointed to by 'from_vp' and 'to_vp' must be locked. + * + * The vnode pointed to by 'to_vp' *must* be empty prior to invoking this function. + * We impose this restriction because we may not be able to fully delete the entire + * file's contents in a single transaction, particularly if it has a lot of extents. + * In the normal file deletion codepath, the file is screened for two conditions: + * 1) bigger than 400MB, and 2) more than 8 extents. If so, the file is relocated to + * the hidden directory and the deletion is broken up into multiple truncates. We can't + * do that here because both files need to exist in the namespace. The main reason this + * is imposed is that we may have to touch a whole lot of bitmap blocks if there are + * many extents. + * + * Any data written to 'from_vp' after this call completes is not guaranteed + * to be moved. + * + * Arguments: + * vnode from_vp: source file + * vnode to_vp: destination file; must be empty + * + * Returns: + * EFBIG - Destination file was not empty + * 0 - success + * + * + */ +int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) { + + struct cnode *from_cp; + struct cnode *to_cp; + struct hfsmount *hfsmp = NULL; + int error = 0; + int started_tr = 0; + int lockflags = 0; + int overflow_blocks; + int rsrc = 0; + + + /* Get the HFS pointers */ + from_cp = VTOC(from_vp); + to_cp = VTOC(to_vp); + hfsmp = VTOHFS(from_vp); + + /* Verify that neither source/dest file is open-unlinked */ + if (from_cp->c_flag & (C_DELETED | C_NOEXISTS)) { + error = EBUSY; + goto movedata_exit; + } + + if (to_cp->c_flag & (C_DELETED | C_NOEXISTS)) { + error = EBUSY; + goto movedata_exit; + } + + /* + * Verify the source file is not in use by anyone besides us. + * + * This function is typically invoked by a namespace handler + * process responding to a temporarily stalled system call. + * The FD that it is working off of is opened O_EVTONLY, so + * it really has no active usecounts (the kusecount from O_EVTONLY + * is subtracted from the total usecounts). + * + * As a result, we shouldn't have any active usecounts against + * this vnode when we go to check it below. + */ + if (vnode_isinuse(from_vp, 0)) { + error = EBUSY; + goto movedata_exit; + } + + if (from_cp->c_rsrc_vp == from_vp) { + rsrc = 1; + } + + /* + * We assume that the destination file is already empty. + * Verify that it is. + */ + if (rsrc) { + if (to_cp->c_rsrcfork->ff_size > 0) { + error = EFBIG; + goto movedata_exit; + } + } + else { + if (to_cp->c_datafork->ff_size > 0) { + error = EFBIG; + goto movedata_exit; + } + } + + /* If the source has the rsrc open, make sure the destination is also the rsrc */ + if (rsrc) { + if (to_vp != to_cp->c_rsrc_vp) { + error = EINVAL; + goto movedata_exit; + } + } + else { + /* Verify that both forks are data forks */ + if (to_vp != to_cp->c_vp) { + error = EINVAL; + goto movedata_exit; + } + } + + /* + * See if the source file has overflow extents. If it doesn't, we don't + * need to call into MoveData, and the catalog will be enough. + */ + if (rsrc) { + overflow_blocks = overflow_extents(from_cp->c_rsrcfork); + } + else { + overflow_blocks = overflow_extents(from_cp->c_datafork); + } + + if ((error = hfs_start_transaction (hfsmp)) != 0) { + goto movedata_exit; + } + started_tr = 1; + + /* Lock the system files: catalog, extents, attributes */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + /* Copy over any catalog allocation data into the new spot. */ + if (rsrc) { + if ((error = hfs_move_fork (from_cp->c_rsrcfork, from_cp, to_cp->c_rsrcfork, to_cp))){ + hfs_systemfile_unlock(hfsmp, lockflags); + goto movedata_exit; + } + } + else { + if ((error = hfs_move_fork (from_cp->c_datafork, from_cp, to_cp->c_datafork, to_cp))) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto movedata_exit; + } + } + + /* + * Note that because all we're doing is moving the extents around, we can + * probably do this in a single transaction: Each extent record (group of 8) + * is 64 bytes. A extent overflow B-Tree node is typically 4k. This means + * each node can hold roughly ~60 extent records == (480 extents). + * + * If a file was massively fragmented and had 20k extents, this means we'd + * roughly touch 20k/480 == 41 to 42 nodes, plus the index nodes, for half + * of the operation. (inserting or deleting). So if we're manipulating 80-100 + * nodes, this is basically 320k of data to write to the journal in + * a bad case. + */ + if (overflow_blocks != 0) { + if (rsrc) { + error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 1); + } + else { + error = MoveData (hfsmp, from_cp->c_cnid, to_cp->c_cnid, 0); + } + } + + if (error) { + /* Reverse the operation. Copy the fork data back into the source */ + if (rsrc) { + hfs_move_fork (to_cp->c_rsrcfork, to_cp, from_cp->c_rsrcfork, from_cp); + } + else { + hfs_move_fork (to_cp->c_datafork, to_cp, from_cp->c_datafork, from_cp); + } + } + else { + struct cat_fork *src_data = NULL; + struct cat_fork *src_rsrc = NULL; + struct cat_fork *dst_data = NULL; + struct cat_fork *dst_rsrc = NULL; + + /* Touch the times*/ + to_cp->c_touch_acctime = TRUE; + to_cp->c_touch_chgtime = TRUE; + to_cp->c_touch_modtime = TRUE; + + from_cp->c_touch_acctime = TRUE; + from_cp->c_touch_chgtime = TRUE; + from_cp->c_touch_modtime = TRUE; + + hfs_touchtimes(hfsmp, to_cp); + hfs_touchtimes(hfsmp, from_cp); + + if (from_cp->c_datafork) { + src_data = &from_cp->c_datafork->ff_data; + } + if (from_cp->c_rsrcfork) { + src_rsrc = &from_cp->c_rsrcfork->ff_data; + } + + if (to_cp->c_datafork) { + dst_data = &to_cp->c_datafork->ff_data; + } + if (to_cp->c_rsrcfork) { + dst_rsrc = &to_cp->c_rsrcfork->ff_data; + } + + /* Update the catalog nodes */ + (void) cat_update(hfsmp, &from_cp->c_desc, &from_cp->c_attr, + src_data, src_rsrc); + + (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, + dst_data, dst_rsrc); + + } + /* unlock the system files */ + hfs_systemfile_unlock(hfsmp, lockflags); + + +movedata_exit: + if (started_tr) { + hfs_end_transaction(hfsmp); + } + + return error; + +} + +/* + * Copy all of the catalog and runtime data in srcfork to dstfork. + * + * This allows us to maintain the invalid ranges across the movedata operation so + * we don't need to force all of the pending IO right now. In addition, we move all + * non overflow-extent extents into the destination here. + */ +static int hfs_move_fork (struct filefork *srcfork, struct cnode *src_cp, + struct filefork *dstfork, struct cnode *dst_cp) { + struct rl_entry *invalid_range; + int size = sizeof(struct HFSPlusExtentDescriptor); + size = size * kHFSPlusExtentDensity; + + /* If the dstfork has any invalid ranges, bail out */ + invalid_range = TAILQ_FIRST(&dstfork->ff_invalidranges); + if (invalid_range != NULL) { + return EFBIG; + } + + if (dstfork->ff_data.cf_size != 0 || dstfork->ff_data.cf_new_size != 0) { + return EFBIG; + } + + /* First copy the invalid ranges */ + while ((invalid_range = TAILQ_FIRST(&srcfork->ff_invalidranges))) { + off_t start = invalid_range->rl_start; + off_t end = invalid_range->rl_end; + + /* Remove it from the srcfork and add it to dstfork */ + rl_remove(start, end, &srcfork->ff_invalidranges); + rl_add(start, end, &dstfork->ff_invalidranges); } - - hfs_unlockpair(from_cp, to_cp); - return (error); + + /* + * Ignore the ff_union. We don't move symlinks or system files. + * Now copy the in-catalog extent information + */ + dstfork->ff_data.cf_size = srcfork->ff_data.cf_size; + dstfork->ff_data.cf_new_size = srcfork->ff_data.cf_new_size; + dstfork->ff_data.cf_vblocks = srcfork->ff_data.cf_vblocks; + dstfork->ff_data.cf_blocks = srcfork->ff_data.cf_blocks; + + /* just memcpy the whole array of extents to the new location. */ + memcpy (dstfork->ff_data.cf_extents, srcfork->ff_data.cf_extents, size); + + /* + * Copy the cnode attribute data. + * + */ + src_cp->c_blocks -= srcfork->ff_data.cf_vblocks; + src_cp->c_blocks -= srcfork->ff_data.cf_blocks; + + dst_cp->c_blocks += srcfork->ff_data.cf_vblocks; + dst_cp->c_blocks += srcfork->ff_data.cf_blocks; + + /* Now delete the entries in the source fork */ + srcfork->ff_data.cf_size = 0; + srcfork->ff_data.cf_new_size = 0; + srcfork->ff_data.cf_union.cfu_bytesread = 0; + srcfork->ff_data.cf_vblocks = 0; + srcfork->ff_data.cf_blocks = 0; + + /* Zero out the old extents */ + bzero (srcfork->ff_data.cf_extents, size); + return 0; } - + /* * cnode must be locked */ -__private_extern__ int hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) { @@ -1029,16 +2260,28 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) struct filefork *fp = NULL; int retval = 0; struct hfsmount *hfsmp = VTOHFS(vp); + struct rl_entry *invalid_range; struct timeval tv; - int wait; + int waitdata; /* attributes necessary for data retrieval */ + int wait; /* all other attributes (e.g. atime, etc.) */ int lockflag; int took_trunc_lock = 0; + int locked_buffers = 0; + /* + * Applications which only care about data integrity rather than full + * file integrity may opt out of (delay) expensive metadata update + * operations as a performance optimization. + */ wait = (waitfor == MNT_WAIT); - + waitdata = (waitfor == MNT_DWAIT) | wait; + if (always_do_fullfsync) + fullsync = 1; + /* HFS directories don't have any data blocks. */ if (vnode_isdir(vp)) goto metasync; + fp = VTOF(vp); /* * For system files flush the B-tree header and @@ -1053,11 +2296,16 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) } } else if (UBCINFOEXISTS(vp)) { hfs_unlock(cp); - hfs_lock_truncate(cp, TRUE); + hfs_lock_truncate(cp, HFS_SHARED_LOCK); took_trunc_lock = 1; + if (fp->ff_unallocblocks != 0) { + hfs_unlock_truncate(cp, 0); + + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + } /* Don't hold cnode lock when calling into cluster layer. */ - (void) cluster_push(vp, 0); + (void) cluster_push(vp, waitdata ? IO_SYNC : 0); hfs_lock(cp, HFS_FORCE_LOCK); } @@ -1068,53 +2316,58 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) * * Files with NODUMP can bypass zero filling here. */ - if ((wait || (cp->c_flag & C_ZFWANTSYNC)) && - ((cp->c_flags & UF_NODUMP) == 0) && - UBCINFOEXISTS(vp) && (fp = VTOF(vp)) && - cp->c_zftimeout != 0) { + if (fp && (((cp->c_flag & C_ALWAYS_ZEROFILL) && !TAILQ_EMPTY(&fp->ff_invalidranges)) || + ((wait || (cp->c_flag & C_ZFWANTSYNC)) && + ((cp->c_bsdflags & UF_NODUMP) == 0) && + UBCINFOEXISTS(vp) && (vnode_issystem(vp) ==0) && + cp->c_zftimeout != 0))) { + microuptime(&tv); - if (tv.tv_sec < cp->c_zftimeout) { + if ((cp->c_flag & C_ALWAYS_ZEROFILL) == 0 && !fullsync && tv.tv_sec < (long)cp->c_zftimeout) { /* Remember that a force sync was requested. */ cp->c_flag |= C_ZFWANTSYNC; goto datasync; } - if (!took_trunc_lock) { - hfs_unlock(cp); - hfs_lock_truncate(cp, TRUE); - hfs_lock(cp, HFS_FORCE_LOCK); - took_trunc_lock = 1; - } - - while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) { - struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges); - off_t start = invalid_range->rl_start; - off_t end = invalid_range->rl_end; + if (!TAILQ_EMPTY(&fp->ff_invalidranges)) { + if (!took_trunc_lock || (cp->c_truncatelockowner == HFS_SHARED_OWNER)) { + hfs_unlock(cp); + if (took_trunc_lock) { + hfs_unlock_truncate(cp, 0); + } + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock(cp, HFS_FORCE_LOCK); + took_trunc_lock = 1; + } + while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) { + off_t start = invalid_range->rl_start; + off_t end = invalid_range->rl_end; - /* The range about to be written must be validated - * first, so that VNOP_BLOCKMAP() will return the - * appropriate mapping for the cluster code: - */ - rl_remove(start, end, &fp->ff_invalidranges); + /* The range about to be written must be validated + * first, so that VNOP_BLOCKMAP() will return the + * appropriate mapping for the cluster code: + */ + rl_remove(start, end, &fp->ff_invalidranges); - /* Don't hold cnode lock when calling into cluster layer. */ + /* Don't hold cnode lock when calling into cluster layer. */ + hfs_unlock(cp); + (void) cluster_write(vp, (struct uio *) 0, + fp->ff_size, end + 1, start, (off_t)0, + IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); + hfs_lock(cp, HFS_FORCE_LOCK); + cp->c_flag |= C_MODIFIED; + } hfs_unlock(cp); - (void) cluster_write(vp, (struct uio *) 0, - fp->ff_size, end + 1, start, (off_t)0, - IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); + (void) cluster_push(vp, waitdata ? IO_SYNC : 0); hfs_lock(cp, HFS_FORCE_LOCK); - cp->c_flag |= C_MODIFIED; } - hfs_unlock(cp); - (void) cluster_push(vp, 0); - hfs_lock(cp, HFS_FORCE_LOCK); - cp->c_flag &= ~C_ZFWANTSYNC; cp->c_zftimeout = 0; } datasync: - if (took_trunc_lock) - hfs_unlock_truncate(cp); - + if (took_trunc_lock) { + hfs_unlock_truncate(cp, 0); + took_trunc_lock = 0; + } /* * if we have a journal and if journal_active() returns != 0 then the * we shouldn't do anything to a locked block (because it is part @@ -1132,8 +2385,32 @@ datasync: /* * Flush all dirty buffers associated with a vnode. + * Record how many of them were dirty AND locked (if necessary). */ - buf_flushdirtyblks(vp, wait, lockflag, "hfs_fsync"); + locked_buffers = buf_flushdirtyblks_skipinfo(vp, waitdata, lockflag, "hfs_fsync"); + if ((lockflag & BUF_SKIP_LOCKED) && (locked_buffers) && (vnode_vtype(vp) == VLNK)) { + /* + * If there are dirty symlink buffers, then we may need to take action + * to prevent issues later on if we are journaled. If we're fsyncing a + * symlink vnode then we are in one of three cases: + * + * 1) automatic sync has fired. In this case, we don't want the behavior to change. + * + * 2) Someone has opened the FD for the symlink (not what it points to) + * and has issued an fsync against it. This should be rare, and we don't + * want the behavior to change. + * + * 3) We are being called by a vclean which is trying to reclaim this + * symlink vnode. If this is the case, then allowing this fsync to + * proceed WITHOUT flushing the journal could result in the vclean + * invalidating the buffer's blocks before the journal transaction is + * written to disk. To prevent this, we force a journal flush + * if the vnode is in the middle of a recycle (VL_TERMINATE or VL_DEAD is set). + */ + if (vnode_isrecycled(vp)) { + fullsync = 1; + } + } metasync: if (vnode_isreg(vp) && vnode_issystem(vp)) { @@ -1147,23 +2424,39 @@ metasync: } else if ( !(vp->v_flag & VSWAP) ) /* User file */ { retval = hfs_update(vp, wait); - /* When MNT_WAIT is requested push out any delayed meta data */ - if ((retval == 0) && wait && cp->c_hint && + /* + * When MNT_WAIT is requested push out the catalog record for + * this file. If they asked for a full fsync, we can skip this + * because the journal_flush or hfs_metasync_all will push out + * all of the metadata changes. + */ + if ((retval == 0) && wait && !fullsync && cp->c_hint && !ISSET(cp->c_flag, C_DELETED | C_NOEXISTS)) { hfs_metasync(VTOHFS(vp), (daddr64_t)cp->c_hint, p); - } + } - // make sure that we've really been called from the user - // fsync() and if so push out any pending transactions - // that this file might is a part of (and get them on - // stable storage). - if (fullsync || always_do_fullfsync) { - if (hfsmp->jnl) { - journal_flush(hfsmp->jnl); - } else { - /* XXX need to pass context! */ - VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); - } + /* + * If this was a full fsync, make sure all metadata + * changes get to stable storage. + */ + if (fullsync) { + if (hfsmp->jnl) { + hfs_journal_flush(hfsmp, FALSE); + + if (journal_uses_fua(hfsmp->jnl)) { + /* + * the journal_flush did NOT issue a sync track cache command, + * and the fullsync indicates we are supposed to flush all cached + * data to the media, so issue the sync track cache command + * explicitly + */ + VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); + } + } else { + retval = hfs_metasync_all(hfsmp); + /* XXX need to pass context! */ + VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); + } } } @@ -1172,8 +2465,8 @@ metasync: /* Sync an hfs catalog b-tree node */ -static int -hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, struct proc *p) +int +hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p) { vnode_t vp; buf_t bp; @@ -1212,9 +2505,46 @@ hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, struct proc *p) } +/* + * Sync all hfs B-trees. Use this instead of journal_flush for a volume + * without a journal. Note that the volume bitmap does not get written; + * we rely on fsck_hfs to fix that up (which it can do without any loss + * of data). + */ +int +hfs_metasync_all(struct hfsmount *hfsmp) +{ + int lockflags; + + /* Lock all of the B-trees so we get a mutually consistent state */ + lockflags = hfs_systemfile_lock(hfsmp, + SFL_CATALOG|SFL_EXTENTS|SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + /* Sync each of the B-trees */ + if (hfsmp->hfs_catalog_vp) + hfs_btsync(hfsmp->hfs_catalog_vp, 0); + if (hfsmp->hfs_extents_vp) + hfs_btsync(hfsmp->hfs_extents_vp, 0); + if (hfsmp->hfs_attribute_vp) + hfs_btsync(hfsmp->hfs_attribute_vp, 0); + + /* Wait for all of the writes to complete */ + if (hfsmp->hfs_catalog_vp) + vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_metasync_all"); + if (hfsmp->hfs_extents_vp) + vnode_waitforwrites(hfsmp->hfs_extents_vp, 0, 0, 0, "hfs_metasync_all"); + if (hfsmp->hfs_attribute_vp) + vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs_metasync_all"); + + hfs_systemfile_unlock(hfsmp, lockflags); + + return 0; +} + + /*ARGSUSED 1*/ static int -hfs_btsync_callback(struct buf *bp, void *dummy) +hfs_btsync_callback(struct buf *bp, __unused void *dummy) { buf_clearflags(bp, B_LOCKED); (void) buf_bawrite(bp); @@ -1223,7 +2553,6 @@ hfs_btsync_callback(struct buf *bp, void *dummy) } -__private_extern__ int hfs_btsync(struct vnode *vp, int sync_transaction) { @@ -1251,7 +2580,7 @@ hfs_btsync(struct vnode *vp, int sync_transaction) /* * Remove a directory. */ -static int +int hfs_vnop_rmdir(ap) struct vnop_rmdir_args /* { struct vnode *a_dvp; @@ -1262,20 +2591,35 @@ hfs_vnop_rmdir(ap) { struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; + struct cnode *dcp = VTOC(dvp); + struct cnode *cp = VTOC(vp); int error; + time_t orig_ctime; - if (!vnode_isdir(vp)) { + orig_ctime = VTOC(vp)->c_ctime; + + if (!S_ISDIR(cp->c_mode)) { return (ENOTDIR); } if (dvp == vp) { return (EINVAL); } - if ((error = hfs_lockpair(VTOC(dvp), VTOC(vp), HFS_EXCLUSIVE_LOCK))) + + check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); + cp = VTOC(vp); + + if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { return (error); + } - error = hfs_removedir(dvp, vp, ap->a_cnp, 0); + /* Check for a race with rmdir on the parent directory */ + if (dcp->c_flag & (C_DELETED | C_NOEXISTS)) { + hfs_unlockpair (dcp, cp); + return ENOENT; + } + error = hfs_removedir(dvp, vp, ap->a_cnp, 0, 0); - hfs_unlockpair(VTOC(dvp), VTOC(vp)); + hfs_unlockpair(dcp, cp); return (error); } @@ -1285,46 +2629,114 @@ hfs_vnop_rmdir(ap) * * Both dvp and vp cnodes are locked */ -static int +int hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, - int skip_reserve) + int skip_reserve, int only_unlink) { - vfs_context_t ctx = cnp->cn_context; - struct proc *p = vfs_context_proc(ctx); struct cnode *cp; struct cnode *dcp; struct hfsmount * hfsmp; struct cat_desc desc; - cat_cookie_t cookie; int lockflags; - int error = 0, started_tr = 0, got_cookie = 0; + int error = 0, started_tr = 0; cp = VTOC(vp); dcp = VTOC(dvp); hfsmp = VTOHFS(vp); - if (dcp == cp) + if (dcp == cp) { return (EINVAL); /* cannot remove "." */ + } + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + return (0); + } + if (cp->c_entries != 0) { + return (ENOTEMPTY); + } + + /* + * If the directory is open or in use (e.g. opendir() or current working + * directory for some process); wait for inactive/reclaim to actually + * remove cnode from the catalog. Both inactive and reclaim codepaths are capable + * of removing open-unlinked directories from the catalog, as well as getting rid + * of EAs still on the element. So change only_unlink to true, so that it will get + * cleaned up below. + * + * Otherwise, we can get into a weird old mess where the directory has C_DELETED, + * but it really means C_NOEXISTS because the item was actually removed from the + * catalog. Then when we try to remove the entry from the catalog later on, it won't + * really be there anymore. + */ + if (vnode_isinuse(vp, 0)) { + only_unlink = 1; + } + + /* Deal with directory hardlinks */ + if (cp->c_flag & C_HARDLINK) { + /* + * Note that if we have a directory which was a hardlink at any point, + * its actual directory data is stored in the directory inode in the hidden + * directory rather than the leaf element(s) present in the namespace. + * + * If there are still other hardlinks to this directory, + * then we'll just eliminate this particular link and the vnode will still exist. + * If this is the last link to an empty directory, then we'll open-unlink the + * directory and it will be only tagged with C_DELETED (as opposed to C_NOEXISTS). + * + * We could also return EBUSY here. + */ + + return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve); + } + + /* + * In a few cases, we may want to allow the directory to persist in an + * open-unlinked state. If the directory is being open-unlinked (still has usecount + * references), or if it has EAs, or if it was being deleted as part of a rename, + * then we go ahead and move it to the hidden directory. + * + * If the directory is being open-unlinked, then we want to keep the catalog entry + * alive so that future EA calls and fchmod/fstat etc. do not cause issues later. + * + * If the directory had EAs, then we want to use the open-unlink trick so that the + * EA removal is not done in one giant transaction. Otherwise, it could cause a panic + * due to overflowing the journal. + * + * Finally, if it was deleted as part of a rename, we move it to the hidden directory + * in order to maintain rename atomicity. + * + * Note that the allow_dirs argument to hfs_removefile specifies that it is + * supposed to handle directories for this case. + */ + + if (((hfsmp->hfs_attribute_vp != NULL) && + ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0)) || + (only_unlink != 0)) { + + int ret = hfs_removefile(dvp, vp, cnp, 0, 0, 1, NULL, only_unlink); + /* + * Even though hfs_vnop_rename calls vnode_recycle for us on tvp we call + * it here just in case we were invoked by rmdir() on a directory that had + * EAs. To ensure that we start reclaiming the space as soon as possible, + * we call vnode_recycle on the directory. + */ + vnode_recycle(vp); + + return ret; + + } + + dcp->c_flag |= C_DIR_MODIFICATION; #if QUOTA - (void)hfs_getinoquota(cp); + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_getinoquota(cp); #endif if ((error = hfs_start_transaction(hfsmp)) != 0) { goto out; } started_tr = 1; - if (!skip_reserve) { - /* - * Reserve some space in the Catalog file. - */ - bzero(&cookie, sizeof(cookie)); - if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { - goto out; - } - got_cookie = 1; - } - /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since @@ -1332,18 +2744,11 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, * the current directory and thus be * non-empty.) */ - if (cp->c_entries != 0) { - error = ENOTEMPTY; - goto out; - } - if ((dcp->c_flags & APPEND) || (cp->c_flags & (IMMUTABLE | APPEND))) { + if ((dcp->c_bsdflags & APPEND) || (cp->c_bsdflags & (IMMUTABLE | APPEND))) { error = EPERM; goto out; } - if (cp->c_entries > 0) - panic("hfs_rmdir: attempting to delete a non-empty directory!"); - /* Remove the entry from the namei cache: */ cache_purge(vp); @@ -1352,51 +2757,64 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, * name passed in and parent id from dvp (instead of using * the cp->c_desc which may have changed). */ - bzero(&desc, sizeof(desc)); - desc.cd_nameptr = cnp->cn_nameptr; + desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; desc.cd_namelen = cnp->cn_namelen; - desc.cd_parentcnid = dcp->c_cnid; + desc.cd_parentcnid = dcp->c_fileid; desc.cd_cnid = cp->c_cnid; + desc.cd_flags = CD_ISDIR; + desc.cd_encoding = cp->c_encoding; + desc.cd_hint = 0; + + if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid, NULL, &error)) { + error = 0; + goto out; + } /* Remove entry from catalog */ - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + if (!skip_reserve) { + /* + * Reserve some space in the Catalog file. + */ + if ((error = cat_preflight(hfsmp, CAT_DELETE, NULL, 0))) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + } + error = cat_delete(hfsmp, &desc, &cp->c_attr); if (error == 0) { - /* Delete any attributes, ignore errors */ - (void) hfs_removeallattr(hfsmp, cp->c_fileid); + /* The parent lost a child */ + if (dcp->c_entries > 0) + dcp->c_entries--; + DEC_FOLDERCOUNT(hfsmp, dcp->c_attr); + dcp->c_dirchangecnt++; + dcp->c_touch_chgtime = TRUE; + dcp->c_touch_modtime = TRUE; + hfs_touchtimes(hfsmp, cp); + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE); } + hfs_systemfile_unlock(hfsmp, lockflags); if (error) goto out; #if QUOTA - (void)hfs_chkiq(cp, -1, NOCRED, 0); + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_chkiq(cp, -1, NOCRED, 0); #endif /* QUOTA */ - /* The parent lost a child */ - if (dcp->c_entries > 0) - dcp->c_entries--; - if (dcp->c_nlink > 0) - dcp->c_nlink--; - dcp->c_touch_chgtime = TRUE; - dcp->c_touch_modtime = TRUE; - - dcp->c_flag |= C_FORCEUPDATE; // XXXdbg - don't screw around, force this guy out - - (void) hfs_update(dvp, 0); - HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); - hfs_volupdate(hfsmp, VOL_RMDIR, (dcp->c_cnid == kHFSRootFolderID)); - cp->c_mode = 0; /* Makes the vnode go away...see inactive */ + /* Mark C_NOEXISTS since the catalog entry is now gone */ cp->c_flag |= C_NOEXISTS; out: - HFS_KNOTE(vp, NOTE_DELETE); + dcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&dcp->c_flag); - if (got_cookie) { - cat_postflight(hfsmp, &cookie, p); - } if (started_tr) { hfs_end_transaction(hfsmp); } @@ -1408,7 +2826,7 @@ out: /* * Remove a file or link. */ -static int +int hfs_vnop_remove(ap) struct vnop_remove_args /* { struct vnode *a_dvp; @@ -1420,31 +2838,143 @@ hfs_vnop_remove(ap) { struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; - int error; + struct cnode *dcp = VTOC(dvp); + struct cnode *cp; + struct vnode *rvp = NULL; + int error=0, recycle_rsrc=0; + time_t orig_ctime; + uint32_t rsrc_vid = 0; if (dvp == vp) { return (EINVAL); } - hfs_lock_truncate(VTOC(vp), TRUE); + orig_ctime = VTOC(vp)->c_ctime; + if ( (!vnode_isnamedstream(vp)) && ((ap->a_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) == 0)) { + error = check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); + if (error) { + // XXXdbg - decide on a policy for handling namespace handler failures! + // for now we just let them proceed. + } + } + error = 0; + + cp = VTOC(vp); - if ((error = hfs_lockpair(VTOC(dvp), VTOC(vp), HFS_EXCLUSIVE_LOCK))) - goto out; +relock: - error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + + if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { + hfs_unlock_truncate(cp, 0); + if (rvp) { + vnode_put (rvp); + } + return (error); + } + + /* + * Lazily respond to determining if there is a valid resource fork + * vnode attached to 'cp' if it is a regular file or symlink. + * If the vnode does not exist, then we may proceed without having to + * create it. + * + * If, however, it does exist, then we need to acquire an iocount on the + * vnode after acquiring its vid. This ensures that if we have to do I/O + * against it, it can't get recycled from underneath us in the middle + * of this call. + * + * Note: this function may be invoked for directory hardlinks, so just skip these + * steps if 'vp' is a directory. + */ + + + if ((vp->v_type == VLNK) || (vp->v_type == VREG)) { + if ((cp->c_rsrc_vp) && (rvp == NULL)) { + /* We need to acquire the rsrc vnode */ + rvp = cp->c_rsrc_vp; + rsrc_vid = vnode_vid (rvp); + + /* Unlock everything to acquire iocount on the rsrc vnode */ + hfs_unlock_truncate (cp, 0); + hfs_unlockpair (dcp, cp); + + /* Use the vid to maintain identity on rvp */ + if (vnode_getwithvid(rvp, rsrc_vid)) { + /* + * If this fails, then it was recycled or + * reclaimed in the interim. Reset fields and + * start over. + */ + rvp = NULL; + rsrc_vid = 0; + } + goto relock; + } + } + + /* + * Check to see if we raced rmdir for the parent directory + * hfs_removefile already checks for a race on vp/cp + */ + if (dcp->c_flag & (C_DELETED | C_NOEXISTS)) { + error = ENOENT; + goto rm_done; + } + + error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0, NULL, 0); + + /* + * If the remove succeeded in deleting the file, then we may need to mark + * the resource fork for recycle so that it is reclaimed as quickly + * as possible. If it were not recycled quickly, then this resource fork + * vnode could keep a v_parent reference on the data fork, which prevents it + * from going through reclaim (by giving it extra usecounts), except in the force- + * unmount case. + * + * However, a caveat: we need to continue to supply resource fork + * access to open-unlinked files even if the resource fork is not open. This is + * a requirement for the compressed files work. Luckily, hfs_vgetrsrc will handle + * this already if the data fork has been re-parented to the hidden directory. + * + * As a result, all we really need to do here is mark the resource fork vnode + * for recycle. If it goes out of core, it can be brought in again if needed. + * If the cnode was instead marked C_NOEXISTS, then there wouldn't be any + * more work. + */ + if ((error == 0) && (rvp)) { + recycle_rsrc = 1; + } + + /* + * Drop the truncate lock before unlocking the cnode + * (which can potentially perform a vnode_put and + * recycle the vnode which in turn might require the + * truncate lock) + */ +rm_done: + hfs_unlock_truncate(cp, 0); + hfs_unlockpair(dcp, cp); + + if (recycle_rsrc) { + /* inactive or reclaim on rvp will clean up the blocks from the rsrc fork */ + vnode_recycle(rvp); + } + + if (rvp) { + /* drop iocount on rsrc fork, was obtained at beginning of fxn */ + vnode_put(rvp); + } - hfs_unlockpair(VTOC(dvp), VTOC(vp)); -out: - hfs_unlock_truncate(VTOC(vp)); return (error); } -static int +int hfs_removefile_callback(struct buf *bp, void *hfsmp) { if ( !(buf_flags(bp) & B_META)) - panic("hfs: symlink bp @ 0x%x is not marked meta-data!\n", bp); + panic("hfs: symlink bp @ %p is not marked meta-data!\n", bp); /* * it's part of the current transaction, kill it. */ @@ -1457,90 +2987,171 @@ hfs_removefile_callback(struct buf *bp, void *hfsmp) { * hfs_removefile * * Similar to hfs_vnop_remove except there are additional options. + * This function may be used to remove directories if they have + * lots of EA's -- note the 'allow_dirs' argument. + * + * This function is able to delete blocks & fork data for the resource + * fork even if it does not exist in core (and have a backing vnode). + * It should infer the correct behavior based on the number of blocks + * in the cnode and whether or not the resource fork pointer exists or + * not. As a result, one only need pass in the 'vp' corresponding to the + * data fork of this file (or main vnode in the case of a directory). + * Passing in a resource fork will result in an error. + * + * Because we do not create any vnodes in this function, we are not at + * risk of deadlocking against ourselves by double-locking. * * Requires cnode and truncate locks to be held. */ -static int +int hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, - int flags, int skip_reserve) + int flags, int skip_reserve, int allow_dirs, + __unused struct vnode *rvp, int only_unlink) { - struct vnode *rvp = NULL; struct cnode *cp; struct cnode *dcp; + struct vnode *rsrc_vp = NULL; struct hfsmount *hfsmp; struct cat_desc desc; struct timeval tv; - vfs_context_t ctx = cnp->cn_context; int dataforkbusy = 0; int rsrcforkbusy = 0; - int truncated = 0; - cat_cookie_t cookie; int lockflags; int error = 0; - int started_tr = 0, got_cookie = 0; - int isbigfile = 0; - cnid_t real_cnid = 0; - - /* Directories should call hfs_rmdir! */ - if (vnode_isdir(vp)) { - return (EISDIR); - } + int started_tr = 0; + int isbigfile = 0, defer_remove=0, isdir=0; + int update_vh = 0; cp = VTOC(vp); dcp = VTOC(dvp); hfsmp = VTOHFS(vp); + /* Check if we lost a race post lookup. */ if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { - return 0; + return (0); } - - // if it's not a hardlink, check that the parent - // cnid is the same as the directory cnid - if ( (cp->c_flag & C_HARDLINK) == 0 - && (cp->c_parentcnid != hfsmp->hfs_privdir_desc.cd_cnid) - && (cp->c_parentcnid != dcp->c_cnid)) { - error = EINVAL; - goto out; + + if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid, NULL, &error)) { + return 0; } /* Make sure a remove is permitted */ if (VNODE_IS_RSRC(vp)) { - error = EPERM; - goto out; + return (EPERM); + } + else { + /* + * We know it's a data fork. + * Probe the cnode to see if we have a valid resource fork + * in hand or not. + */ + rsrc_vp = cp->c_rsrc_vp; + } + + /* Don't allow deleting the journal or journal_info_block. */ + if (hfs_is_journal_file(hfsmp, cp)) { + return (EPERM); + } + + /* + * If removing a symlink, then we need to ensure that the + * data blocks for the symlink are not still in-flight or pending. + * If so, we will unlink the symlink here, making its blocks + * available for re-allocation by a subsequent transaction. That is OK, but + * then the I/O for the data blocks could then go out before the journal + * transaction that created it was flushed, leading to I/O ordering issues. + */ + if (vp->v_type == VLNK) { + /* + * This will block if the asynchronous journal flush is in progress. + * If this symlink is not being renamed over and doesn't have any open FDs, + * then we'll remove it from the journal's bufs below in kill_block. + */ + buf_wait_for_shadow_io (vp, 0); } /* - * Aquire a vnode for a non-empty resource fork. - * (needed for hfs_truncate) + * Hard links require special handling. */ - if (cp->c_blocks - VTOF(vp)->ff_blocks) { - error = hfs_vgetrsrc(hfsmp, vp, &rvp, 0); - if (error) - goto out; + if (cp->c_flag & C_HARDLINK) { + if ((flags & VNODE_REMOVE_NODELETEBUSY) && vnode_isinuse(vp, 0)) { + return (EBUSY); + } else { + /* A directory hard link with a link count of one is + * treated as a regular directory. Therefore it should + * only be removed using rmdir(). + */ + if ((vnode_isdir(vp) == 1) && (cp->c_linkcount == 1) && + (allow_dirs == 0)) { + return (EPERM); + } + return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve); + } + } + + /* Directories should call hfs_rmdir! (unless they have a lot of attributes) */ + if (vnode_isdir(vp)) { + if (allow_dirs == 0) + return (EPERM); /* POSIX */ + isdir = 1; + } + /* Sanity check the parent ids. */ + if ((cp->c_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + (cp->c_parentcnid != dcp->c_fileid)) { + return (EINVAL); } - // XXXdbg - don't allow deleting the journal or journal_info_block - if (hfsmp->jnl && cp->c_datafork) { - struct HFSPlusExtentDescriptor *extd; + dcp->c_flag |= C_DIR_MODIFICATION; - extd = &cp->c_datafork->ff_extents[0]; - if (extd->startBlock == HFSTOVCB(hfsmp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) { - error = EPERM; - goto out; + // this guy is going away so mark him as such + cp->c_flag |= C_DELETED; + + + /* Remove our entry from the namei cache. */ + cache_purge(vp); + + /* + * If the caller was operating on a file (as opposed to a + * directory with EAs), then we need to figure out + * whether or not it has a valid resource fork vnode. + * + * If there was a valid resource fork vnode, then we need + * to use hfs_truncate to eliminate its data. If there is + * no vnode, then we hold the cnode lock which would + * prevent it from being created. As a result, + * we can use the data deletion functions which do not + * require that a cnode/vnode pair exist. + */ + + /* Check if this file is being used. */ + if (isdir == 0) { + dataforkbusy = vnode_isinuse(vp, 0); + /* + * At this point, we know that 'vp' points to the + * a data fork because we checked it up front. And if + * there is no rsrc fork, rsrc_vp will be NULL. + */ + if (rsrc_vp && (cp->c_blocks - VTOF(vp)->ff_blocks)) { + rsrcforkbusy = vnode_isinuse(rsrc_vp, 0); } } + + /* Check if we have to break the deletion into multiple pieces. */ + if (isdir == 0) { + isbigfile = ((cp->c_datafork->ff_size >= HFS_BIGFILE_SIZE) && overflow_extents(VTOF(vp))); + } - /* - * Check if this file is being used. - */ - if (vnode_isinuse(vp, 0)) - dataforkbusy = 1; - if (rvp && vnode_isinuse(rvp, 0)) - rsrcforkbusy = 1; - - // need this to check if we have to break the deletion - // into multiple pieces - isbigfile = (VTOC(vp)->c_datafork->ff_size >= HFS_BIGFILE_SIZE); + /* Check if the file has xattrs. If it does we'll have to delete them in + individual transactions in case there are too many */ + if ((hfsmp->hfs_attribute_vp != NULL) && + (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) { + defer_remove = 1; + } + + /* If we are explicitly told to only unlink item and move to hidden dir, then do it */ + if (only_unlink) { + defer_remove = 1; + } /* * Carbon semantics prohibit deleting busy files. @@ -1548,57 +3159,38 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, */ if (dataforkbusy || rsrcforkbusy) { if ((flags & VNODE_REMOVE_NODELETEBUSY) || - (hfsmp->hfs_privdir_desc.cd_cnid == 0)) { + (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid == 0)) { error = EBUSY; goto out; } } #if QUOTA - (void)hfs_getinoquota(cp); + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_getinoquota(cp); #endif /* QUOTA */ - - /* - * We do the ubc_setsize before the hfs_truncate - * since we'll be inside a transaction. + + /* + * Do a ubc_setsize to indicate we need to wipe contents if: + * 1) item is a regular file. + * 2) Neither fork is busy AND we are not told to unlink this. + * + * We need to check for the defer_remove since it can be set without + * having a busy data or rsrc fork */ - if ((cp->c_flag & C_HARDLINK) == 0 && - (!dataforkbusy || !rsrcforkbusy)) { + if (isdir == 0 && (!dataforkbusy || !rsrcforkbusy) && (defer_remove == 0)) { /* - * A ubc_setsize can cause a pagein here - * so we need to the drop cnode lock. Note - * that we still hold the truncate lock. + * A ubc_setsize can cause a pagein so defer it + * until after the cnode lock is dropped. The + * cnode lock cannot be dropped/reacquired here + * since we might already hold the journal lock. */ - hfs_unlock(cp); if (!dataforkbusy && cp->c_datafork->ff_blocks && !isbigfile) { - ubc_setsize(vp, 0); + cp->c_flag |= C_NEED_DATA_SETSIZE; } - if (!rsrcforkbusy && rvp) { - ubc_setsize(rvp, 0); + if (!rsrcforkbusy && rsrc_vp) { + cp->c_flag |= C_NEED_RSRC_SETSIZE; } - hfs_lock(cp, HFS_FORCE_LOCK); - } else { - struct cat_desc cndesc; - - // for hard links, re-lookup the name that was passed - // in so we get the correct cnid for the name (as - // opposed to the c_cnid in the cnode which could have - // been changed before this node got locked). - bzero(&cndesc, sizeof(cndesc)); - cndesc.cd_nameptr = cnp->cn_nameptr; - cndesc.cd_namelen = cnp->cn_namelen; - cndesc.cd_parentcnid = VTOC(dvp)->c_cnid; - cndesc.cd_hint = VTOC(dvp)->c_childhint; - - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - - if (cat_lookup(hfsmp, &cndesc, 0, NULL, NULL, NULL, &real_cnid) != 0) { - hfs_systemfile_unlock(hfsmp, lockflags); - error = ENOENT; - goto out; - } - - hfs_systemfile_unlock(hfsmp, lockflags); } if ((error = hfs_start_transaction(hfsmp)) != 0) { @@ -1606,282 +3198,339 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, } started_tr = 1; - if (!skip_reserve) { - /* - * Reserve some space in the Catalog file. - */ - if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, 0))) { - goto out; - } - got_cookie = 1; - } - - /* Remove our entry from the namei cache. */ - cache_purge(vp); - // XXXdbg - if we're journaled, kill any dirty symlink buffers - if (hfsmp->jnl && vnode_islnk(vp)) + if (hfsmp->jnl && vnode_islnk(vp) && (defer_remove == 0)) { buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); + } /* - * Truncate any non-busy forks. Busy forks will - * get trucated when their vnode goes inactive. + * Prepare to truncate any non-busy forks. Busy forks will + * get truncated when their vnode goes inactive. + * Note that we will only enter this region if we + * can avoid creating an open-unlinked file. If + * either region is busy, we will have to create an open + * unlinked file. * - * Since we're already inside a transaction, - * tell hfs_truncate to skip the ubc_setsize. - * - * (Note: hard links are truncated in VOP_INACTIVE) + * Since we are deleting the file, we need to stagger the runtime + * modifications to do things in such a way that a crash won't + * result in us getting overlapped extents or any other + * bad inconsistencies. As such, we call prepare_release_storage + * which updates the UBC, updates quota information, and releases + * any loaned blocks that belong to this file. No actual + * truncation or bitmap manipulation is done until *AFTER* + * the catalog record is removed. */ - if ((cp->c_flag & C_HARDLINK) == 0) { - int mode = cp->c_mode; - + if (isdir == 0 && (!dataforkbusy && !rsrcforkbusy) && (only_unlink == 0)) { + if (!dataforkbusy && !isbigfile && cp->c_datafork->ff_blocks != 0) { - cp->c_mode = 0; /* Suppress hfs_update */ - error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ctx); - cp->c_mode = mode; - if (error) + + error = hfs_prepare_release_storage (hfsmp, vp); + if (error) { goto out; - truncated = 1; + } + update_vh = 1; } - if (!rsrcforkbusy && rvp) { - cp->c_mode = 0; /* Suppress hfs_update */ - error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, ctx); - cp->c_mode = mode; - if (error) + + /* + * If the resource fork vnode does not exist, we can skip this step. + */ + if (!rsrcforkbusy && rsrc_vp) { + error = hfs_prepare_release_storage (hfsmp, rsrc_vp); + if (error) { goto out; - truncated = 1; + } + update_vh = 1; } } - + /* * Protect against a race with rename by using the component * name passed in and parent id from dvp (instead of using - * the cp->c_desc which may have changed). + * the cp->c_desc which may have changed). Also, be aware that + * because we allow directories to be passed in, we need to special case + * this temporary descriptor in case we were handed a directory. */ - desc.cd_flags = 0; + if (isdir) { + desc.cd_flags = CD_ISDIR; + } + else { + desc.cd_flags = 0; + } desc.cd_encoding = cp->c_desc.cd_encoding; - desc.cd_nameptr = cnp->cn_nameptr; + desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; desc.cd_namelen = cnp->cn_namelen; - desc.cd_parentcnid = dcp->c_cnid; + desc.cd_parentcnid = dcp->c_fileid; desc.cd_hint = cp->c_desc.cd_hint; - if (real_cnid) { - // if it was a hardlink we had to re-lookup the cnid - desc.cd_cnid = real_cnid; - } else { - desc.cd_cnid = cp->c_cnid; - } + desc.cd_cnid = cp->c_cnid; microtime(&tv); /* - * There are 3 remove cases to consider: - * 1. File is a hardlink ==> remove the link - * 2. File is busy (in use) ==> move/rename the file - * 3. File is not in use ==> remove the file + * There are two cases to consider: + * 1. File/Dir is busy/big/defer_remove ==> move/rename the file/dir + * 2. File is not in use ==> remove the file + * + * We can get a directory in case 1 because it may have had lots of attributes, + * which need to get removed here. */ - - if (cp->c_flag & C_HARDLINK) { - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); - - /* Delete the link record */ - error = cat_delete(hfsmp, &desc, &cp->c_attr); - if (error == 0) { - /* Update the parent directory */ - if (dcp->c_entries > 0) - dcp->c_entries--; - if (dcp->c_nlink > 0) - dcp->c_nlink--; - dcp->c_ctime = tv.tv_sec; - dcp->c_mtime = tv.tv_sec; - (void ) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); - - if (--cp->c_nlink < 1) { - char inodename[32]; - char delname[32]; - struct cat_desc to_desc; - struct cat_desc from_desc; - - /* - * This is now esentially an open deleted file. - * Rename it to reflect this state which makes - * orphan file cleanup easier (see hfs_remove_orphans). - * Note: a rename failure here is not fatal. - */ - MAKE_INODE_NAME(inodename, cp->c_rdev); - bzero(&from_desc, sizeof(from_desc)); - from_desc.cd_nameptr = inodename; - from_desc.cd_namelen = strlen(inodename); - from_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid; - from_desc.cd_flags = 0; - from_desc.cd_cnid = cp->c_fileid; - - MAKE_DELETED_NAME(delname, cp->c_fileid); - bzero(&to_desc, sizeof(to_desc)); - to_desc.cd_nameptr = delname; - to_desc.cd_namelen = strlen(delname); - to_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid; - to_desc.cd_flags = 0; - to_desc.cd_cnid = cp->c_fileid; - - error = cat_rename(hfsmp, &from_desc, &hfsmp->hfs_privdir_desc, - &to_desc, (struct cat_desc *)NULL); - if (error != 0) { - panic("hfs_removefile: error %d from cat_rename(%s %s) cp 0x%x\n", - inodename, delname, cp); - } - if (error == 0) { - /* Update the file's state */ - cp->c_flag |= C_DELETED; - cp->c_ctime = tv.tv_sec; - (void) cat_update(hfsmp, &to_desc, &cp->c_attr, NULL, NULL); - } - } else { - /* Update the file's state */ - cp->c_ctime = tv.tv_sec; - (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); - } - } - hfs_systemfile_unlock(hfsmp, lockflags); - if (error != 0) - goto out; - - hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID)); - - } else if (dataforkbusy || rsrcforkbusy || isbigfile) { + if (dataforkbusy || rsrcforkbusy || isbigfile || defer_remove) { char delname[32]; struct cat_desc to_desc; struct cat_desc todir_desc; /* - * Orphan this file (move to hidden directory). + * Orphan this file or directory (move to hidden directory). + * Again, we need to take care that we treat directories as directories, + * and files as files. Because directories with attributes can be passed in + * check to make sure that we have a directory or a file before filling in the + * temporary descriptor's flags. We keep orphaned directories AND files in + * the FILE_HARDLINKS private directory since we're generalizing over all + * orphaned filesystem objects. */ bzero(&todir_desc, sizeof(todir_desc)); todir_desc.cd_parentcnid = 2; - MAKE_DELETED_NAME(delname, cp->c_fileid); + MAKE_DELETED_NAME(delname, sizeof(delname), cp->c_fileid); bzero(&to_desc, sizeof(to_desc)); - to_desc.cd_nameptr = delname; + to_desc.cd_nameptr = (const u_int8_t *)delname; to_desc.cd_namelen = strlen(delname); - to_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid; - to_desc.cd_flags = 0; + to_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + if (isdir) { + to_desc.cd_flags = CD_ISDIR; + } + else { + to_desc.cd_flags = 0; + } to_desc.cd_cnid = cp->c_cnid; lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + if (!skip_reserve) { + if ((error = cat_preflight(hfsmp, CAT_RENAME, NULL, 0))) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + } error = cat_rename(hfsmp, &desc, &todir_desc, &to_desc, (struct cat_desc *)NULL); if (error == 0) { - hfsmp->hfs_privdir_attr.ca_entries++; - (void) cat_update(hfsmp, &hfsmp->hfs_privdir_desc, - &hfsmp->hfs_privdir_attr, NULL, NULL); + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries++; + if (isdir == 1) { + INC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } + (void) cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); /* Update the parent directory */ if (dcp->c_entries > 0) dcp->c_entries--; - if (dcp->c_nlink > 0) - dcp->c_nlink--; + if (isdir == 1) { + DEC_FOLDERCOUNT(hfsmp, dcp->c_attr); + } + dcp->c_dirchangecnt++; dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); - /* Update the file's state */ + /* Update the file or directory's state */ cp->c_flag |= C_DELETED; cp->c_ctime = tv.tv_sec; - --cp->c_nlink; + --cp->c_linkcount; (void) cat_update(hfsmp, &to_desc, &cp->c_attr, NULL, NULL); } hfs_systemfile_unlock(hfsmp, lockflags); if (error) goto out; - } else /* Not busy */ { - - if (cp->c_blocks > 0) { - printf("hfs_remove: attempting to delete a non-empty file %s\n", - cp->c_desc.cd_nameptr); - error = EBUSY; - goto out; + } + else { + /* + * Nobody is using this item; we can safely remove everything. + */ + struct filefork *temp_rsrc_fork = NULL; +#if QUOTA + off_t savedbytes; + int blksize = hfsmp->blockSize; +#endif + u_int32_t fileid = cp->c_fileid; + + /* + * Figure out if we need to read the resource fork data into + * core before wiping out the catalog record. + * + * 1) Must not be a directory + * 2) cnode's c_rsrcfork ptr must be NULL. + * 3) rsrc fork must have actual blocks + */ + if ((isdir == 0) && (cp->c_rsrcfork == NULL) && + (cp->c_blocks - VTOF(vp)->ff_blocks)) { + /* + * The resource fork vnode & filefork did not exist. + * Create a temporary one for use in this function only. + */ + MALLOC_ZONE (temp_rsrc_fork, struct filefork *, sizeof (struct filefork), M_HFSFORK, M_WAITOK); + bzero(temp_rsrc_fork, sizeof(struct filefork)); + temp_rsrc_fork->ff_cp = cp; + rl_init(&temp_rsrc_fork->ff_invalidranges); + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* Look up the resource fork first, if necessary */ + if (temp_rsrc_fork) { + error = cat_lookup (hfsmp, &desc, 1, (struct cat_desc*) NULL, + (struct cat_attr*) NULL, &temp_rsrc_fork->ff_data, NULL); + if (error) { + FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK); + hfs_systemfile_unlock (hfsmp, lockflags); + goto out; + } } - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); - - error = cat_delete(hfsmp, &desc, &cp->c_attr); - - if (error && error != ENXIO && error != ENOENT && truncated) { - if ((cp->c_datafork && cp->c_datafork->ff_size != 0) || - (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) { - panic("hfs: remove: couldn't delete a truncated file! (%d, data sz %lld; rsrc sz %lld)", - error, cp->c_datafork->ff_size, cp->c_rsrcfork->ff_size); - } else { - printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n", - cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error); + if (!skip_reserve) { + if ((error = cat_preflight(hfsmp, CAT_DELETE, NULL, 0))) { + if (temp_rsrc_fork) { + FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK); + } + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; } } + + error = cat_delete(hfsmp, &desc, &cp->c_attr); + + if (error && error != ENXIO && error != ENOENT) { + printf("hfs_removefile: deleting file %s (%d), err: %d\n", + cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error); + } + if (error == 0) { - /* Delete any attributes, ignore errors */ - (void) hfs_removeallattr(hfsmp, cp->c_fileid); - /* Update the parent directory */ if (dcp->c_entries > 0) dcp->c_entries--; - if (dcp->c_nlink > 0) - dcp->c_nlink--; + dcp->c_dirchangecnt++; dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); } hfs_systemfile_unlock(hfsmp, lockflags); - if (error) - goto out; + if (error) { + if (temp_rsrc_fork) { + FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK); + } + goto out; + } + + /* + * Now that we've wiped out the catalog record, the file effectively doesn't + * exist anymore. So update the quota records to reflect the loss of the + * data fork and the resource fork. + */ #if QUOTA - (void)hfs_chkiq(cp, -1, NOCRED, 0); -#endif /* QUOTA */ + if (cp->c_datafork->ff_blocks > 0) { + savedbytes = ((off_t)cp->c_datafork->ff_blocks * (off_t)blksize); + (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0); + } + + /* + * We may have just deleted the catalog record for a resource fork even + * though it did not exist in core as a vnode. However, just because there + * was a resource fork pointer in the cnode does not mean that it had any blocks. + */ + if (temp_rsrc_fork || cp->c_rsrcfork) { + if (cp->c_rsrcfork) { + if (cp->c_rsrcfork->ff_blocks > 0) { + savedbytes = ((off_t)cp->c_rsrcfork->ff_blocks * (off_t)blksize); + (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0); + } + } + else { + /* we must have used a temporary fork */ + savedbytes = ((off_t)temp_rsrc_fork->ff_blocks * (off_t)blksize); + (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0); + } + } + + if (hfsmp->hfs_flags & HFS_QUOTAS) { + (void)hfs_chkiq(cp, -1, NOCRED, 0); + } +#endif + + /* + * If we didn't get any errors deleting the catalog entry, then go ahead + * and release the backing store now. The filefork pointers are still valid. + */ + if (temp_rsrc_fork) { + error = hfs_release_storage (hfsmp, cp->c_datafork, temp_rsrc_fork, fileid); + } + else { + /* if cp->c_rsrcfork == NULL, hfs_release_storage will skip over it. */ + error = hfs_release_storage (hfsmp, cp->c_datafork, cp->c_rsrcfork, fileid); + } + if (error) { + /* + * If we encountered an error updating the extents and bitmap, + * mark the volume inconsistent. At this point, the catalog record has + * already been deleted, so we can't recover it at this point. We need + * to proceed and update the volume header and mark the cnode C_NOEXISTS. + * The subsequent fsck should be able to recover the free space for us. + */ + hfs_mark_volume_inconsistent(hfsmp); + } + else { + /* reset update_vh to 0, since hfs_release_storage should have done it for us */ + update_vh = 0; + } + + /* Get rid of the temporary rsrc fork */ + if (temp_rsrc_fork) { + FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK); + } - cp->c_mode = 0; - truncated = 0; // because the catalog entry is gone cp->c_flag |= C_NOEXISTS; + cp->c_flag &= ~C_DELETED; + cp->c_touch_chgtime = TRUE; /* XXX needed ? */ - --cp->c_nlink; - + --cp->c_linkcount; + + /* + * We must never get a directory if we're in this else block. We could + * accidentally drop the number of files in the volume header if we did. + */ hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID)); + } /* * All done with this cnode's descriptor... * - * Note: all future catalog calls for this cnode must be - * by fileid only. This is OK for HFS (which doesn't have - * file thread records) since HFS doesn't support hard - * links or the removal of busy files. + * Note: all future catalog calls for this cnode must be by + * fileid only. This is OK for HFS (which doesn't have file + * thread records) since HFS doesn't support the removal of + * busy files. */ cat_releasedesc(&cp->c_desc); - HFS_KNOTE(dvp, NOTE_WRITE); - out: - if (got_cookie) { - cat_postflight(hfsmp, &cookie, 0); - } - - /* Commit the truncation to the catalog record */ - if (truncated) { - cp->c_flag |= C_FORCEUPDATE; - cp->c_touch_chgtime = TRUE; - cp->c_touch_modtime = TRUE; - (void) hfs_update(vp, 0); + if (error) { + cp->c_flag &= ~C_DELETED; } + + if (update_vh) { + /* + * If we bailed out earlier, we may need to update the volume header + * to deal with the borrowed blocks accounting. + */ + hfs_volupdate (hfsmp, VOL_UPDATE, 0); + } if (started_tr) { hfs_end_transaction(hfsmp); } - HFS_KNOTE(vp, NOTE_DELETE); - if (rvp) { - HFS_KNOTE(rvp, NOTE_DELETE); - /* Defer the vnode_put on rvp until the hfs_unlock(). */ - cp->c_flag |= C_NEED_RVNODE_PUT; - }; + dcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&dcp->c_flag); return (error); } @@ -1890,14 +3539,19 @@ out: __private_extern__ void replace_desc(struct cnode *cp, struct cat_desc *cdp) { + // fixes 4348457 and 4463138 + if (&cp->c_desc == cdp) { + return; + } + /* First release allocated name buffer */ if (cp->c_desc.cd_flags & CD_HASBUF && cp->c_desc.cd_nameptr != 0) { - char *name = cp->c_desc.cd_nameptr; + const u_int8_t *name = cp->c_desc.cd_nameptr; cp->c_desc.cd_nameptr = 0; cp->c_desc.cd_namelen = 0; cp->c_desc.cd_flags &= ~CD_HASBUF; - vfs_removename(name); + vfs_removename((const char *)name); } bcopy(cdp, &cp->c_desc, sizeof(cp->c_desc)); @@ -1917,8 +3571,21 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp) * - all the vnodes are from the same file system * * When the target is a directory, HFS must ensure that its empty. + * + * Note that this function requires up to 6 vnodes in order to work properly + * if it is operating on files (and not on directories). This is because only + * files can have resource forks, and we now require iocounts to be held on the + * vnodes corresponding to the resource forks (if applicable) as well as + * the files or directories undergoing rename. The problem with not holding + * iocounts on the resource fork vnodes is that it can lead to a deadlock + * situation: The rsrc fork of the source file may be recycled and reclaimed + * in order to provide a vnode for the destination file's rsrc fork. Since + * data and rsrc forks share the same cnode, we'd eventually try to lock the + * source file's cnode in order to sync its rsrc fork to disk, but it's already + * been locked. By taking the rsrc fork vnodes up front we ensure that they + * cannot be recycled, and that the situation mentioned above cannot happen. */ -static int +int hfs_vnop_rename(ap) struct vnop_rename_args /* { struct vnode *a_fdvp; @@ -1934,6 +3601,13 @@ hfs_vnop_rename(ap) struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; + /* + * Note that we only need locals for the target/destination's + * resource fork vnode (and only if necessary). We don't care if the + * source has a resource fork vnode or not. + */ + struct vnode *tvp_rsrc = NULLVP; + uint32_t tvp_rsrc_vid = 0; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = vfs_context_proc(ap->a_context); @@ -1941,6 +3615,7 @@ hfs_vnop_rename(ap) struct cnode *fdcp; struct cnode *tdcp; struct cnode *tcp; + struct cnode *error_cnode; struct cat_desc from_desc; struct cat_desc to_desc; struct cat_desc out_desc; @@ -1951,18 +3626,108 @@ hfs_vnop_rename(ap) int took_trunc_lock = 0; int lockflags; int error; + time_t orig_from_ctime, orig_to_ctime; + int emit_rename = 1; + int emit_delete = 1; + + orig_from_ctime = VTOC(fvp)->c_ctime; + if (tvp && VTOC(tvp)) { + orig_to_ctime = VTOC(tvp)->c_ctime; + } else { + orig_to_ctime = ~0; + } + + hfsmp = VTOHFS(tdvp); + /* + * Do special case checks here. If fvp == tvp then we need to check the + * cnode with locks held. + */ + if (fvp == tvp) { + int is_hardlink = 0; + /* + * In this case, we do *NOT* ever emit a DELETE event. + * We may not necessarily emit a RENAME event + */ + emit_delete = 0; + if ((error = hfs_lock(VTOC(fvp), HFS_SHARED_LOCK))) { + return error; + } + /* Check to see if the item is a hardlink or not */ + is_hardlink = (VTOC(fvp)->c_flag & C_HARDLINK); + hfs_unlock (VTOC(fvp)); + + /* + * If the item is not a hardlink, then case sensitivity must be off, otherwise + * two names should not resolve to the same cnode unless they were case variants. + */ + if (is_hardlink) { + emit_rename = 0; + /* + * Hardlinks are a little trickier. We only want to emit a rename event + * if the item is a hardlink, the parent directories are the same, case sensitivity + * is off, and the case folded names are the same. See the fvp == tvp case below for more + * info. + */ + + if ((fdvp == tdvp) && ((hfsmp->hfs_flags & HFS_CASE_SENSITIVE) == 0)) { + if (hfs_namecmp((const u_int8_t *)fcnp->cn_nameptr, fcnp->cn_namelen, + (const u_int8_t *)tcnp->cn_nameptr, tcnp->cn_namelen) == 0) { + /* Then in this case only it is ok to emit a rename */ + emit_rename = 1; + } + } + } + } + if (emit_rename) { + check_for_tracked_file(fvp, orig_from_ctime, NAMESPACE_HANDLER_RENAME_OP, NULL); + } - /* When tvp exist, take the truncate lock for the hfs_removefile(). */ - if (tvp && vnode_isreg(tvp)) { - hfs_lock_truncate(VTOC(tvp), TRUE); + if (tvp && VTOC(tvp)) { + if (emit_delete) { + check_for_tracked_file(tvp, orig_to_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); + } + } + +retry: + /* When tvp exists, take the truncate lock for hfs_removefile(). */ + if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) { + hfs_lock_truncate(VTOC(tvp), HFS_EXCLUSIVE_LOCK); took_trunc_lock = 1; } error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL, - HFS_EXCLUSIVE_LOCK); + HFS_EXCLUSIVE_LOCK, &error_cnode); if (error) { - if (took_trunc_lock) - hfs_unlock_truncate(VTOC(tvp)); + if (took_trunc_lock) { + hfs_unlock_truncate(VTOC(tvp), 0); + took_trunc_lock = 0; + } + + /* + * We hit an error path. If we were trying to re-acquire the locks + * after coming through here once, we might have already obtained + * an iocount on tvp's resource fork vnode. Drop that before dealing + * with the failure. Note this is safe -- since we are in an + * error handling path, we can't be holding the cnode locks. + */ + if (tvp_rsrc) { + vnode_put (tvp_rsrc); + tvp_rsrc_vid = 0; + tvp_rsrc = NULL; + } + + /* + * tvp might no longer exist. If the cause of the lock failure + * was tvp, then we can try again with tvp/tcp set to NULL. + * This is ok because the vfs syscall will vnode_put the vnodes + * after we return from hfs_vnop_rename. + */ + if ((error == ENOENT) && (tvp != NULL) && (error_cnode == VTOC(tvp))) { + tcp = NULL; + tvp = NULL; + goto retry; + } + return (error); } @@ -1970,14 +3735,107 @@ hfs_vnop_rename(ap) fcp = VTOC(fvp); tdcp = VTOC(tdvp); tcp = tvp ? VTOC(tvp) : NULL; - hfsmp = VTOHFS(tdvp); - /* Check for a race against unlink. */ - if (fcp->c_flag & C_NOEXISTS) { + /* + * Acquire iocounts on the destination's resource fork vnode + * if necessary. If dst/src are files and the dst has a resource + * fork vnode, then we need to try and acquire an iocount on the rsrc vnode. + * If it does not exist, then we don't care and can skip it. + */ + if ((vnode_isreg(fvp)) || (vnode_islnk(fvp))) { + if ((tvp) && (tcp->c_rsrc_vp) && (tvp_rsrc == NULL)) { + tvp_rsrc = tcp->c_rsrc_vp; + /* + * We can look at the vid here because we're holding the + * cnode lock on the underlying cnode for this rsrc vnode. + */ + tvp_rsrc_vid = vnode_vid (tvp_rsrc); + + /* Unlock everything to acquire iocount on this rsrc vnode */ + if (took_trunc_lock) { + hfs_unlock_truncate (VTOC(tvp), 0); + took_trunc_lock = 0; + } + hfs_unlockfour(fdcp, fcp, tdcp, tcp); + + if (vnode_getwithvid (tvp_rsrc, tvp_rsrc_vid)) { + /* iocount acquisition failed. Reset fields and start over.. */ + tvp_rsrc_vid = 0; + tvp_rsrc = NULL; + } + goto retry; + } + } + + /* Ensure we didn't race src or dst parent directories with rmdir. */ + if (fdcp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + + if (tdcp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + + + /* Check for a race against unlink. The hfs_valid_cnode checks validate + * the parent/child relationship with fdcp and tdcp, as well as the + * component name of the target cnodes. + */ + if ((fcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, fdvp, fcnp, fcp->c_fileid, NULL, &error)) { error = ENOENT; goto out; } + if (tcp && ((tcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, tdvp, tcnp, tcp->c_fileid, NULL, &error))) { + // + // hmm, the destination vnode isn't valid any more. + // in this case we can just drop him and pretend he + // never existed in the first place. + // + if (took_trunc_lock) { + hfs_unlock_truncate(VTOC(tvp), 0); + took_trunc_lock = 0; + } + error = 0; + + hfs_unlockfour(fdcp, fcp, tdcp, tcp); + + tcp = NULL; + tvp = NULL; + + // retry the locking with tvp null'ed out + goto retry; + } + + fdcp->c_flag |= C_DIR_MODIFICATION; + if (fdvp != tdvp) { + tdcp->c_flag |= C_DIR_MODIFICATION; + } + + /* + * Disallow renaming of a directory hard link if the source and + * destination parent directories are different, or a directory whose + * descendant is a directory hard link and the one of the ancestors + * of the destination directory is a directory hard link. + */ + if (vnode_isdir(fvp) && (fdvp != tdvp)) { + if (fcp->c_flag & C_HARDLINK) { + error = EPERM; + goto out; + } + if (fcp->c_attr.ca_recflags & kHFSHasChildLinkMask) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + if (cat_check_link_ancestry(hfsmp, tdcp->c_fileid, 0)) { + error = EPERM; + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + hfs_systemfile_unlock(hfsmp, lockflags); + } + } + /* * The following edge case is caught here: * (to cannot be a descendent of from) @@ -1993,7 +3851,7 @@ hfs_vnop_rename(ap) * / * o tvp */ - if (tdcp->c_parentcnid == fcp->c_cnid) { + if (tdcp->c_parentcnid == fcp->c_fileid) { error = EINVAL; goto out; } @@ -2035,7 +3893,7 @@ hfs_vnop_rename(ap) /* * Make sure "from" vnode and its parent are changeable. */ - if ((fcp->c_flags & (IMMUTABLE | APPEND)) || (fdcp->c_flags & APPEND)) { + if ((fcp->c_bsdflags & (IMMUTABLE | APPEND)) || (fdcp->c_bsdflags & APPEND)) { error = EPERM; goto out; } @@ -2057,38 +3915,31 @@ hfs_vnop_rename(ap) goto out; } + /* Don't allow modification of the journal or journal_info_block */ + if (hfs_is_journal_file(hfsmp, fcp) || + (tcp && hfs_is_journal_file(hfsmp, tcp))) { + error = EPERM; + goto out; + } + #if QUOTA if (tvp) (void)hfs_getinoquota(tcp); #endif - /* Preflighting done, take fvp out of the name space. */ - cache_purge(fvp); - - /* - * When a file moves out of "Cleanup At Startup" - * we can drop its NODUMP status. - */ - if ((fcp->c_flags & UF_NODUMP) && - vnode_isreg(fvp) && - (fdvp != tdvp) && - (fdcp->c_desc.cd_nameptr != NULL) && - (strcmp(fdcp->c_desc.cd_nameptr, CARBON_TEMP_DIR_NAME) == 0)) { - fcp->c_flags &= ~UF_NODUMP; - fcp->c_touch_chgtime = TRUE; - (void) hfs_update(fvp, 0); - } + /* Preflighting done, take fvp out of the name space. */ + cache_purge(fvp); bzero(&from_desc, sizeof(from_desc)); - from_desc.cd_nameptr = fcnp->cn_nameptr; + from_desc.cd_nameptr = (const u_int8_t *)fcnp->cn_nameptr; from_desc.cd_namelen = fcnp->cn_namelen; - from_desc.cd_parentcnid = fdcp->c_cnid; + from_desc.cd_parentcnid = fdcp->c_fileid; from_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED); from_desc.cd_cnid = fcp->c_cnid; bzero(&to_desc, sizeof(to_desc)); - to_desc.cd_nameptr = tcnp->cn_nameptr; + to_desc.cd_nameptr = (const u_int8_t *)tcnp->cn_nameptr; to_desc.cd_namelen = tcnp->cn_namelen; - to_desc.cd_parentcnid = tdcp->c_cnid; + to_desc.cd_parentcnid = tdcp->c_fileid; to_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED); to_desc.cd_cnid = fcp->c_cnid; @@ -2097,6 +3948,25 @@ hfs_vnop_rename(ap) } started_tr = 1; + /* hfs_vnop_link() and hfs_vnop_rename() set kHFSHasChildLinkMask + * inside a journal transaction and without holding a cnode lock. + * As setting of this bit depends on being in journal transaction for + * concurrency, check this bit again after we start journal transaction for rename + * to ensure that this directory does not have any descendant that + * is a directory hard link. + */ + if (vnode_isdir(fvp) && (fdvp != tdvp)) { + if (fcp->c_attr.ca_recflags & kHFSHasChildLinkMask) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + if (cat_check_link_ancestry(hfsmp, tdcp->c_fileid, 0)) { + error = EPERM; + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + hfs_systemfile_unlock(hfsmp, lockflags); + } + } + // if it's a hardlink then re-lookup the name so // that we get the correct cnid in from_desc (see // the comment in hfs_removefile for more details) @@ -2105,11 +3975,12 @@ hfs_vnop_rename(ap) struct cat_desc tmpdesc; cnid_t real_cnid; - bzero(&tmpdesc, sizeof(tmpdesc)); - tmpdesc.cd_nameptr = fcnp->cn_nameptr; + tmpdesc.cd_nameptr = (const u_int8_t *)fcnp->cn_nameptr; tmpdesc.cd_namelen = fcnp->cn_namelen; - tmpdesc.cd_parentcnid = fdcp->c_cnid; + tmpdesc.cd_parentcnid = fdcp->c_fileid; tmpdesc.cd_hint = fdcp->c_childhint; + tmpdesc.cd_flags = fcp->c_desc.cd_flags & CD_ISDIR; + tmpdesc.cd_encoding = 0; lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); @@ -2126,68 +3997,186 @@ hfs_vnop_rename(ap) /* * Reserve some space in the Catalog file. */ - bzero(&cookie, sizeof(cookie)); if ((error = cat_preflight(hfsmp, CAT_RENAME + CAT_DELETE, &cookie, p))) { goto out; } got_cookie = 1; /* - * If the destination exists then it may need to be removed. + * If the destination exists then it may need to be removed. + * + * Due to HFS's locking system, we should always move the + * existing 'tvp' element to the hidden directory in hfs_vnop_rename. + * Because the VNOP_LOOKUP call enters and exits the filesystem independently + * of the actual vnop that it was trying to do (stat, link, readlink), + * we must release the cnode lock of that element during the interim to + * do MAC checking, vnode authorization, and other calls. In that time, + * the item can be deleted (or renamed over). However, only in the rename + * case is it inappropriate to return ENOENT from any of those calls. Either + * the call should return information about the old element (stale), or get + * information about the newer element that we are about to write in its place. + * + * HFS lookup has been modified to detect a rename and re-drive its + * lookup internally. For other calls that have already succeeded in + * their lookup call and are waiting to acquire the cnode lock in order + * to proceed, that cnode lock will not fail due to the cnode being marked + * C_NOEXISTS, because it won't have been marked as such. It will only + * have C_DELETED. Thus, they will simply act on the stale open-unlinked + * element. All future callers will get the new element. + * + * To implement this behavior, we pass the "only_unlink" argument to + * hfs_removefile and hfs_removedir. This will result in the vnode acting + * as though it is open-unlinked. Additionally, when we are done moving the + * element to the hidden directory, we vnode_recycle the target so that it is + * reclaimed as soon as possible. Reclaim and inactive are both + * capable of clearing out unused blocks for an open-unlinked file or dir. */ if (tvp) { /* - * When fvp matches tvp they must be case variants - * or hard links. + * When fvp matches tvp they could be case variants + * or matching hard links. */ if (fvp == tvp) { - /* - * If this a hard link with different parents - * and its not a case variant then tvp should - * be removed. - */ - if (!((fcp->c_flag & C_HARDLINK) && - ((fdvp != tdvp) || - (hfs_namecmp(fcnp->cn_nameptr, fcnp->cn_namelen, - tcnp->cn_nameptr, tcnp->cn_namelen) != 0)))) { - goto skip; + if (!(fcp->c_flag & C_HARDLINK)) { + /* + * If they're not hardlinks, then fvp == tvp must mean we + * are using case-insensitive HFS because case-sensitive would + * not use the same vnode for both. In this case we just update + * the catalog for: a -> A + */ + goto skip_rm; /* simple case variant */ + + } + /* For all cases below, we must be using hardlinks */ + else if ((fdvp != tdvp) || + (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) { + /* + * If the parent directories are not the same, AND the two items + * are hardlinks, posix says to do nothing: + * dir1/fred <-> dir2/bob and the op was mv dir1/fred -> dir2/bob + * We just return 0 in this case. + * + * If case sensitivity is on, and we are using hardlinks + * then renaming is supposed to do nothing. + * dir1/fred <-> dir2/FRED, and op == mv dir1/fred -> dir2/FRED + */ + goto out; /* matching hardlinks, nothing to do */ + + } else if (hfs_namecmp((const u_int8_t *)fcnp->cn_nameptr, fcnp->cn_namelen, + (const u_int8_t *)tcnp->cn_nameptr, tcnp->cn_namelen) == 0) { + /* + * If we get here, then the following must be true: + * a) We are running case-insensitive HFS+. + * b) Both paths 'fvp' and 'tvp' are in the same parent directory. + * c) the two names are case-variants of each other. + * + * In this case, we are really only dealing with a single catalog record + * whose name is being updated. + * + * op is dir1/fred -> dir1/FRED + * + * We need to special case the name matching, because if + * dir1/fred <-> dir1/bob were the two links, and the + * op was dir1/fred -> dir1/bob + * That would fail/do nothing. + */ + goto skip_rm; /* case-variant hardlink in the same dir */ + } else { + goto out; /* matching hardlink, nothing to do */ } - } else { - cache_purge(tvp); } - if (vnode_isdir(tvp)) - error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE); + + if (vnode_isdir(tvp)) { + /* + * hfs_removedir will eventually call hfs_removefile on the directory + * we're working on, because only hfs_removefile does the renaming of the + * item to the hidden directory. The directory will stay around in the + * hidden directory with C_DELETED until it gets an inactive or a reclaim. + * That way, we can destroy all of the EAs as needed and allow new ones to be + * written. + */ + error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE, 1); + } else { - error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE); + error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, NULL, 1); + + /* + * If the destination file had a resource fork vnode, then we need to get rid of + * its blocks when there are no more references to it. Because the call to + * hfs_removefile above always open-unlinks things, we need to force an inactive/reclaim + * on the resource fork vnode, in order to prevent block leaks. Otherwise, + * the resource fork vnode could prevent the data fork vnode from going out of scope + * because it holds a v_parent reference on it. So we mark it for termination + * with a call to vnode_recycle. hfs_vnop_reclaim has been modified so that it + * can clean up the blocks of open-unlinked files and resource forks. + * + * We can safely call vnode_recycle on the resource fork because we took an iocount + * reference on it at the beginning of the function. + */ + + if ((error == 0) && (tcp->c_flag & C_DELETED) && (tvp_rsrc)) { + vnode_recycle(tvp_rsrc); + } } - if (error) + if (error) { goto out; + } + tvp_deleted = 1; + + /* Mark 'tcp' as being deleted due to a rename */ + tcp->c_flag |= C_RENAMED; + + /* + * Aggressively mark tvp/tcp for termination to ensure that we recover all blocks + * as quickly as possible. + */ + vnode_recycle(tvp); } -skip: +skip_rm: /* - * All done with tvp and fvp + * All done with tvp and fvp. + * + * We also jump to this point if there was no destination observed during lookup and namei. + * However, because only iocounts are held at the VFS layer, there is nothing preventing a + * competing thread from racing us and creating a file or dir at the destination of this rename + * operation. If this occurs, it may cause us to get a spurious EEXIST out of the cat_rename + * call below. To preserve rename's atomicity, we need to signal VFS to re-drive the + * namei/lookup and restart the rename operation. EEXIST is an allowable errno to be bubbled + * out of the rename syscall, but not for this reason, since it is a synonym errno for ENOTEMPTY. + * To signal VFS, we return ERECYCLE (which is also used for lookup restarts). This errno + * will be swallowed and it will restart the operation. */ - + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); error = cat_rename(hfsmp, &from_desc, &tdcp->c_desc, &to_desc, &out_desc); hfs_systemfile_unlock(hfsmp, lockflags); if (error) { + if (error == EEXIST) { + error = ERECYCLE; + } goto out; } /* Invalidate negative cache entries in the destination directory */ - if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) + if (tdcp->c_flag & C_NEG_ENTRIES) { cache_purge_negatives(tdvp); + tdcp->c_flag &= ~C_NEG_ENTRIES; + } /* Update cnode's catalog descriptor */ replace_desc(fcp, &out_desc); - fcp->c_parentcnid = tdcp->c_cnid; + fcp->c_parentcnid = tdcp->c_fileid; fcp->c_hint = 0; + /* Now indicate this cnode needs to have date-added written to the finderinfo */ + fcp->c_flag |= C_NEEDS_DATEADDED; + (void) hfs_update (fvp, 0); + + hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_RMDIR : VOL_RMFILE, (fdcp->c_cnid == kHFSRootFolderID)); hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_MKDIR : VOL_MKFILE, @@ -2195,12 +4184,30 @@ skip: /* Update both parent directories. */ if (fdvp != tdvp) { - tdcp->c_nlink++; + if (vnode_isdir(fvp)) { + /* If the source directory has directory hard link + * descendants, set the kHFSHasChildLinkBit in the + * destination parent hierarchy + */ + if ((fcp->c_attr.ca_recflags & kHFSHasChildLinkMask) && + !(tdcp->c_attr.ca_recflags & kHFSHasChildLinkMask)) { + + tdcp->c_attr.ca_recflags |= kHFSHasChildLinkMask; + + error = cat_set_childlinkbit(hfsmp, tdcp->c_parentcnid); + if (error) { + printf ("hfs_vnop_rename: error updating parent chain for %u\n", tdcp->c_cnid); + error = 0; + } + } + INC_FOLDERCOUNT(hfsmp, tdcp->c_attr); + DEC_FOLDERCOUNT(hfsmp, fdcp->c_attr); + } tdcp->c_entries++; - if (fdcp->c_nlink > 0) - fdcp->c_nlink--; + tdcp->c_dirchangecnt++; if (fdcp->c_entries > 0) fdcp->c_entries--; + fdcp->c_dirchangecnt++; fdcp->c_touch_chgtime = TRUE; fdcp->c_touch_modtime = TRUE; @@ -2213,6 +4220,41 @@ skip: tdcp->c_flag |= C_FORCEUPDATE; // XXXdbg - force it out! (void) hfs_update(tdvp, 0); + + /* Update the vnode's name now that the rename has completed. */ + vnode_update_identity(fvp, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, + tcnp->cn_hash, (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + + /* + * At this point, we may have a resource fork vnode attached to the + * 'from' vnode. If it exists, we will want to update its name, because + * it contains the old name + _PATH_RSRCFORKSPEC. ("/..namedfork/rsrc"). + * + * Note that the only thing we need to update here is the name attached to + * the vnode, since a resource fork vnode does not have a separate resource + * cnode -- it's still 'fcp'. + */ + if (fcp->c_rsrc_vp) { + char* rsrc_path = NULL; + int len; + + /* Create a new temporary buffer that's going to hold the new name */ + MALLOC_ZONE (rsrc_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + len = snprintf (rsrc_path, MAXPATHLEN, "%s%s", tcnp->cn_nameptr, _PATH_RSRCFORKSPEC); + len = MIN(len, MAXPATHLEN); + + /* + * vnode_update_identity will do the following for us: + * 1) release reference on the existing rsrc vnode's name. + * 2) copy/insert new name into the name cache + * 3) attach the new name to the resource vnode + * 4) update the vnode's vid + */ + vnode_update_identity (fcp->c_rsrc_vp, fvp, rsrc_path, len, 0, (VNODE_UPDATE_NAME | VNODE_UPDATE_CACHE)); + + /* Free the memory associated with the resource fork's name */ + FREE_ZONE (rsrc_path, MAXPATHLEN, M_NAMEI); + } out: if (got_cookie) { cat_postflight(hfsmp, &cookie, p); @@ -2221,19 +4263,24 @@ out: hfs_end_transaction(hfsmp); } - /* Note that if hfs_removedir or hfs_removefile was invoked above they will already have - generated a NOTE_WRITE for tdvp and a NOTE_DELETE for tvp. - */ - if (error == 0) { - HFS_KNOTE(fvp, NOTE_RENAME); - HFS_KNOTE(fdvp, NOTE_WRITE); - if (tdvp != fdvp) HFS_KNOTE(tdvp, NOTE_WRITE); - }; + fdcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&fdcp->c_flag); + if (fdvp != tdvp) { + tdcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&tdcp->c_flag); + } - if (took_trunc_lock) - hfs_unlock_truncate(VTOC(tvp)); + if (took_trunc_lock) { + hfs_unlock_truncate(VTOC(tvp), 0); + } hfs_unlockfour(fdcp, fcp, tdcp, tcp); + + /* Now vnode_put the resource fork vnode if necessary */ + if (tvp_rsrc) { + vnode_put(tvp_rsrc); + tvp_rsrc = NULL; + } /* After tvp is removed the only acceptable error is EIO */ if (error && tvp_deleted) @@ -2246,7 +4293,7 @@ out: /* * Make a directory. */ -static int +int hfs_vnop_mkdir(struct vnop_mkdir_args *ap) { /***** HACK ALERT ********/ @@ -2258,18 +4305,20 @@ hfs_vnop_mkdir(struct vnop_mkdir_args *ap) /* * Create a symbolic link. */ -static int +int hfs_vnop_symlink(struct vnop_symlink_args *ap) { struct vnode **vpp = ap->a_vpp; struct vnode *dvp = ap->a_dvp; struct vnode *vp = NULL; + struct cnode *cp = NULL; struct hfsmount *hfsmp; struct filefork *fp; struct buf *bp = NULL; char *datap; int started_tr = 0; - int len, error; + u_int32_t len; + int error; /* HFS standard disks don't support symbolic links */ if (VTOVCB(dvp)->vcbSigWord != kHFSPlusSigWord) @@ -2279,20 +4328,32 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) if (ap->a_target[0] == 0) return (EINVAL); + hfsmp = VTOHFS(dvp); + len = strlen(ap->a_target); + + /* Check for free space */ + if (((u_int64_t)hfs_freeblks(hfsmp, 0) * (u_int64_t)hfsmp->blockSize) < len) { + return (ENOSPC); + } + /* Create the vnode */ ap->a_vap->va_mode |= S_IFLNK; if ((error = hfs_makenode(dvp, vpp, ap->a_cnp, ap->a_vap, ap->a_context))) { goto out; } vp = *vpp; - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) - return (error); + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + goto out; + } + cp = VTOC(vp); fp = VTOF(vp); - hfsmp = VTOHFS(dvp); - len = strlen(ap->a_target); + + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + goto out; + } #if QUOTA - (void)hfs_getinoquota(VTOC(vp)); + (void)hfs_getinoquota(cp); #endif /* QUOTA */ if ((error = hfs_start_transaction(hfsmp)) != 0) { @@ -2308,12 +4369,34 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) * * Don't need truncate lock since a symlink is treated as a system file. */ - error = hfs_truncate(vp, len, IO_NOZEROFILL, 1, ap->a_context); - if (error) - goto out; /* XXX need to remove link */ + error = hfs_truncate(vp, len, IO_NOZEROFILL, 1, 0, ap->a_context); + + /* On errors, remove the symlink file */ + if (error) { + /* + * End the transaction so we don't re-take the cnode lock + * below while inside a transaction (lock order violation). + */ + hfs_end_transaction(hfsmp); + + /* hfs_removefile() requires holding the truncate lock */ + hfs_unlock(cp); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock(cp, HFS_FORCE_LOCK); + + if (hfs_start_transaction(hfsmp) != 0) { + started_tr = 0; + hfs_unlock_truncate(cp, TRUE); + goto out; + } + + (void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0, NULL, 0); + hfs_unlock_truncate(cp, 0); + goto out; + } /* Write the link to disk */ - bp = buf_getblk(vp, (daddr64_t)0, roundup((int)fp->ff_size, VTOHFS(vp)->hfs_phys_block_size), + bp = buf_getblk(vp, (daddr64_t)0, roundup((int)fp->ff_size, hfsmp->hfs_physical_block_size), 0, 0, BLK_META); if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, bp); @@ -2323,7 +4406,7 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) bcopy(ap->a_target, datap, len); if (hfsmp->jnl) { - journal_modify_block_end(hfsmp->jnl, bp); + journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); } else { buf_bawrite(bp); } @@ -2338,8 +4421,14 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) out: if (started_tr) hfs_end_transaction(hfsmp); - if (vp) { - hfs_unlock(VTOC(vp)); + if ((cp != NULL) && (vp != NULL)) { + hfs_unlock(cp); + } + if (error) { + if (vp) { + vnode_put(vp); + } + *vpp = NULL; } return (error); } @@ -2394,8 +4483,11 @@ typedef union { * Each tag/index pair is tied to a unique directory hint. The hint * contains information (filename) needed to build the catalog b-tree * key for finding the next set of entries. + * + * If the directory is marked as deleted-but-in-use (cp->c_flag & C_DELETED), + * do NOT synthesize entries for "." and "..". */ -static int +int hfs_vnop_readdir(ap) struct vnop_readdir_args /* { vnode_t a_vp; @@ -2424,25 +4516,33 @@ hfs_vnop_readdir(ap) int lockflags; int extended; int nfs_cookies; - caddr_t bufstart; cnid_t cnid_hint = 0; items = 0; startoffset = offset = uio_offset(uio); - bufstart = CAST_DOWN(caddr_t, uio_iov_base(uio)); extended = (ap->a_flags & VNODE_READDIR_EXTENDED); nfs_cookies = extended && (ap->a_flags & VNODE_READDIR_REQSEEKOFF); /* Sanity check the uio data. */ - if ((uio_iovcnt(uio) > 1) || - (uio_resid(uio) < (int)sizeof(struct dirent))) { + if (uio_iovcnt(uio) > 1) return (EINVAL); + + if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) { + int compressed = hfs_file_is_compressed(VTOC(vp), 0); /* 0 == take the cnode lock */ + if (VTOCMP(vp) != NULL && !compressed) { + error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); + if (error) { + return error; + } + } } + + cp = VTOC(vp); + hfsmp = VTOHFS(vp); + /* Note that the dirhint calls require an exclusive lock. */ if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) return (error); - cp = VTOC(vp); - hfsmp = VTOHFS(vp); /* Pick up cnid hint (if any). */ if (nfs_cookies) { @@ -2454,9 +4554,10 @@ hfs_vnop_readdir(ap) } } /* - * Synthesize entries for "." and ".." + * Synthesize entries for "." and "..", unless the directory has + * been deleted, but not closed yet (lazy delete in progress). */ - if (offset == 0) { + if (offset == 0 && !(cp->c_flag & C_DELETED)) { hfs_dotentry_t dotentry[2]; size_t uiosize; @@ -2525,7 +4626,7 @@ hfs_vnop_readdir(ap) // here and we can't service our page fault because VM is // blocked trying to start a transaction as a result of // trying to free up pages for our page fault. It's messy - // but it does happen on dual-procesors that are paging + // but it does happen on dual-processors that are paging // heavily (see radar 3082639 for more info). By locking // the buffer up-front we prevent ourselves from faulting // while holding the shared catalog file lock. @@ -2553,7 +4654,7 @@ hfs_vnop_readdir(ap) /* When called from NFS, try and resolve a cnid hint. */ if (nfs_cookies && cnid_hint != 0) { if (cat_findname(hfsmp, cnid_hint, &localhint.dh_desc) == 0) { - if ( localhint.dh_desc.cd_parentcnid == cp->c_cnid) { + if ( localhint.dh_desc.cd_parentcnid == cp->c_fileid) { localhint.dh_index = index - 1; localhint.dh_time = 0; bzero(&localhint.dh_link, sizeof(localhint.dh_link)); @@ -2566,7 +4667,7 @@ hfs_vnop_readdir(ap) /* Get a directory hint (cnode must be locked exclusive) */ if (dirhint == NULL) { - dirhint = hfs_getdirhint(cp, ((index - 1) & HFS_INDEX_MASK) | tag); + dirhint = hfs_getdirhint(cp, ((index - 1) & HFS_INDEX_MASK) | tag, 0); /* Hide tag from catalog layer. */ dirhint->dh_index &= HFS_INDEX_MASK; @@ -2575,8 +4676,35 @@ hfs_vnop_readdir(ap) } } + if (index == 0) { + dirhint->dh_threadhint = cp->c_dirthreadhint; + } + else { + /* + * If we have a non-zero index, there is a possibility that during the last + * call to hfs_vnop_readdir we hit EOF for this directory. If that is the case + * then we don't want to return any new entries for the caller. Just return 0 + * items, mark the eofflag, and bail out. Because we won't have done any work, the + * code at the end of the function will release the dirhint for us. + * + * Don't forget to unlock the catalog lock on the way out, too. + */ + if (dirhint->dh_desc.cd_flags & CD_EOF) { + error = 0; + eofflag = 1; + uio_setoffset(uio, startoffset); + hfs_systemfile_unlock (hfsmp, lockflags); + + goto seekoffcalc; + } + } + /* Pack the buffer with dirent entries. */ - error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, extended, &items, &eofflag); + error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, ap->a_flags, &items, &eofflag); + + if (index == 0 && error == 0) { + cp->c_dirthreadhint = dirhint->dh_threadhint; + } hfs_systemfile_unlock(hfsmp, lockflags); @@ -2607,7 +4735,7 @@ seekoffcalc: } out: - if (hfsmp->jnl && user_start) { + if (user_start) { vsunlock(user_start, user_len, TRUE); } /* If we didn't do anything then go ahead and dump the hint. */ @@ -2631,7 +4759,7 @@ out: /* * Read contents of a symbolic link. */ -static int +int hfs_vnop_readlink(ap) struct vnop_readlink_args /* { struct vnode *a_vp; @@ -2654,7 +4782,6 @@ hfs_vnop_readlink(ap) /* Zero length sym links are not allowed */ if (fp->ff_size == 0 || fp->ff_size > MAXPATHLEN) { - VTOVCB(vp)->vcbFlags |= kHFS_DamagedVolume; error = EINVAL; goto exit; } @@ -2664,9 +4791,12 @@ hfs_vnop_readlink(ap) struct buf *bp = NULL; MALLOC(fp->ff_symlinkptr, char *, fp->ff_size, M_TEMP, M_WAITOK); + if (fp->ff_symlinkptr == NULL) { + error = ENOMEM; + goto exit; + } error = (int)buf_meta_bread(vp, (daddr64_t)0, - roundup((int)fp->ff_size, - VTOHFS(vp)->hfs_phys_block_size), + roundup((int)fp->ff_size, VTOHFS(vp)->hfs_physical_block_size), vfs_context_ucred(ap->a_context), &bp); if (error) { if (bp) @@ -2713,7 +4843,7 @@ exit: /* * Get configurable pathname variables. */ -static int +int hfs_vnop_pathconf(ap) struct vnop_pathconf_args /* { struct vnode *a_vp; @@ -2731,9 +4861,9 @@ hfs_vnop_pathconf(ap) break; case _PC_NAME_MAX: if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) - *ap->a_retval = kHFSMaxFileNameChars; /* 255 */ + *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ else - *ap->a_retval = kHFSPlusMaxFileNameChars; /* 31 */ + *ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */ break; case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; /* 1024 */ @@ -2742,13 +4872,16 @@ hfs_vnop_pathconf(ap) *ap->a_retval = PIPE_BUF; break; case _PC_CHOWN_RESTRICTED: - *ap->a_retval = 1; + *ap->a_retval = 200112; /* _POSIX_CHOWN_RESTRICTED */ break; case _PC_NO_TRUNC: - *ap->a_retval = 0; + *ap->a_retval = 200112; /* _POSIX_NO_TRUNC */ break; case _PC_NAME_CHARS_MAX: - *ap->a_retval = kHFSPlusMaxFileNameChars; + if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) + *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ + else + *ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */ break; case _PC_CASE_SENSITIVE: if (VTOHFS(ap->a_vp)->hfs_flags & HFS_CASE_SENSITIVE) @@ -2759,6 +4892,16 @@ hfs_vnop_pathconf(ap) case _PC_CASE_PRESERVING: *ap->a_retval = 1; break; + case _PC_FILESIZEBITS: + if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) + *ap->a_retval = 32; + else + *ap->a_retval = 64; /* number of bits to store max file size */ + break; + case _PC_XATTR_SIZE_BITS: + /* Number of bits to store maximum extended attribute size */ + *ap->a_retval = HFS_XATTR_SIZE_BITS; + break; default: return (EINVAL); } @@ -2775,7 +4918,6 @@ hfs_vnop_pathconf(ap) * * The cnode must be locked exclusive */ -__private_extern__ int hfs_update(struct vnode *vp, __unused int waitfor) { @@ -2784,6 +4926,7 @@ hfs_update(struct vnode *vp, __unused int waitfor) struct cat_fork *dataforkp = NULL; struct cat_fork *rsrcforkp = NULL; struct cat_fork datafork; + struct cat_fork rsrcfork; struct hfsmount *hfsmp; int lockflags; int error; @@ -2791,7 +4934,8 @@ hfs_update(struct vnode *vp, __unused int waitfor) p = current_proc(); hfsmp = VTOHFS(vp); - if (vnode_issystem(vp) && (cp->c_cnid < kHFSFirstUserCatalogNodeID)) { + if (((vnode_issystem(vp) && (cp->c_cnid < kHFSFirstUserCatalogNodeID))) || + hfsmp->hfs_catalog_vp == NULL){ return (0); } if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (cp->c_mode == 0)) { @@ -2831,8 +4975,6 @@ hfs_update(struct vnode *vp, __unused int waitfor) // cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE); cp->c_flag |= C_MODIFIED; - HFS_KNOTE(vp, NOTE_ATTRIB); - return (0); } @@ -2840,36 +4982,68 @@ hfs_update(struct vnode *vp, __unused int waitfor) return error; } + /* + * Modify the values passed to cat_update based on whether or not + * the file has invalid ranges or borrowed blocks. + */ + if (dataforkp) { + off_t numbytes = 0; + + /* copy the datafork into a temporary copy so we don't pollute the cnode's */ + bcopy(dataforkp, &datafork, sizeof(datafork)); + dataforkp = &datafork; + + /* + * If there are borrowed blocks, ensure that they are subtracted + * from the total block count before writing the cnode entry to disk. + * Only extents that have actually been marked allocated in the bitmap + * should be reflected in the total block count for this fork. + */ + if (cp->c_datafork->ff_unallocblocks != 0) { + // make sure that we don't assign a negative block count + if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) { + panic("hfs: ff_blocks %d is less than unalloc blocks %d\n", + cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks); + } + + /* Also cap the LEOF to the total number of bytes that are allocated. */ + datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks); + datafork.cf_size = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize; + } + + /* + * For files with invalid ranges (holes) the on-disk + * field representing the size of the file (cf_size) + * must be no larger than the start of the first hole. + * However, note that if the first invalid range exists + * solely within borrowed blocks, then our LEOF and block + * count should both be zero. As a result, set it to the + * min of the current cf_size and the start of the first + * invalid range, because it may have already been reduced + * to zero by the borrowed blocks check above. + */ + if (!TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges)) { + numbytes = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start; + datafork.cf_size = MIN((numbytes), (datafork.cf_size)); + } + } + /* - * For files with invalid ranges (holes) the on-disk - * field representing the size of the file (cf_size) - * must be no larger than the start of the first hole. + * For resource forks with delayed allocations, make sure + * the block count and file size match the number of blocks + * actually allocated to the file on disk. */ - if (dataforkp && !CIRCLEQ_EMPTY(&cp->c_datafork->ff_invalidranges)) { - bcopy(dataforkp, &datafork, sizeof(datafork)); - datafork.cf_size = CIRCLEQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start; - dataforkp = &datafork; - } else if (dataforkp && (cp->c_datafork->ff_unallocblocks != 0)) { - // always make sure the block count and the size - // of the file match the number of blocks actually - // allocated to the file on disk - bcopy(dataforkp, &datafork, sizeof(datafork)); - // make sure that we don't assign a negative block count - if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) { - panic("hfs: ff_blocks %d is less than unalloc blocks %d\n", - cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks); - } - datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks); - datafork.cf_size = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize; - dataforkp = &datafork; + if (rsrcforkp && (cp->c_rsrcfork->ff_unallocblocks != 0)) { + bcopy(rsrcforkp, &rsrcfork, sizeof(rsrcfork)); + rsrcfork.cf_blocks = (cp->c_rsrcfork->ff_blocks - cp->c_rsrcfork->ff_unallocblocks); + rsrcfork.cf_size = rsrcfork.cf_blocks * HFSTOVCB(hfsmp)->blockSize; + rsrcforkp = &rsrcfork; } /* * Lock the Catalog b-tree file. - * A shared lock is sufficient since an update doesn't change - * the tree and the lock on vp protects the cnode. */ - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); /* XXX - waitfor is not enforced */ error = cat_update(hfsmp, &cp->c_desc, &cp->c_attr, dataforkp, rsrcforkp); @@ -2881,48 +5055,75 @@ hfs_update(struct vnode *vp, __unused int waitfor) hfs_end_transaction(hfsmp); - HFS_KNOTE(vp, NOTE_ATTRIB); - return (error); } /* * Allocate a new node + * Note - Function does not create and return a vnode for whiteout creation. */ -static int +int hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx) { struct cnode *cp = NULL; - struct cnode *dcp; + struct cnode *dcp = NULL; struct vnode *tvp; struct hfsmount *hfsmp; struct cat_desc in_desc, out_desc; struct cat_attr attr; struct timeval tv; - cat_cookie_t cookie; int lockflags; - int error, started_tr = 0, got_cookie = 0; + int error, started_tr = 0; enum vtype vnodetype; int mode; + int newvnode_flags = 0; + u_int32_t gnv_flags = 0; + int protectable_target = 0; + +#if CONFIG_PROTECT + struct cprotect *entry = NULL; + uint32_t cp_class = 0; + if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) { + cp_class = vap->va_dataprotect_class; + } + int protected_mount = 0; +#endif + if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK))) return (error); + + /* set the cnode pointer only after successfully acquiring lock */ dcp = VTOC(dvp); + + /* Don't allow creation of new entries in open-unlinked directories */ + if ((error = hfs_checkdeleted(dcp))) { + hfs_unlock(dcp); + return error; + } + + dcp->c_flag |= C_DIR_MODIFICATION; + hfsmp = VTOHFS(dvp); + *vpp = NULL; tvp = NULL; out_desc.cd_flags = 0; out_desc.cd_nameptr = NULL; - mode = MAKEIMODE(vap->va_type, vap->va_mode); + vnodetype = vap->va_type; + if (vnodetype == VNON) + vnodetype = VREG; + mode = MAKEIMODE(vnodetype, vap->va_mode); - if ((mode & S_IFMT) == 0) - mode |= S_IFREG; - vnodetype = IFTOVT(mode); + if (S_ISDIR (mode) || S_ISREG (mode)) { + protectable_target = 1; + } + /* Check if were out of usable disk space. */ - if ((hfs_freeblks(hfsmp, 1) <= 0) && (suser(vfs_context_ucred(ctx), NULL) != 0)) { + if ((hfs_freeblks(hfsmp, 1) == 0) && (vfs_context_suser(ctx) != 0)) { error = ENOSPC; goto exit; } @@ -2932,16 +5133,63 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, /* Setup the default attributes */ bzero(&attr, sizeof(attr)); attr.ca_mode = mode; - attr.ca_nlink = vnodetype == VDIR ? 2 : 1; - attr.ca_mtime = tv.tv_sec; - if ((VTOVCB(dvp)->vcbSigWord == kHFSSigWord) && gTimeZone.tz_dsttime) { - attr.ca_mtime += 3600; /* Same as what hfs_update does */ + attr.ca_linkcount = 1; + if (VATTR_IS_ACTIVE(vap, va_rdev)) { + attr.ca_rdev = vap->va_rdev; + } + if (VATTR_IS_ACTIVE(vap, va_create_time)) { + VATTR_SET_SUPPORTED(vap, va_create_time); + attr.ca_itime = vap->va_create_time.tv_sec; + } else { + attr.ca_itime = tv.tv_sec; } - attr.ca_atime = attr.ca_ctime = attr.ca_itime = attr.ca_mtime; + if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) { + attr.ca_itime += 3600; /* Same as what hfs_update does */ + } + attr.ca_atime = attr.ca_ctime = attr.ca_mtime = attr.ca_itime; attr.ca_atimeondisk = attr.ca_atime; - /* On HFS+ the ThreadExists flag must always be set for files. */ - if (vnodetype != VDIR && (hfsmp->hfs_flags & HFS_STANDARD) == 0) - attr.ca_recflags = kHFSThreadExistsMask; + if (VATTR_IS_ACTIVE(vap, va_flags)) { + VATTR_SET_SUPPORTED(vap, va_flags); + attr.ca_flags = vap->va_flags; + } + + /* + * HFS+ only: all files get ThreadExists + * HFSX only: dirs get HasFolderCount + */ + if (!(hfsmp->hfs_flags & HFS_STANDARD)) { + if (vnodetype == VDIR) { + if (hfsmp->hfs_flags & HFS_FOLDERCOUNT) + attr.ca_recflags = kHFSHasFolderCountMask; + } else { + attr.ca_recflags = kHFSThreadExistsMask; + } + } + +#if CONFIG_PROTECT + if (cp_fs_protected(hfsmp->hfs_mp)) { + protected_mount = 1; + } + /* + * On a content-protected HFS+/HFSX filesystem, files and directories + * cannot be created without atomically setting/creating the EA that + * contains the protection class metadata and keys at the same time, in + * the same transaction. As a result, pre-set the "EAs exist" flag + * on the cat_attr for protectable catalog record creations. This will + * cause the cnode creation routine in hfs_getnewvnode to mark the cnode + * as having EAs. + */ + if ((protected_mount) && (protectable_target)) { + attr.ca_recflags |= kHFSHasAttributesMask; + } +#endif + + + /* + * Add the date added to the item. See above, as + * all of the dates are set to the itime. + */ + hfs_write_dateadded (&attr, attr.ca_atime); attr.ca_uid = vap->va_uid; attr.ca_gid = vap->va_gid; @@ -2949,6 +5197,19 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, VATTR_SET_SUPPORTED(vap, va_uid); VATTR_SET_SUPPORTED(vap, va_gid); +#if QUOTA + /* check to see if this node's creation would cause us to go over + * quota. If so, abort this operation. + */ + if (hfsmp->hfs_flags & HFS_QUOTAS) { + if ((error = hfs_quotacheck(hfsmp, 1, attr.ca_uid, attr.ca_gid, + vfs_context_ucred(ctx)))) { + goto exit; + } + } +#endif + + /* Tag symlinks with a type and creator. */ if (vnodetype == VLNK) { struct FndrFileInfo *fip; @@ -2961,55 +5222,100 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, attr.ca_flags |= UF_OPAQUE; /* Setup the descriptor */ - in_desc.cd_nameptr = cnp->cn_nameptr; + in_desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; in_desc.cd_namelen = cnp->cn_namelen; - in_desc.cd_parentcnid = dcp->c_cnid; + in_desc.cd_parentcnid = dcp->c_fileid; in_desc.cd_flags = S_ISDIR(mode) ? CD_ISDIR : 0; in_desc.cd_hint = dcp->c_childhint; in_desc.cd_encoding = 0; +#if CONFIG_PROTECT + /* + * To preserve file creation atomicity with regards to the content protection EA, + * we must create the file in the catalog and then write out the EA in the same + * transaction. Pre-flight any operations that we can (such as allocating/preparing + * the buffer, wrapping the keys) before we start the txn and take the requisite + * b-tree locks. We pass '0' as the fileid because we do not know it yet. + */ + if ((protected_mount) && (protectable_target)) { + error = cp_entry_create_keys (&entry, dcp, hfsmp, cp_class, 0, attr.ca_mode); + if (error) { + goto exit; + } + } +#endif + if ((error = hfs_start_transaction(hfsmp)) != 0) { goto exit; } started_tr = 1; - /* - * Reserve some space in the Catalog file. - * - * (we also add CAT_DELETE since our getnewvnode - * request can cause an hfs_inactive call to - * delete an unlinked file) - */ - if ((error = cat_preflight(hfsmp, CAT_CREATE | CAT_DELETE, &cookie, 0))) { + // have to also lock the attribute file because cat_create() needs + // to check that any fileID it wants to use does not have orphaned + // attributes in it. + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + /* Reserve some space in the Catalog file. */ + if ((error = cat_preflight(hfsmp, CAT_CREATE, NULL, 0))) { + hfs_systemfile_unlock(hfsmp, lockflags); goto exit; } - got_cookie = 1; - - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); error = cat_create(hfsmp, &in_desc, &attr, &out_desc); if (error == 0) { /* Update the parent directory */ dcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ - dcp->c_nlink++; dcp->c_entries++; + if (vnodetype == VDIR) { + INC_FOLDERCOUNT(hfsmp, dcp->c_attr); + } + dcp->c_dirchangecnt++; dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); - HFS_KNOTE(dvp, NOTE_ATTRIB); + +#if CONFIG_PROTECT + /* + * If we are creating a content protected file, now is when + * we create the EA. We must create it in the same transaction + * that creates the file. We can also guarantee that the file + * MUST exist because we are still holding the catalog lock + * at this point. + */ + if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target)) { + error = cp_setxattr (NULL, entry, hfsmp, attr.ca_fileid, XATTR_CREATE); + + if (error) { + int delete_err; + /* + * If we fail the EA creation, then we need to delete the file. + * Luckily, we are still holding all of the right locks. + */ + delete_err = cat_delete (hfsmp, &out_desc, &attr); + if (delete_err == 0) { + /* Update the parent directory */ + if (dcp->c_entries > 0) + dcp->c_entries--; + dcp->c_dirchangecnt++; + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + } + + /* Emit EINVAL if we fail to create EA*/ + error = EINVAL; + } + } +#endif } hfs_systemfile_unlock(hfsmp, lockflags); if (error) goto exit; /* Invalidate negative cache entries in the directory */ - if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) + if (dcp->c_flag & C_NEG_ENTRIES) { cache_purge_negatives(dvp); - - if (vnodetype == VDIR) { - HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); - } else { - HFS_KNOTE(dvp, NOTE_WRITE); - }; + dcp->c_flag &= ~C_NEG_ENTRIES; + } hfs_volupdate(hfsmp, vnodetype == VDIR ? VOL_MKDIR : VOL_MKFILE, (dcp->c_cnid == kHFSRootFolderID)); @@ -3031,98 +5337,85 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, started_tr = 0; } +#if CONFIG_PROTECT + /* + * At this point, we must have encountered success with writing the EA. + * Update MKB with the data for the cached key, then destroy it. This may + * prevent information leakage by ensuring the cache key is only unwrapped + * to perform file I/O and it is allowed. + */ + + if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target)) { + cp_update_mkb (entry, attr.ca_fileid); + cp_entry_destroy (&entry); + } +#endif + + /* Do not create vnode for whiteouts */ + if (S_ISWHT(mode)) { + goto exit; + } + + gnv_flags |= GNV_CREATE; + /* * Create a vnode for the object just created. - * + * + * NOTE: Maintaining the cnode lock on the parent directory is important, + * as it prevents race conditions where other threads want to look up entries + * in the directory and/or add things as we are in the process of creating + * the vnode below. However, this has the potential for causing a + * double lock panic when dealing with shadow files on a HFS boot partition. + * The panic could occur if we are not cleaning up after ourselves properly + * when done with a shadow file or in the error cases. The error would occur if we + * try to create a new vnode, and then end up reclaiming another shadow vnode to + * create the new one. However, if everything is working properly, this should + * be a non-issue as we would never enter that reclaim codepath. + * * The cnode is locked on successful return. */ - error = hfs_getnewvnode(hfsmp, dvp, cnp, &out_desc, 0, &attr, NULL, &tvp); + error = hfs_getnewvnode(hfsmp, dvp, cnp, &out_desc, gnv_flags, &attr, + NULL, &tvp, &newvnode_flags); if (error) goto exit; - // XXXdbg - //cache_enter(dvp, tvp, cnp); - cp = VTOC(tvp); + *vpp = tvp; + #if QUOTA /* - * We call hfs_chkiq with FORCE flag so that if we - * fall through to the rmdir we actually have - * accounted for the inode - */ - if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_QUOTA) { - if ((error = hfs_getinoquota(cp)) || - (error = hfs_chkiq(cp, 1, vfs_context_ucred(ctx), FORCE))) { - - if (vnode_isdir(tvp)) - (void) hfs_removedir(dvp, tvp, cnp, 0); - else { - hfs_unlock(cp); - hfs_lock_truncate(cp, TRUE); - hfs_lock(cp, HFS_FORCE_LOCK); - (void) hfs_removefile(dvp, tvp, cnp, 0, 0); - hfs_unlock_truncate(cp); - } - /* - * we successfully allocated a new vnode, but - * the quota check is telling us we're beyond - * our limit, so we need to dump our lock + reference - */ - hfs_unlock(cp); - vnode_put(tvp); - - goto exit; - } + * Once we create this vnode, we need to initialize its quota data + * structures, if necessary. We know that it is OK to just go ahead and + * initialize because we've already validated earlier (through the hfs_quotacheck + * function) to see if creating this cnode/vnode would cause us to go over quota. + */ + if (hfsmp->hfs_flags & HFS_QUOTAS) { + (void) hfs_getinoquota(cp); } -#endif /* QUOTA */ +#endif - /* Remember if any ACL data was set. */ - if (VATTR_IS_ACTIVE(vap, va_acl) && - (vap->va_acl != NULL)) { - cp->c_attr.ca_recflags |= kHFSHasSecurityMask; - cp->c_touch_chgtime = TRUE; - (void) hfs_update(tvp, TRUE); - } - *vpp = tvp; exit: cat_releasedesc(&out_desc); + +#if CONFIG_PROTECT + /* + * We may have jumped here in error-handling various situations above. + * If we haven't already dumped the temporary CP used to initialize + * the file atomically, then free it now. cp_entry_destroy should null + * out the pointer if it was called already. + */ + if (entry) { + cp_entry_destroy (&entry); + } +#endif - if (got_cookie) { - cat_postflight(hfsmp, &cookie, 0); - } /* - * Check if a file is located in the "Cleanup At Startup" - * directory. If it is then tag it as NODUMP so that we - * can be lazy about zero filling data holes. + * Make sure we release cnode lock on dcp. */ - if ((error == 0) && dvp && (vnodetype == VREG) && - (dcp->c_desc.cd_nameptr != NULL) && - (strcmp(dcp->c_desc.cd_nameptr, CARBON_TEMP_DIR_NAME) == 0)) { - struct vnode *ddvp; - - hfs_unlock(dcp); - dvp = NULL; - - /* - * The parent of "Cleanup At Startup" should - * have the ASCII name of the userid. - */ - if (hfs_vget(hfsmp, dcp->c_parentcnid, &ddvp, 0) == 0) { - if (VTOC(ddvp)->c_desc.cd_nameptr) { - uid_t uid; - - uid = strtoul(VTOC(ddvp)->c_desc.cd_nameptr, 0, 0); - if ((uid == cp->c_uid) || - (uid == vfs_context_ucred(ctx)->cr_uid)) { - cp->c_flags |= UF_NODUMP; - cp->c_touch_chgtime = TRUE; - } - } - hfs_unlock(VTOC(ddvp)); - vnode_put(ddvp); - } - } - if (dvp) { + if (dcp) { + dcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&dcp->c_flag); + hfs_unlock(dcp); } if (error == 0 && cp != NULL) { @@ -3138,50 +5431,239 @@ exit: /* - * WARNING - assumes caller has cnode lock. + * hfs_vgetrsrc acquires a resource fork vnode corresponding to the cnode that is + * found in 'vp'. The rsrc fork vnode is returned with the cnode locked and iocount + * on the rsrc vnode. + * + * *rvpp is an output argument for returning the pointer to the resource fork vnode. + * In most cases, the resource fork vnode will not be set if we return an error. + * However, if error_on_unlinked is set, we may have already acquired the resource fork vnode + * before we discover the error (the file has gone open-unlinked). In this case only, + * we may return a vnode in the output argument despite an error. + * + * If can_drop_lock is set, then it is safe for this function to temporarily drop + * and then re-acquire the cnode lock. We may need to do this, for example, in order to + * acquire an iocount or promote our lock. + * + * error_on_unlinked is an argument which indicates that we are to return an error if we + * discover that the cnode has gone into an open-unlinked state ( C_DELETED or C_NOEXISTS) + * is set in the cnode flags. This is only necessary if can_drop_lock is true, otherwise + * there's really no reason to double-check for errors on the cnode. */ -__private_extern__ + int -hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, __unused struct proc *p) +hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, + int can_drop_lock, int error_on_unlinked) { struct vnode *rvp; + struct vnode *dvp = NULLVP; struct cnode *cp = VTOC(vp); int error; int vid; + int delete_status = 0; + + if (vnode_vtype(vp) == VDIR) { + return EINVAL; + } + + /* + * Need to check the status of the cnode to validate it hasn't gone + * open-unlinked on us before we can actually do work with it. + */ + delete_status = hfs_checkdeleted(cp); + if ((delete_status) && (error_on_unlinked)) { + return delete_status; + } +restart: + /* Attempt to use existing vnode */ if ((rvp = cp->c_rsrc_vp)) { vid = vnode_vid(rvp); - /* Use exising vnode */ + /* + * It is not safe to hold the cnode lock when calling vnode_getwithvid() + * for the alternate fork -- vnode_getwithvid() could deadlock waiting + * for a VL_WANTTERM while another thread has an iocount on the alternate + * fork vnode and is attempting to acquire the common cnode lock. + * + * But it's also not safe to drop the cnode lock when we're holding + * multiple cnode locks, like during a hfs_removefile() operation + * since we could lock out of order when re-acquiring the cnode lock. + * + * So we can only drop the lock here if its safe to drop it -- which is + * most of the time with the exception being hfs_removefile(). + */ + if (can_drop_lock) + hfs_unlock(cp); + error = vnode_getwithvid(rvp, vid); + + if (can_drop_lock) { + (void) hfs_lock(cp, HFS_FORCE_LOCK); + + /* + * When we relinquished our cnode lock, the cnode could have raced + * with a delete and gotten deleted. If the caller did not want + * us to ignore open-unlinked files, then re-check the C_DELETED + * state and see if we need to return an ENOENT here because the item + * got deleted in the intervening time. + */ + if (error_on_unlinked) { + if ((delete_status = hfs_checkdeleted(cp))) { + /* + * If error == 0, this means that we succeeded in acquiring an iocount on the + * rsrc fork vnode. However, if we're in this block of code, that means that we noticed + * that the cnode has gone open-unlinked. In this case, the caller requested that we + * not do any other work and return an errno. The caller will be responsible for + * dropping the iocount we just acquired because we can't do it until we've released + * the cnode lock. + */ + if (error == 0) { + *rvpp = rvp; + } + return delete_status; + } + } + + /* + * When our lock was relinquished, the resource fork + * could have been recycled. Check for this and try + * again. + */ + if (error == ENOENT) + goto restart; + } if (error) { - char * name = VTOC(vp)->c_desc.cd_nameptr; + const char * name = (const char *)VTOC(vp)->c_desc.cd_nameptr; if (name) - printf("hfs_vgetrsrc: couldn't get" - " resource fork for %s\n", name); + printf("hfs_vgetrsrc: couldn't get resource" + " fork for %s, err %d\n", name, error); return (error); } } else { struct cat_fork rsrcfork; struct componentname cn; + struct cat_desc *descptr = NULL; + struct cat_desc to_desc; + char delname[32]; int lockflags; + int newvnode_flags = 0; + + /* + * Make sure cnode lock is exclusive, if not upgrade it. + * + * We assume that we were called from a read-only VNOP (getattr) + * and that its safe to have the cnode lock dropped and reacquired. + */ + if (cp->c_lockowner != current_thread()) { + if (!can_drop_lock) { + return (EINVAL); + } + /* + * If the upgrade fails we lose the lock and + * have to take the exclusive lock on our own. + */ + if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE) + lck_rw_lock_exclusive(&cp->c_rwlock); + cp->c_lockowner = current_thread(); + } + + /* + * hfs_vgetsrc may be invoked for a cnode that has already been marked + * C_DELETED. This is because we need to continue to provide rsrc + * fork access to open-unlinked files. In this case, build a fake descriptor + * like in hfs_removefile. If we don't do this, buildkey will fail in + * cat_lookup because this cnode has no name in its descriptor. However, + * only do this if the caller did not specify that they wanted us to + * error out upon encountering open-unlinked files. + */ + + if ((error_on_unlinked) && (can_drop_lock)) { + if ((error = hfs_checkdeleted(cp))) { + return error; + } + } + + if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) { + bzero (&to_desc, sizeof(to_desc)); + bzero (delname, 32); + MAKE_DELETED_NAME(delname, sizeof(delname), cp->c_fileid); + to_desc.cd_nameptr = (const u_int8_t*) delname; + to_desc.cd_namelen = strlen(delname); + to_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + to_desc.cd_flags = 0; + to_desc.cd_cnid = cp->c_cnid; + + descptr = &to_desc; + } + else { + descptr = &cp->c_desc; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - /* Get resource fork data */ - error = cat_lookup(hfsmp, &cp->c_desc, 1, (struct cat_desc *)0, - (struct cat_attr *)0, &rsrcfork, NULL); + /* + * Get resource fork data + * + * We call cat_idlookup (instead of cat_lookup) below because we can't + * trust the descriptor in the provided cnode for lookups at this point. + * Between the time of the original lookup of this vnode and now, the + * descriptor could have gotten swapped or replaced. If this occurred, + * the parent/name combo originally desired may not necessarily be provided + * if we use the descriptor. Even worse, if the vnode represents + * a hardlink, we could have removed one of the links from the namespace + * but left the descriptor alone, since hfs_unlink does not invalidate + * the descriptor in the cnode if other links still point to the inode. + * + * Consider the following (slightly contrived) scenario: + * /tmp/a <--> /tmp/b (hardlinks). + * 1. Thread A: open rsrc fork on /tmp/b. + * 1a. Thread A: does lookup, goes out to lunch right before calling getnamedstream. + * 2. Thread B does 'mv /foo/b /tmp/b' + * 2. Thread B succeeds. + * 3. Thread A comes back and wants rsrc fork info for /tmp/b. + * + * Even though the hardlink backing /tmp/b is now eliminated, the descriptor + * is not removed/updated during the unlink process. So, if you were to + * do a lookup on /tmp/b, you'd acquire an entirely different record's resource + * fork. + * + * As a result, we use the fileid, which should be invariant for the lifetime + * of the cnode (possibly barring calls to exchangedata). + * + * Addendum: We can't do the above for HFS standard since we aren't guaranteed to + * have thread records for files. They were only required for directories. So + * we need to do the lookup with the catalog name. This is OK since hardlinks were + * never allowed on HFS standard. + */ + + if (hfsmp->hfs_flags & HFS_STANDARD) { + /* + * HFS standard only: + * + * Get the resource fork for this item via catalog lookup + * since HFS standard was case-insensitive only. We don't want the + * descriptor; just the fork data here. + */ + error = cat_lookup (hfsmp, descptr, 1, (struct cat_desc*)NULL, + (struct cat_attr*)NULL, &rsrcfork, NULL); + } + else { + error = cat_idlookup (hfsmp, cp->c_fileid, 0, 1, NULL, NULL, &rsrcfork); + } hfs_systemfile_unlock(hfsmp, lockflags); - if (error) + if (error) { return (error); - + } + /* * Supply hfs_getnewvnode with a component name. */ cn.cn_pnbuf = NULL; - if (cp->c_desc.cd_nameptr) { + if (descptr->cd_nameptr) { MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN | HASBUF; @@ -3190,10 +5672,16 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, __un cn.cn_nameptr = cn.cn_pnbuf; cn.cn_hash = 0; cn.cn_consume = 0; - cn.cn_namelen = sprintf(cn.cn_nameptr, "%s%s", cp->c_desc.cd_nameptr, _PATH_RSRCFORKSPEC); + cn.cn_namelen = snprintf(cn.cn_nameptr, MAXPATHLEN, + "%s%s", descptr->cd_nameptr, + _PATH_RSRCFORKSPEC); } - error = hfs_getnewvnode(hfsmp, vnode_parent(vp), cn.cn_pnbuf ? &cn : NULL, - &cp->c_desc, 2, &cp->c_attr, &rsrcfork, &rvp); + dvp = vnode_getparent(vp); + error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL, + descptr, GNV_WANTRSRC | GNV_SKIPLOCK, &cp->c_attr, + &rsrcfork, &rvp, &newvnode_flags); + if (dvp) + vnode_put(dvp); if (cn.cn_pnbuf) FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); if (error) @@ -3204,181 +5692,10 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, __un return (0); } - -static void -filt_hfsdetach(struct knote *kn) -{ - struct vnode *vp; - - vp = (struct vnode *)kn->kn_hook; - if (vnode_getwithvid(vp, kn->kn_hookid)) - return; - - if (1) { /* ! KNDETACH_VNLOCKED */ - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { - (void) KNOTE_DETACH(&VTOC(vp)->c_knotes, kn); - hfs_unlock(VTOC(vp)); - } - } - - vnode_put(vp); -} - -/*ARGSUSED*/ -static int -filt_hfsread(struct knote *kn, long hint) -{ - struct vnode *vp = (struct vnode *)kn->kn_hook; - int dropvp = 0; - - if (hint == 0) { - if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) { - hint = NOTE_REVOKE; - } else - dropvp = 1; - } - if (hint == NOTE_REVOKE) { - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. - */ - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - - /* poll(2) semantics dictate always saying there is data */ - kn->kn_data = (!(kn->kn_flags & EV_POLL)) ? - VTOF(vp)->ff_size - kn->kn_fp->f_fglob->fg_offset : 1; - - if (dropvp) - vnode_put(vp); - - return (kn->kn_data != 0); -} - -/*ARGSUSED*/ -static int -filt_hfswrite(struct knote *kn, long hint) -{ - int dropvp = 0; - - if (hint == 0) { - if ((vnode_getwithvid(kn->kn_hook, kn->kn_hookid) != 0)) { - hint = NOTE_REVOKE; - } else - vnode_put(kn->kn_hook); - } - if (hint == NOTE_REVOKE) { - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. - */ - kn->kn_data = 0; - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - kn->kn_data = 0; - return (1); -} - -static int -filt_hfsvnode(struct knote *kn, long hint) -{ - - if (hint == 0) { - if ((vnode_getwithvid(kn->kn_hook, kn->kn_hookid) != 0)) { - hint = NOTE_REVOKE; - } else - vnode_put(kn->kn_hook); - } - if (kn->kn_sfflags & hint) - kn->kn_fflags |= hint; - if ((hint == NOTE_REVOKE)) { - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - - return (kn->kn_fflags != 0); -} - -static struct filterops hfsread_filtops = - { 1, NULL, filt_hfsdetach, filt_hfsread }; -static struct filterops hfswrite_filtops = - { 1, NULL, filt_hfsdetach, filt_hfswrite }; -static struct filterops hfsvnode_filtops = - { 1, NULL, filt_hfsdetach, filt_hfsvnode }; - -/* - * Add a kqueue filter. - */ -static int -hfs_vnop_kqfiltadd( - struct vnop_kqfilt_add_args /* { - struct vnode *a_vp; - struct knote *a_kn; - struct proc *p; - vfs_context_t a_context; - } */ *ap) -{ - struct vnode *vp = ap->a_vp; - struct knote *kn = ap->a_kn; - int error; - - switch (kn->kn_filter) { - case EVFILT_READ: - if (vnode_isreg(vp)) { - kn->kn_fop = &hfsread_filtops; - } else { - return EINVAL; - }; - break; - case EVFILT_WRITE: - if (vnode_isreg(vp)) { - kn->kn_fop = &hfswrite_filtops; - } else { - return EINVAL; - }; - break; - case EVFILT_VNODE: - kn->kn_fop = &hfsvnode_filtops; - break; - default: - return (1); - } - - kn->kn_hook = (caddr_t)vp; - kn->kn_hookid = vnode_vid(vp); - - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) - return (error); - KNOTE_ATTACH(&VTOC(vp)->c_knotes, kn); - hfs_unlock(VTOC(vp)); - - return (0); -} - -/* - * Remove a kqueue filter - */ -static int -hfs_vnop_kqfiltremove(ap) - struct vnop_kqfilt_remove_args /* { - struct vnode *a_vp; - uintptr_t ident; - vfs_context_t a_context; - } */ *ap; -{ - int result; - - result = ENOTSUP; /* XXX */ - - return (result); -} - /* * Wrapper for special device reads */ -static int +int hfsspec_read(ap) struct vnop_read_args /* { struct vnode *a_vp; @@ -3397,7 +5714,7 @@ hfsspec_read(ap) /* * Wrapper for special device writes */ -static int +int hfsspec_write(ap) struct vnop_write_args /* { struct vnode *a_vp; @@ -3419,7 +5736,7 @@ hfsspec_write(ap) * * Update the times on the cnode then do device close. */ -static int +int hfsspec_close(ap) struct vnop_close_args /* { struct vnode *a_vp; @@ -3430,7 +5747,7 @@ hfsspec_close(ap) struct vnode *vp = ap->a_vp; struct cnode *cp; - if (vnode_isinuse(ap->a_vp, 1)) { + if (vnode_isinuse(ap->a_vp, 0)) { if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { cp = VTOC(vp); hfs_touchtimes(VTOHFS(vp), cp); @@ -3453,8 +5770,6 @@ hfsfifo_read(ap) vfs_context_t a_context; } */ *ap; { - extern int (**fifo_vnodeop_p)(void *); - /* * Set access flag. */ @@ -3474,8 +5789,6 @@ hfsfifo_write(ap) vfs_context_t a_context; } */ *ap; { - extern int (**fifo_vnodeop_p)(void *); - /* * Set update and change flags. */ @@ -3497,7 +5810,6 @@ hfsfifo_close(ap) vfs_context_t a_context; } */ *ap; { - extern int (**fifo_vnodeop_p)(void *); struct vnode *vp = ap->a_vp; struct cnode *cp; @@ -3511,48 +5823,13 @@ hfsfifo_close(ap) return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_close), ap)); } -/* - * kqfilt_add wrapper for fifos. - * - * Fall through to hfs kqfilt_add routines if needed - */ -int -hfsfifo_kqfilt_add(ap) - struct vnop_kqfilt_add_args *ap; -{ - extern int (**fifo_vnodeop_p)(void *); - int error; - - error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_add), ap); - if (error) - error = hfs_vnop_kqfiltadd(ap); - return (error); -} - -/* - * kqfilt_remove wrapper for fifos. - * - * Fall through to hfs kqfilt_remove routines if needed - */ -int -hfsfifo_kqfilt_remove(ap) - struct vnop_kqfilt_remove_args *ap; -{ - extern int (**fifo_vnodeop_p)(void *); - int error; - - error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_remove), ap); - if (error) - error = hfs_vnop_kqfiltremove(ap); - return (error); -} #endif /* FIFO */ /* * Synchronize a file's in-core state with that on disk. */ -static int +int hfs_vnop_fsync(ap) struct vnop_fsync_args /* { struct vnode *a_vp; @@ -3563,6 +5840,21 @@ hfs_vnop_fsync(ap) struct vnode* vp = ap->a_vp; int error; + /* Note: We check hfs flags instead of vfs mount flag because during + * read-write update, hfs marks itself read-write much earlier than + * the vfs, and hence won't result in skipping of certain writes like + * zero'ing out of unused nodes, creation of hotfiles btree, etc. + */ + if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) { + return 0; + } + +#if CONFIG_PROTECT + if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { + return (error); + } +#endif /* CONFIG_PROTECT */ + /* * We need to allow ENOENT lock errors since unlink * systenm call can call VNOP_FSYNC during vclean. @@ -3577,39 +5869,148 @@ hfs_vnop_fsync(ap) return (error); } -/***************************************************************************** -* -* VOP Tables -* -*****************************************************************************/ -int hfs_vnop_readdirattr(struct vnop_readdirattr_args *); /* in hfs_attrlist.c */ -int hfs_vnop_inactive(struct vnop_inactive_args *); /* in hfs_cnode.c */ -int hfs_vnop_reclaim(struct vnop_reclaim_args *); /* in hfs_cnode.c */ -int hfs_vnop_link(struct vnop_link_args *); /* in hfs_link.c */ -int hfs_vnop_lookup(struct vnop_lookup_args *); /* in hfs_lookup.c */ -int hfs_vnop_search(struct vnop_searchfs_args *); /* in hfs_search.c */ - -int hfs_vnop_read(struct vnop_read_args *); /* in hfs_readwrite.c */ -int hfs_vnop_write(struct vnop_write_args *); /* in hfs_readwrite.c */ -int hfs_vnop_ioctl(struct vnop_ioctl_args *); /* in hfs_readwrite.c */ -int hfs_vnop_select(struct vnop_select_args *); /* in hfs_readwrite.c */ -int hfs_vnop_strategy(struct vnop_strategy_args *); /* in hfs_readwrite.c */ -int hfs_vnop_allocate(struct vnop_allocate_args *); /* in hfs_readwrite.c */ -int hfs_vnop_pagein(struct vnop_pagein_args *); /* in hfs_readwrite.c */ -int hfs_vnop_pageout(struct vnop_pageout_args *); /* in hfs_readwrite.c */ -int hfs_vnop_bwrite(struct vnop_bwrite_args *); /* in hfs_readwrite.c */ -int hfs_vnop_blktooff(struct vnop_blktooff_args *); /* in hfs_readwrite.c */ -int hfs_vnop_offtoblk(struct vnop_offtoblk_args *); /* in hfs_readwrite.c */ -int hfs_vnop_blockmap(struct vnop_blockmap_args *); /* in hfs_readwrite.c */ -int hfs_vnop_getxattr(struct vnop_getxattr_args *); /* in hfs_xattr.c */ -int hfs_vnop_setxattr(struct vnop_setxattr_args *); /* in hfs_xattr.c */ -int hfs_vnop_removexattr(struct vnop_removexattr_args *); /* in hfs_xattr.c */ -int hfs_vnop_listxattr(struct vnop_listxattr_args *); /* in hfs_xattr.c */ + +int +hfs_vnop_whiteout(ap) + struct vnop_whiteout_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + int a_flags; + vfs_context_t a_context; + } */ *ap; +{ + int error = 0; + struct vnode *vp = NULL; + struct vnode_attr va; + struct vnop_lookup_args lookup_args; + struct vnop_remove_args remove_args; + struct hfsmount *hfsmp; + + hfsmp = VTOHFS(ap->a_dvp); + if (hfsmp->hfs_flags & HFS_STANDARD) { + error = ENOTSUP; + goto exit; + } + + switch (ap->a_flags) { + case LOOKUP: + error = 0; + break; + + case CREATE: + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VREG); + VATTR_SET(&va, va_mode, S_IFWHT); + VATTR_SET(&va, va_uid, 0); + VATTR_SET(&va, va_gid, 0); + + error = hfs_makenode(ap->a_dvp, &vp, ap->a_cnp, &va, ap->a_context); + /* No need to release the vnode as no vnode is created for whiteouts */ + break; + + case DELETE: + lookup_args.a_dvp = ap->a_dvp; + lookup_args.a_vpp = &vp; + lookup_args.a_cnp = ap->a_cnp; + lookup_args.a_context = ap->a_context; + + error = hfs_vnop_lookup(&lookup_args); + if (error) { + break; + } + + remove_args.a_dvp = ap->a_dvp; + remove_args.a_vp = vp; + remove_args.a_cnp = ap->a_cnp; + remove_args.a_flags = 0; + remove_args.a_context = ap->a_context; + + error = hfs_vnop_remove(&remove_args); + vnode_put(vp); + break; + + default: + panic("hfs_vnop_whiteout: unknown operation (flag = %x)\n", ap->a_flags); + }; + +exit: + return (error); +} int (**hfs_vnodeop_p)(void *); +int (**hfs_std_vnodeop_p) (void *); #define VOPFUNC int (*)(void *) +static int hfs_readonly_op (__unused void* ap) { return (EROFS); } + +/* + * In 10.6 and forward, HFS Standard is read-only and deprecated. The vnop table below + * is for use with HFS standard to block out operations that would modify the file system + */ + +struct vnodeopv_entry_desc hfs_standard_vnodeop_entries[] = { + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)hfs_vnop_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)hfs_readonly_op }, /* create (READONLY) */ + { &vnop_mknod_desc, (VOPFUNC)hfs_readonly_op }, /* mknod (READONLY) */ + { &vnop_open_desc, (VOPFUNC)hfs_vnop_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)hfs_vnop_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)hfs_readonly_op }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)hfs_vnop_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)hfs_readonly_op }, /* write (READONLY) */ + { &vnop_ioctl_desc, (VOPFUNC)hfs_vnop_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)hfs_vnop_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ + { &vnop_exchange_desc, (VOPFUNC)hfs_readonly_op }, /* exchange (READONLY)*/ + { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)hfs_readonly_op}, /* fsync (READONLY) */ + { &vnop_remove_desc, (VOPFUNC)hfs_readonly_op }, /* remove (READONLY) */ + { &vnop_link_desc, (VOPFUNC)hfs_readonly_op }, /* link ( READONLLY) */ + { &vnop_rename_desc, (VOPFUNC)hfs_readonly_op }, /* rename (READONLY)*/ + { &vnop_mkdir_desc, (VOPFUNC)hfs_readonly_op }, /* mkdir (READONLY) */ + { &vnop_rmdir_desc, (VOPFUNC)hfs_readonly_op }, /* rmdir (READONLY) */ + { &vnop_symlink_desc, (VOPFUNC)hfs_readonly_op }, /* symlink (READONLY) */ + { &vnop_readdir_desc, (VOPFUNC)hfs_vnop_readdir }, /* readdir */ + { &vnop_readdirattr_desc, (VOPFUNC)hfs_vnop_readdirattr }, /* readdirattr */ + { &vnop_readlink_desc, (VOPFUNC)hfs_vnop_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)hfs_vnop_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_allocate_desc, (VOPFUNC)hfs_readonly_op }, /* allocate (READONLY) */ +#if CONFIG_SEARCHFS + { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search }, /* search fs */ +#else + { &vnop_searchfs_desc, (VOPFUNC)err_searchfs }, /* search fs */ +#endif + { &vnop_bwrite_desc, (VOPFUNC)hfs_readonly_op }, /* bwrite (READONLY) */ + { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* pagein */ + { &vnop_pageout_desc,(VOPFUNC) hfs_readonly_op }, /* pageout (READONLY) */ + { &vnop_copyfile_desc, (VOPFUNC)hfs_readonly_op }, /* copyfile (READONLY)*/ + { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_readonly_op}, /* set xattr (READONLY) */ + { &vnop_removexattr_desc, (VOPFUNC)hfs_readonly_op}, /* remove xattr (READONLY) */ + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, + { &vnop_whiteout_desc, (VOPFUNC)hfs_readonly_op}, /* whiteout (READONLY) */ +#if NAMEDSTREAMS + { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream }, + { &vnop_makenamedstream_desc, (VOPFUNC)hfs_readonly_op }, + { &vnop_removenamedstream_desc, (VOPFUNC)hfs_readonly_op }, +#endif + { NULL, (VOPFUNC)NULL } +}; + +struct vnodeopv_desc hfs_std_vnodeop_opv_desc = +{ &hfs_std_vnodeop_p, hfs_standard_vnodeop_entries }; + + +/* VNOP table for HFS+ */ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { { &vnop_default_desc, (VOPFUNC)vn_default_error }, { &vnop_lookup_desc, (VOPFUNC)hfs_vnop_lookup }, /* lookup */ @@ -3625,7 +6026,7 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { { &vnop_select_desc, (VOPFUNC)hfs_vnop_select }, /* select */ { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ { &vnop_exchange_desc, (VOPFUNC)hfs_vnop_exchange }, /* exchange */ - { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_mmap_desc, (VOPFUNC)hfs_vnop_mmap }, /* mmap */ { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync }, /* fsync */ { &vnop_remove_desc, (VOPFUNC)hfs_vnop_remove }, /* remove */ { &vnop_link_desc, (VOPFUNC)hfs_vnop_link }, /* link */ @@ -3642,7 +6043,11 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf }, /* pathconf */ { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ { &vnop_allocate_desc, (VOPFUNC)hfs_vnop_allocate }, /* allocate */ +#if CONFIG_SEARCHFS { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search }, /* search fs */ +#else + { &vnop_searchfs_desc, (VOPFUNC)err_searchfs }, /* search fs */ +#endif { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, /* bwrite */ { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* pagein */ { &vnop_pageout_desc,(VOPFUNC) hfs_vnop_pageout }, /* pageout */ @@ -3650,18 +6055,24 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ - { &vnop_kqfilt_add_desc, (VOPFUNC)hfs_vnop_kqfiltadd }, /* kqfilt_add */ - { &vnop_kqfilt_remove_desc, (VOPFUNC)hfs_vnop_kqfiltremove }, /* kqfilt_remove */ { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, + { &vnop_whiteout_desc, (VOPFUNC)hfs_vnop_whiteout}, +#if NAMEDSTREAMS + { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream }, + { &vnop_makenamedstream_desc, (VOPFUNC)hfs_vnop_makenamedstream }, + { &vnop_removenamedstream_desc, (VOPFUNC)hfs_vnop_removenamedstream }, +#endif { NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc hfs_vnodeop_opv_desc = { &hfs_vnodeop_p, hfs_vnodeop_entries }; + +/* Spec Op vnop table for HFS+ */ int (**hfs_specop_p)(void *); struct vnodeopv_entry_desc hfs_specop_entries[] = { { &vnop_default_desc, (VOPFUNC)vn_default_error }, @@ -3693,10 +6104,9 @@ struct vnodeopv_entry_desc hfs_specop_entries[] = { { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, - { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* Pagein */ { &vnop_pageout_desc, (VOPFUNC)hfs_vnop_pageout }, /* Pageout */ - { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } @@ -3705,6 +6115,7 @@ struct vnodeopv_desc hfs_specop_opv_desc = { &hfs_specop_p, hfs_specop_entries }; #if FIFO +/* HFS+ FIFO VNOP table */ int (**hfs_fifoop_p)(void *); struct vnodeopv_entry_desc hfs_fifoop_entries[] = { { &vnop_default_desc, (VOPFUNC)vn_default_error }, @@ -3742,8 +6153,6 @@ struct vnodeopv_entry_desc hfs_fifoop_entries[] = { { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ - { &vnop_kqfilt_add_desc, (VOPFUNC)hfsfifo_kqfilt_add }, /* kqfilt_add */ - { &vnop_kqfilt_remove_desc, (VOPFUNC)hfsfifo_kqfilt_remove }, /* kqfilt_remove */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc hfs_fifoop_opv_desc =