/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
+#include <sys/buf_internal.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
+#include <sys/vnode_internal.h>
#include <sys/uio.h>
#include <sys/vfs_context.h>
+#include <sys/fsevents.h>
+#include <kern/kalloc.h>
#include <sys/disk.h>
#include <sys/sysctl.h>
+#include <sys/fsctl.h>
+#include <sys/mount_internal.h>
+#include <sys/file_internal.h>
#include <miscfs/specfs/specdev.h>
#include <sys/ubc.h>
+#include <sys/ubc_internal.h>
+
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <sys/kdebug.h>
#include "hfs.h"
+#include "hfs_attrlist.h"
#include "hfs_endian.h"
-#include "hfs_fsctl.h"
+#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"
-extern int overflow_extents(struct filefork *fp);
-
#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
enum {
MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};
-extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
-
-extern int hfs_setextendedsecurity(struct hfsmount *, int);
-
+/* from bsd/hfs/hfs_vfsops.c */
+extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
-static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
+static int hfs_minorupdate(struct vnode *vp);
+static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
+
+/* from bsd/hfs/hfs_vnops.c */
+extern decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp);
-int flush_cache_on_write = 0;
-SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
-
-
-/*****************************************************************************
-*
-* I/O Operations on vnodes
-*
-*****************************************************************************/
-int hfs_vnop_read(struct vnop_read_args *);
-int hfs_vnop_write(struct vnop_write_args *);
-int hfs_vnop_ioctl(struct vnop_ioctl_args *);
-int hfs_vnop_select(struct vnop_select_args *);
-int hfs_vnop_blktooff(struct vnop_blktooff_args *);
-int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
-int hfs_vnop_blockmap(struct vnop_blockmap_args *);
-int hfs_vnop_strategy(struct vnop_strategy_args *);
-int hfs_vnop_allocate(struct vnop_allocate_args *);
-int hfs_vnop_pagein(struct vnop_pagein_args *);
-int hfs_vnop_pageout(struct vnop_pageout_args *);
-int hfs_vnop_bwrite(struct vnop_bwrite_args *);
+int flush_cache_on_write = 0;
+SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
/*
* Read data from a file.
int
hfs_vnop_read(struct vnop_read_args *ap)
{
+ /*
+ struct vnop_read_args {
+ struct vnodeop_desc *a_desc;
+ vnode_t a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ vfs_context_t a_context;
+ };
+ */
+
uio_t uio = ap->a_uio;
struct vnode *vp = ap->a_vp;
struct cnode *cp;
off_t start_resid = uio_resid(uio);
off_t offset = uio_offset(uio);
int retval = 0;
-
+ int took_truncate_lock = 0;
+ int io_throttle = 0;
+ int throttled_count = 0;
/* Preflight checks */
if (!vnode_isreg(vp)) {
if (offset < 0)
return (EINVAL); /* cant read from a negative offset */
+ if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
+ (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
+ /* Don't allow unencrypted io request from user space */
+ return EPERM;
+ }
+
+
+
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
+ return 0;
+ }
+ /* otherwise read the resource fork normally */
+ } else {
+ int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+ if (compressed) {
+ retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
+ if (compressed) {
+ if (retval == 0) {
+ /* successful read, update the access time */
+ VTOC(vp)->c_touch_acctime = TRUE;
+
+ /* compressed files are not hot file candidates */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ VTOF(vp)->ff_bytesread = 0;
+ }
+ }
+ return retval;
+ }
+ /* otherwise the file was converted back to a regular file while we were reading it */
+ retval = 0;
+ } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
+ int error;
+
+ error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
+ if (error) {
+ return error;
+ }
+
+ }
+ }
+#endif /* HFS_COMPRESSION */
+
cp = VTOC(vp);
fp = VTOF(vp);
hfsmp = VTOHFS(vp);
+#if CONFIG_PROTECT
+ if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
+ goto exit;
+ }
+#endif
+
+ /*
+ * If this read request originated from a syscall (as opposed to
+ * an in-kernel page fault or something), then set it up for
+ * throttle checks
+ */
+ if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
+ io_throttle = IO_RETURN_ON_THROTTLE;
+ }
+
+read_again:
+
/* Protect against a size change. */
- hfs_lock_truncate(cp, 0);
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
+ took_truncate_lock = 1;
filesize = fp->ff_size;
filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
+
+ /*
+ * Check the file size. Note that per POSIX spec, we return 0 at
+ * file EOF, so attempting a read at an offset that is too big
+ * should just return 0 on HFS+. Since the return value was initialized
+ * to 0 above, we just jump to exit. HFS Standard has its own behavior.
+ */
if (offset > filesize) {
if ((hfsmp->hfs_flags & HFS_STANDARD) &&
(offset > (off_t)MAXHFSFILESIZE)) {
goto exit;
}
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
+ KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
- retval = cluster_read(vp, uio, filesize, 0);
+ retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle);
cp->c_touch_acctime = TRUE;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
+ KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
/*
* Keep track of blocks read
*/
- if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
+ if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
int took_cnode_lock = 0;
off_t bytesread;
/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
- hfs_lock(cp, HFS_FORCE_LOCK);
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
took_cnode_lock = 1;
}
/*
* If this file hasn't been seen since the start of
* the current sampling period then start over.
*/
- if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
+ if (cp->c_atime < hfsmp->hfc_timebase) {
struct timeval tv;
fp->ff_bytesread = bytesread;
hfs_unlock(cp);
}
exit:
- hfs_unlock_truncate(cp);
+ if (took_truncate_lock) {
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ }
+ if (retval == EAGAIN) {
+ throttle_lowpri_io(1);
+ throttled_count++;
+
+ retval = 0;
+ goto read_again;
+ }
+ if (throttled_count) {
+ throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
+ }
return (retval);
}
kauth_cred_t cred = NULL;
off_t origFileSize;
off_t writelimit;
- off_t bytesToAdd;
+ off_t bytesToAdd = 0;
off_t actualBytesAdded;
off_t filebytes;
off_t offset;
- size_t resid;
+ ssize_t resid;
int eflags;
int ioflag = ap->a_ioflag;
int retval = 0;
int lockflags;
int cnode_locked = 0;
+ int partialwrite = 0;
+ int do_snapshot = 1;
+ time_t orig_ctime=VTOC(vp)->c_ctime;
+ int took_truncate_lock = 0;
+ int io_return_on_throttle = 0;
+ int throttled_count = 0;
+ struct rl_entry *invalid_range;
+
+#if HFS_COMPRESSION
+ if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
+ int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
+ switch(state) {
+ case FILE_IS_COMPRESSED:
+ return EACCES;
+ case FILE_IS_CONVERTING:
+ /* if FILE_IS_CONVERTING, we allow writes but do not
+ bother with snapshots or else we will deadlock.
+ */
+ do_snapshot = 0;
+ break;
+ default:
+ printf("invalid state %d for compressed file\n", state);
+ /* fall through */
+ }
+ } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
+ int error;
+
+ error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
+ if (error != 0) {
+ return error;
+ }
+ }
+
+ if (do_snapshot) {
+ check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
+ }
+
+#endif
+
+ if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
+ (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
+ /* Don't allow unencrypted io request from user space */
+ return EPERM;
+ }
+
- // LP64todo - fix this! uio_resid may be 64-bit value
resid = uio_resid(uio);
offset = uio_offset(uio);
if (!vnode_isreg(vp))
return (EPERM); /* Can only write regular files */
- /* Protect against a size change. */
- hfs_lock_truncate(VTOC(vp), TRUE);
-
- if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
- hfs_unlock_truncate(VTOC(vp));
- return (retval);
- }
- cnode_locked = 1;
cp = VTOC(vp);
fp = VTOF(vp);
hfsmp = VTOHFS(vp);
- filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
- if (ioflag & IO_APPEND) {
- uio_setoffset(uio, fp->ff_size);
- offset = fp->ff_size;
- }
- if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
- retval = EPERM;
+#if CONFIG_PROTECT
+ if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
goto exit;
}
+#endif
- origFileSize = fp->ff_size;
eflags = kEFDeferMask; /* defer file block allocations */
-
-#ifdef HFS_SPARSE_DEV
+#if HFS_SPARSE_DEV
/*
* When the underlying device is sparse and space
* is low (< 8MB), stop doing delayed allocations
}
#endif /* HFS_SPARSE_DEV */
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
- (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
+ if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
+ (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
+ io_return_on_throttle = IO_RETURN_ON_THROTTLE;
+ }
+
+again:
+ /*
+ * Protect against a size change.
+ *
+ * Note: If took_truncate_lock is true, then we previously got the lock shared
+ * but needed to upgrade to exclusive. So try getting it exclusive from the
+ * start.
+ */
+ if (ioflag & IO_APPEND || took_truncate_lock) {
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ }
+ else {
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
+ }
+ took_truncate_lock = 1;
- /* Now test if we need to extend the file */
- /* Doing so will adjust the filebytes for us */
+ /* Update UIO */
+ if (ioflag & IO_APPEND) {
+ uio_setoffset(uio, fp->ff_size);
+ offset = fp->ff_size;
+ }
+ if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
+ retval = EPERM;
+ goto exit;
+ }
+ origFileSize = fp->ff_size;
writelimit = offset + resid;
- if (writelimit <= filebytes)
+ filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
+
+ /*
+ * We may need an exclusive truncate lock for several reasons, all
+ * of which are because we may be writing to a (portion of a) block
+ * for the first time, and we need to make sure no readers see the
+ * prior, uninitialized contents of the block. The cases are:
+ *
+ * 1. We have unallocated (delayed allocation) blocks. We may be
+ * allocating new blocks to the file and writing to them.
+ * (A more precise check would be whether the range we're writing
+ * to contains delayed allocation blocks.)
+ * 2. We need to extend the file. The bytes between the old EOF
+ * and the new EOF are not yet initialized. This is important
+ * even if we're not allocating new blocks to the file. If the
+ * old EOF and new EOF are in the same block, we still need to
+ * protect that range of bytes until they are written for the
+ * first time.
+ * 3. The write overlaps some invalid ranges (delayed zero fill; that
+ * part of the file has been allocated, but not yet written).
+ *
+ * If we had a shared lock with the above cases, we need to try to upgrade
+ * to an exclusive lock. If the upgrade fails, we will lose the shared
+ * lock, and will need to take the truncate lock again; the took_truncate_lock
+ * flag will still be set, causing us to try for an exclusive lock next time.
+ *
+ * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
+ * lock is held, since it protects the range lists.
+ */
+ if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
+ ((fp->ff_unallocblocks != 0) ||
+ (writelimit > origFileSize))) {
+ if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
+ /*
+ * Lock upgrade failed and we lost our shared lock, try again.
+ * Note: we do not set took_truncate_lock=0 here. Leaving it
+ * set to 1 will cause us to try to get the lock exclusive.
+ */
+ goto again;
+ }
+ else {
+ /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
+ cp->c_truncatelockowner = current_thread();
+ }
+ }
+
+ if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+ goto exit;
+ }
+ cnode_locked = 1;
+
+ /*
+ * Now that we have the cnode lock, see if there are delayed zero fill ranges
+ * overlapping our write. If so, we need the truncate lock exclusive (see above).
+ */
+ if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
+ (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
+ /*
+ * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
+ * a deadlock, rather than simply returning failure. (That is, it apparently does
+ * not behave like a "try_lock"). Since this condition is rare, just drop the
+ * cnode lock and try again. Since took_truncate_lock is set, we will
+ * automatically take the truncate lock exclusive.
+ */
+ hfs_unlock(cp);
+ cnode_locked = 0;
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ goto again;
+ }
+
+ KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
+ (int)offset, uio_resid(uio), (int)fp->ff_size,
+ (int)filebytes, 0);
+
+ /* Check if we do not need to extend the file */
+ if (writelimit <= filebytes) {
goto sizeok;
+ }
cred = vfs_context_ucred(ap->a_context);
-#if QUOTA
bytesToAdd = writelimit - filebytes;
+
+#if QUOTA
retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
cred, 0);
if (retval)
if (retval != E_NONE)
break;
filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
+ KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
}
(void) hfs_update(vp, TRUE);
(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
(void) hfs_end_transaction(hfsmp);
+ /*
+ * If we didn't grow the file enough try a partial write.
+ * POSIX expects this behavior.
+ */
+ if ((retval == ENOSPC) && (filebytes > offset)) {
+ retval = 0;
+ partialwrite = 1;
+ uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
+ resid -= bytesToAdd;
+ writelimit = filebytes;
+ }
sizeok:
if (retval == E_NONE) {
off_t filesize;
off_t inval_end;
off_t io_start;
int lflag;
- struct rl_entry *invalid_range;
if (writelimit > fp->ff_size)
filesize = writelimit;
else
filesize = fp->ff_size;
- lflag = (ioflag & IO_SYNC);
+ lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
if (offset <= fp->ff_size) {
zero_off = offset & ~PAGE_MASK_64;
fp->ff_size, inval_start,
zero_off, (off_t)0,
lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
- hfs_lock(cp, HFS_FORCE_LOCK);
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
cnode_locked = 1;
if (retval) goto ioerr_exit;
offset = uio_offset(uio);
hfs_unlock(cp);
cnode_locked = 0;
+
+ /*
+ * We need to tell UBC the fork's new size BEFORE calling
+ * cluster_write, in case any of the new pages need to be
+ * paged out before cluster_write completes (which does happen
+ * in embedded systems due to extreme memory pressure).
+ * Similarly, we need to tell hfs_vnop_pageout what the new EOF
+ * will be, so that it can pass that on to cluster_pageout, and
+ * allow those pageouts.
+ *
+ * We don't update ff_size yet since we don't want pageins to
+ * be able to see uninitialized data between the old and new
+ * EOF, until cluster_write has completed and initialized that
+ * part of the file.
+ *
+ * The vnode pager relies on the file size last given to UBC via
+ * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
+ * ff_size (whichever is larger). NOTE: ff_new_size is always
+ * zero, unless we are extending the file via write.
+ */
+ if (filesize > fp->ff_size) {
+ fp->ff_new_size = filesize;
+ ubc_setsize(vp, filesize);
+ }
retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
- tail_off, lflag | IO_NOZERODIRTY);
- offset = uio_offset(uio);
- if (offset > fp->ff_size) {
- fp->ff_size = offset;
+ tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
+ if (retval) {
+ fp->ff_new_size = 0; /* no longer extending; use ff_size */
+
+ if (retval == EAGAIN) {
+ /*
+ * EAGAIN indicates that we still have I/O to do, but
+ * that we now need to be throttled
+ */
+ if (resid != uio_resid(uio)) {
+ /*
+ * did manage to do some I/O before returning EAGAIN
+ */
+ resid = uio_resid(uio);
+ offset = uio_offset(uio);
+
+ cp->c_touch_chgtime = TRUE;
+ cp->c_touch_modtime = TRUE;
+ hfs_incr_gencount(cp);
+ }
+ if (filesize > fp->ff_size) {
+ /*
+ * we called ubc_setsize before the call to
+ * cluster_write... since we only partially
+ * completed the I/O, we need to
+ * re-adjust our idea of the filesize based
+ * on our interim EOF
+ */
+ ubc_setsize(vp, offset);
- ubc_setsize(vp, fp->ff_size); /* XXX check errors */
+ fp->ff_size = offset;
+ }
+ goto exit;
+ }
+ if (filesize > origFileSize) {
+ ubc_setsize(vp, origFileSize);
+ }
+ goto ioerr_exit;
+ }
+
+ if (filesize > origFileSize) {
+ fp->ff_size = filesize;
+
/* Files that are changing size are not hot file candidates. */
- if (hfsmp->hfc_stage == HFC_RECORDING)
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
fp->ff_bytesread = 0;
+ }
}
- if (resid > uio_resid(uio)) {
- cp->c_touch_chgtime = TRUE;
- cp->c_touch_modtime = TRUE;
- }
+ fp->ff_new_size = 0; /* ff_size now has the correct size */
+ }
+ if (partialwrite) {
+ uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
+ resid += bytesToAdd;
}
- // XXXdbg - testing for vivek and paul lambert
+ // XXXdbg - see radar 4871353 for more info
{
if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
}
}
- HFS_KNOTE(vp, NOTE_WRITE);
ioerr_exit:
- /*
- * If we successfully wrote any data, and we are not the superuser
- * we clear the setuid and setgid bits as a precaution against
- * tampering.
- */
- if (cp->c_mode & (S_ISUID | S_ISGID)) {
- cred = vfs_context_ucred(ap->a_context);
- if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
- if (!cnode_locked) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- cnode_locked = 1;
+ if (resid > uio_resid(uio)) {
+ if (!cnode_locked) {
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ cnode_locked = 1;
+ }
+
+ cp->c_touch_chgtime = TRUE;
+ cp->c_touch_modtime = TRUE;
+ hfs_incr_gencount(cp);
+
+ /*
+ * If we successfully wrote any data, and we are not the superuser
+ * we clear the setuid and setgid bits as a precaution against
+ * tampering.
+ */
+ if (cp->c_mode & (S_ISUID | S_ISGID)) {
+ cred = vfs_context_ucred(ap->a_context);
+ if (cred && suser(cred, NULL)) {
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
}
- cp->c_mode &= ~(S_ISUID | S_ISGID);
}
}
if (retval) {
if (ioflag & IO_UNIT) {
- if (!cnode_locked) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- cnode_locked = 1;
- }
(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
0, ap->a_context);
- // LP64todo - fix this! resid needs to by user_ssize_t
uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
uio_setresid(uio, resid);
filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
}
- } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
- if (!cnode_locked) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- cnode_locked = 1;
- }
+ } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
retval = hfs_update(vp, TRUE);
- }
+
/* Updating vcbWrCnt doesn't need to be atomic. */
hfsmp->vcbWrCnt++;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
+ KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
if (cnode_locked)
hfs_unlock(cp);
- hfs_unlock_truncate(cp);
+
+ if (took_truncate_lock) {
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ }
+ if (retval == EAGAIN) {
+ throttle_lowpri_io(1);
+ throttled_count++;
+
+ retval = 0;
+ goto again;
+ }
+ if (throttled_count) {
+ throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
+ }
return (retval);
}
/* support for the "bulk-access" fcntl */
-#define CACHE_ELEMS 64
#define CACHE_LEVELS 16
+#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100
-/* from hfs_attrlist.c */
-extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
- mode_t obj_mode, struct mount *mp,
- kauth_cred_t cred, struct proc *p);
-
-/* from vfs/vfs_fsevents.c */
-extern char *get_pathbuff(void);
-extern void release_pathbuff(char *buff);
-
struct access_cache {
int numcached;
int cachehits; /* these two for statistics gathering */
int lookups;
unsigned int *acache;
- Boolean *haveaccess;
+ unsigned char *haveaccess;
};
struct access_t {
int *file_ids; /* IN: array of file ids */
gid_t *groups; /* IN: array of groups */
short *access; /* OUT: access info for each file (0 for 'has access') */
+} __attribute__((unavailable)); // this structure is for reference purposes only
+
+struct user32_access_t {
+ uid_t uid; /* IN: effective user id */
+ short flags; /* IN: access requested (i.e. R_OK) */
+ short num_groups; /* IN: number of groups user belongs to */
+ int num_files; /* IN: number of files to process */
+ user32_addr_t file_ids; /* IN: array of file ids */
+ user32_addr_t groups; /* IN: array of groups */
+ user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
};
-struct user_access_t {
+struct user64_access_t {
uid_t uid; /* IN: effective user id */
short flags; /* IN: access requested (i.e. R_OK) */
short num_groups; /* IN: number of groups user belongs to */
- int num_files; /* IN: number of files to process */
- user_addr_t file_ids; /* IN: array of file ids */
- user_addr_t groups; /* IN: array of groups */
- user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+ int num_files; /* IN: number of files to process */
+ user64_addr_t file_ids; /* IN: array of file ids */
+ user64_addr_t groups; /* IN: array of groups */
+ user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+};
+
+
+// these are the "extended" versions of the above structures
+// note that it is crucial that they be a different size than
+// the regular versions
+struct ext_access_t {
+ uint32_t flags; /* IN: access requested (i.e. R_OK) */
+ uint32_t num_files; /* IN: number of files to process */
+ uint32_t map_size; /* IN: size of the bit map */
+ uint32_t *file_ids; /* IN: Array of file ids */
+ char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
+ short *access; /* OUT: access info for each file (0 for 'has access') */
+ uint32_t num_parents; /* future use */
+ cnid_t *parents; /* future use */
+} __attribute__((unavailable)); // this structure is for reference purposes only
+
+struct user32_ext_access_t {
+ uint32_t flags; /* IN: access requested (i.e. R_OK) */
+ uint32_t num_files; /* IN: number of files to process */
+ uint32_t map_size; /* IN: size of the bit map */
+ user32_addr_t file_ids; /* IN: Array of file ids */
+ user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
+ user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+ uint32_t num_parents; /* future use */
+ user32_addr_t parents; /* future use */
+};
+
+struct user64_ext_access_t {
+ uint32_t flags; /* IN: access requested (i.e. R_OK) */
+ uint32_t num_files; /* IN: number of files to process */
+ uint32_t map_size; /* IN: size of the bit map */
+ user64_addr_t file_ids; /* IN: array of file ids */
+ user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
+ user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+ uint32_t num_parents;/* future use */
+ user64_addr_t parents;/* future use */
};
+
/*
* Perform a binary search for the given parent_id. Return value is
- * found/not found boolean, and indexp will be the index of the item
- * or the index at which to insert the item if it's not found.
+ * the index of the match, or -1 if there is none. If no_match_indexp
+ * is non-NULL it is always set to the index where the search stopped
+ * (usable as an insertion point when nothing matched).
*/
-static int
-lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
+static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
- unsigned int lo, hi;
- int index, matches = 0;
+ int index=-1;
+ unsigned int lo=0;
- if (cache->numcached == 0) {
- *indexp = 0;
- return 0; // table is empty, so insert at index=0 and report no match
+ do {
+ unsigned int mid = ((hi - lo)/2) + lo;
+ unsigned int this_id = array[mid];
+
+ if (parent_id == this_id) {
+ hi = mid;
+ break;
}
-
- if (cache->numcached > CACHE_ELEMS) {
- /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
- cache->numcached, CACHE_ELEMS);*/
- cache->numcached = CACHE_ELEMS;
+
+ if (parent_id < this_id) {
+ hi = mid;
+ continue;
}
+
+ if (parent_id > this_id) {
+ lo = mid + 1;
+ continue;
+ }
+ } while(lo < hi);
+
+ /* check if lo and hi converged on the match */
+ if (parent_id == array[hi]) {
+ index = hi;
+ }
+
+ if (no_match_indexp) {
+ *no_match_indexp = hi;
+ }
+
+ return index;
+}
+
+
+static int
+lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
+{
+ unsigned int hi;
+ int matches = 0;
+ int index, no_match_index;
- lo = 0;
- hi = cache->numcached - 1;
- index = -1;
+ if (cache->numcached == 0) {
+ *indexp = 0;
+ return 0; // table is empty, so insert at index=0 and report no match
+ }
- /* perform binary search for parent_id */
- do {
- unsigned int mid = (hi - lo)/2 + lo;
- unsigned int this_id = cache->acache[mid];
-
- if (parent_id == this_id) {
- index = mid;
- break;
- }
-
- if (parent_id < this_id) {
- hi = mid;
- continue;
- }
-
- if (parent_id > this_id) {
- lo = mid + 1;
- continue;
- }
- } while(lo < hi);
+ if (cache->numcached > NUM_CACHE_ENTRIES) {
+ cache->numcached = NUM_CACHE_ENTRIES;
+ }
- /* check if lo and hi converged on the match */
- if (parent_id == cache->acache[hi]) {
- index = hi;
- }
+ hi = cache->numcached - 1;
- /* if no existing entry found, find index for new one */
- if (index == -1) {
- index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
- matches = 0;
- } else {
- matches = 1;
- }
+ index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
+
+ /* if no existing entry found, find index for new one */
+ if (index == -1) {
+ index = no_match_index;
+ matches = 0;
+ } else {
+ matches = 1;
+ }
- *indexp = index;
- return matches;
+ *indexp = index;
+ return matches;
}
/*
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
- int lookup_index = -1;
-
- /* need to do a lookup first if -1 passed for index */
- if (index == -1) {
- if (lookup_bucket(cache, &lookup_index, nodeID)) {
- if (cache->haveaccess[lookup_index] != access) {
- /* change access info for existing entry... should never happen */
- cache->haveaccess[lookup_index] = access;
- }
-
- /* mission accomplished */
- return;
- } else {
- index = lookup_index;
- }
-
- }
-
- /* if the cache is full, do a replace rather than an insert */
- if (cache->numcached >= CACHE_ELEMS) {
- //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
- cache->numcached = CACHE_ELEMS-1;
-
- if (index > cache->numcached) {
- // printf("index %d pinned to %d\n", index, cache->numcached);
- index = cache->numcached;
- }
- } else if (index >= 0 && index < cache->numcached) {
- /* only do bcopy if we're inserting */
- bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
- bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
- }
-
- cache->acache[index] = nodeID;
- cache->haveaccess[index] = access;
- cache->numcached++;
+ int lookup_index = -1;
+
+ /* need to do a lookup first if -1 passed for index */
+ if (index == -1) {
+ if (lookup_bucket(cache, &lookup_index, nodeID)) {
+ if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
+ // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
+ cache->haveaccess[lookup_index] = access;
+ }
+
+ /* mission accomplished */
+ return;
+ } else {
+ index = lookup_index;
+ }
+
+ }
+
+ /* if the cache is full, do a replace rather than an insert */
+ if (cache->numcached >= NUM_CACHE_ENTRIES) {
+ cache->numcached = NUM_CACHE_ENTRIES-1;
+
+ if (index > cache->numcached) {
+ index = cache->numcached;
+ }
+ }
+
+ if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
+ index++;
+ }
+
+ if (index >= 0 && index < cache->numcached) {
+ /* only do bcopy if we're inserting */
+ bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
+ bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
+ }
+
+ cache->acache[index] = nodeID;
+ cache->haveaccess[index] = access;
+ cache->numcached++;
}
struct cinfo {
- uid_t uid;
- gid_t gid;
- mode_t mode;
- cnid_t parentcnid;
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+ cnid_t parentcnid;
+ u_int16_t recflags;
};
static int
-snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
+snoop_callback(const cnode_t *cp, void *arg)
{
- struct cinfo *cip = (struct cinfo *)arg;
+ struct cinfo *cip = arg;
- cip->uid = attrp->ca_uid;
- cip->gid = attrp->ca_gid;
- cip->mode = attrp->ca_mode;
- cip->parentcnid = descp->cd_parentcnid;
+ cip->uid = cp->c_uid;
+ cip->gid = cp->c_gid;
+ cip->mode = cp->c_mode;
+ cip->parentcnid = cp->c_parentcnid;
+ cip->recflags = cp->c_attr.ca_recflags;
- return (0);
+ return (0);
}
/*
* isn't incore, then go to the catalog.
*/
static int
-do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
- struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
+do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
+ struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
- int error = 0;
+ int error = 0;
- /* if this id matches the one the fsctl was called with, skip the lookup */
- if (cnid == skip_cp->c_cnid) {
+ /* if this id matches the one the fsctl was called with, skip the lookup */
+ if (cnid == skip_cp->c_cnid) {
cnattrp->ca_uid = skip_cp->c_uid;
cnattrp->ca_gid = skip_cp->c_gid;
cnattrp->ca_mode = skip_cp->c_mode;
+ cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
- } else {
+ } else {
struct cinfo c_info;
/* otherwise, check the cnode hash in case the file/dir is incore */
- if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
+ error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);
+
+ if (error == EACCES) {
+ // File is deleted
+ return ENOENT;
+ } else if (!error) {
cnattrp->ca_uid = c_info.uid;
cnattrp->ca_gid = c_info.gid;
cnattrp->ca_mode = c_info.mode;
+ cnattrp->ca_recflags = c_info.recflags;
keyp->hfsPlus.parentID = c_info.parentcnid;
} else {
int lockflags;
-
+
+ if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
+ throttle_lowpri_io(1);
+
lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
-
+
/* lookup this cnid in the catalog */
error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
cache->lookups++;
}
- }
+ }
- return (error);
+ return (error);
}
+
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 *
 * The walk starts at nodeID and follows parent ids for as long as the id is
 * >= kRootDirID.  Each step first consults `cache`; on a miss the node's
 * attributes come from do_attr_lookup() and are checked either through
 * vnode_authorize() (when the record has kHFSHasSecurityMask set) or through
 * DerivePermissionSummary().
 *
 * Returns 1 when access is granted and 0 otherwise; *err receives the error
 * code that goes with the result (0 on success).
 *
 * bitmap/map_size: when both are non-zero, one bit derived from each node id
 * that was not answered from the cache is set in `bitmap`.
 *
 * parents/num_parents: when `parents` is non-NULL, every visited id is looked
 * up with cache_binSearch().  If scope_index is still -1 on exit and no other
 * error occurred, the result is 0 with *err == ESRCH.
 * NOTE(review): this reads as "-1 from cache_binSearch() means not found" --
 * confirm against cache_binSearch().
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
- struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
+ struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
+ struct vfs_context *my_context,
+ char *bitmap,
+ uint32_t map_size,
+ cnid_t* parents,
+ uint32_t num_parents)
{
- int myErr = 0;
- int myResult;
- HFSCatalogNodeID thisNodeID;
- unsigned long myPerms;
- struct cat_attr cnattr;
- int cache_index = -1;
- CatalogKey catkey;
-
- int i = 0, ids_to_cache = 0;
- int parent_ids[CACHE_LEVELS];
-
- /* root always has access */
- if (!suser(myp_ucred, NULL)) {
- return (1);
- }
-
- thisNodeID = nodeID;
- while (thisNodeID >= kRootDirID) {
- myResult = 0; /* default to "no access" */
-
- /* check the cache before resorting to hitting the catalog */
-
- /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
- * to look any further after hitting cached dir */
-
- if (lookup_bucket(cache, &cache_index, thisNodeID)) {
- cache->cachehits++;
- myResult = cache->haveaccess[cache_index];
- goto ExitThisRoutine;
- }
-
- /* remember which parents we want to cache */
- if (ids_to_cache < CACHE_LEVELS) {
- parent_ids[ids_to_cache] = thisNodeID;
- ids_to_cache++;
- }
+ int myErr = 0;
+ int myResult;
+ HFSCatalogNodeID thisNodeID;
+ unsigned int myPerms;
+ struct cat_attr cnattr;
+ int cache_index = -1, scope_index = -1, scope_idx_start = -1;
+ CatalogKey catkey;
+
+ int i = 0, ids_to_cache = 0;
+ int parent_ids[CACHE_LEVELS];
+
+ thisNodeID = nodeID;
+ while (thisNodeID >= kRootDirID) {
+ myResult = 0; /* default to "no access" */
- /* do the lookup (checks the cnode hash, then the catalog) */
- myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
- if (myErr) {
- goto ExitThisRoutine; /* no access */
- }
-
- myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
- cnattr.ca_mode, hfsmp->hfs_mp,
- myp_ucred, theProcPtr);
-
- if ( (myPerms & X_OK) == 0 ) {
- myResult = 0;
- goto ExitThisRoutine; /* no access */
- }
-
- /* up the hierarchy we go */
- thisNodeID = catkey.hfsPlus.parentID;
- }
-
- /* if here, we have access to this node */
- myResult = 1;
-
- ExitThisRoutine:
- if (myErr) {
- //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
- myResult = 0;
- }
- *err = myErr;
-
- /* cache the parent directory(ies) */
- for (i = 0; i < ids_to_cache; i++) {
- /* small optimization: get rid of double-lookup for all these */
- // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
- add_node(cache, -1, parent_ids[i], myResult);
- }
-
- return (myResult);
-}
-/* end "bulk-access" support */
+ /* check the cache before resorting to hitting the catalog */
+ /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
+ * to look any further after hitting cached dir */
+ if (lookup_bucket(cache, &cache_index, thisNodeID)) {
+ cache->cachehits++;
/* the cached value is an error code: 0 means access, ESRCH is stored for out-of-scope entries */
+ myErr = cache->haveaccess[cache_index];
+ if (scope_index != -1) {
+ if (myErr == ESRCH) {
+ myErr = 0;
+ }
+ } else {
+ scope_index = 0; // so we'll just use the cache result
+ scope_idx_start = ids_to_cache;
+ }
+ myResult = (myErr == 0) ? 1 : 0;
+ goto ExitThisRoutine;
+ }
-/*
- * Callback for use with freeze ioctl.
- */
-static int
-hfs_freezewrite_callback(struct vnode *vp, void *cargs)
-{
- vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
- return 0;
-}
+ if (parents) {
+ int tmp;
+ tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
+ if (scope_index == -1)
+ scope_index = tmp;
+ if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
+ scope_idx_start = ids_to_cache;
+ }
+ }
-/*
- * Control filesystem operating characteristics.
- */
-int
-hfs_vnop_ioctl( struct vnop_ioctl_args /* {
- vnode_t a_vp;
- int a_command;
- caddr_t a_data;
- int a_fflag;
- vfs_context_t a_context;
- } */ *ap)
-{
- struct vnode * vp = ap->a_vp;
- struct hfsmount *hfsmp = VTOHFS(vp);
- vfs_context_t context = ap->a_context;
- kauth_cred_t cred = vfs_context_ucred(context);
+ /* remember which parents we want to cache */
+ if (ids_to_cache < CACHE_LEVELS) {
+ parent_ids[ids_to_cache] = thisNodeID;
+ ids_to_cache++;
+ }
+ // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
+ if (bitmap && map_size) {
+ bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
+ }
+
+
+ /* do the lookup (checks the cnode hash, then the catalog) */
+ myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
+ if (myErr) {
+ goto ExitThisRoutine; /* no access */
+ }
+
+ /* Root always gets access. */
+ if (suser(myp_ucred, NULL) == 0) {
+ thisNodeID = catkey.hfsPlus.parentID;
+ myResult = 1;
+ continue;
+ }
+
+ // if the thing has acl's, do the full permission check
+ if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
+ struct vnode *vp;
+
+ /* get the vnode for this cnid */
+ myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
+ if ( myErr ) {
+ myResult = 0;
+ goto ExitThisRoutine;
+ }
+
/* read the parent id before the cnode is unlocked and the vnode is released below */
+ thisNodeID = VTOC(vp)->c_parentcnid;
+
+ hfs_unlock(VTOC(vp));
+
+ if (vnode_vtype(vp) == VDIR) {
+ myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
+ } else {
+ myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
+ }
+
+ vnode_put(vp);
+ if (myErr) {
+ myResult = 0;
+ goto ExitThisRoutine;
+ }
+ } else {
+ unsigned int flags;
+ int mode = cnattr.ca_mode & S_IFMT;
+ myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
+
/* directories need both read and search permission, anything else only read */
+ if (mode == S_IFDIR) {
+ flags = R_OK | X_OK;
+ } else {
+ flags = R_OK;
+ }
+ if ( (myPerms & flags) != flags) {
+ myResult = 0;
+ myErr = EACCES;
+ goto ExitThisRoutine; /* no access */
+ }
+
+ /* up the hierarchy we go */
+ thisNodeID = catkey.hfsPlus.parentID;
+ }
+ }
+
+ /* if here, we have access to this node */
+ myResult = 1;
+
+ ExitThisRoutine:
/* a parents list was given but scope_index was never set: report ESRCH */
+ if (parents && myErr == 0 && scope_index == -1) {
+ myErr = ESRCH;
+ }
+
+ if (myErr) {
+ myResult = 0;
+ }
+ *err = myErr;
+
+ /* cache the parent directory(ies) */
/*
 * With a parents list and no error, entries after scope_idx_start (or all
 * entries when scope_idx_start is still -1) are cached as ESRCH; every
 * other entry is cached with the final error code.
 */
+ for (i = 0; i < ids_to_cache; i++) {
+ if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
+ add_node(cache, -1, parent_ids[i], ESRCH);
+ } else {
+ add_node(cache, -1, parent_ids[i], myErr);
+ }
+ }
+
+ return (myResult);
+}
+
+/*
+ * Handle a bulk-access request: for every id in the caller's file_ids array
+ * decide whether the calling credential can reach it, and copy a per-id
+ * result (0 for access, otherwise an error code) out to the caller's access
+ * array.  When a bitmap was requested it is copied out as well.
+ *
+ * ap->a_data must point at the already copied-in request structure:
+ * a user64_ext_access_t for 64-bit callers, or a user32_access_t /
+ * user32_ext_access_t for 32-bit callers.  arg_size selects the layout and
+ * any other size is rejected.  At most 1024 file ids and 1024 parents are
+ * accepted.
+ *
+ * If the PARENT_IDS_FLAG bit is set in the request flags, the ids are treated
+ * as directory ids and the per-leaf permission check is skipped.
+ *
+ * Returns 0 on success (including an empty request), EINVAL for a malformed
+ * request, ENOMEM when a working buffer cannot be allocated, or the error
+ * from copyin()/copyout().
+ */
+static int
+do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
+ struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
+{
+ boolean_t is64bit;
+
+ /*
+ * NOTE: on entry, the vnode has an io_ref. In case this vnode
+ * happens to be in our list of file_ids, we'll note it to
+ * avoid calling hfs_chashget_nowait() on that id as that
+ * will cause a "locking against myself" panic.
+ */
+ Boolean check_leaf = true;
+
+ struct user64_ext_access_t *user_access_structp;
+ struct user64_ext_access_t tmp_user_access;
+ struct access_cache cache;
+
+ int error = 0, prev_parent_check_ok=1;
+ unsigned int i;
+
+ short flags;
+ unsigned int num_files = 0;
+ int map_size = 0;
+ int num_parents = 0;
+ int *file_ids=NULL;
+ short *access=NULL;
+ char *bitmap=NULL;
+ cnid_t *parents=NULL;
+ int leaf_index;
+
+ cnid_t cnid;
+ cnid_t prevParent_cnid = 0;
+ unsigned int myPerms;
+ short myaccess = 0;
+ struct cat_attr cnattr;
+ CatalogKey catkey;
+ struct cnode *skip_cp = VTOC(vp);
+ kauth_cred_t cred = vfs_context_ucred(context);
+ proc_t p = vfs_context_proc(context);
+
+ is64bit = proc_is64bit(p);
+
+ /* initialize the local cache and buffers */
+ cache.numcached = 0;
+ cache.cachehits = 0;
+ cache.lookups = 0;
+ cache.acache = NULL;
+ cache.haveaccess = NULL;
+
+ /* struct copyin done during dispatch... need to copy file_id array separately */
+ if (ap->a_data == NULL) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ if (is64bit) {
+ if (arg_size != sizeof(struct user64_ext_access_t)) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ user_access_structp = (struct user64_ext_access_t *)ap->a_data;
+
+ } else if (arg_size == sizeof(struct user32_access_t)) {
+ struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
+
+ // convert an old style bulk-access struct to the new style
+ tmp_user_access.flags = accessp->flags;
+ tmp_user_access.num_files = accessp->num_files;
+ tmp_user_access.map_size = 0;
+ tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
+ tmp_user_access.bitmap = USER_ADDR_NULL;
+ tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
+ tmp_user_access.num_parents = 0;
+ // the old layout has no parents list; don't leave the field uninitialized
+ tmp_user_access.parents = USER_ADDR_NULL;
+ user_access_structp = &tmp_user_access;
+
+ } else if (arg_size == sizeof(struct user32_ext_access_t)) {
+ struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
+
+ // up-cast from a 32-bit version of the struct
+ tmp_user_access.flags = accessp->flags;
+ tmp_user_access.num_files = accessp->num_files;
+ tmp_user_access.map_size = accessp->map_size;
+ tmp_user_access.num_parents = accessp->num_parents;
+
+ tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
+ tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
+ tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
+ tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
+
+ user_access_structp = &tmp_user_access;
+ } else {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ map_size = user_access_structp->map_size;
+
+ num_files = user_access_structp->num_files;
+
+ num_parents= user_access_structp->num_parents;
+
+ /* nothing to check: succeed without touching the caller's buffers */
+ if (num_files < 1) {
+ goto err_exit_bulk_access;
+ }
+ if (num_files > 1024) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ if (num_parents > 1024) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ file_ids = (int *) kalloc(sizeof(int) * num_files);
+ access = (short *) kalloc(sizeof(short) * num_files);
+ if (map_size) {
+ bitmap = (char *) kalloc(sizeof(char) * map_size);
+ }
+
+ if (num_parents) {
+ parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
+ }
+
+ cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
+ cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
+
+ /*
+ * Any failed allocation aborts the request.  The parents buffer is checked
+ * too: without that, a failed allocation would reach the copyin() below
+ * with a NULL destination.
+ */
+ if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || (num_parents != 0 && parents == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
+ if (file_ids) {
+ kfree(file_ids, sizeof(int) * num_files);
+ }
+ if (bitmap) {
+ kfree(bitmap, sizeof(char) * map_size);
+ }
+ if (access) {
+ kfree(access, sizeof(short) * num_files);
+ }
+ if (cache.acache) {
+ kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
+ }
+ if (cache.haveaccess) {
+ kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
+ }
+ if (parents) {
+ kfree(parents, sizeof(cnid_t) * num_parents);
+ }
+ return ENOMEM;
+ }
+
+ // make sure the bitmap is zero'ed out...
+ if (bitmap) {
+ bzero(bitmap, (sizeof(char) * map_size));
+ }
+
+ if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
+ num_files * sizeof(int)))) {
+ goto err_exit_bulk_access;
+ }
+
+ if (num_parents) {
+ if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
+ num_parents * sizeof(cnid_t)))) {
+ goto err_exit_bulk_access;
+ }
+ }
+
+ /* default to a read check when the caller asked for no access bits at all */
+ flags = user_access_structp->flags;
+ if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
+ flags = R_OK;
+ }
+
+ /* check if we've been passed leaf node ids or parent ids */
+ if (flags & PARENT_IDS_FLAG) {
+ check_leaf = false;
+ }
+
+ /* Check access to each file_id passed in */
+ for (i = 0; i < num_files; i++) {
+ leaf_index=-1;
+ cnid = (cnid_t) file_ids[i];
+
+ /* root always has access */
+ if ((!parents) && (!suser(cred, NULL))) {
+ access[i] = 0;
+ continue;
+ }
+
+ if (check_leaf) {
+ /* do the lookup (checks the cnode hash, then the catalog) */
+ error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
+ if (error) {
+ access[i] = (short) error;
+ continue;
+ }
+
+ if (parents) {
+ // Check if the leaf matches one of the parent scopes
+ leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
+ if (leaf_index >= 0 && parents[leaf_index] == cnid)
+ prev_parent_check_ok = 0;
+ else if (leaf_index >= 0)
+ prev_parent_check_ok = 1;
+ }
+
+ // if the thing has acl's, do the full permission check
+ if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
+ struct vnode *cvp;
+ int myErr = 0;
+ /* get the vnode for this cnid */
+ myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
+ if ( myErr ) {
+ access[i] = myErr;
+ continue;
+ }
+
+ hfs_unlock(VTOC(cvp));
+
+ if (vnode_vtype(cvp) == VDIR) {
+ myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
+ } else {
+ myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
+ }
+
+ vnode_put(cvp);
+ if (myErr) {
+ access[i] = myErr;
+ continue;
+ }
+ } else {
+ /* before calling CheckAccess(), check the target file for read access */
+ myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
+ cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
+
+ /* fail fast if no access */
+ if ((myPerms & flags) == 0) {
+ access[i] = EACCES;
+ continue;
+ }
+ }
+ } else {
+ /* we were passed an array of parent ids */
+ catkey.hfsPlus.parentID = cnid;
+ }
+
+ /* if the last guy had the same parent and had access, we're done */
+ if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
+ cache.cachehits++;
+ access[i] = 0;
+ continue;
+ }
+
+ myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
+ skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
+
+ if (myaccess || (error == ESRCH && leaf_index != -1)) {
+ access[i] = 0; // have access.. no errors to report
+ } else {
+ access[i] = (error != 0 ? (short) error : EACCES);
+ }
+
+ prevParent_cnid = catkey.hfsPlus.parentID;
+ }
+
+ /* copyout the access array */
+ if ((error = copyout((caddr_t)access, user_access_structp->access,
+ num_files * sizeof (short)))) {
+ goto err_exit_bulk_access;
+ }
+ if (map_size && bitmap) {
+ if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
+ map_size * sizeof (char)))) {
+ goto err_exit_bulk_access;
+ }
+ }
+
+
+ err_exit_bulk_access:
+
+ /* common exit: release whichever working buffers were allocated */
+ if (file_ids)
+ kfree(file_ids, sizeof(int) * num_files);
+ if (parents)
+ kfree(parents, sizeof(cnid_t) * num_parents);
+ if (bitmap)
+ kfree(bitmap, sizeof(char) * map_size);
+ if (access)
+ kfree(access, sizeof(short) * num_files);
+ if (cache.acache)
+ kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
+ if (cache.haveaccess)
+ kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
+
+ return (error);
+}
+
+
+/* end "bulk-access" support */
+
+
+/*
+ * Control filesystem operating characteristics.
+ */
+int
+hfs_vnop_ioctl( struct vnop_ioctl_args /* {
+ vnode_t a_vp;
+ long a_command;
+ caddr_t a_data;
+ int a_fflag;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ struct vnode * vp = ap->a_vp;
+ struct hfsmount *hfsmp = VTOHFS(vp);
+ vfs_context_t context = ap->a_context;
+ kauth_cred_t cred = vfs_context_ucred(context);
proc_t p = vfs_context_proc(context);
struct vfsstatfs *vfsp;
boolean_t is64bit;
+ off_t jnl_start, jnl_size;
+ struct hfs_journal_info *jip;
+#if HFS_COMPRESSION
+ int compressed = 0;
+ off_t uncompressed_size = -1;
+ int decmpfs_error = 0;
+
+ if (ap->a_command == F_RDADVISE) {
+ /* we need to inspect the decmpfs state of the file as early as possible */
+ compressed = hfs_file_is_compressed(VTOC(vp), 0);
+ if (compressed) {
+ if (VNODE_IS_RSRC(vp)) {
+ /* if this is the resource fork, treat it as if it were empty */
+ uncompressed_size = 0;
+ } else {
+ decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
+ if (decmpfs_error != 0) {
+ /* failed to get the uncompressed size, we'll check for this later */
+ uncompressed_size = -1;
+ }
+ }
+ }
+ }
+#endif /* HFS_COMPRESSION */
is64bit = proc_is64bit(p);
+#if CONFIG_PROTECT
+ {
+ int error = 0;
+ if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
+ return error;
+ }
+ }
+#endif /* CONFIG_PROTECT */
+
switch (ap->a_command) {
+ case HFS_GETPATH:
+ {
+ struct vnode *file_vp;
+ cnid_t cnid;
+ int outlen;
+ char *bufptr;
+ int error;
+ int flags = 0;
+
+ /* Caller must be owner of file system. */
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES);
+ }
+ /* Target vnode must be file system's root. */
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+ bufptr = (char *)ap->a_data;
+ cnid = strtoul(bufptr, NULL, 10);
+ if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
+ flags |= BUILDPATH_VOLUME_RELATIVE;
+ }
+
+ /* We need to call hfs_vfs_vget to leverage the code that will
+ * fix the origin list for us if needed, as opposed to calling
+ * hfs_vget, since we will need the parent for build_path call.
+ */
+
+ if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
+ return (error);
+ }
+ error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
+ vnode_put(file_vp);
+
+ return (error);
+ }
+
+ case HFS_TRANSFER_DOCUMENT_ID:
+ {
+ struct cnode *cp = NULL;
+ int error;
+ u_int32_t to_fd = *(u_int32_t *)ap->a_data;
+ struct fileproc *to_fp;
+ struct vnode *to_vp;
+ struct cnode *to_cp;
+
+ cp = VTOC(vp);
+
+ if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
+ //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
+ return error;
+ }
+ if ( (error = vnode_getwithref(to_vp)) ) {
+ file_drop(to_fd);
+ return error;
+ }
+
+ if (VTOHFS(to_vp) != hfsmp) {
+ error = EXDEV;
+ goto transfer_cleanup;
+ }
+
+ int need_unlock = 1;
+ to_cp = VTOC(to_vp);
+ error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
+ if (error != 0) {
+ //printf("could not lock the pair of cnodes (error %d)\n", error);
+ goto transfer_cleanup;
+ }
+
+ if (!(cp->c_bsdflags & UF_TRACKED)) {
+ error = EINVAL;
+ } else if (to_cp->c_bsdflags & UF_TRACKED) {
+ //
+ // if the destination is already tracked, return an error
+ // as otherwise it's a silent deletion of the target's
+ // document-id
+ //
+ error = EEXIST;
+ } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
+ //
+ // we can use the FndrExtendedFileInfo because the doc-id is the first
+ // thing in both it and the ExtendedDirInfo struct which is fixed in
+ // format and can not change layout
+ //
+ struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
+ struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
+
+ if (f_extinfo->document_id == 0) {
+ uint32_t new_id;
+
+ hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id
+
+ if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
+ //
+ // re-lock the pair now that we have the document-id
+ //
+ hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
+ f_extinfo->document_id = new_id;
+ } else {
+ goto transfer_cleanup;
+ }
+ }
+
+ to_extinfo->document_id = f_extinfo->document_id;
+ f_extinfo->document_id = 0;
+ //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
+
+ // make sure the destination is also UF_TRACKED
+ to_cp->c_bsdflags |= UF_TRACKED;
+ cp->c_bsdflags &= ~UF_TRACKED;
+
+ // mark the cnodes dirty
+ cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+ to_cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+
+ int lockflags;
+ if ((error = hfs_start_transaction(hfsmp)) == 0) {
+
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+
+ (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
+ (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
+
+ hfs_systemfile_unlock (hfsmp, lockflags);
+ (void) hfs_end_transaction(hfsmp);
+ }
+
+#if CONFIG_FSE
+ add_fsevent(FSE_DOCID_CHANGED, context,
+ FSE_ARG_DEV, hfsmp->hfs_raw_dev,
+ FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode #
+ FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode #
+ FSE_ARG_INT32, to_extinfo->document_id,
+ FSE_ARG_DONE);
+
+ hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
+ need_unlock = 0;
+
+ if (need_fsevent(FSE_STAT_CHANGED, vp)) {
+ add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
+ }
+ if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
+ add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
+ }
+#else
+ hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
+ need_unlock = 0;
+#endif
+ }
+
+ if (need_unlock) {
+ hfs_unlockpair(cp, to_cp);
+ }
+
+ transfer_cleanup:
+ vnode_put(to_vp);
+ file_drop(to_fd);
+
+ return error;
+ }
+
+
+
+ case HFS_PREV_LINK:
+ case HFS_NEXT_LINK:
+ {
+ cnid_t linkfileid;
+ cnid_t nextlinkid;
+ cnid_t prevlinkid;
+ int error;
+
+ /* Caller must be owner of file system. */
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES);
+ }
+ /* Target vnode must be file system's root. */
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+ linkfileid = *(cnid_t *)ap->a_data;
+ if (linkfileid < kHFSFirstUserCatalogNodeID) {
+ return (EINVAL);
+ }
+ if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
+ return (error);
+ }
+ if (ap->a_command == HFS_NEXT_LINK) {
+ *(cnid_t *)ap->a_data = nextlinkid;
+ } else {
+ *(cnid_t *)ap->a_data = prevlinkid;
+ }
+ return (0);
+ }
+
case HFS_RESIZE_PROGRESS: {
vfsp = vfs_statfs(HFSTOVFS(hfsmp));
if (!vnode_isvroot(vp)) {
return (EINVAL);
}
+ /* file system must not be mounted read-only */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
}
+
case HFS_RESIZE_VOLUME: {
u_int64_t newsize;
u_int64_t cursize;
if (!vnode_isvroot(vp)) {
return (EINVAL);
}
+
+ /* filesystem must not be mounted read only */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
newsize = *(u_int64_t *)ap->a_data;
cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
}
}
case HFS_CHANGE_NEXT_ALLOCATION: {
+ int error = 0; /* Assume success */
u_int32_t location;
if (vnode_vfsisrdonly(vp)) {
if (!vnode_isvroot(vp)) {
return (EINVAL);
}
+ hfs_lock_mount(hfsmp);
location = *(u_int32_t *)ap->a_data;
- if (location > hfsmp->totalBlocks - 1) {
- return (EINVAL);
+ if ((location >= hfsmp->allocLimit) &&
+ (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
+ error = EINVAL;
+ goto fail_change_next_allocation;
}
/* Return previous value. */
*(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
- HFS_MOUNT_LOCK(hfsmp, TRUE);
- hfsmp->nextAllocation = location;
- hfsmp->vcbFlags |= 0xFF00;
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
- return (0);
+ if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
+ /* On magic value for location, set nextAllocation to next block
+ * after metadata zone and set flag in mount structure to indicate
+ * that nextAllocation should not be updated again.
+ */
+ if (hfsmp->hfs_metazone_end != 0) {
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
+ }
+ hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
+ } else {
+ hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
+ }
+ MarkVCBDirty(hfsmp);
+fail_change_next_allocation:
+ hfs_unlock_mount(hfsmp);
+ return (error);
}
-#ifdef HFS_SPARSE_DEV
+#if HFS_SPARSE_DEV
case HFS_SETBACKINGSTOREINFO: {
struct vnode * bsfs_rootvp;
struct vnode * di_vp;
struct hfs_backingstoreinfo *bsdata;
int error = 0;
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
return (EALREADY);
}
vnode_ref(bsfs_rootvp);
vnode_put(bsfs_rootvp);
+ hfs_lock_mount(hfsmp);
hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
- hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
- hfsmp->hfs_sparsebandblks *= 4;
+ hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
+ hfs_unlock_mount(hfsmp);
+
+ /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
+
+ /*
+ * If the sparse image is on a sparse image file (as opposed to a sparse
+ * bundle), then we may need to limit the free space to the maximum size
+ * of a file on that volume. So we query (using pathconf), and if we get
+ * a meaningful result, we cache the number of blocks for later use in
+ * hfs_freeblks().
+ */
+ hfsmp->hfs_backingfs_maxblocks = 0;
+ if (vnode_vtype(di_vp) == VREG) {
+ int terr;
+ int hostbits;
+ terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
+ if (terr == 0 && hostbits != 0 && hostbits < 64) {
+ u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
+
+ hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
+ }
+ }
+
+ /* The free extent cache is managed differently for sparse devices.
+ * There is a window between which the volume is mounted and the
+ * device is marked as sparse, so the free extent cache for this
+ * volume is currently initialized as normal volume (sorted by block
+ * count). Reset the cache so that it will be rebuilt again
+ * for sparse device (sorted by start block).
+ */
+ ResetVCBFreeExtCache(hfsmp);
(void)vnode_put(di_vp);
file_drop(bsdata->backingfd);
kauth_cred_getuid(cred) != vfsp->f_owner) {
return (EACCES); /* must be owner of file system */
}
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
hfsmp->hfs_backingfs_rootvp) {
+ hfs_lock_mount(hfsmp);
hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
tmpvp = hfsmp->hfs_backingfs_rootvp;
hfsmp->hfs_backingfs_rootvp = NULLVP;
hfsmp->hfs_sparsebandblks = 0;
+ hfs_unlock_mount(hfsmp);
+
vnode_rele(tmpvp);
}
return (0);
}
#endif /* HFS_SPARSE_DEV */
+ /* Change the next CNID stored in the VH */
+ case HFS_CHANGE_NEXTCNID: {
+ int error = 0; /* Assume success */
+ u_int32_t fileid;
+ int wraparound = 0;
+ int lockflags = 0;
+
+ if (vnode_vfsisrdonly(vp)) {
+ return (EROFS);
+ }
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+
+ fileid = *(u_int32_t *)ap->a_data;
+
+ /* Must have catalog lock excl. to advance the CNID pointer */
+ lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
+
+ hfs_lock_mount(hfsmp);
+
+ /* If it is less than the current next CNID, force the wraparound bit to be set */
+ if (fileid < hfsmp->vcbNxtCNID) {
+ wraparound=1;
+ }
+
+ /* Return previous value. */
+ *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
+
+ hfsmp->vcbNxtCNID = fileid;
+
+ if (wraparound) {
+ hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
+ }
+
+ MarkVCBDirty(hfsmp);
+ hfs_unlock_mount(hfsmp);
+ hfs_systemfile_unlock (hfsmp, lockflags);
+
+ return (error);
+ }
+
case F_FREEZE_FS: {
struct mount *mp;
- task_t task;
- if (!is_suser())
- return (EACCES);
-
mp = vnode_mount(vp);
hfsmp = VFSTOHFS(mp);
if (!(hfsmp->jnl))
return (ENOTSUP);
- lck_rw_lock_exclusive(&hfsmp->hfs_insync);
-
- task = current_task();
- task_working_set_disable(task);
-
- // flush things before we get started to try and prevent
- // dirty data from being paged out while we're frozen.
- // note: can't do this after taking the lock as it will
- // deadlock against ourselves.
- vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
- hfs_global_exclusive_lock_acquire(hfsmp);
- journal_flush(hfsmp->jnl);
-
- // don't need to iterate on all vnodes, we just need to
- // wait for writes to the system files and the device vnode
- if (HFSTOVCB(hfsmp)->extentsRefNum)
- vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
- if (HFSTOVCB(hfsmp)->catalogRefNum)
- vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
- if (HFSTOVCB(hfsmp)->allocationsRefNum)
- vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
- if (hfsmp->hfs_attribute_vp)
- vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
- vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
-
- hfsmp->hfs_freezing_proc = current_proc();
+ vfsp = vfs_statfs(mp);
+
+ if (kauth_cred_getuid(cred) != vfsp->f_owner &&
+ !kauth_cred_issuser(cred))
+ return (EACCES);
- return (0);
+ return hfs_freeze(hfsmp);
}
case F_THAW_FS: {
- if (!is_suser())
+ vfsp = vfs_statfs(vnode_mount(vp));
+ if (kauth_cred_getuid(cred) != vfsp->f_owner &&
+ !kauth_cred_issuser(cred))
return (EACCES);
- // if we're not the one who froze the fs then we
- // can't thaw it.
- if (hfsmp->hfs_freezing_proc != current_proc()) {
- return EPERM;
+ return hfs_thaw(hfsmp, current_proc());
+ }
+
+ case HFS_BULKACCESS_FSCTL: {
+ int size;
+
+ if (hfsmp->hfs_flags & HFS_STANDARD) {
+ return EINVAL;
+ }
+
+ if (is64bit) {
+ size = sizeof(struct user64_access_t);
+ } else {
+ size = sizeof(struct user32_access_t);
+ }
+
+ return do_bulk_access_check(hfsmp, vp, ap, size, context);
+ }
+
+ case HFS_EXT_BULKACCESS_FSCTL: {
+ int size;
+
+ if (hfsmp->hfs_flags & HFS_STANDARD) {
+ return EINVAL;
+ }
+
+ if (is64bit) {
+ size = sizeof(struct user64_ext_access_t);
+ } else {
+ size = sizeof(struct user32_ext_access_t);
+ }
+
+ return do_bulk_access_check(hfsmp, vp, ap, size, context);
+ }
+
+ case HFS_SET_XATTREXTENTS_STATE: {
+ int state;
+
+ if (ap->a_data == NULL) {
+ return (EINVAL);
}
- // NOTE: if you add code here, also go check the
- // code that "thaws" the fs in hfs_vnop_close()
- //
- hfsmp->hfs_freezing_proc = NULL;
- hfs_global_exclusive_lock_release(hfsmp);
- lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
+ state = *(int *)ap->a_data;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
- return (0);
+ /* Super-user can enable or disable extent-based extended
+ * attribute support on a volume.
+ * Note: Starting with Mac OS X 10.7, extent-based extended attributes
+ * are enabled by default, so any change is transient and lasts only
+ * until the volume is remounted.
+ */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return (EPERM);
+ }
+ if (state == 0 || state == 1)
+ return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
+ else
+ return (EINVAL);
}
-#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
-#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
+ case F_SETSTATICCONTENT: {
+ int error;
+ int enable_static = 0;
+ struct cnode *cp = NULL;
+ /*
+ * lock the cnode, decorate the cnode flag, and bail out.
+ * VFS should have already authenticated the caller for us.
+ */
- case HFS_BULKACCESS_FSCTL:
- case HFS_BULKACCESS: {
- /*
- * NOTE: on entry, the vnode is locked. Incase this vnode
- * happens to be in our list of file_ids, we'll note it
- * avoid calling hfs_chashget_nowait() on that id as that
- * will cause a "locking against myself" panic.
+ if (ap->a_data) {
+ /*
+ * Note that even though ap->a_data is of type caddr_t,
+ * the fcntl layer at the syscall handler will pass in NULL
+ * or 1 depending on what the argument supplied to the fcntl
+ * was. So it is in fact correct to check the ap->a_data
+ * argument for zero or non-zero value when deciding whether or not
+ * to enable the static bit in the cnode.
+ */
+ enable_static = 1;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
+ }
+ cp = VTOC(vp);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ if (enable_static) {
+ cp->c_flag |= C_SSD_STATIC;
+ }
+ else {
+ cp->c_flag &= ~C_SSD_STATIC;
+ }
+ hfs_unlock (cp);
+ }
+ return error;
+ }
+
+ case F_SET_GREEDY_MODE: {
+ int error;
+ int enable_greedy_mode = 0;
+ struct cnode *cp = NULL;
+ /*
+ * lock the cnode, decorate the cnode flag, and bail out.
+ * VFS should have already authenticated the caller for us.
*/
- Boolean check_leaf = true;
-
- struct user_access_t *user_access_structp;
- struct user_access_t tmp_user_access_t;
- struct access_cache cache;
-
- int error = 0, i;
-
- dev_t dev = VTOC(vp)->c_dev;
-
- short flags;
- struct ucred myucred;
- int num_files;
- int *file_ids = NULL;
- short *access = NULL;
-
- cnid_t cnid;
- cnid_t prevParent_cnid = 0;
- unsigned long myPerms;
- short myaccess = 0;
- struct cat_attr cnattr;
- CatalogKey catkey;
- struct cnode *skip_cp = VTOC(vp);
- struct vfs_context my_context;
-
- /* set up front for common exit code */
- my_context.vc_ucred = NOCRED;
-
- /* first, return error if not run as root */
- if (cred->cr_ruid != 0) {
- return EPERM;
+
+ if (ap->a_data) {
+ /*
+ * Note that even though ap->a_data is of type caddr_t,
+ * the fcntl layer at the syscall handler will pass in NULL
+ * or 1 depending on what the argument supplied to the fcntl
+ * was. So it is in fact correct to check the ap->a_data
+ * argument for zero or non-zero value when deciding whether or not
+ * to enable the greedy mode bit in the cnode.
+ */
+ enable_greedy_mode = 1;
}
-
- /* initialize the local cache and buffers */
- cache.numcached = 0;
- cache.cachehits = 0;
- cache.lookups = 0;
-
- file_ids = (int *) get_pathbuff();
- access = (short *) get_pathbuff();
- cache.acache = (int *) get_pathbuff();
- cache.haveaccess = (Boolean *) get_pathbuff();
-
- if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
- release_pathbuff((char *) file_ids);
- release_pathbuff((char *) access);
- release_pathbuff((char *) cache.acache);
- release_pathbuff((char *) cache.haveaccess);
-
- return ENOMEM;
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
}
+ cp = VTOC(vp);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ if (enable_greedy_mode) {
+ cp->c_flag |= C_SSD_GREEDY_MODE;
+ }
+ else {
+ cp->c_flag &= ~C_SSD_GREEDY_MODE;
+ }
+ hfs_unlock (cp);
+ }
+ return error;
+ }
+
+ case F_SETIOTYPE: {
+ int error;
+ uint32_t iotypeflag = 0;
- /* struct copyin done during dispatch... need to copy file_id array separately */
+ struct cnode *cp = NULL;
+ /*
+ * lock the cnode, decorate the cnode flag, and bail out.
+ * VFS should have already authenticated the caller for us.
+ */
+
if (ap->a_data == NULL) {
- error = EINVAL;
- goto err_exit_bulk_access;
+ return EINVAL;
}
- if (is64bit) {
- user_access_structp = (struct user_access_t *)ap->a_data;
+ /*
+ * Note that even though ap->a_data is of type caddr_t, we
+ * can only use 32 bits of flag values.
+ */
+ iotypeflag = (uint32_t) ap->a_data;
+ switch (iotypeflag) {
+ case F_IOTYPE_ISOCHRONOUS:
+ break;
+ default:
+ return EINVAL;
}
- else {
- struct access_t * accessp = (struct access_t *)ap->a_data;
- tmp_user_access_t.uid = accessp->uid;
- tmp_user_access_t.flags = accessp->flags;
- tmp_user_access_t.num_groups = accessp->num_groups;
- tmp_user_access_t.num_files = accessp->num_files;
- tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
- tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
- tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
- user_access_structp = &tmp_user_access_t;
+
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
}
-
- num_files = user_access_structp->num_files;
- if (num_files < 1) {
- goto err_exit_bulk_access;
+ cp = VTOC(vp);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ switch (iotypeflag) {
+ case F_IOTYPE_ISOCHRONOUS:
+ cp->c_flag |= C_IO_ISOCHRONOUS;
+ break;
+ default:
+ break;
+ }
+ hfs_unlock (cp);
}
- if (num_files > 256) {
- error = EINVAL;
- goto err_exit_bulk_access;
+ return error;
+ }
+
+ case F_MAKECOMPRESSED: {
+ int error = 0;
+ uint32_t gen_counter;
+ struct cnode *cp = NULL;
+ int reset_decmp = 0;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
}
-
- if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
- num_files * sizeof(int)))) {
- goto err_exit_bulk_access;
+
+ /*
+ * acquire & lock the cnode.
+ * VFS should have already authenticated the caller for us.
+ */
+
+ if (ap->a_data) {
+ /*
+ * Treat the pointer as a uint32_t * and dereference it to
+ * extract the supplied generation counter.
+ */
+ gen_counter = *((uint32_t*)ap->a_data);
}
-
- /* fill in the ucred structure */
- flags = user_access_structp->flags;
- if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
- flags = R_OK;
+ else {
+ return EINVAL;
}
-
- /* check if we've been passed leaf node ids or parent ids */
- if (flags & PARENT_IDS_FLAG) {
- check_leaf = false;
+
+#if HFS_COMPRESSION
+ cp = VTOC(vp);
+ /* Grab truncate lock first; we may truncate the file */
+ hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error) {
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ return error;
}
-
- /*
- * Create a templated credential; this credential may *NOT*
- * be used unless instantiated with a kauth_cred_create();
- * there must be a correcponding kauth_cred_unref() when it
- * is no longer in use (i.e. before it goes out of scope).
- */
- memset(&myucred, 0, sizeof(myucred));
- myucred.cr_ref = 1;
- myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
- myucred.cr_ngroups = user_access_structp->num_groups;
- if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
- myucred.cr_ngroups = 0;
- } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
- myucred.cr_ngroups * sizeof(gid_t)))) {
- goto err_exit_bulk_access;
- }
- myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
- myucred.cr_gmuid = myucred.cr_uid;
-
- my_context.vc_proc = p;
- my_context.vc_ucred = kauth_cred_create(&myucred);
- /* Check access to each file_id passed in */
- for (i = 0; i < num_files; i++) {
-#if 0
- cnid = (cnid_t) file_ids[i];
-
- /* root always has access */
- if (!suser(my_context.vc_ucred, NULL)) {
- access[i] = 0;
- continue;
- }
-
- if (check_leaf) {
-
- /* do the lookup (checks the cnode hash, then the catalog) */
- error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
- if (error) {
- access[i] = (short) error;
- continue;
- }
-
- /* before calling CheckAccess(), check the target file for read access */
- myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
- cnattr.ca_mode, hfsmp->hfs_mp, my_context.vc_ucred, p );
-
-
- /* fail fast if no access */
- if ((myPerms & flags) == 0) {
- access[i] = EACCES;
- continue;
- }
- } else {
- /* we were passed an array of parent ids */
- catkey.hfsPlus.parentID = cnid;
- }
-
- /* if the last guy had the same parent and had access, we're done */
- if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
- cache.cachehits++;
- access[i] = 0;
- continue;
- }
-
- myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
- skip_cp, p, my_context.vc_ucred, dev);
-
- if ( myaccess ) {
- access[i] = 0; // have access.. no errors to report
- } else {
- access[i] = (error != 0 ? (short) error : EACCES);
- }
-
- prevParent_cnid = catkey.hfsPlus.parentID;
-#else
- int myErr;
-
- cnid = (cnid_t)file_ids[i];
-
- while (cnid >= kRootDirID) {
- /* get the vnode for this cnid */
- myErr = hfs_vget(hfsmp, cnid, &vp, 0);
- if ( myErr ) {
- access[i] = EACCES;
- break;
- }
+ /* Are there any other usecounts/FDs? */
+ if (vnode_isinuse(vp, 1)) {
+ hfs_unlock(cp);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ return EBUSY;
+ }
+
+ /* now we have the cnode locked down; Validate arguments */
+ if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
+ /* EINVAL if the file is IMMUTABLE or is already flagged UF_COMPRESSED */
+ hfs_unlock(cp);
+ hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
+ return EINVAL;
+ }
+
+ if ((hfs_get_gencount (cp)) == gen_counter) {
+ /*
+ * OK, the gen_counter matched. Go for it:
+ * Toggle state bits, truncate file, and suppress mtime update
+ */
+ reset_decmp = 1;
+ cp->c_bsdflags |= UF_COMPRESSED;
+
+ error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
+ ap->a_context);
+ }
+ else {
+ error = ESTALE;
+ }
- cnid = VTOC(vp)->c_parentcnid;
+ /* Unlock the cnode before calling into decmpfs; it may need to get an EA */
+ hfs_unlock(cp);
- hfs_unlock(VTOC(vp));
- if (vnode_vtype(vp) == VDIR) {
- /*
- * XXX This code assumes that none of the
- * XXX callbacks from vnode_authorize() will
- * XXX take a persistent ref on the context
- * XXX credential, which is a bad assumption.
- */
- myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
- } else {
- myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
- }
- vnode_put(vp);
- access[i] = myErr;
- if (myErr) {
- break;
- }
+ /*
+ * Reset the decmp state while still holding the truncate lock. We need to
+ * serialize here against a listxattr on this node which may occur at any
+ * time.
+ *
+ * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
+ * that will still potentially require getting the com.apple.decmpfs EA. If the
+ * EA is required, then we can't hold the cnode lock, because the getxattr call is
+ * generic(through VFS), and can't pass along any info telling it that we're already
+ * holding it (the lock). If we don't serialize, then we risk listxattr stopping
+ * and trying to fill in the hfs_file_is_compressed info during the callback
+ * operation, which will result in deadlock against the b-tree node.
+ *
+ * So, to serialize against listxattr (which will grab buf_t meta references on
+ * the b-tree blocks), we hold the truncate lock as we're manipulating the
+ * decmpfs payload.
+ */
+ if ((reset_decmp) && (error == 0)) {
+ decmpfs_cnode *dp = VTOCMP (vp);
+ if (dp != NULL) {
+ decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
}
-#endif
- }
-
- /* copyout the access array */
- if ((error = copyout((caddr_t)access, user_access_structp->access,
- num_files * sizeof (short)))) {
- goto err_exit_bulk_access;
+
+ /* Initialize the decmpfs node as needed */
+ (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
}
-
- err_exit_bulk_access:
-
- //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
-
- release_pathbuff((char *) cache.acache);
- release_pathbuff((char *) cache.haveaccess);
- release_pathbuff((char *) file_ids);
- release_pathbuff((char *) access);
- /* clean up local context, if needed */
- if (IS_VALID_CRED(my_context.vc_ucred))
- kauth_cred_unref(&my_context.vc_ucred);
-
- return (error);
- } /* HFS_BULKACCESS */
- case HFS_SETACLSTATE: {
- int state;
+ hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
- if (ap->a_data == NULL) {
- return (EINVAL);
- }
+#endif
+ return error;
+ }
- vfsp = vfs_statfs(HFSTOVFS(hfsmp));
- state = *(int *)ap->a_data;
+ case F_SETBACKINGSTORE: {
- // super-user can enable or disable acl's on a volume.
- // the volume owner can only enable acl's
- if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
- return (EPERM);
+ int error = 0;
+
+ /*
+ * See comment in F_SETSTATICCONTENT re: using
+ * a null check for a_data
+ */
+ if (ap->a_data) {
+ error = hfs_set_backingstore (vp, 1);
}
- if (state == 0 || state == 1)
- return hfs_setextendedsecurity(hfsmp, state);
- else
- return (EINVAL);
+ else {
+ error = hfs_set_backingstore (vp, 0);
+ }
+
+ return error;
+ }
+
+ case F_GETPATH_MTMINFO: {
+ int error = 0;
+
+ int *data = (int*) ap->a_data;
+
+ /* Ask if this is a backingstore vnode */
+ error = hfs_is_backingstore (vp, data);
+
+ return error;
}
case F_FULLFSYNC: {
int error;
-
- error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
if (error == 0) {
- error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
+ error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
hfs_unlock(VTOC(vp));
}
if (!vnode_isreg(vp))
return EINVAL;
- error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
if (error == 0) {
cp = VTOC(vp);
/*
fp = VTOF(vp);
/* Protect against a size change. */
- hfs_lock_truncate(VTOC(vp), TRUE);
-
+ hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+
+#if HFS_COMPRESSION
+ if (compressed && (uncompressed_size == -1)) {
+ /* fetching the uncompressed size failed above, so return the error */
+ error = decmpfs_error;
+ } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
+ (!compressed && (ra->ra_offset >= fp->ff_size))) {
+ error = EFBIG;
+ }
+#else /* HFS_COMPRESSION */
if (ra->ra_offset >= fp->ff_size) {
error = EFBIG;
- } else {
+ }
+#endif /* HFS_COMPRESSION */
+ else {
error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
}
- hfs_unlock_truncate(VTOC(vp));
+ hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
return (error);
}
- case F_READBOOTSTRAP:
- case F_WRITEBOOTSTRAP:
- {
- struct vnode *devvp = NULL;
- user_fbootstraptransfer_t *user_bootstrapp;
- int devBlockSize;
- int error;
- uio_t auio;
- daddr64_t blockNumber;
- u_long blockOffset;
- u_long xfersize;
- struct buf *bp;
- user_fbootstraptransfer_t user_bootstrap;
-
- if (!vnode_isvroot(vp))
- return (EINVAL);
- /* LP64 - when caller is a 64 bit process then we are passed a pointer
- * to a user_fbootstraptransfer_t else we get a pointer to a
- * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
- */
- if (is64bit) {
- user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
- }
- else {
- fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
- user_bootstrapp = &user_bootstrap;
- user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
- user_bootstrap.fbt_length = bootstrapp->fbt_length;
- user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
- }
- if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
- return EINVAL;
-
- devvp = VTOHFS(vp)->hfs_devvp;
- auio = uio_create(1, user_bootstrapp->fbt_offset,
- is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
- (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
- uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
-
- devBlockSize = vfs_devblocksize(vnode_mount(vp));
-
- while (uio_resid(auio) > 0) {
- blockNumber = uio_offset(auio) / devBlockSize;
- error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
- if (error) {
- if (bp) buf_brelse(bp);
- uio_free(auio);
- return error;
- };
-
- blockOffset = uio_offset(auio) % devBlockSize;
- xfersize = devBlockSize - blockOffset;
- error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
- if (error) {
- buf_brelse(bp);
- uio_free(auio);
- return error;
- };
- if (uio_rw(auio) == UIO_WRITE) {
- error = VNOP_BWRITE(bp);
- if (error) {
- uio_free(auio);
- return error;
- }
- } else {
- buf_brelse(bp);
- };
- };
- uio_free(auio);
- };
- return 0;
-
case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
{
if (is64bit) {
*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
}
else {
- *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
+ *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
}
return 0;
}
- case HFS_GET_MOUNT_TIME:
- return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
+ case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
+ break;
+
+ case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
+ break;
+
+ case HFS_FSCTL_GET_VERY_LOW_DISK:
+ *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
+ break;
+
+ case HFS_FSCTL_SET_VERY_LOW_DISK:
+ if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_GET_LOW_DISK:
+ *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
+ break;
+
+ case HFS_FSCTL_SET_LOW_DISK:
+ if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
+ || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
+
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_GET_DESIRED_DISK:
+ *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
+ break;
+
+ case HFS_FSCTL_SET_DESIRED_DISK:
+ if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
break;
- case HFS_GET_LAST_MTIME:
- return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
+ case HFS_VOLUME_STATUS:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
break;
case HFS_SET_BOOT_INFO:
return(EINVAL);
if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
return(EACCES); /* must be superuser or owner of filesystem */
- HFS_MOUNT_LOCK(hfsmp, TRUE);
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ hfs_lock_mount (hfsmp);
bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_unlock_mount (hfsmp);
(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
break;
case HFS_GET_BOOT_INFO:
if (!vnode_isvroot(vp))
return(EINVAL);
- HFS_MOUNT_LOCK(hfsmp, TRUE);
+ hfs_lock_mount (hfsmp);
bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_unlock_mount(hfsmp);
+ break;
+
+ case HFS_MARK_BOOT_CORRUPT:
+ /* Mark the boot volume corrupt by setting
+ * kHFSVolumeInconsistentBit in the volume header. This will
+ * force fsck_hfs on next mount.
+ */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EACCES;
+ }
+
+ /* Allowed only on the root vnode of the boot volume */
+ if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
+ !vnode_isvroot(vp)) {
+ return EINVAL;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
+ hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
+ break;
+
+ case HFS_FSCTL_GET_JOURNAL_INFO:
+ jip = (struct hfs_journal_info*)ap->a_data;
+
+ if (vp == NULLVP)
+ return EINVAL;
+
+ if (hfsmp->jnl == NULL) {
+ jnl_start = 0;
+ jnl_size = 0;
+ } else {
+ jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
+ jnl_size = (off_t)hfsmp->jnl_size;
+ }
+
+ jip->jstart = jnl_start;
+ jip->jsize = jnl_size;
+ break;
+
+ case HFS_SET_ALWAYS_ZEROFILL: {
+ struct cnode *cp = VTOC(vp);
+
+ if (*(int *)ap->a_data) {
+ cp->c_flag |= C_ALWAYS_ZEROFILL;
+ } else {
+ cp->c_flag &= ~C_ALWAYS_ZEROFILL;
+ }
+ break;
+ }
+
+ case HFS_DISABLE_METAZONE: {
+ /* Only root can disable metadata zone */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EACCES;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
+ /* Disable metadata zone now */
+ (void) hfs_metadatazone_init(hfsmp, true);
+ printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
+ break;
+ }
+
+
+ case HFS_FSINFO_METADATA_BLOCKS: {
+ int error;
+ struct hfsinfo_metadata *hinfo;
+
+ hinfo = (struct hfsinfo_metadata *)ap->a_data;
+
+ /* Get information about number of metadata blocks */
+ error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
+ if (error) {
+ return error;
+ }
+
+ break;
+ }
+
+ case HFS_GET_FSINFO: {
+ hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;
+
+ /* Only root is allowed to get fsinfo */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EACCES;
+ }
+
+ /*
+ * Make sure that the caller's version number matches with
+ * the kernel's version number. This will make sure that
+ * if the structures being read/written into are changed
+ * by the kernel, the caller will not read incorrect data.
+ *
+ * The first three fields --- request_type, version and
+ * flags are same for all the hfs_fsinfo structures, so
+ * we can access the version number by assuming any
+ * structure for now.
+ */
+ if (fsinfo->header.version != HFS_FSINFO_VERSION) {
+ return ENOTSUP;
+ }
+
+ /* Make sure that the current file system is not marked inconsistent */
+ if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
+ return EIO;
+ }
+
+ return hfs_get_fsinfo(hfsmp, ap->a_data);
+ }
+
+ case HFS_CS_FREESPACE_TRIM: {
+ int error = 0;
+ int lockflags = 0;
+
+ /* Only root allowed */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EACCES;
+ }
+
+ /*
+ * This core functionality is similar to hfs_scan_blocks().
+ * The main difference is that hfs_scan_blocks() is called
+ * as part of mount where we are assured that the journal is
+ * empty to start with. This fcntl() can be called on a
+ * mounted volume, therefore it has to flush the content of
+ * the journal as well as ensure the state of the summary table.
+ *
+ * This fcntl scans over the entire allocation bitmap,
+ * creates a list of all the free blocks, and issues TRIM
+ * down to the underlying device. This can take a long time
+ * as it can generate up to 512MB of read I/O.
+ */
+
+ if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
+ error = hfs_init_summary(hfsmp);
+ if (error) {
+ printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
+ return error;
+ }
+ }
+
+ /*
+ * The journal maintains a list of recently deallocated blocks to
+ * issue DKIOCUNMAPs when the corresponding journal transaction is
+ * flushed to the disk. To avoid any race conditions, we only
+ * want one active trim list and only one thread issuing DKIOCUNMAPs.
+ * Therefore we make sure that the journal trim list is sync'ed,
+ * empty, and not modifiable for the duration of our scan.
+ *
+ * Take the journal lock before flushing the journal to the disk.
+ * We keep holding the journal lock until we have acquired the
+ * bitmap lock, to make sure that no new journal transactions can
+ * start. This ensures that the journal trim list is not modified
+ * after the journal flush and before getting the bitmap lock.
+ * We can release the journal lock after we acquire the bitmap
+ * lock as it will prevent any further block deallocations.
+ */
+ hfs_journal_lock(hfsmp);
+
+ /* Flush the journal and wait for all I/Os to finish up */
+ error = hfs_journal_flush(hfsmp, TRUE);
+ if (error) {
+ hfs_journal_unlock(hfsmp);
+ return error;
+ }
+
+ /* Take bitmap lock to ensure it is not being modified */
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+ /* Release the journal lock */
+ hfs_journal_unlock(hfsmp);
+
+ /*
+ * ScanUnmapBlocks reads the bitmap in a large block size
+ * (up to 1MB) unlike the runtime which reads the bitmap
+ * in the 4K block size. This can cause buf_t collisions
+ * and potential data corruption. To avoid this, we
+ * invalidate all the existing buffers associated with
+ * the bitmap vnode before scanning it.
+ *
+ * Note: ScanUnmapBlocks() cleans up all the buffers
+ * after itself, so there won't be any large buffers left
+ * for us to clean up after it returns.
+ */
+ error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
+ if (error) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ return error;
+ }
+
+ /* Traverse bitmap and issue DKIOCUNMAPs */
+ error = ScanUnmapBlocks(hfsmp);
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ if (error) {
+ return error;
+ }
+
break;
+ }
default:
return (ENOTTY);
}
- /* Should never get here */
return 0;
}
* The block run is returned in logical blocks, and is the REMAINING amount of blocks
*/
int
-hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
+hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
- struct cnode *cp = VTOC(vp);
struct filefork *fp = VTOF(vp);
struct hfsmount *hfsmp = VTOHFS(vp);
int retval = E_NONE;
- daddr_t logBlockSize;
+ u_int32_t logBlockSize;
size_t bytesContAvail = 0;
off_t blockposition;
int lockExtBtree;
* to physical mapping is requested.
*/
if (vpp != NULL)
- *vpp = cp->c_devvp;
+ *vpp = hfsmp->hfs_devvp;
if (bnp == NULL)
return (0);
logBlockSize = GetLogicalBlockSize(vp);
- blockposition = (off_t)bn * (off_t)logBlockSize;
+ blockposition = (off_t)bn * logBlockSize;
lockExtBtree = overflow_extents(fp);
if (lockExtBtree)
- lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
retval = MacToVFSError(
MapFileBlockC (HFSTOVCB(hfsmp),
/*
* Map file offset to physical block number.
*
+ * If this function is called for a write operation, and the file
+ * has virtual blocks allocated (delayed allocation), real blocks
+ * are allocated by calling ExtendFileC().
+ *
+ * If this function is called for a read operation, and the file
+ * has virtual blocks allocated (delayed allocation), the size of
+ * the file is not changed and, if required, the rangelist is
+ * searched for a mapping.
+ *
* System file cnodes are expected to be locked (shared or exclusive).
*/
int
int started_tr = 0;
int tooklock = 0;
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ /* allow blockmaps to the resource fork */
+ } else {
+ if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
+ int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
+ switch(state) {
+ case FILE_IS_COMPRESSED:
+ return ENOTSUP;
+ case FILE_IS_CONVERTING:
+ /* if FILE_IS_CONVERTING, we allow blockmap */
+ break;
+ default:
+ printf("invalid state %d for compressed file\n", state);
+ /* fall through */
+ }
+ }
+ }
+#endif /* HFS_COMPRESSION */
+
/* Do not allow blockmap operation on a directory */
if (vnode_isdir(vp)) {
return (ENOTSUP);
if (ap->a_bpn == NULL)
return (0);
- if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
+ if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
if (VTOC(vp)->c_lockowner != current_thread()) {
- hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
tooklock = 1;
- } else {
- cp = VTOC(vp);
- panic("blockmap: %s cnode lock already held!\n",
- cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
}
}
hfsmp = VTOHFS(vp);
fp = VTOF(vp);
retry:
- if (fp->ff_unallocblocks) {
+ /* Check virtual blocks only when performing write operation */
+ if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
if (hfs_start_transaction(hfsmp) != 0) {
retval = EINVAL;
goto exit;
/*
* Check for any delayed allocations.
*/
- if (fp->ff_unallocblocks) {
- SInt64 actbytes;
+ if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
+ int64_t actbytes;
u_int32_t loanedBlocks;
//
cp->c_blocks += loanedBlocks;
fp->ff_blocks += loanedBlocks;
- HFS_MOUNT_LOCK(hfsmp, TRUE);
+ hfs_lock_mount (hfsmp);
hfsmp->loanedBlocks += loanedBlocks;
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
- }
+ hfs_unlock_mount (hfsmp);
- if (retval) {
hfs_systemfile_unlock(hfsmp, lockflags);
cp->c_flag |= C_MODIFIED;
if (started_tr) {
(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
hfs_end_transaction(hfsmp);
+ started_tr = 0;
}
goto exit;
}
started_tr = 0;
}
if (retval) {
+ /* On write, always return error because virtual blocks, if any,
+ * should have been allocated in ExtendFileC(). We do not
+ * allocate virtual blocks on read, therefore return error
+ * only if no virtual blocks are allocated. Otherwise we search
+ * rangelist for zero-fills
+ */
+ if ((MacToVFSError(retval) != ERANGE) ||
+ (ap->a_flags & VNODE_WRITE) ||
+ ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
+ goto exit;
+ }
+
+ /* Validate if the start offset is within logical file size */
+ if (ap->a_foffset >= fp->ff_size) {
+ goto exit;
+ }
+
+ /*
+ * At this point, we have encountered a failure during
+ * MapFileBlockC that resulted in ERANGE, and we are not servicing
+ * a write, and there are borrowed blocks.
+ *
+ * However, the cluster layer will not call blockmap for
+ * blocks that are borrowed and in-cache. We have to assume that
+ * because we observed ERANGE being emitted from MapFileBlockC, this
+ * extent range is not valid on-disk. So we treat this as a
+ * mapping that needs to be zero-filled prior to reading.
+ *
+ * Note that under certain circumstances (such as non-contiguous
+ * userland VM mappings in the calling process), cluster_io
+ * may be forced to split a large I/O driven by hfs_vnop_write
+ * into multiple sub-I/Os that necessitate a RMW cycle. If this is
+ * the case here, then we have already removed the invalid range list
+ * mapping prior to getting to this blockmap call, so we should not
+ * search the invalid rangelist for this byte range.
+ */
+
+ bytesContAvail = fp->ff_size - ap->a_foffset;
+ /*
+ * Clip the contiguous available bytes to, at most, the allowable
+ * maximum or the amount requested.
+ */
+
+ if (bytesContAvail > ap->a_size) {
+ bytesContAvail = ap->a_size;
+ }
+
+ *ap->a_bpn = (daddr64_t) -1;
+ retval = 0;
+
goto exit;
}
- /* Adjust the mapping information for invalid file ranges: */
+ /* MapFileBlockC() found a valid extent in the filefork. Search the
+ * mapping information further for invalid file ranges
+ */
overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
ap->a_foffset + (off_t)bytesContAvail - 1,
&invalid_range);
case RL_MATCHINGOVERLAP:
case RL_OVERLAPCONTAINSRANGE:
case RL_OVERLAPSTARTSBEFORE:
- /* There's no valid block for this byte offset: */
+ /* There's no valid block for this byte offset */
*ap->a_bpn = (daddr64_t)-1;
/* There's no point limiting the amount to be returned
* if the invalid range that was hit extends all the way
* end of this range and the file's EOF):
*/
if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
- (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
}
break;
/* There's actually no valid information to be had starting here: */
*ap->a_bpn = (daddr64_t)-1;
if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
- (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
}
} else {
} /* end switch */
if (bytesContAvail > ap->a_size)
bytesContAvail = ap->a_size;
+ }
+
+exit:
+ if (retval == 0) {
+ if (ap->a_run)
+ *ap->a_run = bytesContAvail;
+
+ if (ap->a_poff)
+ *(int *)ap->a_poff = 0;
}
- if (ap->a_run)
- *ap->a_run = bytesContAvail;
- if (ap->a_poff)
- *(int *)ap->a_poff = 0;
-exit:
if (tooklock)
hfs_unlock(cp);
return (MacToVFSError(retval));
}
-
/*
* prepare and issue the I/O
* buf_strategy knows how to deal
{
buf_t bp = ap->a_bp;
vnode_t vp = buf_vnode(bp);
- struct cnode *cp = VTOC(vp);
+ int error = 0;
+
+ /* Mark buffer as containing static data if cnode flag set */
+ if (VTOC(vp)->c_flag & C_SSD_STATIC) {
+ buf_markstatic(bp);
+ }
+
+ /* Mark buffer as containing greedy mode data if cnode flag set */
+ if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
+ bufattr_markgreedymode(&bp->b_attr);
+ }
- return (buf_strategy(cp->c_devvp, ap));
+ /* mark buffer as containing burst mode data if cnode flag set */
+ if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
+ bufattr_markisochronous(&bp->b_attr);
+ }
+
+#if CONFIG_PROTECT
+ cnode_t *cp = NULL;
+
+ if ((!bufattr_rawencrypted(&bp->b_attr)) &&
+ ((cp = cp_get_protected_cnode(vp)) != NULL)) {
+ /*
+ * We rely upon the truncate lock to protect the
+ * CP cache key from getting tossed prior to our IO finishing here.
+ * Nearly all cluster io calls to manipulate file payload from HFS
+ * take the truncate lock before calling into the cluster
+ * layer to ensure the file size does not change, or that they
+ * have exclusive right to change the EOF of the file.
+ * That same guarantee protects us here since the code that
+ * deals with CP lock events must now take the truncate lock
+ * before doing anything.
+ *
+ * There is one exception here: VM swapfile IO, because HFS will
+ * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
+ * swapfile code only without holding the truncate lock. This is because
+ * individual swapfiles are maintained at fixed-length sizes by the VM code.
+ * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
+ * create our own UPL and thus take the truncate lock before calling
+ * into the cluster layer. In the swapfile case, however, we are not concerned
+ * with the CP blob being wiped out in the middle of the IO
+ * because there isn't anything to toss; the VM swapfile key stays
+ * in-core as long as the file is open.
+ */
+
+
+ /*
+ * Last chance: If this data protected I/O does not have unwrapped keys
+ * present, then try to get them. We already know that it should, by this point.
+ */
+ if (cp->c_cpentry->cp_flags & (CP_KEY_FLUSHED | CP_NEEDS_KEYS)) {
+ int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS);
+ if ((error = cp_handle_vnop(vp, io_op, 0)) != 0) {
+ /*
+ * We have to be careful here. By this point in the I/O path, VM or the cluster
+ * engine has prepared a buf_t with the proper file offsets and all the rest,
+ * so simply erroring out will result in us leaking this particular buf_t.
+ * We need to properly decorate the buf_t just as buf_strategy would so as
+ * to make it appear that the I/O errored out with the particular error code.
+ */
+ buf_seterror (bp, error);
+ buf_biodone(bp);
+ return error;
+ }
+ }
+
+ /*
+ *NB:
+ * For filesystem resize, we may not have access to the underlying
+ * file's cache key for whatever reason (device may be locked). However,
+ * we do not need it since we are going to use the temporary HFS-wide resize key
+ * which is generated once we start relocating file content. If this file's I/O
+ * should be done using the resize key, it will have been supplied already, so
+ * do not attach the file's cp blob to the buffer.
+ */
+ if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
+ buf_setcpaddr(bp, cp->c_cpentry);
+ }
+ }
+#endif /* CONFIG_PROTECT */
+
+ error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
+
+ return error;
}
+/*
+ * hfs_minorupdate
+ *
+ * Clears the cnode's pending-update state in memory only: the
+ * C_MODIFIED bit is removed from c_flag and the access, change and
+ * modification "touch" markers are reset to zero.  Always returns 0.
+ */
+static int
+hfs_minorupdate(struct vnode *vp)
+{
+	struct cnode *node = VTOC(vp);
+
+	/* Forget any timestamp touches that were queued up ... */
+	node->c_touch_modtime = 0;
+	node->c_touch_chgtime = 0;
+	node->c_touch_acctime = 0;
+
+	/* ... and mark the cnode as no longer modified. */
+	node->c_flag &= ~C_MODIFIED;
+
+	return 0;
+}
-static int
-do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
+int
+do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
{
register struct cnode *cp = VTOC(vp);
struct filefork *fp = VTOF(vp);
- struct proc *p = vfs_context_proc(context);;
kauth_cred_t cred = vfs_context_ucred(context);
int retval;
off_t bytesToAdd;
off_t actualBytesAdded;
off_t filebytes;
- u_int64_t old_filesize;
- u_long fileblocks;
+ u_int32_t fileblocks;
int blksize;
struct hfsmount *hfsmp;
int lockflags;
+ int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
+ int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
blksize = VTOVCB(vp)->blockSize;
fileblocks = fp->ff_blocks;
filebytes = (off_t)fileblocks * (off_t)blksize;
- old_filesize = fp->ff_size;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
+ KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START,
(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
if (length < 0)
*/
if (length > filebytes) {
int eflags;
- u_long blockHint = 0;
+ u_int32_t blockHint = 0;
/* All or nothing and don't round up to clumpsize. */
eflags = kEFAllMask | kEFNoClumpMask;
- if (cred && suser(cred, NULL) != 0)
+ if (cred && (suser(cred, NULL) != 0)) {
eflags |= kEFReserveMask; /* keep a reserve */
+ }
/*
* Allocate Journal and Quota files in metadata zone.
lockflags |= SFL_EXTENTS;
lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+ /*
+ * Keep growing the file as long as the current EOF is
+ * less than the desired value.
+ */
while ((length > filebytes) && (retval == E_NONE)) {
bytesToAdd = length - filebytes;
retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
hfs_systemfile_unlock(hfsmp, lockflags);
if (hfsmp->jnl) {
- (void) hfs_update(vp, TRUE);
- (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
}
hfs_end_transaction(hfsmp);
if (retval)
goto Err_Exit;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
}
- if (!(flags & IO_NOZEROFILL)) {
- if (UBCINFOEXISTS(vp) && retval == E_NONE) {
+ if (ISSET(flags, IO_NOZEROFILL)) {
+ // An optimisation for the hibernation file
+ if (vnode_isswap(vp))
+ rl_remove_all(&fp->ff_invalidranges);
+ } else {
+ if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
struct rl_entry *invalid_range;
off_t zero_limit;
retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
fp->ff_size, (off_t)0,
(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
- hfs_lock(cp, HFS_FORCE_LOCK);
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
if (retval) goto Err_Exit;
/* Merely invalidate the remaining area, if necessary: */
panic("hfs_truncate: invoked on non-UBC object?!");
};
}
- cp->c_touch_modtime = TRUE;
- fp->ff_size = length;
-
- /* Nested transactions will do their own ubc_setsize. */
- if (!skipsetsize) {
- /*
- * ubc_setsize can cause a pagein here
- * so we need to drop cnode lock.
- */
- hfs_unlock(cp);
- ubc_setsize(vp, length);
- hfs_lock(cp, HFS_FORCE_LOCK);
+ if (suppress_times == 0) {
+ cp->c_touch_modtime = TRUE;
}
+ fp->ff_size = length;
} else { /* Shorten the size of the file */
- if ((off_t)fp->ff_size > length) {
- /*
- * Any buffers that are past the truncation point need to be
- * invalidated (to maintain buffer cache consistency).
- */
-
- /* Nested transactions will do their own ubc_setsize. */
- if (!skipsetsize) {
- /*
- * ubc_setsize can cause a pageout here
- * so we need to drop cnode lock.
- */
- hfs_unlock(cp);
- ubc_setsize(vp, length);
- hfs_lock(cp, HFS_FORCE_LOCK);
- }
-
+ // An optimisation for the hibernation file
+ if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) {
+ rl_remove_all(&fp->ff_invalidranges);
+ } else if ((off_t)fp->ff_size > length) {
/* Any space previously marked as invalid is now irrelevant: */
rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
}
u_int32_t finalblks;
u_int32_t loanedBlocks;
- HFS_MOUNT_LOCK(hfsmp, TRUE);
-
+ hfs_lock_mount(hfsmp);
loanedBlocks = fp->ff_unallocblocks;
cp->c_blocks -= loanedBlocks;
fp->ff_blocks -= loanedBlocks;
cp->c_blocks += loanedBlocks;
fp->ff_blocks += loanedBlocks;
}
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_unlock_mount (hfsmp);
}
- /*
- * For a TBE process the deallocation of the file blocks is
- * delayed until the file is closed. And hfs_close calls
- * truncate with the IO_NDELAY flag set. So when IO_NDELAY
- * isn't set, we make sure this isn't a TBE process.
- */
- if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
#if QUOTA
- off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
+ off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
- if (hfs_start_transaction(hfsmp) != 0) {
- retval = EINVAL;
- goto Err_Exit;
- }
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
- if (fp->ff_unallocblocks == 0) {
- /* Protect extents b-tree and allocation bitmap */
- lockflags = SFL_BITMAP;
- if (overflow_extents(fp))
- lockflags |= SFL_EXTENTS;
- lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+ if (fp->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
- retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
- (FCB*)fp, length, false));
+ retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
+ FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
- hfs_systemfile_unlock(hfsmp, lockflags);
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (hfsmp->jnl) {
+ if (retval == 0) {
+ fp->ff_size = length;
}
- if (hfsmp->jnl) {
- if (retval == 0) {
- fp->ff_size = length;
- }
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
(void) hfs_update(vp, TRUE);
(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
}
+ }
+ hfs_end_transaction(hfsmp);
- hfs_end_transaction(hfsmp);
-
- filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
- if (retval)
- goto Err_Exit;
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (retval)
+ goto Err_Exit;
#if QUOTA
- /* These are bytesreleased */
- (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
+ /* These are bytesreleased */
+ (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
- }
- /* Only set update flag if the logical length changes */
- if (old_filesize != length)
+
+ /*
+ * Only set update flag if the logical length changes & we aren't
+ * suppressing modtime updates.
+ */
+ if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
cp->c_touch_modtime = TRUE;
+ }
fp->ff_size = length;
}
- cp->c_touch_chgtime = TRUE;
- retval = hfs_update(vp, MNT_WAIT);
+ if (cp->c_mode & (S_ISUID | S_ISGID)) {
+ if (!vfs_context_issuser(context)) {
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ skipupdate = 0;
+ }
+ }
+ if (skipupdate) {
+ retval = hfs_minorupdate(vp);
+ }
+ else {
+ cp->c_touch_chgtime = TRUE; /* status changed */
+ if (suppress_times == 0) {
+ cp->c_touch_modtime = TRUE; /* file data was modified */
+
+ /*
+ * If we are not suppressing the modtime update, then
+ * update the gen count as well.
+ */
+ if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
+ hfs_incr_gencount(cp);
+ }
+ }
+
+ retval = hfs_update(vp, MNT_WAIT);
+ }
if (retval) {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
-1, -1, -1, retval, 0);
}
Err_Exit:
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
+ KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END,
(int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
return (retval);
}
+/*
+ * hfs_prepare_release_storage
+ *
+ * Preparation which must be done prior to deleting a catalog record.
+ * To keep the on-disk state as safe as possible, the catalog entry is
+ * removed before the bitmap blocks and the overflow extent records are
+ * released; the work below therefore has to run first.
+ *
+ * When calling this function, the cnode must exist both in memory and
+ * on-disk.  If there are both resource fork and data fork vnodes, this
+ * function should be called on each of them.
+ *
+ * Returns 0 on success, EISDIR when vp is a directory, EINVAL when the
+ * fork size is negative, or (QUOTA builds only) the error reported by
+ * hfs_getinoquota().
+ */
+
+int
+hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp)
+{
+	struct filefork *fork = VTOF(vp);
+	struct cnode *node = VTOC(vp);
+
+	/* An HFS directory cannot be truncated. */
+	if (vnode_isdir(vp)) {
+		return (EISDIR);
+	}
+
+	/*
+	 * This path is only used when all storage for the file is being
+	 * removed, so take the shortcut of shrinking the UBC to zero right
+	 * away; that avoids leaving I/O pending against blocks which are
+	 * about to be released.
+	 */
+	ubc_setsize(vp, 0);
+
+	/* A negative size should only be seen on a corrupt filesystem. */
+	if ((off_t)fork->ff_size < 0) {
+		return (EINVAL);
+	}
+
+#if QUOTA
+	{
+		int quota_error = hfs_getinoquota(node);
+
+		if (quota_error != 0) {
+			return (quota_error);
+		}
+	}
+#endif /* QUOTA */
+
+	/* Discard invalid ranges which have yet to be backed by disk. */
+	rl_remove(0, fork->ff_size - 1, &fork->ff_invalidranges);
+
+	/*
+	 * Give back any loaned (not yet allocated) blocks.  The entire file
+	 * is going away, so there is no need to consider shrinking to a
+	 * smaller number of borrowed blocks.  The per-fork, per-cnode and
+	 * per-mount counters are adjusted together under the mount lock.
+	 */
+	if (fork->ff_unallocblocks > 0) {
+		u_int32_t loaned;
+
+		hfs_lock_mount (hfsmp);
+
+		loaned = fork->ff_unallocblocks;
+		fork->ff_unallocblocks = 0;
+		fork->ff_blocks -= loaned;
+		node->c_blocks -= loaned;
+		hfsmp->loanedBlocks -= loaned;
+
+		hfs_unlock_mount (hfsmp);
+	}
+
+	return 0;
+}
+
+
+/*
+ * hfs_release_fork_storage
+ *
+ * Helper for hfs_release_storage: sets the logical size of one fork to
+ * zero and then truncates its physical storage down to nothing with
+ * TruncateFileC, lowering the target size by at most HFS_BIGFILE_SIZE
+ * bytes per journal transaction.
+ *
+ * is_rsrc is passed straight through to TruncateFileC (0 for the data
+ * fork, 1 for the resource fork).
+ *
+ * Returns 0 on success, EINVAL if a transaction could not be started,
+ * or the MacToVFSError() translation of a TruncateFileC failure.
+ */
+static int
+hfs_release_fork_storage (struct hfsmount *hfsmp, struct filefork *fork,
+		int is_rsrc, u_int32_t fileid)
+{
+	off_t filebytes;
+	u_int32_t fileblocks;
+	int blksize = hfsmp->blockSize;
+	int error = 0;
+	int lockflags;
+
+	fork->ff_size = 0;
+
+	fileblocks = fork->ff_blocks;
+	filebytes = (off_t)fileblocks * (off_t)blksize;
+
+	/* We killed invalid ranges and loaned blocks before we removed the catalog entry */
+
+	while (filebytes > 0) {
+		/* Pick the size to truncate to in this pass. */
+		if (filebytes > HFS_BIGFILE_SIZE) {
+			filebytes -= HFS_BIGFILE_SIZE;
+		} else {
+			filebytes = 0;
+		}
+
+		/* Start a transaction, and wipe out as many blocks as we can in this iteration */
+		if (hfs_start_transaction(hfsmp) != 0) {
+			error = EINVAL;
+			break;
+		}
+
+		/* TruncateFileC is only called when the fork has no loaned blocks left. */
+		if (fork->ff_unallocblocks == 0) {
+			/* Protect extents b-tree and allocation bitmap */
+			lockflags = SFL_BITMAP;
+			if (overflow_extents(fork))
+				lockflags |= SFL_EXTENTS;
+			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+			error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), fork, filebytes, 1, is_rsrc, fileid, false));
+
+			hfs_systemfile_unlock(hfsmp, lockflags);
+		}
+		/* Best effort: a volume-update failure does not stop the release. */
+		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+
+		/* Finish the transaction and start over if necessary */
+		hfs_end_transaction(hfsmp);
+
+		if (error) {
+			break;
+		}
+	}
+
+	return error;
+}
+
+/*
+ * Special wrapper around calling TruncateFileC.  This function is usable
+ * even when the catalog record does not exist any longer, making it ideal
+ * for use when deleting a file.  The simplification here is that we know
+ * that we are releasing all blocks.
+ *
+ * Note that this function may be called when there is no vnode backing
+ * the file fork in question.  We may call this from hfs_vnop_inactive
+ * to clear out resource fork data (and may not want to clear out the data
+ * fork yet).  As a result, each fork pointer is checked for NULL before
+ * anything is done with it.
+ *
+ * The caller is responsible for saving off a copy of the filefork(s)
+ * embedded within the cnode prior to calling this function.  The pointers
+ * supplied as arguments must be valid even if the cnode is no longer valid.
+ *
+ * The data fork is released first; the resource fork is only touched if
+ * the data fork (when present) was released without error.  Returns 0 or
+ * the first error encountered.
+ */
+
+int
+hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
+					 struct filefork *rsrcfork, u_int32_t fileid) {
+
+	int error = 0;
+
+	/* Data Fork */
+	if (datafork) {
+		error = hfs_release_fork_storage(hfsmp, datafork, 0, fileid);
+	}
+
+	/* Resource fork */
+	if (error == 0 && rsrcfork) {
+		error = hfs_release_fork_storage(hfsmp, rsrcfork, 1, fileid);
+	}
+
+	return error;
+}
+
+/*
+ * hfs_ubc_setsize
+ *
+ * Tell the UBC about a new file size so the VM subsystem gets a chance
+ * to do whatever it needs to with existing pages before blocks are
+ * deleted.
+ *
+ * vp              - vnode whose UBC size is being changed
+ * len             - new size
+ * have_cnode_lock - true when the caller already holds the cnode lock
+ *
+ * Returns 0 on success or the ubc_setsize_ex() error.  ENOENT is mapped
+ * to 0: symlinks don't use the UBC, so ENOENT is expected for them.
+ */
+errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
+{
+	errno_t result;
+
+	if (!have_cnode_lock) {
+		/* No cnode lock held: a plain call is all that is needed. */
+		result = ubc_setsize_ex(vp, len, 0);
+	} else {
+		/*
+		 * First try with UBC_SETSIZE_NO_FS_REENTRY (presumably this
+		 * keeps UBC from calling back into the filesystem while the
+		 * cnode lock is held -- confirm against the UBC KPI).
+		 */
+		result = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
+
+		if (result == EAGAIN) {
+			cnode_t *node = VTOC(vp);
+
+			/* The retry below expects this thread to own the truncate lock. */
+			if (node->c_truncatelockowner != current_thread()) {
+#if DEVELOPMENT || DEBUG
+				panic("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
+#else
+				printf("hfs: hfs_ubc_setsize called without exclusive truncate lock!\n");
+#endif
+			}
+
+			/* Drop the cnode lock, retry without the flag, then retake it. */
+			hfs_unlock(node);
+			result = ubc_setsize_ex(vp, len, 0);
+			hfs_lock_always(node, HFS_EXCLUSIVE_LOCK);
+		}
+	}
+
+	if (result == ENOENT) {
+		return 0;
+	}
+	return result;
+}
/*
* Truncate a cnode to at most length size, freeing (or adding) the
* disk blocks.
*/
-__private_extern__
int
-hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
- vfs_context_t context)
+hfs_truncate(struct vnode *vp, off_t length, int flags,
+ int truncateflags, vfs_context_t context)
{
- struct filefork *fp = VTOF(vp);
+ struct filefork *fp = VTOF(vp);
off_t filebytes;
- u_long fileblocks;
- int blksize, error = 0;
+ u_int32_t fileblocks;
+ int blksize;
+ errno_t error = 0;
struct cnode *cp = VTOC(vp);
- if (vnode_isdir(vp))
- return (EISDIR); /* cannot truncate an HFS directory! */
+ /* Cannot truncate an HFS directory! */
+ if (vnode_isdir(vp)) {
+ return (EISDIR);
+ }
+ /* A swap file cannot change size. */
+ if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
+ return (EPERM);
+ }
blksize = VTOVCB(vp)->blockSize;
fileblocks = fp->ff_blocks;
filebytes = (off_t)fileblocks * (off_t)blksize;
+ bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());
+
+ error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
+ if (error)
+ return error;
+
+ if (!caller_has_cnode_lock) {
+ error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error)
+ return error;
+ }
+
// have to loop truncating or growing files that are
// really big because otherwise transactions can get
// enormous and consume too many kernel resources.
if (length < filebytes) {
while (filebytes > length) {
- if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ if ((filebytes - length) > HFS_BIGFILE_SIZE) {
filebytes -= HFS_BIGFILE_SIZE;
} else {
filebytes = length;
}
cp->c_flag |= C_FORCEUPDATE;
- error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
+ error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
if (error)
break;
}
} else if (length > filebytes) {
while (filebytes < length) {
- if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ if ((length - filebytes) > HFS_BIGFILE_SIZE) {
filebytes += HFS_BIGFILE_SIZE;
} else {
filebytes = length;
}
cp->c_flag |= C_FORCEUPDATE;
- error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
+ error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
if (error)
break;
}
} else /* Same logical size */ {
- error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
+ error = do_hfs_truncate(vp, length, flags, truncateflags, context);
}
/* Files that are changing size are not hot file candidates. */
if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
fp->ff_bytesread = 0;
}
- return (error);
-}
+ if (!caller_has_cnode_lock)
+ hfs_unlock(cp);
+ // Make sure UBC's size matches up (in case we didn't completely succeed)
+ errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
+ if (!error)
+ error = err2;
+
+ return error;
+}
/*
off_t moreBytesRequested;
off_t actualBytesAdded;
off_t filebytes;
- u_long fileblocks;
+ u_int32_t fileblocks;
int retval, retval2;
- UInt32 blockHint;
- UInt32 extendFlags; /* For call to ExtendFileC */
+ u_int32_t blockHint;
+ u_int32_t extendFlags; /* For call to ExtendFileC */
struct hfsmount *hfsmp;
kauth_cred_t cred = vfs_context_ucred(ap->a_context);
int lockflags;
+ time_t orig_ctime;
*(ap->a_bytesallocated) = 0;
return (EISDIR);
if (length < (off_t)0)
return (EINVAL);
-
- if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
- return (retval);
+
cp = VTOC(vp);
+
+ orig_ctime = VTOC(vp)->c_ctime;
+
+ check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
+
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+ goto Err_Exit;
+ }
+
fp = VTOF(vp);
hfsmp = VTOHFS(vp);
vcb = VTOVCB(vp);
extendFlags |= kEFAllMask;
if (cred && suser(cred, NULL) != 0)
extendFlags |= kEFReserveMask;
+ if (hfs_virtualmetafile(cp))
+ extendFlags |= kEFMetadataMask;
retval = E_NONE;
blockHint = 0;
* value of filebytes is 0, length will be at least 1.
*/
if (length > filebytes) {
- moreBytesRequested = length - filebytes;
+ off_t total_bytes_added = 0, orig_request_size;
+
+ orig_request_size = moreBytesRequested = length - filebytes;
#if QUOTA
retval = hfs_chkdq(cp,
* Allocate Journal and Quota files in metadata zone.
*/
if (hfs_virtualmetafile(cp)) {
- extendFlags |= kEFMetadataMask;
blockHint = hfsmp->hfs_metazone_start;
} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
(blockHint <= hfsmp->hfs_metazone_end)) {
}
}
- if (hfs_start_transaction(hfsmp) != 0) {
- retval = EINVAL;
- goto Err_Exit;
- }
- /* Protect extents b-tree and allocation bitmap */
- lockflags = SFL_BITMAP;
- if (overflow_extents(fp))
- lockflags |= SFL_EXTENTS;
- lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+ while ((length > filebytes) && (retval == E_NONE)) {
+ off_t bytesRequested;
+
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
+ bytesRequested = HFS_BIGFILE_SIZE;
+ } else {
+ bytesRequested = moreBytesRequested;
+ }
- retval = MacToVFSError(ExtendFileC(vcb,
+ if (extendFlags & kEFContigMask) {
+ // if we're on a sparse device, this will force it to do a
+ // full scan to find the space needed.
+ hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
+ }
+
+ retval = MacToVFSError(ExtendFileC(vcb,
(FCB*)fp,
- moreBytesRequested,
+ bytesRequested,
blockHint,
extendFlags,
&actualBytesAdded));
- *(ap->a_bytesallocated) = actualBytesAdded;
- filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
-
- hfs_systemfile_unlock(hfsmp, lockflags);
+ if (retval == E_NONE) {
+ *(ap->a_bytesallocated) += actualBytesAdded;
+ total_bytes_added += actualBytesAdded;
+ moreBytesRequested -= actualBytesAdded;
+ if (blockHint != 0) {
+ blockHint += actualBytesAdded / vcb->blockSize;
+ }
+ }
+ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
- if (hfsmp->jnl) {
+ if (hfsmp->jnl) {
(void) hfs_update(vp, TRUE);
(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+
+ hfs_end_transaction(hfsmp);
}
- hfs_end_transaction(hfsmp);
/*
* if we get an error and no changes were made then exit
* until the file is closed, when we truncate the file to allocation
* block size.
*/
- if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
+ if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
*(ap->a_bytesallocated) =
- roundup(moreBytesRequested, (off_t)vcb->blockSize);
+ roundup(orig_request_size, (off_t)vcb->blockSize);
} else { /* Shorten the size of the file */
- if (fp->ff_size > length) {
- /*
- * Any buffers that are past the truncation point need to be
- * invalidated (to maintain buffer cache consistency).
- */
- }
-
- if (hfs_start_transaction(hfsmp) != 0) {
- retval = EINVAL;
- goto Err_Exit;
- }
-
- /* Protect extents b-tree and allocation bitmap */
- lockflags = SFL_BITMAP;
- if (overflow_extents(fp))
- lockflags |= SFL_EXTENTS;
- lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
-
- retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
-
- hfs_systemfile_unlock(hfsmp, lockflags);
-
- filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
-
- if (hfsmp->jnl) {
- (void) hfs_update(vp, TRUE);
- (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
- }
+ /*
+ * N.B. At present, this code is never called. If and when we
+ * do start using it, it looks like there might be slightly
+ * strange semantics with the file size: it's possible for the
+ * file size to *increase* e.g. if current file size is 5,
+ * length is 1024 and filebytes is 4096, the file size will
+ * end up being 1024 bytes. This isn't necessarily a problem
+ * but it's not consistent with the code above which doesn't
+ * change the file size.
+ */
- hfs_end_transaction(hfsmp);
-
+ retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
+ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
/*
* if we get an error and no changes were made then exit
if (fp->ff_size > filebytes) {
fp->ff_size = filebytes;
- hfs_unlock(cp);
- ubc_setsize(vp, fp->ff_size);
- hfs_lock(cp, HFS_FORCE_LOCK);
+ hfs_ubc_setsize(vp, fp->ff_size, true);
}
}
if (retval == 0)
retval = retval2;
Err_Exit:
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
hfs_unlock(cp);
return (retval);
}
};
*/
{
- vnode_t vp = ap->a_vp;
- int error;
+ vnode_t vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ int error = 0;
+ upl_t upl;
+ upl_page_info_t *pl;
+ off_t f_offset;
+ off_t page_needed_f_offset;
+ int offset;
+ int isize;
+ int upl_size;
+ int pg_index;
+ boolean_t truncate_lock_held = FALSE;
+ boolean_t file_converted = FALSE;
+ kern_return_t kret;
+
+ vp = ap->a_vp;
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+
+#if CONFIG_PROTECT
+ if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
+ /*
+ * If we errored here, then this means that one of two things occurred:
+ * 1. there was a problem with the decryption of the key.
+ * 2. the device is locked and we are not allowed to access this particular file.
+ *
+ * Either way, this means that we need to shut down this upl now. As long as
+ * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
+ * then we create a upl and immediately abort it.
+ */
+ if (ap->a_pl == NULL) {
+ /* create the upl */
+ ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
+ UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
+ /* mark the range as needed so it doesn't immediately get discarded upon abort */
+ ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
+
+ /* Abort the range */
+ ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
+ }
+
+
+ return error;
+ }
+#endif /* CONFIG_PROTECT */
- error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
- ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
+ if (ap->a_pl != NULL) {
+ /*
+ * this can only happen for swap files now that
+ * we're asking for V2 paging behavior...
+ * so don't need to worry about decompression, or
+ * keeping track of blocks read or taking the truncate lock
+ */
+ error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
+ ap->a_size, (off_t)fp->ff_size, ap->a_flags);
+ goto pagein_done;
+ }
+
+ page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;
+
+retry_pagein:
/*
- * Keep track of blocks read.
+ * take truncate lock (shared/recursive) to guard against
+ * zero-fill thru fsync interfering, but only for v2
+ *
+ * the HFS_LOCK_SKIP_IF_EXCLUSIVE arg indicates that we want the
+ * lock shared and we are allowed to recurse 1 level if this thread already
+ * owns the lock exclusively... this can legally occur
+ * if we are doing a shrinking ftruncate against a file
+ * that is mapped private, and the pages being truncated
+ * do not currently exist in the cache... in that case
+ * we will have to page-in the missing pages in order
+ * to provide them to the private mapping... we must
+ * also call hfs_unlock_truncate with a positive been_recursed
+ * arg to indicate that if we have recursed, there is no need to drop
+ * the lock. Allowing this simple recursion is necessary
+ * in order to avoid a certain deadlock... since the ftruncate
+ * already holds the truncate lock exclusively, if we try
+ * to acquire it shared to protect the pagein path, we will
+ * hang this thread
+ *
+ * NOTE: The if () block below is a workaround in order to prevent a
+ * VM deadlock. See rdar://7853471.
+ *
+ * If we are in a forced unmount, then launchd will still have the
+ * dyld_shared_cache file mapped as it is trying to reboot. If we
+ * take the truncate lock here to service a page fault, then our
+ * thread could deadlock with the forced-unmount. The forced unmount
+ * thread will try to reclaim the dyld_shared_cache vnode, but since it's
+ * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
+ * thread will think it needs to copy all of the data out of the file
+ * and into a VM copy object. If we hold the cnode lock here, then that
+ * VM operation will not be able to proceed, because we'll set a busy page
+ * before attempting to grab the lock. Note that this isn't as simple as "don't
+ * call ubc_setsize" because doing that would just shift the problem to the
+ * ubc_msync done before the vnode is reclaimed.
+ *
+ * So, if a forced unmount on this volume is in flight AND the cnode is
+ * marked C_DELETED, then just go ahead and do the page in without taking
+ * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
+ * that is not going to be available on the next mount, this seems like an
+ * OK solution from a correctness point of view, even though it is hacky.
*/
- if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
- struct cnode *cp;
- struct filefork *fp;
- int bytesread;
- int took_cnode_lock = 0;
-
- cp = VTOC(vp);
- fp = VTOF(vp);
+ if (vfs_isforce(vp->v_mount)) {
+ if (cp->c_flag & C_DELETED) {
+ /* If we don't get it, then just go ahead and operate without the lock */
+ truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
+ }
+ }
+ else {
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
+ truncate_lock_held = TRUE;
+ }
- if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
- bytesread = fp->ff_size;
- else
- bytesread = ap->a_size;
+ kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
- /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
- if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- took_cnode_lock = 1;
+ if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
+ error = EINVAL;
+ goto pagein_done;
+ }
+ ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
+
+ upl_size = isize = ap->a_size;
+
+ /*
+ * Scan from the back to find the last page in the UPL, so that we
+ * aren't looking at a UPL that may have already been freed by the
+ * preceding aborts/completions.
+ */
+ for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
+ if (upl_page_present(pl, --pg_index))
+ break;
+ if (pg_index == 0) {
+ /*
+ * no absent pages were found in the range specified
+ * just abort the UPL to get rid of it and then we're done
+ */
+ ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
+ goto pagein_done;
}
- /*
- * If this file hasn't been seen since the start of
- * the current sampling period then start over.
+ }
+ /*
+ * initialize the offset variables before we touch the UPL.
+ * f_offset is the position into the file, in bytes
+ * offset is the position into the UPL, in bytes
+ * pg_index is the pg# of the UPL we're operating on
+ * isize is the offset into the UPL of the last page that is present.
+ */
+ isize = ((pg_index + 1) * PAGE_SIZE);
+ pg_index = 0;
+ offset = 0;
+ f_offset = ap->a_f_offset;
+
+ while (isize) {
+ int xsize;
+ int num_of_pages;
+
+ if ( !upl_page_present(pl, pg_index)) {
+ /*
+ * we asked for RET_ONLY_ABSENT, so it's possible
+ * to get back empty slots in the UPL.
+ * just skip over them
+ */
+ f_offset += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ isize -= PAGE_SIZE;
+ pg_index++;
+
+ continue;
+ }
+ /*
+ * We know that we have at least one absent page.
+ * Now checking to see how many in a row we have
*/
- if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
- struct timeval tv;
+ num_of_pages = 1;
+ xsize = isize - PAGE_SIZE;
- fp->ff_bytesread = bytesread;
- microtime(&tv);
- cp->c_atime = tv.tv_sec;
+ while (xsize) {
+ if ( !upl_page_present(pl, pg_index + num_of_pages))
+ break;
+ num_of_pages++;
+ xsize -= PAGE_SIZE;
+ }
+ xsize = num_of_pages * PAGE_SIZE;
+
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ /* allow pageins of the resource fork */
} else {
- fp->ff_bytesread += bytesread;
+ int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+
+ if (compressed) {
+
+ if (truncate_lock_held) {
+ /*
+ * can't hold the truncate lock when calling into the decmpfs layer
+ * since it calls back into this layer... even though we're only
+ * holding the lock in shared mode, and the re-entrant path only
+ * takes the lock shared, we can deadlock if some other thread
+ * tries to grab the lock exclusively in between.
+ */
+ hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
+ truncate_lock_held = FALSE;
+ }
+ ap->a_pl = upl;
+ ap->a_pl_offset = offset;
+ ap->a_f_offset = f_offset;
+ ap->a_size = xsize;
+
+ error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
+ /*
+ * note that decmpfs_pagein_compressed can change the state of
+ * 'compressed'... it will set it to 0 if the file is no longer
+ * compressed once the compression lock is successfully taken
+ * i.e. we would block on that lock while the file is being inflated
+ */
+ if (compressed) {
+ if (error == 0) {
+ /* successful page-in, update the access time */
+ VTOC(vp)->c_touch_acctime = TRUE;
+
+ /* compressed files are not hot file candidates */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+ } else if (error == EAGAIN) {
+ /*
+ * EAGAIN indicates someone else already holds the compression lock...
+ * to avoid deadlocking, we'll abort this range of pages with an
+ * indication that the pagein needs to be redriven
+ */
+ ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
+ } else if (error == ENOSPC) {
+
+ if (upl_size == PAGE_SIZE)
+ panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
+
+ ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
+
+ ap->a_size = PAGE_SIZE;
+ ap->a_pl = NULL;
+ ap->a_pl_offset = 0;
+ ap->a_f_offset = page_needed_f_offset;
+
+ goto retry_pagein;
+ }
+ goto pagein_next_range;
+ }
+ else {
+ /*
+ * Set file_converted only if the file became decompressed while we were
+ * paging in. If it were still compressed, we would re-start the loop using the goto
+ * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
+ * condition below, since we could have avoided taking the truncate lock to prevent
+ * a deadlock in the force unmount case.
+ */
+ file_converted = TRUE;
+ }
+ }
+ if (file_converted == TRUE) {
+ /*
+ * the file was converted back to a regular file after we first saw it as compressed
+ * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
+ * reset a_size so that we consider what remains of the original request
+ * and null out a_pl and a_pl_offset.
+ *
+ * We should only be able to get into this block if the decmpfs_pagein_compressed
+ * successfully decompressed the range in question for this file.
+ */
+ ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
+
+ ap->a_size = isize;
+ ap->a_pl = NULL;
+ ap->a_pl_offset = 0;
+
+ /* Reset file_converted back to false so that we don't infinite-loop. */
+ file_converted = FALSE;
+ goto retry_pagein;
+ }
}
- cp->c_touch_acctime = TRUE;
- if (took_cnode_lock)
- hfs_unlock(cp);
+#endif
+ error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
+
+ /*
+ * Keep track of blocks read.
+ */
+ if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
+ int bytesread;
+ int took_cnode_lock = 0;
+
+ if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
+ bytesread = fp->ff_size;
+ else
+ bytesread = xsize;
+
+ /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
+ if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ took_cnode_lock = 1;
+ }
+ /*
+ * If this file hasn't been seen since the start of
+ * the current sampling period then start over.
+ */
+ if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
+ struct timeval tv;
+
+ fp->ff_bytesread = bytesread;
+ microtime(&tv);
+ cp->c_atime = tv.tv_sec;
+ } else {
+ fp->ff_bytesread += bytesread;
+ }
+ cp->c_touch_acctime = TRUE;
+ if (took_cnode_lock)
+ hfs_unlock(cp);
+ }
+pagein_next_range:
+ f_offset += xsize;
+ offset += xsize;
+ isize -= xsize;
+ pg_index += num_of_pages;
+
+ error = 0;
}
+
+pagein_done:
+ if (truncate_lock_held == TRUE) {
+ /* Pass the same HFS_LOCK_SKIP_IF_EXCLUSIVE flag that was used when taking the truncate lock */
+ hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
+ }
+
return (error);
}
vnode_t vp = ap->a_vp;
struct cnode *cp;
struct filefork *fp;
- int retval;
- off_t end_of_range;
+ int retval = 0;
off_t filesize;
+ upl_t upl;
+ upl_page_info_t* pl;
+ vm_offset_t a_pl_offset;
+ int a_flags;
+ int is_pageoutv2 = 0;
+ kern_return_t kret;
cp = VTOC(vp);
- if (cp->c_lockowner == current_thread()) {
- panic("pageout: %s cnode lock already held!\n",
- cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
- }
- if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
- if (!(ap->a_flags & UPL_NOCOMMIT)) {
- ubc_upl_abort_range(ap->a_pl,
- ap->a_pl_offset,
- ap->a_size,
- UPL_ABORT_FREE_ON_EMPTY);
- }
- return (retval);
- }
fp = VTOF(vp);
-
+
+ /*
+ * Figure out where the file ends, for pageout purposes. If
+ * ff_new_size > ff_size, then we're in the middle of extending the
+ * file via a write, so it is safe (and necessary) that we be able
+ * to pageout up to that point.
+ */
filesize = fp->ff_size;
- end_of_range = ap->a_f_offset + ap->a_size - 1;
+ if (fp->ff_new_size > filesize)
+ filesize = fp->ff_new_size;
- if (end_of_range >= filesize) {
- end_of_range = (off_t)(filesize - 1);
- }
- if (ap->a_f_offset < filesize) {
- rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
- cp->c_flag |= C_MODIFIED; /* leof is dirty */
+ a_flags = ap->a_flags;
+ a_pl_offset = ap->a_pl_offset;
+
+ /*
+ * we can tell if we're getting the new or old behavior from the UPL
+ */
+ if ((upl = ap->a_pl) == NULL) {
+ int request_flags;
+
+ is_pageoutv2 = 1;
+ /*
+ * we're in control of any UPL we commit
+ * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
+ */
+ a_flags &= ~UPL_NOCOMMIT;
+ a_pl_offset = 0;
+
+ /*
+ * For V2 semantics, we want to take the cnode truncate lock
+ * shared to guard against the file size changing via zero-filling.
+ *
+ * However, we have to be careful because we may be invoked
+ * via the ubc_msync path to write out dirty mmap'd pages
+ * in response to a lock event on a content-protected
+ * filesystem (e.g. to write out class A files).
+ * As a result, we want to take the truncate lock 'SHARED' with
+ * the mini-recursion locktype so that we don't deadlock/panic
+ * because we may be already holding the truncate lock exclusive to force any other
+ * IOs to have blocked behind us.
+ */
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
+
+ if (a_flags & UPL_MSYNC) {
+ request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
+ }
+ else {
+ request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
+ }
+
+ kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
+
+ if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
+ retval = EINVAL;
+ goto pageout_done;
+ }
}
- hfs_unlock(cp);
+ /*
+ * from this point forward upl points at the UPL we're working with
+ * it was either passed in or we successfully created it
+ */
+
+ /*
+ * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
+ * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
+ * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
+ * N dirty ranges in the UPL. Note that this is almost a direct copy of the
+ * logic in vnode_pageout except that we need to do it after grabbing the truncate
+ * lock in HFS so that we don't lock invert ourselves.
+ *
+ * Note that we can still get into this function on behalf of the default pager with
+ * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
+ * since fsync and other writing threads will grab the locks, then mark the
+ * relevant pages as busy. But the pageout codepath marks the pages as busy,
+ * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
+ * we do not try to grab anything for the pre-V2 case, which should only be accessed
+ * by the paging/VM system.
+ */
+
+ if (is_pageoutv2) {
+ off_t f_offset;
+ int offset;
+ int isize;
+ int pg_index;
+ int error;
+ int error_ret = 0;
+
+ isize = ap->a_size;
+ f_offset = ap->a_f_offset;
+
+ /*
+ * Scan from the back to find the last page in the UPL, so that we
+ * aren't looking at a UPL that may have already been freed by the
+ * preceding aborts/completions.
+ */
+ for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
+ if (upl_page_present(pl, --pg_index))
+ break;
+ if (pg_index == 0) {
+ ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
+ goto pageout_done;
+ }
+ }
+
+ /*
+ * initialize the offset variables before we touch the UPL.
+ * f_offset is the position into the file, in bytes
+ * offset is the position into the UPL, in bytes
+ * pg_index is the pg# of the UPL we're operating on.
+ * isize is the offset into the UPL of the last non-clean page.
+ */
+ isize = ((pg_index + 1) * PAGE_SIZE);
+
+ offset = 0;
+ pg_index = 0;
+
+ while (isize) {
+ int xsize;
+ int num_of_pages;
+
+ if ( !upl_page_present(pl, pg_index)) {
+ /*
+ * we asked for RET_ONLY_DIRTY, so it's possible
+ * to get back empty slots in the UPL.
+ * just skip over them
+ */
+ f_offset += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ isize -= PAGE_SIZE;
+ pg_index++;
+
+ continue;
+ }
+ if ( !upl_dirty_page(pl, pg_index)) {
+ panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
+ }
- retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
- ap->a_size, filesize, ap->a_flags);
+ /*
+ * We know that we have at least one dirty page.
+ * Now checking to see how many in a row we have
+ */
+ num_of_pages = 1;
+ xsize = isize - PAGE_SIZE;
+
+ while (xsize) {
+ if ( !upl_dirty_page(pl, pg_index + num_of_pages))
+ break;
+ num_of_pages++;
+ xsize -= PAGE_SIZE;
+ }
+ xsize = num_of_pages * PAGE_SIZE;
+
+ if (!vnode_isswap(vp)) {
+ off_t end_of_range;
+ int tooklock;
+
+ tooklock = 0;
+
+ if (cp->c_lockowner != current_thread()) {
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+ /*
+ * we're in the v2 path, so we are the
+ * owner of the UPL... we may have already
+ * processed some of the UPL, so abort it
+ * from the current working offset to the
+ * end of the UPL
+ */
+ ubc_upl_abort_range(upl,
+ offset,
+ ap->a_size - offset,
+ UPL_ABORT_FREE_ON_EMPTY);
+ goto pageout_done;
+ }
+ tooklock = 1;
+ }
+ end_of_range = f_offset + xsize - 1;
+
+ if (end_of_range >= filesize) {
+ end_of_range = (off_t)(filesize - 1);
+ }
+ if (f_offset < filesize) {
+ rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
+ cp->c_flag |= C_MODIFIED; /* leof is dirty */
+ }
+ if (tooklock) {
+ hfs_unlock(cp);
+ }
+ }
+ if ((error = cluster_pageout(vp, upl, offset, f_offset,
+ xsize, filesize, a_flags))) {
+ if (error_ret == 0)
+ error_ret = error;
+ }
+ f_offset += xsize;
+ offset += xsize;
+ isize -= xsize;
+ pg_index += num_of_pages;
+ }
+ /* capture errnos bubbled out of cluster_pageout if they occurred */
+ if (error_ret != 0) {
+ retval = error_ret;
+ }
+ } /* end block for v2 pageout behavior */
+ else {
+ if (!vnode_isswap(vp)) {
+ off_t end_of_range;
+ int tooklock = 0;
+
+ if (cp->c_lockowner != current_thread()) {
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+ if (!(a_flags & UPL_NOCOMMIT)) {
+ ubc_upl_abort_range(upl,
+ a_pl_offset,
+ ap->a_size,
+ UPL_ABORT_FREE_ON_EMPTY);
+ }
+ goto pageout_done;
+ }
+ tooklock = 1;
+ }
+ end_of_range = ap->a_f_offset + ap->a_size - 1;
+
+ if (end_of_range >= filesize) {
+ end_of_range = (off_t)(filesize - 1);
+ }
+ if (ap->a_f_offset < filesize) {
+ rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
+ cp->c_flag |= C_MODIFIED; /* leof is dirty */
+ }
+
+ if (tooklock) {
+ hfs_unlock(cp);
+ }
+ }
+ /*
+ * just call cluster_pageout for old pre-v2 behavior
+ */
+ retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
+ ap->a_size, filesize, a_flags);
+ }
/*
- * If data was written, and setuid or setgid bits are set and
- * this process is not the superuser then clear the setuid and
- * setgid bits as a precaution against tampering.
+ * If data was written, update the modification time of the file
+ * but only if it's mapped writable; we will have touched the
+ * modification time for direct writes.
*/
- if ((retval == 0) &&
- (cp->c_mode & (S_ISUID | S_ISGID)) &&
- (vfs_context_suser(ap->a_context) != 0)) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- cp->c_mode &= ~(S_ISUID | S_ISGID);
- cp->c_touch_chgtime = TRUE;
+ if (retval == 0 && (ubc_is_mapped_writable(vp)
+ || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+
+ // Check again with lock
+ bool mapped_writable = ubc_is_mapped_writable(vp);
+ if (mapped_writable
+ || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
+ cp->c_touch_modtime = TRUE;
+ cp->c_touch_chgtime = TRUE;
+
+ /*
+ * We only need to increment the generation counter if
+ * it's currently mapped writable because we incremented
+ * the counter in hfs_vnop_mnomap.
+ */
+ if (mapped_writable)
+ hfs_incr_gencount(VTOC(vp));
+
+ /*
+ * If setuid or setgid bits are set and this process is
+ * not the superuser then clear the setuid and setgid bits
+ * as a precaution against tampering.
+ */
+ if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
+ (vfs_context_suser(ap->a_context) != 0)) {
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ }
+ }
+
hfs_unlock(cp);
}
+
+pageout_done:
+ if (is_pageoutv2) {
+ /*
+ * Release the truncate lock. Note that because
+ * we may have taken the lock recursively by
+ * being invoked via ubc_msync due to lockdown,
+ * we should release it recursively, too.
+ */
+ hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
+ }
return (retval);
}
* Swap and validate the node if it is in native byte order.
* This is always be true on big endian, so we always validate
* before writing here. On little endian, the node typically has
- * been swapped and validatated when it was written to the journal,
+ * been swapped and validated when it was written to the journal,
* so we won't do anything here.
*/
- if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
+ if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
/* Prepare the block pointer */
block.blockHeader = bp;
block.buffer = (char *)buf_dataptr(bp);
block.blockSize = buf_count(bp);
/* Endian un-swap B-Tree node */
- retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
+ retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
if (retval)
panic("hfs_vnop_bwrite: about to write corrupt node!\n");
}
if ((buf_flags(bp) & B_LOCKED)) {
// XXXdbg
if (VTOHFS(vp)->jnl) {
- panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
+ panic("hfs: CLEARING the lock bit on bp %p\n", bp);
}
buf_clearflags(bp, B_LOCKED);
}
* 0 N (file offset)
*
* ----------------- -----------------
- * |///////////////| | | STEP 1 (aquire new blocks)
+ * |///////////////| | | STEP 1 (acquire new blocks)
* ----------------- -----------------
* 0 N N+1 2N
*
* During steps 2 and 3 page-outs to file offsets less
* than or equal to N are suspended.
*
- * During step 3 page-ins to the file get supended.
+ * During step 3 page-ins to the file get suspended.
*/
-__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
struct proc *p)
u_int32_t growsize;
u_int32_t nextallocsave;
daddr64_t sector_a, sector_b;
- int disabled_caching = 0;
int eflags;
off_t newbytes;
int retval;
enum vtype vnodetype;
vnodetype = vnode_vtype(vp);
- if (vnodetype != VREG && vnodetype != VLNK) {
+ if (vnodetype != VREG) {
+ /* Not allowed to move symlinks. */
return (EPERM);
}
fp = VTOF(vp);
if (fp->ff_unallocblocks)
return (EINVAL);
+
+#if CONFIG_PROTECT
+ /*
+ * <rdar://problem/9118426>
+ * Disable HFS file relocation on content-protected filesystems
+ */
+ if (cp_fs_protected (hfsmp->hfs_mp)) {
+ return EINVAL;
+ }
+#endif
+ /* If it's an SSD, also disable HFS relocation */
+ if (hfsmp->hfs_flags & HFS_SSD) {
+ return EINVAL;
+ }
+
+
blksize = hfsmp->blockSize;
if (blockHint == 0)
blockHint = hfsmp->nextAllocation;
- if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
- ((fp->ff_size > blksize) && vnodetype == VLNK)) {
+ if (fp->ff_size > 0x7fffffff) {
return (EFBIG);
}
if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
hfs_unlock(cp);
- hfs_lock_truncate(cp, TRUE);
- if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
- hfs_unlock_truncate(cp);
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ /* Force lock since callers expect the lock to be held. */
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
return (retval);
}
+ /* No need to continue if file was removed. */
+ if (cp->c_flag & C_NOEXISTS) {
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ return (ENOENT);
+ }
took_trunc_lock = 1;
}
headblks = fp->ff_blocks;
if (hfs_start_transaction(hfsmp) != 0) {
if (took_trunc_lock)
- hfs_unlock_truncate(cp);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
return (EINVAL);
}
started_tr = 1;
}
/*
- * STEP 1 - aquire new allocation blocks.
+ * STEP 1 - acquire new allocation blocks.
*/
- if (!vnode_isnocache(vp)) {
- vnode_setnocache(vp);
- disabled_caching = 1;
-
- }
nextallocsave = hfsmp->nextAllocation;
retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
if (eflags & kEFMetadataMask) {
- HFS_MOUNT_LOCK(hfsmp, TRUE);
- hfsmp->nextAllocation = nextallocsave;
- hfsmp->vcbFlags |= 0xFF00;
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_lock_mount(hfsmp);
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
+ MarkVCBDirty(hfsmp);
+ hfs_unlock_mount(hfsmp);
}
retval = MacToVFSError(retval);
retval = ENOSPC;
goto restore;
} else if (fp->ff_blocks < (headblks + datablks)) {
- printf("hfs_relocate: allocation failed");
+ printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
retval = ENOSPC;
goto restore;
}
retval = ENOSPC;
goto restore;
} else if ((eflags & kEFMetadataMask) &&
- ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
+ ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
hfsmp->hfs_metazone_end)) {
- printf("hfs_relocate: didn't move into metadata zone\n");
+#if 0
+ const char * filestr;
+ char emptystr = '\0';
+
+ if (cp->c_desc.cd_nameptr != NULL) {
+ filestr = (const char *)&cp->c_desc.cd_nameptr[0];
+ } else if (vnode_name(vp) != NULL) {
+ filestr = vnode_name(vp);
+ } else {
+ filestr = &emptystr;
+ }
+#endif
retval = ENOSPC;
goto restore;
}
*/
if (vnodetype == VLNK)
- retval = hfs_clonelink(vp, blksize, cred, p);
+ retval = EPERM;
else if (vnode_issystem(vp))
retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
else
goto restore;
out:
if (took_trunc_lock)
- hfs_unlock_truncate(cp);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
if (lockflags) {
hfs_systemfile_unlock(hfsmp, lockflags);
if (retval == 0) {
(void) hfs_update(vp, MNT_WAIT);
}
-
if (hfsmp->jnl) {
if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
}
exit:
- if (disabled_caching) {
- vnode_clearnocache(vp);
- }
if (started_tr)
hfs_end_transaction(hfsmp);
return (retval);
restore:
- if (fp->ff_blocks == headblks)
+ if (fp->ff_blocks == headblks) {
+ if (took_trunc_lock)
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
goto exit;
+ }
/*
* Give back any newly allocated space.
*/
lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
}
- (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
+ (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
+ FTOC(fp)->c_fileid, false);
hfs_systemfile_unlock(hfsmp, lockflags);
lockflags = 0;
if (took_trunc_lock)
- hfs_unlock_truncate(cp);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
goto exit;
}
-/*
- * Clone a symlink.
- *
- */
-static int
-hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
-{
- struct buf *head_bp = NULL;
- struct buf *tail_bp = NULL;
- int error;
-
-
- error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
- if (error)
- goto out;
-
- tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
- if (tail_bp == NULL) {
- error = EIO;
- goto out;
- }
- bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
- error = (int)buf_bwrite(tail_bp);
-out:
- if (head_bp) {
- buf_markinvalid(head_bp);
- buf_brelse(head_bp);
- }
- (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
-
- return (error);
-}
-
/*
* Clone a file's data within the file.
*
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
caddr_t bufp;
- size_t writebase;
size_t bufsize;
size_t copysize;
size_t iosize;
- off_t filesize;
size_t offset;
+ off_t writebase; /* destination byte offset; NOTE(review): assigned from int * int below - confirm the product cannot overflow before widening */
uio_t auio;
int error = 0;
- filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
writebase = blkstart * blksize;
copysize = blkcnt * blksize;
iosize = bufsize = MIN(copysize, 128 * 1024);
offset = 0;
+ hfs_unlock(VTOC(vp)); /* drop the cnode lock first; every return path below re-takes it */
+
+#if CONFIG_PROTECT
+ if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { /* bail out if cp_handle_vnop reports an error for write access */
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* re-take the cnode lock dropped above before returning */
+ return (error);
+ }
+#endif /* CONFIG_PROTECT */
+
if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* the lock is now dropped before the allocation, so re-take it on the ENOMEM path */
return (ENOMEM);
- }
- hfs_unlock(VTOC(vp));
+ }
- auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
+ auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); /* one uio, reset and reused for the read and the write of each chunk */
while (offset < copysize) {
iosize = MIN(copysize - offset, iosize);
- uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
+ uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); /* read the next chunk from file offset `offset` into bufp */
uio_addiov(auio, (uintptr_t)bufp, iosize);
- error = cluster_read(vp, auio, copysize, 0);
+ error = cluster_read(vp, auio, copysize, IO_NOCACHE); /* read now passes IO_NOCACHE (flags were 0 before) */
if (error) {
printf("hfs_clonefile: cluster_read failed - %d\n", error);
break;
}
if (uio_resid(auio) != 0) {
- printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
+ printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio)); /* cast so the argument matches %lld */
error = EIO;
break;
}
- uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
+ uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE); /* write the same chunk back at writebase + offset */
uio_addiov(auio, (uintptr_t)bufp, iosize);
- error = cluster_write(vp, auio, filesize + offset,
- filesize + offset + iosize,
+ error = cluster_write(vp, auio, writebase + offset, /* these two arguments were previously based on filesize (ff_blocks * blksize) */
+ writebase + offset + iosize,
uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
if (error) {
printf("hfs_clonefile: cluster_write failed - %d\n", error);
}
uio_free(auio);
- /*
- * No need to call ubc_sync_range or hfs_invalbuf
- * since the file was copied using IO_NOCACHE.
- */
-
+ if ((blksize & PAGE_MASK)) { /* nonzero when blksize has any PAGE_MASK bits set, i.e. is not page-aligned */
+ /*
+ * Since the copy may not have started on a page
+ * boundary (or may not have ended on one), we
+ * may have pages left in the cache, because NOCACHE
+ * lets partially written pages linger.
+ * Flush the entire range to make sure
+ * we don't have any pages left that are beyond
+ * (or intersect) the real LEOF of this file.
+ */
+ ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY); /* range ends at writebase + offset; offset's per-chunk update is not visible in this hunk - presumably bytes copied so far */
+ } else {
+ /*
+ * No need to call ubc_msync or hfs_invalbuf
+ * since the file was copied using IO_NOCACHE and
+ * the copy was done starting and ending on a page
+ * boundary in the file.
+ */
+ }
kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
- hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* re-take the cnode lock dropped at the top before returning */
return (error);
}