/*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
+#include <sys/buf_internal.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/disk.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
+#include <sys/mount_internal.h>
+#include <sys/file_internal.h>
+
+#include <libkern/OSDebug.h>
#include <miscfs/specfs/specdev.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
+#include <IOKit/IOBSD.h>
+
#include <sys/kdebug.h>
#include "hfs.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"
+
#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
enum {
/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
-static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
+/* from hfs_hotfiles.c */
+extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
+ uint8_t forktype, uint32_t *pinned);
+
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
-static int hfs_minorupdate(struct vnode *vp);
static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
+/* from bsd/hfs/hfs_vnops.c */
+extern decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp);
+
+
int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
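/*
 * Illustrative sketch (editor's note, not part of the original change):
 * because flush_cache_on_write is exported through SYSCTL_INT above, a
 * userspace tool with sufficient privilege could toggle it roughly like
 * this:
 *
 *     #include <sys/sysctl.h>
 *
 *     int on = 1;
 *     (void) sysctlbyname("kern.flush_cache_on_write", NULL, NULL,
 *                         &on, sizeof(on));
 */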
int
hfs_vnop_read(struct vnop_read_args *ap)
{
+ /*
+ struct vnop_read_args {
+ struct vnodeop_desc *a_desc;
+ vnode_t a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ vfs_context_t a_context;
+ };
+ */
+
uio_t uio = ap->a_uio;
struct vnode *vp = ap->a_vp;
struct cnode *cp;
off_t offset = uio_offset(uio);
int retval = 0;
int took_truncate_lock = 0;
+ int io_throttle = 0;
+ int throttled_count = 0;
/* Preflight checks */
if (!vnode_isreg(vp)) {
return (0); /* Nothing left to do */
if (offset < 0)
return (EINVAL);	/* can't read from a negative offset */
-
+
+#if SECURE_KERNEL
+ if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
+ (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
+ /* Don't allow unencrypted io request from user space */
+ return EPERM;
+ }
+#endif
+
#if HFS_COMPRESSION
if (VNODE_IS_RSRC(vp)) {
if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
if (compressed) {
retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
+ if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
+ (void) hfs_addhotfile(vp);
+ }
if (compressed) {
if (retval == 0) {
/* successful read, update the access time */
VTOC(vp)->c_touch_acctime = TRUE;
- /* compressed files are not hot file candidates */
+ //
+ // compressed files are not traditional hot file candidates
+ // but they may be for CF (which ignores the ff_bytesread
+ // field)
+ //
if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
VTOF(vp)->ff_bytesread = 0;
}
}
/* otherwise the file was converted back to a regular file while we were reading it */
retval = 0;
- } else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
+ } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
int error;
error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
hfsmp = VTOHFS(vp);
#if CONFIG_PROTECT
- if ((retval = cp_handle_vnop (cp, CP_READ_ACCESS)) != 0) {
+ if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
goto exit;
}
-#endif
+
+#endif // CONFIG_PROTECT
+
+ /*
+ * If this read request originated from a syscall (as opposed to
+ * an in-kernel page fault or something), then set it up for
+ * throttle checks
+ */
+ if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
+ io_throttle = IO_RETURN_ON_THROTTLE;
+ }
+
+read_again:
/* Protect against a size change. */
- hfs_lock_truncate(cp, HFS_SHARED_LOCK);
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
took_truncate_lock = 1;
filesize = fp->ff_size;
filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
+
+ /*
+ * Check the file size. Note that per POSIX spec, we return 0 at
+ * file EOF, so attempting a read at an offset that is too big
+ * should just return 0 on HFS+. Since the return value was initialized
+ * to 0 above, we just jump to exit. HFS Standard has its own behavior.
+ */
if (offset > filesize) {
if ((hfsmp->hfs_flags & HFS_STANDARD) &&
(offset > (off_t)MAXHFSFILESIZE)) {
goto exit;
}
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
+ KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
- retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
+ retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle);
cp->c_touch_acctime = TRUE;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
+ KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
/*
/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
- hfs_lock(cp, HFS_FORCE_LOCK);
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
took_cnode_lock = 1;
}
/*
} else {
fp->ff_bytesread += bytesread;
}
+
+ if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
+ //
+ // We don't add hotfiles for processes doing IO_EVTONLY I/O
+ // on the assumption that they're system processes such as
+ // mdworker which scan everything in the system (and thus
+ // do not represent user-initiated access to files)
+ //
+ (void) hfs_addhotfile(vp);
+ }
if (took_cnode_lock)
hfs_unlock(cp);
}
exit:
if (took_truncate_lock) {
- hfs_unlock_truncate(cp, 0);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
}
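	/*
	 * EAGAIN here means the I/O was throttled and the cluster layer
	 * returned early because we passed IO_RETURN_ON_THROTTLE: sleep in
	 * throttle_lowpri_io() and then retry the read from the top.
	 */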
+ if (retval == EAGAIN) {
+ throttle_lowpri_io(1);
+ throttled_count++;
+ retval = 0;
+ goto read_again;
+ }
+ if (throttled_count) {
+ throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
+ }
return (retval);
}
+/*
+ * Ideally, this wouldn't be necessary; the cluster code should be
+ * able to handle this on the read-side. See <rdar://20420068>.
+ */
+static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
+{
+ assert(VTOC(vp)->c_lockowner != current_thread());
+ assert(VTOC(vp)->c_truncatelockowner == current_thread());
+
+ struct filefork *fp = VTOF(vp);
+
+ if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
+ // Nothing to do
+ return 0;
+ }
+
+ zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));
+
+ /* N.B. At present, @zero_up_to is not important because the cluster
+ code will always zero up to the end of the page anyway. */
+ return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
+ fp->ff_size, 0, IO_HEADZEROFILL);
+}
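/*
 * Worked example (editor's note): with 4 KiB pages, if fp->ff_size is
 * 0x1234 and zero_up_to is 0x3000, then round_page_64(0x1234) is 0x2000,
 * so the cluster_write() above zero-fills only the tail of the EOF page,
 * i.e. the byte range [0x1234, 0x2000).
 */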
+
/*
* Write data to a file.
*/
int do_snapshot = 1;
time_t orig_ctime=VTOC(vp)->c_ctime;
int took_truncate_lock = 0;
+ int io_return_on_throttle = 0;
+ int throttled_count = 0;
#if HFS_COMPRESSION
if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
printf("invalid state %d for compressed file\n", state);
/* fall through */
}
- } else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
+ } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
int error;
error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
#endif
- // LP64todo - fix this! uio_resid may be 64-bit value
+#if SECURE_KERNEL
+ if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
+ (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
+ /* Don't allow unencrypted io request from user space */
+ return EPERM;
+ }
+#endif
+
resid = uio_resid(uio);
offset = uio_offset(uio);
hfsmp = VTOHFS(vp);
#if CONFIG_PROTECT
- if ((retval = cp_handle_vnop (cp, CP_WRITE_ACCESS)) != 0) {
+ if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
goto exit;
}
#endif
}
#endif /* HFS_SPARSE_DEV */
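	/*
	 * As in hfs_vnop_read(): when this write came in via a syscall and we
	 * are the sole writer, ask the cluster layer to return EAGAIN rather
	 * than block when throttled; the retry is handled at the exit label.
	 */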
+ if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
+ (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
+ io_return_on_throttle = IO_RETURN_ON_THROTTLE;
+ }
+
again:
- /* Protect against a size change. */
- if (ioflag & IO_APPEND) {
- hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
+ /*
+ * Protect against a size change.
+ *
+ * Note: If took_truncate_lock is true, then we previously got the lock shared
+ * but needed to upgrade to exclusive. So try getting it exclusive from the
+ * start.
+ */
+ if (ioflag & IO_APPEND || took_truncate_lock) {
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
}
else {
- hfs_lock_truncate(cp, HFS_SHARED_LOCK);
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
}
took_truncate_lock = 1;
uio_setoffset(uio, fp->ff_size);
offset = fp->ff_size;
}
- if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
+ if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
retval = EPERM;
goto exit;
}
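	/*
	 * Non-superusers must respect the volume's free space reserve, so set
	 * kEFReserveMask here; the hfs_freeblks() check and the block
	 * allocations below will then stay out of the reserved blocks.
	 */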
+ cred = vfs_context_ucred(ap->a_context);
+ if (cred && suser(cred, NULL) != 0)
+ eflags |= kEFReserveMask;
+
origFileSize = fp->ff_size;
writelimit = offset + resid;
- filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
- /* If the truncate lock is shared, and if we either have virtual
- * blocks or will need to extend the file, upgrade the truncate
- * to exclusive lock. If upgrade fails, we lose the lock and
- * have to get exclusive lock again. Note that we want to
- * grab the truncate lock exclusive even if we're not allocating new blocks
- * because we could still be growing past the LEOF.
+ /*
+ * We may need an exclusive truncate lock for several reasons, all
+ * of which are because we may be writing to a (portion of a) block
+ * for the first time, and we need to make sure no readers see the
+ * prior, uninitialized contents of the block. The cases are:
+ *
+ * 1. We have unallocated (delayed allocation) blocks. We may be
+ * allocating new blocks to the file and writing to them.
+ * (A more precise check would be whether the range we're writing
+ * to contains delayed allocation blocks.)
+ * 2. We need to extend the file. The bytes between the old EOF
+ * and the new EOF are not yet initialized. This is important
+ * even if we're not allocating new blocks to the file. If the
+ * old EOF and new EOF are in the same block, we still need to
+ * protect that range of bytes until they are written for the
+ * first time.
+ *
+ * If we had a shared lock with the above cases, we need to try to upgrade
+ * to an exclusive lock. If the upgrade fails, we will lose the shared
+ * lock, and will need to take the truncate lock again; the took_truncate_lock
+ * flag will still be set, causing us to try for an exclusive lock next time.
*/
if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
- ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
- /* Lock upgrade failed and we lost our shared lock, try again */
+ ((fp->ff_unallocblocks != 0) ||
+ (writelimit > origFileSize))) {
if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
+ /*
+ * Lock upgrade failed and we lost our shared lock, try again.
+ * Note: we do not set took_truncate_lock=0 here. Leaving it
+ * set to 1 will cause us to try to get the lock exclusive.
+ */
goto again;
}
else {
}
}
- if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
+ if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
goto exit;
}
cnode_locked = 1;
-
- if (cp->c_truncatelockowner == HFS_SHARED_OWNER) {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
- (int)offset, uio_resid(uio), (int)fp->ff_size,
- (int)filebytes, 0);
+
+ filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);
+
+ if (offset > filebytes
+ && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
+ hfsmp->blockSize) < offset - filebytes)) {
+ retval = ENOSPC;
+ goto exit;
}
+ KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
+ (int)offset, uio_resid(uio), (int)fp->ff_size,
+ (int)filebytes, 0);
+
/* Check if we do not need to extend the file */
if (writelimit <= filebytes) {
goto sizeok;
}
- cred = vfs_context_ucred(ap->a_context);
bytesToAdd = writelimit - filebytes;
#if QUOTA
while (writelimit > filebytes) {
bytesToAdd = writelimit - filebytes;
- if (cred && suser(cred, NULL) != 0)
- eflags |= kEFReserveMask;
/* Protect extents b-tree and allocation bitmap */
lockflags = SFL_BITMAP;
if (retval != E_NONE)
break;
filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
+ KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
}
- (void) hfs_update(vp, TRUE);
+ (void) hfs_update(vp, 0);
(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
(void) hfs_end_transaction(hfsmp);
sizeok:
if (retval == E_NONE) {
off_t filesize;
- off_t zero_off;
- off_t tail_off;
- off_t inval_start;
- off_t inval_end;
- off_t io_start;
+ off_t head_off;
int lflag;
- struct rl_entry *invalid_range;
- if (writelimit > fp->ff_size)
+ if (writelimit > fp->ff_size) {
filesize = writelimit;
- else
+ struct timeval tv;
+ rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges);
+ microuptime(&tv);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ } else
filesize = fp->ff_size;
lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
- if (offset <= fp->ff_size) {
- zero_off = offset & ~PAGE_MASK_64;
-
- /* Check to see whether the area between the zero_offset and the start
- of the transfer to see whether is invalid and should be zero-filled
- as part of the transfer:
- */
- if (offset > zero_off) {
- if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
- lflag |= IO_HEADZEROFILL;
- }
- } else {
- off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
-
- /* The bytes between fp->ff_size and uio->uio_offset must never be
- read without being zeroed. The current last block is filled with zeroes
- if it holds valid data but in all cases merely do a little bookkeeping
- to track the area from the end of the current last page to the start of
- the area actually written. For the same reason only the bytes up to the
- start of the page where this write will start is invalidated; any remainder
- before uio->uio_offset is explicitly zeroed as part of the cluster_write.
-
- Note that inval_start, the start of the page after the current EOF,
- may be past the start of the write, in which case the zeroing
- will be handled by the cluser_write of the actual data.
- */
- inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
- inval_end = offset & ~PAGE_MASK_64;
- zero_off = fp->ff_size;
-
- if ((fp->ff_size & PAGE_MASK_64) &&
- (rl_scan(&fp->ff_invalidranges,
- eof_page_base,
- fp->ff_size - 1,
- &invalid_range) != RL_NOOVERLAP)) {
- /* The page containing the EOF is not valid, so the
- entire page must be made inaccessible now. If the write
- starts on a page beyond the page containing the eof
- (inval_end > eof_page_base), add the
- whole page to the range to be invalidated. Otherwise
- (i.e. if the write starts on the same page), zero-fill
- the entire page explicitly now:
- */
- if (inval_end > eof_page_base) {
- inval_start = eof_page_base;
- } else {
- zero_off = eof_page_base;
- };
- };
-
- if (inval_start < inval_end) {
- struct timeval tv;
- /* There's some range of data that's going to be marked invalid */
-
- if (zero_off < inval_start) {
- /* The pages between inval_start and inval_end are going to be invalidated,
- and the actual write will start on a page past inval_end. Now's the last
- chance to zero-fill the page containing the EOF:
- */
- hfs_unlock(cp);
- cnode_locked = 0;
- retval = cluster_write(vp, (uio_t) 0,
- fp->ff_size, inval_start,
- zero_off, (off_t)0,
- lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
- hfs_lock(cp, HFS_FORCE_LOCK);
- cnode_locked = 1;
- if (retval) goto ioerr_exit;
- offset = uio_offset(uio);
- };
-
- /* Mark the remaining area of the newly allocated space as invalid: */
- rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
- microuptime(&tv);
- cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
- zero_off = fp->ff_size = inval_end;
- };
-
- if (offset > zero_off) lflag |= IO_HEADZEROFILL;
- };
-
- /* Check to see whether the area between the end of the write and the end of
- the page it falls in is invalid and should be zero-filled as part of the transfer:
- */
- tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
- if (tail_off > filesize) tail_off = filesize;
- if (tail_off > writelimit) {
- if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
- lflag |= IO_TAILZEROFILL;
- };
- };
-
/*
- * if the write starts beyond the current EOF (possibly advanced in the
- * zeroing of the last block, above), then we'll zero fill from the current EOF
- * to where the write begins:
- *
- * NOTE: If (and ONLY if) the portion of the file about to be written is
- * before the current EOF it might be marked as invalid now and must be
- * made readable (removed from the invalid ranges) before cluster_write
- * tries to write it:
+ * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
+ * for one case below). For the regions that lie before the
+ * beginning and after the end of this write that are in the
+ * same page, we let the cluster code handle zeroing that out
+ * if necessary. If those areas are not cached, the cluster
+ * code will try and read those areas in, and in the case
+ * where those regions have never been written to,
+ * hfs_vnop_blockmap will consult the invalid ranges and then
+ * indicate that. The cluster code will zero out those areas.
*/
- io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
- if (io_start < fp->ff_size) {
- off_t io_end;
- io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
- rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
- };
+ head_off = trunc_page_64(offset);
+
+ if (head_off < offset && head_off >= fp->ff_size) {
+ /*
+ * The first page is beyond current EOF, so as an
+ * optimisation, we can pass IO_HEADZEROFILL.
+ */
+ lflag |= IO_HEADZEROFILL;
+ }
hfs_unlock(cp);
cnode_locked = 0;
-
+
/*
* We need to tell UBC the fork's new size BEFORE calling
* cluster_write, in case any of the new pages need to be
	 * zeroed, unless we are extending the file via write.
*/
if (filesize > fp->ff_size) {
+ retval = hfs_zero_eof_page(vp, offset);
+ if (retval)
+ goto exit;
fp->ff_new_size = filesize;
ubc_setsize(vp, filesize);
}
- retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
- tail_off, lflag | IO_NOZERODIRTY);
+ retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
+ 0, lflag | IO_NOZERODIRTY | io_return_on_throttle);
if (retval) {
fp->ff_new_size = 0; /* no longer extending; use ff_size */
+
+ if (retval == EAGAIN) {
+ /*
+ * EAGAIN indicates that we still have I/O to do, but
+ * that we now need to be throttled
+ */
+ if (resid != uio_resid(uio)) {
+ /*
+ * did manage to do some I/O before returning EAGAIN
+ */
+ resid = uio_resid(uio);
+ offset = uio_offset(uio);
+
+ cp->c_touch_chgtime = TRUE;
+ cp->c_touch_modtime = TRUE;
+ hfs_incr_gencount(cp);
+ }
+ if (filesize > fp->ff_size) {
+ /*
+ * we called ubc_setsize before the call to
+ * cluster_write... since we only partially
+ * completed the I/O, we need to
+ * re-adjust our idea of the filesize based
+ * on our interim EOF
+ */
+ ubc_setsize(vp, offset);
+
+ fp->ff_size = offset;
+ }
+ goto exit;
+ }
if (filesize > origFileSize) {
ubc_setsize(vp, origFileSize);
}
fp->ff_bytesread = 0;
}
}
- fp->ff_new_size = 0; /* ff_size now has the correct size */
-
- /* If we wrote some bytes, then touch the change and mod times */
- if (resid > uio_resid(uio)) {
- cp->c_touch_chgtime = TRUE;
- cp->c_touch_modtime = TRUE;
- }
+ fp->ff_new_size = 0; /* ff_size now has the correct size */
}
if (partialwrite) {
uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
// XXXdbg - see radar 4871353 for more info
{
if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
- VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
+ hfs_flush(hfsmp, HFS_FLUSH_CACHE);
}
}
ioerr_exit:
- /*
- * If we successfully wrote any data, and we are not the superuser
- * we clear the setuid and setgid bits as a precaution against
- * tampering.
- */
- if (cp->c_mode & (S_ISUID | S_ISGID)) {
- cred = vfs_context_ucred(ap->a_context);
- if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
- if (!cnode_locked) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- cnode_locked = 1;
+ if (!cnode_locked) {
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ cnode_locked = 1;
+ }
+
+ if (resid > uio_resid(uio)) {
+ cp->c_touch_chgtime = TRUE;
+ cp->c_touch_modtime = TRUE;
+ hfs_incr_gencount(cp);
+
+ /*
+ * If we successfully wrote any data, and we are not the superuser
+ * we clear the setuid and setgid bits as a precaution against
+ * tampering.
+ */
+ if (cp->c_mode & (S_ISUID | S_ISGID)) {
+ cred = vfs_context_ucred(ap->a_context);
+ if (cred && suser(cred, NULL)) {
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
}
- cp->c_mode &= ~(S_ISUID | S_ISGID);
}
}
if (retval) {
if (ioflag & IO_UNIT) {
- if (!cnode_locked) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- cnode_locked = 1;
- }
(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
- 0, 0, ap->a_context);
- // LP64todo - fix this! resid needs to by user_ssize_t
+ 0, ap->a_context);
uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
uio_setresid(uio, resid);
filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
}
- } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
- if (!cnode_locked) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- cnode_locked = 1;
- }
- retval = hfs_update(vp, TRUE);
- }
+ } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
+ retval = hfs_update(vp, 0);
+
/* Updating vcbWrCnt doesn't need to be atomic. */
hfsmp->vcbWrCnt++;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
+ KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
+ if (retval && took_truncate_lock
+ && cp->c_truncatelockowner == current_thread()) {
+ fp->ff_new_size = 0;
+ rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
+ }
+
if (cnode_locked)
hfs_unlock(cp);
-
+
if (took_truncate_lock) {
- hfs_unlock_truncate(cp, 0);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ }
+ if (retval == EAGAIN) {
+ throttle_lowpri_io(1);
+ throttled_count++;
+
+ retval = 0;
+ goto again;
+ }
+ if (throttled_count) {
+ throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
}
return (retval);
}
}
if (cache->numcached > NUM_CACHE_ENTRIES) {
- /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
- cache->numcached, NUM_CACHE_ENTRIES);*/
cache->numcached = NUM_CACHE_ENTRIES;
}
/* if the cache is full, do a replace rather than an insert */
if (cache->numcached >= NUM_CACHE_ENTRIES) {
- //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
cache->numcached = NUM_CACHE_ENTRIES-1;
if (index > cache->numcached) {
- // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
index = cache->numcached;
}
}
};
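/*
 * Callback passed to hfs_chash_snoop(): copy the ownership, mode and
 * parent information we need out of an in-core cnode into a struct cinfo.
 */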
static int
-snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
+snoop_callback(const cnode_t *cp, void *arg)
{
- struct cinfo *cip = (struct cinfo *)arg;
+ struct cinfo *cip = arg;
- cip->uid = attrp->ca_uid;
- cip->gid = attrp->ca_gid;
- cip->mode = attrp->ca_mode;
- cip->parentcnid = descp->cd_parentcnid;
- cip->recflags = attrp->ca_recflags;
+ cip->uid = cp->c_uid;
+ cip->gid = cp->c_gid;
+ cip->mode = cp->c_mode;
+ cip->parentcnid = cp->c_parentcnid;
+ cip->recflags = cp->c_attr.ca_recflags;
return (0);
}
/* if this id matches the one the fsctl was called with, skip the lookup */
if (cnid == skip_cp->c_cnid) {
- cnattrp->ca_uid = skip_cp->c_uid;
- cnattrp->ca_gid = skip_cp->c_gid;
- cnattrp->ca_mode = skip_cp->c_mode;
- cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
- keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
+ cnattrp->ca_uid = skip_cp->c_uid;
+ cnattrp->ca_gid = skip_cp->c_gid;
+ cnattrp->ca_mode = skip_cp->c_mode;
+ cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
+ keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
} else {
- struct cinfo c_info;
-
- /* otherwise, check the cnode hash incase the file/dir is incore */
- if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
- cnattrp->ca_uid = c_info.uid;
- cnattrp->ca_gid = c_info.gid;
- cnattrp->ca_mode = c_info.mode;
- cnattrp->ca_recflags = c_info.recflags;
- keyp->hfsPlus.parentID = c_info.parentcnid;
- } else {
- int lockflags;
-
- lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
-
- /* lookup this cnid in the catalog */
- error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
+ struct cinfo c_info;
+
+	/* otherwise, check the cnode hash in case the file/dir is incore */
+ error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);
+
+ if (error == EACCES) {
+ // File is deleted
+ return ENOENT;
+ } else if (!error) {
+ cnattrp->ca_uid = c_info.uid;
+ cnattrp->ca_gid = c_info.gid;
+ cnattrp->ca_mode = c_info.mode;
+ cnattrp->ca_recflags = c_info.recflags;
+ keyp->hfsPlus.parentID = c_info.parentcnid;
+ } else {
+ int lockflags;
+
+ if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
+ throttle_lowpri_io(1);
+
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+ /* lookup this cnid in the catalog */
+ error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
- hfs_systemfile_unlock(hfsmp, lockflags);
+ hfs_systemfile_unlock(hfsmp, lockflags);
- cache->lookups++;
- }
+ cache->lookups++;
+ }
}
return (error);
boolean_t is64bit;
/*
- * NOTE: on entry, the vnode is locked. Incase this vnode
+ * NOTE: on entry, the vnode has an io_ref. In case this vnode
* happens to be in our list of file_ids, we'll note it
	 * to avoid calling hfs_chashget_nowait() on that id, as that
* will cause a "locking against myself" panic.
access[i] = 0;
continue;
}
-
+
myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
err_exit_bulk_access:
- //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
-
if (file_ids)
kfree(file_ids, sizeof(int) * num_files);
if (parents)
/* end "bulk-access" support */
-/*
- * Callback for use with freeze ioctl.
- */
-static int
-hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
-{
- vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
-
- return 0;
-}
-
/*
* Control filesystem operating characteristics.
*/
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
vnode_t a_vp;
- int a_command;
+ long a_command;
caddr_t a_data;
int a_fflag;
vfs_context_t a_context;
#if CONFIG_PROTECT
{
int error = 0;
- if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+ if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
return error;
}
}
int outlen;
char *bufptr;
int error;
+ int flags = 0;
/* Caller must be owner of file system. */
vfsp = vfs_statfs(HFSTOVFS(hfsmp));
}
bufptr = (char *)ap->a_data;
cnid = strtoul(bufptr, NULL, 10);
+ if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
+ flags |= BUILDPATH_VOLUME_RELATIVE;
+ }
/* We need to call hfs_vfs_vget to leverage the code that will
* fix the origin list for us if needed, as opposed to calling
if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
return (error);
}
- error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
+ error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
vnode_put(file_vp);
return (error);
}
+ case HFS_TRANSFER_DOCUMENT_ID:
+ {
+ struct cnode *cp = NULL;
+ int error;
+ u_int32_t to_fd = *(u_int32_t *)ap->a_data;
+ struct fileproc *to_fp;
+ struct vnode *to_vp;
+ struct cnode *to_cp;
+
+ cp = VTOC(vp);
+
+ if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
+ //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
+ return error;
+ }
+ if ( (error = vnode_getwithref(to_vp)) ) {
+ file_drop(to_fd);
+ return error;
+ }
+
+ if (VTOHFS(to_vp) != hfsmp) {
+ error = EXDEV;
+ goto transfer_cleanup;
+ }
+
+ int need_unlock = 1;
+ to_cp = VTOC(to_vp);
+ error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
+ if (error != 0) {
+ //printf("could not lock the pair of cnodes (error %d)\n", error);
+ goto transfer_cleanup;
+ }
+
+ if (!(cp->c_bsdflags & UF_TRACKED)) {
+ error = EINVAL;
+ } else if (to_cp->c_bsdflags & UF_TRACKED) {
+ //
+ // if the destination is already tracked, return an error
+ // as otherwise it's a silent deletion of the target's
+ // document-id
+ //
+ error = EEXIST;
+ } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
+ //
+ // we can use the FndrExtendedFileInfo because the doc-id is the first
+ // thing in both it and the ExtendedDirInfo struct which is fixed in
+ // format and can not change layout
+ //
+ struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
+ struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
+
+ if (f_extinfo->document_id == 0) {
+ uint32_t new_id;
+
+ hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id
+
+ if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
+ //
+ // re-lock the pair now that we have the document-id
+ //
+ hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
+ f_extinfo->document_id = new_id;
+ } else {
+ goto transfer_cleanup;
+ }
+ }
+
+ to_extinfo->document_id = f_extinfo->document_id;
+ f_extinfo->document_id = 0;
+ //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
+
+ // make sure the destination is also UF_TRACKED
+ to_cp->c_bsdflags |= UF_TRACKED;
+ cp->c_bsdflags &= ~UF_TRACKED;
+
+ // mark the cnodes dirty
+ cp->c_flag |= C_MODIFIED;
+ to_cp->c_flag |= C_MODIFIED;
+
+ int lockflags;
+ if ((error = hfs_start_transaction(hfsmp)) == 0) {
+
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+
+ (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
+ (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
+
+ hfs_systemfile_unlock (hfsmp, lockflags);
+ (void) hfs_end_transaction(hfsmp);
+ }
+
+#if CONFIG_FSE
+ add_fsevent(FSE_DOCID_CHANGED, context,
+ FSE_ARG_DEV, hfsmp->hfs_raw_dev,
+ FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode #
+ FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode #
+ FSE_ARG_INT32, to_extinfo->document_id,
+ FSE_ARG_DONE);
+
+ hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
+ need_unlock = 0;
+
+ if (need_fsevent(FSE_STAT_CHANGED, vp)) {
+ add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
+ }
+ if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
+ add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
+ }
+#else
+ hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
+ need_unlock = 0;
+#endif
+ }
+
+ if (need_unlock) {
+ hfs_unlockpair(cp, to_cp);
+ }
+
+ transfer_cleanup:
+ vnode_put(to_vp);
+ file_drop(to_fd);
+
+ return error;
+ }
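	/*
	 * Illustrative sketch (editor's note, not part of the original change):
	 * the document-id transfer above is driven from user space by issuing
	 * this request on the source file with the destination's descriptor as
	 * the argument; assuming ffsctl(2) is the entry point used, roughly:
	 *
	 *     int src_fd = open("/path/to/src", O_RDONLY);   // hypothetical paths
	 *     int dst_fd = open("/path/to/dst", O_RDONLY);
	 *     u_int32_t arg = (u_int32_t)dst_fd;
	 *     ffsctl(src_fd, HFS_TRANSFER_DOCUMENT_ID, &arg, 0);
	 */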
+
+
+
case HFS_PREV_LINK:
case HFS_NEXT_LINK:
{
case HFS_RESIZE_VOLUME: {
u_int64_t newsize;
u_int64_t cursize;
+ int ret;
vfsp = vfs_statfs(HFSTOVFS(hfsmp));
if (suser(cred, NULL) &&
}
newsize = *(u_int64_t *)ap->a_data;
cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
-
+
+ if (newsize == cursize) {
+ return (0);
+ }
+ IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
if (newsize > cursize) {
- return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
- } else if (newsize < cursize) {
- return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
+ ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
} else {
- return (0);
+ ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
}
+ IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
+ return (ret);
}
case HFS_CHANGE_NEXT_ALLOCATION: {
int error = 0; /* Assume success */
if (!vnode_isvroot(vp)) {
return (EINVAL);
}
- HFS_MOUNT_LOCK(hfsmp, TRUE);
+ hfs_lock_mount(hfsmp);
location = *(u_int32_t *)ap->a_data;
if ((location >= hfsmp->allocLimit) &&
(location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
}
MarkVCBDirty(hfsmp);
fail_change_next_allocation:
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_unlock_mount(hfsmp);
return (error);
}
vnode_ref(bsfs_rootvp);
vnode_put(bsfs_rootvp);
+ hfs_lock_mount(hfsmp);
hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
-
hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
- /* The free extent cache is managed differently for sparse devices.
- * There is a window between which the volume is mounted and the
- * device is marked as sparse, so the free extent cache for this
- * volume is currently initialized as normal volume (sorted by block
- * count). Reset the cache so that it will be rebuilt again
- * for sparse device (sorted by start block).
- */
- ResetVCBFreeExtCache(hfsmp);
+ hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
+ hfs_unlock_mount(hfsmp);
- hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
- hfsmp->hfs_sparsebandblks *= 4;
-
- vfs_markdependency(hfsmp->hfs_mp);
+ /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
/*
* If the sparse image is on a sparse image file (as opposed to a sparse
}
}
+ /* The free extent cache is managed differently for sparse devices.
+ * There is a window between which the volume is mounted and the
+ * device is marked as sparse, so the free extent cache for this
+ * volume is currently initialized as normal volume (sorted by block
+ * count). Reset the cache so that it will be rebuilt again
+ * for sparse device (sorted by start block).
+ */
+ ResetVCBFreeExtCache(hfsmp);
+
(void)vnode_put(di_vp);
file_drop(bsdata->backingfd);
return (0);
if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
hfsmp->hfs_backingfs_rootvp) {
+ hfs_lock_mount(hfsmp);
hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
tmpvp = hfsmp->hfs_backingfs_rootvp;
hfsmp->hfs_backingfs_rootvp = NULLVP;
hfsmp->hfs_sparsebandblks = 0;
+ hfs_unlock_mount(hfsmp);
+
vnode_rele(tmpvp);
}
return (0);
}
#endif /* HFS_SPARSE_DEV */
- case F_FREEZE_FS: {
- struct mount *mp;
-
- mp = vnode_mount(vp);
- hfsmp = VFSTOHFS(mp);
+ /* Change the next CNID stored in the VH */
+ case HFS_CHANGE_NEXTCNID: {
+ int error = 0; /* Assume success */
+ u_int32_t fileid;
+ int wraparound = 0;
+ int lockflags = 0;
- if (!(hfsmp->jnl))
- return (ENOTSUP);
+ if (vnode_vfsisrdonly(vp)) {
+ return (EROFS);
+ }
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+
+ fileid = *(u_int32_t *)ap->a_data;
- vfsp = vfs_statfs(mp);
-
- if (kauth_cred_getuid(cred) != vfsp->f_owner &&
- !kauth_cred_issuser(cred))
- return (EACCES);
+ /* Must have catalog lock excl. to advance the CNID pointer */
+ lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
- lck_rw_lock_exclusive(&hfsmp->hfs_insync);
-
- // flush things before we get started to try and prevent
- // dirty data from being paged out while we're frozen.
- // note: can't do this after taking the lock as it will
- // deadlock against ourselves.
- vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
- hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
-
- // DO NOT call hfs_journal_flush() because that takes a
- // shared lock on the global exclusive lock!
- journal_flush(hfsmp->jnl, TRUE);
-
- // don't need to iterate on all vnodes, we just need to
- // wait for writes to the system files and the device vnode
- //
- // Now that journal flush waits for all metadata blocks to
- // be written out, waiting for btree writes is probably no
- // longer required.
- if (HFSTOVCB(hfsmp)->extentsRefNum)
- vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
- if (HFSTOVCB(hfsmp)->catalogRefNum)
- vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
- if (HFSTOVCB(hfsmp)->allocationsRefNum)
- vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
- if (hfsmp->hfs_attribute_vp)
- vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
- vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
-
- hfsmp->hfs_freezing_proc = current_proc();
+ hfs_lock_mount(hfsmp);
- return (0);
+ /* If it is less than the current next CNID, force the wraparound bit to be set */
+ if (fileid < hfsmp->vcbNxtCNID) {
+ wraparound=1;
+ }
+
+ /* Return previous value. */
+ *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
+
+ hfsmp->vcbNxtCNID = fileid;
+
+ if (wraparound) {
+ hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
+ }
+
+ MarkVCBDirty(hfsmp);
+ hfs_unlock_mount(hfsmp);
+ hfs_systemfile_unlock (hfsmp, lockflags);
+
+ return (error);
}
+
+ case F_FREEZE_FS: {
+ struct mount *mp;
+
+ mp = vnode_mount(vp);
+ hfsmp = VFSTOHFS(mp);
- case F_THAW_FS: {
- vfsp = vfs_statfs(vnode_mount(vp));
+ if (!(hfsmp->jnl))
+ return (ENOTSUP);
+
+ vfsp = vfs_statfs(mp);
+
if (kauth_cred_getuid(cred) != vfsp->f_owner &&
!kauth_cred_issuser(cred))
return (EACCES);
- // if we're not the one who froze the fs then we
- // can't thaw it.
- if (hfsmp->hfs_freezing_proc != current_proc()) {
- return EPERM;
- }
-
- // NOTE: if you add code here, also go check the
- // code that "thaws" the fs in hfs_vnop_close()
- //
- hfsmp->hfs_freezing_proc = NULL;
- hfs_unlock_global (hfsmp);
- lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
-
- return (0);
+ return hfs_freeze(hfsmp);
}
- case HFS_BULKACCESS_FSCTL: {
- int size;
-
- if (hfsmp->hfs_flags & HFS_STANDARD) {
- return EINVAL;
- }
+ case F_THAW_FS: {
+ vfsp = vfs_statfs(vnode_mount(vp));
+ if (kauth_cred_getuid(cred) != vfsp->f_owner &&
+ !kauth_cred_issuser(cred))
+ return (EACCES);
- if (is64bit) {
- size = sizeof(struct user64_access_t);
- } else {
- size = sizeof(struct user32_access_t);
- }
-
- return do_bulk_access_check(hfsmp, vp, ap, size, context);
- }
+ return hfs_thaw(hfsmp, current_proc());
+ }
case HFS_EXT_BULKACCESS_FSCTL: {
int size;
* are enabled by default, so any change will be transient only
* till the volume is remounted.
*/
- if (!is_suser()) {
+ if (!kauth_cred_issuser(kauth_cred_get())) {
return (EPERM);
}
if (state == 0 || state == 1)
return (EINVAL);
}
+ case F_SETSTATICCONTENT: {
+ int error;
+ int enable_static = 0;
+ struct cnode *cp = NULL;
+ /*
+ * lock the cnode, decorate the cnode flag, and bail out.
+ * VFS should have already authenticated the caller for us.
+ */
+
+ if (ap->a_data) {
+ /*
+ * Note that even though ap->a_data is of type caddr_t,
+ * the fcntl layer at the syscall handler will pass in NULL
+ * or 1 depending on what the argument supplied to the fcntl
+ * was. So it is in fact correct to check the ap->a_data
+ * argument for zero or non-zero value when deciding whether or not
+ * to enable the static bit in the cnode.
+ */
+ enable_static = 1;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
+ }
+ cp = VTOC(vp);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ if (enable_static) {
+ cp->c_flag |= C_SSD_STATIC;
+ }
+ else {
+ cp->c_flag &= ~C_SSD_STATIC;
+ }
+ hfs_unlock (cp);
+ }
+ return error;
+ }
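	/*
	 * Illustrative sketch (editor's note, not part of the original change):
	 * per the comment above, this arrives through fcntl(2) on an open file
	 * and the handler only sees whether the argument was zero or non-zero:
	 *
	 *     fcntl(fd, F_SETSTATICCONTENT, 1);   // set C_SSD_STATIC
	 *     fcntl(fd, F_SETSTATICCONTENT, 0);   // clear it
	 */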
+
+ case F_SET_GREEDY_MODE: {
+ int error;
+ int enable_greedy_mode = 0;
+ struct cnode *cp = NULL;
+ /*
+ * lock the cnode, decorate the cnode flag, and bail out.
+ * VFS should have already authenticated the caller for us.
+ */
+
+ if (ap->a_data) {
+ /*
+ * Note that even though ap->a_data is of type caddr_t,
+ * the fcntl layer at the syscall handler will pass in NULL
+ * or 1 depending on what the argument supplied to the fcntl
+ * was. So it is in fact correct to check the ap->a_data
+ * argument for zero or non-zero value when deciding whether or not
+ * to enable the greedy mode bit in the cnode.
+ */
+ enable_greedy_mode = 1;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
+ }
+ cp = VTOC(vp);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ if (enable_greedy_mode) {
+ cp->c_flag |= C_SSD_GREEDY_MODE;
+ }
+ else {
+ cp->c_flag &= ~C_SSD_GREEDY_MODE;
+ }
+ hfs_unlock (cp);
+ }
+ return error;
+ }
+
+ case F_SETIOTYPE: {
+ int error;
+ uint32_t iotypeflag = 0;
+
+ struct cnode *cp = NULL;
+ /*
+ * lock the cnode, decorate the cnode flag, and bail out.
+ * VFS should have already authenticated the caller for us.
+ */
+
+ if (ap->a_data == NULL) {
+ return EINVAL;
+ }
+
+ /*
+ * Note that even though ap->a_data is of type caddr_t, we
+ * can only use 32 bits of flag values.
+ */
+ iotypeflag = (uint32_t) ap->a_data;
+ switch (iotypeflag) {
+ case F_IOTYPE_ISOCHRONOUS:
+ break;
+ default:
+ return EINVAL;
+ }
+
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
+ }
+ cp = VTOC(vp);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ switch (iotypeflag) {
+ case F_IOTYPE_ISOCHRONOUS:
+ cp->c_flag |= C_IO_ISOCHRONOUS;
+ break;
+ default:
+ break;
+ }
+ hfs_unlock (cp);
+ }
+ return error;
+ }
+
+ case F_MAKECOMPRESSED: {
+ int error = 0;
+ uint32_t gen_counter;
+ struct cnode *cp = NULL;
+ int reset_decmp = 0;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
+ }
+
+ /*
+ * acquire & lock the cnode.
+ * VFS should have already authenticated the caller for us.
+ */
+
+ if (ap->a_data) {
+ /*
+ * Cast the pointer into a uint32_t so we can extract the
+ * supplied generation counter.
+ */
+ gen_counter = *((uint32_t*)ap->a_data);
+ }
+ else {
+ return EINVAL;
+ }
+
+#if HFS_COMPRESSION
+ cp = VTOC(vp);
+ /* Grab truncate lock first; we may truncate the file */
+ hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error) {
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ return error;
+ }
+
+ /* Are there any other usecounts/FDs? */
+ if (vnode_isinuse(vp, 1)) {
+ hfs_unlock(cp);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ return EBUSY;
+ }
+
+ /* now we have the cnode locked down; Validate arguments */
+ if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
+ /* EINVAL if you are trying to manipulate an IMMUTABLE file */
+ hfs_unlock(cp);
+ hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
+ return EINVAL;
+ }
+
+ if ((hfs_get_gencount (cp)) == gen_counter) {
+ /*
+ * OK, the gen_counter matched. Go for it:
+ * Toggle state bits, truncate file, and suppress mtime update
+ */
+ reset_decmp = 1;
+ cp->c_bsdflags |= UF_COMPRESSED;
+
+ error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
+ ap->a_context);
+ }
+ else {
+ error = ESTALE;
+ }
+
+ /* Unlock cnode before executing decmpfs ; they may need to get an EA */
+ hfs_unlock(cp);
+
+ /*
+ * Reset the decmp state while still holding the truncate lock. We need to
+ * serialize here against a listxattr on this node which may occur at any
+ * time.
+ *
+ * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
+ * that will still potentially require getting the com.apple.decmpfs EA. If the
+ * EA is required, then we can't hold the cnode lock, because the getxattr call is
+ * generic(through VFS), and can't pass along any info telling it that we're already
+ * holding it (the lock). If we don't serialize, then we risk listxattr stopping
+ * and trying to fill in the hfs_file_is_compressed info during the callback
+ * operation, which will result in deadlock against the b-tree node.
+ *
+ * So, to serialize against listxattr (which will grab buf_t meta references on
+ * the b-tree blocks), we hold the truncate lock as we're manipulating the
+ * decmpfs payload.
+ */
+ if ((reset_decmp) && (error == 0)) {
+ decmpfs_cnode *dp = VTOCMP (vp);
+ if (dp != NULL) {
+ decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
+ }
+
+ /* Initialize the decmpfs node as needed */
+ (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
+ }
+
+ hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
+
+#endif
+ return error;
+ }
+
+ case F_SETBACKINGSTORE: {
+
+ int error = 0;
+
+ /*
+ * See comment in F_SETSTATICCONTENT re: using
+ * a null check for a_data
+ */
+ if (ap->a_data) {
+ error = hfs_set_backingstore (vp, 1);
+ }
+ else {
+ error = hfs_set_backingstore (vp, 0);
+ }
+
+ return error;
+ }
+
+ case F_GETPATH_MTMINFO: {
+ int error = 0;
+
+ int *data = (int*) ap->a_data;
+
+ /* Ask if this is a backingstore vnode */
+ error = hfs_is_backingstore (vp, data);
+
+ return error;
+ }
+
case F_FULLFSYNC: {
int error;
if (hfsmp->hfs_flags & HFS_READ_ONLY) {
return (EROFS);
}
- error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
if (error == 0) {
- error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
+ error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
+ hfs_unlock(VTOC(vp));
+ }
+
+ return error;
+ }
+
+ case F_BARRIERFSYNC: {
+ int error;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
hfs_unlock(VTOC(vp));
}
if (!vnode_isreg(vp))
return EINVAL;
- error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
if (error == 0) {
cp = VTOC(vp);
/*
fp = VTOF(vp);
/* Protect against a size change. */
- hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+ hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
#if HFS_COMPRESSION
if (compressed && (uncompressed_size == -1)) {
error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
}
- hfs_unlock_truncate(VTOC(vp), 0);
+ hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
return (error);
}
- case F_READBOOTSTRAP:
- case F_WRITEBOOTSTRAP:
- {
- struct vnode *devvp = NULL;
- user_fbootstraptransfer_t *user_bootstrapp;
- int devBlockSize;
- int error;
- uio_t auio;
- daddr64_t blockNumber;
- u_int32_t blockOffset;
- u_int32_t xfersize;
- struct buf *bp;
- user_fbootstraptransfer_t user_bootstrap;
-
- if (!vnode_isvroot(vp))
- return (EINVAL);
- /* LP64 - when caller is a 64 bit process then we are passed a pointer
- * to a user_fbootstraptransfer_t else we get a pointer to a
- * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
- */
- if ((hfsmp->hfs_flags & HFS_READ_ONLY)
- && (ap->a_command == F_WRITEBOOTSTRAP)) {
- return (EROFS);
- }
- if (is64bit) {
- user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
- }
- else {
- user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
- user_bootstrapp = &user_bootstrap;
- user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
- user_bootstrap.fbt_length = bootstrapp->fbt_length;
- user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
- }
-
- if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) ||
- (user_bootstrapp->fbt_length > 1024)) {
- return EINVAL;
- }
-
- if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
- return EINVAL;
-
- devvp = VTOHFS(vp)->hfs_devvp;
- auio = uio_create(1, user_bootstrapp->fbt_offset,
- is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
- (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
- uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
-
- devBlockSize = vfs_devblocksize(vnode_mount(vp));
-
- while (uio_resid(auio) > 0) {
- blockNumber = uio_offset(auio) / devBlockSize;
- error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
- if (error) {
- if (bp) buf_brelse(bp);
- uio_free(auio);
- return error;
- };
-
- blockOffset = uio_offset(auio) % devBlockSize;
- xfersize = devBlockSize - blockOffset;
- error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
- if (error) {
- buf_brelse(bp);
- uio_free(auio);
- return error;
- };
- if (uio_rw(auio) == UIO_WRITE) {
- error = VNOP_BWRITE(bp);
- if (error) {
- uio_free(auio);
- return error;
- }
- } else {
- buf_brelse(bp);
- };
- };
- uio_free(auio);
- };
- return 0;
-
case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
{
if (is64bit) {
*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
break;
+ case HFS_FSCTL_GET_VERY_LOW_DISK:
+ *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
+ break;
+
case HFS_FSCTL_SET_VERY_LOW_DISK:
if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
return EINVAL;
hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
break;
+ case HFS_FSCTL_GET_LOW_DISK:
+ *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
+ break;
+
case HFS_FSCTL_SET_LOW_DISK:
if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
|| *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
break;
+ case HFS_FSCTL_GET_DESIRED_DISK:
+ *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
+ break;
+
case HFS_FSCTL_SET_DESIRED_DISK:
if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
return EINVAL;
if (hfsmp->hfs_flags & HFS_READ_ONLY) {
return (EROFS);
}
- HFS_MOUNT_LOCK(hfsmp, TRUE);
+ hfs_lock_mount (hfsmp);
bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
- (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+ hfs_unlock_mount (hfsmp);
+ (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
break;
case HFS_GET_BOOT_INFO:
if (!vnode_isvroot(vp))
return(EINVAL);
- HFS_MOUNT_LOCK(hfsmp, TRUE);
+ hfs_lock_mount (hfsmp);
bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_unlock_mount(hfsmp);
break;
case HFS_MARK_BOOT_CORRUPT:
* kHFSVolumeInconsistentBit in the volume header. This will
* force fsck_hfs on next mount.
*/
- if (!is_suser()) {
+ if (!kauth_cred_issuser(kauth_cred_get())) {
return EACCES;
}
if (hfsmp->hfs_flags & HFS_READ_ONLY) {
return (EROFS);
}
- printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
- hfs_mark_volume_inconsistent(hfsmp);
+ printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
+ hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
+ break;
+
+ case HFS_FSCTL_GET_JOURNAL_INFO:
+ jip = (struct hfs_journal_info*)ap->a_data;
+
+ if (vp == NULLVP)
+ return EINVAL;
+
+ if (hfsmp->jnl == NULL) {
+ jnl_start = 0;
+ jnl_size = 0;
+ } else {
+ jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
+ jnl_size = hfsmp->jnl_size;
+ }
+
+ jip->jstart = jnl_start;
+ jip->jsize = jnl_size;
+ break;
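	/*
	 * Illustrative sketch (editor's note, not part of the original change):
	 * this request is typically issued through fsctl(2) with the volume
	 * path (a placeholder below):
	 *
	 *     struct hfs_journal_info ji;
	 *     if (fsctl("/Volumes/MyVol", HFS_FSCTL_GET_JOURNAL_INFO, &ji, 0) == 0)
	 *         printf("journal at %lld, %lld bytes\n", ji.jstart, ji.jsize);
	 */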
+
+ case HFS_SET_ALWAYS_ZEROFILL: {
+ struct cnode *cp = VTOC(vp);
+
+ if (*(int *)ap->a_data) {
+ cp->c_flag |= C_ALWAYS_ZEROFILL;
+ } else {
+ cp->c_flag &= ~C_ALWAYS_ZEROFILL;
+ }
+ break;
+ }
+
+ case HFS_DISABLE_METAZONE: {
+ /* Only root can disable metadata zone */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EACCES;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
+ /* Disable metadata zone now */
+ (void) hfs_metadatazone_init(hfsmp, true);
+ printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
+ break;
+ }
+
+
+ case HFS_FSINFO_METADATA_BLOCKS: {
+ int error;
+ struct hfsinfo_metadata *hinfo;
+
+ hinfo = (struct hfsinfo_metadata *)ap->a_data;
+
+ /* Get information about number of metadata blocks */
+ error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
+ if (error) {
+ return error;
+ }
+
+ break;
+ }
+
+ case HFS_GET_FSINFO: {
+ hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;
+
+ /* Only root is allowed to get fsinfo */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EACCES;
+ }
+
+ /*
+ * Make sure that the caller's version number matches with
+ * the kernel's version number. This will make sure that
+ * if the structures being read/written into are changed
+ * by the kernel, the caller will not read incorrect data.
+ *
+ * The first three fields --- request_type, version and
+ * flags are same for all the hfs_fsinfo structures, so
+ * we can access the version number by assuming any
+ * structure for now.
+ */
+ if (fsinfo->header.version != HFS_FSINFO_VERSION) {
+ return ENOTSUP;
+ }
+
+ /* Make sure that the current file system is not marked inconsistent */
+ if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
+ return EIO;
+ }
+
+ return hfs_get_fsinfo(hfsmp, ap->a_data);
+ }
+
+ case HFS_CS_FREESPACE_TRIM: {
+ int error = 0;
+ int lockflags = 0;
+
+ /* Only root allowed */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EACCES;
+ }
+
+ /*
+ * This core functionality is similar to hfs_scan_blocks().
+ * The main difference is that hfs_scan_blocks() is called
+ * as part of mount where we are assured that the journal is
+ * empty to start with. This fcntl() can be called on a
+ * mounted volume, therefore it has to flush the content of
+ * the journal as well as ensure the state of summary table.
+ *
+ * This fcntl scans over the entire allocation bitmap,
+ * creates list of all the free blocks, and issues TRIM
+ * down to the underlying device. This can take long time
+ * as it can generate up to 512MB of read I/O.
+ */
+
+ if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
+ error = hfs_init_summary(hfsmp);
+ if (error) {
+ printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
+ return error;
+ }
+ }
+
+ /*
+ * The journal maintains list of recently deallocated blocks to
+ * issue DKIOCUNMAPs when the corresponding journal transaction is
+ * flushed to the disk. To avoid any race conditions, we only
+ * want one active trim list and only one thread issuing DKIOCUNMAPs.
+ * Therefore we make sure that the journal trim list is sync'ed,
+ * empty, and not modifiable for the duration of our scan.
+ *
+ * Take the journal lock before flushing the journal to the disk.
+ * We will keep on holding the journal lock till we don't get the
+ * bitmap lock to make sure that no new journal transactions can
+ * start. This will make sure that the journal trim list is not
+ * modified after the journal flush and before getting bitmap lock.
+ * We can release the journal lock after we acquire the bitmap
+ * lock as it will prevent any further block deallocations.
+ */
+ hfs_journal_lock(hfsmp);
+
+ /* Flush the journal and wait for all I/Os to finish up */
+ error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
+ if (error) {
+ hfs_journal_unlock(hfsmp);
+ return error;
+ }
+
+ /* Take bitmap lock to ensure it is not being modified */
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+ /* Release the journal lock */
+ hfs_journal_unlock(hfsmp);
+
+ /*
+ * ScanUnmapBlocks reads the bitmap in large block size
+ * (up to 1MB) unlike the runtime which reads the bitmap
+ * in the 4K block size. This can cause buf_t collisions
+ * and potential data corruption. To avoid this, we
+ * invalidate all the existing buffers associated with
+ * the bitmap vnode before scanning it.
+ *
+ * Note: ScanUnmapBlock() cleans up all the buffers
+ * after itself, so there won't be any large buffers left
+ * for us to clean up after it returns.
+ */
+ error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
+ if (error) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ return error;
+ }
+
+ /* Traverse bitmap and issue DKIOCUNMAPs */
+ error = ScanUnmapBlocks(hfsmp);
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ if (error) {
+ return error;
+ }
+
break;
+ }
- case HFS_FSCTL_GET_JOURNAL_INFO:
- jip = (struct hfs_journal_info*)ap->a_data;
+ case HFS_SET_HOTFILE_STATE: {
+ int error;
+ struct cnode *cp = VTOC(vp);
+ uint32_t hf_state = *((uint32_t*)ap->a_data);
+ uint32_t num_unpinned = 0;
+
+ error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error) {
+ return error;
+ }
- if (vp == NULLVP)
- return EINVAL;
+ // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
+ if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
+ vnode_setfastdevicecandidate(vp);
- if (hfsmp->jnl == NULL) {
- jnl_start = 0;
- jnl_size = 0;
- } else {
- jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
- jnl_size = (off_t)hfsmp->jnl_size;
- }
+ cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
+ cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
+ cp->c_flag |= C_MODIFIED;
+ } else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
+ vnode_clearfastdevicecandidate(vp);
+ hfs_removehotfile(vp);
- jip->jstart = jnl_start;
- jip->jsize = jnl_size;
- break;
+ if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
+ hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned, ap->a_context);
+ }
+
+ if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
+ cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
+ }
+ cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
+ cp->c_flag |= C_MODIFIED;
- case HFS_SET_ALWAYS_ZEROFILL: {
- struct cnode *cp = VTOC(vp);
+ } else {
+ error = EINVAL;
+ }
- if (*(int *)ap->a_data) {
- cp->c_flag |= C_ALWAYS_ZEROFILL;
- } else {
- cp->c_flag &= ~C_ALWAYS_ZEROFILL;
- }
- break;
- }
+ if (num_unpinned != 0) {
+ lck_mtx_lock(&hfsmp->hfc_mutex);
+ hfsmp->hfs_hotfile_freeblks += num_unpinned;
+ lck_mtx_unlock(&hfsmp->hfc_mutex);
+ }
- case HFS_DISABLE_METAZONE: {
- /* Only root can disable metadata zone */
- if (!is_suser()) {
+ hfs_unlock(cp);
+ return error;
+ break;
+ }
+
+ case HFS_REPIN_HOTFILE_STATE: {
+ int error=0;
+ uint32_t repin_what = *((uint32_t*)ap->a_data);
+
+ /* Only root allowed */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
return EACCES;
}
- if (hfsmp->hfs_flags & HFS_READ_ONLY) {
- return (EROFS);
+
+ if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
+ // this system is neither regular Fusion nor Cooperative Fusion
+ // so this fsctl makes no sense.
+ return EINVAL;
}
- /* Disable metadata zone now */
- (void) hfs_metadatazone_init(hfsmp, true);
- printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
+ //
+ // After converting a CoreStorage volume to be encrypted, the
+ // extents could have moved around underneath us. This call
+ // allows corestoraged to re-pin everything that should be
+ // pinned (it would happen on the next reboot too but that could
+ // be a long time away).
+ //
+ if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
+ hfs_pin_fs_metadata(hfsmp);
+ }
+ if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
+ hfs_repin_hotfiles(hfsmp);
+ }
+ if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
+ //XXX Swapfiles (marked SWAP_PINNED) may have moved too.
+ //XXX Do we care? They have a more transient/dynamic nature/lifetime.
+ }
+
+ return error;
break;
- }
-
+ }
+
+
default:
return (ENOTTY);
}
* searched for mapping.
*
* System file cnodes are expected to be locked (shared or exclusive).
+ *
+ * -- INVALID RANGES --
+ *
+ * Invalid ranges are used to keep track of where we have extended a
+ * file, but have not yet written that data to disk. In the past we
+ * would clear up the invalid ranges as we wrote to those areas, but
+ * before data was actually flushed to disk. The problem with that
+ * approach is that the data can be left in the cache and is therefore
+ * still not valid on disk. So now we clear up the ranges here, when
+ * the flags field has VNODE_WRITE set, indicating a write is about to
+ * occur. This isn't ideal (ideally we want to clear them up when
+ * we know the data has been successfully written), but it's the best we
+ * can do.
+ *
+ * For reads, we use the invalid ranges here in block map to indicate
+ * to the caller that the data should be zeroed (a_bpn == -1). We
+ * have to be careful about what ranges we return to the cluster code.
+ * Currently the cluster code can only handle non-rounded values for
+ * the EOF; it cannot handle funny sized ranges in the middle of the
+ * file (the main problem is that it sends down odd sized I/Os to the
+ * disk). Our code currently works because whilst the very first
+ * offset and the last offset in the invalid ranges are not aligned,
+ * gaps in the invalid ranges between the first and last have to be
+ * aligned (because we always write page sized blocks). For example,
+ * consider this arrangement:
+ *
+ * +-------------+-----+-------+------+
+ * | |XXXXX| |XXXXXX|
+ * +-------------+-----+-------+------+
+ * a b c d
+ *
+ * This shows two invalid ranges <a, b> and <c, d>. Whilst a and d
+ * are not necessarily aligned, b and c *must* be.
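+ *
+ * As a concrete illustration (assuming 4 KiB pages): an invalid range
+ * ending at byte 8191 followed by one starting at byte 12288 respects
+ * this, because the interior boundaries fall at 8192 and 12288; a
+ * range ending at 9000 followed by one starting at 9500 could not
+ * arise, since those interior boundaries are not page aligned.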
+ *
+ * Zero-filling occurs in a number of ways:
+ *
+ * 1. When a read occurs and we return with a_bpn == -1.
+ *
+ * 2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
+ * which will cause us to iterate over the ranges bringing in
+ * pages that are not present in the cache and zeroing them. Any
+ * pages that are already in the cache are left untouched. Note
+ * that hfs_fsync does not always flush invalid ranges.
+ *
+ * 3. When we extend a file we zero out from the old EOF to the end
+ * of the page. It would be nice if we didn't have to do this if
+ * the page wasn't present (and could defer it), but because of
+ * the problem described above, we have to.
+ *
+ * The invalid ranges are also used to restrict the size that we write
+ * out on disk: see hfs_prepare_fork_for_update.
+ *
+ * Note that invalid ranges are ignored when neither the VNODE_READ nor
+ * the VNODE_WRITE flag is specified. This is useful for the
+ * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
+ * just want to know whether blocks are physically allocated or not.
*/
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
struct cnode *cp;
struct filefork *fp;
struct hfsmount *hfsmp;
- size_t bytesContAvail = 0;
+ size_t bytesContAvail = ap->a_size;
int retval = E_NONE;
int syslocks = 0;
int lockflags = 0;
if (ap->a_bpn == NULL)
return (0);
+ hfsmp = VTOHFS(vp);
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+
if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
- if (VTOC(vp)->c_lockowner != current_thread()) {
- hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ if (cp->c_lockowner != current_thread()) {
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
tooklock = 1;
}
+
+ // For reads, check the invalid ranges
+ if (ISSET(ap->a_flags, VNODE_READ)) {
+ if (ap->a_foffset >= fp->ff_size) {
+ retval = ERANGE;
+ goto exit;
+ }
+
+ overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
+ ap->a_foffset + (off_t)bytesContAvail - 1,
+ &invalid_range);
+ switch(overlaptype) {
+ case RL_MATCHINGOVERLAP:
+ case RL_OVERLAPCONTAINSRANGE:
+ case RL_OVERLAPSTARTSBEFORE:
+ /* There's no valid block for this byte offset */
+ *ap->a_bpn = (daddr64_t)-1;
+ /* There's no point limiting the amount to be returned
+ * if the invalid range that was hit extends all the way
+ * to the EOF (i.e. there are no valid bytes between the
+ * end of this range and the file's EOF):
+ */
+ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+ bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+ }
+
+ retval = 0;
+ goto exit;
+
+ case RL_OVERLAPISCONTAINED:
+ case RL_OVERLAPENDSAFTER:
+ /* The range of interest hits an invalid block before the end: */
+ if (invalid_range->rl_start == ap->a_foffset) {
+ /* There's actually no valid information to be had starting here: */
+ *ap->a_bpn = (daddr64_t)-1;
+ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+ bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+ }
+
+ retval = 0;
+ goto exit;
+ } else {
+ /*
+ * Sadly, the lower layers don't like us
+ * returning unaligned ranges, so we skip over
+ * any invalid ranges here that are smaller than
+ * a page: zeroing those bytes is not our
+ * responsibility (it's dealt with elsewhere).
+ */
+ do {
+ off_t rounded_start = round_page_64(invalid_range->rl_start);
+ if ((off_t)bytesContAvail < rounded_start - ap->a_foffset)
+ break;
+ if (rounded_start < invalid_range->rl_end + 1) {
+ bytesContAvail = rounded_start - ap->a_foffset;
+ break;
+ }
+ } while ((invalid_range = TAILQ_NEXT(invalid_range,
+ rl_link)));
+ }
+ break;
+
+ case RL_NOOVERLAP:
+ break;
+ } // switch
+ }
}
- hfsmp = VTOHFS(vp);
- cp = VTOC(vp);
- fp = VTOF(vp);
+
+#if CONFIG_PROTECT
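+	/*
+	 * For content-protected files, ask the protection layer how much
+	 * of this range may be returned as one contiguous run and whether
+	 * it already knows the physical offset; if it does, hand back that
+	 * mapping without consulting the file's extents at all.
+	 */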
+ if (cp->c_cpentry) {
+ const int direction = (ISSET(ap->a_flags, VNODE_WRITE)
+ ? VNODE_WRITE : VNODE_READ);
+
+ cp_io_params_t io_params;
+ cp_io_params(hfsmp, cp->c_cpentry,
+ off_rsrc_make(ap->a_foffset, VNODE_IS_RSRC(vp)),
+ direction, &io_params);
+
+ if (io_params.max_len < (off_t)bytesContAvail)
+ bytesContAvail = io_params.max_len;
+
+ if (io_params.phys_offset != -1) {
+ *ap->a_bpn = ((io_params.phys_offset + hfsmp->hfsPlusIOPosOffset)
+ / hfsmp->hfs_logical_block_size);
+
+ retval = 0;
+ goto exit;
+ }
+ }
+#endif
retry:
+
/* Check virtual blocks only when performing write operation */
if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
if (hfs_start_transaction(hfsmp) != 0) {
cp->c_blocks += loanedBlocks;
fp->ff_blocks += loanedBlocks;
- HFS_MOUNT_LOCK(hfsmp, TRUE);
+ hfs_lock_mount (hfsmp);
hfsmp->loanedBlocks += loanedBlocks;
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_unlock_mount (hfsmp);
hfs_systemfile_unlock(hfsmp, lockflags);
cp->c_flag |= C_MODIFIED;
if (started_tr) {
- (void) hfs_update(vp, TRUE);
+ (void) hfs_update(vp, 0);
(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
hfs_end_transaction(hfsmp);
}
}
- retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
+ retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset,
ap->a_bpn, &bytesContAvail);
if (syslocks) {
hfs_systemfile_unlock(hfsmp, lockflags);
syslocks = 0;
}
- if (started_tr) {
- (void) hfs_update(vp, TRUE);
- (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
- hfs_end_transaction(hfsmp);
- started_tr = 0;
- }
if (retval) {
/* On write, always return error because virtual blocks, if any,
* should have been allocated in ExtendFileC(). We do not
(ap->a_flags & VNODE_WRITE) ||
((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
goto exit;
- }
-
+ }
+
/* Validate if the start offset is within logical file size */
- if (ap->a_foffset > fp->ff_size) {
- goto exit;
+ if (ap->a_foffset >= fp->ff_size) {
+ goto exit;
}
- /* Searching file extents has failed for read operation, therefore
- * search rangelist for any uncommitted holes in the file.
+ /*
+ * At this point, we have encountered a failure during
+ * MapFileBlockC that resulted in ERANGE, and we are not
+ * servicing a write, and there are borrowed blocks.
+ *
+ * However, the cluster layer will not call blockmap for
+ * blocks that are borrowed and in-cache. We have to assume
+ * that because we observed ERANGE being emitted from
+ * MapFileBlockC, this extent range is not valid on-disk. So
+ * we treat this as a mapping that needs to be zero-filled
+ * prior to reading.
*/
- overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
- ap->a_foffset + (off_t)(ap->a_size - 1),
- &invalid_range);
- switch(overlaptype) {
- case RL_OVERLAPISCONTAINED:
- /* start_offset <= rl_start, end_offset >= rl_end */
- if (ap->a_foffset != invalid_range->rl_start) {
- break;
- }
- case RL_MATCHINGOVERLAP:
- /* start_offset = rl_start, end_offset = rl_end */
- case RL_OVERLAPCONTAINSRANGE:
- /* start_offset >= rl_start, end_offset <= rl_end */
- case RL_OVERLAPSTARTSBEFORE:
- /* start_offset > rl_start, end_offset >= rl_start */
- if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
- bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
- } else {
- bytesContAvail = fp->ff_size - ap->a_foffset;
- }
- if (bytesContAvail > ap->a_size) {
- bytesContAvail = ap->a_size;
- }
- *ap->a_bpn = (daddr64_t)-1;
- retval = 0;
- break;
- case RL_OVERLAPENDSAFTER:
- /* start_offset < rl_start, end_offset < rl_end */
- case RL_NOOVERLAP:
- break;
- }
+
+ if (fp->ff_size - ap->a_foffset < (off_t)bytesContAvail)
+ bytesContAvail = fp->ff_size - ap->a_foffset;
+
+ *ap->a_bpn = (daddr64_t) -1;
+ retval = 0;
+
goto exit;
}
- /* MapFileC() found a valid extent in the filefork. Search the
- * mapping information further for invalid file ranges
- */
- overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
- ap->a_foffset + (off_t)bytesContAvail - 1,
- &invalid_range);
- if (overlaptype != RL_NOOVERLAP) {
- switch(overlaptype) {
- case RL_MATCHINGOVERLAP:
- case RL_OVERLAPCONTAINSRANGE:
- case RL_OVERLAPSTARTSBEFORE:
- /* There's no valid block for this byte offset */
- *ap->a_bpn = (daddr64_t)-1;
- /* There's no point limiting the amount to be returned
- * if the invalid range that was hit extends all the way
- * to the EOF (i.e. there's no valid bytes between the
- * end of this range and the file's EOF):
- */
- if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
- ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
- bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
- }
- break;
-
- case RL_OVERLAPISCONTAINED:
- case RL_OVERLAPENDSAFTER:
- /* The range of interest hits an invalid block before the end: */
- if (invalid_range->rl_start == ap->a_foffset) {
- /* There's actually no valid information to be had starting here: */
- *ap->a_bpn = (daddr64_t)-1;
- if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
- ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
- bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+exit:
+ if (retval == 0) {
+ if (ISSET(ap->a_flags, VNODE_WRITE)) {
+ struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
+
+ // See if we might be overlapping invalid ranges...
+ if (r && (ap->a_foffset + (off_t)bytesContAvail) > r->rl_start) {
+ /*
+ * Mark the file as needing an update if we think the
+ * on-disk EOF has changed.
+ */
+ if (ap->a_foffset <= r->rl_start)
+ SET(cp->c_flag, C_MODIFIED);
+
+ /*
+ * This isn't the ideal place to put this. Ideally, we
+ * should do something *after* we have successfully
+ * written to the range, but that's difficult to do
+ * because we cannot take locks in the callback. At
+ * present, the cluster code will call us with VNODE_WRITE
+ * set just before it's about to write the data so we know
+ * that data is about to be written. If we get an I/O
+ * error at this point then chances are the metadata
+ * update to follow will also have an I/O error so the
+ * risk here is small.
+ */
+ rl_remove(ap->a_foffset, ap->a_foffset + bytesContAvail - 1,
+ &fp->ff_invalidranges);
+
+ if (!TAILQ_FIRST(&fp->ff_invalidranges)) {
+ cp->c_flag &= ~C_ZFWANTSYNC;
+ cp->c_zftimeout = 0;
}
- } else {
- bytesContAvail = invalid_range->rl_start - ap->a_foffset;
}
- break;
+ }
- case RL_NOOVERLAP:
- break;
- } /* end switch */
- if (bytesContAvail > ap->a_size)
- bytesContAvail = ap->a_size;
- }
-
-exit:
- if (retval == 0) {
if (ap->a_run)
*ap->a_run = bytesContAvail;
*(int *)ap->a_poff = 0;
}
+ if (started_tr) {
+ hfs_update(vp, TRUE);
+ hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+ }
+
if (tooklock)
hfs_unlock(cp);
return (MacToVFSError(retval));
}
-
/*
* prepare and issue the I/O
* buf_strategy knows how to deal
vnode_t vp = buf_vnode(bp);
int error = 0;
-#if CONFIG_PROTECT
- cnode_t *cp = NULL;
-
- if ((cp = cp_get_protected_cnode(vp)) != NULL) {
- /*
- * Some paths to hfs_vnop_strategy will take the cnode lock,
- * and some won't. But since content protection is only enabled
- * for files that (a) aren't system files and (b) are regular
- * files, any valid cnode here will be unlocked.
- */
- hfs_lock(cp, HFS_SHARED_LOCK);
- buf_setcpaddr(bp, cp->c_cpentry);
+ /* Mark buffer as containing static data if cnode flag set */
+ if (VTOC(vp)->c_flag & C_SSD_STATIC) {
+ buf_markstatic(bp);
}
-#endif /* CONFIG_PROTECT */
- error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
+ /* Mark buffer as containing greedy-mode data if cnode flag set */
+ if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
+ bufattr_markgreedymode(&bp->b_attr);
+ }
-#if CONFIG_PROTECT
- if (cp) {
- hfs_unlock(cp);
+ /* mark buffer as containing burst mode data if cnode flag set */
+ if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
+ bufattr_markisochronous(&bp->b_attr);
}
-#endif
- return error;
-}
+#if CONFIG_PROTECT
+ error = cp_handle_strategy(bp);
-static int
-hfs_minorupdate(struct vnode *vp) {
- struct cnode *cp = VTOC(vp);
- cp->c_flag &= ~C_MODIFIED;
- cp->c_touch_acctime = 0;
- cp->c_touch_chgtime = 0;
- cp->c_touch_modtime = 0;
+ if (error)
+ return error;
+#endif
- return 0;
+ error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
+
+ return error;
}
int
-do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
+do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
{
register struct cnode *cp = VTOC(vp);
struct filefork *fp = VTOF(vp);
- struct proc *p = vfs_context_proc(context);;
kauth_cred_t cred = vfs_context_ucred(context);
int retval;
off_t bytesToAdd;
int blksize;
struct hfsmount *hfsmp;
int lockflags;
+ int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
blksize = VTOVCB(vp)->blockSize;
fileblocks = fp->ff_blocks;
filebytes = (off_t)fileblocks * (off_t)blksize;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
+ KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START,
(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
if (length < 0)
/* All or nothing and don't round up to clumpsize. */
eflags = kEFAllMask | kEFNoClumpMask;
- if (cred && suser(cred, NULL) != 0)
+ if (cred && (suser(cred, NULL) != 0)) {
eflags |= kEFReserveMask; /* keep a reserve */
+ }
/*
* Allocate Journal and Quota files in metadata zone.
lockflags |= SFL_EXTENTS;
lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+ /*
+ * Keep growing the file as long as the current EOF is
+ * less than the desired value.
+ */
while ((length > filebytes) && (retval == E_NONE)) {
bytesToAdd = length - filebytes;
retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
hfs_systemfile_unlock(hfsmp, lockflags);
if (hfsmp->jnl) {
- if (skipupdate) {
- (void) hfs_minorupdate(vp);
- }
- else {
- (void) hfs_update(vp, TRUE);
- (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
- }
+ hfs_update(vp, 0);
+ hfs_volupdate(hfsmp, VOL_UPDATE, 0);
}
hfs_end_transaction(hfsmp);
if (retval)
goto Err_Exit;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
}
- if (!(flags & IO_NOZEROFILL)) {
+ if (ISSET(flags, IO_NOZEROFILL)) {
+ // An optimisation for the hibernation file
+ if (vnode_isswap(vp))
+ rl_remove_all(&fp->ff_invalidranges);
+ } else {
if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
- struct rl_entry *invalid_range;
- off_t zero_limit;
-
- zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
- if (length < zero_limit) zero_limit = length;
-
if (length > (off_t)fp->ff_size) {
struct timeval tv;
/* Extending the file: time to fill out the current last page w. zeroes? */
- if ((fp->ff_size & PAGE_MASK_64) &&
- (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
- fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
-
- /* There's some valid data at the start of the (current) last page
+ if (fp->ff_size & PAGE_MASK_64) {
+ /* There might be some valid data at the start of the (current) last page
of the file, so zero out the remainder of that page to ensure the
- entire page contains valid data. Since there is no invalid range
- possible past the (current) eof, there's no need to remove anything
- from the invalid range list before calling cluster_write(): */
+ entire page contains valid data. */
hfs_unlock(cp);
- retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
- fp->ff_size, (off_t)0,
- (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
- hfs_lock(cp, HFS_FORCE_LOCK);
+ retval = hfs_zero_eof_page(vp, length);
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
if (retval) goto Err_Exit;
-
- /* Merely invalidate the remaining area, if necessary: */
- if (length > zero_limit) {
- microuptime(&tv);
- rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
- cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
- }
- } else {
- /* The page containing the (current) eof is invalid: just add the
- remainder of the page to the invalid list, along with the area
- being newly allocated:
- */
+ }
microuptime(&tv);
rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
- };
}
} else {
panic("hfs_truncate: invoked on non-UBC object?!");
};
}
- cp->c_touch_modtime = TRUE;
+ if (suppress_times == 0) {
+ cp->c_touch_modtime = TRUE;
+ }
fp->ff_size = length;
} else { /* Shorten the size of the file */
- if ((off_t)fp->ff_size > length) {
+ // An optimisation for the hibernation file
+ if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) {
+ rl_remove_all(&fp->ff_invalidranges);
+ } else if ((off_t)fp->ff_size > length) {
/* Any space previously marked as invalid is now irrelevant: */
rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
}
u_int32_t finalblks;
u_int32_t loanedBlocks;
- HFS_MOUNT_LOCK(hfsmp, TRUE);
-
+ hfs_lock_mount(hfsmp);
loanedBlocks = fp->ff_unallocblocks;
cp->c_blocks -= loanedBlocks;
fp->ff_blocks -= loanedBlocks;
cp->c_blocks += loanedBlocks;
fp->ff_blocks += loanedBlocks;
}
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_unlock_mount (hfsmp);
}
- /*
- * For a TBE process the deallocation of the file blocks is
- * delayed until the file is closed. And hfs_close calls
- * truncate with the IO_NDELAY flag set. So when IO_NDELAY
- * isn't set, we make sure this isn't a TBE process.
- */
- if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
-#if QUOTA
- off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
-#endif /* QUOTA */
- if (hfs_start_transaction(hfsmp) != 0) {
- retval = EINVAL;
- goto Err_Exit;
- }
+ off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
- if (fp->ff_unallocblocks == 0) {
- /* Protect extents b-tree and allocation bitmap */
- lockflags = SFL_BITMAP;
- if (overflow_extents(fp))
- lockflags |= SFL_EXTENTS;
- lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+ if (fp->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
- retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
- FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
+ retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
+ FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
- hfs_systemfile_unlock(hfsmp, lockflags);
- }
- if (hfsmp->jnl) {
- if (retval == 0) {
- fp->ff_size = length;
- }
- if (skipupdate) {
- (void) hfs_minorupdate(vp);
- }
- else {
- (void) hfs_update(vp, TRUE);
- (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
- }
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (hfsmp->jnl) {
+ if (retval == 0) {
+ fp->ff_size = length;
}
- hfs_end_transaction(hfsmp);
+ hfs_update(vp, 0);
+ hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+ hfs_end_transaction(hfsmp);
- filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
- if (retval)
- goto Err_Exit;
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (retval)
+ goto Err_Exit;
#if QUOTA
- /* These are bytesreleased */
- (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
+ /* These are bytesreleased */
+ (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
- }
- /* Only set update flag if the logical length changes */
- if ((off_t)fp->ff_size != length)
+
+ //
+ // Unlike when growing a file, we adjust the hotfile block count here
+ // instead of deeper down in the block allocation code because we do
+ // not necessarily have a vnode or "fcb" at the time we're deleting
+ // the file and so we wouldn't know if it was hotfile cached or not
+ //
+ hfs_hotfile_adjust_blocks(vp, (int64_t)((savedbytes - filebytes) / blksize));
+
+
+ /*
+ * Only set update flag if the logical length changes & we aren't
+ * suppressing modtime updates.
+ */
+ if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
cp->c_touch_modtime = TRUE;
+ }
fp->ff_size = length;
}
if (cp->c_mode & (S_ISUID | S_ISGID)) {
- if (!vfs_context_issuser(context)) {
+ if (!vfs_context_issuser(context))
cp->c_mode &= ~(S_ISUID | S_ISGID);
- skipupdate = 0;
- }
- }
- if (skipupdate) {
- retval = hfs_minorupdate(vp);
}
- else {
- cp->c_touch_chgtime = TRUE; /* status changed */
+ cp->c_flag |= C_MODIFIED;
+ cp->c_touch_chgtime = TRUE; /* status changed */
+ if (suppress_times == 0) {
cp->c_touch_modtime = TRUE; /* file data was modified */
- retval = hfs_update(vp, MNT_WAIT);
+
+ /*
+ * If we are not suppressing the modtime update, then
+ * update the gen count as well.
+ */
+ if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
+ hfs_incr_gencount(cp);
+ }
}
+
+ retval = hfs_update(vp, 0);
if (retval) {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
-1, -1, -1, retval, 0);
}
Err_Exit:
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
+ KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END,
(int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
return (retval);
struct filefork *fp = VTOF(vp);
struct cnode *cp = VTOC(vp);
+#if QUOTA
int retval = 0;
+#endif /* QUOTA */
/* Cannot truncate an HFS directory! */
if (vnode_isdir(vp)) {
if (fp->ff_unallocblocks > 0) {
u_int32_t loanedBlocks;
- HFS_MOUNT_LOCK(hfsmp, TRUE);
-
+ hfs_lock_mount (hfsmp);
loanedBlocks = fp->ff_unallocblocks;
cp->c_blocks -= loanedBlocks;
fp->ff_blocks -= loanedBlocks;
hfsmp->loanedBlocks -= loanedBlocks;
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_unlock_mount (hfsmp);
}
return 0;
* for use when deleting a file. The simplification here is that we know
* that we are releasing all blocks.
*
+ * Note that this function may be called when there is no vnode backing
+ * the file fork in question. We may call this from hfs_vnop_inactive
+ * to clear out resource fork data (and may not want to clear out the data
+ * fork yet). As a result, we pointer-check both sets of inputs before
+ * doing anything with them.
+ *
* The caller is responsible for saving off a copy of the filefork(s)
* embedded within the cnode prior to calling this function. The pointers
* supplied as arguments must be valid even if the cnode is no longer valid.
blksize = hfsmp->blockSize;
/* Data Fork */
- if (datafork->ff_blocks > 0) {
+ if (datafork) {
+ off_t prev_filebytes;
+ datafork->ff_size = 0;
+
fileblocks = datafork->ff_blocks;
- filebytes = (off_t)fileblocks * (off_t)blksize;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+ prev_filebytes = filebytes;
/* We killed invalid ranges and loaned blocks before we removed the catalog entry */
while (filebytes > 0) {
- if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
+ if (filebytes > HFS_BIGFILE_SIZE) {
filebytes -= HFS_BIGFILE_SIZE;
} else {
filebytes = 0;
hfs_systemfile_unlock(hfsmp, lockflags);
}
- if (error == 0) {
- datafork->ff_size = filebytes;
- }
(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
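+		/*
+		 * As in do_hfs_truncate, adjust the hot-file block count for
+		 * the blocks freed in this pass of the loop.
+		 */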
+ struct cnode *cp = datafork ? FTOC(datafork) : NULL;
+ struct vnode *vp;
+ vp = cp ? CTOV(cp, 0) : NULL;
+ hfs_hotfile_adjust_blocks(vp, (int64_t)((prev_filebytes - filebytes) / blksize));
+ prev_filebytes = filebytes;
+
/* Finish the transaction and start over if necessary */
hfs_end_transaction(hfsmp);
}
/* Resource fork */
- if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
+ if (error == 0 && rsrcfork) {
+ rsrcfork->ff_size = 0;
+
fileblocks = rsrcfork->ff_blocks;
filebytes = (off_t)fileblocks * (off_t)blksize;
/* We killed invalid ranges and loaned blocks before we removed the catalog entry */
while (filebytes > 0) {
- if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
+ if (filebytes > HFS_BIGFILE_SIZE) {
filebytes -= HFS_BIGFILE_SIZE;
} else {
filebytes = 0;
hfs_systemfile_unlock(hfsmp, lockflags);
}
- if (error == 0) {
- rsrcfork->ff_size = filebytes;
- }
(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
/* Finish the transaction and start over if necessary */
return error;
}
+errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
+{
+ errno_t error;
+
+ /*
+ * Call ubc_setsize to give the VM subsystem a chance to do
+ * whatever it needs to with existing pages before we delete
+ * blocks. Note that symlinks don't use the UBC so we'll
+ * get back ENOENT in that case.
+ */
+ if (have_cnode_lock) {
+ error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
+ if (error == EAGAIN) {
+ cnode_t *cp = VTOC(vp);
+
+ if (cp->c_truncatelockowner != current_thread()) {
+#if DEVELOPMENT || DEBUG
+ panic("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
+#else
+ printf("hfs: hfs_ubc_setsize called without exclusive truncate lock!\n");
+#endif
+ }
+
+ hfs_unlock(cp);
+ error = ubc_setsize_ex(vp, len, 0);
+ hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+ }
+ } else
+ error = ubc_setsize_ex(vp, len, 0);
+
+ return error == ENOENT ? 0 : error;
+}
/*
* Truncate a cnode to at most length size, freeing (or adding) the
* disk blocks.
*/
int
-hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
- int skipupdate, vfs_context_t context)
+hfs_truncate(struct vnode *vp, off_t length, int flags,
+ int truncateflags, vfs_context_t context)
{
- struct filefork *fp = VTOF(vp);
+ struct filefork *fp = VTOF(vp);
off_t filebytes;
u_int32_t fileblocks;
- int blksize, error = 0;
+ int blksize;
+ errno_t error = 0;
struct cnode *cp = VTOC(vp);
+ hfsmount_t *hfsmp = VTOHFS(vp);
/* Cannot truncate an HFS directory! */
if (vnode_isdir(vp)) {
return (EISDIR);
}
/* A swap file cannot change size. */
- if (vnode_isswap(vp) && (length != 0)) {
+ if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
return (EPERM);
}
- blksize = VTOVCB(vp)->blockSize;
+ blksize = hfsmp->blockSize;
fileblocks = fp->ff_blocks;
filebytes = (off_t)fileblocks * (off_t)blksize;
- //
- // Have to do this here so that we don't wind up with
- // i/o pending for blocks that are about to be released
- // if we truncate the file.
- //
- // If skipsetsize is set, then the caller is responsible
- // for the ubc_setsize.
- //
- // Even if skipsetsize is set, if the length is zero we
- // want to call ubc_setsize() because as of SnowLeopard
- // it will no longer cause any page-ins and it will drop
- // any dirty pages so that we don't do any i/o that we
- // don't have to. This also prevents a race where i/o
- // for truncated blocks may overwrite later data if the
- // blocks get reallocated to a different file.
- //
- if (!skipsetsize || length == 0)
- ubc_setsize(vp, length);
+ bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());
+
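+	/*
+	 * Tell UBC about the new size before we take the cnode lock and
+	 * release any blocks, so that no I/O can still be pending against
+	 * blocks that are about to be freed.
+	 */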
+ error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
+ if (error)
+ return error;
+
+ if (!caller_has_cnode_lock) {
+ error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error)
+ return error;
+ }
// have to loop truncating or growing files that are
// really big because otherwise transactions can get
if (length < filebytes) {
while (filebytes > length) {
- if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ if ((filebytes - length) > HFS_BIGFILE_SIZE) {
filebytes -= HFS_BIGFILE_SIZE;
} else {
filebytes = length;
}
- cp->c_flag |= C_FORCEUPDATE;
- error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
+ error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
if (error)
break;
}
} else if (length > filebytes) {
- while (filebytes < length) {
- if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
- filebytes += HFS_BIGFILE_SIZE;
- } else {
- filebytes = length;
+ kauth_cred_t cred = vfs_context_ucred(context);
+ const bool keep_reserve = cred && suser(cred, NULL) != 0;
+
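+		/*
+		 * Growing the file: fail early with ENOSPC if the volume
+		 * clearly does not have enough free blocks to cover the
+		 * entire extension.
+		 */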
+ if (hfs_freeblks(hfsmp, keep_reserve)
+ < howmany(length - filebytes, blksize)) {
+ error = ENOSPC;
+ } else {
+ while (filebytes < length) {
+ if ((length - filebytes) > HFS_BIGFILE_SIZE) {
+ filebytes += HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = length;
+ }
+ error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
+ if (error)
+ break;
}
- cp->c_flag |= C_FORCEUPDATE;
- error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
- if (error)
- break;
}
} else /* Same logical size */ {
- error = do_hfs_truncate(vp, length, flags, skipupdate, context);
+ error = do_hfs_truncate(vp, length, flags, truncateflags, context);
}
/* Files that are changing size are not hot file candidates. */
if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
fp->ff_bytesread = 0;
}
- return (error);
-}
+ if (!caller_has_cnode_lock)
+ hfs_unlock(cp);
+
+ // Make sure UBC's size matches up (in case we didn't completely succeed)
+ errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
+ if (!error)
+ error = err2;
+
+ return error;
+}
/*
check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
- hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
- if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
goto Err_Exit;
}
* value of filebytes is 0, length will be at least 1.
*/
if (length > filebytes) {
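+		/*
+		 * For an all-or-nothing (kEFAllMask) request, fail up front
+		 * with ENOSPC when the volume cannot possibly satisfy the
+		 * whole allocation.
+		 */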
+ if (ISSET(extendFlags, kEFAllMask)
+ && (hfs_freeblks(hfsmp, ISSET(extendFlags, kEFReserveMask))
+ < howmany(length - filebytes, hfsmp->blockSize))) {
+ retval = ENOSPC;
+ goto Err_Exit;
+ }
+
off_t total_bytes_added = 0, orig_request_size;
orig_request_size = moreBytesRequested = length - filebytes;
/* Protect extents b-tree and allocation bitmap */
lockflags = SFL_BITMAP;
if (overflow_extents(fp))
- lockflags |= SFL_EXTENTS;
+ lockflags |= SFL_EXTENTS;
lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
- bytesRequested = HFS_BIGFILE_SIZE;
+ bytesRequested = HFS_BIGFILE_SIZE;
} else {
- bytesRequested = moreBytesRequested;
+ bytesRequested = moreBytesRequested;
}
if (extendFlags & kEFContigMask) {
hfs_systemfile_unlock(hfsmp, lockflags);
if (hfsmp->jnl) {
- (void) hfs_update(vp, TRUE);
+ (void) hfs_update(vp, 0);
(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
}
} else { /* Shorten the size of the file */
- if (fp->ff_size > length) {
- /*
- * Any buffers that are past the truncation point need to be
- * invalidated (to maintain buffer cache consistency).
- */
- }
+ /*
+ * N.B. At present, this code is never called. If and when we
+ * do start using it, it looks like there might be slightly
+ * strange semantics with the file size: it's possible for the
+ * file size to *increase* e.g. if current file size is 5,
+ * length is 1024 and filebytes is 4096, the file size will
+ * end up being 1024 bytes. This isn't necessarily a problem
+ * but it's not consistent with the code above which doesn't
+ * change the file size.
+ */
- retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
+ retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
/*
if (fp->ff_size > filebytes) {
fp->ff_size = filebytes;
- hfs_unlock(cp);
- ubc_setsize(vp, fp->ff_size);
- hfs_lock(cp, HFS_FORCE_LOCK);
+ hfs_ubc_setsize(vp, fp->ff_size, true);
}
}
Std_Exit:
+ cp->c_flag |= C_MODIFIED;
cp->c_touch_chgtime = TRUE;
cp->c_touch_modtime = TRUE;
- retval2 = hfs_update(vp, MNT_WAIT);
+ retval2 = hfs_update(vp, 0);
if (retval == 0)
retval = retval2;
Err_Exit:
- hfs_unlock_truncate(cp, 0);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
hfs_unlock(cp);
return (retval);
}
upl_t upl;
upl_page_info_t *pl;
off_t f_offset;
+ off_t page_needed_f_offset;
int offset;
int isize;
+ int upl_size;
int pg_index;
boolean_t truncate_lock_held = FALSE;
boolean_t file_converted = FALSE;
fp = VTOF(vp);
#if CONFIG_PROTECT
- if ((error = cp_handle_vnop(cp, CP_READ_ACCESS | CP_WRITE_ACCESS)) != 0) {
+ if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
+ /*
+ * If we errored here, then this means that one of two things occurred:
+ * 1. there was a problem with the decryption of the key.
+ * 2. the device is locked and we are not allowed to access this particular file.
+ *
+ * Either way, this means that we need to shut down this UPL now.  As long as
+ * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves),
+ * we create a UPL and immediately abort it.
+ */
+ if (ap->a_pl == NULL) {
+ /* create the upl */
+ ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
+ UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
+ /* mark the range as needed so it doesn't immediately get discarded upon abort */
+ ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
+
+ /* Abort the range */
+ ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
+ }
+
+
return error;
}
#endif /* CONFIG_PROTECT */
goto pagein_done;
}
+ page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;
+
retry_pagein:
/*
* take truncate lock (shared/recursive) to guard against
if (vfs_isforce(vp->v_mount)) {
if (cp->c_flag & C_DELETED) {
/* If we don't get it, then just go ahead and operate without the lock */
- truncate_lock_held = hfs_try_trunclock(cp, HFS_RECURSE_TRUNCLOCK);
+ truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
}
}
else {
- hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
truncate_lock_held = TRUE;
}
error = EINVAL;
goto pagein_done;
}
- isize = ap->a_size;
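+	/*
+	 * Mark the faulting page as needed so it is not discarded if we
+	 * end up aborting other parts of this UPL below.
+	 */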
+ ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
- /*
+ upl_size = isize = ap->a_size;
+
+ /*
* Scan from the back to find the last page in the UPL, so that we
* aren't looking at a UPL that may have already been freed by the
* preceding aborts/completions.
int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
if (compressed) {
+
if (truncate_lock_held) {
/*
* can't hold the truncate lock when calling into the decmpfs layer
* takes the lock shared, we can deadlock if some other thread
* tries to grab the lock exclusively in between.
*/
- hfs_unlock_truncate(cp, 1);
+ hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
truncate_lock_held = FALSE;
}
ap->a_pl = upl;
* compressed once the compression lock is successfully taken
* i.e. we would block on that lock while the file is being inflated
*/
+ if (error == 0 && vnode_isfastdevicecandidate(vp)) {
+ (void) hfs_addhotfile(vp);
+ }
if (compressed) {
if (error == 0) {
/* successful page-in, update the access time */
VTOC(vp)->c_touch_acctime = TRUE;
- /* compressed files are not hot file candidates */
+ //
+ // compressed files are not traditional hot file candidates
+ // but they may be for CF (which ignores the ff_bytesread
+ // field)
+ //
if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
fp->ff_bytesread = 0;
}
* indication that the pagein needs to be redriven
*/
ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
+ } else if (error == ENOSPC) {
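+				/*
+				 * decmpfs could not service the full request (most
+				 * likely it failed to map a UPL this large), so fall
+				 * back to paging in only the single page that
+				 * triggered this fault.
+				 */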
+
+ if (upl_size == PAGE_SIZE)
+ panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
+
+ ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
+
+ ap->a_size = PAGE_SIZE;
+ ap->a_pl = NULL;
+ ap->a_pl_offset = 0;
+ ap->a_f_offset = page_needed_f_offset;
+
+ goto retry_pagein;
}
goto pagein_next_range;
}
/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
- hfs_lock(cp, HFS_FORCE_LOCK);
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
took_cnode_lock = 1;
}
/*
fp->ff_bytesread += bytesread;
}
cp->c_touch_acctime = TRUE;
+
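+		/*
+		 * If this vnode is marked as a fast-device candidate, give the
+		 * hot-file code a chance to record it now that it has been read.
+		 */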
+ if (vnode_isfastdevicecandidate(vp)) {
+ (void) hfs_addhotfile(vp);
+ }
if (took_cnode_lock)
hfs_unlock(cp);
}
pagein_done:
if (truncate_lock_held == TRUE) {
-		/* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
- hfs_unlock_truncate(cp, 1);
+ hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
}
return (error);
cp = VTOC(vp);
fp = VTOF(vp);
- /*
- * Figure out where the file ends, for pageout purposes. If
- * ff_new_size > ff_size, then we're in the middle of extending the
- * file via a write, so it is safe (and necessary) that we be able
- * to pageout up to that point.
- */
- filesize = fp->ff_size;
- if (fp->ff_new_size > filesize)
- filesize = fp->ff_new_size;
-
a_flags = ap->a_flags;
a_pl_offset = ap->a_pl_offset;
a_pl_offset = 0;
/*
- * take truncate lock (shared) to guard against
- * zero-fill thru fsync interfering, but only for v2
+ * For V2 semantics, we want to take the cnode truncate lock
+ * shared to guard against the file size changing via zero-filling.
+ *
+ * However, we have to be careful because we may be invoked
+ * via the ubc_msync path to write out dirty mmap'd pages
+ * in response to a lock event on a content-protected
+ * filesystem (e.g. to write out class A files).
+ * As a result, we want to take the truncate lock 'SHARED' with
+ * the mini-recursion locktype so that we don't deadlock/panic
+ * because we may already be holding the truncate lock exclusive to force any other
+ * IOs to block behind us.
*/
- hfs_lock_truncate(cp, HFS_SHARED_LOCK);
+ hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
if (a_flags & UPL_MSYNC) {
request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
* it was either passed in or we succesfully created it
*/
+ /*
+ * Figure out where the file ends, for pageout purposes. If
+ * ff_new_size > ff_size, then we're in the middle of extending the
+ * file via a write, so it is safe (and necessary) that we be able
+ * to pageout up to that point.
+ */
+ filesize = fp->ff_size;
+ if (fp->ff_new_size > filesize)
+ filesize = fp->ff_new_size;
+
/*
* Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
* UPL instead of relying on the UPL passed into us. We go ahead and do that here,
}
xsize = num_of_pages * PAGE_SIZE;
- if (!vnode_isswap(vp)) {
- off_t end_of_range;
- int tooklock;
-
- tooklock = 0;
-
- if (cp->c_lockowner != current_thread()) {
- if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
- /*
- * we're in the v2 path, so we are the
- * owner of the UPL... we may have already
- * processed some of the UPL, so abort it
- * from the current working offset to the
- * end of the UPL
- */
- ubc_upl_abort_range(upl,
- offset,
- ap->a_size - offset,
- UPL_ABORT_FREE_ON_EMPTY);
- goto pageout_done;
- }
- tooklock = 1;
- }
- end_of_range = f_offset + xsize - 1;
-
- if (end_of_range >= filesize) {
- end_of_range = (off_t)(filesize - 1);
- }
- if (f_offset < filesize) {
- rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
- cp->c_flag |= C_MODIFIED; /* leof is dirty */
- }
- if (tooklock) {
- hfs_unlock(cp);
- }
- }
if ((error = cluster_pageout(vp, upl, offset, f_offset,
xsize, filesize, a_flags))) {
if (error_ret == 0)
}
} /* end block for v2 pageout behavior */
else {
- if (!vnode_isswap(vp)) {
- off_t end_of_range;
- int tooklock = 0;
-
- if (cp->c_lockowner != current_thread()) {
- if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
- if (!(a_flags & UPL_NOCOMMIT)) {
- ubc_upl_abort_range(upl,
- a_pl_offset,
- ap->a_size,
- UPL_ABORT_FREE_ON_EMPTY);
- }
- goto pageout_done;
- }
- tooklock = 1;
- }
- end_of_range = ap->a_f_offset + ap->a_size - 1;
-
- if (end_of_range >= filesize) {
- end_of_range = (off_t)(filesize - 1);
- }
- if (ap->a_f_offset < filesize) {
- rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
- cp->c_flag |= C_MODIFIED; /* leof is dirty */
- }
-
- if (tooklock) {
- hfs_unlock(cp);
- }
- }
/*
* just call cluster_pageout for old pre-v2 behavior
*/
}
/*
- * If data was written, update the modification time of the file.
- * If setuid or setgid bits are set and this process is not the
- * superuser then clear the setuid and setgid bits as a precaution
- * against tampering.
+ * If data was written, update the modification time of the file
+ * but only if it's mapped writable; we will have touched the
+ * modification time for direct writes.
*/
- if (retval == 0) {
- cp->c_touch_modtime = TRUE;
- cp->c_touch_chgtime = TRUE;
- if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
- (vfs_context_suser(ap->a_context) != 0)) {
- hfs_lock(cp, HFS_FORCE_LOCK);
- cp->c_mode &= ~(S_ISUID | S_ISGID);
- hfs_unlock(cp);
+ if (retval == 0 && (ubc_is_mapped_writable(vp)
+ || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+
+ // Check again with lock
+ bool mapped_writable = ubc_is_mapped_writable(vp);
+ if (mapped_writable
+ || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
+ cp->c_touch_modtime = TRUE;
+ cp->c_touch_chgtime = TRUE;
+
+ /*
+ * We only need to increment the generation counter if
+ * it's currently mapped writable because we incremented
+ * the counter in hfs_vnop_mnomap.
+ */
+ if (mapped_writable)
+ hfs_incr_gencount(VTOC(vp));
+
+ /*
+ * If setuid or setgid bits are set and this process is
+ * not the superuser then clear the setuid and setgid bits
+ * as a precaution against tampering.
+ */
+ if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
+ (vfs_context_suser(ap->a_context) != 0)) {
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ }
}
+
+ hfs_unlock(cp);
}
pageout_done:
if (is_pageoutv2) {
- /* release truncate lock (shared) */
- hfs_unlock_truncate(cp, 0);
+ /*
+ * Release the truncate lock. Note that because
+ * we may have taken the lock recursively by
+ * being invoked via ubc_msync due to lockdown,
+ * we should release it recursively, too.
+ */
+ hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
}
return (retval);
}
return (retval);
}
+
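+//
+// Pin or unpin a contiguous range of allocation blocks by converting it
+// to a byte extent and issuing the corresponding CoreStorage pin/unpin
+// ioctl on the underlying device.
+//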
+int
+hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks, vfs_context_t ctx)
+{
+ _dk_cs_pin_t pin;
+ unsigned ioc;
+ int err;
+
+ memset(&pin, 0, sizeof(pin));
+ pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
+ pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
+ switch (pin_state) {
+ case HFS_PIN_IT:
+ ioc = _DKIOCCSPINEXTENT;
+ pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
+ break;
+ case HFS_PIN_IT | HFS_TEMP_PIN:
+ ioc = _DKIOCCSPINEXTENT;
+ pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
+ break;
+ case HFS_PIN_IT | HFS_DATALESS_PIN:
+ ioc = _DKIOCCSPINEXTENT;
+ pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
+ break;
+ case HFS_UNPIN_IT:
+ ioc = _DKIOCCSUNPINEXTENT;
+ pin.cp_flags = 0;
+ break;
+ case HFS_UNPIN_IT | HFS_EVICT_PIN:
+ ioc = _DKIOCCSPINEXTENT;
+ pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
+ break;
+ default:
+ return EINVAL;
+ }
+ err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, ctx);
+ return err;
+}
+
+//
+// The cnode lock should already be held on entry to this function
+//
+int
+hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned, vfs_context_t ctx)
+{
+ struct filefork *fp = VTOF(vp);
+ int i, err=0, need_put=0;
+ struct vnode *rsrc_vp=NULL;
+ uint32_t npinned = 0;
+ off_t offset;
+
+ if (num_blocks_pinned) {
+ *num_blocks_pinned = 0;
+ }
+
+ if (vnode_vtype(vp) != VREG) {
+ /* Not allowed to pin directories or symlinks */
+ printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
+ return (EPERM);
+ }
+
+ if (fp->ff_unallocblocks) {
+ printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
+ return (EINVAL);
+ }
+
+ /*
+ * It is possible that if the caller unlocked/re-locked the cnode after checking
+ * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
+ * cnode was unlocked. So check the condition again and return ENOENT so that
+ * the caller knows why we failed to pin the vnode.
+ */
+ if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
+ // makes no sense to pin something that's pending deletion
+ return ENOENT;
+ }
+
+ if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
+ if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
+ //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
+ // VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);
+
+ fp = VTOC(rsrc_vp)->c_rsrcfork;
+ need_put = 1;
+ }
+ }
+ if (fp->ff_blocks == 0) {
+ if (need_put) {
+ //
+ // use a distinct error code for a compressed file that has no resource fork;
+ // we return EALREADY to indicate that the data is probably already hot file
+ // cached because it's in an EA and the attributes btree is on the ssd
+ //
+ err = EALREADY;
+ } else {
+ err = EINVAL;
+ }
+ goto out;
+ }
+
+ offset = 0;
+ for (i = 0; i < kHFSPlusExtentDensity; i++) {
+ if (fp->ff_extents[i].startBlock == 0) {
+ break;
+ }
+
+ err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, ctx);
+ if (err) {
+ break;
+ } else {
+ npinned += fp->ff_extents[i].blockCount;
+ }
+ }
+
+ if (err || npinned == 0) {
+ goto out;
+ }
+
+ if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
+ uint32_t pblocks;
+ uint8_t forktype = 0;
+
+ if (fp == VTOC(vp)->c_rsrcfork) {
+ forktype = 0xff;
+ }
+ /*
+ * The file could have overflow extents, better pin them.
+ *
+ * We assume that since we are holding the cnode lock for this cnode,
+ * the file's extents cannot be manipulated, but the tree could, so we
+ * need to ensure that it doesn't change behind our back as we iterate it.
+ */
+ int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
+ err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
+ hfs_systemfile_unlock (hfsmp, lockflags);
+
+ if (err) {
+ goto out;
+ }
+ npinned += pblocks;
+ }
+
+out:
+ if (num_blocks_pinned) {
+ *num_blocks_pinned = npinned;
+ }
+
+ if (need_put && rsrc_vp) {
+ //
+ // have to unlock the cnode since it's shared between the
+ // resource fork vnode and the data fork vnode (and the
+ // vnode_put() may need to re-acquire the cnode lock to
+ // reclaim the resource fork vnode)
+ //
+ hfs_unlock(VTOC(vp));
+ vnode_put(rsrc_vp);
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ }
+ return err;
+}
+
+
/*
* Relocate a file to a new location on disk
* cnode must be locked on entry
enum vtype vnodetype;
vnodetype = vnode_vtype(vp);
- if (vnodetype != VREG && vnodetype != VLNK) {
+ if (vnodetype != VREG) {
+ /* Not allowed to move symlinks. */
return (EPERM);
}
return EINVAL;
}
#endif
-
/* If it's an SSD, also disable HFS relocation */
if (hfsmp->hfs_flags & HFS_SSD) {
return EINVAL;
}
+
blksize = hfsmp->blockSize;
if (blockHint == 0)
blockHint = hfsmp->nextAllocation;
- if ((fp->ff_size > 0x7fffffff) ||
- ((fp->ff_size > blksize) && vnodetype == VLNK)) {
+ if (fp->ff_size > 0x7fffffff) {
return (EFBIG);
}
if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
hfs_unlock(cp);
- hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
/* Force lock since callers expects lock to be held. */
- if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
- hfs_unlock_truncate(cp, 0);
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
return (retval);
}
/* No need to continue if file was removed. */
if (cp->c_flag & C_NOEXISTS) {
- hfs_unlock_truncate(cp, 0);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
return (ENOENT);
}
took_trunc_lock = 1;
if (hfs_start_transaction(hfsmp) != 0) {
if (took_trunc_lock)
- hfs_unlock_truncate(cp, 0);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
return (EINVAL);
}
started_tr = 1;
nextallocsave = hfsmp->nextAllocation;
retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
if (eflags & kEFMetadataMask) {
- HFS_MOUNT_LOCK(hfsmp, TRUE);
+ hfs_lock_mount(hfsmp);
HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
MarkVCBDirty(hfsmp);
- HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ hfs_unlock_mount(hfsmp);
}
retval = MacToVFSError(retval);
retval = ENOSPC;
goto restore;
} else if (fp->ff_blocks < (headblks + datablks)) {
- printf("hfs_relocate: allocation failed");
+ printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
retval = ENOSPC;
goto restore;
}
*/
if (vnodetype == VLNK)
- retval = hfs_clonelink(vp, blksize, cred, p);
+ retval = EPERM;
else if (vnode_issystem(vp))
retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
else
goto restore;
out:
if (took_trunc_lock)
- hfs_unlock_truncate(cp, 0);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
if (lockflags) {
hfs_systemfile_unlock(hfsmp, lockflags);
/* Push cnode's new extent data to disk. */
if (retval == 0) {
- (void) hfs_update(vp, MNT_WAIT);
+ hfs_update(vp, 0);
}
if (hfsmp->jnl) {
if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
- (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+ (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
else
- (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+ (void) hfs_flushvolumeheader(hfsmp, 0);
}
exit:
if (started_tr)
restore:
if (fp->ff_blocks == headblks) {
if (took_trunc_lock)
- hfs_unlock_truncate(cp, 0);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
goto exit;
}
/*
lockflags = 0;
if (took_trunc_lock)
- hfs_unlock_truncate(cp, 0);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
goto exit;
}
-/*
- * Clone a symlink.
- *
- */
-static int
-hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
-{
- struct buf *head_bp = NULL;
- struct buf *tail_bp = NULL;
- int error;
-
-
- error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
- if (error)
- goto out;
-
- tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
- if (tail_bp == NULL) {
- error = EIO;
- goto out;
- }
- bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
- error = (int)buf_bwrite(tail_bp);
-out:
- if (head_bp) {
- buf_markinvalid(head_bp);
- buf_brelse(head_bp);
- }
- (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
-
- return (error);
-}
-
/*
* Clone a file's data within the file.
*
hfs_unlock(VTOC(vp));
#if CONFIG_PROTECT
- if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
- hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
return (error);
}
#endif /* CONFIG_PROTECT */
- if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
- hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize, VM_KERN_MEMORY_FILE)) {
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
return (ENOMEM);
}
break;
}
if (uio_resid(auio) != 0) {
- printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
+ printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
error = EIO;
break;
}
ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
} else {
/*
- * No need to call ubc_sync_range or hfs_invalbuf
+ * No need to call ubc_msync or hfs_invalbuf
* since the file was copied using IO_NOCACHE and
* the copy was done starting and ending on a page
* boundary in the file.
}
kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
- hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
return (error);
}
bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
breadcnt = bufsize / iosize;
- if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+ if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize, VM_KERN_MEMORY_FILE)) {
return (ENOMEM);
}
start_blk = ((daddr64_t)blkstart * blksize) / iosize;
return (error);
}
+
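+/*
+ * Bring in and zero any pages covering this file's invalid ranges that
+ * are not already present in the cache (see the "INVALID RANGES" notes
+ * above hfs_vnop_blockmap); pages already in the cache are left alone.
+ * Expects both the cnode lock and the truncate lock to be held, as
+ * asserted below.
+ */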
+errno_t hfs_flush_invalid_ranges(vnode_t vp)
+{
+ cnode_t *cp = VTOC(vp);
+
+ assert(cp->c_lockowner == current_thread());
+ assert(cp->c_truncatelockowner == current_thread());
+
+ if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
+ return 0;
+
+ filefork_t *fp = VTOF(vp);
+
+ /*
+ * We can't hold the cnode lock whilst we call cluster_write so we
+ * need to copy the extents into a local buffer.
+ */
+ int max_exts = 16;
+ struct ext {
+ off_t start, end;
+ } exts_buf[max_exts]; // 256 bytes
+ struct ext *exts = exts_buf;
+ int ext_count = 0;
+ errno_t ret;
+
+ struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
+
+ while (r) {
+ /* If we have more than can fit in our stack buffer, switch
+ to a heap buffer. */
+ if (exts == exts_buf && ext_count == max_exts) {
+ max_exts = 256;
+ MALLOC(exts, struct ext *, sizeof(struct ext) * max_exts,
+ M_TEMP, M_WAITOK);
+ memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
+ }
+
+ struct rl_entry *next = TAILQ_NEXT(r, rl_link);
+
+ exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };
+
+ if (!next || (ext_count == max_exts && exts != exts_buf)) {
+ hfs_unlock(cp);
+ for (int i = 0; i < ext_count; ++i) {
+ ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
+ exts[i].start, 0,
+ IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
+ if (ret) {
+ hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+ goto exit;
+ }
+ }
+
+ if (!next) {
+ hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+ break;
+ }
+
+ /* Push any existing clusters which should clean up our invalid
+ ranges as they go through hfs_vnop_blockmap. */
+ cluster_push(vp, 0);
+
+ hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+
+ /*
+ * Get back to where we were (given we dropped the lock).
+ * This shouldn't be many because we pushed above.
+ */
+ TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
+ if (r->rl_end > exts[ext_count - 1].end)
+ break;
+ }
+
+ ext_count = 0;
+ } else
+ r = next;
+ }
+
+ ret = 0;
+
+exit:
+
+ if (exts != exts_buf)
+ FREE(exts, M_TEMP);
+
+ return ret;
+}