/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/* @(#)hfs_readwrite.c 1.0
 */
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
+#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
+#include <sys/kauth.h>
#include <sys/vnode.h>
+#include <sys/vnode_internal.h>
#include <sys/uio.h>
+#include <sys/vfs_context.h>
+#include <sys/fsevents.h>
+#include <kern/kalloc.h>
+#include <sys/disk.h>
+#include <sys/sysctl.h>
+#include <sys/fsctl.h>
#include <miscfs/specfs/specdev.h>
#include <sys/ubc.h>
+#include <sys/ubc_internal.h>
+
#include <vm/vm_pageout.h>
+#include <vm/vm_kern.h>
#include <sys/kdebug.h>
#include "hfs.h"
+#include "hfs_attrlist.h"
#include "hfs_endian.h"
+#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"
-extern int overflow_extents(struct filefork *fp);
-
#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
enum {
MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};
-extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
+/* from bsd/hfs/hfs_vfsops.c */
+extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
+static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
+static int hfs_clonefile(struct vnode *, int, int, int);
+static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
+static int hfs_minorupdate(struct vnode *vp);
+static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
-/*****************************************************************************
-*
-* Operations on vnodes
-*
-*****************************************************************************/
-/*
-#% read vp L L L
-#
- vop_read {
- IN struct vnode *vp;
- INOUT struct uio *uio;
- IN int ioflag;
- IN struct ucred *cred;
+int flush_cache_on_write = 0;
+SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
- */
+/*
+ * Read data from a file.
+ */
int
-hfs_read(ap)
- struct vop_read_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap;
+hfs_vnop_read(struct vnop_read_args *ap)
{
- register struct uio *uio = ap->a_uio;
- register struct vnode *vp = ap->a_vp;
+ uio_t uio = ap->a_uio;
+ struct vnode *vp = ap->a_vp;
struct cnode *cp;
struct filefork *fp;
- struct buf *bp;
- daddr_t logBlockNo;
- u_long fragSize, moveSize, startOffset, ioxfersize;
- int devBlockSize = 0;
- off_t bytesRemaining;
+ struct hfsmount *hfsmp;
+ off_t filesize;
+ off_t filebytes;
+ off_t start_resid = uio_resid(uio);
+ off_t offset = uio_offset(uio);
int retval = 0;
- off_t filesize;
- off_t filebytes;
/* Preflight checks */
- if (vp->v_type != VREG && vp->v_type != VLNK)
- return (EISDIR); /* HFS can only read files */
- if (uio->uio_resid == 0)
+ if (!vnode_isreg(vp)) {
+ /* can only read regular files */
+ if (vnode_isdir(vp))
+ return (EISDIR);
+ else
+ return (EPERM);
+ }
+ if (start_resid == 0)
return (0); /* Nothing left to do */
- if (uio->uio_offset < 0)
+ if (offset < 0)
return (EINVAL); /* cant read from a negative offset */
+
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
+ return 0;
+ }
+ /* otherwise read the resource fork normally */
+ } else {
+ int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+ if (compressed) {
+ retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
+ if (compressed) {
+ if (retval == 0) {
+ /* successful read, update the access time */
+ VTOC(vp)->c_touch_acctime = TRUE;
+
+ /* compressed files are not hot file candidates */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ VTOF(vp)->ff_bytesread = 0;
+ }
+ }
+ return retval;
+ }
+ /* otherwise the file was converted back to a regular file while we were reading it */
+ retval = 0;
+ }
+ }
+#endif /* HFS_COMPRESSION */
cp = VTOC(vp);
fp = VTOF(vp);
+ hfsmp = VTOHFS(vp);
+
+ /* Protect against a size change. */
+ hfs_lock_truncate(cp, 0);
+
filesize = fp->ff_size;
- filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
- if (uio->uio_offset > filesize) {
- if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
- return (EFBIG);
- else
- return (0);
+ filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
+ if (offset > filesize) {
+ if ((hfsmp->hfs_flags & HFS_STANDARD) &&
+ (offset > (off_t)MAXHFSFILESIZE)) {
+ retval = EFBIG;
+ }
+ goto exit;
}
- VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
-
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
- (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);
+ (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
- if (UBCISVALID(vp)) {
- retval = cluster_read(vp, uio, filesize, devBlockSize, 0);
- } else {
+ retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
- for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
-
- if ((bytesRemaining = (filesize - uio->uio_offset)) <= 0)
- break;
-
- logBlockNo = (daddr_t)(uio->uio_offset / PAGE_SIZE_64);
- startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64);
- fragSize = PAGE_SIZE;
-
- if (((logBlockNo * PAGE_SIZE) + fragSize) < filesize)
- ioxfersize = fragSize;
- else {
- ioxfersize = filesize - (logBlockNo * PAGE_SIZE);
- ioxfersize = (ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1);
- }
- moveSize = ioxfersize;
- moveSize -= startOffset;
-
- if (bytesRemaining < moveSize)
- moveSize = bytesRemaining;
-
- if (uio->uio_resid < moveSize) {
- moveSize = uio->uio_resid;
- };
- if (moveSize == 0) {
- break;
- };
-
- if (( uio->uio_offset + fragSize) >= filesize) {
- retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
-
- } else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) {
- daddr_t nextLogBlockNo = logBlockNo + 1;
- int nextsize;
-
- if (((nextLogBlockNo * PAGE_SIZE) +
- (daddr_t)fragSize) < filesize)
- nextsize = fragSize;
- else {
- nextsize = filesize - (nextLogBlockNo * PAGE_SIZE);
- nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1);
- }
- retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp);
- } else {
- retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
- };
-
- if (retval != E_NONE) {
- if (bp) {
- brelse(bp);
- bp = NULL;
- }
- break;
- };
- vp->v_lastr = logBlockNo;
-
- /*
- * We should only get non-zero b_resid when an I/O retval
- * has occurred, which should cause us to break above.
- * However, if the short read did not cause an retval,
- * then we want to ensure that we do not uiomove bad
- * or uninitialized data.
- */
- ioxfersize -= bp->b_resid;
-
- if (ioxfersize < moveSize) { /* XXX PPD This should take the offset into account, too! */
- if (ioxfersize == 0)
- break;
- moveSize = ioxfersize;
- }
- if ((startOffset + moveSize) > bp->b_bcount)
- panic("hfs_read: bad startOffset or moveSize\n");
-
- if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio)))
- break;
-
- if (S_ISREG(cp->c_mode) &&
- (((startOffset + moveSize) == fragSize) || (uio->uio_offset == filesize))) {
- bp->b_flags |= B_AGE;
- };
-
- brelse(bp);
- /* Start of loop resets bp to NULL before reaching outside this block... */
- }
-
- if (bp != NULL) {
- brelse(bp);
- }
- }
-
- cp->c_flag |= C_ACCESS;
+ cp->c_touch_acctime = TRUE;
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
- (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);
+ (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
+
+ /*
+ * Keep track of blocks read (for hot file clustering)
+ */
+ if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
+ int took_cnode_lock = 0;
+ off_t bytesread;
+ bytesread = start_resid - uio_resid(uio);
+
+ /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
+ if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ took_cnode_lock = 1;
+ }
+ /*
+ * If this file hasn't been seen since the start of
+ * the current sampling period then start over.
+ */
+ if (cp->c_atime < hfsmp->hfc_timebase) {
+ struct timeval tv;
+
+ fp->ff_bytesread = bytesread;
+ microtime(&tv);
+ cp->c_atime = tv.tv_sec;
+ } else {
+ fp->ff_bytesread += bytesread;
+ }
+ if (took_cnode_lock)
+ hfs_unlock(cp);
+ }
+exit:
+ hfs_unlock_truncate(cp, 0);
return (retval);
}
/*
- * Write data to a file or directory.
-#% write vp L L L
-#
- vop_write {
- IN struct vnode *vp;
- INOUT struct uio *uio;
- IN int ioflag;
- IN struct ucred *cred;
-
- */
+ * Write data to a file.
+ */
int
-hfs_write(ap)
- struct vop_write_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap;
+hfs_vnop_write(struct vnop_write_args *ap)
{
+ uio_t uio = ap->a_uio;
struct vnode *vp = ap->a_vp;
- struct uio *uio = ap->a_uio;
struct cnode *cp;
struct filefork *fp;
- struct buf *bp;
- struct proc *p;
- struct timeval tv;
- ExtendedVCB *vcb;
- int devBlockSize = 0;
- daddr_t logBlockNo;
- long fragSize;
- off_t origFileSize, currOffset, writelimit, bytesToAdd;
- off_t actualBytesAdded;
- u_long blkoffset, resid, xfersize, clearSize;
- int eflags, ioflag;
- int retval;
- off_t filebytes;
- u_long fileblocks;
struct hfsmount *hfsmp;
- int started_tr = 0, grabbed_lock = 0;
+ kauth_cred_t cred = NULL;
+ off_t origFileSize;
+ off_t writelimit;
+ off_t bytesToAdd = 0;
+ off_t actualBytesAdded;
+ off_t filebytes;
+ off_t offset;
+ ssize_t resid;
+ int eflags;
+ int ioflag = ap->a_ioflag;
+ int retval = 0;
+ int lockflags;
+ int cnode_locked = 0;
+ int partialwrite = 0;
+ int exclusive_lock = 0;
+
+#if HFS_COMPRESSION
+ if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
+ int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
+ switch(state) {
+ case FILE_IS_COMPRESSED:
+ return EACCES;
+ case FILE_IS_CONVERTING:
+ /* if FILE_IS_CONVERTING, we allow writes */
+ break;
+ default:
+ printf("invalid state %d for compressed file\n", state);
+ /* fall through */
+ }
+ }
+#endif
- ioflag = ap->a_ioflag;
+ // LP64todo - fix this! uio_resid may be 64-bit value
+ resid = uio_resid(uio);
+ offset = uio_offset(uio);
- if (uio->uio_offset < 0)
+ if (ioflag & IO_APPEND) {
+ exclusive_lock = 1;
+ }
+
+ if (offset < 0)
return (EINVAL);
- if (uio->uio_resid == 0)
+ if (resid == 0)
return (E_NONE);
- if (vp->v_type != VREG && vp->v_type != VLNK)
- return (EISDIR); /* Can only write files */
+ if (!vnode_isreg(vp))
+ return (EPERM); /* Can only write regular files */
cp = VTOC(vp);
fp = VTOF(vp);
- vcb = VTOVCB(vp);
- fileblocks = fp->ff_blocks;
- filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
-
- if (ioflag & IO_APPEND)
- uio->uio_offset = fp->ff_size;
- if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
- return (EPERM);
-
- // XXXdbg - don't allow modification of the journal or journal_info_block
- if (VTOHFS(vp)->jnl && cp->c_datafork) {
- struct HFSPlusExtentDescriptor *extd;
+ hfsmp = VTOHFS(vp);
- extd = &cp->c_datafork->ff_data.cf_extents[0];
- if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
- return EPERM;
- }
+ eflags = kEFDeferMask; /* defer file block allocations */
+#ifdef HFS_SPARSE_DEV
+ /*
+ * When the underlying device is sparse and space
+ * is low (< 8MB), stop doing delayed allocations
+ * and begin doing synchronous I/O.
+ */
+ if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
+ (hfs_freeblks(hfsmp, 0) < 2048)) {
+ eflags &= ~kEFDeferMask;
+ ioflag |= IO_SYNC;
}
+#endif /* HFS_SPARSE_DEV */
- writelimit = uio->uio_offset + uio->uio_resid;
+again:
+ /* Protect against a size change. */
+ hfs_lock_truncate(cp, exclusive_lock);
- /*
- * Maybe this should be above the vnode op call, but so long as
- * file servers have no limits, I don't think it matters.
- */
- p = uio->uio_procp;
- if (vp->v_type == VREG && p &&
- writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
- psignal(p, SIGXFSZ);
- return (EFBIG);
+ if (ioflag & IO_APPEND) {
+ uio_setoffset(uio, fp->ff_size);
+ offset = fp->ff_size;
+ }
+ if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
+ retval = EPERM;
+ goto exit;
}
- p = current_proc();
-
- VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
- resid = uio->uio_resid;
origFileSize = fp->ff_size;
- eflags = kEFDeferMask; /* defer file block allocations */
- filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
-
- /*
- * NOTE: In the following loop there are two positions tracked:
- * currOffset is the current I/O starting offset. currOffset
- * is never >LEOF; the LEOF is nudged along with currOffset as
- * data is zeroed or written. uio->uio_offset is the start of
- * the current I/O operation. It may be arbitrarily beyond
- * currOffset.
- *
- * The following is true at all times:
- * currOffset <= LEOF <= uio->uio_offset <= writelimit
+ writelimit = offset + resid;
+ filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
+
+ /* If the truncate lock is shared, and if we either have virtual
+ * blocks or will need to extend the file, upgrade the truncate
+ * lock to exclusive. If the upgrade fails, we lose the lock and
+ * have to take the exclusive lock again. Note that we want to
+ * grab the truncate lock exclusive even if we're not allocating new blocks
+ * because we could still be growing past the LEOF.
*/
- currOffset = MIN(uio->uio_offset, fp->ff_size);
+ if ((exclusive_lock == 0) &&
+ ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
+ exclusive_lock = 1;
+ /* Lock upgrade failed and we lost our shared lock, try again */
+ if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
+ goto again;
+ }
+ }
+
+ if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
+ goto exit;
+ }
+ cnode_locked = 1;
+
+ if (!exclusive_lock) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
+ (int)offset, uio_resid(uio), (int)fp->ff_size,
+ (int)filebytes, 0);
+ }
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
- (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
- retval = 0;
+ /* Check if we do not need to extend the file */
+ if (writelimit <= filebytes) {
+ goto sizeok;
+ }
- /* Now test if we need to extend the file */
- /* Doing so will adjust the filebytes for us */
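+ /* The write extends past the space currently allocated to the fork:
+ * charge quota and allocate more blocks inside a transaction. */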
+ cred = vfs_context_ucred(ap->a_context);
+ bytesToAdd = writelimit - filebytes;
#if QUOTA
- if(writelimit > filebytes) {
- bytesToAdd = writelimit - filebytes;
-
- retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)),
- ap->a_cred, 0);
- if (retval)
- return (retval);
- }
+ retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
+ cred, 0);
+ if (retval)
+ goto exit;
#endif /* QUOTA */
- hfsmp = VTOHFS(vp);
- if (writelimit > filebytes) {
- hfs_global_shared_lock_acquire(hfsmp);
- grabbed_lock = 1;
- }
- if (hfsmp->jnl && (writelimit > filebytes)) {
- if (journal_start_transaction(hfsmp->jnl) != 0) {
- hfs_global_shared_lock_release(hfsmp);
- return EINVAL;
- }
- started_tr = 1;
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto exit;
}
while (writelimit > filebytes) {
-
bytesToAdd = writelimit - filebytes;
- if (suser(ap->a_cred, NULL) != 0)
+ if (cred && suser(cred, NULL) != 0)
eflags |= kEFReserveMask;
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
- if (retval != E_NONE)
- break;
-
- retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ /* Files that are changing size are not hot file candidates. */
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+ retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
0, eflags, &actualBytesAdded));
- (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
if ((actualBytesAdded == 0) && (retval == E_NONE))
retval = ENOSPC;
if (retval != E_NONE)
break;
- filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+ filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
- (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
+ (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
}
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ (void) hfs_end_transaction(hfsmp);
- // XXXdbg
- if (started_tr) {
- hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
- journal_end_transaction(hfsmp->jnl);
- started_tr = 0;
- }
- if (grabbed_lock) {
- hfs_global_shared_lock_release(hfsmp);
- grabbed_lock = 0;
+ /*
+ * If we didn't grow the file enough, try a partial write.
+ * POSIX expects this behavior.
+ */
+ if ((retval == ENOSPC) && (filebytes > offset)) {
+ retval = 0;
+ partialwrite = 1;
+ uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
+ resid -= bytesToAdd;
+ writelimit = filebytes;
}
-
- if (UBCISVALID(vp) && retval == E_NONE) {
+sizeok:
+ if (retval == E_NONE) {
off_t filesize;
off_t zero_off;
off_t tail_off;
off_t inval_start;
off_t inval_end;
- off_t io_start, io_end;
+ off_t io_start;
int lflag;
struct rl_entry *invalid_range;
else
filesize = fp->ff_size;
- lflag = (ioflag & IO_SYNC);
+ lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
- if (uio->uio_offset <= fp->ff_size) {
- zero_off = uio->uio_offset & ~PAGE_MASK_64;
+ if (offset <= fp->ff_size) {
+ zero_off = offset & ~PAGE_MASK_64;
 /* Check whether the area between zero_off and the start of the
 transfer is invalid and should be zero-filled as part of the
 transfer:
*/
- if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
- lflag |= IO_HEADZEROFILL;
+ if (offset > zero_off) {
+ if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
+ lflag |= IO_HEADZEROFILL;
+ }
} else {
off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
 will be handled by the cluster_write of the actual data.
*/
inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
- inval_end = uio->uio_offset & ~PAGE_MASK_64;
+ inval_end = offset & ~PAGE_MASK_64;
zero_off = fp->ff_size;
if ((fp->ff_size & PAGE_MASK_64) &&
};
if (inval_start < inval_end) {
+ struct timeval tv;
/* There's some range of data that's going to be marked invalid */
if (zero_off < inval_start) {
and the actual write will start on a page past inval_end. Now's the last
chance to zero-fill the page containing the EOF:
*/
- retval = cluster_write(vp, (struct uio *) 0,
+ hfs_unlock(cp);
+ cnode_locked = 0;
+ retval = cluster_write(vp, (uio_t) 0,
fp->ff_size, inval_start,
- zero_off, (off_t)0, devBlockSize,
+ zero_off, (off_t)0,
lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cnode_locked = 1;
if (retval) goto ioerr_exit;
+ offset = uio_offset(uio);
};
/* Mark the remaining area of the newly allocated space as invalid: */
rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
- cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
+ microuptime(&tv);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
zero_off = fp->ff_size = inval_end;
};
- if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
+ if (offset > zero_off) lflag |= IO_HEADZEROFILL;
};
/* Check to see whether the area between the end of the write and the end of
* made readable (removed from the invalid ranges) before cluster_write
* tries to write it:
*/
- io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
- io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
+ io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
if (io_start < fp->ff_size) {
+ off_t io_end;
+
+ io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
};
- retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
- tail_off, devBlockSize, lflag | IO_NOZERODIRTY);
-
- if (uio->uio_offset > fp->ff_size) {
- fp->ff_size = uio->uio_offset;
- ubc_setsize(vp, fp->ff_size); /* XXX check errors */
+ hfs_unlock(cp);
+ cnode_locked = 0;
+
+ /*
+ * We need to tell UBC the fork's new size BEFORE calling
+ * cluster_write, in case any of the new pages need to be
+ * paged out before cluster_write completes (which does happen
+ * in embedded systems due to extreme memory pressure).
+ * Similarly, we need to tell hfs_vnop_pageout what the new EOF
+ * will be, so that it can pass that on to cluster_pageout, and
+ * allow those pageouts.
+ *
+ * We don't update ff_size yet since we don't want pageins to
+ * be able to see uninitialized data between the old and new
+ * EOF, until cluster_write has completed and initialized that
+ * part of the file.
+ *
+ * The vnode pager relies on the file size last given to UBC via
+ * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
+ * ff_size (whichever is larger). NOTE: ff_new_size is always
+ * zero, unless we are extending the file via write.
+ */
+ if (filesize > fp->ff_size) {
+ fp->ff_new_size = filesize;
+ ubc_setsize(vp, filesize);
}
- if (resid > uio->uio_resid)
- cp->c_flag |= C_CHANGE | C_UPDATE;
- } else {
- while (retval == E_NONE && uio->uio_resid > 0) {
- logBlockNo = currOffset / PAGE_SIZE;
- blkoffset = currOffset & PAGE_MASK;
-
- if ((filebytes - currOffset) < PAGE_SIZE_64)
- fragSize = filebytes - ((off_t)logBlockNo * PAGE_SIZE_64);
- else
- fragSize = PAGE_SIZE;
- xfersize = fragSize - blkoffset;
-
- /* Make any adjustments for boundary conditions */
- if (currOffset + (off_t)xfersize > writelimit)
- xfersize = writelimit - currOffset;
-
- /*
- * There is no need to read into bp if:
- * We start on a block boundary and will overwrite the whole block
- *
- * OR
- */
- if ((blkoffset == 0) && (xfersize >= fragSize)) {
- bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
- retval = 0;
-
- if (bp->b_blkno == -1) {
- brelse(bp);
- retval = EIO; /* XXX */
- break;
- }
- } else {
-
- if (currOffset == fp->ff_size && blkoffset == 0) {
- bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
- retval = 0;
- if (bp->b_blkno == -1) {
- brelse(bp);
- retval = EIO; /* XXX */
- break;
- }
- } else {
- /*
- * This I/O transfer is not sufficiently aligned,
- * so read the affected block into a buffer:
- */
- retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp);
- if (retval != E_NONE) {
- if (bp)
- brelse(bp);
- break;
- }
- }
- }
-
- /* See if we are starting to write within file boundaries:
- * If not, then we need to present a "hole" for the area
- * between the current EOF and the start of the current
- * I/O operation:
- *
- * Note that currOffset is only less than uio_offset if
- * uio_offset > LEOF...
- */
- if (uio->uio_offset > currOffset) {
- clearSize = MIN(uio->uio_offset - currOffset, xfersize);
- bzero(bp->b_data + blkoffset, clearSize);
- currOffset += clearSize;
- blkoffset += clearSize;
- xfersize -= clearSize;
- }
-
- if (xfersize > 0) {
- retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio);
- currOffset += xfersize;
- }
-
- if (ioflag & IO_SYNC) {
- (void)VOP_BWRITE(bp);
- } else if ((xfersize + blkoffset) == fragSize) {
- bp->b_flags |= B_AGE;
- bawrite(bp);
- } else {
- bdwrite(bp);
+ retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
+ tail_off, lflag | IO_NOZERODIRTY);
+ if (retval) {
+ fp->ff_new_size = 0; /* no longer extending; use ff_size */
+ if (filesize > origFileSize) {
+ ubc_setsize(vp, origFileSize);
}
-
- /* Update the EOF if we just extended the file
- * (the PEOF has already been moved out and the
- * block mapping table has been updated):
- */
- if (currOffset > fp->ff_size) {
- fp->ff_size = currOffset;
- if (UBCISVALID(vp))
- ubc_setsize(vp, fp->ff_size); /* XXX check errors */
+ goto ioerr_exit;
+ }
+
+ if (filesize > origFileSize) {
+ fp->ff_size = filesize;
+
+ /* Files that are changing size are not hot file candidates. */
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
}
- if (retval || (resid == 0))
- break;
- cp->c_flag |= C_CHANGE | C_UPDATE;
- } /* endwhile */
+ }
+ fp->ff_new_size = 0; /* ff_size now has the correct size */
+
+ /* If we wrote some bytes, then touch the change and mod times */
+ if (resid > uio_resid(uio)) {
+ cp->c_touch_chgtime = TRUE;
+ cp->c_touch_modtime = TRUE;
+ }
+ }
+ if (partialwrite) {
+ uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
+ resid += bytesToAdd;
+ }
+
+ // XXXdbg - see radar 4871353 for more info
+ {
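+ /* When the flush_cache_on_write sysctl is set, force the drive to
+ * flush its write cache after uncached writes. */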
+ if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
+ VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
+ }
}
ioerr_exit:
* we clear the setuid and setgid bits as a precaution against
* tampering.
*/
- if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
- cp->c_mode &= ~(S_ISUID | S_ISGID);
-
+ if (cp->c_mode & (S_ISUID | S_ISGID)) {
+ cred = vfs_context_ucred(ap->a_context);
+ if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
+ if (!cnode_locked) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cnode_locked = 1;
+ }
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ }
+ }
if (retval) {
if (ioflag & IO_UNIT) {
- (void)VOP_TRUNCATE(vp, origFileSize,
- ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
- uio->uio_offset -= resid - uio->uio_resid;
- uio->uio_resid = resid;
- filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+ if (!cnode_locked) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cnode_locked = 1;
+ }
+ (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
+ 0, 0, ap->a_context);
+ // LP64todo - fix this! resid needs to be user_ssize_t
+ uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
+ uio_setresid(uio, resid);
+ filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
+ }
+ } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
+ if (!cnode_locked) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cnode_locked = 1;
}
- } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
- tv = time;
- retval = VOP_UPDATE(vp, &tv, &tv, 1);
+ retval = hfs_update(vp, TRUE);
}
+ /* Updating vcbWrCnt doesn't need to be atomic. */
+ hfsmp->vcbWrCnt++;
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
- (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
-
+ (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
+exit:
+ if (cnode_locked)
+ hfs_unlock(cp);
+ hfs_unlock_truncate(cp, exclusive_lock);
return (retval);
}
+/* support for the "bulk-access" fcntl */
-/*
-
-#% ioctl vp U U U
-#
- vop_ioctl {
- IN struct vnode *vp;
- IN u_long command;
- IN caddr_t data;
- IN int fflag;
- IN struct ucred *cred;
- IN struct proc *p;
-
- */
+#define CACHE_LEVELS 16
+#define NUM_CACHE_ENTRIES (64*16)
+#define PARENT_IDS_FLAG 0x100
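+/* Small, sorted cache of directory ids already checked, along with the access result for each. */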
+struct access_cache {
+ int numcached;
+ int cachehits; /* these two for statistics gathering */
+ int lookups;
+ unsigned int *acache;
+ unsigned char *haveaccess;
+};
-/* ARGSUSED */
-int
-hfs_ioctl(ap)
- struct vop_ioctl_args /* {
- struct vnode *a_vp;
- int a_command;
- caddr_t a_data;
- int a_fflag;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap;
-{
- switch (ap->a_command) {
- case 1: {
- register struct cnode *cp;
- register struct vnode *vp;
- register struct radvisory *ra;
- struct filefork *fp;
- int devBlockSize = 0;
- int error;
+struct access_t {
+ uid_t uid; /* IN: effective user id */
+ short flags; /* IN: access requested (i.e. R_OK) */
+ short num_groups; /* IN: number of groups user belongs to */
+ int num_files; /* IN: number of files to process */
+ int *file_ids; /* IN: array of file ids */
+ gid_t *groups; /* IN: array of groups */
+ short *access; /* OUT: access info for each file (0 for 'has access') */
+} __attribute__((unavailable)); // this structure is for reference purposes only
+
+struct user32_access_t {
+ uid_t uid; /* IN: effective user id */
+ short flags; /* IN: access requested (i.e. R_OK) */
+ short num_groups; /* IN: number of groups user belongs to */
+ int num_files; /* IN: number of files to process */
+ user32_addr_t file_ids; /* IN: array of file ids */
+ user32_addr_t groups; /* IN: array of groups */
+ user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+};
- vp = ap->a_vp;
+struct user64_access_t {
+ uid_t uid; /* IN: effective user id */
+ short flags; /* IN: access requested (i.e. R_OK) */
+ short num_groups; /* IN: number of groups user belongs to */
+ int num_files; /* IN: number of files to process */
+ user64_addr_t file_ids; /* IN: array of file ids */
+ user64_addr_t groups; /* IN: array of groups */
+ user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+};
- if (vp->v_type != VREG)
- return EINVAL;
-
- VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
- error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
- if (error)
- return (error);
- ra = (struct radvisory *)(ap->a_data);
- cp = VTOC(vp);
- fp = VTOF(vp);
+// these are the "extended" versions of the above structures
+// note that it is crucial that they be a different size than
+// the regular versions
+struct ext_access_t {
+ uint32_t flags; /* IN: access requested (i.e. R_OK) */
+ uint32_t num_files; /* IN: number of files to process */
+ uint32_t map_size; /* IN: size of the bit map */
+ uint32_t *file_ids; /* IN: Array of file ids */
+ char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
+ short *access; /* OUT: access info for each file (0 for 'has access') */
+ uint32_t num_parents; /* future use */
+ cnid_t *parents; /* future use */
+} __attribute__((unavailable)); // this structure is for reference purposes only
+
+struct user32_ext_access_t {
+ uint32_t flags; /* IN: access requested (i.e. R_OK) */
+ uint32_t num_files; /* IN: number of files to process */
+ uint32_t map_size; /* IN: size of the bit map */
+ user32_addr_t file_ids; /* IN: Array of file ids */
+ user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
+ user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+ uint32_t num_parents; /* future use */
+ user32_addr_t parents; /* future use */
+};
- if (ra->ra_offset >= fp->ff_size) {
- VOP_UNLOCK(vp, 0, ap->a_p);
- return (EFBIG);
- }
- VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
+struct user64_ext_access_t {
+ uint32_t flags; /* IN: access requested (i.e. R_OK) */
+ uint32_t num_files; /* IN: number of files to process */
+ uint32_t map_size; /* IN: size of the bit map */
+ user64_addr_t file_ids; /* IN: array of file ids */
+ user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
+ user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+ uint32_t num_parents;/* future use */
+ user64_addr_t parents;/* future use */
+};
- error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
- VOP_UNLOCK(vp, 0, ap->a_p);
- return (error);
+/*
+ * Perform a binary search for the given parent_id. Return value is
+ * the index if there is a match. If no_match_indexp is non-NULL it
+ * will be set to the index at which the item should be inserted
+ * (whether or not a match was found).
+ */
+static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
+{
+ int index=-1;
+ unsigned int lo=0;
+
+ do {
+ unsigned int mid = ((hi - lo)/2) + lo;
+ unsigned int this_id = array[mid];
+
+ if (parent_id == this_id) {
+ hi = mid;
+ break;
}
+
+ if (parent_id < this_id) {
+ hi = mid;
+ continue;
+ }
+
+ if (parent_id > this_id) {
+ lo = mid + 1;
+ continue;
+ }
+ } while(lo < hi);
- case 2: /* F_READBOOTBLOCKS */
- case 3: /* F_WRITEBOOTBLOCKS */
- {
- struct vnode *vp = ap->a_vp;
- struct vnode *devvp = NULL;
- struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
- int devBlockSize;
- int error;
- struct iovec aiov;
- struct uio auio;
- u_long blockNumber;
- u_long blockOffset;
- u_long xfersize;
- struct buf *bp;
-
- if ((vp->v_flag & VROOT) == 0) return EINVAL;
- if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;
-
- devvp = VTOHFS(vp)->hfs_devvp;
- aiov.iov_base = btd->fbt_buffer;
- aiov.iov_len = btd->fbt_length;
-
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = btd->fbt_offset;
- auio.uio_resid = btd->fbt_length;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
- auio.uio_procp = ap->a_p;
-
- VOP_DEVBLOCKSIZE(devvp, &devBlockSize);
-
- while (auio.uio_resid > 0) {
- blockNumber = auio.uio_offset / devBlockSize;
- error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
- if (error) {
- if (bp) brelse(bp);
- return error;
- };
-
- blockOffset = auio.uio_offset % devBlockSize;
- xfersize = devBlockSize - blockOffset;
- error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
- if (error) {
- brelse(bp);
- return error;
- };
- if (auio.uio_rw == UIO_WRITE) {
- error = VOP_BWRITE(bp);
- if (error) return error;
- } else {
- brelse(bp);
- };
- };
- };
- return 0;
-
- case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
- {
- *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
- return 0;
- }
-
- default:
- return (ENOTTY);
+ /* check if lo and hi converged on the match */
+ if (parent_id == array[hi]) {
+ index = hi;
+ }
+
+ if (no_match_indexp) {
+ *no_match_indexp = hi;
}
- /* Should never get here */
- return 0;
+ return index;
}
-
-/* ARGSUSED */
-int
-hfs_select(ap)
- struct vop_select_args /* {
- struct vnode *a_vp;
- int a_which;
- int a_fflags;
- struct ucred *a_cred;
- void *a_wql;
- struct proc *a_p;
- } */ *ap;
+
+
+static int
+lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
- /*
- * We should really check to see if I/O is possible.
- */
- return (1);
+ unsigned int hi;
+ int matches = 0;
+ int index, no_match_index;
+
+ if (cache->numcached == 0) {
+ *indexp = 0;
+ return 0; // table is empty, so insert at index=0 and report no match
+ }
+
+ if (cache->numcached > NUM_CACHE_ENTRIES) {
+ /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
+ cache->numcached, NUM_CACHE_ENTRIES);*/
+ cache->numcached = NUM_CACHE_ENTRIES;
+ }
+
+ hi = cache->numcached - 1;
+
+ index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
+
+ /* if no existing entry found, find index for new one */
+ if (index == -1) {
+ index = no_match_index;
+ matches = 0;
+ } else {
+ matches = 1;
+ }
+
+ *indexp = index;
+ return matches;
}
/*
- * Bmap converts a the logical block number of a file to its physical block
- * number on the disk.
- */
-
-/*
- * vp - address of vnode file the file
- * bn - which logical block to convert to a physical block number.
- * vpp - returns the vnode for the block special file holding the filesystem
- * containing the file of interest
- * bnp - address of where to return the filesystem physical block number
-#% bmap vp L L L
-#% bmap vpp - U -
-#
- vop_bmap {
- IN struct vnode *vp;
- IN daddr_t bn;
- OUT struct vnode **vpp;
- IN daddr_t *bnp;
- OUT int *runp;
- */
-/*
- * Converts a logical block number to a physical block, and optionally returns
- * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
- * The physical block number is based on the device block size, currently its 512.
- * The block run is returned in logical blocks, and is the REMAINING amount of blocks
+ * Add a node to the access_cache at the given index (or do a lookup first
+ * to find the index if -1 is passed in). We currently do a replace rather
+ * than an insert if the cache is full.
*/
-
-int
-hfs_bmap(ap)
- struct vop_bmap_args /* {
- struct vnode *a_vp;
- daddr_t a_bn;
- struct vnode **a_vpp;
- daddr_t *a_bnp;
- int *a_runp;
- } */ *ap;
+static void
+add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
- struct vnode *vp = ap->a_vp;
- struct cnode *cp = VTOC(vp);
- struct filefork *fp = VTOF(vp);
- struct hfsmount *hfsmp = VTOHFS(vp);
- int retval = E_NONE;
- daddr_t logBlockSize;
- size_t bytesContAvail = 0;
- off_t blockposition;
- struct proc *p = NULL;
- int lockExtBtree;
- struct rl_entry *invalid_range;
- enum rl_overlaptype overlaptype;
+ int lookup_index = -1;
+
+ /* need to do a lookup first if -1 passed for index */
+ if (index == -1) {
+ if (lookup_bucket(cache, &lookup_index, nodeID)) {
+ if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
+ // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
+ cache->haveaccess[lookup_index] = access;
+ }
- /*
- * Check for underlying vnode requests and ensure that logical
- * to physical mapping is requested.
- */
- if (ap->a_vpp != NULL)
- *ap->a_vpp = cp->c_devvp;
- if (ap->a_bnp == NULL)
- return (0);
+ /* mission accomplished */
+ return;
+ } else {
+ index = lookup_index;
+ }
- /* Only clustered I/O should have delayed allocations. */
- DBG_ASSERT(fp->ff_unallocblocks == 0);
+ }
- logBlockSize = GetLogicalBlockSize(vp);
- blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;
+ /* if the cache is full, do a replace rather than an insert */
+ if (cache->numcached >= NUM_CACHE_ENTRIES) {
+ //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
+ cache->numcached = NUM_CACHE_ENTRIES-1;
- lockExtBtree = overflow_extents(fp);
- if (lockExtBtree) {
- p = current_proc();
- retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
- LK_EXCLUSIVE | LK_CANRECURSE, p);
- if (retval)
- return (retval);
+ if (index > cache->numcached) {
+ // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
+ index = cache->numcached;
}
+ }
- retval = MacToVFSError(
- MapFileBlockC (HFSTOVCB(hfsmp),
- (FCB*)fp,
- MAXPHYSIO,
- blockposition,
- ap->a_bnp,
- &bytesContAvail));
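+ /* Keep the array sorted: if the new id belongs after the entry at this slot, bump the insertion point. */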
+ if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
+ index++;
+ }
- if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
-
- if (retval == E_NONE) {
- /* Adjust the mapping information for invalid file ranges: */
- overlaptype = rl_scan(&fp->ff_invalidranges,
- blockposition,
- blockposition + MAXPHYSIO - 1,
- &invalid_range);
- if (overlaptype != RL_NOOVERLAP) {
- switch(overlaptype) {
- case RL_MATCHINGOVERLAP:
- case RL_OVERLAPCONTAINSRANGE:
- case RL_OVERLAPSTARTSBEFORE:
- /* There's no valid block for this byte offset: */
- *ap->a_bnp = (daddr_t)-1;
- bytesContAvail = invalid_range->rl_end + 1 - blockposition;
- break;
-
- case RL_OVERLAPISCONTAINED:
- case RL_OVERLAPENDSAFTER:
- /* The range of interest hits an invalid block before the end: */
- if (invalid_range->rl_start == blockposition) {
- /* There's actually no valid information to be had starting here: */
- *ap->a_bnp = (daddr_t)-1;
- if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
- (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
- bytesContAvail = invalid_range->rl_end + 1 - blockposition;
- };
- } else {
- bytesContAvail = invalid_range->rl_start - blockposition;
- };
- break;
- };
- if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
- };
-
- /* Figure out how many read ahead blocks there are */
- if (ap->a_runp != NULL) {
- if (can_cluster(logBlockSize)) {
- /* Make sure this result never goes negative: */
- *ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
- } else {
- *ap->a_runp = 0;
- };
- };
- };
-
- return (retval);
+ if (index >= 0 && index < cache->numcached) {
+ /* only do bcopy if we're inserting */
+ bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
+ bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
+ }
+
+ cache->acache[index] = nodeID;
+ cache->haveaccess[index] = access;
+ cache->numcached++;
}
-/* blktooff converts logical block number to file offset */
-int
-hfs_blktooff(ap)
- struct vop_blktooff_args /* {
- struct vnode *a_vp;
- daddr_t a_lblkno;
- off_t *a_offset;
- } */ *ap;
-{
- if (ap->a_vp == NULL)
- return (EINVAL);
- *ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64;
+struct cinfo {
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+ cnid_t parentcnid;
+ u_int16_t recflags;
+};
- return(0);
-}
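+/* Callback for hfs_chash_snoop: copies the ownership, mode, parent id, and record flags of an in-core cnode into the caller's cinfo. */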
+static int
+snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
+{
+ struct cinfo *cip = (struct cinfo *)arg;
-int
-hfs_offtoblk(ap)
- struct vop_offtoblk_args /* {
- struct vnode *a_vp;
- off_t a_offset;
- daddr_t *a_lblkno;
- } */ *ap;
-{
- if (ap->a_vp == NULL)
- return (EINVAL);
- *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;
+ cip->uid = attrp->ca_uid;
+ cip->gid = attrp->ca_gid;
+ cip->mode = attrp->ca_mode;
+ cip->parentcnid = descp->cd_parentcnid;
+ cip->recflags = attrp->ca_recflags;
+
+ return (0);
+}
- return(0);
+/*
+ * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
+ * isn't incore, then go to the catalog.
+ */
+static int
+do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
+ struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
+{
+ int error = 0;
+
+ /* if this id matches the one the fsctl was called with, skip the lookup */
+ if (cnid == skip_cp->c_cnid) {
+ cnattrp->ca_uid = skip_cp->c_uid;
+ cnattrp->ca_gid = skip_cp->c_gid;
+ cnattrp->ca_mode = skip_cp->c_mode;
+ cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
+ keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
+ } else {
+ struct cinfo c_info;
+
+ /* otherwise, check the cnode hash in case the file/dir is incore */
+ if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) {
+ cnattrp->ca_uid = c_info.uid;
+ cnattrp->ca_gid = c_info.gid;
+ cnattrp->ca_mode = c_info.mode;
+ cnattrp->ca_recflags = c_info.recflags;
+ keyp->hfsPlus.parentID = c_info.parentcnid;
+ } else {
+ int lockflags;
+
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+ /* lookup this cnid in the catalog */
+ error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ cache->lookups++;
+ }
+ }
+
+ return (error);
}
-int
-hfs_cmap(ap)
- struct vop_cmap_args /* {
- struct vnode *a_vp;
- off_t a_foffset;
- size_t a_size;
- daddr_t *a_bpn;
- size_t *a_run;
- void *a_poff;
- } */ *ap;
+
+/*
+ * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
+ * up to CACHE_LEVELS as we progress towards the root.
+ */
+static int
+do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
+ struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
+ struct vfs_context *my_context,
+ char *bitmap,
+ uint32_t map_size,
+ cnid_t* parents,
+ uint32_t num_parents)
{
- struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
- struct filefork *fp = VTOF(ap->a_vp);
- size_t bytesContAvail = 0;
- int retval = E_NONE;
- int lockExtBtree = 0;
- struct proc *p = NULL;
- struct rl_entry *invalid_range;
- enum rl_overlaptype overlaptype;
- int started_tr = 0, grabbed_lock = 0;
+ int myErr = 0;
+ int myResult;
+ HFSCatalogNodeID thisNodeID;
+ unsigned int myPerms;
+ struct cat_attr cnattr;
+ int cache_index = -1, scope_index = -1, scope_idx_start = -1;
+ CatalogKey catkey;
+
+ int i = 0, ids_to_cache = 0;
+ int parent_ids[CACHE_LEVELS];
+
+ thisNodeID = nodeID;
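+ /* Walk from nodeID up toward the root, checking access at each directory along the way. */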
+ while (thisNodeID >= kRootDirID) {
+ myResult = 0; /* default to "no access" */
+
+ /* check the cache before resorting to hitting the catalog */
+
+ /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
+ * to look any further after hitting cached dir */
+
+ if (lookup_bucket(cache, &cache_index, thisNodeID)) {
+ cache->cachehits++;
+ myErr = cache->haveaccess[cache_index];
+ if (scope_index != -1) {
+ if (myErr == ESRCH) {
+ myErr = 0;
+ }
+ } else {
+ scope_index = 0; // so we'll just use the cache result
+ scope_idx_start = ids_to_cache;
+ }
+ myResult = (myErr == 0) ? 1 : 0;
+ goto ExitThisRoutine;
+ }
- /*
- * Check for underlying vnode requests and ensure that logical
- * to physical mapping is requested.
- */
- if (ap->a_bpn == NULL)
- return (0);
- p = current_proc();
- if (fp->ff_unallocblocks) {
- lockExtBtree = 1;
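+ /* Check whether this directory id appears in the caller-supplied list of parent ids (scope check). */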
+ if (parents) {
+ int tmp;
+ tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
+ if (scope_index == -1)
+ scope_index = tmp;
+ if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
+ scope_idx_start = ids_to_cache;
+ }
+ }
- // XXXdbg
- hfs_global_shared_lock_acquire(hfsmp);
- grabbed_lock = 1;
+ /* remember which parents we want to cache */
+ if (ids_to_cache < CACHE_LEVELS) {
+ parent_ids[ids_to_cache] = thisNodeID;
+ ids_to_cache++;
+ }
+ // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
+ if (bitmap && map_size) {
+ bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
+ }
+
- if (hfsmp->jnl) {
- if (journal_start_transaction(hfsmp->jnl) != 0) {
- hfs_global_shared_lock_release(hfsmp);
- return EINVAL;
- } else {
- started_tr = 1;
- }
- }
+ /* do the lookup (checks the cnode hash, then the catalog) */
+ myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
+ if (myErr) {
+ goto ExitThisRoutine; /* no access */
+ }
- if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
- if (started_tr) {
- journal_end_transaction(hfsmp->jnl);
- }
- if (grabbed_lock) {
- hfs_global_shared_lock_release(hfsmp);
- }
- return (retval);
- }
- } else if (overflow_extents(fp)) {
- lockExtBtree = 1;
- if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
- return retval;
- }
+ /* Root always gets access. */
+ if (suser(myp_ucred, NULL) == 0) {
+ thisNodeID = catkey.hfsPlus.parentID;
+ myResult = 1;
+ continue;
}
- /*
- * Check for any delayed allocations.
- */
- if (fp->ff_unallocblocks) {
- SInt64 reqbytes, actbytes;
+ // if the thing has acl's, do the full permission check
+ if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
+ struct vnode *vp;
- reqbytes = (SInt64)fp->ff_unallocblocks *
- (SInt64)HFSTOVCB(hfsmp)->blockSize;
- /*
- * Release the blocks on loan and aquire some real ones.
- * Note that we can race someone else for these blocks
- * (and lose) so cmap needs to handle a failure here.
- * Currently this race can't occur because all allocations
- * are protected by an exclusive lock on the Extents
- * Overflow file.
- */
- HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
- FTOC(fp)->c_blocks -= fp->ff_unallocblocks;
- fp->ff_blocks -= fp->ff_unallocblocks;
- fp->ff_unallocblocks = 0;
-
- while (retval == 0 && reqbytes > 0) {
- retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
- (FCB*)fp, reqbytes, 0,
- kEFAllMask | kEFNoClumpMask, &actbytes));
- if (retval == 0 && actbytes == 0)
- retval = ENOSPC;
-
- if (retval) {
- fp->ff_unallocblocks =
- reqbytes / HFSTOVCB(hfsmp)->blockSize;
- HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
- FTOC(fp)->c_blocks += fp->ff_unallocblocks;
- fp->ff_blocks += fp->ff_unallocblocks;
- }
- reqbytes -= actbytes;
- }
+ /* get the vnode for this cnid */
+ myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
+ if ( myErr ) {
+ myResult = 0;
+ goto ExitThisRoutine;
+ }
- if (retval) {
- (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
- if (started_tr) {
- hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
- journal_end_transaction(hfsmp->jnl);
- }
- if (grabbed_lock) {
- hfs_global_shared_lock_release(hfsmp);
- }
- return (retval);
- }
- VTOC(ap->a_vp)->c_flag |= C_MODIFIED;
- }
+ thisNodeID = VTOC(vp)->c_parentcnid;
- retval = MacToVFSError(
- MapFileBlockC (HFSTOVCB(hfsmp),
- (FCB *)fp,
- ap->a_size,
- ap->a_foffset,
- ap->a_bpn,
- &bytesContAvail));
+ hfs_unlock(VTOC(vp));
- if (lockExtBtree)
- (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+ if (vnode_vtype(vp) == VDIR) {
+ myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
+ } else {
+ myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
+ }
- // XXXdbg
- if (started_tr) {
- hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
- journal_end_transaction(hfsmp->jnl);
- started_tr = 0;
- }
- if (grabbed_lock) {
- hfs_global_shared_lock_release(hfsmp);
- grabbed_lock = 0;
+ vnode_put(vp);
+ if (myErr) {
+ myResult = 0;
+ goto ExitThisRoutine;
+ }
+ } else {
+ unsigned int flags;
+
+ myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
+ cnattr.ca_mode, hfsmp->hfs_mp,
+ myp_ucred, theProcPtr);
+
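+ /* Directories need both read and search permission; plain files only need read. */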
+ if (cnattr.ca_mode & S_IFDIR) {
+ flags = R_OK | X_OK;
+ } else {
+ flags = R_OK;
+ }
+ if ( (myPerms & flags) != flags) {
+ myResult = 0;
+ myErr = EACCES;
+ goto ExitThisRoutine; /* no access */
+ }
+
+ /* up the hierarchy we go */
+ thisNodeID = catkey.hfsPlus.parentID;
}
-
- if (retval == E_NONE) {
- /* Adjust the mapping information for invalid file ranges: */
- overlaptype = rl_scan(&fp->ff_invalidranges,
- ap->a_foffset,
- ap->a_foffset + (off_t)bytesContAvail - 1,
- &invalid_range);
- if (overlaptype != RL_NOOVERLAP) {
- switch(overlaptype) {
- case RL_MATCHINGOVERLAP:
- case RL_OVERLAPCONTAINSRANGE:
- case RL_OVERLAPSTARTSBEFORE:
- /* There's no valid block for this byte offset: */
- *ap->a_bpn = (daddr_t)-1;
-
- /* There's no point limiting the amount to be returned if the
- invalid range that was hit extends all the way to the EOF
- (i.e. there's no valid bytes between the end of this range
- and the file's EOF):
- */
- if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
- (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
- bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
- };
- break;
-
- case RL_OVERLAPISCONTAINED:
- case RL_OVERLAPENDSAFTER:
- /* The range of interest hits an invalid block before the end: */
- if (invalid_range->rl_start == ap->a_foffset) {
- /* There's actually no valid information to be had starting here: */
- *ap->a_bpn = (daddr_t)-1;
- if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
- (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
- bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
- };
- } else {
- bytesContAvail = invalid_range->rl_start - ap->a_foffset;
- };
- break;
- };
- if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
- };
-
- if (ap->a_run) *ap->a_run = bytesContAvail;
- };
+ }
- if (ap->a_poff)
- *(int *)ap->a_poff = 0;
+ /* if here, we have access to this node */
+ myResult = 1;
- return (retval);
-}
+ ExitThisRoutine:
+ if (parents && myErr == 0 && scope_index == -1) {
+ myErr = ESRCH;
+ }
+
+ if (myErr) {
+ myResult = 0;
+ }
+ *err = myErr;
+
+ /* cache the parent directory(ies) */
+ for (i = 0; i < ids_to_cache; i++) {
+ if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
+ add_node(cache, -1, parent_ids[i], ESRCH);
+ } else {
+ add_node(cache, -1, parent_ids[i], myErr);
+ }
+ }
+ return (myResult);
+}
-/*
- * Read or write a buffer that is not contiguous on disk. We loop over
- * each device block, copying to or from caller's buffer.
- *
- * We could be a bit more efficient by transferring as much data as is
- * contiguous. But since this routine should rarely be called, and that
- * would be more complicated; best to keep it simple.
- */
static int
-hfs_strategy_fragmented(struct buf *bp)
+do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
+ struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
- register struct vnode *vp = bp->b_vp;
- register struct cnode *cp = VTOC(vp);
- register struct vnode *devvp = cp->c_devvp;
- caddr_t ioaddr; /* Address of fragment within bp */
- struct buf *frag = NULL; /* For reading or writing a single block */
- int retval = 0;
- long remaining; /* Bytes (in bp) left to transfer */
- off_t offset; /* Logical offset of current fragment in vp */
- u_long block_size; /* Size of one device block (and one I/O) */
+ boolean_t is64bit;
+
+ /*
+ * NOTE: on entry, the vnode is locked. In case this vnode
+ * happens to be in our list of file_ids, we note it so we can
+ * avoid calling hfs_chashget_nowait() on that id, as that
+ * would cause a "locking against myself" panic.
+ */
+ Boolean check_leaf = true;
+
+ struct user64_ext_access_t *user_access_structp;
+ struct user64_ext_access_t tmp_user_access;
+ struct access_cache cache;
+
+ int error = 0, prev_parent_check_ok=1;
+ unsigned int i;
+
+ short flags;
+ unsigned int num_files = 0;
+ int map_size = 0;
+ int num_parents = 0;
+ int *file_ids=NULL;
+ short *access=NULL;
+ char *bitmap=NULL;
+ cnid_t *parents=NULL;
+ int leaf_index;
- /* Make sure we redo this mapping for the next I/O */
- bp->b_blkno = bp->b_lblkno;
+ cnid_t cnid;
+ cnid_t prevParent_cnid = 0;
+ unsigned int myPerms;
+ short myaccess = 0;
+ struct cat_attr cnattr;
+ CatalogKey catkey;
+ struct cnode *skip_cp = VTOC(vp);
+ kauth_cred_t cred = vfs_context_ucred(context);
+ proc_t p = vfs_context_proc(context);
+
+ is64bit = proc_is64bit(p);
+
+ /* initialize the local cache and buffers */
+ cache.numcached = 0;
+ cache.cachehits = 0;
+ cache.lookups = 0;
+ cache.acache = NULL;
+ cache.haveaccess = NULL;
+
+ /* struct copyin done during dispatch... need to copy file_id array separately */
+ if (ap->a_data == NULL) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
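+ /* Normalize all three possible argument layouts into a user64_ext_access_t before proceeding. */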
+ if (is64bit) {
+ if (arg_size != sizeof(struct user64_ext_access_t)) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ user_access_structp = (struct user64_ext_access_t *)ap->a_data;
+
+ } else if (arg_size == sizeof(struct user32_access_t)) {
+ struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
+
+ // convert an old style bulk-access struct to the new style
+ tmp_user_access.flags = accessp->flags;
+ tmp_user_access.num_files = accessp->num_files;
+ tmp_user_access.map_size = 0;
+ tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
+ tmp_user_access.bitmap = USER_ADDR_NULL;
+ tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
+ tmp_user_access.num_parents = 0;
+ user_access_structp = &tmp_user_access;
+
+ } else if (arg_size == sizeof(struct user32_ext_access_t)) {
+ struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
+
+ // up-cast from a 32-bit version of the struct
+ tmp_user_access.flags = accessp->flags;
+ tmp_user_access.num_files = accessp->num_files;
+ tmp_user_access.map_size = accessp->map_size;
+ tmp_user_access.num_parents = accessp->num_parents;
+
+ tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
+ tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
+ tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
+ tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
+
+ user_access_structp = &tmp_user_access;
+ } else {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ map_size = user_access_structp->map_size;
+
+ num_files = user_access_structp->num_files;
+
+ num_parents = user_access_structp->num_parents;
+
+ if (num_files < 1) {
+ goto err_exit_bulk_access;
+ }
+ if (num_files > 1024) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ if (num_parents > 1024) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ file_ids = (int *) kalloc(sizeof(int) * num_files);
+ access = (short *) kalloc(sizeof(short) * num_files);
+ if (map_size) {
+ bitmap = (char *) kalloc(sizeof(char) * map_size);
+ }
+
+ if (num_parents) {
+ parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
+ }
+
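+ /*
+ * Small per-call cache: acache holds recently-checked parent cnids and
+ * haveaccess holds the corresponding result, so repeated ancestors in
+ * the request do not have to be re-evaluated.
+ */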
+ cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
+ cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
+
+ if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
+ if (file_ids) {
+ kfree(file_ids, sizeof(int) * num_files);
+ }
+ if (bitmap) {
+ kfree(bitmap, sizeof(char) * map_size);
+ }
+ if (access) {
+ kfree(access, sizeof(short) * num_files);
+ }
+ if (cache.acache) {
+ kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
+ }
+ if (cache.haveaccess) {
+ kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
+ }
+ if (parents) {
+ kfree(parents, sizeof(cnid_t) * num_parents);
+ }
+ return ENOMEM;
+ }
+
+ // make sure the bitmap is zeroed out...
+ if (bitmap) {
+ bzero(bitmap, (sizeof(char) * map_size));
+ }
+
+ if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
+ num_files * sizeof(int)))) {
+ goto err_exit_bulk_access;
+ }
- /* Set up the logical position and number of bytes to read/write */
- offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
- block_size = VTOHFS(vp)->hfs_phys_block_size;
+ if (num_parents) {
+ if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
+ num_parents * sizeof(cnid_t)))) {
+ goto err_exit_bulk_access;
+ }
+ }
- /* Get an empty buffer to do the deblocking */
- frag = geteblk(block_size);
- if (ISSET(bp->b_flags, B_READ))
- SET(frag->b_flags, B_READ);
-
- for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
- ioaddr += block_size, offset += block_size,
- remaining -= block_size) {
- frag->b_resid = frag->b_bcount;
- CLR(frag->b_flags, B_DONE);
-
- /* Map the current position to a physical block number */
- retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
- NULL, NULL);
- if (retval != 0)
- break;
+ flags = user_access_structp->flags;
+ if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
+ flags = R_OK;
+ }
+
+ /* check if we've been passed leaf node ids or parent ids */
+ if (flags & PARENT_IDS_FLAG) {
+ check_leaf = false;
+ }
+
+ /* Check access to each file_id passed in */
+ for (i = 0; i < num_files; i++) {
+ leaf_index=-1;
+ cnid = (cnid_t) file_ids[i];
+
+ /* root always has access */
+ if ((!parents) && (!suser(cred, NULL))) {
+ access[i] = 0;
+ continue;
+ }
+
+ if (check_leaf) {
+ /* do the lookup (checks the cnode hash, then the catalog) */
+ error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
+ if (error) {
+ access[i] = (short) error;
+ continue;
+ }
+
+ if (parents) {
+ // Check if the leaf matches one of the parent scopes
+ leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
+ if (leaf_index >= 0 && parents[leaf_index] == cnid)
+ prev_parent_check_ok = 0;
+ else if (leaf_index >= 0)
+ prev_parent_check_ok = 1;
+ }
- /*
- * Did we try to read a hole?
- * (Should never happen for metadata!)
- */
- if ((long)frag->b_lblkno == -1) {
- bzero(ioaddr, block_size);
- continue;
+ // if the thing has acl's, do the full permission check
+ if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
+ struct vnode *cvp;
+ int myErr = 0;
+ /* get the vnode for this cnid */
+ myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
+ if ( myErr ) {
+ access[i] = myErr;
+ continue;
}
- /* If writing, copy before I/O */
- if (!ISSET(bp->b_flags, B_READ))
- bcopy(ioaddr, frag->b_data, block_size);
-
- /* Call the device to do the I/O and wait for it */
- frag->b_blkno = frag->b_lblkno;
- frag->b_vp = devvp; /* Used to dispatch via VOP_STRATEGY */
- frag->b_dev = devvp->v_rdev;
- retval = VOP_STRATEGY(frag);
- frag->b_vp = NULL;
- if (retval != 0)
- break;
- retval = biowait(frag);
- if (retval != 0)
- break;
+ hfs_unlock(VTOC(cvp));
+
+ if (vnode_vtype(cvp) == VDIR) {
+ myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
+ } else {
+ myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
+ }
+
+ vnode_put(cvp);
+ if (myErr) {
+ access[i] = myErr;
+ continue;
+ }
+ } else {
+ /* before calling CheckAccess(), check the target file for read access */
+ myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
+ cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
- /* If reading, copy after the I/O */
- if (ISSET(bp->b_flags, B_READ))
- bcopy(frag->b_data, ioaddr, block_size);
+ /* fail fast if no access */
+ if ((myPerms & flags) == 0) {
+ access[i] = EACCES;
+ continue;
+ }
+ }
+ } else {
+ /* we were passed an array of parent ids */
+ catkey.hfsPlus.parentID = cnid;
}
+
+ /* if the last guy had the same parent and had access, we're done */
+ if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
+ cache.cachehits++;
+ access[i] = 0;
+ continue;
+ }
+
+ myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
+ skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);
+
+ if (myaccess || (error == ESRCH && leaf_index != -1)) {
+ access[i] = 0; // have access.. no errors to report
+ } else {
+ access[i] = (error != 0 ? (short) error : EACCES);
+ }
+
+ prevParent_cnid = catkey.hfsPlus.parentID;
+ }
+
+ /* copyout the access array */
+ if ((error = copyout((caddr_t)access, user_access_structp->access,
+ num_files * sizeof (short)))) {
+ goto err_exit_bulk_access;
+ }
+ if (map_size && bitmap) {
+ if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
+ map_size * sizeof (char)))) {
+ goto err_exit_bulk_access;
+ }
+ }
- frag->b_vp = NULL;
- //
- // XXXdbg - in the case that this is a meta-data block, it won't affect
- // the journal because this bp is for a physical disk block,
- // not a logical block that is part of the catalog or extents
- // files.
- SET(frag->b_flags, B_INVAL);
- brelse(frag);
-
- if ((bp->b_error = retval) != 0)
- SET(bp->b_flags, B_ERROR);
-
- biodone(bp); /* This I/O is now complete */
- return retval;
+
+ err_exit_bulk_access:
+
+ //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
+
+ if (file_ids)
+ kfree(file_ids, sizeof(int) * num_files);
+ if (parents)
+ kfree(parents, sizeof(cnid_t) * num_parents);
+ if (bitmap)
+ kfree(bitmap, sizeof(char) * map_size);
+ if (access)
+ kfree(access, sizeof(short) * num_files);
+ if (cache.acache)
+ kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
+ if (cache.haveaccess)
+ kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
+
+ return (error);
}
+/* end "bulk-access" support */
+
+
/*
- * Calculate the logical to physical mapping if not done already,
- * then call the device strategy routine.
-#
-#vop_strategy {
-# IN struct buf *bp;
- */
+ * Callback for use with freeze ioctl.
+ */
+static int
+hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
+{
+ vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
+
+ return 0;
+}
+
+/*
+ * Control filesystem operating characteristics.
+ */
int
-hfs_strategy(ap)
- struct vop_strategy_args /* {
- struct buf *a_bp;
- } */ *ap;
+hfs_vnop_ioctl( struct vnop_ioctl_args /* {
+ vnode_t a_vp;
+ int a_command;
+ caddr_t a_data;
+ int a_fflag;
+ vfs_context_t a_context;
+ } */ *ap)
{
- register struct buf *bp = ap->a_bp;
- register struct vnode *vp = bp->b_vp;
- register struct cnode *cp = VTOC(vp);
- int retval = 0;
- off_t offset;
- size_t bytes_contig;
+ struct vnode * vp = ap->a_vp;
+ struct hfsmount *hfsmp = VTOHFS(vp);
+ vfs_context_t context = ap->a_context;
+ kauth_cred_t cred = vfs_context_ucred(context);
+ proc_t p = vfs_context_proc(context);
+ struct vfsstatfs *vfsp;
+ boolean_t is64bit;
+ off_t jnl_start, jnl_size;
+ struct hfs_journal_info *jip;
+#if HFS_COMPRESSION
+ int compressed = 0;
+ off_t uncompressed_size = -1;
+ int decmpfs_error = 0;
- if ( !(bp->b_flags & B_VECTORLIST)) {
- if (vp->v_type == VBLK || vp->v_type == VCHR)
- panic("hfs_strategy: device vnode passed!");
+ if (ap->a_command == F_RDADVISE) {
+ /* we need to inspect the decmpfs state of the file as early as possible */
+ compressed = hfs_file_is_compressed(VTOC(vp), 0);
+ if (compressed) {
+ if (VNODE_IS_RSRC(vp)) {
+ /* if this is the resource fork, treat it as if it were empty */
+ uncompressed_size = 0;
+ } else {
+ decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
+ if (decmpfs_error != 0) {
+ /* failed to get the uncompressed size, we'll check for this later */
+ uncompressed_size = -1;
+ }
+ }
+ }
+ }
+#endif /* HFS_COMPRESSION */
- if (bp->b_flags & B_PAGELIST) {
- /*
- * If we have a page list associated with this bp,
- * then go through cluster_bp since it knows how to
- * deal with a page request that might span non-
- * contiguous physical blocks on the disk...
- */
- retval = cluster_bp(bp);
- vp = cp->c_devvp;
- bp->b_dev = vp->v_rdev;
+ is64bit = proc_is64bit(p);
- return (retval);
+ switch (ap->a_command) {
+
+ case HFS_GETPATH:
+ {
+ struct vnode *file_vp;
+ cnid_t cnid;
+ int outlen;
+ char *bufptr;
+ int error;
+
+ /* Caller must be owner of file system. */
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES);
}
-
- /*
- * If we don't already know the filesystem relative block
- * number then get it using VOP_BMAP(). If VOP_BMAP()
- * returns the block number as -1 then we've got a hole in
- * the file. Although HFS filesystems don't create files with
- * holes, invalidating of subranges of the file (lazy zero
- * filling) may create such a situation.
+ /* Target vnode must be file system's root. */
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
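+ /* The caller passes the target cnid as a decimal string in the ioctl
+ * buffer; the resolved path is written back into the same buffer. */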
+ bufptr = (char *)ap->a_data;
+ cnid = strtoul(bufptr, NULL, 10);
+
+ /* We need to call hfs_vfs_vget to leverage the code that will
+ * fix the origin list for us if needed, as opposed to calling
+ * hfs_vget, since we will need the parent for build_path call.
*/
- if (bp->b_blkno == bp->b_lblkno) {
- offset = (off_t) bp->b_lblkno *
- (off_t) GetLogicalBlockSize(vp);
-
- if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
- &bp->b_blkno, &bytes_contig, NULL))) {
- bp->b_error = retval;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return (retval);
- }
- if (bytes_contig < bp->b_bcount)
- {
- /*
- * We were asked to read a block that wasn't
- * contiguous, so we have to read each of the
- * pieces and copy them into the buffer.
- * Since ordinary file I/O goes through
- * cluster_io (which won't ask us for
- * discontiguous data), this is probably an
- * attempt to read or write metadata.
- */
- return hfs_strategy_fragmented(bp);
- }
- if ((long)bp->b_blkno == -1)
- clrbuf(bp);
+
+ if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
+ return (error);
}
- if ((long)bp->b_blkno == -1) {
- biodone(bp);
- return (0);
+ error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
+ vnode_put(file_vp);
+
+ return (error);
+ }
+
+ case HFS_PREV_LINK:
+ case HFS_NEXT_LINK:
+ {
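+ /* Given a hardlink's fileid, return the fileid of the previous or
+ * next link in the same hardlink chain. */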
+ cnid_t linkfileid;
+ cnid_t nextlinkid;
+ cnid_t prevlinkid;
+ int error;
+
+ /* Caller must be owner of file system. */
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES);
}
- if (bp->b_validend == 0) {
- /*
- * Record the exact size of the I/O transfer about to
- * be made:
- */
- bp->b_validend = bp->b_bcount;
+ /* Target vnode must be file system's root. */
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
}
+ linkfileid = *(cnid_t *)ap->a_data;
+ if (linkfileid < kHFSFirstUserCatalogNodeID) {
+ return (EINVAL);
+ }
+ if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
+ return (error);
+ }
+ if (ap->a_command == HFS_NEXT_LINK) {
+ *(cnid_t *)ap->a_data = nextlinkid;
+ } else {
+ *(cnid_t *)ap->a_data = prevlinkid;
+ }
+ return (0);
}
- vp = cp->c_devvp;
- bp->b_dev = vp->v_rdev;
- return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
-}
+ case HFS_RESIZE_PROGRESS: {
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+ /* file system must not be mounted read-only */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
-/*
-#
-#% truncate vp L L L
-#
-vop_truncate {
- IN struct vnode *vp;
- IN off_t length;
- IN int flags; (IO_SYNC)
- IN struct ucred *cred;
- IN struct proc *p;
-};
- * Truncate a cnode to at most length size, freeing (or adding) the
- * disk blocks.
- */
-int hfs_truncate(ap)
- struct vop_truncate_args /* {
- struct vnode *a_vp;
- off_t a_length;
- int a_flags;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap;
-{
- register struct vnode *vp = ap->a_vp;
- register struct cnode *cp = VTOC(vp);
- struct filefork *fp = VTOF(vp);
- off_t length;
- long vflags;
- struct timeval tv;
- int retval;
- off_t bytesToAdd;
- off_t actualBytesAdded;
- off_t filebytes;
- u_long fileblocks;
- int blksize;
- struct hfsmount *hfsmp;
+ return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
+ }
- if (vp->v_type != VREG && vp->v_type != VLNK)
- return (EISDIR); /* cannot truncate an HFS directory! */
+ case HFS_RESIZE_VOLUME: {
+ u_int64_t newsize;
+ u_int64_t cursize;
- length = ap->a_length;
- blksize = VTOVCB(vp)->blockSize;
- fileblocks = fp->ff_blocks;
- filebytes = (off_t)fileblocks * (off_t)blksize;
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+
+ /* filesystem must not be mounted read only */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ newsize = *(u_int64_t *)ap->a_data;
+ cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
+
+ if (newsize > cursize) {
+ return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
+ } else if (newsize < cursize) {
+ return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
+ } else {
+ return (0);
+ }
+ }
+ case HFS_CHANGE_NEXT_ALLOCATION: {
+ int error = 0; /* Assume success */
+ u_int32_t location;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
- (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+ if (vnode_vfsisrdonly(vp)) {
+ return (EROFS);
+ }
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ location = *(u_int32_t *)ap->a_data;
+ if ((location >= hfsmp->allocLimit) &&
+ (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
+ error = EINVAL;
+ goto fail_change_next_allocation;
+ }
+ /* Return previous value. */
+ *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
+ if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
+ /* On magic value for location, set nextAllocation to next block
+ * after metadata zone and set flag in mount structure to indicate
+ * that nextAllocation should not be updated again.
+ */
+ if (hfsmp->hfs_metazone_end != 0) {
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
+ }
+ hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
+ } else {
+ hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
+ }
+ MarkVCBDirty(hfsmp);
+fail_change_next_allocation:
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ return (error);
+ }
- if (length < 0)
- return (EINVAL);
+#ifdef HFS_SPARSE_DEV
+ case HFS_SETBACKINGSTOREINFO: {
+ struct vnode * bsfs_rootvp;
+ struct vnode * di_vp;
+ struct hfs_backingstoreinfo *bsdata;
+ int error = 0;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+ return (EALREADY);
+ }
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
+ if (bsdata == NULL) {
+ return (EINVAL);
+ }
+ if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
+ return (error);
+ }
+ if ((error = vnode_getwithref(di_vp))) {
+ file_drop(bsdata->backingfd);
+ return(error);
+ }
- if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
- return (EFBIG);
+ if (vnode_mount(vp) == vnode_mount(di_vp)) {
+ (void)vnode_put(di_vp);
+ file_drop(bsdata->backingfd);
+ return (EINVAL);
+ }
- hfsmp = VTOHFS(vp);
+ /*
+ * Obtain the backing fs root vnode and keep a reference
+ * on it. This reference will be dropped in hfs_unmount.
+ */
+ error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
+ if (error) {
+ (void)vnode_put(di_vp);
+ file_drop(bsdata->backingfd);
+ return (error);
+ }
+ vnode_ref(bsfs_rootvp);
+ vnode_put(bsfs_rootvp);
- tv = time;
- retval = E_NONE;
+ hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
+ hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
+ hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
+ hfsmp->hfs_sparsebandblks *= 4;
- /*
- * We cannot just check if fp->ff_size == length (as an optimization)
- * since there may be extra physical blocks that also need truncation.
- */
-#if QUOTA
- if (retval = hfs_getinoquota(cp))
- return(retval);
-#endif /* QUOTA */
+ vfs_markdependency(hfsmp->hfs_mp);
- /*
- * Lengthen the size of the file. We must ensure that the
- * last byte of the file is allocated. Since the smallest
- * value of ff_size is 0, length will be at least 1.
- */
- if (length > fp->ff_size) {
-#if QUOTA
- retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
- ap->a_cred, 0);
- if (retval)
- goto Err_Exit;
-#endif /* QUOTA */
/*
- * If we don't have enough physical space then
- * we need to extend the physical size.
+ * If the sparse image is on a sparse image file (as opposed to a sparse
+ * bundle), then we may need to limit the free space to the maximum size
+ * of a file on that volume. So we query (using pathconf), and if we get
+ * a meaningful result, we cache the number of blocks for later use in
+ * hfs_freeblks().
*/
- if (length > filebytes) {
- int eflags;
-
- /* All or nothing and don't round up to clumpsize. */
- eflags = kEFAllMask | kEFNoClumpMask;
+ hfsmp->hfs_backingfs_maxblocks = 0;
+ if (vnode_vtype(di_vp) == VREG) {
+ int terr;
+ int hostbits;
+ terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
+ if (terr == 0 && hostbits != 0 && hostbits < 64) {
+ u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
+
+ hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
+ }
+ }
+
+ (void)vnode_put(di_vp);
+ file_drop(bsdata->backingfd);
+ return (0);
+ }
+ case HFS_CLRBACKINGSTOREINFO: {
+ struct vnode * tmpvp;
- if (suser(ap->a_cred, NULL) != 0)
- eflags |= kEFReserveMask; /* keep a reserve */
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
- // XXXdbg
- hfs_global_shared_lock_acquire(hfsmp);
- if (hfsmp->jnl) {
- if (journal_start_transaction(hfsmp->jnl) != 0) {
- retval = EINVAL;
- goto Err_Exit;
- }
- }
+ if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
+ hfsmp->hfs_backingfs_rootvp) {
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
- if (retval) {
- if (hfsmp->jnl) {
- journal_end_transaction(hfsmp->jnl);
- }
- hfs_global_shared_lock_release(hfsmp);
+ hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
+ tmpvp = hfsmp->hfs_backingfs_rootvp;
+ hfsmp->hfs_backingfs_rootvp = NULLVP;
+ hfsmp->hfs_sparsebandblks = 0;
+ vnode_rele(tmpvp);
+ }
+ return (0);
+ }
+#endif /* HFS_SPARSE_DEV */
- goto Err_Exit;
- }
+ case F_FREEZE_FS: {
+ struct mount *mp;
+
+ mp = vnode_mount(vp);
+ hfsmp = VFSTOHFS(mp);
- while ((length > filebytes) && (retval == E_NONE)) {
- bytesToAdd = length - filebytes;
- retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
- (FCB*)fp,
- bytesToAdd,
- 0,
- eflags,
- &actualBytesAdded));
+ if (!(hfsmp->jnl))
+ return (ENOTSUP);
- filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
- if (actualBytesAdded == 0 && retval == E_NONE) {
- if (length > filebytes)
- length = filebytes;
- break;
- }
- } /* endwhile */
+ vfsp = vfs_statfs(mp);
+
+ if (kauth_cred_getuid(cred) != vfsp->f_owner &&
+ !kauth_cred_issuser(cred))
+ return (EACCES);
- (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+ lck_rw_lock_exclusive(&hfsmp->hfs_insync);
+
+ // flush things before we get started to try and prevent
+ // dirty data from being paged out while we're frozen.
+ // note: can't do this after taking the lock as it will
+ // deadlock against ourselves.
+ vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
+ hfs_global_exclusive_lock_acquire(hfsmp);
+
+ // DO NOT call hfs_journal_flush() because that takes a
+ // shared lock on the global exclusive lock!
+ journal_flush(hfsmp->jnl);
+
+ // don't need to iterate on all vnodes, we just need to
+ // wait for writes to the system files and the device vnode
+ if (HFSTOVCB(hfsmp)->extentsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
+ if (HFSTOVCB(hfsmp)->catalogRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
+ if (HFSTOVCB(hfsmp)->allocationsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
+ if (hfsmp->hfs_attribute_vp)
+ vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
+ vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
+
+ hfsmp->hfs_freezing_proc = current_proc();
- // XXXdbg
- if (hfsmp->jnl) {
- hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
- journal_end_transaction(hfsmp->jnl);
- }
- hfs_global_shared_lock_release(hfsmp);
+ return (0);
+ }
- if (retval)
- goto Err_Exit;
+ case F_THAW_FS: {
+ vfsp = vfs_statfs(vnode_mount(vp));
+ if (kauth_cred_getuid(cred) != vfsp->f_owner &&
+ !kauth_cred_issuser(cred))
+ return (EACCES);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
- (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+ // if we're not the one who froze the fs then we
+ // can't thaw it.
+ if (hfsmp->hfs_freezing_proc != current_proc()) {
+ return EPERM;
}
-
- if (!(ap->a_flags & IO_NOZEROFILL)) {
- if (UBCINFOEXISTS(vp) && retval == E_NONE) {
- struct rl_entry *invalid_range;
- int devBlockSize;
- off_t zero_limit;
-
- zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
- if (length < zero_limit) zero_limit = length;
- if (length > fp->ff_size) {
- /* Extending the file: time to fill out the current last page w. zeroes? */
- if ((fp->ff_size & PAGE_MASK_64) &&
- (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
- fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
-
- /* There's some valid data at the start of the (current) last page
- of the file, so zero out the remainder of that page to ensure the
- entire page contains valid data. Since there is no invalid range
- possible past the (current) eof, there's no need to remove anything
- from the invalid range list before calling cluster_write(): */
- VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
- retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
- fp->ff_size, (off_t)0, devBlockSize,
- (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
- if (retval) goto Err_Exit;
-
- /* Merely invalidate the remaining area, if necessary: */
- if (length > zero_limit) {
- rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
- cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
- }
- } else {
- /* The page containing the (current) eof is invalid: just add the
- remainder of the page to the invalid list, along with the area
- being newly allocated:
- */
- rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
- cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
- };
- }
- } else {
- panic("hfs_truncate: invoked on non-UBC object?!");
- };
- }
- cp->c_flag |= C_UPDATE;
- fp->ff_size = length;
+ // NOTE: if you add code here, also go check the
+ // code that "thaws" the fs in hfs_vnop_close()
+ //
+ hfsmp->hfs_freezing_proc = NULL;
+ hfs_global_exclusive_lock_release(hfsmp);
+ lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
- if (UBCISVALID(vp))
- ubc_setsize(vp, fp->ff_size); /* XXX check errors */
+ return (0);
+ }
- } else { /* Shorten the size of the file */
+ case HFS_BULKACCESS_FSCTL: {
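+ /* Original (non-extended) bulk-access interface; the extended variant
+ * below adds the bitmap and parent-scope arguments. */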
+ int size;
+
+ if (hfsmp->hfs_flags & HFS_STANDARD) {
+ return EINVAL;
+ }
- if (fp->ff_size > length) {
- /*
- * Any buffers that are past the truncation point need to be
- * invalidated (to maintain buffer cache consistency). For
- * simplicity, we invalidate all the buffers by calling vinvalbuf.
- */
- if (UBCISVALID(vp))
- ubc_setsize(vp, length); /* XXX check errors */
+ if (is64bit) {
+ size = sizeof(struct user64_access_t);
+ } else {
+ size = sizeof(struct user32_access_t);
+ }
+
+ return do_bulk_access_check(hfsmp, vp, ap, size, context);
+ }
- vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
- retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
+ case HFS_EXT_BULKACCESS_FSCTL: {
+ int size;
- /* Any space previously marked as invalid is now irrelevant: */
- rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
+ if (hfsmp->hfs_flags & HFS_STANDARD) {
+ return EINVAL;
+ }
+
+ if (is64bit) {
+ size = sizeof(struct user64_ext_access_t);
+ } else {
+ size = sizeof(struct user32_ext_access_t);
+ }
+
+ return do_bulk_access_check(hfsmp, vp, ap, size, context);
+ }
+
+ case HFS_SETACLSTATE: {
+ int state;
+
+ if (ap->a_data == NULL) {
+ return (EINVAL);
}
- /*
- * Account for any unmapped blocks. Note that the new
- * file length can still end up with unmapped blocks.
- */
- if (fp->ff_unallocblocks > 0) {
- u_int32_t finalblks;
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ state = *(int *)ap->a_data;
- /* lock extents b-tree */
- retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
- LK_EXCLUSIVE, ap->a_p);
- if (retval)
- goto Err_Exit;
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ // super-user can enable or disable acl's on a volume.
+ // the volume owner can only enable acl's
+ if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
+ return (EPERM);
+ }
+ if (state == 0 || state == 1)
+ return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
+ else
+ return (EINVAL);
+ }
- VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
- cp->c_blocks -= fp->ff_unallocblocks;
- fp->ff_blocks -= fp->ff_unallocblocks;
- fp->ff_unallocblocks = 0;
+ case HFS_SET_XATTREXTENTS_STATE: {
+ int state;
- finalblks = (length + blksize - 1) / blksize;
- if (finalblks > fp->ff_blocks) {
- /* calculate required unmapped blocks */
- fp->ff_unallocblocks = finalblks - fp->ff_blocks;
- VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
- cp->c_blocks += fp->ff_unallocblocks;
- fp->ff_blocks += fp->ff_unallocblocks;
- }
- (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
- LK_RELEASE, ap->a_p);
+ if (ap->a_data == NULL) {
+ return (EINVAL);
}
- /*
- * For a TBE process the deallocation of the file blocks is
- * delayed until the file is closed. And hfs_close calls
- * truncate with the IO_NDELAY flag set. So when IO_NDELAY
- * isn't set, we make sure this isn't a TBE process.
+ state = *(int *)ap->a_data;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
+ /* Super-user can enable or disable extent-based extended
+ * attribute support on a volume
*/
- if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
-#if QUOTA
- off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
-#endif /* QUOTA */
- // XXXdbg
- hfs_global_shared_lock_acquire(hfsmp);
- if (hfsmp->jnl) {
- if (journal_start_transaction(hfsmp->jnl) != 0) {
- retval = EINVAL;
- goto Err_Exit;
- }
- }
+ if (!is_suser()) {
+ return (EPERM);
+ }
+ if (state == 0 || state == 1)
+ return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
+ else
+ return (EINVAL);
+ }
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
- if (retval) {
- if (hfsmp->jnl) {
- journal_end_transaction(hfsmp->jnl);
- }
- hfs_global_shared_lock_release(hfsmp);
- goto Err_Exit;
- }
-
- if (fp->ff_unallocblocks == 0)
- retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
- (FCB*)fp, length, false));
+ case F_FULLFSYNC: {
+ int error;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+ if (error == 0) {
+ error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
+ hfs_unlock(VTOC(vp));
+ }
- (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+ return error;
+ }
- // XXXdbg
- if (hfsmp->jnl) {
- hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
- journal_end_transaction(hfsmp->jnl);
- }
- hfs_global_shared_lock_release(hfsmp);
+ case F_CHKCLEAN: {
+ register struct cnode *cp;
+ int error;
- filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
- if (retval)
- goto Err_Exit;
-#if QUOTA
- /* These are bytesreleased */
- (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
-#endif /* QUOTA */
+ if (!vnode_isreg(vp))
+ return EINVAL;
+
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+ if (error == 0) {
+ cp = VTOC(vp);
+ /*
+ * Used by a regression test to determine whether
+ * all of the dirty pages (from write) have been cleaned
+ * after a call to fsync().
+ */
+ error = is_file_clean(vp, VTOF(vp)->ff_size);
+ hfs_unlock(cp);
}
- /* Only set update flag if the logical length changes */
- if (fp->ff_size != length)
- cp->c_flag |= C_UPDATE;
- fp->ff_size = length;
+ return (error);
}
- cp->c_flag |= C_CHANGE;
- retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
- if (retval) {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
- -1, -1, -1, retval, 0);
+
+ case F_RDADVISE: {
+ register struct radvisory *ra;
+ struct filefork *fp;
+ int error;
+
+ if (!vnode_isreg(vp))
+ return EINVAL;
+
+ ra = (struct radvisory *)(ap->a_data);
+ fp = VTOF(vp);
+
+ /* Protect against a size change. */
+ hfs_lock_truncate(VTOC(vp), TRUE);
+
+#if HFS_COMPRESSION
+ if (compressed && (uncompressed_size == -1)) {
+ /* fetching the uncompressed size failed above, so return the error */
+ error = decmpfs_error;
+ } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
+ (!compressed && (ra->ra_offset >= fp->ff_size))) {
+ error = EFBIG;
+ }
+#else /* HFS_COMPRESSION */
+ if (ra->ra_offset >= fp->ff_size) {
+ error = EFBIG;
+ }
+#endif /* HFS_COMPRESSION */
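+ /* Note: this trailing else binds to whichever if/else chain was
+ * compiled above (with or without HFS_COMPRESSION). */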
+ else {
+ error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
+ }
+
+ hfs_unlock_truncate(VTOC(vp), TRUE);
+ return (error);
+ }
+
+ case F_READBOOTSTRAP:
+ case F_WRITEBOOTSTRAP:
+ {
+ struct vnode *devvp = NULL;
+ user_fbootstraptransfer_t *user_bootstrapp;
+ int devBlockSize;
+ int error;
+ uio_t auio;
+ daddr64_t blockNumber;
+ u_int32_t blockOffset;
+ u_int32_t xfersize;
+ struct buf *bp;
+ user_fbootstraptransfer_t user_bootstrap;
+
+ if (!vnode_isvroot(vp))
+ return (EINVAL);
+ /* LP64 - when the caller is a 64-bit process we are passed a pointer
+ * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
+ * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t.
+ */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ if (is64bit) {
+ user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
+ }
+ else {
+ user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
+ user_bootstrapp = &user_bootstrap;
+ user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
+ user_bootstrap.fbt_length = bootstrapp->fbt_length;
+ user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
+ }
+
+ if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) ||
+ (user_bootstrapp->fbt_length > 1024)) {
+ return EINVAL;
+ }
+
+ if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
+ return EINVAL;
+
+ devvp = VTOHFS(vp)->hfs_devvp;
+ auio = uio_create(1, user_bootstrapp->fbt_offset,
+ is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
+ (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
+ uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
+
+ devBlockSize = vfs_devblocksize(vnode_mount(vp));
+
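+ /* Transfer the bootstrap area one device block at a time through the
+ * buffer cache; for F_WRITEBOOTSTRAP each modified block is written back. */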
+ while (uio_resid(auio) > 0) {
+ blockNumber = uio_offset(auio) / devBlockSize;
+ error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
+ if (error) {
+ if (bp) buf_brelse(bp);
+ uio_free(auio);
+ return error;
+ };
+
+ blockOffset = uio_offset(auio) % devBlockSize;
+ xfersize = devBlockSize - blockOffset;
+ error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
+ if (error) {
+ buf_brelse(bp);
+ uio_free(auio);
+ return error;
+ };
+ if (uio_rw(auio) == UIO_WRITE) {
+ error = VNOP_BWRITE(bp);
+ if (error) {
+ uio_free(auio);
+ return error;
+ }
+ } else {
+ buf_brelse(bp);
+ };
+ };
+ uio_free(auio);
+ };
+ return 0;
+
+ case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
+ {
+ if (is64bit) {
+ *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
+ }
+ else {
+ *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
+ }
+ return 0;
+ }
+
+ case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
+ break;
+
+ case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
+ break;
+
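+ /* The free space notification thresholds must remain ordered:
+ * dangerlimit < warninglimit < desiredlevel. Each setter below rejects
+ * a value that would violate that ordering. */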
+ case HFS_FSCTL_SET_VERY_LOW_DISK:
+ if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_SET_LOW_DISK:
+ if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
+ || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
+
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_SET_DESIRED_DISK:
+ if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_VOLUME_STATUS:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
+ break;
+
+ case HFS_SET_BOOT_INFO:
+ if (!vnode_isvroot(vp))
+ return(EINVAL);
+ if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
+ return(EACCES); /* must be superuser or owner of filesystem */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+ break;
+
+ case HFS_GET_BOOT_INFO:
+ if (!vnode_isvroot(vp))
+ return(EINVAL);
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ break;
+
+ case HFS_MARK_BOOT_CORRUPT:
+ /* Mark the boot volume corrupt by setting
+ * kHFSVolumeInconsistentBit in the volume header. This will
+ * force fsck_hfs on next mount.
+ */
+ if (!is_suser()) {
+ return EACCES;
+ }
+
+ /* Allowed only on the root vnode of the boot volume */
+ if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
+ !vnode_isvroot(vp)) {
+ return EINVAL;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
+ hfs_mark_volume_inconsistent(hfsmp);
+ break;
+
+ case HFS_FSCTL_GET_JOURNAL_INFO:
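+ /* Return the journal's starting byte offset on the device and its
+ * size; both are zero when the volume has no journal. */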
+ jip = (struct hfs_journal_info*)ap->a_data;
+
+ if (vp == NULLVP)
+ return EINVAL;
+
+ if (hfsmp->jnl == NULL) {
+ jnl_start = 0;
+ jnl_size = 0;
+ } else {
+ jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
+ jnl_size = (off_t)hfsmp->jnl_size;
+ }
+
+ jip->jstart = jnl_start;
+ jip->jsize = jnl_size;
+ break;
+
+ case HFS_SET_ALWAYS_ZEROFILL: {
+ struct cnode *cp = VTOC(vp);
+
+ if (*(int *)ap->a_data) {
+ cp->c_flag |= C_ALWAYS_ZEROFILL;
+ } else {
+ cp->c_flag &= ~C_ALWAYS_ZEROFILL;
+ }
+ break;
+ }
+
+ default:
+ return (ENOTTY);
+ }
+
+ return 0;
+}
+
+/*
+ * select
+ */
+int
+hfs_vnop_select(__unused struct vnop_select_args *ap)
+/*
+ struct vnop_select_args {
+ vnode_t a_vp;
+ int a_which;
+ int a_fflags;
+ void *a_wql;
+ vfs_context_t a_context;
+ };
+*/
+{
+ /*
+ * We should really check to see if I/O is possible.
+ */
+ return (1);
+}
+
+/*
+ * Converts a logical block number to a physical block, and optionally returns
+ * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
+ * The physical block number is based on the device block size, which is currently 512 bytes.
+ * The block run is returned in logical blocks; it is the REMAINING number of blocks in the run.
+ */
+int
+hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
+{
+ struct filefork *fp = VTOF(vp);
+ struct hfsmount *hfsmp = VTOHFS(vp);
+ int retval = E_NONE;
+ u_int32_t logBlockSize;
+ size_t bytesContAvail = 0;
+ off_t blockposition;
+ int lockExtBtree;
+ int lockflags = 0;
+
+ /*
+ * Check for underlying vnode requests and ensure that logical
+ * to physical mapping is requested.
+ */
+ if (vpp != NULL)
+ *vpp = hfsmp->hfs_devvp;
+ if (bnp == NULL)
+ return (0);
+
+ logBlockSize = GetLogicalBlockSize(vp);
+ blockposition = (off_t)bn * logBlockSize;
+
+ lockExtBtree = overflow_extents(fp);
+
+ if (lockExtBtree)
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
+
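+ /* Map the byte offset to a device block; bytesContAvail returns how
+ * many contiguous bytes follow it on disk. */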
+ retval = MacToVFSError(
+ MapFileBlockC (HFSTOVCB(hfsmp),
+ (FCB*)fp,
+ MAXPHYSIO,
+ blockposition,
+ bnp,
+ &bytesContAvail));
+
+ if (lockExtBtree)
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (retval == E_NONE) {
+ /* Figure out how many read ahead blocks there are */
+ if (runp != NULL) {
+ if (can_cluster(logBlockSize)) {
+ /* Make sure this result never goes negative: */
+ *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
+ } else {
+ *runp = 0;
+ }
+ }
+ }
+ return (retval);
+}
+
+/*
+ * Convert logical block number to file offset.
+ */
+int
+hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
+/*
+ struct vnop_blktooff_args {
+ vnode_t a_vp;
+ daddr64_t a_lblkno;
+ off_t *a_offset;
+ };
+*/
+{
+ if (ap->a_vp == NULL)
+ return (EINVAL);
+ *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
+
+ return(0);
+}
+
+/*
+ * Convert file offset to logical block number.
+ */
+int
+hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
+/*
+ struct vnop_offtoblk_args {
+ vnode_t a_vp;
+ off_t a_offset;
+ daddr64_t *a_lblkno;
+ };
+*/
+{
+ if (ap->a_vp == NULL)
+ return (EINVAL);
+ *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
+
+ return(0);
+}
+
+/*
+ * Map file offset to physical block number.
+ *
+ * If this function is called for write operation, and if the file
+ * had virtual blocks allocated (delayed allocation), real blocks
+ * are allocated by calling ExtendFileC().
+ *
+ * If this function is called for read operation, and if the file
+ * had virtual blocks allocated (delayed allocation), no change
+ * to the size of file is done, and if required, rangelist is
+ * searched for mapping.
+ *
+ * System file cnodes are expected to be locked (shared or exclusive).
+ */
+int
+hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
+/*
+ struct vnop_blockmap_args {
+ vnode_t a_vp;
+ off_t a_foffset;
+ size_t a_size;
+ daddr64_t *a_bpn;
+ size_t *a_run;
+ void *a_poff;
+ int a_flags;
+ vfs_context_t a_context;
+ };
+*/
+{
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ struct hfsmount *hfsmp;
+ size_t bytesContAvail = 0;
+ int retval = E_NONE;
+ int syslocks = 0;
+ int lockflags = 0;
+ struct rl_entry *invalid_range;
+ enum rl_overlaptype overlaptype;
+ int started_tr = 0;
+ int tooklock = 0;
+
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ /* allow blockmaps to the resource fork */
+ } else {
+ if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
+ int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
+ switch(state) {
+ case FILE_IS_COMPRESSED:
+ return ENOTSUP;
+ case FILE_IS_CONVERTING:
+ /* if FILE_IS_CONVERTING, we allow blockmap */
+ break;
+ default:
+ printf("invalid state %d for compressed file\n", state);
+ /* fall through */
+ }
+ }
+ }
+#endif /* HFS_COMPRESSION */
+
+ /* Do not allow blockmap operation on a directory */
+ if (vnode_isdir(vp)) {
+ return (ENOTSUP);
+ }
+
+ /*
+ * Check for underlying vnode requests and ensure that logical
+ * to physical mapping is requested.
+ */
+ if (ap->a_bpn == NULL)
+ return (0);
+
+ if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
+ if (VTOC(vp)->c_lockowner != current_thread()) {
+ hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ tooklock = 1;
+ }
+ }
+ hfsmp = VTOHFS(vp);
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+
+retry:
+ /* Check virtual blocks only when performing write operation */
+ if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto exit;
+ } else {
+ started_tr = 1;
+ }
+ syslocks = SFL_EXTENTS | SFL_BITMAP;
+
+ } else if (overflow_extents(fp)) {
+ syslocks = SFL_EXTENTS;
+ }
+
+ if (syslocks)
+ lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
+
+ /*
+ * Check for any delayed allocations.
+ */
+ if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
+ int64_t actbytes;
+ u_int32_t loanedBlocks;
+
+ //
+ // Make sure we have a transaction. It's possible
+ // that we came in and fp->ff_unallocblocks was zero
+ // but during the time we blocked acquiring the extents
+ // btree, ff_unallocblocks became non-zero and so we
+ // will need to start a transaction.
+ //
+ if (started_tr == 0) {
+ if (syslocks) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ syslocks = 0;
+ }
+ goto retry;
+ }
+
+ /*
+ * Note: ExtendFileC will release any blocks on loan and
+ * acquire real blocks. So we ask to extend by zero bytes
+ * since ExtendFileC will account for the virtual blocks.
+ */
+
+ loanedBlocks = fp->ff_unallocblocks;
+ retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
+ kEFAllMask | kEFNoClumpMask, &actbytes);
+
+ if (retval) {
+ fp->ff_unallocblocks = loanedBlocks;
+ cp->c_blocks += loanedBlocks;
+ fp->ff_blocks += loanedBlocks;
+
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ hfsmp->loanedBlocks += loanedBlocks;
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ cp->c_flag |= C_MODIFIED;
+ if (started_tr) {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+ }
+ goto exit;
+ }
+ }
+
+ retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
+ ap->a_bpn, &bytesContAvail);
+ if (syslocks) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ syslocks = 0;
+ }
+
+ if (started_tr) {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+ }
+ if (retval) {
+ /* On write, always return error because virtual blocks, if any,
+ * should have been allocated in ExtendFileC(). We do not
+ * allocate virtual blocks on read, therefore return error
+ * only if no virtual blocks are allocated. Otherwise we search
+ * rangelist for zero-fills
+ */
+ if ((MacToVFSError(retval) != ERANGE) ||
+ (ap->a_flags & VNODE_WRITE) ||
+ ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
+ goto exit;
+ }
+
+ /* Validate if the start offset is within logical file size */
+ if (ap->a_foffset > fp->ff_size) {
+ goto exit;
+ }
+
+ /* Searching file extents has failed for read operation, therefore
+ * search rangelist for any uncommitted holes in the file.
+ */
+ overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
+ ap->a_foffset + (off_t)(ap->a_size - 1),
+ &invalid_range);
+ switch(overlaptype) {
+ case RL_OVERLAPISCONTAINED:
+ /* start_offset <= rl_start, end_offset >= rl_end */
+ if (ap->a_foffset != invalid_range->rl_start) {
+ break;
+ }
+ case RL_MATCHINGOVERLAP:
+ /* start_offset = rl_start, end_offset = rl_end */
+ case RL_OVERLAPCONTAINSRANGE:
+ /* start_offset >= rl_start, end_offset <= rl_end */
+ case RL_OVERLAPSTARTSBEFORE:
+ /* start_offset > rl_start, end_offset >= rl_start */
+ if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
+ bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
+ } else {
+ bytesContAvail = fp->ff_size - ap->a_foffset;
+ }
+ if (bytesContAvail > ap->a_size) {
+ bytesContAvail = ap->a_size;
+ }
+ *ap->a_bpn = (daddr64_t)-1;
+ retval = 0;
+ break;
+ case RL_OVERLAPENDSAFTER:
+ /* start_offset < rl_start, end_offset < rl_end */
+ case RL_NOOVERLAP:
+ break;
+ }
+ goto exit;
+ }
+
+ /* MapFileBlockC() found a valid extent in the filefork. Search the
+ * mapping information further for invalid file ranges.
+ */
+ overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
+ ap->a_foffset + (off_t)bytesContAvail - 1,
+ &invalid_range);
+ if (overlaptype != RL_NOOVERLAP) {
+ switch(overlaptype) {
+ case RL_MATCHINGOVERLAP:
+ case RL_OVERLAPCONTAINSRANGE:
+ case RL_OVERLAPSTARTSBEFORE:
+ /* There's no valid block for this byte offset */
+ *ap->a_bpn = (daddr64_t)-1;
+ /* There's no point limiting the amount to be returned
+ * if the invalid range that was hit extends all the way
+ * to the EOF (i.e. there's no valid bytes between the
+ * end of this range and the file's EOF):
+ */
+ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+ bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+ }
+ break;
+
+ case RL_OVERLAPISCONTAINED:
+ case RL_OVERLAPENDSAFTER:
+ /* The range of interest hits an invalid block before the end: */
+ if (invalid_range->rl_start == ap->a_foffset) {
+ /* There's actually no valid information to be had starting here: */
+ *ap->a_bpn = (daddr64_t)-1;
+ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+ bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+ }
+ } else {
+ bytesContAvail = invalid_range->rl_start - ap->a_foffset;
+ }
+ break;
+
+ case RL_NOOVERLAP:
+ break;
+ } /* end switch */
+ if (bytesContAvail > ap->a_size)
+ bytesContAvail = ap->a_size;
+ }
+
+exit:
+ if (retval == 0) {
+ if (ap->a_run)
+ *ap->a_run = bytesContAvail;
+
+ if (ap->a_poff)
+ *(int *)ap->a_poff = 0;
+ }
+
+ if (tooklock)
+ hfs_unlock(cp);
+
+ return (MacToVFSError(retval));
+}
+
+
+/*
+ * prepare and issue the I/O
+ * buf_strategy knows how to deal
+ * with requests that require
+ * fragmented I/Os
+ */
+int
+hfs_vnop_strategy(struct vnop_strategy_args *ap)
+{
+ buf_t bp = ap->a_bp;
+ vnode_t vp = buf_vnode(bp);
+
+ return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
+}
+
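+/*
+ * "Minor" update: clear the cnode's modified state and any pending
+ * timestamp updates so a truncate done with skipupdate set does not
+ * dirty the catalog record.
+ */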
+static int
+hfs_minorupdate(struct vnode *vp) {
+ struct cnode *cp = VTOC(vp);
+ cp->c_flag &= ~C_MODIFIED;
+ cp->c_touch_acctime = 0;
+ cp->c_touch_chgtime = 0;
+ cp->c_touch_modtime = 0;
+
+ return 0;
+}
+
+static int
+do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
+{
+ register struct cnode *cp = VTOC(vp);
+ struct filefork *fp = VTOF(vp);
+ struct proc *p = vfs_context_proc(context);
+ kauth_cred_t cred = vfs_context_ucred(context);
+ int retval;
+ off_t bytesToAdd;
+ off_t actualBytesAdded;
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize;
+ struct hfsmount *hfsmp;
+ int lockflags;
+
+ blksize = VTOVCB(vp)->blockSize;
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
+ (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+
+ if (length < 0)
+ return (EINVAL);
+
+ /* This should only happen with a corrupt filesystem */
+ if ((off_t)fp->ff_size < 0)
+ return (EINVAL);
+
+ if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
+ return (EFBIG);
+
+ hfsmp = VTOHFS(vp);
+
+ retval = E_NONE;
+
+ /* Files that are changing size are not hot file candidates. */
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+
+ /*
+ * We cannot just check if fp->ff_size == length (as an optimization)
+ * since there may be extra physical blocks that also need truncation.
+ */
+#if QUOTA
+ if ((retval = hfs_getinoquota(cp)))
+ return(retval);
+#endif /* QUOTA */
+
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of ff_size is 0, length will be at least 1.
+ */
+ if (length > (off_t)fp->ff_size) {
+#if QUOTA
+ retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
+ cred, 0);
+ if (retval)
+ goto Err_Exit;
+#endif /* QUOTA */
+ /*
+ * If we don't have enough physical space then
+ * we need to extend the physical size.
+ */
+ if (length > filebytes) {
+ int eflags;
+ u_int32_t blockHint = 0;
+
+ /* All or nothing and don't round up to clumpsize. */
+ eflags = kEFAllMask | kEFNoClumpMask;
+
+ if (cred && suser(cred, NULL) != 0)
+ eflags |= kEFReserveMask; /* keep a reserve */
+
+ /*
+ * Allocate Journal and Quota files in metadata zone.
+ */
+ if (filebytes == 0 &&
+ hfsmp->hfs_flags & HFS_METADATA_ZONE &&
+ hfs_virtualmetafile(cp)) {
+ eflags |= kEFMetadataMask;
+ blockHint = hfsmp->hfs_metazone_start;
+ }
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ while ((length > filebytes) && (retval == E_NONE)) {
+ bytesToAdd = length - filebytes;
+ retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
+ (FCB*)fp,
+ bytesToAdd,
+ blockHint,
+ eflags,
+ &actualBytesAdded));
+
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (actualBytesAdded == 0 && retval == E_NONE) {
+ if (length > filebytes)
+ length = filebytes;
+ break;
+ }
+ } /* endwhile */
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (hfsmp->jnl) {
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+ }
+
+ hfs_end_transaction(hfsmp);
+
+ if (retval)
+ goto Err_Exit;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+ }
+
+ if (!(flags & IO_NOZEROFILL)) {
+ if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
+ struct rl_entry *invalid_range;
+ off_t zero_limit;
+
+ zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+ if (length < zero_limit) zero_limit = length;
+
+ if (length > (off_t)fp->ff_size) {
+ struct timeval tv;
+
+ /* Extending the file: time to fill out the current last page w. zeroes? */
+ if ((fp->ff_size & PAGE_MASK_64) &&
+ (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
+ fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
+
+ /* There's some valid data at the start of the (current) last page
+ of the file, so zero out the remainder of that page to ensure the
+ entire page contains valid data. Since there is no invalid range
+ possible past the (current) eof, there's no need to remove anything
+ from the invalid range list before calling cluster_write(): */
+ hfs_unlock(cp);
+ retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
+ fp->ff_size, (off_t)0,
+ (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ if (retval) goto Err_Exit;
+
+ /* Merely invalidate the remaining area, if necessary: */
+ if (length > zero_limit) {
+ microuptime(&tv);
+ rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ }
+ } else {
+ /* The page containing the (current) eof is invalid: just add the
+ remainder of the page to the invalid list, along with the area
+ being newly allocated:
+ */
+ microuptime(&tv);
+ rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ };
+ }
+ } else {
+ panic("hfs_truncate: invoked on non-UBC object?!");
+ };
+ }
+ cp->c_touch_modtime = TRUE;
+ fp->ff_size = length;
+
+ } else { /* Shorten the size of the file */
+
+ if ((off_t)fp->ff_size > length) {
+ /* Any space previously marked as invalid is now irrelevant: */
+ rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
+ }
+
+ /*
+ * Account for any unmapped blocks. Note that the new
+ * file length can still end up with unmapped blocks.
+ */
+ if (fp->ff_unallocblocks > 0) {
+ u_int32_t finalblks;
+ u_int32_t loanedBlocks;
+
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+
+ loanedBlocks = fp->ff_unallocblocks;
+ cp->c_blocks -= loanedBlocks;
+ fp->ff_blocks -= loanedBlocks;
+ fp->ff_unallocblocks = 0;
+
+ hfsmp->loanedBlocks -= loanedBlocks;
+
+ finalblks = (length + blksize - 1) / blksize;
+ if (finalblks > fp->ff_blocks) {
+ /* calculate required unmapped blocks */
+ loanedBlocks = finalblks - fp->ff_blocks;
+ hfsmp->loanedBlocks += loanedBlocks;
+
+ fp->ff_unallocblocks = loanedBlocks;
+ cp->c_blocks += loanedBlocks;
+ fp->ff_blocks += loanedBlocks;
+ }
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ }
+
+ /*
+ * For a TBE process the deallocation of the file blocks is
+ * delayed until the file is closed. And hfs_close calls
+ * truncate with the IO_NDELAY flag set. So when IO_NDELAY
+ * isn't set, we make sure this isn't a TBE process.
+ */
+ if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
+#if QUOTA
+ off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
+#endif /* QUOTA */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ if (fp->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
+ (FCB*)fp, length, false));
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (hfsmp->jnl) {
+ if (retval == 0) {
+ fp->ff_size = length;
+ }
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+ }
+ hfs_end_transaction(hfsmp);
+
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (retval)
+ goto Err_Exit;
+#if QUOTA
+ /* These are bytesreleased */
+ (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
+#endif /* QUOTA */
+ }
+ /* Only set update flag if the logical length changes */
+ if ((off_t)fp->ff_size != length)
+ cp->c_touch_modtime = TRUE;
+ fp->ff_size = length;
+ }
+ if (cp->c_mode & (S_ISUID | S_ISGID)) {
+ if (!vfs_context_issuser(context)) {
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ skipupdate = 0;
+ }
+ }
+ if (skipupdate) {
+ retval = hfs_minorupdate(vp);
+ }
+ else {
+ cp->c_touch_chgtime = TRUE; /* status changed */
+ cp->c_touch_modtime = TRUE; /* file data was modified */
+ retval = hfs_update(vp, MNT_WAIT);
+ }
+ if (retval) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ -1, -1, -1, retval, 0);
+ }
+
+Err_Exit:
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
+ (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
+
+ return (retval);
+}
+
+
+
+/*
+ * Truncate a cnode to at most length size, freeing (or adding) the
+ * disk blocks.
+ */
+__private_extern__
+int
+hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
+ int skipupdate, vfs_context_t context)
+{
+ struct filefork *fp = VTOF(vp);
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize, error = 0;
+ struct cnode *cp = VTOC(vp);
+
+ /* Cannot truncate an HFS directory! */
+ if (vnode_isdir(vp)) {
+ return (EISDIR);
+ }
+ /* A swap file cannot change size. */
+ if (vnode_isswap(vp) && (length != 0)) {
+ return (EPERM);
+ }
+
+ blksize = VTOVCB(vp)->blockSize;
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ //
+ // Have to do this here so that we don't wind up with
+ // i/o pending for blocks that are about to be released
+ // if we truncate the file.
+ //
+ // If skipsetsize is set, then the caller is responsible
+ // for the ubc_setsize.
+ //
+ // Even if skipsetsize is set, if the length is zero we
+ // want to call ubc_setsize() because as of SnowLeopard
+ // it will no longer cause any page-ins and it will drop
+ // any dirty pages so that we don't do any i/o that we
+ // don't have to. This also prevents a race where i/o
+ // for truncated blocks may overwrite later data if the
+ // blocks get reallocated to a different file.
+ //
+ if (!skipsetsize || length == 0)
+ ubc_setsize(vp, length);
+
+ // have to loop truncating or growing files that are
+ // really big because otherwise transactions can get
+ // enormous and consume too many kernel resources.
+
+ if (length < filebytes) {
+ while (filebytes > length) {
+ if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ filebytes -= HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = length;
+ }
+ cp->c_flag |= C_FORCEUPDATE;
+ error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
+ if (error)
+ break;
+ }
+ } else if (length > filebytes) {
+ while (filebytes < length) {
+ if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ filebytes += HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = length;
+ }
+ cp->c_flag |= C_FORCEUPDATE;
+ error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
+ if (error)
+ break;
+ }
+ } else /* Same logical size */ {
+
+ error = do_hfs_truncate(vp, length, flags, skipupdate, context);
+ }
+ /* Files that are changing size are not hot file candidates. */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+
+ return (error);
+}
+
+
+
+/*
+ * Preallocate file storage space.
+ */
+int
+hfs_vnop_allocate(struct vnop_allocate_args /* {
+ vnode_t a_vp;
+ off_t a_length;
+ u_int32_t a_flags;
+ off_t *a_bytesallocated;
+ off_t a_offset;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ ExtendedVCB *vcb;
+ off_t length = ap->a_length;
+ off_t startingPEOF;
+ off_t moreBytesRequested;
+ off_t actualBytesAdded;
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int retval, retval2;
+ u_int32_t blockHint;
+ u_int32_t extendFlags; /* For call to ExtendFileC */
+ struct hfsmount *hfsmp;
+ kauth_cred_t cred = vfs_context_ucred(ap->a_context);
+ int lockflags;
+
+ *(ap->a_bytesallocated) = 0;
+
+ if (!vnode_isreg(vp))
+ return (EISDIR);
+ if (length < (off_t)0)
+ return (EINVAL);
+
+ cp = VTOC(vp);
+
+ hfs_lock_truncate(cp, TRUE);
+
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+ goto Err_Exit;
+ }
+
+ fp = VTOF(vp);
+ hfsmp = VTOHFS(vp);
+ vcb = VTOVCB(vp);
+
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
+
+ if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ /* Fill in the flags word for the call to Extend the file */
+
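+	/*
+	 * Non-superuser callers must leave the volume's free-space reserve
+	 * intact (kEFReserveMask), and virtual metadata files are steered
+	 * into the metadata allocation zone (kEFMetadataMask).
+	 */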
+ extendFlags = kEFNoClumpMask;
+ if (ap->a_flags & ALLOCATECONTIG)
+ extendFlags |= kEFContigMask;
+ if (ap->a_flags & ALLOCATEALL)
+ extendFlags |= kEFAllMask;
+ if (cred && suser(cred, NULL) != 0)
+ extendFlags |= kEFReserveMask;
+ if (hfs_virtualmetafile(cp))
+ extendFlags |= kEFMetadataMask;
+
+ retval = E_NONE;
+ blockHint = 0;
+ startingPEOF = filebytes;
+
+ if (ap->a_flags & ALLOCATEFROMPEOF)
+ length += filebytes;
+ else if (ap->a_flags & ALLOCATEFROMVOL)
+ blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
+
+	/* If no changes are necessary, then we're done */
+ if (filebytes == length)
+ goto Std_Exit;
+
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of filebytes is 0, length will be at least 1.
+ */
+ if (length > filebytes) {
+ off_t total_bytes_added = 0, orig_request_size;
+
+ orig_request_size = moreBytesRequested = length - filebytes;
+
+#if QUOTA
+ retval = hfs_chkdq(cp,
+ (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
+ cred, 0);
+ if (retval)
+ goto Err_Exit;
+
+#endif /* QUOTA */
+ /*
+ * Metadata zone checks.
+ */
+ if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
+ /*
+ * Allocate Journal and Quota files in metadata zone.
+ */
+ if (hfs_virtualmetafile(cp)) {
+ blockHint = hfsmp->hfs_metazone_start;
+ } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
+ (blockHint <= hfsmp->hfs_metazone_end)) {
+ /*
+ * Move blockHint outside metadata zone.
+ */
+ blockHint = hfsmp->hfs_metazone_end + 1;
+ }
+ }
+
+
+ while ((length > filebytes) && (retval == E_NONE)) {
+ off_t bytesRequested;
+
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
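+			/*
+			 * Extend in chunks of at most HFS_BIGFILE_SIZE per pass so
+			 * that a single journal transaction does not grow too large.
+			 */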
+ if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
+ bytesRequested = HFS_BIGFILE_SIZE;
+ } else {
+ bytesRequested = moreBytesRequested;
+ }
+
+ if (extendFlags & kEFContigMask) {
+ // if we're on a sparse device, this will force it to do a
+ // full scan to find the space needed.
+ hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
+ }
+
+ retval = MacToVFSError(ExtendFileC(vcb,
+ (FCB*)fp,
+ bytesRequested,
+ blockHint,
+ extendFlags,
+ &actualBytesAdded));
+
+ if (retval == E_NONE) {
+ *(ap->a_bytesallocated) += actualBytesAdded;
+ total_bytes_added += actualBytesAdded;
+ moreBytesRequested -= actualBytesAdded;
+ if (blockHint != 0) {
+ blockHint += actualBytesAdded / vcb->blockSize;
+ }
+ }
+ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (hfsmp->jnl) {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+
+ hfs_end_transaction(hfsmp);
+ }
+
+
+ /*
+ * if we get an error and no changes were made then exit
+ * otherwise we must do the hfs_update to reflect the changes
+ */
+ if (retval && (startingPEOF == filebytes))
+ goto Err_Exit;
+
+ /*
+ * Adjust actualBytesAdded to be allocation block aligned, not
+ * clump size aligned.
+ * NOTE: So what we are reporting does not affect reality
+ * until the file is closed, when we truncate the file to allocation
+ * block size.
+ */
+ if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
+ *(ap->a_bytesallocated) =
+ roundup(orig_request_size, (off_t)vcb->blockSize);
+
+ } else { /* Shorten the size of the file */
+
+ if (fp->ff_size > length) {
+ /*
+ * Any buffers that are past the truncation point need to be
+ * invalidated (to maintain buffer cache consistency).
+ */
+ }
+
+ retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
+ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+ /*
+ * if we get an error and no changes were made then exit
+ * otherwise we must do the hfs_update to reflect the changes
+ */
+ if (retval && (startingPEOF == filebytes)) goto Err_Exit;
+#if QUOTA
+ /* These are bytesreleased */
+ (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
+#endif /* QUOTA */
+
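+		/*
+		 * If the logical EOF now exceeds the allocated space, pull it
+		 * back.  The cnode lock is dropped around ubc_setsize() since
+		 * that call may need to operate on the file's pages.
+		 */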
+ if (fp->ff_size > filebytes) {
+ fp->ff_size = filebytes;
+
+ hfs_unlock(cp);
+ ubc_setsize(vp, fp->ff_size);
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ }
+ }
+
+Std_Exit:
+ cp->c_touch_chgtime = TRUE;
+ cp->c_touch_modtime = TRUE;
+ retval2 = hfs_update(vp, MNT_WAIT);
+
+ if (retval == 0)
+ retval = retval2;
+Err_Exit:
+ hfs_unlock_truncate(cp, TRUE);
+ hfs_unlock(cp);
+ return (retval);
+}
+
+
+/*
+ * Pagein for HFS filesystem
+ */
+int
+hfs_vnop_pagein(struct vnop_pagein_args *ap)
+/*
+ struct vnop_pagein_args {
+ vnode_t a_vp,
+ upl_t a_pl,
+ vm_offset_t a_pl_offset,
+ off_t a_f_offset,
+ size_t a_size,
+ int a_flags
+ vfs_context_t a_context;
+ };
+*/
+{
+ vnode_t vp = ap->a_vp;
+ int error;
+
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ /* allow pageins of the resource fork */
+ } else {
+ int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+ if (compressed) {
+ error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
+ if (compressed) {
+ if (error == 0) {
+ /* successful page-in, update the access time */
+ VTOC(vp)->c_touch_acctime = TRUE;
+
+ /* compressed files are not hot file candidates */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ VTOF(vp)->ff_bytesread = 0;
+ }
+ }
+ return error;
+ }
+ /* otherwise the file was converted back to a regular file while we were reading it */
+ }
+ }
+#endif
+
+ error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
+ ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
+ /*
+ * Keep track of blocks read.
+ */
+ if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
+ struct cnode *cp;
+ struct filefork *fp;
+ int bytesread;
+ int took_cnode_lock = 0;
+
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+
+ if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
+ bytesread = fp->ff_size;
+ else
+ bytesread = ap->a_size;
+
+ /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
+ if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ took_cnode_lock = 1;
+ }
+ /*
+ * If this file hasn't been seen since the start of
+ * the current sampling period then start over.
+ */
+ if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
+ struct timeval tv;
+
+ fp->ff_bytesread = bytesread;
+ microtime(&tv);
+ cp->c_atime = tv.tv_sec;
+ } else {
+ fp->ff_bytesread += bytesread;
+ }
+ cp->c_touch_acctime = TRUE;
+ if (took_cnode_lock)
+ hfs_unlock(cp);
+ }
+ return (error);
+}
+
+/*
+ * Pageout for HFS filesystem.
+ */
+int
+hfs_vnop_pageout(struct vnop_pageout_args *ap)
+/*
+ struct vnop_pageout_args {
+ vnode_t a_vp,
+ upl_t a_pl,
+ vm_offset_t a_pl_offset,
+ off_t a_f_offset,
+ size_t a_size,
+ int a_flags
+ vfs_context_t a_context;
+ };
+*/
+{
+ vnode_t vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ int retval = 0;
+ off_t filesize;
+ upl_t upl;
+ upl_page_info_t* pl;
+ vm_offset_t a_pl_offset;
+ int a_flags;
+ int is_pageoutv2 = 0;
+ kern_return_t kret;
+
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+
+ /*
+ * Figure out where the file ends, for pageout purposes. If
+ * ff_new_size > ff_size, then we're in the middle of extending the
+ * file via a write, so it is safe (and necessary) that we be able
+ * to pageout up to that point.
+ */
+ filesize = fp->ff_size;
+ if (fp->ff_new_size > filesize)
+ filesize = fp->ff_new_size;
+
+ a_flags = ap->a_flags;
+ a_pl_offset = ap->a_pl_offset;
+
+ /*
+ * we can tell if we're getting the new or old behavior from the UPL
+ */
+ if ((upl = ap->a_pl) == NULL) {
+ int request_flags;
+
+ is_pageoutv2 = 1;
+ /*
+ * we're in control of any UPL we commit
+ * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
+ */
+ a_flags &= ~UPL_NOCOMMIT;
+ a_pl_offset = 0;
+
+ /*
+ * take truncate lock (shared) to guard against
+ * zero-fill thru fsync interfering, but only for v2
+ */
+ hfs_lock_truncate(cp, 0);
+
+ if (a_flags & UPL_MSYNC) {
+ request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
+ }
+ else {
+ request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
+ }
+ kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
+
+ if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
+ retval = EINVAL;
+ goto pageout_done;
+ }
+ }
+ /*
+	 * From this point forward, upl points at the UPL we're working with;
+	 * it was either passed in or we successfully created it.
+ */
+
+ /*
+ * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
+ * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
+ * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
+ * N dirty ranges in the UPL. Note that this is almost a direct copy of the
+ * logic in vnode_pageout except that we need to do it after grabbing the truncate
+ * lock in HFS so that we don't lock invert ourselves.
+ *
+ * Note that we can still get into this function on behalf of the default pager with
+	 * non-V2 behavior (swapfiles).  However, in that case, we did not grab locks above
+ * since fsync and other writing threads will grab the locks, then mark the
+ * relevant pages as busy. But the pageout codepath marks the pages as busy,
+ * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
+ * we do not try to grab anything for the pre-V2 case, which should only be accessed
+ * by the paging/VM system.
+ */
+
+ if (is_pageoutv2) {
+ off_t f_offset;
+ int offset;
+ int isize;
+ int pg_index;
+ int error;
+ int error_ret = 0;
+
+ isize = ap->a_size;
+ f_offset = ap->a_f_offset;
+
+ /*
+ * Scan from the back to find the last page in the UPL, so that we
+ * aren't looking at a UPL that may have already been freed by the
+ * preceding aborts/completions.
+ */
+ for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
+ if (upl_page_present(pl, --pg_index))
+ break;
+ if (pg_index == 0) {
+ ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
+ goto pageout_done;
+ }
+ }
+
+ /*
+ * initialize the offset variables before we touch the UPL.
+ * a_f_offset is the position into the file, in bytes
+ * offset is the position into the UPL, in bytes
+ * pg_index is the pg# of the UPL we're operating on.
+ * isize is the offset into the UPL of the last non-clean page.
+ */
+ isize = ((pg_index + 1) * PAGE_SIZE);
+
+ offset = 0;
+ pg_index = 0;
+
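+		/*
+		 * Walk the UPL one run of contiguous dirty pages at a time,
+		 * issuing a cluster_pageout() for each run.
+		 */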
+ while (isize) {
+ int xsize;
+ int num_of_pages;
+
+ if ( !upl_page_present(pl, pg_index)) {
+ /*
+ * we asked for RET_ONLY_DIRTY, so it's possible
+ * to get back empty slots in the UPL.
+ * just skip over them
+ */
+ f_offset += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ isize -= PAGE_SIZE;
+ pg_index++;
+
+ continue;
+ }
+ if ( !upl_dirty_page(pl, pg_index)) {
+ panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
+ }
+
+ /*
+ * We know that we have at least one dirty page.
+ * Now checking to see how many in a row we have
+ */
+ num_of_pages = 1;
+ xsize = isize - PAGE_SIZE;
+
+ while (xsize) {
+ if ( !upl_dirty_page(pl, pg_index + num_of_pages))
+ break;
+ num_of_pages++;
+ xsize -= PAGE_SIZE;
+ }
+ xsize = num_of_pages * PAGE_SIZE;
+
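+			/*
+			 * For regular (non-swap) files, drop any invalid ranges
+			 * covered by this run before doing the I/O, taking the
+			 * cnode lock if this thread doesn't already own it.
+			 */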
+ if (!vnode_isswap(vp)) {
+ off_t end_of_range;
+ int tooklock;
+
+ tooklock = 0;
+
+ if (cp->c_lockowner != current_thread()) {
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+ /*
+ * we're in the v2 path, so we are the
+ * owner of the UPL... we may have already
+ * processed some of the UPL, so abort it
+ * from the current working offset to the
+ * end of the UPL
+ */
+ ubc_upl_abort_range(upl,
+ offset,
+ ap->a_size - offset,
+ UPL_ABORT_FREE_ON_EMPTY);
+ goto pageout_done;
+ }
+ tooklock = 1;
+ }
+ end_of_range = f_offset + xsize - 1;
+
+ if (end_of_range >= filesize) {
+ end_of_range = (off_t)(filesize - 1);
+ }
+ if (f_offset < filesize) {
+ rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
+ cp->c_flag |= C_MODIFIED; /* leof is dirty */
+ }
+ if (tooklock) {
+ hfs_unlock(cp);
+ }
+ }
+ if ((error = cluster_pageout(vp, upl, offset, f_offset,
+ xsize, filesize, a_flags))) {
+ if (error_ret == 0)
+ error_ret = error;
+ }
+ f_offset += xsize;
+ offset += xsize;
+ isize -= xsize;
+ pg_index += num_of_pages;
+ }
+ /* capture errnos bubbled out of cluster_pageout if they occurred */
+ if (error_ret != 0) {
+ retval = error_ret;
+ }
+ } /* end block for v2 pageout behavior */
+ else {
+ if (!vnode_isswap(vp)) {
+ off_t end_of_range;
+ int tooklock = 0;
+
+ if (cp->c_lockowner != current_thread()) {
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+ if (!(a_flags & UPL_NOCOMMIT)) {
+ ubc_upl_abort_range(upl,
+ a_pl_offset,
+ ap->a_size,
+ UPL_ABORT_FREE_ON_EMPTY);
+ }
+ goto pageout_done;
+ }
+ tooklock = 1;
+ }
+ end_of_range = ap->a_f_offset + ap->a_size - 1;
+
+ if (end_of_range >= filesize) {
+ end_of_range = (off_t)(filesize - 1);
+ }
+ if (ap->a_f_offset < filesize) {
+ rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
+ cp->c_flag |= C_MODIFIED; /* leof is dirty */
+ }
+
+ if (tooklock) {
+ hfs_unlock(cp);
+ }
+ }
+ /*
+ * just call cluster_pageout for old pre-v2 behavior
+ */
+ retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
+ ap->a_size, filesize, a_flags);
+ }
+
+ /*
+ * If data was written, update the modification time of the file.
+ * If setuid or setgid bits are set and this process is not the
+ * superuser then clear the setuid and setgid bits as a precaution
+ * against tampering.
+ */
+ if (retval == 0) {
+ cp->c_touch_modtime = TRUE;
+ cp->c_touch_chgtime = TRUE;
+ if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
+ (vfs_context_suser(ap->a_context) != 0)) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ hfs_unlock(cp);
+ }
+ }
+
+pageout_done:
+ if (is_pageoutv2) {
+ /* release truncate lock (shared) */
+ hfs_unlock_truncate(cp, 0);
+ }
+ return (retval);
+}
+
+/*
+ * Intercept B-Tree node writes to unswap them if necessary.
+ */
+int
+hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
+{
+ int retval = 0;
+ register struct buf *bp = ap->a_bp;
+ register struct vnode *vp = buf_vnode(bp);
+ BlockDescriptor block;
+
+ /* Trap B-Tree writes */
+ if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
+ (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
+ (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
+ (vp == VTOHFS(vp)->hfc_filevp)) {
+
+ /*
+ * Swap and validate the node if it is in native byte order.
+	 * This is always true on big endian, so we always validate
+ * before writing here. On little endian, the node typically has
+ * been swapped and validated when it was written to the journal,
+ * so we won't do anything here.
+ */
+ if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
+ /* Prepare the block pointer */
+ block.blockHeader = bp;
+ block.buffer = (char *)buf_dataptr(bp);
+ block.blockNum = buf_lblkno(bp);
+ /* not found in cache ==> came from disk */
+ block.blockReadFromDisk = (buf_fromcache(bp) == 0);
+ block.blockSize = buf_count(bp);
+
+ /* Endian un-swap B-Tree node */
+ retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
+ if (retval)
+ panic("hfs_vnop_bwrite: about to write corrupt node!\n");
+ }
}
-Err_Exit:
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
- (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
+	/* This buffer shouldn't be locked anymore, but if it is, clear it */
+ if ((buf_flags(bp) & B_LOCKED)) {
+ // XXXdbg
+ if (VTOHFS(vp)->jnl) {
+ panic("hfs: CLEARING the lock bit on bp %p\n", bp);
+ }
+ buf_clearflags(bp, B_LOCKED);
+ }
+ retval = vn_bwrite (ap);
return (retval);
}
-
-
/*
-#
-#% allocate vp L L L
-#
-vop_allocate {
- IN struct vnode *vp;
- IN off_t length;
- IN int flags;
- OUT off_t *bytesallocated;
- IN off_t offset;
- IN struct ucred *cred;
- IN struct proc *p;
-};
- * allocate a cnode to at most length size
+ * Relocate a file to a new location on disk
+ * cnode must be locked on entry
+ *
+ * Relocation occurs by cloning the file's data from its
+ * current set of blocks to a new set of blocks. During
+ * the relocation all of the blocks (old and new) are
+ * owned by the file.
+ *
+ * -----------------
+ * |///////////////|
+ * -----------------
+ * 0 N (file offset)
+ *
+ * ----------------- -----------------
+ * |///////////////| | | STEP 1 (acquire new blocks)
+ * ----------------- -----------------
+ * 0 N N+1 2N
+ *
+ * ----------------- -----------------
+ * |///////////////| |///////////////| STEP 2 (clone data)
+ * ----------------- -----------------
+ * 0 N N+1 2N
+ *
+ * -----------------
+ * |///////////////| STEP 3 (head truncate blocks)
+ * -----------------
+ * 0 N
+ *
+ * During steps 2 and 3 page-outs to file offsets less
+ * than or equal to N are suspended.
+ *
+ * During step 3 page-ins to the file get suspended.
*/
-int hfs_allocate(ap)
- struct vop_allocate_args /* {
- struct vnode *a_vp;
- off_t a_length;
- u_int32_t a_flags;
- off_t *a_bytesallocated;
- off_t a_offset;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap;
+__private_extern__
+int
+hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
+ struct proc *p)
{
- struct vnode *vp = ap->a_vp;
- struct cnode *cp = VTOC(vp);
- struct filefork *fp = VTOF(vp);
- off_t length = ap->a_length;
- off_t startingPEOF;
- off_t moreBytesRequested;
- off_t actualBytesAdded;
- off_t filebytes;
- u_long fileblocks;
- long vflags;
- struct timeval tv;
- int retval, retval2;
- UInt32 blockHint;
- UInt32 extendFlags =0; /* For call to ExtendFileC */
- struct hfsmount *hfsmp;
-
+ struct cnode *cp;
+ struct filefork *fp;
+ struct hfsmount *hfsmp;
+ u_int32_t headblks;
+ u_int32_t datablks;
+ u_int32_t blksize;
+ u_int32_t growsize;
+ u_int32_t nextallocsave;
+ daddr64_t sector_a, sector_b;
+ int eflags;
+ off_t newbytes;
+ int retval;
+ int lockflags = 0;
+ int took_trunc_lock = 0;
+ int started_tr = 0;
+ enum vtype vnodetype;
+
+ vnodetype = vnode_vtype(vp);
+ if (vnodetype != VREG && vnodetype != VLNK) {
+ return (EPERM);
+ }
+
hfsmp = VTOHFS(vp);
+ if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
+ return (ENOSPC);
+ }
- *(ap->a_bytesallocated) = 0;
- fileblocks = fp->ff_blocks;
- filebytes = (off_t)fileblocks * (off_t)VTOVCB(vp)->blockSize;
-
- if (length < (off_t)0)
- return (EINVAL);
- if (vp->v_type != VREG && vp->v_type != VLNK)
- return (EISDIR);
- if ((ap->a_flags & ALLOCATEFROMVOL) && (length <= filebytes))
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+ if (fp->ff_unallocblocks)
return (EINVAL);
+ blksize = hfsmp->blockSize;
+ if (blockHint == 0)
+ blockHint = hfsmp->nextAllocation;
- /* Fill in the flags word for the call to Extend the file */
-
- if (ap->a_flags & ALLOCATECONTIG)
- extendFlags |= kEFContigMask;
-
- if (ap->a_flags & ALLOCATEALL)
- extendFlags |= kEFAllMask;
-
- if (suser(ap->a_cred, NULL) != 0)
- extendFlags |= kEFReserveMask;
-
- tv = time;
- retval = E_NONE;
- blockHint = 0;
- startingPEOF = filebytes;
+ if ((fp->ff_size > 0x7fffffff) ||
+ ((fp->ff_size > blksize) && vnodetype == VLNK)) {
+ return (EFBIG);
+ }
- if (ap->a_flags & ALLOCATEFROMPEOF)
- length += filebytes;
- else if (ap->a_flags & ALLOCATEFROMVOL)
- blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
+ //
+ // We do not believe that this call to hfs_fsync() is
+ // necessary and it causes a journal transaction
+ // deadlock so we are removing it.
+ //
+ //if (vnodetype == VREG && !vnode_issystem(vp)) {
+ // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
+ // if (retval)
+ // return (retval);
+ //}
+
+ if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
+ hfs_unlock(cp);
+ hfs_lock_truncate(cp, TRUE);
+		/* Force the lock since callers expect the lock to be held. */
+ if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
+ hfs_unlock_truncate(cp, TRUE);
+ return (retval);
+ }
+ /* No need to continue if file was removed. */
+ if (cp->c_flag & C_NOEXISTS) {
+ hfs_unlock_truncate(cp, TRUE);
+ return (ENOENT);
+ }
+ took_trunc_lock = 1;
+ }
+ headblks = fp->ff_blocks;
+ datablks = howmany(fp->ff_size, blksize);
+ growsize = datablks * blksize;
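+	/*
+	 * STEP 1 setup: request growsize bytes of new, contiguous space
+	 * appended after the file's existing blocks (see diagram above).
+	 */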
+ eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
+ if (blockHint >= hfsmp->hfs_metazone_start &&
+ blockHint <= hfsmp->hfs_metazone_end)
+ eflags |= kEFMetadataMask;
+
+ if (hfs_start_transaction(hfsmp) != 0) {
+ if (took_trunc_lock)
+ hfs_unlock_truncate(cp, TRUE);
+ return (EINVAL);
+ }
+ started_tr = 1;
+ /*
+ * Protect the extents b-tree and the allocation bitmap
+ * during MapFileBlockC and ExtendFileC operations.
+ */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
- /* If no changes are necesary, then we're done */
- if (filebytes == length)
- goto Std_Exit;
+	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
+ if (retval) {
+ retval = MacToVFSError(retval);
+ goto out;
+ }
/*
- * Lengthen the size of the file. We must ensure that the
- * last byte of the file is allocated. Since the smallest
- * value of filebytes is 0, length will be at least 1.
+ * STEP 1 - acquire new allocation blocks.
*/
- if (length > filebytes) {
- moreBytesRequested = length - filebytes;
-
-#if QUOTA
- retval = hfs_chkdq(cp,
- (int64_t)(roundup(moreBytesRequested, VTOVCB(vp)->blockSize)),
- ap->a_cred, 0);
- if (retval)
- return (retval);
+ nextallocsave = hfsmp->nextAllocation;
+ retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
+ if (eflags & kEFMetadataMask) {
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
+ MarkVCBDirty(hfsmp);
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ }
-#endif /* QUOTA */
- // XXXdbg
- hfs_global_shared_lock_acquire(hfsmp);
- if (hfsmp->jnl) {
- if (journal_start_transaction(hfsmp->jnl) != 0) {
- retval = EINVAL;
- goto Err_Exit;
- }
+ retval = MacToVFSError(retval);
+ if (retval == 0) {
+ cp->c_flag |= C_MODIFIED;
+ if (newbytes < growsize) {
+ retval = ENOSPC;
+ goto restore;
+ } else if (fp->ff_blocks < (headblks + datablks)) {
+ printf("hfs_relocate: allocation failed");
+ retval = ENOSPC;
+ goto restore;
}
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
+		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
if (retval) {
- if (hfsmp->jnl) {
- journal_end_transaction(hfsmp->jnl);
+ retval = MacToVFSError(retval);
+ } else if ((sector_a + 1) == sector_b) {
+ retval = ENOSPC;
+ goto restore;
+ } else if ((eflags & kEFMetadataMask) &&
+ ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
+ hfsmp->hfs_metazone_end)) {
+#if 0
+ const char * filestr;
+ char emptystr = '\0';
+
+ if (cp->c_desc.cd_nameptr != NULL) {
+ filestr = (const char *)&cp->c_desc.cd_nameptr[0];
+ } else if (vnode_name(vp) != NULL) {
+ filestr = vnode_name(vp);
+ } else {
+ filestr = &emptystr;
}
- hfs_global_shared_lock_release(hfsmp);
- goto Err_Exit;
- }
-
- retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
- (FCB*)fp,
- moreBytesRequested,
- blockHint,
- extendFlags,
- &actualBytesAdded));
-
- *(ap->a_bytesallocated) = actualBytesAdded;
- filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
-
- (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
-
- // XXXdbg
- if (hfsmp->jnl) {
- hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
- journal_end_transaction(hfsmp->jnl);
+#endif
+ retval = ENOSPC;
+ goto restore;
}
- hfs_global_shared_lock_release(hfsmp);
+ }
+ /* Done with system locks and journal for now. */
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+ if (retval) {
/*
- * if we get an error and no changes were made then exit
- * otherwise we must do the VOP_UPDATE to reflect the changes
- */
- if (retval && (startingPEOF == filebytes))
- goto Err_Exit;
-
- /*
- * Adjust actualBytesAdded to be allocation block aligned, not
- * clump size aligned.
- * NOTE: So what we are reporting does not affect reality
- * until the file is closed, when we truncate the file to allocation
- * block size.
+ * Check to see if failure is due to excessive fragmentation.
*/
- if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
- *(ap->a_bytesallocated) =
- roundup(moreBytesRequested, (off_t)VTOVCB(vp)->blockSize);
-
- } else { /* Shorten the size of the file */
-
- if (fp->ff_size > length) {
- /*
- * Any buffers that are past the truncation point need to be
- * invalidated (to maintain buffer cache consistency). For
- * simplicity, we invalidate all the buffers by calling vinvalbuf.
- */
- vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
- (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
- }
-
- // XXXdbg
- hfs_global_shared_lock_acquire(hfsmp);
- if (hfsmp->jnl) {
- if (journal_start_transaction(hfsmp->jnl) != 0) {
- retval = EINVAL;
- goto Err_Exit;
- }
+ if ((retval == ENOSPC) &&
+ (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
+ hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
}
+ goto out;
+ }
+ /*
+ * STEP 2 - clone file data into the new allocation blocks.
+ */
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
- if (retval) {
- if (hfsmp->jnl) {
- journal_end_transaction(hfsmp->jnl);
- }
- hfs_global_shared_lock_release(hfsmp);
-
- goto Err_Exit;
- }
-
- retval = MacToVFSError(
- TruncateFileC(
- VTOVCB(vp),
- (FCB*)fp,
- length,
- false));
- (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
- filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
+ if (vnodetype == VLNK)
+ retval = hfs_clonelink(vp, blksize, cred, p);
+ else if (vnode_issystem(vp))
+ retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
+ else
+ retval = hfs_clonefile(vp, headblks, datablks, blksize);
+
+ /* Start transaction for step 3 or for a restore. */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto out;
+ }
+ started_tr = 1;
+ if (retval)
+ goto restore;
- if (hfsmp->jnl) {
- hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
- journal_end_transaction(hfsmp->jnl);
- }
- hfs_global_shared_lock_release(hfsmp);
-
+ /*
+ * STEP 3 - switch to cloned data and remove old blocks.
+ */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+ if (retval)
+ goto restore;
+out:
+ if (took_trunc_lock)
+ hfs_unlock_truncate(cp, TRUE);
+
+ if (lockflags) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+ }
- /*
- * if we get an error and no changes were made then exit
- * otherwise we must do the VOP_UPDATE to reflect the changes
- */
- if (retval && (startingPEOF == filebytes)) goto Err_Exit;
-#if QUOTA
- /* These are bytesreleased */
- (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
-#endif /* QUOTA */
+ /* Push cnode's new extent data to disk. */
+ if (retval == 0) {
+ (void) hfs_update(vp, MNT_WAIT);
+ }
+ if (hfsmp->jnl) {
+ if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
+ (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+ else
+ (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+ }
+exit:
+ if (started_tr)
+ hfs_end_transaction(hfsmp);
- if (fp->ff_size > filebytes) {
- fp->ff_size = filebytes;
+ return (retval);
- if (UBCISVALID(vp))
- ubc_setsize(vp, fp->ff_size); /* XXX check errors */
- }
+restore:
+ if (fp->ff_blocks == headblks) {
+ if (took_trunc_lock)
+ hfs_unlock_truncate(cp, TRUE);
+ goto exit;
+ }
+ /*
+ * Give back any newly allocated space.
+ */
+ if (lockflags == 0) {
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
}
-Std_Exit:
- cp->c_flag |= C_CHANGE | C_UPDATE;
- retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
+ (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
- if (retval == 0)
- retval = retval2;
-Err_Exit:
- return (retval);
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+
+ if (took_trunc_lock)
+ hfs_unlock_truncate(cp, TRUE);
+ goto exit;
}
/*
- * pagein for HFS filesystem
+ * Clone a symlink.
+ *
*/
-int
-hfs_pagein(ap)
- struct vop_pagein_args /* {
- struct vnode *a_vp,
- upl_t a_pl,
- vm_offset_t a_pl_offset,
- off_t a_f_offset,
- size_t a_size,
- struct ucred *a_cred,
- int a_flags
- } */ *ap;
+static int
+hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
- register struct vnode *vp = ap->a_vp;
- int devBlockSize = 0;
+ struct buf *head_bp = NULL;
+ struct buf *tail_bp = NULL;
int error;
- if (vp->v_type != VREG && vp->v_type != VLNK)
- panic("hfs_pagein: vp not UBC type\n");
- VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);
+ error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
+ if (error)
+ goto out;
+
+ tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
+ if (tail_bp == NULL) {
+ error = EIO;
+ goto out;
+ }
+ bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
+ error = (int)buf_bwrite(tail_bp);
+out:
+ if (head_bp) {
+ buf_markinvalid(head_bp);
+ buf_brelse(head_bp);
+ }
+ (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
- error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
- ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
- ap->a_flags);
return (error);
}
-/*
- * pageout for HFS filesystem.
+/*
+ * Clone a file's data within the file.
+ *
*/
-int
-hfs_pageout(ap)
- struct vop_pageout_args /* {
- struct vnode *a_vp,
- upl_t a_pl,
- vm_offset_t a_pl_offset,
- off_t a_f_offset,
- size_t a_size,
- struct ucred *a_cred,
- int a_flags
- } */ *ap;
+static int
+hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
- struct vnode *vp = ap->a_vp;
- struct cnode *cp = VTOC(vp);
- struct filefork *fp = VTOF(vp);
- int retval;
- int devBlockSize = 0;
- off_t end_of_range;
- off_t filesize;
-
- if (UBCINVALID(vp))
- panic("hfs_pageout: Not a VREG: vp=%x", vp);
-
- VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
- filesize = fp->ff_size;
- end_of_range = ap->a_f_offset + ap->a_size - 1;
+ caddr_t bufp;
+ size_t bufsize;
+ size_t copysize;
+ size_t iosize;
+ size_t offset;
+ off_t writebase;
+ uio_t auio;
+ int error = 0;
+
+ writebase = blkstart * blksize;
+ copysize = blkcnt * blksize;
+ iosize = bufsize = MIN(copysize, 128 * 1024);
+ offset = 0;
+
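+	/*
+	 * Copy the file's data forward into the newly allocated blocks in
+	 * chunks of up to 128 KB, using uncached (IO_NOCACHE) cluster I/O.
+	 */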
+ if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+ return (ENOMEM);
+ }
+ hfs_unlock(VTOC(vp));
+
+ auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
+
+ while (offset < copysize) {
+ iosize = MIN(copysize - offset, iosize);
+
+ uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, (uintptr_t)bufp, iosize);
+
+ error = cluster_read(vp, auio, copysize, IO_NOCACHE);
+ if (error) {
+ printf("hfs_clonefile: cluster_read failed - %d\n", error);
+ break;
+ }
+ if (uio_resid(auio) != 0) {
+ printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
+ error = EIO;
+ break;
+ }
- if (end_of_range >= filesize)
- end_of_range = (off_t)(filesize - 1);
- if (ap->a_f_offset < filesize)
- rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
+ uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
+ uio_addiov(auio, (uintptr_t)bufp, iosize);
- retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
- filesize, devBlockSize, ap->a_flags);
+ error = cluster_write(vp, auio, writebase + offset,
+ writebase + offset + iosize,
+ uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
+ if (error) {
+ printf("hfs_clonefile: cluster_write failed - %d\n", error);
+ break;
+ }
+ if (uio_resid(auio) != 0) {
+ printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
+ error = EIO;
+ break;
+ }
+ offset += iosize;
+ }
+ uio_free(auio);
- /*
- * If we successfully wrote any data, and we are not the superuser
- * we clear the setuid and setgid bits as a precaution against
- * tampering.
- */
- if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
- cp->c_mode &= ~(S_ISUID | S_ISGID);
+ if ((blksize & PAGE_MASK)) {
+ /*
+ * since the copy may not have started on a PAGE
+ * boundary (or may not have ended on one), we
+ * may have pages left in the cache since NOCACHE
+ * will let partially written pages linger...
+ * lets just flush the entire range to make sure
+ * we don't have any pages left that are beyond
+ * (or intersect) the real LEOF of this file
+ */
+ ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
+ } else {
+ /*
+ * No need to call ubc_sync_range or hfs_invalbuf
+ * since the file was copied using IO_NOCACHE and
+ * the copy was done starting and ending on a page
+ * boundary in the file.
+ */
+ }
+ kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
- return (retval);
+ hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ return (error);
}
/*
- * Intercept B-Tree node writes to unswap them if necessary.
-#
-#vop_bwrite {
-# IN struct buf *bp;
+ * Clone a system (metadata) file.
+ *
*/
-int
-hfs_bwrite(ap)
- struct vop_bwrite_args /* {
- struct buf *a_bp;
- } */ *ap;
+static int
+hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
+ kauth_cred_t cred, struct proc *p)
{
- int retval = 0;
- register struct buf *bp = ap->a_bp;
- register struct vnode *vp = bp->b_vp;
-#if BYTE_ORDER == LITTLE_ENDIAN
- BlockDescriptor block;
+ caddr_t bufp;
+ char * offset;
+ size_t bufsize;
+ size_t iosize;
+ struct buf *bp = NULL;
+ daddr64_t blkno;
+ daddr64_t blk;
+ daddr64_t start_blk;
+ daddr64_t last_blk;
+ int breadcnt;
+ int i;
+ int error = 0;
+
+
+ iosize = GetLogicalBlockSize(vp);
+ bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
+ breadcnt = bufsize / iosize;
+
+ if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+ return (ENOMEM);
+ }
+ start_blk = ((daddr64_t)blkstart * blksize) / iosize;
+ last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
+ blkno = 0;
+
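+	/*
+	 * Copy the system file in batches: read up to a megabyte of its
+	 * logical blocks, then write the same data back out at the
+	 * relocated position (offset by start_blk).
+	 */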
+ while (blkno < last_blk) {
+ /*
+ * Read up to a megabyte
+ */
+ offset = bufp;
+ for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
+ error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
+ if (error) {
+ printf("hfs_clonesysfile: meta_bread error %d\n", error);
+ goto out;
+ }
+ if (buf_count(bp) != iosize) {
+ printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
+ goto out;
+ }
+ bcopy((char *)buf_dataptr(bp), offset, iosize);
- /* Trap B-Tree writes */
- if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
- (VTOC(vp)->c_fileid == kHFSCatalogFileID)) {
+ buf_markinvalid(bp);
+ buf_brelse(bp);
+ bp = NULL;
- /* Swap if the B-Tree node is in native byte order */
- if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
- /* Prepare the block pointer */
- block.blockHeader = bp;
- block.buffer = bp->b_data;
- /* not found in cache ==> came from disk */
- block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
- block.blockSize = bp->b_bcount;
-
- /* Endian un-swap B-Tree node */
- SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
+ offset += iosize;
+ }
+
+ /*
+ * Write up to a megabyte
+ */
+ offset = bufp;
+ for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
+ bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
+ if (bp == NULL) {
+ printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
+ error = EIO;
+ goto out;
+ }
+ bcopy(offset, (char *)buf_dataptr(bp), iosize);
+ error = (int)buf_bwrite(bp);
+ bp = NULL;
+ if (error)
+ goto out;
+ offset += iosize;
}
-
- /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
}
-#endif
- /* This buffer shouldn't be locked anymore but if it is clear it */
- if (ISSET(bp->b_flags, B_LOCKED)) {
- // XXXdbg
- if (VTOHFS(vp)->jnl) {
- panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
- }
- CLR(bp->b_flags, B_LOCKED);
- printf("hfs_bwrite: called with lock bit set\n");
+out:
+ if (bp) {
+ buf_brelse(bp);
}
- retval = vn_bwrite (ap);
- return (retval);
+ kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
+
+ error = hfs_fsync(vp, MNT_WAIT, 0, p);
+
+ return (error);
}