+
+ case F_RDADVISE: {
+ register struct radvisory *ra;
+ struct filefork *fp;
+ int error;
+
+ if (!vnode_isreg(vp))
+ return EINVAL;
+
+ ra = (struct radvisory *)(ap->a_data);
+ fp = VTOF(vp);
+
+ /* Protect against a size change. */
+ hfs_lock_truncate(VTOC(vp), TRUE);
+
+#if HFS_COMPRESSION
+ if (compressed && (uncompressed_size == -1)) {
+ /* fetching the uncompressed size failed above, so return the error */
+ error = decmpfs_error;
+ } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
+ (!compressed && (ra->ra_offset >= fp->ff_size))) {
+ error = EFBIG;
+ }
+#else /* HFS_COMPRESSION */
+ if (ra->ra_offset >= fp->ff_size) {
+ error = EFBIG;
+ }
+#endif /* HFS_COMPRESSION */
+ else {
+ error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
+ }
+
+ hfs_unlock_truncate(VTOC(vp), TRUE);
+ return (error);
+ }
+
+ case F_READBOOTSTRAP:
+ case F_WRITEBOOTSTRAP:
+ {
+ struct vnode *devvp = NULL;
+ user_fbootstraptransfer_t *user_bootstrapp;
+ int devBlockSize;
+ int error;
+ uio_t auio;
+ daddr64_t blockNumber;
+ u_int32_t blockOffset;
+ u_int32_t xfersize;
+ struct buf *bp;
+ user_fbootstraptransfer_t user_bootstrap;
+
+ if (!vnode_isvroot(vp))
+ return (EINVAL);
+ /* LP64 - when caller is a 64 bit process then we are passed a pointer
+ * to a user_fbootstraptransfer_t else we get a pointer to a
+ * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
+ */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ if (is64bit) {
+ user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
+ }
+ else {
+ user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
+ user_bootstrapp = &user_bootstrap;
+ user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
+ user_bootstrap.fbt_length = bootstrapp->fbt_length;
+ user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
+ }
+
+ if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) ||
+ (user_bootstrapp->fbt_length > 1024)) {
+ return EINVAL;
+ }
+
+ if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
+ return EINVAL;
+
+ devvp = VTOHFS(vp)->hfs_devvp;
+ auio = uio_create(1, user_bootstrapp->fbt_offset,
+ is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
+ (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
+ uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
+
+ devBlockSize = vfs_devblocksize(vnode_mount(vp));
+
+ while (uio_resid(auio) > 0) {
+ blockNumber = uio_offset(auio) / devBlockSize;
+ error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
+ if (error) {
+ if (bp) buf_brelse(bp);
+ uio_free(auio);
+ return error;
+ };
+
+ blockOffset = uio_offset(auio) % devBlockSize;
+ xfersize = devBlockSize - blockOffset;
+ error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
+ if (error) {
+ buf_brelse(bp);
+ uio_free(auio);
+ return error;
+ };
+ if (uio_rw(auio) == UIO_WRITE) {
+ error = VNOP_BWRITE(bp);
+ if (error) {
+ uio_free(auio);
+ return error;
+ }
+ } else {
+ buf_brelse(bp);
+ };
+ };
+ uio_free(auio);
+ };
+ return 0;
+
+ case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
+ {
+ if (is64bit) {
+ *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
+ }
+ else {
+ *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
+ }
+ return 0;
+ }
+
+ case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
+ break;
+
+ case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
+ break;
+
+ case HFS_FSCTL_SET_VERY_LOW_DISK:
+ if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_SET_LOW_DISK:
+ if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
+ || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
+
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_SET_DESIRED_DISK:
+ if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_VOLUME_STATUS:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
+ break;
+
+ case HFS_SET_BOOT_INFO:
+ if (!vnode_isvroot(vp))
+ return(EINVAL);
+ if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
+ return(EACCES); /* must be superuser or owner of filesystem */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+ break;
+
+ case HFS_GET_BOOT_INFO:
+ if (!vnode_isvroot(vp))
+ return(EINVAL);
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ break;
+
+ case HFS_MARK_BOOT_CORRUPT:
+ /* Mark the boot volume corrupt by setting
+ * kHFSVolumeInconsistentBit in the volume header. This will
+ * force fsck_hfs on next mount.
+ */
+ if (!is_suser()) {
+ return EACCES;
+ }
+
+ /* Allowed only on the root vnode of the boot volume */
+ if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
+ !vnode_isvroot(vp)) {
+ return EINVAL;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
+ hfs_mark_volume_inconsistent(hfsmp);
+ break;
+
+ case HFS_FSCTL_GET_JOURNAL_INFO:
+ jip = (struct hfs_journal_info*)ap->a_data;
+
+ if (vp == NULLVP)
+ return EINVAL;
+
+ if (hfsmp->jnl == NULL) {
+ jnl_start = 0;
+ jnl_size = 0;
+ } else {
+ jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
+ jnl_size = (off_t)hfsmp->jnl_size;
+ }
+
+ jip->jstart = jnl_start;
+ jip->jsize = jnl_size;
+ break;
+
+ case HFS_SET_ALWAYS_ZEROFILL: {
+ struct cnode *cp = VTOC(vp);
+
+ if (*(int *)ap->a_data) {
+ cp->c_flag |= C_ALWAYS_ZEROFILL;
+ } else {
+ cp->c_flag &= ~C_ALWAYS_ZEROFILL;
+ }
+ break;
+ }
+
+ default:
+ return (ENOTTY);
+ }
+
+ return 0;
+}
+
+/*
+ * select
+ */
+int
+hfs_vnop_select(__unused struct vnop_select_args *ap)
+/*
+ struct vnop_select_args {
+ vnode_t a_vp;
+ int a_which;
+ int a_fflags;
+ void *a_wql;
+ vfs_context_t a_context;
+ };
+*/
+{
+ /*
+ * We should really check to see if I/O is possible.
+ */
+ return (1);
+}
+
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently its 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 *
 * Returns 0 on success or a VFS errno (translated from a Mac OS error).
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags = 0;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	/* Byte offset of the requested logical block within the fork. */
	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	/*
	 * If the fork has overflow extents, the lookup may consult the
	 * extents b-tree, so it must be locked for the duration.
	 */
	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	/* Map the byte offset to a device block plus contiguous byte count. */
	retval = MacToVFSError(
		MapFileBlockC (HFSTOVCB(hfsmp),
			(FCB*)fp,
			MAXPHYSIO,
			blockposition,
			bnp,
			&bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}
+
+/*
+ * Convert logical block number to file offset.
+ */
+int
+hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
+/*
+ struct vnop_blktooff_args {
+ vnode_t a_vp;
+ daddr64_t a_lblkno;
+ off_t *a_offset;
+ };
+*/
+{
+ if (ap->a_vp == NULL)
+ return (EINVAL);
+ *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
+
+ return(0);
+}
+
+/*
+ * Convert file offset to logical block number.
+ */
+int
+hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
+/*
+ struct vnop_offtoblk_args {
+ vnode_t a_vp;
+ off_t a_offset;
+ daddr64_t *a_lblkno;
+ };
+*/
+{
+ if (ap->a_vp == NULL)
+ return (EINVAL);
+ *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
+
+ return(0);
+}
+
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vnode_t a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr64_t *a_bpn;
		size_t *a_run;
		void *a_poff;
		int a_flags;
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int syslocks = 0;
	int lockflags = 0;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0;
	int tooklock = 0;

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */
	} else {
		if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
			int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			switch(state) {
				case FILE_IS_COMPRESSED:
					return ENOTSUP;
				case FILE_IS_CONVERTING:
					/* if FILE_IS_CONVERTING, we allow blockmap */
					break;
				default:
					printf("invalid state %d for compressed file\n", state);
					/* fall through */
			}
		}
	}
#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {
		return (ENOTSUP);
	}

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	/*
	 * Take the cnode lock ourselves unless the caller's thread
	 * already holds it (or the vnode type never needs it here).
	 */
	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
			tooklock = 1;
		}
	}
	hfsmp = VTOHFS(vp);
	cp = VTOC(vp);
	fp = VTOF(vp);

retry:
	/* Check virtual blocks only when performing write operation */
	/*
	 * A write over loaned (delayed-allocation) blocks needs a
	 * transaction plus both the extents and bitmap locks; a plain
	 * overflow-extents lookup needs only the extents lock.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
			retval = EINVAL;
			goto exit;
		} else {
			started_tr = 1;
		}
		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;
	}

	if (syslocks)
		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		int64_t actbytes;
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
			if (syslocks) {
				hfs_systemfile_unlock(hfsmp, lockflags);
				syslocks = 0;
			}
			goto retry;
		}

		/*
		 * Note: ExtendFileC will Release any blocks on loan and
		 * aquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */

		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

		if (retval) {
			/*
			 * Allocation failed: restore the loaned-block
			 * accounting that ExtendFileC released.
			 */
			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
			if (started_tr) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);
				started_tr = 0;
			}
			goto exit;
		}
	}

	/* Map the requested offset to a device block and contiguous byte count. */
	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
	                       ap->a_bpn, &bytesContAvail);
	if (syslocks) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		syslocks = 0;
	}

	if (started_tr) {
		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);
		started_tr = 0;
	}
	if (retval) {
		/* On write, always return error because virtual blocks, if any,
		 * should have been allocated in ExtendFileC().  We do not
		 * allocate virtual blocks on read, therefore return error
		 * only if no virtual blocks are allocated.  Otherwise we search
		 * rangelist for zero-fills
		 */
		if ((MacToVFSError(retval) != ERANGE) ||
		    (ap->a_flags & VNODE_WRITE) ||
		    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
			goto exit;
		}

		/* Validate if the start offset is within logical file size */
		if (ap->a_foffset > fp->ff_size) {
			goto exit;
		}

		/* Searching file extents has failed for read operation, therefore
		 * search rangelist for any uncommitted holes in the file.
		 */
		overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
		                      ap->a_foffset + (off_t)(ap->a_size - 1),
		                      &invalid_range);
		switch(overlaptype) {
		case RL_OVERLAPISCONTAINED:
			/* start_offset <= rl_start, end_offset >= rl_end */
			if (ap->a_foffset != invalid_range->rl_start) {
				break;
			}
			/* FALLTHROUGH: range begins exactly at the hole */
		case RL_MATCHINGOVERLAP:
			/* start_offset = rl_start, end_offset = rl_end */
		case RL_OVERLAPCONTAINSRANGE:
			/* start_offset >= rl_start, end_offset <= rl_end */
		case RL_OVERLAPSTARTSBEFORE:
			/* start_offset > rl_start, end_offset >= rl_start */
			if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
				bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
			} else {
				bytesContAvail = fp->ff_size - ap->a_foffset;
			}
			if (bytesContAvail > ap->a_size) {
				bytesContAvail = ap->a_size;
			}
			/* -1 signals "no physical block; caller must zero-fill" */
			*ap->a_bpn = (daddr64_t)-1;
			retval = 0;
			break;
		case RL_OVERLAPENDSAFTER:
			/* start_offset < rl_start, end_offset < rl_end */
		case RL_NOOVERLAP:
			break;
		}
		goto exit;
	}

	/* MapFileC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
	                      ap->a_foffset + (off_t)bytesContAvail - 1,
	                      &invalid_range);
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			}
			break;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
			} else {
				/* Valid data exists up to where the hole begins. */
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}
			break;

		case RL_NOOVERLAP:
			break;
		} /* end switch */
		if (bytesContAvail > ap->a_size)
			bytesContAvail = ap->a_size;
	}

exit:
	/* On success report the contiguous run and (always-zero) device offset. */
	if (retval == 0) {
		if (ap->a_run)
			*ap->a_run = bytesContAvail;

		if (ap->a_poff)
			*(int *)ap->a_poff = 0;
	}

	if (tooklock)
		hfs_unlock(cp);

	return (MacToVFSError(retval));
}
+
+
+/*
+ * prepare and issue the I/O
+ * buf_strategy knows how to deal
+ * with requests that require
+ * fragmented I/Os
+ */
+int
+hfs_vnop_strategy(struct vnop_strategy_args *ap)
+{
+ buf_t bp = ap->a_bp;
+ vnode_t vp = buf_vnode(bp);
+
+ return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
+}
+
+static int
+hfs_minorupdate(struct vnode *vp) {
+ struct cnode *cp = VTOC(vp);
+ cp->c_flag &= ~C_MODIFIED;
+ cp->c_touch_acctime = 0;
+ cp->c_touch_chgtime = 0;
+ cp->c_touch_modtime = 0;
+
+ return 0;
+}
+
+static int
+do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
+{
+ register struct cnode *cp = VTOC(vp);
+ struct filefork *fp = VTOF(vp);
+ struct proc *p = vfs_context_proc(context);;
+ kauth_cred_t cred = vfs_context_ucred(context);
+ int retval;
+ off_t bytesToAdd;
+ off_t actualBytesAdded;
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize;
+ struct hfsmount *hfsmp;
+ int lockflags;
+
+ blksize = VTOVCB(vp)->blockSize;
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
+ (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+
+ if (length < 0)
+ return (EINVAL);
+
+ /* This should only happen with a corrupt filesystem */
+ if ((off_t)fp->ff_size < 0)
+ return (EINVAL);
+
+ if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
+ return (EFBIG);
+
+ hfsmp = VTOHFS(vp);
+
+ retval = E_NONE;
+
+ /* Files that are changing size are not hot file candidates. */
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+
+ /*
+ * We cannot just check if fp->ff_size == length (as an optimization)
+ * since there may be extra physical blocks that also need truncation.
+ */
+#if QUOTA
+ if ((retval = hfs_getinoquota(cp)))
+ return(retval);
+#endif /* QUOTA */
+
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of ff_size is 0, length will be at least 1.
+ */
+ if (length > (off_t)fp->ff_size) {
+#if QUOTA
+ retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
+ cred, 0);
+ if (retval)
+ goto Err_Exit;
+#endif /* QUOTA */
+ /*
+ * If we don't have enough physical space then
+ * we need to extend the physical size.
+ */
+ if (length > filebytes) {
+ int eflags;
+ u_int32_t blockHint = 0;
+
+ /* All or nothing and don't round up to clumpsize. */
+ eflags = kEFAllMask | kEFNoClumpMask;
+
+ if (cred && suser(cred, NULL) != 0)
+ eflags |= kEFReserveMask; /* keep a reserve */
+
+ /*
+ * Allocate Journal and Quota files in metadata zone.
+ */
+ if (filebytes == 0 &&
+ hfsmp->hfs_flags & HFS_METADATA_ZONE &&
+ hfs_virtualmetafile(cp)) {
+ eflags |= kEFMetadataMask;
+ blockHint = hfsmp->hfs_metazone_start;
+ }
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ while ((length > filebytes) && (retval == E_NONE)) {
+ bytesToAdd = length - filebytes;
+ retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
+ (FCB*)fp,
+ bytesToAdd,
+ blockHint,
+ eflags,
+ &actualBytesAdded));
+
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (actualBytesAdded == 0 && retval == E_NONE) {
+ if (length > filebytes)
+ length = filebytes;
+ break;
+ }
+ } /* endwhile */
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (hfsmp->jnl) {
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+ }
+
+ hfs_end_transaction(hfsmp);
+
+ if (retval)
+ goto Err_Exit;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+ }
+
+ if (!(flags & IO_NOZEROFILL)) {
+ if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
+ struct rl_entry *invalid_range;
+ off_t zero_limit;
+
+ zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+ if (length < zero_limit) zero_limit = length;
+
+ if (length > (off_t)fp->ff_size) {
+ struct timeval tv;
+
+ /* Extending the file: time to fill out the current last page w. zeroes? */
+ if ((fp->ff_size & PAGE_MASK_64) &&
+ (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
+ fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
+
+ /* There's some valid data at the start of the (current) last page
+ of the file, so zero out the remainder of that page to ensure the
+ entire page contains valid data. Since there is no invalid range
+ possible past the (current) eof, there's no need to remove anything
+ from the invalid range list before calling cluster_write(): */
+ hfs_unlock(cp);
+ retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
+ fp->ff_size, (off_t)0,
+ (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ if (retval) goto Err_Exit;
+
+ /* Merely invalidate the remaining area, if necessary: */
+ if (length > zero_limit) {
+ microuptime(&tv);
+ rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ }
+ } else {
+ /* The page containing the (current) eof is invalid: just add the
+ remainder of the page to the invalid list, along with the area
+ being newly allocated:
+ */
+ microuptime(&tv);
+ rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ };
+ }
+ } else {
+ panic("hfs_truncate: invoked on non-UBC object?!");
+ };
+ }
+ cp->c_touch_modtime = TRUE;
+ fp->ff_size = length;
+
+ } else { /* Shorten the size of the file */
+
+ if ((off_t)fp->ff_size > length) {
+ /* Any space previously marked as invalid is now irrelevant: */
+ rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
+ }
+
+ /*
+ * Account for any unmapped blocks. Note that the new
+ * file length can still end up with unmapped blocks.
+ */
+ if (fp->ff_unallocblocks > 0) {
+ u_int32_t finalblks;
+ u_int32_t loanedBlocks;
+
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+
+ loanedBlocks = fp->ff_unallocblocks;
+ cp->c_blocks -= loanedBlocks;
+ fp->ff_blocks -= loanedBlocks;
+ fp->ff_unallocblocks = 0;
+
+ hfsmp->loanedBlocks -= loanedBlocks;
+
+ finalblks = (length + blksize - 1) / blksize;
+ if (finalblks > fp->ff_blocks) {
+ /* calculate required unmapped blocks */
+ loanedBlocks = finalblks - fp->ff_blocks;
+ hfsmp->loanedBlocks += loanedBlocks;
+
+ fp->ff_unallocblocks = loanedBlocks;
+ cp->c_blocks += loanedBlocks;
+ fp->ff_blocks += loanedBlocks;
+ }
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ }
+
+ /*
+ * For a TBE process the deallocation of the file blocks is
+ * delayed until the file is closed. And hfs_close calls
+ * truncate with the IO_NDELAY flag set. So when IO_NDELAY
+ * isn't set, we make sure this isn't a TBE process.
+ */
+ if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
+#if QUOTA
+ off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
+#endif /* QUOTA */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ if (fp->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
+ (FCB*)fp, length, false));
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (hfsmp->jnl) {
+ if (retval == 0) {
+ fp->ff_size = length;
+ }
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+ }
+ hfs_end_transaction(hfsmp);
+
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (retval)
+ goto Err_Exit;
+#if QUOTA
+ /* These are bytesreleased */
+ (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
+#endif /* QUOTA */
+ }
+ /* Only set update flag if the logical length changes */
+ if ((off_t)fp->ff_size != length)
+ cp->c_touch_modtime = TRUE;
+ fp->ff_size = length;
+ }
+ if (cp->c_mode & (S_ISUID | S_ISGID)) {
+ if (!vfs_context_issuser(context)) {
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ skipupdate = 0;
+ }
+ }
+ if (skipupdate) {
+ retval = hfs_minorupdate(vp);
+ }
+ else {
+ cp->c_touch_chgtime = TRUE; /* status changed */
+ cp->c_touch_modtime = TRUE; /* file data was modified */
+ retval = hfs_update(vp, MNT_WAIT);
+ }
+ if (retval) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ -1, -1, -1, retval, 0);
+ }
+
+Err_Exit:
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
+ (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
+
+ return (retval);
+}
+
+
+
+/*
+ * Truncate a cnode to at most length size, freeing (or adding) the
+ * disk blocks.
+ */
+__private_extern__
+int
+hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
+ int skipupdate, vfs_context_t context)
+{
+ struct filefork *fp = VTOF(vp);
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize, error = 0;
+ struct cnode *cp = VTOC(vp);
+
+ /* Cannot truncate an HFS directory! */
+ if (vnode_isdir(vp)) {
+ return (EISDIR);
+ }
+ /* A swap file cannot change size. */
+ if (vnode_isswap(vp) && (length != 0)) {
+ return (EPERM);
+ }
+
+ blksize = VTOVCB(vp)->blockSize;
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ //
+ // Have to do this here so that we don't wind up with
+ // i/o pending for blocks that are about to be released
+ // if we truncate the file.
+ //
+ // If skipsetsize is set, then the caller is responsible
+ // for the ubc_setsize.
+ //
+ // Even if skipsetsize is set, if the length is zero we
+ // want to call ubc_setsize() because as of SnowLeopard
+ // it will no longer cause any page-ins and it will drop
+ // any dirty pages so that we don't do any i/o that we
+ // don't have to. This also prevents a race where i/o
+ // for truncated blocks may overwrite later data if the
+ // blocks get reallocated to a different file.
+ //
+ if (!skipsetsize || length == 0)
+ ubc_setsize(vp, length);
+
+ // have to loop truncating or growing files that are
+ // really big because otherwise transactions can get
+ // enormous and consume too many kernel resources.
+
+ if (length < filebytes) {
+ while (filebytes > length) {
+ if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ filebytes -= HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = length;
+ }
+ cp->c_flag |= C_FORCEUPDATE;
+ error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
+ if (error)
+ break;
+ }
+ } else if (length > filebytes) {
+ while (filebytes < length) {
+ if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ filebytes += HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = length;
+ }
+ cp->c_flag |= C_FORCEUPDATE;
+ error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
+ if (error)
+ break;
+ }
+ } else /* Same logical size */ {
+
+ error = do_hfs_truncate(vp, length, flags, skipupdate, context);
+ }
+ /* Files that are changing size are not hot file candidates. */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+
+ return (error);
+}
+
+
+
/*
 * Preallocate file storage space.
 *
 * Extends (or, via hfs_truncate, shrinks) the physical allocation of
 * a regular file according to ap->a_length and the ALLOCATE* flags.
 * On return *ap->a_bytesallocated reports how much was added.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		vnode_t a_vp;
		off_t a_length;
		u_int32_t a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	ExtendedVCB *vcb;
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_int32_t fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	int lockflags;

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
		return (EISDIR);
	if (length < (off_t)0)
		return	(EINVAL);

	cp = VTOC(vp);

	/* Serialize with other size changes on this cnode. */
	hfs_lock_truncate(cp, TRUE);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		goto Err_Exit;
	}

	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	vcb = VTOVCB(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	/* ALLOCATEFROMVOL may not shrink the file. */
	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
		retval = EINVAL;
		goto Err_Exit;
	}

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;
	if (hfs_virtualmetafile(cp))
		extendFlags |= kEFMetadataMask;

	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	/* Interpret `length` relative to the requested base. */
	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necesary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

#if QUOTA
		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
				cred, 0);
		if (retval)
			goto Err_Exit;

#endif /* QUOTA */
		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}


		/* Extend in HFS_BIGFILE_SIZE chunks, one transaction per chunk. */
		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}

			if (extendFlags & kEFContigMask) {
				// if we're on a sparse device, this will force it to do a
				// full scan to find the space needed.
				hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
			}

			retval = MacToVFSError(ExtendFileC(vcb,
						(FCB*)fp,
						bytesRequested,
						blockHint,
						extendFlags,
						&actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					/* Advance the hint past what was just allocated. */
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);
		}


		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
		/* These are  bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
#endif /* QUOTA */

		/* Keep the UBC (page cache) size in sync with the new EOF. */
		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			hfs_unlock(cp);
			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

Std_Exit:
	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	hfs_unlock_truncate(cp, TRUE);
	hfs_unlock(cp);
	return (retval);
}
+
+
+/*
+ * Pagein for HFS filesystem
+ *
+ * Reads the requested range of the file into the caller-supplied UPL.
+ * When HFS_COMPRESSION is built in, data forks of compressed files are
+ * first offered to decmpfs; otherwise (and for resource forks) the
+ * request goes through cluster_pagein().  On success the bytes read
+ * are accounted for hot-file recording (ff_bytesread / c_atime).
+ *
+ * Returns 0 on success or an errno from decmpfs/cluster_pagein.
+ */
+int
+hfs_vnop_pagein(struct vnop_pagein_args *ap)
+/*
+	struct vnop_pagein_args {
+		vnode_t a_vp,
+		upl_t a_pl,
+		vm_offset_t a_pl_offset,
+		off_t a_f_offset,
+		size_t a_size,
+		int a_flags
+		vfs_context_t a_context;
+	};
+*/
+{
+	vnode_t vp = ap->a_vp;
+	int error;
+
+#if HFS_COMPRESSION
+	if (VNODE_IS_RSRC(vp)) {
+		/* allow pageins of the resource fork */
+	} else {
+		/*
+		 * decmpfs_pagein_compressed() clears 'compressed' if the file
+		 * turns out not to be compressed after all; in that case we
+		 * fall through to the regular cluster_pagein() path below.
+		 */
+		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+		if (compressed) {
+			error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
+			if (compressed) {
+				if (error == 0) {
+					/* successful page-in, update the access time */
+					VTOC(vp)->c_touch_acctime = TRUE;
+
+					/* compressed files are not hot file candidates */
+					if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+						VTOF(vp)->ff_bytesread = 0;
+					}
+				}
+				return error;
+			}
+			/* otherwise the file was converted back to a regular file while we were reading it */
+		}
+	}
+#endif
+
+	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
+	                       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
+	/*
+	 * Keep track of blocks read.
+	 */
+	if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
+		struct cnode *cp;
+		struct filefork *fp;
+		int bytesread;
+		int took_cnode_lock = 0;
+
+		cp = VTOC(vp);
+		fp = VTOF(vp);
+
+		/* For a short file read from offset 0, only count the bytes
+		 * that actually exist, not the full (page-rounded) request. */
+		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
+			bytesread = fp->ff_size;
+		else
+			bytesread = ap->a_size;
+
+		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
+		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
+			hfs_lock(cp, HFS_FORCE_LOCK);
+			took_cnode_lock = 1;
+		}
+		/*
+		 * If this file hasn't been seen since the start of
+		 * the current sampling period then start over.
+		 */
+		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
+			struct timeval tv;
+
+			fp->ff_bytesread = bytesread;
+			microtime(&tv);
+			cp->c_atime = tv.tv_sec;
+		} else {
+			fp->ff_bytesread += bytesread;
+		}
+		cp->c_touch_acctime = TRUE;
+		if (took_cnode_lock)
+			hfs_unlock(cp);
+	}
+	return (error);
+}
+
+/*
+ * Pageout for HFS filesystem.
+ *
+ * Writes dirty pages of a file back to disk.  Two modes of operation:
+ *
+ *  - "v2" (ap->a_pl == NULL): VFS has opted us into PAGEOUTV2, so we
+ *    create our own UPL, scan it for runs of dirty pages, and issue one
+ *    cluster_pageout() per run.  The truncate lock is taken shared for
+ *    the duration so zero-fill-through-fsync cannot interfere.
+ *
+ *  - legacy (ap->a_pl != NULL): used by the default pager (e.g. for
+ *    swapfiles); a single cluster_pageout() on the caller-supplied UPL.
+ *    No locks are taken here, since the pager has already marked the
+ *    pages busy and grabbing the truncate lock now could deadlock
+ *    (see the long comment below).
+ *
+ * Returns 0 on success or an errno.
+ */
+int
+hfs_vnop_pageout(struct vnop_pageout_args *ap)
+/*
+	struct vnop_pageout_args {
+	   vnode_t a_vp,
+	   upl_t a_pl,
+	   vm_offset_t a_pl_offset,
+	   off_t a_f_offset,
+	   size_t a_size,
+	   int a_flags
+	   vfs_context_t a_context;
+	};
+*/
+{
+	vnode_t vp = ap->a_vp;
+	struct cnode *cp;
+	struct filefork *fp;
+	int retval = 0;
+	off_t filesize;
+	upl_t upl;
+	upl_page_info_t* pl;
+	vm_offset_t a_pl_offset;
+	int a_flags;
+	int is_pageoutv2 = 0;
+	kern_return_t kret;
+
+	cp = VTOC(vp);
+	fp = VTOF(vp);
+
+	/*
+	 * Figure out where the file ends, for pageout purposes. If
+	 * ff_new_size > ff_size, then we're in the middle of extending the
+	 * file via a write, so it is safe (and necessary) that we be able
+	 * to pageout up to that point.
+	 */
+	filesize = fp->ff_size;
+	if (fp->ff_new_size > filesize)
+		filesize = fp->ff_new_size;
+
+	a_flags = ap->a_flags;
+	a_pl_offset = ap->a_pl_offset;
+
+	/*
+	 * we can tell if we're getting the new or old behavior from the UPL
+	 */
+	if ((upl = ap->a_pl) == NULL) {
+		int request_flags;
+
+		is_pageoutv2 = 1;
+		/*
+		 * we're in control of any UPL we commit
+		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
+		 */
+		a_flags &= ~UPL_NOCOMMIT;
+		a_pl_offset = 0;
+
+		/*
+		 * take truncate lock (shared) to guard against
+		 * zero-fill thru fsync interfering, but only for v2
+		 */
+		hfs_lock_truncate(cp, 0);
+
+		/* only pull dirty pages into the UPL; clean ones stay resident */
+		if (a_flags & UPL_MSYNC) {
+			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
+		}
+		else {
+			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
+		}
+		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
+
+		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
+			retval = EINVAL;
+			goto pageout_done;
+		}
+	}
+	/*
+	 * from this point forward upl points at the UPL we're working with
+	 * it was either passed in or we succesfully created it
+	 */
+
+	/*
+	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
+	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
+	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
+	 * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
+	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
+	 * lock in HFS so that we don't lock invert ourselves.
+	 *
+	 * Note that we can still get into this function on behalf of the default pager with
+	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
+	 * since fsync and other writing threads will grab the locks, then mark the
+	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
+	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
+	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
+	 * by the paging/VM system.
+	 */
+
+	if (is_pageoutv2) {
+		off_t f_offset;
+		int offset;
+		int isize;
+		int pg_index;
+		int error;
+		int error_ret = 0;
+
+		isize = ap->a_size;
+		f_offset = ap->a_f_offset;
+
+		/*
+		 * Scan from the back to find the last page in the UPL, so that we
+		 * aren't looking at a UPL that may have already been freed by the
+		 * preceding aborts/completions.
+		 */
+		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
+			if (upl_page_present(pl, --pg_index))
+				break;
+			if (pg_index == 0) {
+				/* no pages were returned at all; free the UPL and bail */
+				ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
+				goto pageout_done;
+			}
+		}
+
+		/*
+		 * initialize the offset variables before we touch the UPL.
+		 * a_f_offset is the position into the file, in bytes
+		 * offset is the position into the UPL, in bytes
+		 * pg_index is the pg# of the UPL we're operating on.
+		 * isize is the offset into the UPL of the last non-clean page.
+		 */
+		isize = ((pg_index + 1) * PAGE_SIZE);
+
+		offset = 0;
+		pg_index = 0;
+
+		while (isize) {
+			int xsize;
+			int num_of_pages;
+
+			if ( !upl_page_present(pl, pg_index)) {
+				/*
+				 * we asked for RET_ONLY_DIRTY, so it's possible
+				 * to get back empty slots in the UPL.
+				 * just skip over them
+				 */
+				f_offset += PAGE_SIZE;
+				offset += PAGE_SIZE;
+				isize -= PAGE_SIZE;
+				pg_index++;
+
+				continue;
+			}
+			/* a present-but-clean page should be impossible given UPL_RET_ONLY_DIRTY */
+			if ( !upl_dirty_page(pl, pg_index)) {
+				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
+			}
+
+			/*
+			 * We know that we have at least one dirty page.
+			 * Now checking to see how many in a row we have
+			 */
+			num_of_pages = 1;
+			xsize = isize - PAGE_SIZE;
+
+			while (xsize) {
+				if ( !upl_dirty_page(pl, pg_index + num_of_pages))
+					break;
+				num_of_pages++;
+				xsize -= PAGE_SIZE;
+			}
+			xsize = num_of_pages * PAGE_SIZE;
+
+			/*
+			 * Before writing, trim any invalid (zero-fill pending)
+			 * ranges this run now covers; take the cnode lock unless
+			 * this thread already owns it.  Skipped for swapfiles.
+			 */
+			if (!vnode_isswap(vp)) {
+				off_t end_of_range;
+				int tooklock;
+
+				tooklock = 0;
+
+				if (cp->c_lockowner != current_thread()) {
+					if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+						/*
+						 * we're in the v2 path, so we are the
+						 * owner of the UPL... we may have already
+						 * processed some of the UPL, so abort it
+						 * from the current working offset to the
+						 * end of the UPL
+						 */
+						ubc_upl_abort_range(upl,
+						                    offset,
+						                    ap->a_size - offset,
+						                    UPL_ABORT_FREE_ON_EMPTY);
+						goto pageout_done;
+					}
+					tooklock = 1;
+				}
+				end_of_range = f_offset + xsize - 1;
+
+				if (end_of_range >= filesize) {
+					end_of_range = (off_t)(filesize - 1);
+				}
+				if (f_offset < filesize) {
+					rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
+					cp->c_flag |= C_MODIFIED;  /* leof is dirty */
+				}
+				if (tooklock) {
+					hfs_unlock(cp);
+				}
+			}
+			if ((error = cluster_pageout(vp, upl, offset, f_offset,
+							xsize, filesize, a_flags))) {
+				if (error_ret == 0)
+					error_ret = error;
+			}
+			f_offset += xsize;
+			offset += xsize;
+			isize -= xsize;
+			pg_index += num_of_pages;
+		}
+		/* capture errnos bubbled out of cluster_pageout if they occurred */
+		if (error_ret != 0) {
+			retval = error_ret;
+		}
+	} /* end block for v2 pageout behavior */
+	else {
+		/* legacy path: same invalid-range trimming, then one pageout call */
+		if (!vnode_isswap(vp)) {
+			off_t end_of_range;
+			int tooklock = 0;
+
+			if (cp->c_lockowner != current_thread()) {
+				if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+					if (!(a_flags & UPL_NOCOMMIT)) {
+						ubc_upl_abort_range(upl,
+						                    a_pl_offset,
+						                    ap->a_size,
+						                    UPL_ABORT_FREE_ON_EMPTY);
+					}
+					goto pageout_done;
+				}
+				tooklock = 1;
+			}
+			end_of_range = ap->a_f_offset + ap->a_size - 1;
+
+			if (end_of_range >= filesize) {
+				end_of_range = (off_t)(filesize - 1);
+			}
+			if (ap->a_f_offset < filesize) {
+				rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
+				cp->c_flag |= C_MODIFIED;  /* leof is dirty */
+			}
+
+			if (tooklock) {
+				hfs_unlock(cp);
+			}
+		}
+		/*
+		 * just call cluster_pageout for old pre-v2 behavior
+		 */
+		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
+				ap->a_size, filesize, a_flags);
+	}
+
+	/*
+	 * If data was written, update the modification time of the file.
+	 * If setuid or setgid bits are set and this process is not the
+	 * superuser then clear the setuid and setgid bits as a precaution
+	 * against tampering.
+	 */
+	if (retval == 0) {
+		cp->c_touch_modtime = TRUE;
+		cp->c_touch_chgtime = TRUE;
+		if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
+		    (vfs_context_suser(ap->a_context) != 0)) {
+			hfs_lock(cp, HFS_FORCE_LOCK);
+			cp->c_mode &= ~(S_ISUID | S_ISGID);
+			hfs_unlock(cp);
+		}
+	}
+
+pageout_done:
+	if (is_pageoutv2) {
+		/* release truncate lock (shared) */
+		hfs_unlock_truncate(cp, 0);
+	}
+	return (retval);
+}
+
+/*
+ * Intercept B-Tree node writes to unswap them if necessary.
+ */
+int
+hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
+{
+ int retval = 0;
+ register struct buf *bp = ap->a_bp;
+ register struct vnode *vp = buf_vnode(bp);
+ BlockDescriptor block;
+
+ /* Trap B-Tree writes */
+ if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
+ (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
+ (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
+ (vp == VTOHFS(vp)->hfc_filevp)) {
+
+ /*
+ * Swap and validate the node if it is in native byte order.
+ * This is always be true on big endian, so we always validate
+ * before writing here. On little endian, the node typically has
+ * been swapped and validated when it was written to the journal,
+ * so we won't do anything here.
+ */
+ if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
+ /* Prepare the block pointer */
+ block.blockHeader = bp;
+ block.buffer = (char *)buf_dataptr(bp);
+ block.blockNum = buf_lblkno(bp);
+ /* not found in cache ==> came from disk */
+ block.blockReadFromDisk = (buf_fromcache(bp) == 0);
+ block.blockSize = buf_count(bp);
+
+ /* Endian un-swap B-Tree node */
+ retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
+ if (retval)
+ panic("hfs_vnop_bwrite: about to write corrupt node!\n");
+ }