/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/* @(#)hfs_readwrite.c 1.0
*
- * (c) 1990, 1992 NeXT Computer, Inc. All Rights Reserved
- * (c) 1998 Apple Computer, Inc. All Rights Reserved
+ * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
*
- *
* hfs_readwrite.c -- vnode operations to deal with reading and writing files.
*
- * MODIFICATION HISTORY:
- * 9-Nov-1999 Scott Roberts hfs_allocate now returns sizes based on allocation block boundaries (#2398794)
- * 3-Feb-1999 Pat Dirks Merged in Joe's change to hfs_truncate to skip vinvalbuf if LEOF isn't changing (#2302796)
- * Removed superfluous (and potentially dangerous) second call to vinvalbuf() in hfs_truncate.
- * 2-Dec-1998 Pat Dirks Added support for read/write bootstrap ioctls.
- * 10-Nov-1998 Pat Dirks Changed read/write/truncate logic to optimize block sizes for first extents of a file.
- * Changed hfs_strategy to correct I/O sizes from cluser code I/O requests in light of
- * different block sizing. Changed bexpand to handle RELEASE_BUFFER flag.
- * 22-Sep-1998 Don Brady Changed truncate zero-fill to use bwrite after several bawrites have been queued.
- * 11-Sep-1998 Pat Dirks Fixed buffering logic to not rely on B_CACHE, which is set for empty buffers that
- * have been pre-read by cluster_read (use b_validend > 0 instead).
- * 27-Aug-1998 Pat Dirks Changed hfs_truncate to use cluster_write in place of bawrite where possible.
- * 25-Aug-1998 Pat Dirks Changed hfs_write to do small device-block aligned writes into buffers without doing
- * read-ahead of the buffer. Added bexpand to deal with incomplete [dirty] buffers.
- * Fixed can_cluster macro to use MAXPHYSIO instead of MAXBSIZE.
- * 19-Aug-1998 Don Brady Remove optimization in hfs_truncate that prevented extra physical blocks from
- * being truncated (radar #2265750). Also set fcb->fcbEOF before calling vinvalbuf.
- * 7-Jul-1998 Pat Dirks Added code to honor IO_NOZEROFILL in hfs_truncate.
- * 16-Jul-1998 Don Brady In hfs_bmap use MAXPHYSIO instead of MAXBSIZE when calling MapFileBlockC (radar #2263753).
- * 16-Jul-1998 Don Brady Fix error handling in hfs_allocate (radar #2252265).
- * 04-Jul-1998 chw Synchronized options in hfs_allocate with flags in call to ExtendFileC
- * 25-Jun-1998 Don Brady Add missing blockNo incrementing to zero fill loop in hfs_truncate.
- * 22-Jun-1998 Don Brady Add bp = NULL assignment after brelse in hfs_read.
- * 4-Jun-1998 Pat Dirks Split off from hfs_vnodeops.c
*/
#include <sys/param.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
+#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
-//#include <mach/machine/vm_types.h>
+#include <sys/kauth.h>
#include <sys/vnode.h>
+#include <sys/vnode_internal.h>
#include <sys/uio.h>
+#include <sys/vfs_context.h>
+#include <sys/fsevents.h>
+#include <kern/kalloc.h>
+#include <sys/disk.h>
+#include <sys/sysctl.h>
+#include <sys/fsctl.h>
#include <miscfs/specfs/specdev.h>
#include <sys/ubc.h>
+#include <sys/ubc_internal.h>
+
#include <vm/vm_pageout.h>
+#include <vm/vm_kern.h>
#include <sys/kdebug.h>
#include "hfs.h"
-#include "hfs_dbg.h"
+#include "hfs_attrlist.h"
#include "hfs_endian.h"
+#include "hfs_fsctl.h"
+#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
-
+#include "hfs_cnode.h"
+#include "hfs_dbg.h"
#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};
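/* 0x7FFFFFFF is 2 GB - 1, the file size ceiling on plain HFS volumes;
   see the HFS_STANDARD/EFBIG check in hfs_vnop_read below. */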
-extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
-
-#if DBG_VOP_TEST_LOCKS
-extern void DbgVopTest(int maxSlots, int retval, VopDbgStoreRec *VopDbgStore, char *funcname);
-#endif
+/* from bsd/hfs/hfs_vfsops.c */
+extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
-#if HFS_DIAGNOSTIC
-void debug_check_blocksizes(struct vnode *vp);
-#endif
+static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
+static int hfs_clonefile(struct vnode *, int, int, int);
+static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
+static int hfs_minorupdate(struct vnode *vp);
+static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
-/*****************************************************************************
-*
-* Operations on vnodes
-*
-*****************************************************************************/
-/*
-#% read vp L L L
-#
- vop_read {
- IN struct vnode *vp;
- INOUT struct uio *uio;
- IN int ioflag;
- IN struct ucred *cred;
+int flush_cache_on_write = 0;
+SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
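+/*
+ * This surfaces as the kern.flush_cache_on_write sysctl. Illustrative
+ * usage (from a root shell): `sysctl -w kern.flush_cache_on_write=1`
+ * makes every write to an uncached file finish with a
+ * DKIOCSYNCHRONIZECACHE ioctl; see the XXXdbg block in hfs_vnop_write.
+ */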
- */
+/*
+ * Read data from a file.
+ */
int
-hfs_read(ap)
-struct vop_read_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
-} */ *ap;
+hfs_vnop_read(struct vnop_read_args *ap)
{
- register struct vnode *vp;
- struct hfsnode *hp;
- register struct uio *uio;
- struct buf *bp;
- daddr_t logBlockNo;
- u_long fragSize, moveSize, startOffset, ioxfersize;
- int devBlockSize = 0;
- off_t bytesRemaining;
- int retval;
- u_short mode;
- FCB *fcb;
-
- DBG_FUNC_NAME("hfs_read");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
- vp = ap->a_vp;
- hp = VTOH(vp);
- fcb = HTOFCB(hp);
- mode = hp->h_meta->h_mode;
- uio = ap->a_uio;
-
-#if HFS_DIAGNOSTIC
- if (uio->uio_rw != UIO_READ)
- panic("%s: mode", funcname);
-#endif
-
- /* Can only read files */
- if (ap->a_vp->v_type != VREG && ap->a_vp->v_type != VLNK) {
- DBG_VOP_LOCKS_TEST(EISDIR);
- return (EISDIR);
- }
- DBG_RW(("\tfile size Ox%X\n", (u_int)fcb->fcbEOF));
- DBG_RW(("\tstarting at offset Ox%X of file, length Ox%X\n", (u_int)uio->uio_offset, (u_int)uio->uio_resid));
+ uio_t uio = ap->a_uio;
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ struct hfsmount *hfsmp;
+ off_t filesize;
+ off_t filebytes;
+ off_t start_resid = uio_resid(uio);
+ off_t offset = uio_offset(uio);
+ int retval = 0;
+
+ /* Preflight checks */
+ if (!vnode_isreg(vp)) {
+ /* can only read regular files */
+ if (vnode_isdir(vp))
+ return (EISDIR);
+ else
+ return (EPERM);
+ }
+ if (start_resid == 0)
+ return (0); /* Nothing left to do */
+ if (offset < 0)
+ return (EINVAL); /* can't read from a negative offset */
+
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
+ return 0;
+ }
+ /* otherwise read the resource fork normally */
+ } else {
+ int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+ if (compressed) {
+ retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
+ if (compressed) {
+ if (retval == 0) {
+ /* successful read, update the access time */
+ VTOC(vp)->c_touch_acctime = TRUE;
+
+ /* compressed files are not hot file candidates */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ VTOF(vp)->ff_bytesread = 0;
+ }
+ }
+ return retval;
+ }
+ /* otherwise the file was converted back to a regular file while we were reading it */
+ retval = 0;
+ }
+ }
+#endif /* HFS_COMPRESSION */
-#if HFS_DIAGNOSTIC
- debug_check_blocksizes(vp);
-#endif
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+ hfsmp = VTOHFS(vp);
- /*
- * If they didn't ask for any data, then we are done.
- */
- if (uio->uio_resid == 0) {
- DBG_VOP_LOCKS_TEST(E_NONE);
- return (E_NONE);
- }
+ /* Protect against a size change. */
+ hfs_lock_truncate(cp, 0);
- /* cant read from a negative offset */
- if (uio->uio_offset < 0) {
- DBG_VOP_LOCKS_TEST(EINVAL);
- return (EINVAL);
- }
+ filesize = fp->ff_size;
+ filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
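+ /* filesize is the logical EOF; filebytes is the physical allocation,
+ i.e. allocated blocks times the volume's allocation block size. */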
+ if (offset > filesize) {
+ if ((hfsmp->hfs_flags & HFS_STANDARD) &&
+ (offset > (off_t)MAXHFSFILESIZE)) {
+ retval = EFBIG;
+ }
+ goto exit;
+ }
- if (uio->uio_offset > fcb->fcbEOF) {
- if ( (!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
- retval = EFBIG;
- else
- retval = E_NONE;
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
+ (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
- DBG_VOP_LOCKS_TEST(retval);
- return (retval);
- }
+ retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
- VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
- (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF, (int)fcb->fcbPLen, 0);
-
- if (UBCISVALID(vp))
- retval = cluster_read(vp, uio, (off_t)fcb->fcbEOF, devBlockSize, 0);
- else {
-
- for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
-
- if ((bytesRemaining = (fcb->fcbEOF - uio->uio_offset)) <= 0)
- break;
-
- logBlockNo = (daddr_t)(uio->uio_offset / PAGE_SIZE_64);
- startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64);
- fragSize = PAGE_SIZE;
-
- if (((logBlockNo * PAGE_SIZE) + fragSize) < fcb->fcbEOF)
- ioxfersize = fragSize;
- else {
- ioxfersize = fcb->fcbEOF - (logBlockNo * PAGE_SIZE);
- ioxfersize = (ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1);
- }
- DBG_RW(("\tat logBlockNo Ox%X, with Ox%lX left to read\n", logBlockNo, (UInt32)uio->uio_resid));
- moveSize = ioxfersize;
- DBG_RW(("\tmoveSize = Ox%lX; ioxfersize = Ox%lX; startOffset = Ox%lX.\n",
- moveSize, ioxfersize, startOffset));
- DBG_ASSERT(moveSize >= startOffset);
- moveSize -= startOffset;
-
- if (bytesRemaining < moveSize)
- moveSize = bytesRemaining;
-
- if (uio->uio_resid < moveSize) {
- moveSize = uio->uio_resid;
- DBG_RW(("\treducing moveSize to Ox%lX (uio->uio_resid).\n", moveSize));
- };
- if (moveSize == 0) {
- break;
- };
-
- DBG_RW(("\tat logBlockNo Ox%X, extent of Ox%lX, xfer of Ox%lX; moveSize = Ox%lX\n", logBlockNo, fragSize, ioxfersize, moveSize));
-
- if (( uio->uio_offset + fragSize) >= fcb->fcbEOF) {
- retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
-
- } else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) {
- daddr_t nextLogBlockNo = logBlockNo + 1;
- int nextsize;
-
- if (((nextLogBlockNo * PAGE_SIZE) +
- (daddr_t)fragSize) < fcb->fcbEOF)
- nextsize = fragSize;
- else {
- nextsize = fcb->fcbEOF - (nextLogBlockNo * PAGE_SIZE);
- nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1);
- }
- retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp);
- } else {
- retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
- };
-
- if (retval != E_NONE) {
- if (bp) {
- brelse(bp);
- bp = NULL;
- }
- break;
- };
- vp->v_lastr = logBlockNo;
-
- /*
- * We should only get non-zero b_resid when an I/O retval
- * has occurred, which should cause us to break above.
- * However, if the short read did not cause an retval,
- * then we want to ensure that we do not uiomove bad
- * or uninitialized data.
- */
- ioxfersize -= bp->b_resid;
-
- if (ioxfersize < moveSize) { /* XXX PPD This should take the offset into account, too! */
- if (ioxfersize == 0)
- break;
- moveSize = ioxfersize;
- }
- if ((startOffset + moveSize) > bp->b_bcount)
- panic("hfs_read: bad startOffset or moveSize\n");
-
- DBG_RW(("\tcopying Ox%lX bytes from %lX; resid = Ox%lX...\n", moveSize, (char *)bp->b_data + startOffset, bp->b_resid));
-
- if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio)))
- break;
-
- if (S_ISREG(mode) &&
- (((startOffset + moveSize) == fragSize) || (uio->uio_offset == fcb->fcbEOF))) {
- bp->b_flags |= B_AGE;
- };
-
- DBG_ASSERT(bp->b_bcount == bp->b_validend);
-
- brelse(bp);
- /* Start of loop resets bp to NULL before reaching outside this block... */
- }
-
- if (bp != NULL) {
- DBG_ASSERT(bp->b_bcount == bp->b_validend);
- brelse(bp);
- };
- }
+ cp->c_touch_acctime = TRUE;
- if (HTOVCB(hp)->vcbSigWord == kHFSPlusSigWord)
- hp->h_nodeflags |= IN_ACCESS;
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
+ (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
- DBG_VOP_LOCKS_TEST(retval);
+ /*
+ * Keep track of the blocks read
+ */
+ if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
+ int took_cnode_lock = 0;
+ off_t bytesread;
- #if HFS_DIAGNOSTIC
- debug_check_blocksizes(vp);
- #endif
+ bytesread = start_resid - uio_resid(uio);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
- (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF, (int)fcb->fcbPLen, 0);
+ /* When ff_bytesread would exceed 32 bits, update it behind the cnode lock. */
+ if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ took_cnode_lock = 1;
+ }
+ /*
+ * If this file hasn't been seen since the start of
+ * the current sampling period then start over.
+ */
+ if (cp->c_atime < hfsmp->hfc_timebase) {
+ struct timeval tv;
- return (retval);
+ fp->ff_bytesread = bytesread;
+ microtime(&tv);
+ cp->c_atime = tv.tv_sec;
+ } else {
+ fp->ff_bytesread += bytesread;
+ }
+ if (took_cnode_lock)
+ hfs_unlock(cp);
+ }
+exit:
+ hfs_unlock_truncate(cp, 0);
+ return (retval);
}
/*
- * Write data to a file or directory.
-#% write vp L L L
-#
- vop_write {
- IN struct vnode *vp;
- INOUT struct uio *uio;
- IN int ioflag;
- IN struct ucred *cred;
-
- */
+ * Write data to a file.
+ */
int
-hfs_write(ap)
-struct vop_write_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
-} */ *ap;
+hfs_vnop_write(struct vnop_write_args *ap)
{
- struct hfsnode *hp = VTOH(ap->a_vp);
- struct uio *uio = ap->a_uio;
- struct vnode *vp = ap->a_vp ;
- struct vnode *dev;
- struct buf *bp;
- struct proc *p, *cp;
- struct timeval tv;
- FCB *fcb = HTOFCB(hp);
- ExtendedVCB *vcb = HTOVCB(hp);
- int devBlockSize = 0;
- daddr_t logBlockNo;
- long fragSize;
- off_t origFileSize, currOffset, writelimit, bytesToAdd;
- off_t actualBytesAdded;
- u_long blkoffset, resid, xfersize, clearSize;
- int flags, ioflag;
- int retval;
- DBG_FUNC_NAME("hfs_write");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
- DBG_RW(("\thfsnode 0x%x (%s)\n", (u_int)hp, H_NAME(hp)));
- DBG_RW(("\tstarting at offset Ox%lX of file, length Ox%lX\n", (UInt32)uio->uio_offset, (UInt32)uio->uio_resid));
-
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
- dev = hp->h_meta->h_devvp;
-
-#if HFS_DIAGNOSTIC
- debug_check_blocksizes(vp);
+ uio_t uio = ap->a_uio;
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ struct hfsmount *hfsmp;
+ kauth_cred_t cred = NULL;
+ off_t origFileSize;
+ off_t writelimit;
+ off_t bytesToAdd = 0;
+ off_t actualBytesAdded;
+ off_t filebytes;
+ off_t offset;
+ ssize_t resid;
+ int eflags;
+ int ioflag = ap->a_ioflag;
+ int retval = 0;
+ int lockflags;
+ int cnode_locked = 0;
+ int partialwrite = 0;
+ int exclusive_lock = 0;
+
+#if HFS_COMPRESSION
+ if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
+ int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
+ switch(state) {
+ case FILE_IS_COMPRESSED:
+ return EACCES;
+ case FILE_IS_CONVERTING:
+ /* if FILE_IS_CONVERTING, we allow writes */
+ break;
+ default:
+ printf("invalid state %d for compressed file\n", state);
+ /* fall through */
+ }
+ }
#endif
- if (uio->uio_offset < 0) {
- DBG_VOP_LOCKS_TEST(EINVAL);
- return (EINVAL);
- }
-
- if (uio->uio_resid == 0) {
- DBG_VOP_LOCKS_TEST(E_NONE);
- return (E_NONE);
- }
-
- if (ap->a_vp->v_type != VREG && ap->a_vp->v_type != VLNK) { /* Can only write files */
- DBG_VOP_LOCKS_TEST(EISDIR);
- return (EISDIR);
- };
-
-#if HFS_DIAGNOSTIC
- if (uio->uio_rw != UIO_WRITE)
- panic("%s: mode", funcname);
-#endif
+ // LP64todo - fix this! uio_resid may be a 64-bit value
+ resid = uio_resid(uio);
+ offset = uio_offset(uio);
- ioflag = ap->a_ioflag;
- uio = ap->a_uio;
- vp = ap->a_vp;
+ if (ioflag & IO_APPEND) {
+ exclusive_lock = 1;
+ }
+
+ if (offset < 0)
+ return (EINVAL);
+ if (resid == 0)
+ return (E_NONE);
+ if (!vnode_isreg(vp))
+ return (EPERM); /* Can only write regular files */
+
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+ hfsmp = VTOHFS(vp);
+
+ eflags = kEFDeferMask; /* defer file block allocations */
+#ifdef HFS_SPARSE_DEV
+ /*
+ * When the underlying device is sparse and space
+ * is low (< 8MB), stop doing delayed allocations
+ * and begin doing synchronous I/O.
+ */
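+ /* (2048 allocation blocks is ~8 MB at a typical 4 KB allocation
+ block size; the actual block size varies per volume.) */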
+ if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
+ (hfs_freeblks(hfsmp, 0) < 2048)) {
+ eflags &= ~kEFDeferMask;
+ ioflag |= IO_SYNC;
+ }
+#endif /* HFS_SPARSE_DEV */
- if (ioflag & IO_APPEND) uio->uio_offset = fcb->fcbEOF;
- if ((hp->h_meta->h_pflags & APPEND) && uio->uio_offset != fcb->fcbEOF)
- return (EPERM);
+again:
+ /* Protect against a size change. */
+ hfs_lock_truncate(cp, exclusive_lock);
- writelimit = uio->uio_offset + uio->uio_resid;
+ if (ioflag & IO_APPEND) {
+ uio_setoffset(uio, fp->ff_size);
+ offset = fp->ff_size;
+ }
+ if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
+ retval = EPERM;
+ goto exit;
+ }
- /*
- * Maybe this should be above the vnode op call, but so long as
- * file servers have no limits, I don't think it matters.
- */
- p = uio->uio_procp;
- if (vp->v_type == VREG && p &&
- writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
- psignal(p, SIGXFSZ);
- return (EFBIG);
- };
- VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
-
- resid = uio->uio_resid;
- origFileSize = fcb->fcbEOF;
- flags = ioflag & IO_SYNC ? B_SYNC : 0;
-
- DBG_RW(("\tLEOF is 0x%lX, PEOF is 0x%lX.\n", fcb->fcbEOF, fcb->fcbPLen));
+ origFileSize = fp->ff_size;
+ writelimit = offset + resid;
+ filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
- /*
- NOTE: In the following loop there are two positions tracked:
- currOffset is the current I/O starting offset. currOffset is never >LEOF; the
- LEOF is nudged along with currOffset as data is zeroed or written.
- uio->uio_offset is the start of the current I/O operation. It may be arbitrarily
- beyond currOffset.
+ /* If the truncate lock is shared, and if we either have virtual
+ * blocks or will need to extend the file, upgrade the truncate
+ * lock to exclusive. If the upgrade fails, we lose the lock
+ * entirely and have to take it exclusive from scratch. Note that we
+ * want to grab the truncate lock exclusive even if we're not allocating new blocks
+ * because we could still be growing past the LEOF.
+ */
+ if ((exclusive_lock == 0) &&
+ ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
+ exclusive_lock = 1;
+ /* Lock upgrade failed and we lost our shared lock, try again */
+ if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
+ goto again;
+ }
+ }
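+ /*
+ * A failed lck_rw_lock_shared_to_exclusive() returns with the lock
+ * dropped entirely, not still held shared, so we must jump back to
+ * 'again' and reacquire instead of simply retrying the upgrade.
+ */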
- The following is true at all times:
+ if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
+ goto exit;
+ }
+ cnode_locked = 1;
+
+ if (!exclusive_lock) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
+ (int)offset, uio_resid(uio), (int)fp->ff_size,
+ (int)filebytes, 0);
+ }
- currOffset <= LEOF <= uio->uio_offset <= writelimit
- */
- currOffset = MIN(uio->uio_offset, fcb->fcbEOF);
+ /* Check if we do not need to extend the file */
+ if (writelimit <= filebytes) {
+ goto sizeok;
+ }
- DBG_RW(("\tstarting I/O loop at 0x%lX.\n", (u_long)currOffset));
+ cred = vfs_context_ucred(ap->a_context);
+ bytesToAdd = writelimit - filebytes;
- cp = current_proc();
+#if QUOTA
+ retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
+ cred, 0);
+ if (retval)
+ goto exit;
+#endif /* QUOTA */
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
- (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF, (int)fcb->fcbPLen, 0);
- retval = 0;
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto exit;
+ }
- /* Now test if we need to extend the file */
- /* Doing so will adjust the fcbPLen for us */
+ while (writelimit > filebytes) {
+ bytesToAdd = writelimit - filebytes;
+ if (cred && suser(cred, NULL) != 0)
+ eflags |= kEFReserveMask;
- while (writelimit > (off_t)fcb->fcbPLen) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
- bytesToAdd = writelimit - fcb->fcbPLen;
- DBG_RW(("\textending file by 0x%lX bytes; 0x%lX blocks free",
- (unsigned long)bytesToAdd, (unsigned long)vcb->freeBlocks));
-
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, cp);
- if (retval != E_NONE)
- break;
-
- retval = MacToVFSError(
- ExtendFileC (vcb,
- fcb,
- bytesToAdd,
- 0,
- kEFContigBit,
- &actualBytesAdded));
-
- (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, cp);
- DBG_VOP_CONT(("\tactual bytes added = 0x%lX bytes, retval = %d...\n", actualBytesAdded, retval));
- if ((actualBytesAdded == 0) && (retval == E_NONE)) retval = ENOSPC;
- if (retval != E_NONE) break;
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
- (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF, (int)fcb->fcbPLen, 0);
- };
-
- if (UBCISVALID(vp) && retval == E_NONE) {
+ /* Files that are changing size are not hot file candidates. */
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+ retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
+ 0, eflags, &actualBytesAdded));
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if ((actualBytesAdded == 0) && (retval == E_NONE))
+ retval = ENOSPC;
+ if (retval != E_NONE)
+ break;
+ filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
+ (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
+ }
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ (void) hfs_end_transaction(hfsmp);
+
+ /*
+ * If we didn't grow the file enough, try a partial write.
+ * POSIX expects this behavior.
+ */
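+ /* e.g. if only some of the blocks needed to back the write could be
+ * allocated, write the bytes that fit rather than failing outright. */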
+ if ((retval == ENOSPC) && (filebytes > offset)) {
+ retval = 0;
+ partialwrite = 1;
+ uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
+ resid -= bytesToAdd;
+ writelimit = filebytes;
+ }
+sizeok:
+ if (retval == E_NONE) {
off_t filesize;
off_t zero_off;
off_t tail_off;
off_t inval_start;
off_t inval_end;
- off_t io_start, io_end;
+ off_t io_start;
int lflag;
struct rl_entry *invalid_range;
- if (writelimit > fcb->fcbEOF)
+ if (writelimit > fp->ff_size)
filesize = writelimit;
else
- filesize = fcb->fcbEOF;
+ filesize = fp->ff_size;
- lflag = (ioflag & IO_SYNC);
+ lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
- if (uio->uio_offset <= fcb->fcbEOF) {
- zero_off = uio->uio_offset & ~PAGE_MASK_64;
+ if (offset <= fp->ff_size) {
+ zero_off = offset & ~PAGE_MASK_64;
/* Check to see whether the area between zero_off and the start
of the transfer is invalid and should be zero-filled
as part of the transfer:
*/
- if (rl_scan(&hp->h_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP) {
- lflag |= IO_HEADZEROFILL;
- };
+ if (offset > zero_off) {
+ if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
+ lflag |= IO_HEADZEROFILL;
+ }
} else {
- off_t eof_page_base = fcb->fcbEOF & ~PAGE_MASK_64;
+ off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
- /* The bytes between fcb->fcbEOF and uio->uio_offset must never be
+ /* The bytes between fp->ff_size and uio->uio_offset must never be
read without being zeroed. The current last block is filled with zeroes
if it holds valid data, but in all cases we merely do a little bookkeeping
to track the area from the end of the current last page to the start of
the area actually written. Note that inval_start, the start of the page
after the current EOF, may be past the start of the write, in which case
the zeroing will be handled by the cluster_write of the actual data.
*/
- inval_start = (fcb->fcbEOF + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
- inval_end = uio->uio_offset & ~PAGE_MASK_64;
- zero_off = fcb->fcbEOF;
+ inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+ inval_end = offset & ~PAGE_MASK_64;
+ zero_off = fp->ff_size;
- if ((fcb->fcbEOF & PAGE_MASK_64) &&
- (rl_scan(&hp->h_invalidranges,
+ if ((fp->ff_size & PAGE_MASK_64) &&
+ (rl_scan(&fp->ff_invalidranges,
eof_page_base,
- fcb->fcbEOF - 1,
+ fp->ff_size - 1,
&invalid_range) != RL_NOOVERLAP)) {
/* The page containing the EOF is not valid, so the
entire page must be made inaccessible now. If the write
starts on a page beyond the one containing the EOF, the
area between the EOF and the start of the write is added
to the invalid range; if it starts on the same page as the
EOF, the bytes between the EOF and the start of the write
are zeroed:
*/
};
if (inval_start < inval_end) {
+ struct timeval tv;
/* There's some range of data that's going to be marked invalid */
if (zero_off < inval_start) {
and the actual write will start on a page past inval_end. Now's the last
chance to zero-fill the page containing the EOF:
*/
- retval = cluster_write(vp, (struct uio *) 0, fcb->fcbEOF, inval_start,
- zero_off, (off_t)0, devBlockSize, lflag | IO_HEADZEROFILL);
+ hfs_unlock(cp);
+ cnode_locked = 0;
+ retval = cluster_write(vp, (uio_t) 0,
+ fp->ff_size, inval_start,
+ zero_off, (off_t)0,
+ lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cnode_locked = 1;
if (retval) goto ioerr_exit;
+ offset = uio_offset(uio);
};
/* Mark the remaining area of the newly allocated space as invalid: */
- rl_add(inval_start, inval_end - 1 , &hp->h_invalidranges);
- zero_off = fcb->fcbEOF = inval_end;
+ rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
+ microuptime(&tv);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ zero_off = fp->ff_size = inval_end;
};
- if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
+ if (offset > zero_off) lflag |= IO_HEADZEROFILL;
};
/* Check to see whether the area between the end of the write and the end of
the page it falls in is invalid and should be zero-filled as part of the
transfer:
*/
tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
if (tail_off > filesize) tail_off = filesize;
if (tail_off > writelimit) {
- if (rl_scan(&hp->h_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
+ if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
lflag |= IO_TAILZEROFILL;
};
};
/* NOTE: If (and ONLY if) the portion of the file about to be written is
* before the current EOF, it might be marked invalid now and must be
* made readable (removed from the invalid ranges) before cluster_write
* tries to write it:
*/
- io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
- io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
- if (io_start < fcb->fcbEOF) {
- rl_remove(io_start, io_end - 1, &hp->h_invalidranges);
+ io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
+ if (io_start < fp->ff_size) {
+ off_t io_end;
+
+ io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
+ rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
};
- retval = cluster_write(vp, uio, fcb->fcbEOF, filesize, zero_off, tail_off, devBlockSize, lflag);
-
- if (uio->uio_offset > fcb->fcbEOF) {
- fcb->fcbEOF = uio->uio_offset;
- ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
+ hfs_unlock(cp);
+ cnode_locked = 0;
+
+ /*
+ * We need to tell UBC the fork's new size BEFORE calling
+ * cluster_write, in case any of the new pages need to be
+ * paged out before cluster_write completes (which does happen
+ * in embedded systems due to extreme memory pressure).
+ * Similarly, we need to tell hfs_vnop_pageout what the new EOF
+ * will be, so that it can pass that on to cluster_pageout, and
+ * allow those pageouts.
+ *
+ * We don't update ff_size yet since we don't want pageins to
+ * be able to see uninitialized data between the old and new
+ * EOF, until cluster_write has completed and initialized that
+ * part of the file.
+ *
+ * The vnode pager relies on the file size last given to UBC via
+ * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
+ * ff_size (whichever is larger). NOTE: ff_new_size is always
+ * zero, unless we are extending the file via write.
+ */
+ if (filesize > fp->ff_size) {
+ fp->ff_new_size = filesize;
+ ubc_setsize(vp, filesize);
}
- if (resid > uio->uio_resid) hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
-
- } else {
+ retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
+ tail_off, lflag | IO_NOZERODIRTY);
+ if (retval) {
+ fp->ff_new_size = 0; /* no longer extending; use ff_size */
+ if (filesize > origFileSize) {
+ ubc_setsize(vp, origFileSize);
+ }
+ goto ioerr_exit;
+ }
+
+ if (filesize > origFileSize) {
+ fp->ff_size = filesize;
+
+ /* Files that are changing size are not hot file candidates. */
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+ }
+ fp->ff_new_size = 0; /* ff_size now has the correct size */
+
+ /* If we wrote some bytes, then touch the change and mod times */
+ if (resid > uio_resid(uio)) {
+ cp->c_touch_chgtime = TRUE;
+ cp->c_touch_modtime = TRUE;
+ }
+ }
+ if (partialwrite) {
+ uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
+ resid += bytesToAdd;
+ }
- while (retval == E_NONE && uio->uio_resid > 0) {
- logBlockNo = currOffset / PAGE_SIZE;
- blkoffset = currOffset & PAGE_MASK;
-
- if (((off_t)(fcb->fcbPLen) - currOffset) < PAGE_SIZE_64)
- fragSize = (off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * PAGE_SIZE_64);
- else
- fragSize = PAGE_SIZE;
- xfersize = fragSize - blkoffset;
-
- DBG_RW(("\tcurrOffset = Ox%lX, logBlockNo = Ox%X, blkoffset = Ox%lX, xfersize = Ox%lX, fragSize = Ox%lX.\n",
- (unsigned long)currOffset, logBlockNo, blkoffset, xfersize, fragSize));
-
- /* Make any adjustments for boundary conditions */
- if (currOffset + (off_t)xfersize > writelimit) {
- xfersize = writelimit - currOffset;
- DBG_RW(("\ttrimming xfersize to 0x%lX to match writelimit (uio_resid)...\n", xfersize));
- };
-
- /*
- * There is no need to read into bp if:
- * We start on a block boundary and will overwrite the whole block
- *
- * OR
- */
- if ((blkoffset == 0) && (xfersize >= fragSize)) {
- DBG_RW(("\tRequesting %ld-byte block Ox%lX w/o read...\n", fragSize, (long)logBlockNo));
-
- bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
- retval = 0;
-
- if (bp->b_blkno == -1) {
- brelse(bp);
- retval = EIO; /* XXX */
- break;
- }
- } else {
-
- if (currOffset == fcb->fcbEOF && blkoffset == 0) {
- bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
- retval = 0;
-
- if (bp->b_blkno == -1) {
- brelse(bp);
- retval = EIO; /* XXX */
- break;
- }
-
- } else {
- /*
- * This I/O transfer is not sufficiently aligned, so read the affected block into a buffer:
- */
- DBG_VOP(("\tRequesting block Ox%X, size = 0x%08lX...\n", logBlockNo, fragSize));
- retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp);
-
- if (retval != E_NONE) {
- if (bp)
- brelse(bp);
- break;
- }
- }
- }
-
- /* See if we are starting to write within file boundaries:
- If not, then we need to present a "hole" for the area between
- the current EOF and the start of the current I/O operation:
-
- Note that currOffset is only less than uio_offset if uio_offset > LEOF...
- */
- if (uio->uio_offset > currOffset) {
- clearSize = MIN(uio->uio_offset - currOffset, xfersize);
- DBG_RW(("\tzeroing Ox%lX bytes Ox%lX bytes into block Ox%X...\n", clearSize, blkoffset, logBlockNo));
- bzero(bp->b_data + blkoffset, clearSize);
- currOffset += clearSize;
- blkoffset += clearSize;
- xfersize -= clearSize;
- };
-
- if (xfersize > 0) {
- DBG_RW(("\tCopying Ox%lX bytes Ox%lX bytes into block Ox%X... ioflag == 0x%X\n",
- xfersize, blkoffset, logBlockNo, ioflag));
- retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio);
- currOffset += xfersize;
- };
- DBG_ASSERT((bp->b_bcount % devBlockSize) == 0);
-
- if (ioflag & IO_SYNC) {
- (void)VOP_BWRITE(bp);
- //DBG_RW(("\tissuing bwrite\n"));
- } else if ((xfersize + blkoffset) == fragSize) {
- //DBG_RW(("\tissuing bawrite\n"));
- bp->b_flags |= B_AGE;
- bawrite(bp);
- } else {
- //DBG_RW(("\tissuing bdwrite\n"));
- bdwrite(bp);
- };
-
- /* Update the EOF if we just extended the file
- (the PEOF has already been moved out and the block mapping table has been updated): */
- if (currOffset > fcb->fcbEOF) {
- DBG_VOP(("\textending EOF to 0x%lX...\n", (UInt32)fcb->fcbEOF));
- fcb->fcbEOF = currOffset;
-
- if (UBCISVALID(vp))
- ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
- };
-
- if (retval || (resid == 0))
- break;
- hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
- };
- };
+ // XXXdbg - see radar 4871353 for more info
+ {
+ if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
+ VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
+ }
+ }
ioerr_exit:
- /*
+ /*
* If we successfully wrote any data, and we are not the superuser
- * we clear the setuid and setgid bits as a precaution against
- * tampering.
- */
- if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
- hp->h_meta->h_mode &= ~(ISUID | ISGID);
-
- if (retval) {
- if (ioflag & IO_UNIT) {
- (void)VOP_TRUNCATE(vp, origFileSize,
- ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
- uio->uio_offset -= resid - uio->uio_resid;
- uio->uio_resid = resid;
- }
- } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
- tv = time;
- retval = VOP_UPDATE(vp, &tv, &tv, 1);
- }
+ * we clear the setuid and setgid bits as a precaution against
+ * tampering.
+ */
+ if (cp->c_mode & (S_ISUID | S_ISGID)) {
+ cred = vfs_context_ucred(ap->a_context);
+ if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
+ if (!cnode_locked) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cnode_locked = 1;
+ }
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ }
+ }
+ if (retval) {
+ if (ioflag & IO_UNIT) {
+ if (!cnode_locked) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cnode_locked = 1;
+ }
+ (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
+ 0, 0, ap->a_context);
+ // LP64todo - fix this! resid needs to be user_ssize_t
+ uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
+ uio_setresid(uio, resid);
+ filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
+ }
+ } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
+ if (!cnode_locked) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cnode_locked = 1;
+ }
+ retval = hfs_update(vp, TRUE);
+ }
+ /* Updating vcbWrCnt doesn't need to be atomic. */
+ hfsmp->vcbWrCnt++;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
+ (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
+exit:
+ if (cnode_locked)
+ hfs_unlock(cp);
+ hfs_unlock_truncate(cp, exclusive_lock);
+ return (retval);
+}
- #if HFS_DIAGNOSTIC
- debug_check_blocksizes(vp);
- #endif
+/* support for the "bulk-access" fcntl */
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
- (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF, (int)fcb->fcbPLen, 0);
+#define CACHE_LEVELS 16
+#define NUM_CACHE_ENTRIES (64*16)
+#define PARENT_IDS_FLAG 0x100
- DBG_VOP_LOCKS_TEST(retval);
- return (retval);
-}
+struct access_cache {
+ int numcached;
+ int cachehits; /* these two for statistics gathering */
+ int lookups;
+ unsigned int *acache;
+ unsigned char *haveaccess;
+};
+struct access_t {
+ uid_t uid; /* IN: effective user id */
+ short flags; /* IN: access requested (i.e. R_OK) */
+ short num_groups; /* IN: number of groups user belongs to */
+ int num_files; /* IN: number of files to process */
+ int *file_ids; /* IN: array of file ids */
+ gid_t *groups; /* IN: array of groups */
+ short *access; /* OUT: access info for each file (0 for 'has access') */
+} __attribute__((unavailable)); // this structure is for reference purposes only
+
+struct user32_access_t {
+ uid_t uid; /* IN: effective user id */
+ short flags; /* IN: access requested (i.e. R_OK) */
+ short num_groups; /* IN: number of groups user belongs to */
+ int num_files; /* IN: number of files to process */
+ user32_addr_t file_ids; /* IN: array of file ids */
+ user32_addr_t groups; /* IN: array of groups */
+ user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+};
-/*
+struct user64_access_t {
+ uid_t uid; /* IN: effective user id */
+ short flags; /* IN: access requested (i.e. R_OK) */
+ short num_groups; /* IN: number of groups user belongs to */
+ int num_files; /* IN: number of files to process */
+ user64_addr_t file_ids; /* IN: array of file ids */
+ user64_addr_t groups; /* IN: array of groups */
+ user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+};
-#% ioctl vp U U U
-#
- vop_ioctl {
- IN struct vnode *vp;
- IN u_long command;
- IN caddr_t data;
- IN int fflag;
- IN struct ucred *cred;
- IN struct proc *p;
- */
+// these are the "extended" versions of the above structures
+// note that it is crucial that they be different sized than
+// the regular version
+struct ext_access_t {
+ uint32_t flags; /* IN: access requested (i.e. R_OK) */
+ uint32_t num_files; /* IN: number of files to process */
+ uint32_t map_size; /* IN: size of the bit map */
+ uint32_t *file_ids; /* IN: Array of file ids */
+ char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
+ short *access; /* OUT: access info for each file (0 for 'has access') */
+ uint32_t num_parents; /* future use */
+ cnid_t *parents; /* future use */
+} __attribute__((unavailable)); // this structure is for reference purposes only
+
+struct user32_ext_access_t {
+ uint32_t flags; /* IN: access requested (i.e. R_OK) */
+ uint32_t num_files; /* IN: number of files to process */
+ uint32_t map_size; /* IN: size of the bit map */
+ user32_addr_t file_ids; /* IN: Array of file ids */
+ user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
+ user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+ uint32_t num_parents; /* future use */
+ user32_addr_t parents; /* future use */
+};
+
+struct user64_ext_access_t {
+ uint32_t flags; /* IN: access requested (i.e. R_OK) */
+ uint32_t num_files; /* IN: number of files to process */
+ uint32_t map_size; /* IN: size of the bit map */
+ user64_addr_t file_ids; /* IN: array of file ids */
+ user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
+ user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
+ uint32_t num_parents;/* future use */
+ user64_addr_t parents;/* future use */
+};
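+
+/*
+ * The bulk-access fcntl distinguishes these variants by the size of the
+ * incoming argument alone (see do_bulk_access_check), which is why the
+ * ext and non-ext structures must never be the same size: equal sizes
+ * would make the two ABIs indistinguishable.
+ */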
-/* ARGSUSED */
-int
-hfs_ioctl(ap)
-struct vop_ioctl_args /* {
- struct vnode *a_vp;
- int a_command;
- caddr_t a_data;
- int a_fflag;
- struct ucred *a_cred;
- struct proc *a_p;
-} */ *ap;
+/*
+ * Perform a binary search for the given parent_id. Return value is
+ * the index if there is a match. If no_match_indexp is non-NULL, it
+ * is assigned the index at which the item should be inserted (it is
+ * set whether or not a match was found).
+ */
+static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
- DBG_FUNC_NAME("hfs_ioctl");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
+ int index=-1;
+ unsigned int lo=0;
+
+ do {
+ unsigned int mid = ((hi - lo)/2) + lo;
+ unsigned int this_id = array[mid];
+
+ if (parent_id == this_id) {
+ hi = mid;
+ break;
+ }
+
+ if (parent_id < this_id) {
+ hi = mid;
+ continue;
+ }
+
+ if (parent_id > this_id) {
+ lo = mid + 1;
+ continue;
+ }
+ } while(lo < hi);
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_UNLOCKED, VOPDBG_UNLOCKED, VOPDBG_UNLOCKED, VOPDBG_POS);
+ /* check if lo and hi converged on the match */
+ if (parent_id == array[hi]) {
+ index = hi;
+ }
+
+ if (no_match_indexp) {
+ *no_match_indexp = hi;
+ }
- switch (ap->a_command) {
+ return index;
+}
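+/*
+ * Example: with acache = {5, 9, 17}, searching for 9 returns index 1;
+ * searching for 12 returns -1 and sets *no_match_indexp to 2, the slot
+ * that keeps the array sorted. When the key is larger than every entry
+ * the returned insertion index is one slot short; add_node compensates
+ * by bumping the index when nodeID > acache[index].
+ */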
+
+
+static int
+lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
+{
+ unsigned int hi;
+ int matches = 0;
+ int index, no_match_index;
- case 1:
- { register struct hfsnode *hp;
- register struct vnode *vp;
- register struct radvisory *ra;
- FCB *fcb;
- int devBlockSize = 0;
- int error;
+ if (cache->numcached == 0) {
+ *indexp = 0;
+ return 0; // table is empty, so insert at index=0 and report no match
+ }
+
+ if (cache->numcached > NUM_CACHE_ENTRIES) {
+ /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
+ cache->numcached, NUM_CACHE_ENTRIES);*/
+ cache->numcached = NUM_CACHE_ENTRIES;
+ }
+
+ hi = cache->numcached - 1;
+
+ index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
+
+ /* if no existing entry found, find index for new one */
+ if (index == -1) {
+ index = no_match_index;
+ matches = 0;
+ } else {
+ matches = 1;
+ }
+
+ *indexp = index;
+ return matches;
+}
- vp = ap->a_vp;
+/*
+ * Add a node to the access_cache at the given index (or do a lookup first
+ * to find the index if -1 is passed in). We currently do a replace rather
+ * than an insert if the cache is full.
+ */
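+/*
+ * (When full, the entry in the last slot is dropped -- numcached is
+ * pulled back to NUM_CACHE_ENTRIES-1 -- and the new node is then
+ * inserted in sorted position, so the cache never grows beyond
+ * NUM_CACHE_ENTRIES entries.)
+ */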
+static void
+add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
+{
+ int lookup_index = -1;
+
+ /* need to do a lookup first if -1 passed for index */
+ if (index == -1) {
+ if (lookup_bucket(cache, &lookup_index, nodeID)) {
+ if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
+ // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
+ cache->haveaccess[lookup_index] = access;
+ }
- VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+ /* mission accomplished */
+ return;
+ } else {
+ index = lookup_index;
+ }
- ra = (struct radvisory *)(ap->a_data);
- hp = VTOH(vp);
+ }
- fcb = HTOFCB(hp);
+ /* if the cache is full, do a replace rather than an insert */
+ if (cache->numcached >= NUM_CACHE_ENTRIES) {
+ //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
+ cache->numcached = NUM_CACHE_ENTRIES-1;
- if (ra->ra_offset >= fcb->fcbEOF) {
- VOP_UNLOCK(vp, 0, ap->a_p);
- DBG_VOP_LOCKS_TEST(EFBIG);
- return (EFBIG);
+ if (index > cache->numcached) {
+ // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
+ index = cache->numcached;
}
- VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
-
- error = advisory_read(vp, fcb->fcbEOF, ra->ra_offset, ra->ra_count, devBlockSize);
- VOP_UNLOCK(vp, 0, ap->a_p);
+ }
- DBG_VOP_LOCKS_TEST(error);
- return (error);
- }
-
- case 2: /* F_READBOOTBLOCKS */
- case 3: /* F_WRITEBOOTBLOCKS */
- {
- struct vnode *vp = ap->a_vp;
- struct hfsnode *hp = VTOH(vp);
- struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
- int devBlockSize;
- int error;
- struct iovec aiov;
- struct uio auio;
- u_long blockNumber;
- u_long blockOffset;
- u_long xfersize;
- struct buf *bp;
+ if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
+ index++;
+ }
- if ((vp->v_flag & VROOT) == 0) return EINVAL;
- if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;
-
- aiov.iov_base = btd->fbt_buffer;
- aiov.iov_len = btd->fbt_length;
-
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = btd->fbt_offset;
- auio.uio_resid = btd->fbt_length;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
- auio.uio_procp = ap->a_p;
-
- VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
-
- while (auio.uio_resid > 0) {
- blockNumber = auio.uio_offset / devBlockSize;
- error = bread(hp->h_meta->h_devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
- if (error) {
- if (bp) brelse(bp);
- return error;
- };
-
- blockOffset = auio.uio_offset % devBlockSize;
- xfersize = devBlockSize - blockOffset;
- error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
- if (error) {
- brelse(bp);
- return error;
- };
- if (auio.uio_rw == UIO_WRITE) {
- error = VOP_BWRITE(bp);
- if (error) return error;
- } else {
- brelse(bp);
- };
- };
- };
- return 0;
-
- case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
- {
- *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
- return 0;
- }
-
- default:
- DBG_VOP_LOCKS_TEST(ENOTTY);
- return (ENOTTY);
+ if (index >= 0 && index < cache->numcached) {
+ /* only do bcopy if we're inserting */
+ bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
+ bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
}
- /* Should never get here */
- return 0;
+ cache->acache[index] = nodeID;
+ cache->haveaccess[index] = access;
+ cache->numcached++;
}
-/* ARGSUSED */
-int
-hfs_select(ap)
-struct vop_select_args /* {
- struct vnode *a_vp;
- int a_which;
- int a_fflags;
- struct ucred *a_cred;
- void *a_wql;
- struct proc *a_p;
-} */ *ap;
-{
- DBG_FUNC_NAME("hfs_select");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
+struct cinfo {
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+ cnid_t parentcnid;
+ u_int16_t recflags;
+};
- /*
- * We should really check to see if I/O is possible.
- */
- DBG_VOP_LOCKS_TEST(1);
- return (1);
+static int
+snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
+{
+ struct cinfo *cip = (struct cinfo *)arg;
+
+ cip->uid = attrp->ca_uid;
+ cip->gid = attrp->ca_gid;
+ cip->mode = attrp->ca_mode;
+ cip->parentcnid = descp->cd_parentcnid;
+ cip->recflags = attrp->ca_recflags;
+
+ return (0);
}
+/*
+ * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
+ * isn't incore, then go to the catalog.
+ */
+static int
+do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
+ struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
+{
+ int error = 0;
+
+ /* if this id matches the one the fsctl was called with, skip the lookup */
+ if (cnid == skip_cp->c_cnid) {
+ cnattrp->ca_uid = skip_cp->c_uid;
+ cnattrp->ca_gid = skip_cp->c_gid;
+ cnattrp->ca_mode = skip_cp->c_mode;
+ cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
+ keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
+ } else {
+ struct cinfo c_info;
+
+ /* otherwise, check the cnode hash in case the file/dir is incore */
+ if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) {
+ cnattrp->ca_uid = c_info.uid;
+ cnattrp->ca_gid = c_info.gid;
+ cnattrp->ca_mode = c_info.mode;
+ cnattrp->ca_recflags = c_info.recflags;
+ keyp->hfsPlus.parentID = c_info.parentcnid;
+ } else {
+ int lockflags;
+
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+ /* lookup this cnid in the catalog */
+ error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ cache->lookups++;
+ }
+ }
+
+ return (error);
+}
/*
- * Mmap a file
- *
- * NB Currently unsupported.
-# XXX - not used
-#
- vop_mmap {
- IN struct vnode *vp;
- IN int fflags;
- IN struct ucred *cred;
- IN struct proc *p;
+ * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
+ * up to CACHE_LEVELS as we progress towards the root.
+ */
+static int
+do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
+ struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
+ struct vfs_context *my_context,
+ char *bitmap,
+ uint32_t map_size,
+ cnid_t* parents,
+ uint32_t num_parents)
+{
+ int myErr = 0;
+ int myResult;
+ HFSCatalogNodeID thisNodeID;
+ unsigned int myPerms;
+ struct cat_attr cnattr;
+ int cache_index = -1, scope_index = -1, scope_idx_start = -1;
+ CatalogKey catkey;
+
+ int i = 0, ids_to_cache = 0;
+ int parent_ids[CACHE_LEVELS];
+
+ thisNodeID = nodeID;
+ while (thisNodeID >= kRootDirID) {
+ myResult = 0; /* default to "no access" */
+
+ /* check the cache before resorting to hitting the catalog */
+
+ /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
+ * to look any further after hitting cached dir */
+
+ if (lookup_bucket(cache, &cache_index, thisNodeID)) {
+ cache->cachehits++;
+ myErr = cache->haveaccess[cache_index];
+ if (scope_index != -1) {
+ if (myErr == ESRCH) {
+ myErr = 0;
+ }
+ } else {
+ scope_index = 0; // so we'll just use the cache result
+ scope_idx_start = ids_to_cache;
+ }
+ myResult = (myErr == 0) ? 1 : 0;
+ goto ExitThisRoutine;
+ }
- */
-/* ARGSUSED */
+ if (parents) {
+ int tmp;
+ tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
+ if (scope_index == -1)
+ scope_index = tmp;
+ if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
+ scope_idx_start = ids_to_cache;
+ }
+ }
-int
-hfs_mmap(ap)
-struct vop_mmap_args /* {
- struct vnode *a_vp;
- int a_fflags;
- struct ucred *a_cred;
- struct proc *a_p;
-} */ *ap;
-{
- DBG_FUNC_NAME("hfs_mmap");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
+ /* remember which parents we want to cache */
+ if (ids_to_cache < CACHE_LEVELS) {
+ parent_ids[ids_to_cache] = thisNodeID;
+ ids_to_cache++;
+ }
+ // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
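+ // e.g. nodeID 27 sets bit (27 & 7) == 3 in byte (27 / 8) % map_size.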
+ if (bitmap && map_size) {
+ bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
+ }
+
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
+ /* do the lookup (checks the cnode hash, then the catalog) */
+ myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
+ if (myErr) {
+ goto ExitThisRoutine; /* no access */
+ }
- DBG_VOP_LOCKS_TEST(EINVAL);
- return (EINVAL);
-}
+ /* Root always gets access. */
+ if (suser(myp_ucred, NULL) == 0) {
+ thisNodeID = catkey.hfsPlus.parentID;
+ myResult = 1;
+ continue;
+ }
+ // if the thing has acl's, do the full permission check
+ if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
+ struct vnode *vp;
+ /* get the vnode for this cnid */
+ myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
+ if ( myErr ) {
+ myResult = 0;
+ goto ExitThisRoutine;
+ }
-/*
- * Seek on a file
- *
- * Nothing to do, so just return.
-# XXX - not used
-# Needs work: Is newoff right? What's it mean?
-#
- vop_seek {
- IN struct vnode *vp;
- IN off_t oldoff;
- IN off_t newoff;
- IN struct ucred *cred;
- */
-/* ARGSUSED */
-int
-hfs_seek(ap)
-struct vop_seek_args /* {
- struct vnode *a_vp;
- off_t a_oldoff;
- off_t a_newoff;
- struct ucred *a_cred;
-} */ *ap;
-{
- DBG_FUNC_NAME("hfs_seek");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
+ thisNodeID = VTOC(vp)->c_parentcnid;
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
+ hfs_unlock(VTOC(vp));
- DBG_VOP_LOCKS_TEST(E_NONE);
- return (E_NONE);
-}
+ if (vnode_vtype(vp) == VDIR) {
+ myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
+ } else {
+ myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
+ }
+ vnode_put(vp);
+ if (myErr) {
+ myResult = 0;
+ goto ExitThisRoutine;
+ }
+ } else {
+ unsigned int flags;
+
+ myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
+ cnattr.ca_mode, hfsmp->hfs_mp,
+ myp_ucred, theProcPtr);
+
+ if (cnattr.ca_mode & S_IFDIR) {
+ flags = R_OK | X_OK;
+ } else {
+ flags = R_OK;
+ }
+ if ( (myPerms & flags) != flags) {
+ myResult = 0;
+ myErr = EACCES;
+ goto ExitThisRoutine; /* no access */
+ }
-/*
- * Bmap converts a the logical block number of a file to its physical block
- * number on the disk.
- */
+ /* up the hierarchy we go */
+ thisNodeID = catkey.hfsPlus.parentID;
+ }
+ }
-/*
- * vp - address of vnode file the file
- * bn - which logical block to convert to a physical block number.
- * vpp - returns the vnode for the block special file holding the filesystem
- * containing the file of interest
- * bnp - address of where to return the filesystem physical block number
-#% bmap vp L L L
-#% bmap vpp - U -
-#
- vop_bmap {
- IN struct vnode *vp;
- IN daddr_t bn;
- OUT struct vnode **vpp;
- IN daddr_t *bnp;
- OUT int *runp;
- */
-/*
- * Converts a logical block number to a physical block, and optionally returns
- * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
- * The physical block number is based on the device block size, currently its 512.
- * The block run is returned in logical blocks, and is the REMAINING amount of blocks
- */
+ /* if here, we have access to this node */
+ myResult = 1;
-int
-hfs_bmap(ap)
-struct vop_bmap_args /* {
- struct vnode *a_vp;
- daddr_t a_bn;
- struct vnode **a_vpp;
- daddr_t *a_bnp;
- int *a_runp;
-} */ *ap;
+ ExitThisRoutine:
+ if (parents && myErr == 0 && scope_index == -1) {
+ myErr = ESRCH;
+ }
+
+ if (myErr) {
+ myResult = 0;
+ }
+ *err = myErr;
+
+ /* cache the parent directory(ies) */
+ for (i = 0; i < ids_to_cache; i++) {
+ if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
+ add_node(cache, -1, parent_ids[i], ESRCH);
+ } else {
+ add_node(cache, -1, parent_ids[i], myErr);
+ }
+ }
+
+ return (myResult);
+}
+
+static int
+do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
+ struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
- struct hfsnode *hp = VTOH(ap->a_vp);
- struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
- int retval = E_NONE;
- daddr_t logBlockSize;
- size_t bytesContAvail = 0;
- off_t blockposition;
- struct proc *p = NULL;
- int lockExtBtree;
- struct rl_entry *invalid_range;
- enum rl_overlaptype overlaptype;
-
-#define DEBUG_BMAP 0
-#if DEBUG_BMAP
- DBG_FUNC_NAME("hfs_bmap");
- DBG_VOP_LOCKS_DECL(2);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);
-
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
- if (ap->a_vpp != NULL) {
- DBG_VOP_LOCKS_INIT(1,*ap->a_vpp, VOPDBG_IGNORE, VOPDBG_UNLOCKED, VOPDBG_IGNORE, VOPDBG_POS);
- } else {
- DBG_VOP_LOCKS_INIT(1,NULL, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
- };
-#endif
+ boolean_t is64bit;
- DBG_IO(("\tMapped blk %d --> ", ap->a_bn));
/*
- * Check for underlying vnode requests and ensure that logical
- * to physical mapping is requested.
+ * NOTE: on entry, the vnode is locked. In case this vnode
+ * happens to be in our list of file_ids, we note it so we can
+ * avoid calling hfs_chashget_nowait() on that id, as that
+ * would cause a "locking against myself" panic.
*/
- if (ap->a_vpp != NULL)
- *ap->a_vpp = VTOH(ap->a_vp)->h_meta->h_devvp;
- if (ap->a_bnp == NULL)
- return (0);
-
- logBlockSize = GetLogicalBlockSize(ap->a_vp);
- blockposition = (off_t)(ap->a_bn * logBlockSize);
-
- lockExtBtree = hasOverflowExtents(hp);
- if (lockExtBtree)
- {
- p = current_proc();
- retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p);
- if (retval)
- return (retval);
+ Boolean check_leaf = true;
+
+ struct user64_ext_access_t *user_access_structp;
+ struct user64_ext_access_t tmp_user_access;
+ struct access_cache cache;
+
+ int error = 0, prev_parent_check_ok=1;
+ unsigned int i;
+
+ short flags;
+ unsigned int num_files = 0;
+ int map_size = 0;
+ int num_parents = 0;
+	int *file_ids = NULL;
+	short *access = NULL;
+	char *bitmap = NULL;
+	cnid_t *parents = NULL;
+ int leaf_index;
+
+ cnid_t cnid;
+ cnid_t prevParent_cnid = 0;
+ unsigned int myPerms;
+ short myaccess = 0;
+ struct cat_attr cnattr;
+ CatalogKey catkey;
+ struct cnode *skip_cp = VTOC(vp);
+ kauth_cred_t cred = vfs_context_ucred(context);
+ proc_t p = vfs_context_proc(context);
+
+ is64bit = proc_is64bit(p);
+
+ /* initialize the local cache and buffers */
+ cache.numcached = 0;
+ cache.cachehits = 0;
+ cache.lookups = 0;
+ cache.acache = NULL;
+ cache.haveaccess = NULL;
+
+ /* struct copyin done during dispatch... need to copy file_id array separately */
+ if (ap->a_data == NULL) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
}
- retval = MacToVFSError(
- MapFileBlockC (HFSTOVCB(hfsmp),
- HTOFCB(hp),
- MAXPHYSIO,
- blockposition,
- ap->a_bnp,
- &bytesContAvail));
+ if (is64bit) {
+ if (arg_size != sizeof(struct user64_ext_access_t)) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
- if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
-
- if (retval == E_NONE) {
- /* Adjust the mapping information for invalid file ranges: */
- overlaptype = rl_scan(&hp->h_invalidranges,
- blockposition,
- blockposition + MAXPHYSIO - 1,
- &invalid_range);
- if (overlaptype != RL_NOOVERLAP) {
- switch(overlaptype) {
- case RL_MATCHINGOVERLAP:
- case RL_OVERLAPCONTAINSRANGE:
- case RL_OVERLAPSTARTSBEFORE:
- /* There's no valid block for this byte offset: */
- *ap->a_bnp = (daddr_t)-1;
- bytesContAvail = invalid_range->rl_end + 1 - blockposition;
- break;
-
- case RL_OVERLAPISCONTAINED:
- case RL_OVERLAPENDSAFTER:
- /* The range of interest hits an invalid block before the end: */
- if (invalid_range->rl_start == blockposition) {
- /* There's actually no valid information to be had starting here: */
- *ap->a_bnp = (daddr_t)-1;
- if ((HTOFCB(hp)->fcbEOF > (invalid_range->rl_end + 1)) &&
- (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
- bytesContAvail = invalid_range->rl_end + 1 - blockposition;
- };
- } else {
- bytesContAvail = invalid_range->rl_start - blockposition;
- };
- break;
- };
- if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
- };
-
- /* Figure out how many read ahead blocks there are */
- if (ap->a_runp != NULL) {
- if (can_cluster(logBlockSize)) {
- /* Make sure this result never goes negative: */
- *ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
- } else {
- *ap->a_runp = 0;
- };
- };
- };
-
- DBG_IO(("%d:%d.\n", *ap->a_bnp, (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1));
-
-#if DEBUG_BMAP
-
- DBG_VOP_LOCKS_TEST(retval);
-#endif
+ user_access_structp = (struct user64_ext_access_t *)ap->a_data;
- if (ap->a_runp) {
- DBG_ASSERT((*ap->a_runp * logBlockSize) < bytesContAvail); /* At least *ap->a_runp blocks left and ... */
- if (can_cluster(logBlockSize)) {
- DBG_ASSERT(bytesContAvail - (*ap->a_runp * logBlockSize) < (2*logBlockSize)); /* ... at most 1 logical block accounted for by current block */
- /* ... plus some sub-logical block sized piece */
- };
- };
+ } else if (arg_size == sizeof(struct user32_access_t)) {
+ struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
- return (retval);
-}
+ // convert an old style bulk-access struct to the new style
+ tmp_user_access.flags = accessp->flags;
+ tmp_user_access.num_files = accessp->num_files;
+ tmp_user_access.map_size = 0;
+ tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
+ tmp_user_access.bitmap = USER_ADDR_NULL;
+ tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
+ tmp_user_access.num_parents = 0;
+ user_access_structp = &tmp_user_access;
-/* blktooff converts logical block number to file offset */
+ } else if (arg_size == sizeof(struct user32_ext_access_t)) {
+ struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
-int
-hfs_blktooff(ap)
-struct vop_blktooff_args /* {
- struct vnode *a_vp;
- daddr_t a_lblkno;
- off_t *a_offset;
-} */ *ap;
-{
+ // up-cast from a 32-bit version of the struct
+ tmp_user_access.flags = accessp->flags;
+ tmp_user_access.num_files = accessp->num_files;
+ tmp_user_access.map_size = accessp->map_size;
+ tmp_user_access.num_parents = accessp->num_parents;
+
+ tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
+ tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
+ tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
+ tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
+
+ user_access_structp = &tmp_user_access;
+ } else {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ map_size = user_access_structp->map_size;
+
+ num_files = user_access_structp->num_files;
+
+	num_parents = user_access_structp->num_parents;
+
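+	/* note: error is still 0 here, so an empty request returns success */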
+ if (num_files < 1) {
+ goto err_exit_bulk_access;
+ }
+ if (num_files > 1024) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ if (num_parents > 1024) {
+ error = EINVAL;
+ goto err_exit_bulk_access;
+ }
+
+ file_ids = (int *) kalloc(sizeof(int) * num_files);
+ access = (short *) kalloc(sizeof(short) * num_files);
+ if (map_size) {
+ bitmap = (char *) kalloc(sizeof(char) * map_size);
+ }
+
+ if (num_parents) {
+ parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
+ }
+
+ cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
+ cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
+
+ if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
+ if (file_ids) {
+ kfree(file_ids, sizeof(int) * num_files);
+ }
+ if (bitmap) {
+ kfree(bitmap, sizeof(char) * map_size);
+ }
+ if (access) {
+ kfree(access, sizeof(short) * num_files);
+ }
+ if (cache.acache) {
+ kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
+ }
+ if (cache.haveaccess) {
+ kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
+ }
+ if (parents) {
+ kfree(parents, sizeof(cnid_t) * num_parents);
+ }
+ return ENOMEM;
+ }
+
+	// make sure the bitmap is zeroed out...
+ if (bitmap) {
+ bzero(bitmap, (sizeof(char) * map_size));
+ }
+
+ if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
+ num_files * sizeof(int)))) {
+ goto err_exit_bulk_access;
+ }
+
+ if (num_parents) {
+ if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
+ num_parents * sizeof(cnid_t)))) {
+ goto err_exit_bulk_access;
+ }
+ }
+
+ flags = user_access_structp->flags;
+ if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
+ flags = R_OK;
+ }
+
+ /* check if we've been passed leaf node ids or parent ids */
+ if (flags & PARENT_IDS_FLAG) {
+ check_leaf = false;
+ }
+
+ /* Check access to each file_id passed in */
+ for (i = 0; i < num_files; i++) {
+		leaf_index = -1;
+ cnid = (cnid_t) file_ids[i];
+
+ /* root always has access */
+ if ((!parents) && (!suser(cred, NULL))) {
+ access[i] = 0;
+ continue;
+ }
+
+ if (check_leaf) {
+ /* do the lookup (checks the cnode hash, then the catalog) */
+ error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
+ if (error) {
+ access[i] = (short) error;
+ continue;
+ }
+
+ if (parents) {
+ // Check if the leaf matches one of the parent scopes
+ leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
+ if (leaf_index >= 0 && parents[leaf_index] == cnid)
+ prev_parent_check_ok = 0;
+ else if (leaf_index >= 0)
+ prev_parent_check_ok = 1;
+ }
+
+			// if the thing has ACLs, do the full permission check
+ if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
+ struct vnode *cvp;
+ int myErr = 0;
+ /* get the vnode for this cnid */
+ myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
+ if ( myErr ) {
+ access[i] = myErr;
+ continue;
+ }
+
+ hfs_unlock(VTOC(cvp));
+
+ if (vnode_vtype(cvp) == VDIR) {
+ myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
+ } else {
+ myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
+ }
+
+ vnode_put(cvp);
+ if (myErr) {
+ access[i] = myErr;
+ continue;
+ }
+ } else {
+				/* before calling do_access_check(), check the target file for read access */
+ myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
+ cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
+
+ /* fail fast if no access */
+ if ((myPerms & flags) == 0) {
+ access[i] = EACCES;
+ continue;
+ }
+ }
+ } else {
+ /* we were passed an array of parent ids */
+ catkey.hfsPlus.parentID = cnid;
+ }
+
+		/* if the previous file had the same parent and had access, we're done */
+ if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
+ cache.cachehits++;
+ access[i] = 0;
+ continue;
+ }
+
+ myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
+					   skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);
+
+ if (myaccess || (error == ESRCH && leaf_index != -1)) {
+ access[i] = 0; // have access.. no errors to report
+ } else {
+ access[i] = (error != 0 ? (short) error : EACCES);
+ }
+
+ prevParent_cnid = catkey.hfsPlus.parentID;
+ }
+
+ /* copyout the access array */
+ if ((error = copyout((caddr_t)access, user_access_structp->access,
+ num_files * sizeof (short)))) {
+ goto err_exit_bulk_access;
+ }
+ if (map_size && bitmap) {
+ if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
+ map_size * sizeof (char)))) {
+ goto err_exit_bulk_access;
+ }
+ }
+
+
+ err_exit_bulk_access:
+
+ //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
+
+ if (file_ids)
+ kfree(file_ids, sizeof(int) * num_files);
+ if (parents)
+ kfree(parents, sizeof(cnid_t) * num_parents);
+ if (bitmap)
+ kfree(bitmap, sizeof(char) * map_size);
+ if (access)
+ kfree(access, sizeof(short) * num_files);
+ if (cache.acache)
+ kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
+ if (cache.haveaccess)
+ kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
+
+ return (error);
+}
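+
+/*
+ * Illustrative userspace sketch (not part of this file): one plausible way
+ * to drive the extended bulk-access check via fsctl(2).  The command macro
+ * and struct layout are assumed to match what hfs_fsctl.h exports to user
+ * space; treat the identifiers below as assumptions, not a stable API.
+ *
+ *	int file_ids[2] = { 22, 37 };	// catalog node IDs to check
+ *	short access[2];		// per-file result: 0 or an errno
+ *	struct ext_access_t req = {
+ *		.flags = R_OK,		// unset flags fall back to R_OK
+ *		.num_files = 2,		// must be 1..1024 (see checks above)
+ *		.map_size = 0,		// no parent bitmap requested
+ *		.file_ids = file_ids,
+ *		.bitmap = NULL,
+ *		.access = access,
+ *		.num_parents = 0,
+ *		.parents = NULL,
+ *	};
+ *	if (fsctl("/Volumes/MyVol", HFS_EXT_BULKACCESS, &req, 0) == 0) {
+ *		// access[i] == 0 means access was granted for file_ids[i]
+ *	}
+ */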
+
+
+/* end "bulk-access" support */
+
+
+/*
+ * Callback for use with freeze ioctl.
+ */
+static int
+hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
+{
+ vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
+
+ return 0;
+}
+
+/*
+ * Control filesystem operating characteristics.
+ */
+int
+hfs_vnop_ioctl( struct vnop_ioctl_args /* {
+ vnode_t a_vp;
+ int a_command;
+ caddr_t a_data;
+ int a_fflag;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ struct vnode * vp = ap->a_vp;
+ struct hfsmount *hfsmp = VTOHFS(vp);
+ vfs_context_t context = ap->a_context;
+ kauth_cred_t cred = vfs_context_ucred(context);
+ proc_t p = vfs_context_proc(context);
+ struct vfsstatfs *vfsp;
+ boolean_t is64bit;
+ off_t jnl_start, jnl_size;
+ struct hfs_journal_info *jip;
+#if HFS_COMPRESSION
+ int compressed = 0;
+ off_t uncompressed_size = -1;
+ int decmpfs_error = 0;
+
+ if (ap->a_command == F_RDADVISE) {
+ /* we need to inspect the decmpfs state of the file as early as possible */
+ compressed = hfs_file_is_compressed(VTOC(vp), 0);
+ if (compressed) {
+ if (VNODE_IS_RSRC(vp)) {
+ /* if this is the resource fork, treat it as if it were empty */
+ uncompressed_size = 0;
+ } else {
+ decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
+ if (decmpfs_error != 0) {
+ /* failed to get the uncompressed size, we'll check for this later */
+ uncompressed_size = -1;
+ }
+ }
+ }
+ }
+#endif /* HFS_COMPRESSION */
+
+ is64bit = proc_is64bit(p);
+
+ switch (ap->a_command) {
+
+ case HFS_GETPATH:
+ {
+ struct vnode *file_vp;
+ cnid_t cnid;
+ int outlen;
+ char *bufptr;
+ int error;
+
+ /* Caller must be owner of file system. */
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES);
+ }
+ /* Target vnode must be file system's root. */
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+ bufptr = (char *)ap->a_data;
+ cnid = strtoul(bufptr, NULL, 10);
+
+ /* We need to call hfs_vfs_vget to leverage the code that will
+ * fix the origin list for us if needed, as opposed to calling
+		 * hfs_vget, since we will need the parent for the build_path call.
+ */
+
+ if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
+ return (error);
+ }
+ error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
+ vnode_put(file_vp);
+
+ return (error);
+ }
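+
+	/*
+	 * Usage sketch (hypothetical; the exact macro names live in
+	 * hfs_fsctl.h): the caller passes the target cnid as a decimal
+	 * string and receives the full path back in the same buffer,
+	 * which must be large enough to hold a pathname_t:
+	 *
+	 *	char buf[MAXPATHLEN];
+	 *	snprintf(buf, sizeof(buf), "%u", cnid);
+	 *	fsctl("/Volumes/MyVol", HFS_GETPATH, buf, 0);
+	 */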
+
+ case HFS_PREV_LINK:
+ case HFS_NEXT_LINK:
+ {
+ cnid_t linkfileid;
+ cnid_t nextlinkid;
+ cnid_t prevlinkid;
+ int error;
+
+ /* Caller must be owner of file system. */
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES);
+ }
+ /* Target vnode must be file system's root. */
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+ linkfileid = *(cnid_t *)ap->a_data;
+ if (linkfileid < kHFSFirstUserCatalogNodeID) {
+ return (EINVAL);
+ }
+ if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
+ return (error);
+ }
+ if (ap->a_command == HFS_NEXT_LINK) {
+ *(cnid_t *)ap->a_data = nextlinkid;
+ } else {
+ *(cnid_t *)ap->a_data = prevlinkid;
+ }
+ return (0);
+ }
+
+ case HFS_RESIZE_PROGRESS: {
+
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+ /* file system must not be mounted read-only */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
+ return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
+ }
+
+ case HFS_RESIZE_VOLUME: {
+ u_int64_t newsize;
+ u_int64_t cursize;
+
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+
+ /* filesystem must not be mounted read only */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ newsize = *(u_int64_t *)ap->a_data;
+ cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
+
+ if (newsize > cursize) {
+ return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
+ } else if (newsize < cursize) {
+ return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
+ } else {
+ return (0);
+ }
+ }
+ case HFS_CHANGE_NEXT_ALLOCATION: {
+ int error = 0; /* Assume success */
+ u_int32_t location;
+
+ if (vnode_vfsisrdonly(vp)) {
+ return (EROFS);
+ }
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ if (!vnode_isvroot(vp)) {
+ return (EINVAL);
+ }
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ location = *(u_int32_t *)ap->a_data;
+ if ((location >= hfsmp->allocLimit) &&
+ (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
+ error = EINVAL;
+ goto fail_change_next_allocation;
+ }
+ /* Return previous value. */
+ *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
+ if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
+			/* On the magic value for location, set nextAllocation to the next
+			 * block after the metadata zone and set a flag in the mount
+			 * structure to indicate that nextAllocation should not be updated again.
+ */
+ if (hfsmp->hfs_metazone_end != 0) {
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
+ }
+ hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
+ } else {
+ hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
+ }
+ MarkVCBDirty(hfsmp);
+fail_change_next_allocation:
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ return (error);
+ }
+
+#ifdef HFS_SPARSE_DEV
+ case HFS_SETBACKINGSTOREINFO: {
+ struct vnode * bsfs_rootvp;
+ struct vnode * di_vp;
+ struct hfs_backingstoreinfo *bsdata;
+ int error = 0;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+ return (EALREADY);
+ }
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
+ if (bsdata == NULL) {
+ return (EINVAL);
+ }
+ if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
+ return (error);
+ }
+ if ((error = vnode_getwithref(di_vp))) {
+ file_drop(bsdata->backingfd);
+ return(error);
+ }
+
+ if (vnode_mount(vp) == vnode_mount(di_vp)) {
+ (void)vnode_put(di_vp);
+ file_drop(bsdata->backingfd);
+ return (EINVAL);
+ }
+
+ /*
+ * Obtain the backing fs root vnode and keep a reference
+ * on it. This reference will be dropped in hfs_unmount.
+ */
+ error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
+ if (error) {
+ (void)vnode_put(di_vp);
+ file_drop(bsdata->backingfd);
+ return (error);
+ }
+ vnode_ref(bsfs_rootvp);
+ vnode_put(bsfs_rootvp);
+
+ hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
+ hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
+ hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
+ hfsmp->hfs_sparsebandblks *= 4;
+
+ vfs_markdependency(hfsmp->hfs_mp);
+
+ /*
+ * If the sparse image is on a sparse image file (as opposed to a sparse
+ * bundle), then we may need to limit the free space to the maximum size
+ * of a file on that volume. So we query (using pathconf), and if we get
+ * a meaningful result, we cache the number of blocks for later use in
+ * hfs_freeblks().
+ */
+ hfsmp->hfs_backingfs_maxblocks = 0;
+ if (vnode_vtype(di_vp) == VREG) {
+ int terr;
+ int hostbits;
+ terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
+ if (terr == 0 && hostbits != 0 && hostbits < 64) {
+ u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
+
+ hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
+ }
+ }
+
+ (void)vnode_put(di_vp);
+ file_drop(bsdata->backingfd);
+ return (0);
+ }
+ case HFS_CLRBACKINGSTOREINFO: {
+ struct vnode * tmpvp;
+
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ if (suser(cred, NULL) &&
+ kauth_cred_getuid(cred) != vfsp->f_owner) {
+ return (EACCES); /* must be owner of file system */
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
+ if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
+ hfsmp->hfs_backingfs_rootvp) {
+
+ hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
+ tmpvp = hfsmp->hfs_backingfs_rootvp;
+ hfsmp->hfs_backingfs_rootvp = NULLVP;
+ hfsmp->hfs_sparsebandblks = 0;
+ vnode_rele(tmpvp);
+ }
+ return (0);
+ }
+#endif /* HFS_SPARSE_DEV */
+
+ case F_FREEZE_FS: {
+ struct mount *mp;
+
+ mp = vnode_mount(vp);
+ hfsmp = VFSTOHFS(mp);
+
+ if (!(hfsmp->jnl))
+ return (ENOTSUP);
+
+ vfsp = vfs_statfs(mp);
+
+ if (kauth_cred_getuid(cred) != vfsp->f_owner &&
+ !kauth_cred_issuser(cred))
+ return (EACCES);
+
+ lck_rw_lock_exclusive(&hfsmp->hfs_insync);
+
+		// flush things before we get started to try to prevent
+ // dirty data from being paged out while we're frozen.
+ // note: can't do this after taking the lock as it will
+ // deadlock against ourselves.
+ vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
+ hfs_global_exclusive_lock_acquire(hfsmp);
+
+ // DO NOT call hfs_journal_flush() because that takes a
+ // shared lock on the global exclusive lock!
+ journal_flush(hfsmp->jnl);
+
+ // don't need to iterate on all vnodes, we just need to
+ // wait for writes to the system files and the device vnode
+ if (HFSTOVCB(hfsmp)->extentsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
+ if (HFSTOVCB(hfsmp)->catalogRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
+ if (HFSTOVCB(hfsmp)->allocationsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
+ if (hfsmp->hfs_attribute_vp)
+ vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
+ vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
+
+ hfsmp->hfs_freezing_proc = current_proc();
+
+ return (0);
+ }
+
+ case F_THAW_FS: {
+ vfsp = vfs_statfs(vnode_mount(vp));
+ if (kauth_cred_getuid(cred) != vfsp->f_owner &&
+ !kauth_cred_issuser(cred))
+ return (EACCES);
+
+ // if we're not the one who froze the fs then we
+ // can't thaw it.
+ if (hfsmp->hfs_freezing_proc != current_proc()) {
+ return EPERM;
+ }
+
+ // NOTE: if you add code here, also go check the
+ // code that "thaws" the fs in hfs_vnop_close()
+ //
+ hfsmp->hfs_freezing_proc = NULL;
+ hfs_global_exclusive_lock_release(hfsmp);
+ lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
+
+ return (0);
+ }
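+
+	/*
+	 * Usage sketch (hypothetical): freezing is normally driven from user
+	 * space with fcntl(2) on a descriptor for the volume, assuming the
+	 * kernel routes F_FREEZE_FS/F_THAW_FS through to this handler.  The
+	 * hfs_freezing_proc check above is why the thaw must come from the
+	 * same process that froze the volume:
+	 *
+	 *	int fd = open("/Volumes/MyVol", O_RDONLY);
+	 *	if (fcntl(fd, F_FREEZE_FS, 0) == 0) {
+	 *		// ... snapshot the block device here ...
+	 *		fcntl(fd, F_THAW_FS, 0);	// same process only
+	 *	}
+	 *	close(fd);
+	 */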
+
+ case HFS_BULKACCESS_FSCTL: {
+ int size;
+
+ if (hfsmp->hfs_flags & HFS_STANDARD) {
+ return EINVAL;
+ }
+
+ if (is64bit) {
+ size = sizeof(struct user64_access_t);
+ } else {
+ size = sizeof(struct user32_access_t);
+ }
+
+ return do_bulk_access_check(hfsmp, vp, ap, size, context);
+ }
+
+ case HFS_EXT_BULKACCESS_FSCTL: {
+ int size;
+
+ if (hfsmp->hfs_flags & HFS_STANDARD) {
+ return EINVAL;
+ }
+
+ if (is64bit) {
+ size = sizeof(struct user64_ext_access_t);
+ } else {
+ size = sizeof(struct user32_ext_access_t);
+ }
+
+ return do_bulk_access_check(hfsmp, vp, ap, size, context);
+ }
+
+ case HFS_SETACLSTATE: {
+ int state;
+
+ if (ap->a_data == NULL) {
+ return (EINVAL);
+ }
+
+ vfsp = vfs_statfs(HFSTOVFS(hfsmp));
+ state = *(int *)ap->a_data;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+		// the super-user can enable or disable ACLs on a volume.
+		// the volume owner can only enable ACLs
+ if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
+ return (EPERM);
+ }
+ if (state == 0 || state == 1)
+ return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
+ else
+ return (EINVAL);
+ }
+
+ case HFS_SET_XATTREXTENTS_STATE: {
+ int state;
+
+ if (ap->a_data == NULL) {
+ return (EINVAL);
+ }
+
+ state = *(int *)ap->a_data;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
+ /* Super-user can enable or disable extent-based extended
+ * attribute support on a volume
+ */
+ if (!is_suser()) {
+ return (EPERM);
+ }
+ if (state == 0 || state == 1)
+ return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
+ else
+ return (EINVAL);
+ }
+
+ case F_FULLFSYNC: {
+ int error;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+ if (error == 0) {
+ error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
+ hfs_unlock(VTOC(vp));
+ }
+
+ return error;
+ }
+
+ case F_CHKCLEAN: {
+ register struct cnode *cp;
+ int error;
+
+ if (!vnode_isreg(vp))
+ return EINVAL;
+
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
+ if (error == 0) {
+ cp = VTOC(vp);
+ /*
+			 * used by a regression test to determine if
+			 * all the dirty pages (via write) have been cleaned
+			 * after a call to 'fsync'.
+ */
+ error = is_file_clean(vp, VTOF(vp)->ff_size);
+ hfs_unlock(cp);
+ }
+ return (error);
+ }
+
+ case F_RDADVISE: {
+ register struct radvisory *ra;
+ struct filefork *fp;
+ int error;
+
+ if (!vnode_isreg(vp))
+ return EINVAL;
+
+ ra = (struct radvisory *)(ap->a_data);
+ fp = VTOF(vp);
+
+ /* Protect against a size change. */
+ hfs_lock_truncate(VTOC(vp), TRUE);
+
+#if HFS_COMPRESSION
+ if (compressed && (uncompressed_size == -1)) {
+ /* fetching the uncompressed size failed above, so return the error */
+ error = decmpfs_error;
+ } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
+ (!compressed && (ra->ra_offset >= fp->ff_size))) {
+ error = EFBIG;
+ }
+#else /* HFS_COMPRESSION */
+ if (ra->ra_offset >= fp->ff_size) {
+ error = EFBIG;
+ }
+#endif /* HFS_COMPRESSION */
+ else {
+ error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
+ }
+
+ hfs_unlock_truncate(VTOC(vp), TRUE);
+ return (error);
+ }
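+
+	/*
+	 * Usage sketch: F_RDADVISE arrives here via fcntl(2); the caller asks
+	 * for a byte range to be read into the cache ahead of time.  A minimal
+	 * example (error handling omitted):
+	 *
+	 *	struct radvisory ra = { .ra_offset = 0, .ra_count = 64 * 1024 };
+	 *	fcntl(fd, F_RDADVISE, &ra);	// fd is an open file on this volume
+	 */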
+
+ case F_READBOOTSTRAP:
+ case F_WRITEBOOTSTRAP:
+ {
+ struct vnode *devvp = NULL;
+ user_fbootstraptransfer_t *user_bootstrapp;
+ int devBlockSize;
+ int error;
+ uio_t auio;
+ daddr64_t blockNumber;
+ u_int32_t blockOffset;
+ u_int32_t xfersize;
+ struct buf *bp;
+ user_fbootstraptransfer_t user_bootstrap;
+
+ if (!vnode_isvroot(vp))
+ return (EINVAL);
+		/* LP64 - when the caller is a 64-bit process we are passed a pointer
+		 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
+		 * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t
+ */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ if (is64bit) {
+ user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
+ }
+ else {
+ user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
+ user_bootstrapp = &user_bootstrap;
+ user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
+ user_bootstrap.fbt_length = bootstrapp->fbt_length;
+ user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
+ }
+ if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
+ return EINVAL;
+
+ devvp = VTOHFS(vp)->hfs_devvp;
+ auio = uio_create(1, user_bootstrapp->fbt_offset,
+ is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
+ (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
+ uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
+
+ devBlockSize = vfs_devblocksize(vnode_mount(vp));
+
+ while (uio_resid(auio) > 0) {
+ blockNumber = uio_offset(auio) / devBlockSize;
+ error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
+ if (error) {
+ if (bp) buf_brelse(bp);
+ uio_free(auio);
+ return error;
+ };
+
+ blockOffset = uio_offset(auio) % devBlockSize;
+ xfersize = devBlockSize - blockOffset;
+ error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
+ if (error) {
+ buf_brelse(bp);
+ uio_free(auio);
+ return error;
+ };
+ if (uio_rw(auio) == UIO_WRITE) {
+ error = VNOP_BWRITE(bp);
+ if (error) {
+ uio_free(auio);
+ return error;
+ }
+ } else {
+ buf_brelse(bp);
+ };
+ };
+ uio_free(auio);
+ };
+ return 0;
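+
+	/*
+	 * Example of the device-block math above: with devBlockSize 512, a
+	 * bootstrap offset of 600 falls in block 600 / 512 = 1 at offset
+	 * 600 % 512 = 88 within that block, so the first pass through the
+	 * loop moves at most 512 - 88 = 424 bytes before advancing.
+	 */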
+
+	case _IOC(IOC_OUT, 'h', 4, 0):     /* Create date in local time */
+ {
+ if (is64bit) {
+ *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
+ }
+ else {
+ *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
+ }
+ return 0;
+ }
+
+ case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
+ break;
+
+ case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
+ break;
+
+ case HFS_FSCTL_SET_VERY_LOW_DISK:
+ if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_SET_LOW_DISK:
+ if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
+ || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
+
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_SET_DESIRED_DISK:
+ if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_VOLUME_STATUS:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
+ break;
+
+ case HFS_SET_BOOT_INFO:
+ if (!vnode_isvroot(vp))
+ return(EINVAL);
+ if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
+ return(EACCES); /* must be superuser or owner of filesystem */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+ break;
+
+ case HFS_GET_BOOT_INFO:
+ if (!vnode_isvroot(vp))
+ return(EINVAL);
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ break;
+
+ case HFS_MARK_BOOT_CORRUPT:
+ /* Mark the boot volume corrupt by setting
+ * kHFSVolumeInconsistentBit in the volume header. This will
+ * force fsck_hfs on next mount.
+ */
+ if (!is_suser()) {
+ return EACCES;
+ }
+
+ /* Allowed only on the root vnode of the boot volume */
+ if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
+ !vnode_isvroot(vp)) {
+ return EINVAL;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
+ hfs_mark_volume_inconsistent(hfsmp);
+ break;
+
+ case HFS_FSCTL_GET_JOURNAL_INFO:
+ jip = (struct hfs_journal_info*)ap->a_data;
+
+ if (vp == NULLVP)
+ return EINVAL;
+
+ if (hfsmp->jnl == NULL) {
+ jnl_start = 0;
+ jnl_size = 0;
+ } else {
+ jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
+ jnl_size = (off_t)hfsmp->jnl_size;
+ }
+
+ jip->jstart = jnl_start;
+ jip->jsize = jnl_size;
+ break;
+
+ case HFS_SET_ALWAYS_ZEROFILL: {
+ struct cnode *cp = VTOC(vp);
+
+ if (*(int *)ap->a_data) {
+ cp->c_flag |= C_ALWAYS_ZEROFILL;
+ } else {
+ cp->c_flag &= ~C_ALWAYS_ZEROFILL;
+ }
+ break;
+ }
+
+ default:
+ return (ENOTTY);
+ }
+
+ return 0;
+}
+
+/*
+ * select
+ */
+int
+hfs_vnop_select(__unused struct vnop_select_args *ap)
+/*
+ struct vnop_select_args {
+ vnode_t a_vp;
+ int a_which;
+ int a_fflags;
+ void *a_wql;
+ vfs_context_t a_context;
+ };
+*/
+{
+ /*
+ * We should really check to see if I/O is possible.
+ */
+ return (1);
+}
+
+/*
+ * Converts a logical block number to a physical block, and optionally returns
+ * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
+ * The physical block number is based on the device block size, currently 512.
+ * The block run is returned in logical blocks; it is the REMAINING number of blocks in the run.
+ */
+int
+hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
+{
+ struct filefork *fp = VTOF(vp);
+ struct hfsmount *hfsmp = VTOHFS(vp);
+ int retval = E_NONE;
+ u_int32_t logBlockSize;
+ size_t bytesContAvail = 0;
+ off_t blockposition;
+ int lockExtBtree;
+ int lockflags = 0;
+
+ /*
+ * Check for underlying vnode requests and ensure that logical
+ * to physical mapping is requested.
+ */
+ if (vpp != NULL)
+ *vpp = hfsmp->hfs_devvp;
+ if (bnp == NULL)
+ return (0);
+
+ logBlockSize = GetLogicalBlockSize(vp);
+ blockposition = (off_t)bn * logBlockSize;
+
+ lockExtBtree = overflow_extents(fp);
+
+ if (lockExtBtree)
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
+
+ retval = MacToVFSError(
+ MapFileBlockC (HFSTOVCB(hfsmp),
+ (FCB*)fp,
+ MAXPHYSIO,
+ blockposition,
+ bnp,
+ &bytesContAvail));
+
+ if (lockExtBtree)
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (retval == E_NONE) {
+ /* Figure out how many read ahead blocks there are */
+ if (runp != NULL) {
+ if (can_cluster(logBlockSize)) {
+ /* Make sure this result never goes negative: */
+ *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
+ } else {
+ *runp = 0;
+ }
+ }
+ }
+ return (retval);
+}
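+
+/*
+ * Worked example of the *runp computation above: with a 4 KB logical block
+ * size and MapFileBlockC() reporting bytesContAvail == 20480 (five logical
+ * blocks of contiguous data), *runp = (20480 / 4096) - 1 = 4 -- the number
+ * of blocks remaining in the run after the block just mapped.
+ */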
+
+/*
+ * Convert logical block number to file offset.
+ */
+int
+hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
+/*
+ struct vnop_blktooff_args {
+ vnode_t a_vp;
+ daddr64_t a_lblkno;
+ off_t *a_offset;
+ };
+*/
+{
if (ap->a_vp == NULL)
return (EINVAL);
- *ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64;
+ *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
return(0);
}
+/*
+ * Convert file offset to logical block number.
+ */
int
-hfs_offtoblk(ap)
-struct vop_offtoblk_args /* {
- struct vnode *a_vp;
- off_t a_offset;
- daddr_t *a_lblkno;
-} */ *ap;
+hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
+/*
+ struct vnop_offtoblk_args {
+ vnode_t a_vp;
+ off_t a_offset;
+ daddr64_t *a_lblkno;
+ };
+*/
{
- long lbsize, boff;
-
if (ap->a_vp == NULL)
return (EINVAL);
- *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;
+ *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
return(0);
}
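+
+/*
+ * Example of the two conversions above: with GetLogicalBlockSize() == 4096,
+ * logical block 3 maps to offset 3 * 4096 = 12288, and offset 12288 maps
+ * back to block 12288 / 4096 = 3.  The truncating division means any offset
+ * within a block yields that block's number.
+ */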
+/*
+ * Map file offset to physical block number.
+ *
+ * If this function is called for a write operation, and the file
+ * has virtual blocks allocated (delayed allocation), real blocks
+ * are allocated by calling ExtendFileC().
+ *
+ * If this function is called for a read operation, and the file
+ * has virtual blocks allocated (delayed allocation), the file size
+ * is left unchanged, and, if required, the rangelist is searched
+ * for the mapping.
+ *
+ * System file cnodes are expected to be locked (shared or exclusive).
+ */
int
-hfs_cmap(ap)
-struct vop_cmap_args /* {
- struct vnode *a_vp;
- off_t a_foffset;
- size_t a_size;
- daddr_t *a_bpn;
- size_t *a_run;
- void *a_poff;
-} */ *ap;
+hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
+/*
+ struct vnop_blockmap_args {
+ vnode_t a_vp;
+ off_t a_foffset;
+ size_t a_size;
+ daddr64_t *a_bpn;
+ size_t *a_run;
+ void *a_poff;
+ int a_flags;
+ vfs_context_t a_context;
+ };
+*/
{
- struct hfsnode *hp = VTOH(ap->a_vp);
- struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
- FCB *fcb = HTOFCB(hp);
- size_t bytesContAvail = 0;
- int retval = E_NONE;
- int lockExtBtree;
- struct proc *p = NULL;
- struct rl_entry *invalid_range;
- enum rl_overlaptype overlaptype;
- off_t limit;
-
-#define DEBUG_CMAP 0
-#if DEBUG_CMAP
- DBG_FUNC_NAME("hfs_cmap");
- DBG_VOP_LOCKS_DECL(2);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);
-
- DBG_VOP_LOCKS_INIT(0, ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-#endif
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ struct hfsmount *hfsmp;
+ size_t bytesContAvail = 0;
+ int retval = E_NONE;
+ int syslocks = 0;
+ int lockflags = 0;
+ struct rl_entry *invalid_range;
+ enum rl_overlaptype overlaptype;
+ int started_tr = 0;
+ int tooklock = 0;
+
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ /* allow blockmaps to the resource fork */
+ } else {
+ if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
+ int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
+ switch(state) {
+ case FILE_IS_COMPRESSED:
+ return ENOTSUP;
+ case FILE_IS_CONVERTING:
+ /* if FILE_IS_CONVERTING, we allow blockmap */
+ break;
+ default:
+ printf("invalid state %d for compressed file\n", state);
+ /* fall through */
+ }
+ }
+ }
+#endif /* HFS_COMPRESSION */
- DBG_IO(("\tMapped offset %qx --> ", ap->a_foffset));
- /*
- * Check for underlying vnode requests and ensure that logical
- * to physical mapping is requested.
- */
- if (ap->a_bpn == NULL) {
- return (0);
- };
-
- if (lockExtBtree = hasOverflowExtents(hp))
- {
- p = current_proc();
- if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
- return (retval);
- };
- }
- retval = MacToVFSError(
- MapFileBlockC (HFSTOVCB(hfsmp),
- fcb,
- ap->a_size,
- ap->a_foffset,
- ap->a_bpn,
- &bytesContAvail));
-
- if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
-
- if (retval == E_NONE) {
- /* Adjust the mapping information for invalid file ranges: */
- overlaptype = rl_scan(&hp->h_invalidranges,
- ap->a_foffset,
- ap->a_foffset + (off_t)bytesContAvail - 1,
- &invalid_range);
- if (overlaptype != RL_NOOVERLAP) {
- switch(overlaptype) {
- case RL_MATCHINGOVERLAP:
- case RL_OVERLAPCONTAINSRANGE:
- case RL_OVERLAPSTARTSBEFORE:
- /* There's no valid block for this byte offset: */
- *ap->a_bpn = (daddr_t)-1;
-
- /* There's no point limiting the amount to be returned if the
- invalid range that was hit extends all the way to the EOF
- (i.e. there's no valid bytes between the end of this range
- and the file's EOF):
- */
- if ((fcb->fcbEOF > (invalid_range->rl_end + 1)) &&
- (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
- bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
- };
- break;
-
- case RL_OVERLAPISCONTAINED:
- case RL_OVERLAPENDSAFTER:
- /* The range of interest hits an invalid block before the end: */
- if (invalid_range->rl_start == ap->a_foffset) {
- /* There's actually no valid information to be had starting here: */
- *ap->a_bpn = (daddr_t)-1;
- if ((fcb->fcbEOF > (invalid_range->rl_end + 1)) &&
- (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
- bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
- };
- } else {
- bytesContAvail = invalid_range->rl_start - ap->a_foffset;
- };
- break;
- };
- if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
- };
-
- if (ap->a_run) *ap->a_run = bytesContAvail;
- };
+ /* Do not allow blockmap operation on a directory */
+ if (vnode_isdir(vp)) {
+ return (ENOTSUP);
+ }
- if (ap->a_poff) *(int *)ap->a_poff = 0;
+ /*
+ * Check for underlying vnode requests and ensure that logical
+ * to physical mapping is requested.
+ */
+ if (ap->a_bpn == NULL)
+ return (0);
- DBG_IO(("%d:%d.\n", *ap->a_bpn, bytesContAvail));
+ if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
+ if (VTOC(vp)->c_lockowner != current_thread()) {
+ hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ tooklock = 1;
+ }
+ }
+ hfsmp = VTOHFS(vp);
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+
+retry:
+ /* Check virtual blocks only when performing write operation */
+ if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto exit;
+ } else {
+ started_tr = 1;
+ }
+ syslocks = SFL_EXTENTS | SFL_BITMAP;
+
+ } else if (overflow_extents(fp)) {
+ syslocks = SFL_EXTENTS;
+ }
+
+ if (syslocks)
+ lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
-#if DEBUG_BMAP
+ /*
+ * Check for any delayed allocations.
+ */
+ if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
+ int64_t actbytes;
+ u_int32_t loanedBlocks;
+
+ //
+ // Make sure we have a transaction. It's possible
+ // that we came in and fp->ff_unallocblocks was zero
+ // but during the time we blocked acquiring the extents
+ // btree, ff_unallocblocks became non-zero and so we
+ // will need to start a transaction.
+ //
+ if (started_tr == 0) {
+ if (syslocks) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ syslocks = 0;
+ }
+ goto retry;
+ }
- DBG_VOP_LOCKS_TEST(retval);
-#endif
+ /*
+		 * Note: ExtendFileC will release any blocks on loan and
+		 * acquire real blocks. So we ask to extend by zero bytes
+ * since ExtendFileC will account for the virtual blocks.
+ */
- return (retval);
+ loanedBlocks = fp->ff_unallocblocks;
+ retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
+ kEFAllMask | kEFNoClumpMask, &actbytes);
-}
+ if (retval) {
+ fp->ff_unallocblocks = loanedBlocks;
+ cp->c_blocks += loanedBlocks;
+ fp->ff_blocks += loanedBlocks;
-/*
- * Calculate the logical to physical mapping if not done already,
- * then call the device strategy routine.
-#
-#vop_strategy {
-# IN struct buf *bp;
- */
-int
-hfs_strategy(ap)
-struct vop_strategy_args /* {
- struct buf *a_bp;
-} */ *ap;
-{
- register struct buf *bp = ap->a_bp;
- register struct vnode *vp = bp->b_vp;
- register struct hfsnode *hp;
- int retval = 0;
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ hfsmp->loanedBlocks += loanedBlocks;
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
- DBG_FUNC_NAME("hfs_strategy");
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ cp->c_flag |= C_MODIFIED;
+ if (started_tr) {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
-// DBG_VOP_PRINT_FUNCNAME();DBG_VOP_CONT(("\n"));
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+ }
+ goto exit;
+ }
+ }
- hp = VTOH(vp);
+ retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
+ ap->a_bpn, &bytesContAvail);
+ if (syslocks) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ syslocks = 0;
+ }
- if ( !(bp->b_flags & B_VECTORLIST)) {
+ if (started_tr) {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+ }
+ if (retval) {
+ /* On write, always return error because virtual blocks, if any,
+ * should have been allocated in ExtendFileC(). We do not
+ * allocate virtual blocks on read, therefore return error
+ * only if no virtual blocks are allocated. Otherwise we search
+		 * the rangelist for zero-fills.
+ */
+ if ((MacToVFSError(retval) != ERANGE) ||
+ (ap->a_flags & VNODE_WRITE) ||
+ ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
+ goto exit;
+ }
+
+		/* Make sure the start offset is within the logical file size */
+ if (ap->a_foffset > fp->ff_size) {
+ goto exit;
+ }
- if (vp->v_type == VBLK || vp->v_type == VCHR)
- panic("hfs_strategy: device vnode passed!");
+		/* Searching the file extents failed for this read operation, so
+		 * search the rangelist for any uncommitted holes in the file.
+ */
+ overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
+ ap->a_foffset + (off_t)(ap->a_size - 1),
+ &invalid_range);
+ switch(overlaptype) {
+ case RL_OVERLAPISCONTAINED:
+ /* start_offset <= rl_start, end_offset >= rl_end */
+ if (ap->a_foffset != invalid_range->rl_start) {
+ break;
+ }
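+			/* else fall through: the hole starts exactly at the requested offset */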
+ case RL_MATCHINGOVERLAP:
+ /* start_offset = rl_start, end_offset = rl_end */
+ case RL_OVERLAPCONTAINSRANGE:
+ /* start_offset >= rl_start, end_offset <= rl_end */
+ case RL_OVERLAPSTARTSBEFORE:
+ /* start_offset > rl_start, end_offset >= rl_start */
+ if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
+ bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
+ } else {
+ bytesContAvail = fp->ff_size - ap->a_foffset;
+ }
+ if (bytesContAvail > ap->a_size) {
+ bytesContAvail = ap->a_size;
+ }
+ *ap->a_bpn = (daddr64_t)-1;
+ retval = 0;
+ break;
+ case RL_OVERLAPENDSAFTER:
+ /* start_offset < rl_start, end_offset < rl_end */
+ case RL_NOOVERLAP:
+ break;
+ }
+ goto exit;
+ }
- if (bp->b_flags & B_PAGELIST) {
- /*
- * if we have a page list associated with this bp,
- * then go through cluster_bp since it knows how to
- * deal with a page request that might span non-contiguous
- * physical blocks on the disk...
- */
- retval = cluster_bp(bp);
- vp = hp->h_meta->h_devvp;
- bp->b_dev = vp->v_rdev;
+	/* MapFileBlockC() found a valid extent in the filefork. Search the
+	 * mapping information further for invalid file ranges.
+ */
+ overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
+ ap->a_foffset + (off_t)bytesContAvail - 1,
+ &invalid_range);
+ if (overlaptype != RL_NOOVERLAP) {
+ switch(overlaptype) {
+ case RL_MATCHINGOVERLAP:
+ case RL_OVERLAPCONTAINSRANGE:
+ case RL_OVERLAPSTARTSBEFORE:
+ /* There's no valid block for this byte offset */
+ *ap->a_bpn = (daddr64_t)-1;
+ /* There's no point limiting the amount to be returned
+ * if the invalid range that was hit extends all the way
+ * to the EOF (i.e. there's no valid bytes between the
+ * end of this range and the file's EOF):
+ */
+ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+ bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+ }
+ break;
+
+ case RL_OVERLAPISCONTAINED:
+ case RL_OVERLAPENDSAFTER:
+ /* The range of interest hits an invalid block before the end: */
+ if (invalid_range->rl_start == ap->a_foffset) {
+ /* There's actually no valid information to be had starting here: */
+ *ap->a_bpn = (daddr64_t)-1;
+ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+ bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+ }
+ } else {
+ bytesContAvail = invalid_range->rl_start - ap->a_foffset;
+ }
+ break;
+
+ case RL_NOOVERLAP:
+ break;
+ } /* end switch */
+ if (bytesContAvail > ap->a_size)
+ bytesContAvail = ap->a_size;
+ }
+
+exit:
+ if (retval == 0) {
+ if (ap->a_run)
+ *ap->a_run = bytesContAvail;
- return (retval);
+ if (ap->a_poff)
+ *(int *)ap->a_poff = 0;
}
- /*
- * If we don't already know the filesystem relative block number
- * then get it using VOP_BMAP(). If VOP_BMAP() returns the block
- * number as -1 then we've got a hole in the file. Although HFS
- * filesystems don't create files with holes, invalidating of
- * subranges of the file (lazy zero filling) may create such a
- * situation.
- */
- if (bp->b_blkno == bp->b_lblkno) {
- if ((retval = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL))) {
- bp->b_error = retval;
- bp->b_flags |= B_ERROR;
- biodone(bp);
- return (retval);
- }
- if ((long)bp->b_blkno == -1)
- clrbuf(bp);
- }
- if ((long)bp->b_blkno == -1) {
- biodone(bp);
- return (0);
- }
- if (bp->b_validend == 0) {
- /* Record the exact size of the I/O transfer about to be made: */
- DBG_ASSERT(bp->b_validoff == 0);
- bp->b_validend = bp->b_bcount;
- DBG_ASSERT(bp->b_dirtyoff == 0);
- };
- }
- vp = hp->h_meta->h_devvp;
- bp->b_dev = vp->v_rdev;
- DBG_IO(("\t\t>>>%s: continuing w/ vp: 0x%x with logBlk Ox%X and phyBlk Ox%X\n", funcname, (u_int)vp, bp->b_lblkno, bp->b_blkno));
- return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
+ if (tooklock)
+ hfs_unlock(cp);
+
+ return (MacToVFSError(retval));
}
/*
-#% reallocblks vp L L L
-#
- vop_reallocblks {
- IN struct vnode *vp;
- IN struct cluster_save *buflist;
-
- */
-
+ * Prepare and issue the I/O.
+ * buf_strategy() knows how to deal
+ * with requests that require
+ * fragmented I/Os.
+ */
int
-hfs_reallocblks(ap)
-struct vop_reallocblks_args /* {
- struct vnode *a_vp;
- struct cluster_save *a_buflist;
-} */ *ap;
+hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
- DBG_FUNC_NAME("hfs_reallocblks");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
+ buf_t bp = ap->a_bp;
+ vnode_t vp = buf_vnode(bp);
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
+ return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
+}
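+
+/*
+ * Clear the cnode's modified and touch state without scheduling a catalog
+ * update; used by the skipupdate path in do_hfs_truncate() below when the
+ * caller wants a truncate to leave timestamps alone.
+ */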
- /* Currently no support for clustering */ /* XXX */
- DBG_VOP_LOCKS_TEST(ENOSPC);
- return (ENOSPC);
+static int
+hfs_minorupdate(struct vnode *vp)
+{
+ struct cnode *cp = VTOC(vp);
+ cp->c_flag &= ~C_MODIFIED;
+ cp->c_touch_acctime = 0;
+ cp->c_touch_chgtime = 0;
+ cp->c_touch_modtime = 0;
+
+ return 0;
}
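+
+/*
+ * Truncate (or extend) the file to 'length' bytes, allocating or releasing
+ * disk blocks as needed; 'skipupdate' routes metadata changes through
+ * hfs_minorupdate() so that timestamps are left alone.
+ */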
+static int
+do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
+{
+ register struct cnode *cp = VTOC(vp);
+ struct filefork *fp = VTOF(vp);
+	struct proc *p = vfs_context_proc(context);
+ kauth_cred_t cred = vfs_context_ucred(context);
+ int retval;
+ off_t bytesToAdd;
+ off_t actualBytesAdded;
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize;
+ struct hfsmount *hfsmp;
+ int lockflags;
+
+ blksize = VTOVCB(vp)->blockSize;
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
+ (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+
+ if (length < 0)
+ return (EINVAL);
+ /* This should only happen with a corrupt filesystem */
+ if ((off_t)fp->ff_size < 0)
+ return (EINVAL);
-/*
-#
-#% truncate vp L L L
-#
-vop_truncate {
- IN struct vnode *vp;
- IN off_t length;
- IN int flags; (IO_SYNC)
- IN struct ucred *cred;
- IN struct proc *p;
-};
- * Truncate the hfsnode hp to at most length size, freeing (or adding) the
- * disk blocks.
- */
-int hfs_truncate(ap)
- struct vop_truncate_args /* {
- struct vnode *a_vp;
- off_t a_length;
- int a_flags;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap;
-{
- register struct vnode *vp = ap->a_vp;
- register struct hfsnode *hp = VTOH(vp);
- off_t length = ap->a_length;
- long vflags;
- struct timeval tv;
- int retval;
- FCB *fcb;
- off_t bytesToAdd;
- off_t actualBytesAdded;
- DBG_FUNC_NAME("hfs_truncate");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-#if HFS_DIAGNOSTIC
- debug_check_blocksizes(ap->a_vp);
-#endif
+ if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
+ return (EFBIG);
- fcb = HTOFCB(hp);
+ hfsmp = VTOHFS(vp);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
- (int)length, fcb->fcbEOF, fcb->fcbPLen, 0, 0);
+ retval = E_NONE;
- if (length < 0) {
- DBG_VOP_LOCKS_TEST(EINVAL);
- return (EINVAL);
- }
+ /* Files that are changing size are not hot file candidates. */
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
- if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE)) {
- DBG_VOP_LOCKS_TEST(EFBIG);
- return (EFBIG);
- }
+ /*
+ * We cannot just check if fp->ff_size == length (as an optimization)
+ * since there may be extra physical blocks that also need truncation.
+ */
+#if QUOTA
+ if ((retval = hfs_getinoquota(cp)))
+ return(retval);
+#endif /* QUOTA */
- if (vp->v_type != VREG && vp->v_type != VLNK) {
- DBG_VOP_LOCKS_TEST(EISDIR);
- return (EISDIR); /* hfs doesn't support truncating of directories */
- }
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of ff_size is 0, length will be at least 1.
+ */
+ if (length > (off_t)fp->ff_size) {
+#if QUOTA
+ retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
+ cred, 0);
+ if (retval)
+ goto Err_Exit;
+#endif /* QUOTA */
+ /*
+ * If we don't have enough physical space then
+ * we need to extend the physical size.
+ */
+ if (length > filebytes) {
+ int eflags;
+ u_int32_t blockHint = 0;
- tv = time;
- retval = E_NONE;
-
- DBG_RW(("%s: truncate from Ox%lX to Ox%X bytes\n", funcname, fcb->fcbPLen, length));
+ /* All or nothing and don't round up to clumpsize. */
+ eflags = kEFAllMask | kEFNoClumpMask;
- /*
- * we cannot just check if fcb->fcbEOF == length (as an optimization)
- * since there may be extra physical blocks that also need truncation
- */
+ if (cred && suser(cred, NULL) != 0)
+ eflags |= kEFReserveMask; /* keep a reserve */
- /*
- * Lengthen the size of the file. We must ensure that the
- * last byte of the file is allocated. Since the smallest
- * value of fcbEOF is 0, length will be at least 1.
- */
- if (length > fcb->fcbEOF) {
- off_t filePosition;
- daddr_t logBlockNo;
- long logBlockSize;
- long blkOffset;
- off_t bytestoclear;
- int blockZeroCount;
- struct buf *bp=NULL;
+ /*
+ * Allocate Journal and Quota files in metadata zone.
+ */
+ if (filebytes == 0 &&
+ hfsmp->hfs_flags & HFS_METADATA_ZONE &&
+ hfs_virtualmetafile(cp)) {
+ eflags |= kEFMetadataMask;
+ blockHint = hfsmp->hfs_metazone_start;
+ }
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
- /*
- * If we don't have enough physical space then
- * we need to extend the physical size.
- */
- if (length > fcb->fcbPLen) {
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
- if (retval)
- goto Err_Exit;
-
- while ((length > fcb->fcbPLen) && (retval == E_NONE)) {
- bytesToAdd = length - fcb->fcbPLen;
- retval = MacToVFSError(
- ExtendFileC (HTOVCB(hp),
- fcb,
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ while ((length > filebytes) && (retval == E_NONE)) {
+ bytesToAdd = length - filebytes;
+ retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
+ (FCB*)fp,
bytesToAdd,
- 0,
- kEFAllMask, /* allocate all requested bytes or none */
+ blockHint,
+ eflags,
&actualBytesAdded));
- if (actualBytesAdded == 0 && retval == E_NONE) {
- if (length > fcb->fcbPLen)
- length = fcb->fcbPLen;
- break;
- }
- }
- (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
- if (retval)
- goto Err_Exit;
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (actualBytesAdded == 0 && retval == E_NONE) {
+ if (length > filebytes)
+ length = filebytes;
+ break;
+ }
+ } /* endwhile */
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (hfsmp->jnl) {
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+ }
- DBG_ASSERT(length <= fcb->fcbPLen);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
- (int)length, fcb->fcbEOF, fcb->fcbPLen, 0, 0);
- }
-
- if (! (ap->a_flags & IO_NOZEROFILL)) {
+ hfs_end_transaction(hfsmp);
- if (UBCISVALID(vp) && retval == E_NONE) {
- struct rl_entry *invalid_range;
- int devBlockSize;
- off_t zero_limit;
+ if (retval)
+ goto Err_Exit;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+ }
+
+ if (!(flags & IO_NOZEROFILL)) {
+ if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
+ struct rl_entry *invalid_range;
+ off_t zero_limit;
- zero_limit = (fcb->fcbEOF + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
- if (length < zero_limit) zero_limit = length;
-
- if (length > fcb->fcbEOF) {
- /* Extending the file: time to fill out the current last page w. zeroes? */
- if ((fcb->fcbEOF & PAGE_MASK_64) &&
- (rl_scan(&hp->h_invalidranges,
- fcb->fcbEOF & ~PAGE_MASK_64,
- fcb->fcbEOF - 1,
- &invalid_range) == RL_NOOVERLAP)) {
+ zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+ if (length < zero_limit) zero_limit = length;
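+
+			/*
+			 * e.g. with 4 KB pages and ff_size 10000, zero_limit
+			 * rounds up to 12288: the tail of the current last page
+			 * is zeroed so the whole page holds valid data.
+			 */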
+
+ if (length > (off_t)fp->ff_size) {
+ struct timeval tv;
+
+ /* Extending the file: time to fill out the current last page w. zeroes? */
+ if ((fp->ff_size & PAGE_MASK_64) &&
+ (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
+ fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
/* There's some valid data at the start of the (current) last page
of the file, so zero out the remainder of that page to ensure the
entire page contains valid data. Since there is no invalid range
possible past the (current) eof, there's no need to remove anything
- from the invalid range list before calling cluster_write(): */
- VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
- retval = cluster_write(vp, (struct uio *) 0, fcb->fcbEOF, zero_limit,
- fcb->fcbEOF, (off_t)0, devBlockSize, (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL);
+ from the invalid range list before calling cluster_write(): */
+ hfs_unlock(cp);
+ retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
+ fp->ff_size, (off_t)0,
+ (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
+ hfs_lock(cp, HFS_FORCE_LOCK);
if (retval) goto Err_Exit;
/* Merely invalidate the remaining area, if necessary: */
- if (length > zero_limit) rl_add(zero_limit, length - 1, &hp->h_invalidranges);
- } else {
+ if (length > zero_limit) {
+ microuptime(&tv);
+ rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ }
+ } else {
/* The page containing the (current) eof is invalid: just add the
remainder of the page to the invalid list, along with the area
being newly allocated:
*/
- rl_add(fcb->fcbEOF, length - 1, &hp->h_invalidranges);
- };
- }
- } else {
+ microuptime(&tv);
+ rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ };
+ }
+ } else {
+ panic("hfs_truncate: invoked on non-UBC object?!");
+ };
+ }
+ cp->c_touch_modtime = TRUE;
+ fp->ff_size = length;
-#if 0
- /*
- * zero out any new logical space...
- */
- bytestoclear = length - fcb->fcbEOF;
- filePosition = fcb->fcbEOF;
-
- while (bytestoclear > 0) {
- logBlockNo = (daddr_t)(filePosition / PAGE_SIZE_64);
- blkOffset = (long)(filePosition & PAGE_MASK_64);
-
- if (((off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * (off_t)PAGE_SIZE)) < PAGE_SIZE_64)
- logBlockSize = (off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * PAGE_SIZE_64);
- else
- logBlockSize = PAGE_SIZE;
-
- if (logBlockSize < blkOffset)
- panic("hfs_truncate: bad logBlockSize computed\n");
-
- blockZeroCount = MIN(bytestoclear, logBlockSize - blkOffset);
+ } else { /* Shorten the size of the file */
- if (blkOffset == 0 && ((bytestoclear >= logBlockSize) || filePosition >= fcb->fcbEOF)) {
- bp = getblk(vp, logBlockNo, logBlockSize, 0, 0, BLK_WRITE);
- retval = 0;
+ if ((off_t)fp->ff_size > length) {
+ /* Any space previously marked as invalid is now irrelevant: */
+ rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
+ }
- } else {
- retval = bread(vp, logBlockNo, logBlockSize, ap->a_cred, &bp);
- if (retval) {
- brelse(bp);
- goto Err_Exit;
- }
- }
- bzero((char *)bp->b_data + blkOffset, blockZeroCount);
-
- bp->b_flags |= B_DIRTY | B_AGE;
+ /*
+ * Account for any unmapped blocks. Note that the new
+ * file length can still end up with unmapped blocks.
+ */
+ if (fp->ff_unallocblocks > 0) {
+ u_int32_t finalblks;
+ u_int32_t loanedBlocks;
- if (ap->a_flags & IO_SYNC)
- VOP_BWRITE(bp);
- else if (logBlockNo % 32)
- bawrite(bp);
- else
- VOP_BWRITE(bp); /* wait after we issue 32 requests */
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
- bytestoclear -= blockZeroCount;
- filePosition += blockZeroCount;
- }
-#else
- panic("hfs_truncate: invoked on non-UBC object?!");
-#endif
- };
- }
- fcb->fcbEOF = length;
+ loanedBlocks = fp->ff_unallocblocks;
+ cp->c_blocks -= loanedBlocks;
+ fp->ff_blocks -= loanedBlocks;
+ fp->ff_unallocblocks = 0;
+
+ hfsmp->loanedBlocks -= loanedBlocks;
- if (UBCISVALID(vp))
- ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
+ finalblks = (length + blksize - 1) / blksize;
+ if (finalblks > fp->ff_blocks) {
+ /* calculate required unmapped blocks */
+ loanedBlocks = finalblks - fp->ff_blocks;
+ hfsmp->loanedBlocks += loanedBlocks;
- } else { /* Shorten the size of the file */
+ fp->ff_unallocblocks = loanedBlocks;
+ cp->c_blocks += loanedBlocks;
+ fp->ff_blocks += loanedBlocks;
+ }
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ }
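+ /*
+ * Worked example, assuming a 4096-byte allocation block:
+ * truncating a fork with ff_blocks = 10, of which 3 are loaned,
+ * to length = 34000 gives finalblks = 9. The 3 loaned blocks
+ * are returned first (ff_blocks = 7), then 9 - 7 = 2 blocks are
+ * re-borrowed so the delayed allocation stays consistent.
+ */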
- if (fcb->fcbEOF > length) {
- /*
- * Any buffers that are past the truncation point need to be
- * invalidated (to maintain buffer cache consistency). For
- * simplicity, we invalidate all the buffers by calling vinvalbuf.
- */
- if (UBCISVALID(vp))
- ubc_setsize(vp, (off_t)length); /* XXX check errors */
+ /*
+ * For a TBE process the deallocation of the file blocks is
+ * delayed until the file is closed. And hfs_close calls
+ * truncate with the IO_NDELAY flag set. So when IO_NDELAY
+ * isn't set, we make sure this isn't a TBE process.
+ */
+ if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
+#if QUOTA
+ off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
+#endif /* QUOTA */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
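+ /*
+ * If loaned blocks remain after the adjustment above, nothing
+ * past the new EOF is actually mapped on disk, so there is
+ * nothing for TruncateFileC to free and the call is skipped.
+ */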
+ if (fp->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
+ (FCB*)fp, length, false));
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (hfsmp->jnl) {
+ if (retval == 0) {
+ fp->ff_size = length;
+ }
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+ }
+ hfs_end_transaction(hfsmp);
- vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
- retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
-
- /* Any space previously marked as invalid is now irrelevant: */
- rl_remove(length, fcb->fcbEOF - 1, &hp->h_invalidranges);
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (retval)
+ goto Err_Exit;
+#if QUOTA
+ /* These are bytesreleased */
+ (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
+#endif /* QUOTA */
+ }
+ /* Only set update flag if the logical length changes */
+ if ((off_t)fp->ff_size != length)
+ cp->c_touch_modtime = TRUE;
+ fp->ff_size = length;
}
-
- /*
- * For a TBE process the deallocation of the file blocks is
- * delayed until the file is closed. And hfs_close calls
- * truncate with the IO_NDELAY flag set. So when IO_NDELAY
- * isn't set, we make sure this isn't a TBE process.
- */
- if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
-
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
- if (retval)
- goto Err_Exit;
- retval = MacToVFSError(
- TruncateFileC(
- HTOVCB(hp),
- fcb,
- length,
- false));
- (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
- if (retval)
- goto Err_Exit;
- }
- fcb->fcbEOF = length;
-
- if (fcb->fcbFlags & fcbModifiedMask)
- hp->h_nodeflags |= IN_MODIFIED;
- }
- hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
- retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
- if (retval) {
- DBG_ERR(("Could not update truncate"));
+ if (cp->c_mode & (S_ISUID | S_ISGID)) {
+ if (!vfs_context_issuser(context)) {
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ skipupdate = 0;
+ }
+ }
+ if (skipupdate) {
+ retval = hfs_minorupdate(vp);
+ }
+ else {
+ cp->c_touch_chgtime = TRUE; /* status changed */
+ cp->c_touch_modtime = TRUE; /* file data was modified */
+ retval = hfs_update(vp, MNT_WAIT);
+ }
+ if (retval) {
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
-1, -1, -1, retval, 0);
- }
-Err_Exit:;
+ }
-#if HFS_DIAGNOSTIC
- debug_check_blocksizes(ap->a_vp);
-#endif
+Err_Exit:
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
- (int)length, fcb->fcbEOF, fcb->fcbPLen, retval, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
+ (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
- DBG_VOP_LOCKS_TEST(retval);
- return (retval);
+ return (retval);
}
/*
-#
-#% allocate vp L L L
-#
-vop_allocate {
- IN struct vnode *vp;
- IN off_t length;
- IN int flags;
- OUT off_t *bytesallocated;
- IN off_t offset;
- IN struct ucred *cred;
- IN struct proc *p;
-};
- * allocate the hfsnode hp to at most length size
+ * Truncate a cnode to at most length size, freeing (or adding) the
+ * disk blocks.
*/
-int hfs_allocate(ap)
- struct vop_allocate_args /* {
- struct vnode *a_vp;
- off_t a_length;
- u_int32_t a_flags;
- off_t *a_bytesallocated;
- off_t a_offset;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap;
+__private_extern__
+int
+hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
+ int skipupdate, vfs_context_t context)
{
- register struct vnode *vp = ap->a_vp;
- register struct hfsnode *hp = VTOH(vp);
- off_t length = ap->a_length;
- off_t startingPEOF;
- off_t moreBytesRequested;
- off_t actualBytesAdded;
- long vflags;
- struct timeval tv;
- int retval, retval2;
- FCB *fcb;
- UInt32 blockHint;
- UInt32 extendFlags =0; /* For call to ExtendFileC */
- DBG_FUNC_NAME("hfs_allocate");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
- DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
- /* Set the number of bytes allocated to 0 so that the caller will know that we
- did nothing. ExtendFileC will fill this in for us if we actually allocate space */
-
- *(ap->a_bytesallocated) = 0;
- fcb = HTOFCB(hp);
-
- /* Now for some error checking */
-
- if (length < (off_t)0) {
- DBG_VOP_LOCKS_TEST(EINVAL);
- return (EINVAL);
- }
+ struct filefork *fp = VTOF(vp);
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize, error = 0;
+ struct cnode *cp = VTOC(vp);
+
+ /* Cannot truncate an HFS directory! */
+ if (vnode_isdir(vp)) {
+ return (EISDIR);
+ }
+ /* A swap file cannot change size. */
+ if (vnode_isswap(vp) && (length != 0)) {
+ return (EPERM);
+ }
+
+ blksize = VTOVCB(vp)->blockSize;
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ //
+ // Have to do this here so that we don't wind up with
+ // i/o pending for blocks that are about to be released
+ // if we truncate the file.
+ //
+ // If skipsetsize is set, then the caller is responsible
+ // for the ubc_setsize.
+ //
+ // Even if skipsetsize is set, if the length is zero we
+ // want to call ubc_setsize() because as of SnowLeopard
+ // it will no longer cause any page-ins and it will drop
+ // any dirty pages so that we don't do any i/o that we
+ // don't have to. This also prevents a race where i/o
+ // for truncated blocks may overwrite later data if the
+ // blocks get reallocated to a different file.
+ //
+ if (!skipsetsize || length == 0)
+ ubc_setsize(vp, length);
+
+ // have to loop truncating or growing files that are
+ // really big because otherwise transactions can get
+ // enormous and consume too many kernel resources.
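+ //
+ // For example, shrinking a multi-gigabyte fork that has
+ // overflow extents proceeds in HFS_BIGFILE_SIZE steps, one
+ // journal transaction per do_hfs_truncate() call.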
+
+ if (length < filebytes) {
+ while (filebytes > length) {
+ if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ filebytes -= HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = length;
+ }
+ cp->c_flag |= C_FORCEUPDATE;
+ error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
+ if (error)
+ break;
+ }
+ } else if (length > filebytes) {
+ while (filebytes < length) {
+ if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ filebytes += HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = length;
+ }
+ cp->c_flag |= C_FORCEUPDATE;
+ error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
+ if (error)
+ break;
+ }
+ } else /* Same logical size */ {
+
+ error = do_hfs_truncate(vp, length, flags, skipupdate, context);
+ }
+ /* Files that are changing size are not hot file candidates. */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+
+ return (error);
+}
+
+
+
+/*
+ * Preallocate file storage space.
+ */
+int
+hfs_vnop_allocate(struct vnop_allocate_args /* {
+ vnode_t a_vp;
+ off_t a_length;
+ u_int32_t a_flags;
+ off_t *a_bytesallocated;
+ off_t a_offset;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ ExtendedVCB *vcb;
+ off_t length = ap->a_length;
+ off_t startingPEOF;
+ off_t moreBytesRequested;
+ off_t actualBytesAdded;
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int retval, retval2;
+ u_int32_t blockHint;
+ u_int32_t extendFlags; /* For call to ExtendFileC */
+ struct hfsmount *hfsmp;
+ kauth_cred_t cred = vfs_context_ucred(ap->a_context);
+ int lockflags;
+
+ *(ap->a_bytesallocated) = 0;
+
+ if (!vnode_isreg(vp))
+ return (EISDIR);
+ if (length < (off_t)0)
+ return (EINVAL);
+
+ cp = VTOC(vp);
- if (vp->v_type != VREG && vp->v_type != VLNK) {
- DBG_VOP_LOCKS_TEST(EISDIR);
- return (EISDIR); /* hfs doesn't support truncating of directories */
- }
+ hfs_lock_truncate(cp, TRUE);
- if ((ap->a_flags & ALLOCATEFROMVOL) && (length <= fcb->fcbPLen))
- return (EINVAL);
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+ goto Err_Exit;
+ }
+
+ fp = VTOF(vp);
+ hfsmp = VTOHFS(vp);
+ vcb = VTOVCB(vp);
- /* Fill in the flags word for the call to Extend the file */
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
- if (ap->a_flags & ALLOCATECONTIG) {
- extendFlags |= kEFContigMask;
+ if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
+ retval = EINVAL;
+ goto Err_Exit;
}
- if (ap->a_flags & ALLOCATEALL) {
+ /* Fill in the flags word for the call to Extend the file */
+
+ extendFlags = kEFNoClumpMask;
+ if (ap->a_flags & ALLOCATECONTIG)
+ extendFlags |= kEFContigMask;
+ if (ap->a_flags & ALLOCATEALL)
extendFlags |= kEFAllMask;
- }
+ if (cred && suser(cred, NULL) != 0)
+ extendFlags |= kEFReserveMask;
+ if (hfs_virtualmetafile(cp))
+ extendFlags |= kEFMetadataMask;
- tv = time;
- retval = E_NONE;
- blockHint = 0;
- startingPEOF = fcb->fcbPLen;
+ retval = E_NONE;
+ blockHint = 0;
+ startingPEOF = filebytes;
- if (ap->a_flags & ALLOCATEFROMPEOF) {
- length += fcb->fcbPLen;
- }
+ if (ap->a_flags & ALLOCATEFROMPEOF)
+ length += filebytes;
+ else if (ap->a_flags & ALLOCATEFROMVOL)
+ blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
- if (ap->a_flags & ALLOCATEFROMVOL)
- blockHint = ap->a_offset / HTOVCB(hp)->blockSize;
+ /* If no changes are necessary, then we're done */
+ if (filebytes == length)
+ goto Std_Exit;
- /* If no changes are necesary, then we're done */
- if (fcb->fcbPLen == length)
- goto Std_Exit;
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of filebytes is 0, length will be at least 1.
+ */
+ if (length > filebytes) {
+ off_t total_bytes_added = 0, orig_request_size;
- /*
- * Lengthen the size of the file. We must ensure that the
- * last byte of the file is allocated. Since the smallest
- * value of fcbPLen is 0, length will be at least 1.
- */
- if (length > fcb->fcbPLen) {
- moreBytesRequested = length - fcb->fcbPLen;
+ orig_request_size = moreBytesRequested = length - filebytes;
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
- if (retval) goto Err_Exit;
+#if QUOTA
+ retval = hfs_chkdq(cp,
+ (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
+ cred, 0);
+ if (retval)
+ goto Err_Exit;
+
+#endif /* QUOTA */
+ /*
+ * Metadata zone checks.
+ */
+ if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
+ /*
+ * Allocate Journal and Quota files in metadata zone.
+ */
+ if (hfs_virtualmetafile(cp)) {
+ blockHint = hfsmp->hfs_metazone_start;
+ } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
+ (blockHint <= hfsmp->hfs_metazone_end)) {
+ /*
+ * Move blockHint outside metadata zone.
+ */
+ blockHint = hfsmp->hfs_metazone_end + 1;
+ }
+ }
+
+
+ while ((length > filebytes) && (retval == E_NONE)) {
+ off_t bytesRequested;
+
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
- retval = MacToVFSError(
- ExtendFileC(HTOVCB(hp),
- fcb,
- moreBytesRequested,
- blockHint,
- extendFlags,
- &actualBytesAdded));
+ if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
+ bytesRequested = HFS_BIGFILE_SIZE;
+ } else {
+ bytesRequested = moreBytesRequested;
+ }
+
+ if (extendFlags & kEFContigMask) {
+ // if we're on a sparse device, this will force it to do a
+ // full scan to find the space needed.
+ hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
+ }
+
+ retval = MacToVFSError(ExtendFileC(vcb,
+ (FCB*)fp,
+ bytesRequested,
+ blockHint,
+ extendFlags,
+ &actualBytesAdded));
+
+ if (retval == E_NONE) {
+ *(ap->a_bytesallocated) += actualBytesAdded;
+ total_bytes_added += actualBytesAdded;
+ moreBytesRequested -= actualBytesAdded;
+ if (blockHint != 0) {
+ blockHint += actualBytesAdded / vcb->blockSize;
+ }
+ }
+ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
- *(ap->a_bytesallocated) = actualBytesAdded;
+ if (hfsmp->jnl) {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
- (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+ hfs_end_transaction(hfsmp);
+ }
- DBG_ASSERT(length <= fcb->fcbPLen);
/*
* if we get an error and no changes were made then exit
- * otherwise we must do the VOP_UPDATE to reflect the changes
+ * otherwise we must do the hfs_update to reflect the changes
*/
- if (retval && (startingPEOF == fcb->fcbPLen)) goto Err_Exit;
+ if (retval && (startingPEOF == filebytes))
+ goto Err_Exit;
- /*
- * Adjust actualBytesAdded to be allocation block aligned, not
- * clump size aligned.
- * NOTE: So what we are reporting does not affect reality
- * until the file is closed, when we truncate the file to allocation
- * block size.
- */
-
- if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
+ /*
+ * Adjust actualBytesAdded to be allocation block aligned, not
+ * clump size aligned.
+ * NOTE: What we are reporting here does not affect reality
+ * until the file is closed, when we truncate the file to allocation
+ * block size.
+ */
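+ /*
+ * For example, assuming a 4 KB allocation block and a 64 KB
+ * clump: a 5000-byte request may add 65536 clump-aligned bytes,
+ * but roundup(5000, 4096) = 8192 is what gets reported.
+ */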
+ if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
*(ap->a_bytesallocated) =
- roundup(moreBytesRequested, (off_t)VTOVCB(vp)->blockSize);
+ roundup(orig_request_size, (off_t)vcb->blockSize);
- } else { /* Shorten the size of the file */
+ } else { /* Shorten the size of the file */
- if (fcb->fcbEOF > length) {
+ if (fp->ff_size > length) {
/*
* Any buffers that are past the truncation point need to be
- * invalidated (to maintain buffer cache consistency). For
- * simplicity, we invalidate all the buffers by calling vinvalbuf.
+ * invalidated (to maintain buffer cache consistency).
*/
- vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
- (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
}
- /* lock extents b-tree (also protects volume bitmap) */
- retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
- if (retval) goto Err_Exit;
-
- retval = MacToVFSError(
- TruncateFileC(
- HTOVCB(hp),
- fcb,
- length,
- false));
- (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+ retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
+ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
/*
* if we get an error and no changes were made then exit
- * otherwise we must do the VOP_UPDATE to reflect the changes
+ * otherwise we must do the hfs_update to reflect the changes
*/
- if (retval && (startingPEOF == fcb->fcbPLen)) goto Err_Exit;
- if (fcb->fcbFlags & fcbModifiedMask)
- hp->h_nodeflags |= IN_MODIFIED;
-
- DBG_ASSERT(length <= fcb->fcbPLen) // DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG
-
- if (fcb->fcbEOF > fcb->fcbPLen) {
- fcb->fcbEOF = fcb->fcbPLen;
-
- if (UBCISVALID(vp))
- ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
- }
- }
+ if (retval && (startingPEOF == filebytes)) goto Err_Exit;
+#if QUOTA
+ /* These are bytesreleased */
+ (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
+#endif /* QUOTA */
+
+ if (fp->ff_size > filebytes) {
+ fp->ff_size = filebytes;
+
+ hfs_unlock(cp);
+ ubc_setsize(vp, fp->ff_size);
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ }
+ }
Std_Exit:
- hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
- retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
-
- if (retval == 0) retval = retval2;
+ cp->c_touch_chgtime = TRUE;
+ cp->c_touch_modtime = TRUE;
+ retval2 = hfs_update(vp, MNT_WAIT);
+ if (retval == 0)
+ retval = retval2;
Err_Exit:
- DBG_VOP_LOCKS_TEST(retval);
- return (retval);
+ hfs_unlock_truncate(cp, TRUE);
+ hfs_unlock(cp);
+ return (retval);
}
-
-
-/* pagein for HFS filesystem, similar to hfs_read(), but without cluster_read() */
+/*
+ * Pagein for HFS filesystem
+ */
int
-hfs_pagein(ap)
- struct vop_pagein_args /* {
- struct vnode *a_vp,
+hfs_vnop_pagein(struct vnop_pagein_args *ap)
+/*
+ struct vnop_pagein_args {
+ vnode_t a_vp,
upl_t a_pl,
vm_offset_t a_pl_offset,
off_t a_f_offset,
size_t a_size,
- struct ucred *a_cred,
int a_flags
- } */ *ap;
+ vfs_context_t a_context;
+ };
+*/
{
- register struct vnode *vp;
- struct hfsnode *hp;
- FCB *fcb;
- int devBlockSize = 0;
- int retval;
-
- DBG_FUNC_NAME("hfs_pagein");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(vp);DBG_VOP_CONT(("\n"));
- DBG_VOP_LOCKS_INIT(0,vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
- vp = ap->a_vp;
- hp = VTOH(vp);
- fcb = HTOFCB(hp);
-
- if (vp->v_type != VREG && vp->v_type != VLNK)
- panic("hfs_pagein: vp not UBC type\n");
-
- DBG_VOP(("\tfile size Ox%X\n", (u_int)fcb->fcbEOF));
- DBG_VOP(("\tstarting at offset Ox%X of file, length Ox%X\n", (u_int)ap->a_f_offset, (u_int)ap->a_size));
-
-#if HFS_DIAGNOSTIC
- debug_check_blocksizes(vp);
+ vnode_t vp = ap->a_vp;
+ int error;
+
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ /* allow pageins of the resource fork */
+ } else {
+ int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+ if (compressed) {
+ error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
+ if (compressed) {
+ if (error == 0) {
+ /* successful page-in, update the access time */
+ VTOC(vp)->c_touch_acctime = TRUE;
+
+ /* compressed files are not hot file candidates */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ VTOF(vp)->ff_bytesread = 0;
+ }
+ }
+ return error;
+ }
+ /* otherwise the file was converted back to a regular file while we were reading it */
+ }
+ }
#endif
- VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
+ error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
+ ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
+ /*
+ * Keep track of blocks read.
+ */
+ if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
+ struct cnode *cp;
+ struct filefork *fp;
+ int bytesread;
+ int took_cnode_lock = 0;
+
+ cp = VTOC(vp);
+ fp = VTOF(vp);
- retval = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
- ap->a_size, (off_t)fcb->fcbEOF, devBlockSize,
- ap->a_flags);
+ if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
+ bytesread = fp->ff_size;
+ else
+ bytesread = ap->a_size;
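+ /*
+ * (A page-in at offset 0 of a file smaller than a page brings
+ * in the whole file, hence ff_size rather than a_size above.)
+ */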
-#if HFS_DIAGNOSTIC
- debug_check_blocksizes(vp);
-#endif
- DBG_VOP_LOCKS_TEST(retval);
+ /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
+ if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ took_cnode_lock = 1;
+ }
+ /*
+ * If this file hasn't been seen since the start of
+ * the current sampling period then start over.
+ */
+ if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
+ struct timeval tv;
- return (retval);
+ fp->ff_bytesread = bytesread;
+ microtime(&tv);
+ cp->c_atime = tv.tv_sec;
+ } else {
+ fp->ff_bytesread += bytesread;
+ }
+ cp->c_touch_acctime = TRUE;
+ if (took_cnode_lock)
+ hfs_unlock(cp);
+ }
+ return (error);
}
/*
- * pageout for HFS filesystem.
+ * Pageout for HFS filesystem.
*/
int
-hfs_pageout(ap)
- struct vop_pageout_args /* {
- struct vnode *a_vp,
+hfs_vnop_pageout(struct vnop_pageout_args *ap)
+/*
+ struct vnop_pageout_args {
+ vnode_t a_vp,
upl_t a_pl,
vm_offset_t a_pl_offset,
off_t a_f_offset,
size_t a_size,
- struct ucred *a_cred,
int a_flags
- } */ *ap;
+ vfs_context_t a_context;
+ };
+*/
{
- struct vnode *vp = ap->a_vp;
- struct hfsnode *hp = VTOH(vp);
- FCB *fcb = HTOFCB(hp);
- int retval;
- int devBlockSize = 0;
- off_t end_of_range;
-
- DBG_FUNC_NAME("hfs_pageout");
- DBG_VOP_LOCKS_DECL(1);
- DBG_VOP_PRINT_FUNCNAME();
- DBG_VOP_PRINT_VNODE_INFO(vp);DBG_VOP_CONT(("\n"));
- DBG_VOP(("\thfsnode 0x%x (%s)\n", (u_int)hp, H_NAME(hp)));
- DBG_VOP(("\tstarting at offset Ox%lX of file, length Ox%lX\n",
- (UInt32)ap->a_f_offset, (UInt32)ap->a_size));
-
- DBG_VOP_LOCKS_INIT(0, vp, VOPDBG_LOCKED,
- VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-#if HFS_DIAGNOSTIC
- debug_check_blocksizes(vp);
-#endif
-
- if (UBCINVALID(vp))
- panic("hfs_pageout: Not a VREG: vp=%x", vp);
+ vnode_t vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ int retval = 0;
+ off_t filesize;
+ upl_t upl;
+ upl_page_info_t* pl;
+ vm_offset_t a_pl_offset;
+ int a_flags;
+ int is_pageoutv2 = 0;
+
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+
+ /*
+ * Figure out where the file ends, for pageout purposes. If
+ * ff_new_size > ff_size, then we're in the middle of extending the
+ * file via a write, so it is safe (and necessary) that we be able
+ * to pageout up to that point.
+ */
+ filesize = fp->ff_size;
+ if (fp->ff_new_size > filesize)
+ filesize = fp->ff_new_size;
- VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
+ a_flags = ap->a_flags;
+ a_pl_offset = ap->a_pl_offset;
- end_of_range = ap->a_f_offset + ap->a_size - 1;
+ /*
+ * we can tell if we're getting the new or old behavior from the UPL
+ */
+ if ((upl = ap->a_pl) == NULL) {
+ int request_flags;
- if (end_of_range >= (off_t)fcb->fcbEOF)
- end_of_range = (off_t)(fcb->fcbEOF - 1);
+ is_pageoutv2 = 1;
+ /*
+ * we're in control of any UPL we commit
+ * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
+ */
+ a_flags &= ~UPL_NOCOMMIT;
+ a_pl_offset = 0;
- if (ap->a_f_offset < (off_t)fcb->fcbEOF)
- rl_remove(ap->a_f_offset, end_of_range, &hp->h_invalidranges);
+ /*
+ * take truncate lock (shared) to guard against
+ * zero-fill thru fsync interfering, but only for v2
+ */
+ hfs_lock_truncate(cp, 0);
- retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
- (off_t)fcb->fcbEOF, devBlockSize, ap->a_flags);
+ if (a_flags & UPL_MSYNC) {
+ request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
+ }
+ else {
+ request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
+ }
+ ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
+ if (upl == (upl_t) NULL) {
+ retval = EINVAL;
+ goto pageout_done;
+ }
+ }
/*
- * If we successfully wrote any data, and we are not the superuser
- * we clear the setuid and setgid bits as a precaution against
- * tampering.
+ * from this point forward, upl points at the UPL we're working with;
+ * it was either passed in or we successfully created it
*/
- if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
- hp->h_meta->h_mode &= ~(ISUID | ISGID);
-#if HFS_DIAGNOSTIC
- debug_check_blocksizes(vp);
-#endif
+ /*
+ * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
+ * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
+ * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
+ * N dirty ranges in the UPL. Note that this is almost a direct copy of the
+ * logic in vnode_pageout except that we need to do it after grabbing the truncate
+ * lock in HFS so that we don't lock invert ourselves.
+ *
+ * Note that we can still get into this function on behalf of the default pager with
+ * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
+ * since fsync and other writing threads will grab the locks, then mark the
+ * relevant pages as busy. But the pageout codepath marks the pages as busy,
+ * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
+ * we do not try to grab anything for the pre-V2 case, which should only be accessed
+ * by the paging/VM system.
+ */
+
+ if (is_pageoutv2) {
+ off_t f_offset;
+ int offset;
+ int isize;
+ int pg_index;
+ int error;
+ int error_ret = 0;
+
+ isize = ap->a_size;
+ f_offset = ap->a_f_offset;
+
+ /*
+ * Scan from the back to find the last page in the UPL, so that we
+ * aren't looking at a UPL that may have already been freed by the
+ * preceding aborts/completions.
+ */
+ for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
+ if (upl_page_present(pl, --pg_index))
+ break;
+ if (pg_index == 0) {
+ ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
+ goto pageout_done;
+ }
+ }
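+ /*
+ * At this point pg_index is the index of the last present page
+ * in the UPL; an entirely empty UPL was already aborted above.
+ */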
+
+ /*
+ * initialize the offset variables before we touch the UPL.
+ * a_f_offset is the position into the file, in bytes
+ * offset is the position into the UPL, in bytes
+ * pg_index is the pg# of the UPL we're operating on.
+ * isize is the offset into the UPL of the last non-clean page.
+ */
+ isize = ((pg_index + 1) * PAGE_SIZE);
+
+ offset = 0;
+ pg_index = 0;
+
+ while (isize) {
+ int xsize;
+ int num_of_pages;
- DBG_VOP_LOCKS_TEST(retval);
+ if ( !upl_page_present(pl, pg_index)) {
+ /*
+ * we asked for RET_ONLY_DIRTY, so it's possible
+ * to get back empty slots in the UPL.
+ * just skip over them
+ */
+ f_offset += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ isize -= PAGE_SIZE;
+ pg_index++;
+
+ continue;
+ }
+ if ( !upl_dirty_page(pl, pg_index)) {
+ panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
+ }
+
+ /*
+ * We know that we have at least one dirty page.
+ * Now checking to see how many in a row we have
+ */
+ num_of_pages = 1;
+ xsize = isize - PAGE_SIZE;
+
+ while (xsize) {
+ if ( !upl_dirty_page(pl, pg_index + num_of_pages))
+ break;
+ num_of_pages++;
+ xsize -= PAGE_SIZE;
+ }
+ xsize = num_of_pages * PAGE_SIZE;
+
+ if (!vnode_isswap(vp)) {
+ off_t end_of_range;
+ int tooklock;
+
+ tooklock = 0;
+
+ if (cp->c_lockowner != current_thread()) {
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+ /*
+ * we're in the v2 path, so we are the
+ * owner of the UPL... we may have already
+ * processed some of the UPL, so abort it
+ * from the current working offset to the
+ * end of the UPL
+ */
+ ubc_upl_abort_range(upl,
+ offset,
+ ap->a_size - offset,
+ UPL_ABORT_FREE_ON_EMPTY);
+ goto pageout_done;
+ }
+ tooklock = 1;
+ }
+ end_of_range = f_offset + xsize - 1;
+
+ if (end_of_range >= filesize) {
+ end_of_range = (off_t)(filesize - 1);
+ }
+ if (f_offset < filesize) {
+ rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
+ cp->c_flag |= C_MODIFIED; /* leof is dirty */
+ }
+ if (tooklock) {
+ hfs_unlock(cp);
+ }
+ }
+ if ((error = cluster_pageout(vp, upl, offset, f_offset,
+ xsize, filesize, a_flags))) {
+ if (error_ret == 0)
+ error_ret = error;
+ }
+ f_offset += xsize;
+ offset += xsize;
+ isize -= xsize;
+ pg_index += num_of_pages;
+ }
+ /* capture errnos bubbled out of cluster_pageout if they occurred */
+ if (error_ret != 0) {
+ retval = error_ret;
+ }
+ } /* end block for v2 pageout behavior */
+ else {
+ if (!vnode_isswap(vp)) {
+ off_t end_of_range;
+ int tooklock = 0;
+
+ if (cp->c_lockowner != current_thread()) {
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+ if (!(a_flags & UPL_NOCOMMIT)) {
+ ubc_upl_abort_range(upl,
+ a_pl_offset,
+ ap->a_size,
+ UPL_ABORT_FREE_ON_EMPTY);
+ }
+ goto pageout_done;
+ }
+ tooklock = 1;
+ }
+ end_of_range = ap->a_f_offset + ap->a_size - 1;
+
+ if (end_of_range >= filesize) {
+ end_of_range = (off_t)(filesize - 1);
+ }
+ if (ap->a_f_offset < filesize) {
+ rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
+ cp->c_flag |= C_MODIFIED; /* leof is dirty */
+ }
+
+ if (tooklock) {
+ hfs_unlock(cp);
+ }
+ }
+ /*
+ * just call cluster_pageout for old pre-v2 behavior
+ */
+ retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
+ ap->a_size, filesize, a_flags);
+ }
+
+ /*
+ * If data was written, update the modification time of the file.
+ * If setuid or setgid bits are set and this process is not the
+ * superuser then clear the setuid and setgid bits as a precaution
+ * against tampering.
+ */
+ if (retval == 0) {
+ cp->c_touch_modtime = TRUE;
+ cp->c_touch_chgtime = TRUE;
+ if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
+ (vfs_context_suser(ap->a_context) != 0)) {
+ hfs_lock(cp, HFS_FORCE_LOCK);
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ hfs_unlock(cp);
+ }
+ }
+
+pageout_done:
+ if (is_pageoutv2) {
+ /* release truncate lock (shared) */
+ hfs_unlock_truncate(cp, 0);
+ }
return (retval);
}
/*
* Intercept B-Tree node writes to unswap them if necessary.
-#
-#vop_bwrite {
-# IN struct buf *bp;
*/
int
-hfs_bwrite(ap)
-struct vop_bwrite_args /* {
- struct buf *a_bp;
-} */ *ap;
+hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
- register struct buf *bp = ap->a_bp;
- register struct vnode *vp = bp->b_vp;
- BlockDescriptor block;
- int retval = 0;
-
- DBG_FUNC_NAME("hfs_bwrite");
-
-#if BYTE_ORDER == LITTLE_ENDIAN
- /* Trap B-Tree writes */
- if ((H_FILEID(VTOH(vp)) == kHFSExtentsFileID) ||
- (H_FILEID(VTOH(vp)) == kHFSCatalogFileID)) {
-
- /* Swap if the B-Tree node is in native byte order */
- if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
- /* Prepare the block pointer */
- block.blockHeader = bp;
- block.buffer = bp->b_data;
- block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0; /* not found in cache ==> came from disk */
- block.blockSize = bp->b_bcount;
+ int retval = 0;
+ register struct buf *bp = ap->a_bp;
+ register struct vnode *vp = buf_vnode(bp);
+ BlockDescriptor block;
+
+ /* Trap B-Tree writes */
+ if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
+ (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
+ (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
+ (vp == VTOHFS(vp)->hfc_filevp)) {
+
+ /*
+ * Swap and validate the node if it is in native byte order.
+ * This is always be true on big endian, so we always validate
+ * before writing here. On little endian, the node typically has
+ * been swapped and validated when it was written to the journal,
+ * so we won't do anything here.
+ */
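+ /*
+ * The last two bytes of a B-tree node hold the offset of the
+ * node's first record, which is always 14 (the size of the
+ * node descriptor), so reading 0x000e here means the node is
+ * still in host byte order.
+ */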
+ if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
+ /* Prepare the block pointer */
+ block.blockHeader = bp;
+ block.buffer = (char *)buf_dataptr(bp);
+ block.blockNum = buf_lblkno(bp);
+ /* not found in cache ==> came from disk */
+ block.blockReadFromDisk = (buf_fromcache(bp) == 0);
+ block.blockSize = buf_count(bp);
- /* Endian un-swap B-Tree node */
- SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), H_FILEID(VTOH(vp)), 1);
- }
+ /* Endian un-swap B-Tree node */
+ retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
+ if (retval)
+ panic("hfs_vnop_bwrite: about to write corrupt node!\n");
+ }
+ }
- /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
- }
+ /* This buffer shouldn't be locked anymore but if it is clear it */
+ if ((buf_flags(bp) & B_LOCKED)) {
+ // XXXdbg
+ if (VTOHFS(vp)->jnl) {
+ panic("hfs: CLEARING the lock bit on bp %p\n", bp);
+ }
+ buf_clearflags(bp, B_LOCKED);
+ }
+ retval = vn_bwrite (ap);
+
+ return (retval);
+}
+
+/*
+ * Relocate a file to a new location on disk
+ * cnode must be locked on entry
+ *
+ * Relocation occurs by cloning the file's data from its
+ * current set of blocks to a new set of blocks. During
+ * the relocation all of the blocks (old and new) are
+ * owned by the file.
+ *
+ * -----------------
+ * |///////////////|
+ * -----------------
+ * 0 N (file offset)
+ *
+ * ----------------- -----------------
+ * |///////////////| | | STEP 1 (acquire new blocks)
+ * ----------------- -----------------
+ * 0 N N+1 2N
+ *
+ * ----------------- -----------------
+ * |///////////////| |///////////////| STEP 2 (clone data)
+ * ----------------- -----------------
+ * 0 N N+1 2N
+ *
+ * -----------------
+ * |///////////////| STEP 3 (head truncate blocks)
+ * -----------------
+ * 0 N
+ *
+ * During steps 2 and 3 page-outs to file offsets less
+ * than or equal to N are suspended.
+ *
+ * During step 3 page-ins to the file get suspended.
+ */
+__private_extern__
+int
+hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
+ struct proc *p)
+{
+ struct cnode *cp;
+ struct filefork *fp;
+ struct hfsmount *hfsmp;
+ u_int32_t headblks;
+ u_int32_t datablks;
+ u_int32_t blksize;
+ u_int32_t growsize;
+ u_int32_t nextallocsave;
+ daddr64_t sector_a, sector_b;
+ int eflags;
+ off_t newbytes;
+ int retval;
+ int lockflags = 0;
+ int took_trunc_lock = 0;
+ int started_tr = 0;
+ enum vtype vnodetype;
+
+ vnodetype = vnode_vtype(vp);
+ if (vnodetype != VREG && vnodetype != VLNK) {
+ return (EPERM);
+ }
+
+ hfsmp = VTOHFS(vp);
+ if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
+ return (ENOSPC);
+ }
+
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+ if (fp->ff_unallocblocks)
+ return (EINVAL);
+ blksize = hfsmp->blockSize;
+ if (blockHint == 0)
+ blockHint = hfsmp->nextAllocation;
+
+ if ((fp->ff_size > 0x7fffffff) ||
+ ((fp->ff_size > blksize) && vnodetype == VLNK)) {
+ return (EFBIG);
+ }
+
+ //
+ // We do not believe that this call to hfs_fsync() is
+ // necessary and it causes a journal transaction
+ // deadlock so we are removing it.
+ //
+ //if (vnodetype == VREG && !vnode_issystem(vp)) {
+ // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
+ // if (retval)
+ // return (retval);
+ //}
+
+ if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
+ hfs_unlock(cp);
+ hfs_lock_truncate(cp, TRUE);
+ /* Force lock since callers expect the lock to be held. */
+ if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
+ hfs_unlock_truncate(cp, TRUE);
+ return (retval);
+ }
+ /* No need to continue if file was removed. */
+ if (cp->c_flag & C_NOEXISTS) {
+ hfs_unlock_truncate(cp, TRUE);
+ return (ENOENT);
+ }
+ took_trunc_lock = 1;
+ }
+ headblks = fp->ff_blocks;
+ datablks = howmany(fp->ff_size, blksize);
+ growsize = datablks * blksize;
+ eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
+ if (blockHint >= hfsmp->hfs_metazone_start &&
+ blockHint <= hfsmp->hfs_metazone_end)
+ eflags |= kEFMetadataMask;
+
+ if (hfs_start_transaction(hfsmp) != 0) {
+ if (took_trunc_lock)
+ hfs_unlock_truncate(cp, TRUE);
+ return (EINVAL);
+ }
+ started_tr = 1;
+ /*
+ * Protect the extents b-tree and the allocation bitmap
+ * during MapFileBlockC and ExtendFileC operations.
+ */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
+ if (retval) {
+ retval = MacToVFSError(retval);
+ goto out;
+ }
+
+ /*
+ * STEP 1 - acquire new allocation blocks.
+ */
+ nextallocsave = hfsmp->nextAllocation;
+ retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
+ if (eflags & kEFMetadataMask) {
+ HFS_MOUNT_LOCK(hfsmp, TRUE);
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
+ MarkVCBDirty(hfsmp);
+ HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+ }
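+ /*
+ * Restoring nextAllocation above keeps a metadata-zone
+ * allocation from dragging the roving allocation pointer,
+ * which ordinary file allocations use, into the metadata zone.
+ */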
+
+ retval = MacToVFSError(retval);
+ if (retval == 0) {
+ cp->c_flag |= C_MODIFIED;
+ if (newbytes < growsize) {
+ retval = ENOSPC;
+ goto restore;
+ } else if (fp->ff_blocks < (headblks + datablks)) {
+ printf("hfs_relocate: allocation failed");
+ retval = ENOSPC;
+ goto restore;
+ }
+
+ retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
+ if (retval) {
+ retval = MacToVFSError(retval);
+ } else if ((sector_a + 1) == sector_b) {
+ retval = ENOSPC;
+ goto restore;
+ } else if ((eflags & kEFMetadataMask) &&
+ ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
+ hfsmp->hfs_metazone_end)) {
+#if 0
+ const char * filestr;
+ char emptystr = '\0';
+
+ if (cp->c_desc.cd_nameptr != NULL) {
+ filestr = (const char *)&cp->c_desc.cd_nameptr[0];
+ } else if (vnode_name(vp) != NULL) {
+ filestr = vnode_name(vp);
+ } else {
+ filestr = &emptystr;
+ }
#endif
+ retval = ENOSPC;
+ goto restore;
+ }
+ }
+ /* Done with system locks and journal for now. */
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+
+ if (retval) {
+ /*
+ * Check to see if failure is due to excessive fragmentation.
+ */
+ if ((retval == ENOSPC) &&
+ (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
+ hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
+ }
+ goto out;
+ }
+ /*
+ * STEP 2 - clone file data into the new allocation blocks.
+ */
+
+ if (vnodetype == VLNK)
+ retval = hfs_clonelink(vp, blksize, cred, p);
+ else if (vnode_issystem(vp))
+ retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
+ else
+ retval = hfs_clonefile(vp, headblks, datablks, blksize);
+
+ /* Start transaction for step 3 or for a restore. */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto out;
+ }
+ started_tr = 1;
+ if (retval)
+ goto restore;
+
+ /*
+ * STEP 3 - switch to cloned data and remove old blocks.
+ */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+ if (retval)
+ goto restore;
+out:
+ if (took_trunc_lock)
+ hfs_unlock_truncate(cp, TRUE);
+
+ if (lockflags) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+ }
+
+ /* Push cnode's new extent data to disk. */
+ if (retval == 0) {
+ (void) hfs_update(vp, MNT_WAIT);
+ }
+ if (hfsmp->jnl) {
+ if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
+ (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+ else
+ (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+ }
+exit:
+ if (started_tr)
+ hfs_end_transaction(hfsmp);
+
+ return (retval);
- retval = vn_bwrite (ap);
+restore:
+ if (fp->ff_blocks == headblks) {
+ if (took_trunc_lock)
+ hfs_unlock_truncate(cp, TRUE);
+ goto exit;
+ }
+ /*
+ * Give back any newly allocated space.
+ */
+ if (lockflags == 0) {
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+ }
+
+ (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+
+ if (took_trunc_lock)
+ hfs_unlock_truncate(cp, TRUE);
+ goto exit;
+}
+
+
+/*
+ * Clone a symlink.
+ *
+ */
+static int
+hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
+{
+ struct buf *head_bp = NULL;
+ struct buf *tail_bp = NULL;
+ int error;
+
+
+ error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
+ if (error)
+ goto out;
+
+ tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
+ if (tail_bp == NULL) {
+ error = EIO;
+ goto out;
+ }
+ bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
+ error = (int)buf_bwrite(tail_bp);
+out:
+ if (head_bp) {
+ buf_markinvalid(head_bp);
+ buf_brelse(head_bp);
+ }
+ (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
+
+ return (error);
+}
+
+/*
+ * Clone a file's data within the file.
+ *
+ */
+static int
+hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
+{
+ caddr_t bufp;
+ size_t bufsize;
+ size_t copysize;
+ size_t iosize;
+ size_t offset;
+ off_t writebase;
+ uio_t auio;
+ int error = 0;
+
+ writebase = blkstart * blksize;
+ copysize = blkcnt * blksize;
+ iosize = bufsize = MIN(copysize, 128 * 1024);
+ offset = 0;
+
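+ /*
+ * The copy is staged through a buffer of at most 128 KB: each
+ * pass reads iosize bytes at file offset `offset' (the original
+ * blocks) and rewrites them at writebase + offset (the newly
+ * allocated blocks), using IO_NOCACHE in both directions.
+ */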
+ if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+ return (ENOMEM);
+ }
+ hfs_unlock(VTOC(vp));
+
+ auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
+
+ while (offset < copysize) {
+ iosize = MIN(copysize - offset, iosize);
+
+ uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, (uintptr_t)bufp, iosize);
+
+ error = cluster_read(vp, auio, copysize, IO_NOCACHE);
+ if (error) {
+ printf("hfs_clonefile: cluster_read failed - %d\n", error);
+ break;
+ }
+ if (uio_resid(auio) != 0) {
+ printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
+ error = EIO;
+ break;
+ }
+
+ uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
+ uio_addiov(auio, (uintptr_t)bufp, iosize);
+
+ error = cluster_write(vp, auio, writebase + offset,
+ writebase + offset + iosize,
+ uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
+ if (error) {
+ printf("hfs_clonefile: cluster_write failed - %d\n", error);
+ break;
+ }
+ if (uio_resid(auio) != 0) {
+ printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
+ error = EIO;
+ break;
+ }
+ offset += iosize;
+ }
+ uio_free(auio);
+
+ if ((blksize & PAGE_MASK)) {
+ /*
+ * since the copy may not have started on a PAGE
+ * boundary (or may not have ended on one), we
+ * may have pages left in the cache since NOCACHE
+ * will let partially written pages linger...
+ * let's just flush the entire range to make sure
+ * we don't have any pages left that are beyond
+ * (or intersect) the real LEOF of this file
+ */
+ ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
+ } else {
+ /*
+ * No need to call ubc_sync_range or hfs_invalbuf
+ * since the file was copied using IO_NOCACHE and
+ * the copy was done starting and ending on a page
+ * boundary in the file.
+ */
+ }
+ kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
+
+ hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+ return (error);
+}
+
+/*
+ * Clone a system (metadata) file.
+ *
+ */
+static int
+hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
+ kauth_cred_t cred, struct proc *p)
+{
+ caddr_t bufp;
+ char * offset;
+ size_t bufsize;
+ size_t iosize;
+ struct buf *bp = NULL;
+ daddr64_t blkno;
+ daddr64_t blk;
+ daddr64_t start_blk;
+ daddr64_t last_blk;
+ int breadcnt;
+ int i;
+ int error = 0;
+
+
+ iosize = GetLogicalBlockSize(vp);
+ bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
+ breadcnt = bufsize / iosize;
+
+ if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+ return (ENOMEM);
+ }
+ start_blk = ((daddr64_t)blkstart * blksize) / iosize;
+ last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
+ blkno = 0;
+
+ while (blkno < last_blk) {
+ /*
+ * Read up to a megabyte
+ */
+ offset = bufp;
+ for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
+ error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
+ if (error) {
+ printf("hfs_clonesysfile: meta_bread error %d\n", error);
+ goto out;
+ }
+ if (buf_count(bp) != iosize) {
+ printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
+ goto out;
+ }
+ bcopy((char *)buf_dataptr(bp), offset, iosize);
+
+ buf_markinvalid(bp);
+ buf_brelse(bp);
+ bp = NULL;
+
+ offset += iosize;
+ }
+
+ /*
+ * Write up to a megabyte
+ */
+ offset = bufp;
+ for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
+ bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
+ if (bp == NULL) {
+ printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
+ error = EIO;
+ goto out;
+ }
+ bcopy(offset, (char *)buf_dataptr(bp), iosize);
+ error = (int)buf_bwrite(bp);
+ bp = NULL;
+ if (error)
+ goto out;
+ offset += iosize;
+ }
+ }
+out:
+ if (bp) {
+ buf_brelse(bp);
+ }
+
+ kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
- return (retval);
+ error = hfs_fsync(vp, MNT_WAIT, 0, p);
+
+ return (error);
}