]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/hfs/hfs_readwrite.c
xnu-517.3.7.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_readwrite.c
index dcb0fe1e59a84be9978b9403e1862813a3a02d30..10b3a271ee3ff7255a6cabd6a7a5d0d7c3141115 100644 (file)
@@ -1,56 +1,33 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
 /*     @(#)hfs_readwrite.c     1.0
  *
- *     (c) 1990, 1992 NeXT Computer, Inc.  All Rights Reserved
- *     (c) 1998       Apple Computer, Inc.  All Rights Reserved
+ *     (c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
  *     
- *
  *     hfs_readwrite.c -- vnode operations to deal with reading and writing files.
  *
- *     MODIFICATION HISTORY:
- *      9-Nov-1999     Scott Roberts   hfs_allocate now returns sizes based on allocation block boundaries (#2398794)
- *      3-Feb-1999     Pat Dirks               Merged in Joe's change to hfs_truncate to skip vinvalbuf if LEOF isn't changing (#2302796)
- *                                                             Removed superfluous (and potentially dangerous) second call to vinvalbuf() in hfs_truncate.
- *      2-Dec-1998     Pat Dirks               Added support for read/write bootstrap ioctls.
- *     10-Nov-1998     Pat Dirks               Changed read/write/truncate logic to optimize block sizes for first extents of a file.
- *                              Changed hfs_strategy to correct I/O sizes from cluser code I/O requests in light of
- *                              different block sizing.  Changed bexpand to handle RELEASE_BUFFER flag.
- *     22-Sep-1998     Don Brady               Changed truncate zero-fill to use bwrite after several bawrites have been queued.
- *     11-Sep-1998     Pat Dirks               Fixed buffering logic to not rely on B_CACHE, which is set for empty buffers that
- *                                                             have been pre-read by cluster_read (use b_validend > 0 instead).
- *  27-Aug-1998        Pat Dirks               Changed hfs_truncate to use cluster_write in place of bawrite where possible.
- *     25-Aug-1998     Pat Dirks               Changed hfs_write to do small device-block aligned writes into buffers without doing
- *                                                             read-ahead of the buffer.  Added bexpand to deal with incomplete [dirty] buffers.
- *                                                             Fixed can_cluster macro to use MAXPHYSIO instead of MAXBSIZE.
- *     19-Aug-1998     Don Brady               Remove optimization in hfs_truncate that prevented extra physical blocks from
- *                                                             being truncated (radar #2265750). Also set fcb->fcbEOF before calling vinvalbuf.
- *      7-Jul-1998     Pat Dirks               Added code to honor IO_NOZEROFILL in hfs_truncate.
- *     16-Jul-1998     Don Brady               In hfs_bmap use MAXPHYSIO instead of MAXBSIZE when calling MapFileBlockC (radar #2263753).
- *     16-Jul-1998     Don Brady               Fix error handling in hfs_allocate (radar #2252265).
- *     04-Jul-1998     chw                             Synchronized options in hfs_allocate with flags in call to ExtendFileC
- *     25-Jun-1998     Don Brady               Add missing blockNo incrementing to zero fill loop in hfs_truncate.
- *     22-Jun-1998     Don Brady               Add bp = NULL assignment after brelse in hfs_read.
- *      4-Jun-1998     Pat Dirks               Split off from hfs_vnodeops.c
  */
 
 #include <sys/param.h>
 #include <sys/resourcevar.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
+#include <sys/filedesc.h>
 #include <sys/stat.h>
 #include <sys/buf.h>
 #include <sys/proc.h>
-//#include <mach/machine/vm_types.h>
 #include <sys/vnode.h>
 #include <sys/uio.h>
 
 #include <miscfs/specfs/specdev.h>
 
-
 #include <sys/ubc.h>
 #include <vm/vm_pageout.h>
 
-
 #include <sys/kdebug.h>
 
 #include       "hfs.h"
-#include       "hfs_dbg.h"
 #include       "hfs_endian.h"
+#include       "hfs_quota.h"
 #include       "hfscommon/headers/FileMgrInternal.h"
 #include       "hfscommon/headers/BTreesInternal.h"
+#include       "hfs_cnode.h"
+#include       "hfs_dbg.h"
 
+extern int overflow_extents(struct filefork *fp);
 
 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
 
@@ -89,13 +67,10 @@ enum {
 
 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
 
-#if DBG_VOP_TEST_LOCKS
-extern void DbgVopTest(int maxSlots, int retval, VopDbgStoreRec *VopDbgStore, char *funcname);
-#endif
+static int  hfs_clonelink(struct vnode *, int, struct ucred *, struct proc *);
+static int  hfs_clonefile(struct vnode *, int, int, int,  struct ucred *, struct proc *);
+static int  hfs_clonesysfile(struct vnode *, int, int, int, struct ucred *, struct proc *);
 
-#if HFS_DIAGNOSTIC
-void debug_check_blocksizes(struct vnode *vp);
-#endif
 
 /*****************************************************************************
 *
@@ -116,202 +91,72 @@ void debug_check_blocksizes(struct vnode *vp);
 
 int
 hfs_read(ap)
-struct vop_read_args /* {
-    struct vnode *a_vp;
-    struct uio *a_uio;
-    int a_ioflag;
-    struct ucred *a_cred;
-} */ *ap;
+       struct vop_read_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
 {
-    register struct vnode      *vp;
-    struct hfsnode                     *hp;
-    register struct uio        *uio;
-    struct buf                                 *bp;
-    daddr_t                            logBlockNo;
-    u_long                                     fragSize, moveSize, startOffset, ioxfersize;
-    long                                       devBlockSize = 0;
-    off_t                                      bytesRemaining;
-    int                                        retval;
-    u_short                            mode;
-    FCB                                                *fcb;
-
-    DBG_FUNC_NAME("hfs_read");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-    vp = ap->a_vp;
-    hp = VTOH(vp);
-    fcb = HTOFCB(hp);
-    mode = hp->h_meta->h_mode;
-    uio = ap->a_uio;
-
-#if HFS_DIAGNOSTIC
-    if (uio->uio_rw != UIO_READ)
-        panic("%s: mode", funcname);
-#endif
-
-    /* Can only read files */
-    if (ap->a_vp->v_type != VREG && ap->a_vp->v_type != VLNK) {
-        DBG_VOP_LOCKS_TEST(EISDIR);
-        return (EISDIR);
-    }
-    DBG_RW(("\tfile size Ox%X\n", (u_int)fcb->fcbEOF));
-    DBG_RW(("\tstarting at offset Ox%X of file, length Ox%X\n", (u_int)uio->uio_offset, (u_int)uio->uio_resid));
-
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-#endif
-
-    /*
-     * If they didn't ask for any data, then we are done.
-     */
-    if (uio->uio_resid == 0) {
-        DBG_VOP_LOCKS_TEST(E_NONE);
-        return (E_NONE);
-    }
-
-    /* cant read from a negative offset */
-    if (uio->uio_offset < 0) {
-        DBG_VOP_LOCKS_TEST(EINVAL);
-        return (EINVAL);
-    }
-
-    if (uio->uio_offset > fcb->fcbEOF) {
-        if ( (!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
-            retval = EFBIG;
-        else
-            retval = E_NONE;
-
-        DBG_VOP_LOCKS_TEST(retval);
-        return (retval);
-    }
-
-    VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
-
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
-                 (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
-
-    if (UBCISVALID(vp))
-        retval = cluster_read(vp, uio, (off_t)fcb->fcbEOF, devBlockSize, 0);
-    else {
-
-        for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
-
-            if ((bytesRemaining = (fcb->fcbEOF - uio->uio_offset)) <= 0)
-                break;
-
-            logBlockNo  = (daddr_t)(uio->uio_offset / PAGE_SIZE_64);
-            startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64);
-            fragSize    = PAGE_SIZE;
-
-            if (((logBlockNo * PAGE_SIZE) + fragSize) < fcb->fcbEOF)
-                ioxfersize = fragSize;
-            else {
-                ioxfersize = fcb->fcbEOF - (logBlockNo * PAGE_SIZE);
-                ioxfersize = (ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1);
-            }
-            DBG_RW(("\tat logBlockNo Ox%X, with Ox%lX left to read\n", logBlockNo, (UInt32)uio->uio_resid));
-            moveSize = ioxfersize;
-            DBG_RW(("\tmoveSize = Ox%lX; ioxfersize = Ox%lX; startOffset = Ox%lX.\n",
-                    moveSize, ioxfersize, startOffset));
-            DBG_ASSERT(moveSize >= startOffset);
-            moveSize -= startOffset;
-
-            if (bytesRemaining < moveSize)
-                moveSize = bytesRemaining;
-
-            if (uio->uio_resid < moveSize) {
-                moveSize = uio->uio_resid;
-                DBG_RW(("\treducing moveSize to Ox%lX (uio->uio_resid).\n", moveSize));
-            };
-            if (moveSize == 0) {
-                break;
-            };
-
-            DBG_RW(("\tat logBlockNo Ox%X, extent of Ox%lX, xfer of Ox%lX; moveSize = Ox%lX\n", logBlockNo, fragSize, ioxfersize, moveSize));
-
-            if (( uio->uio_offset + fragSize) >= fcb->fcbEOF) {
-                retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
-
-            } else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) {
-                daddr_t nextLogBlockNo = logBlockNo + 1;
-                int nextsize;
-
-                if (((nextLogBlockNo * PAGE_SIZE) +
-                     (daddr_t)fragSize) < fcb->fcbEOF)
-                    nextsize = fragSize;
-                else {
-                    nextsize = fcb->fcbEOF - (nextLogBlockNo * PAGE_SIZE);
-                    nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1);
-                }
-                retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp);
-            } else {
-                retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
-            };
-
-            if (retval != E_NONE) {
-                if (bp) {
-                    brelse(bp);
-                    bp = NULL;
-                }
-                break;
-            };
-            vp->v_lastr = logBlockNo;
-
-            /*
-             * We should only get non-zero b_resid when an I/O retval
-             * has occurred, which should cause us to break above.
-             * However, if the short read did not cause an retval,
-             * then we want to ensure that we do not uiomove bad
-             * or uninitialized data.
-             */
-            ioxfersize -= bp->b_resid;
-
-            if (ioxfersize < moveSize) {                       /* XXX PPD This should take the offset into account, too! */
-                if (ioxfersize == 0)
-                    break;
-                moveSize = ioxfersize;
-            }
-            if ((startOffset + moveSize) > bp->b_bcount)
-                panic("hfs_read: bad startOffset or moveSize\n");
-
-            DBG_RW(("\tcopying Ox%lX bytes from %lX; resid = Ox%lX...\n", moveSize, (char *)bp->b_data + startOffset, bp->b_resid));
-
-            if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio)))
-                break;
-
-            if (S_ISREG(mode) &&
-                (((startOffset + moveSize) == fragSize) || (uio->uio_offset == fcb->fcbEOF))) {
-                bp->b_flags |= B_AGE;
-            };
-
-            DBG_ASSERT(bp->b_bcount == bp->b_validend);
+       register struct uio *uio = ap->a_uio;
+       register struct vnode *vp = ap->a_vp;
+       struct cnode *cp;
+       struct filefork *fp;
+       int devBlockSize = 0;
+       int retval = 0;
+       off_t filesize;
+       off_t filebytes;
+       off_t start_resid = uio->uio_resid;
+
+
+       /* Preflight checks */
+       if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
+               return (EPERM);         /* can only read regular files */
+       if (uio->uio_resid == 0)
+               return (0);             /* Nothing left to do */
+       if (uio->uio_offset < 0)
+               return (EINVAL);        /* cant read from a negative offset */
+
+       cp = VTOC(vp);
+       fp = VTOF(vp);
+       filesize = fp->ff_size;
+       filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
+       if (uio->uio_offset > filesize) {
+               if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
+                       return (EFBIG);
+               else
+                       return (0);
+       }
 
-            brelse(bp);
-            /* Start of loop resets bp to NULL before reaching outside this block... */
-        }
+       VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
 
-        if (bp != NULL) {
-            DBG_ASSERT(bp->b_bcount == bp->b_validend);
-            brelse(bp);
-        };
-    }
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
+               (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);
 
-    if (HTOVCB(hp)->vcbSigWord == kHFSPlusSigWord)
-        hp->h_nodeflags |= IN_ACCESS;
+       retval = cluster_read(vp, uio, filesize, devBlockSize, 0);
 
-    DBG_VOP_LOCKS_TEST(retval);
+       cp->c_flag |= C_ACCESS;
 
-    #if HFS_DIAGNOSTIC
-        debug_check_blocksizes(vp);
-    #endif
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
+               (int)uio->uio_offset, uio->uio_resid, (int)filesize,  (int)filebytes, 0);
 
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
-                (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
+       /*
+        * Keep track blocks read
+        */
+       if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
+               /*
+                * If this file hasn't been seen since the start of
+                * the current sampling period then start over.
+                */
+               if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
+                       fp->ff_bytesread = start_resid - uio->uio_resid;
+                       cp->c_atime = time.tv_sec;
+               } else {
+                       fp->ff_bytesread += start_resid - uio->uio_resid;
+               }
+       }
 
-    return (retval);
+       return (retval);
 }
 
 /*
@@ -327,336 +172,342 @@ struct vop_read_args /* {
      */
 int
 hfs_write(ap)
-struct vop_write_args /* {
-    struct vnode *a_vp;
-    struct uio *a_uio;
-    int a_ioflag;
-    struct ucred *a_cred;
-} */ *ap;
+       struct vop_write_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
 {
-    struct hfsnode             *hp = VTOH(ap->a_vp);
-    struct uio                         *uio = ap->a_uio;
-    struct vnode               *vp = ap->a_vp ;
-    struct vnode               *dev;
-    struct buf                         *bp;
-    struct proc                *p, *cp;
-    struct timeval tv;
-    FCB                                        *fcb = HTOFCB(hp);
-    ExtendedVCB                        *vcb = HTOVCB(hp);
-    long                               devBlockSize = 0;
-    daddr_t                    logBlockNo;
-    long                               fragSize;
-    off_t                              origFileSize, currOffset, writelimit, bytesToAdd;
-    off_t                              actualBytesAdded;
-    u_long                             blkoffset, resid, xfersize, clearSize;
-    int                                        flags, ioflag;
-    int                                retval;
-    DBG_FUNC_NAME("hfs_write");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-    DBG_RW(("\thfsnode 0x%x (%s)\n", (u_int)hp, H_NAME(hp)));
-    DBG_RW(("\tstarting at offset Ox%lX of file, length Ox%lX\n", (UInt32)uio->uio_offset, (UInt32)uio->uio_resid));
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-    dev = hp->h_meta->h_devvp;
-
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-#endif
-
-    if (uio->uio_offset < 0) {
-        DBG_VOP_LOCKS_TEST(EINVAL);
-        return (EINVAL);
-    }
-
-    if (uio->uio_resid == 0) {
-        DBG_VOP_LOCKS_TEST(E_NONE);
-        return (E_NONE);
-    }
-
-    if (ap->a_vp->v_type != VREG && ap->a_vp->v_type != VLNK) {                /* Can only write files */
-        DBG_VOP_LOCKS_TEST(EISDIR);
-        return (EISDIR);
-    };
-
-#if HFS_DIAGNOSTIC
-       if (uio->uio_rw != UIO_WRITE)
-               panic("%s: mode", funcname);
-#endif
+       struct vnode *vp = ap->a_vp;
+       struct uio *uio = ap->a_uio;
+       struct cnode *cp;
+       struct filefork *fp;
+       struct proc *p;
+       struct timeval tv;
+       ExtendedVCB *vcb;
+       int devBlockSize = 0;
+       off_t origFileSize, writelimit, bytesToAdd;
+       off_t actualBytesAdded;
+       u_long resid;
+       int eflags, ioflag;
+       int retval;
+       off_t filebytes;
+       struct hfsmount *hfsmp;
+       int started_tr = 0, grabbed_lock = 0;
 
-    ioflag = ap->a_ioflag;
-    uio = ap->a_uio;
-    vp = ap->a_vp;
 
-    if (ioflag & IO_APPEND)
-       uio->uio_offset = fcb->fcbEOF;
-    if ((hp->h_meta->h_pflags & APPEND) && uio->uio_offset != fcb->fcbEOF)
-       return (EPERM);
+       if (uio->uio_offset < 0)
+               return (EINVAL);
+       if (uio->uio_resid == 0)
+               return (E_NONE);
+       if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
+               return (EPERM);         /* Can only write regular files */
+
+       ioflag = ap->a_ioflag;
+       cp = VTOC(vp);
+       fp = VTOF(vp);
+       vcb = VTOVCB(vp);
+       filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+       if (ioflag & IO_APPEND)
+               uio->uio_offset = fp->ff_size;
+       if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
+               return (EPERM);
+
+       // XXXdbg - don't allow modification of the journal or journal_info_block
+       if (VTOHFS(vp)->jnl && cp->c_datafork) {
+               struct HFSPlusExtentDescriptor *extd;
+
+               extd = &cp->c_datafork->ff_extents[0];
+               if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
+                       return EPERM;
+               }
+       }
 
        writelimit = uio->uio_offset + uio->uio_resid;
 
-    /*
-    * Maybe this should be above the vnode op call, but so long as
-    * file servers have no limits, I don't think it matters.
-    */
-    p = uio->uio_procp;
-    if (vp->v_type == VREG && p &&
-        writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
-        psignal(p, SIGXFSZ);
-        return (EFBIG);
-    };
-    VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
+       /*
+        * Maybe this should be above the vnode op call, but so long as
+        * file servers have no limits, I don't think it matters.
+        */
+       p = uio->uio_procp;
+       if (vp->v_type == VREG && p &&
+           writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+               psignal(p, SIGXFSZ);
+               return (EFBIG);
+       }
+       p = current_proc();
 
-    resid = uio->uio_resid;
-    origFileSize = fcb->fcbPLen;
-    flags = ioflag & IO_SYNC ? B_SYNC : 0;
+       VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
 
-    DBG_RW(("\tLEOF is 0x%lX, PEOF is 0x%lX.\n", fcb->fcbEOF, fcb->fcbPLen));
+       resid = uio->uio_resid;
+       origFileSize = fp->ff_size;
+       eflags = kEFDeferMask;  /* defer file block allocations */
+       filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
 
-    /*
-    NOTE:      In the following loop there are two positions tracked:
-    currOffset is the current I/O starting offset.  currOffset is never >LEOF; the
-    LEOF is nudged along with currOffset as data is zeroed or written.
-    uio->uio_offset is the start of the current I/O operation.  It may be arbitrarily
-    beyond currOffset.
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
+               (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
+       retval = 0;
 
-    The following is true at all times:
+       /* Now test if we need to extend the file */
+       /* Doing so will adjust the filebytes for us */
 
-    currOffset <= LEOF <= uio->uio_offset <= writelimit
-    */
-    currOffset = MIN(uio->uio_offset, fcb->fcbEOF);
+#if QUOTA
+       if(writelimit > filebytes) {
+               bytesToAdd = writelimit - filebytes;
 
-    DBG_RW(("\tstarting I/O loop at 0x%lX.\n", (u_long)currOffset));
+               retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)), 
+                                  ap->a_cred, 0);
+               if (retval)
+                       return (retval);
+       }
+#endif /* QUOTA */
 
-    cp = current_proc();
+       hfsmp = VTOHFS(vp);
 
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
-                (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
-    retval = 0;
+#ifdef HFS_SPARSE_DEV
+       /* 
+        * When the underlying device is sparse and space
+        * is low (< 8MB), stop doing delayed allocations
+        * and begin doing synchronous I/O.
+        */
+       if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
+           (hfs_freeblks(hfsmp, 0) < 2048)) {
+               eflags &= ~kEFDeferMask;
+               ioflag |= IO_SYNC;
+       }
+#endif /* HFS_SPARSE_DEV */
 
-    if (fcb->fcbEOF > fcb->fcbMaxEOF)
-        fcb->fcbMaxEOF = fcb->fcbEOF;
+       if (writelimit > filebytes) {
+               hfs_global_shared_lock_acquire(hfsmp);
+               grabbed_lock = 1;
+       }
+       if (hfsmp->jnl && (writelimit > filebytes)) {
+               if (journal_start_transaction(hfsmp->jnl) != 0) {
+                       hfs_global_shared_lock_release(hfsmp);
+                       return EINVAL;
+               }
+               started_tr = 1;
+       }
 
-    /* Now test if we need to extend the file */
-    /* Doing so will adjust the fcbPLen for us */
+       while (writelimit > filebytes) {
+               bytesToAdd = writelimit - filebytes;
+               if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
+                       eflags |= kEFReserveMask;
 
-    while (writelimit > (off_t)fcb->fcbPLen) {
+               /* lock extents b-tree (also protects volume bitmap) */
+               retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
+               if (retval != E_NONE)
+                       break;
        
-        bytesToAdd = writelimit - fcb->fcbPLen;
-        DBG_RW(("\textending file by 0x%lX bytes; 0x%lX blocks free",
-                (unsigned long)bytesToAdd, (unsigned long)vcb->freeBlocks));
-
-        /* lock extents b-tree (also protects volume bitmap) */
-        retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, cp);
-        if (retval != E_NONE)
-            break;
-
-        retval = MacToVFSError(
-                            ExtendFileC (vcb,
-                                            fcb,
-                                            bytesToAdd,
-                                            kEFContigBit,
-                                            &actualBytesAdded));
-
-        (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, cp);
-        DBG_VOP_CONT(("\tactual bytes added = 0x%lX bytes, retval = %d...\n", actualBytesAdded, retval));
-        if ((actualBytesAdded == 0) && (retval == E_NONE))
-            retval = ENOSPC;
-        if (retval != E_NONE)
-            break;
-
-        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
-                    (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
-    };
-
-    if (UBCISVALID(vp) && retval == E_NONE) {
-         off_t filesize;
-         off_t zero_off;
-         int   lflag;
-
-         if (writelimit > fcb->fcbEOF)
-             filesize = writelimit;
-         else
-             filesize = fcb->fcbEOF;
-
-         lflag = (ioflag & IO_SYNC);
-
-         if (uio->uio_offset > fcb->fcbMaxEOF) {
-             zero_off = fcb->fcbMaxEOF;
-             lflag   |= IO_HEADZEROFILL;
-         } else
-             zero_off = 0;
-
-         /*
-          * if the write starts beyond the current EOF then
-          * we we'll zero fill from the current EOF to where the write begins
-          */
-          retval = cluster_write(vp, uio, fcb->fcbEOF, filesize, zero_off,
-                                (off_t)0, devBlockSize, lflag);
-
-         if (uio->uio_offset > fcb->fcbEOF) {
-             fcb->fcbEOF = uio->uio_offset;
-
-             if (fcb->fcbEOF > fcb->fcbMaxEOF)
-                 fcb->fcbMaxEOF = fcb->fcbEOF;
-
-             ubc_setsize(vp, (off_t)fcb->fcbEOF);       /* XXX check errors */
-         }
-         if (resid > uio->uio_resid)
-             hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
-
-    } else {
-
-        while (retval == E_NONE && uio->uio_resid > 0) {
-
-            logBlockNo = currOffset / PAGE_SIZE;
-            blkoffset  = currOffset & PAGE_MASK;
-
-            if (((off_t)(fcb->fcbPLen) - currOffset) < PAGE_SIZE_64)
-                fragSize = (off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * PAGE_SIZE_64);
-            else
-                fragSize = PAGE_SIZE;
-            xfersize = fragSize - blkoffset;
+               /* Files that are changing size are not hot file candidates. */
+               if (hfsmp->hfc_stage == HFC_RECORDING) {
+                       fp->ff_bytesread = 0;
+               }
+               retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
+                               0, eflags, &actualBytesAdded));
+
+               (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
+               if ((actualBytesAdded == 0) && (retval == E_NONE))
+                       retval = ENOSPC;
+               if (retval != E_NONE)
+                       break;
+               filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
+                       (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size,  (int)filebytes, 0);
+       }
 
-            DBG_RW(("\tcurrOffset = Ox%lX, logBlockNo = Ox%X, blkoffset = Ox%lX, xfersize = Ox%lX, fragSize = Ox%lX.\n",
-                    (unsigned long)currOffset, logBlockNo, blkoffset, xfersize, fragSize));
+       // XXXdbg
+       if (started_tr) {
+               tv = time;
+               VOP_UPDATE(vp, &tv, &tv, 1);
 
-            /* Make any adjustments for boundary conditions */
-            if (currOffset + (off_t)xfersize > writelimit) {
-                xfersize = writelimit - currOffset;
-                DBG_RW(("\ttrimming xfersize to 0x%lX to match writelimit (uio_resid)...\n", xfersize));
-            };
+               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+               journal_end_transaction(hfsmp->jnl);
+               started_tr = 0;
+       }
+       if (grabbed_lock) {
+               hfs_global_shared_lock_release(hfsmp);
+               grabbed_lock = 0;
+       }
 
-            /*
-            * There is no need to read into bp if:
-            * We start on a block boundary and will overwrite the whole block
-            *
-            *                                          OR
-            */
-            if ((blkoffset == 0) && (xfersize >= fragSize)) {
-                DBG_RW(("\tRequesting %ld-byte block Ox%lX w/o read...\n", fragSize, (long)logBlockNo));
-
-                bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
-                retval = 0;
-
-                if (bp->b_blkno == -1) {
-                    brelse(bp);
-                    retval = EIO;              /* XXX */
-                    break;
-                }
-            } else {
+       if (retval == E_NONE) {
+               off_t filesize;
+               off_t zero_off;
+               off_t tail_off;
+               off_t inval_start;
+               off_t inval_end;
+               off_t io_start, io_end;
+               int lflag;
+               struct rl_entry *invalid_range;
+
+               if (writelimit > fp->ff_size)
+                       filesize = writelimit;
+               else
+                       filesize = fp->ff_size;
 
-                if (currOffset == fcb->fcbEOF && blkoffset == 0) {
-                    bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
-                    retval = 0;
+               lflag = (ioflag & IO_SYNC);
 
-                    if (bp->b_blkno == -1) {
-                        brelse(bp);
-                        retval = EIO;          /* XXX */
-                        break;
-                    }
+               if (uio->uio_offset <= fp->ff_size) {
+                       zero_off = uio->uio_offset & ~PAGE_MASK_64;
+                       
+                       /* Check to see whether the area between the zero_offset and the start
+                          of the transfer to see whether is invalid and should be zero-filled
+                          as part of the transfer:
+                        */
+                       if (uio->uio_offset > zero_off) {
+                               if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
+                                       lflag |= IO_HEADZEROFILL;
+                       }
+               } else {
+                       off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
+                       
+                       /* The bytes between fp->ff_size and uio->uio_offset must never be
+                          read without being zeroed.  The current last block is filled with zeroes
+                          if it holds valid data but in all cases merely do a little bookkeeping
+                          to track the area from the end of the current last page to the start of
+                          the area actually written.  For the same reason only the bytes up to the
+                          start of the page where this write will start are invalidated; any remainder
+                          before uio->uio_offset is explicitly zeroed as part of the cluster_write.
+                          
+                          Note that inval_start, the start of the page after the current EOF,
+                          may be past the start of the write, in which case the zeroing
+                          will be handled by the cluster_write of the actual data.
+                        */
+                       inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+                       inval_end = uio->uio_offset & ~PAGE_MASK_64;
+                       zero_off = fp->ff_size;
+                       
+                       if ((fp->ff_size & PAGE_MASK_64) &&
+                               (rl_scan(&fp->ff_invalidranges,
+                                                       eof_page_base,
+                                                       fp->ff_size - 1,
+                                                       &invalid_range) != RL_NOOVERLAP)) {
+                               /* The page containing the EOF is not valid, so the
+                                  entire page must be made inaccessible now.  If the write
+                                  starts on a page beyond the page containing the eof
+                                  (inval_end > eof_page_base), add the
+                                  whole page to the range to be invalidated.  Otherwise
+                                  (i.e. if the write starts on the same page), zero-fill
+                                  the entire page explicitly now:
+                                */
+                               if (inval_end > eof_page_base) {
+                                       inval_start = eof_page_base;
+                               } else {
+                                       zero_off = eof_page_base;
+                               };
+                       };
+                       
+                       if (inval_start < inval_end) {
+                               /* There's some range of data that's going to be marked invalid */
+                               
+                               if (zero_off < inval_start) {
+                                       /* The pages between inval_start and inval_end are going to be invalidated,
+                                          and the actual write will start on a page past inval_end.  Now's the last
+                                          chance to zero-fill the page containing the EOF:
+                                        */
+                                       retval = cluster_write(vp, (struct uio *) 0,
+                                                       fp->ff_size, inval_start,
+                                                       zero_off, (off_t)0, devBlockSize,
+                                                       lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
+                                       if (retval) goto ioerr_exit;
+                               };
+                               
+                               /* Mark the remaining area of the newly allocated space as invalid: */
+                               rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
+                               cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
+                               zero_off = fp->ff_size = inval_end;
+                       };
+                       
+                       if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
+               };
 
-                } else {
-                    /*
-                    * This I/O transfer is not sufficiently aligned, so read the affected block into a buffer:
-                    */
-                    DBG_VOP(("\tRequesting block Ox%X, size = 0x%08lX...\n", logBlockNo, fragSize));
-                    retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp);
-
-                    if (retval != E_NONE) {
-                        if (bp)
-                            brelse(bp);
-                        break;
-                    }
-                }
-            }
+               /* Check to see whether the area between the end of the write and the end of
+                  the page it falls in is invalid and should be zero-filled as part of the transfer:
+                */
+               tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+               if (tail_off > filesize) tail_off = filesize;
+               if (tail_off > writelimit) {
+                       if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
+                               lflag |= IO_TAILZEROFILL;
+                       };
+               };
+               
+               /*
+                * if the write starts beyond the current EOF (possibly advanced in the
+                * zeroing of the last block, above), then we'll zero fill from the current EOF
+                * to where the write begins:
+                *
+                * NOTE: If (and ONLY if) the portion of the file about to be written is
+                *       before the current EOF it might be marked as invalid now and must be
+                *       made readable (removed from the invalid ranges) before cluster_write
+                *       tries to write it:
+                */
+               io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
+               io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
+               if (io_start < fp->ff_size) {
+                       rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
+               };
+               retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
+                               tail_off, devBlockSize, lflag | IO_NOZERODIRTY);
+                               
+               if (uio->uio_offset > fp->ff_size) {
+                       fp->ff_size = uio->uio_offset;
 
-            /* See if we are starting to write within file boundaries:
-                If not, then we need to present a "hole" for the area between
-                the current EOF and the start of the current I/O operation:
-
-                Note that currOffset is only less than uio_offset if uio_offset > LEOF...
-                */
-            if (uio->uio_offset > currOffset) {
-                clearSize = MIN(uio->uio_offset - currOffset, xfersize);
-                DBG_RW(("\tzeroing Ox%lX bytes Ox%lX bytes into block Ox%X...\n", clearSize, blkoffset, logBlockNo));
-                bzero(bp->b_data + blkoffset, clearSize);
-                currOffset += clearSize;
-                blkoffset += clearSize;
-                xfersize -= clearSize;
-            };
+                       ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
+               }
+               if (resid > uio->uio_resid)
+                       cp->c_flag |= C_CHANGE | C_UPDATE;
+       }
 
-            if (xfersize > 0) {
-                DBG_RW(("\tCopying Ox%lX bytes Ox%lX bytes into block Ox%X... ioflag == 0x%X\n",
-                        xfersize, blkoffset, logBlockNo, ioflag));
-                retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio);
-                currOffset += xfersize;
-            };
-            DBG_ASSERT((bp->b_bcount % devBlockSize) == 0);
-
-            if (ioflag & IO_SYNC) {
-                (void)VOP_BWRITE(bp);
-                //DBG_RW(("\tissuing bwrite\n"));
-            } else if ((xfersize + blkoffset) == fragSize) {
-                //DBG_RW(("\tissuing bawrite\n"));
-                bp->b_flags |= B_AGE;
-                bawrite(bp);
-            } else {
-                //DBG_RW(("\tissuing bdwrite\n"));
-                bdwrite(bp);
-            };
+       HFS_KNOTE(vp, NOTE_WRITE);
 
-            /* Update the EOF if we just extended the file
-                (the PEOF has already been moved out and the block mapping table has been updated): */
-            if (currOffset > fcb->fcbEOF) {
-                DBG_VOP(("\textending EOF to 0x%lX...\n", (UInt32)fcb->fcbEOF));
-                fcb->fcbEOF = currOffset;
+ioerr_exit:
+       /*
+        * If we successfully wrote any data, and we are not the superuser
+        * we clear the setuid and setgid bits as a precaution against
+        * tampering.
+        */
+       if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
+               cp->c_mode &= ~(S_ISUID | S_ISGID);
+
+       if (retval) {
+               if (ioflag & IO_UNIT) {
+                       (void)VOP_TRUNCATE(vp, origFileSize,
+                               ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
+                       uio->uio_offset -= resid - uio->uio_resid;
+                       uio->uio_resid = resid;
+                       filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+               }
+       } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
+               tv = time;
+               retval = VOP_UPDATE(vp, &tv, &tv, 1);
+       }
+       vcb->vcbWrCnt++;
 
-                if (fcb->fcbEOF > fcb->fcbMaxEOF)
-                               fcb->fcbMaxEOF = fcb->fcbEOF;
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
+               (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
 
-                if (UBCISVALID(vp))
-                    ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
-            };
+       return (retval);
+}
 
-            if (retval || (resid == 0))
-                break;
-            hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
-        };
-    };
-    /*
-    * If we successfully wrote any data, and we are not the superuser
-    * we clear the setuid and setgid bits as a precaution against
-    * tampering.
-    */
-    if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
-    hp->h_meta->h_mode &= ~(ISUID | ISGID);
-
-    if (retval) {
-        if (ioflag & IO_UNIT) {
-            (void)VOP_TRUNCATE(vp, origFileSize,
-                            ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
-            uio->uio_offset -= resid - uio->uio_resid;
-            uio->uio_resid = resid;
-        }
-    } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
-        tv = time;
-        retval = VOP_UPDATE(vp, &tv, &tv, 1);
-    }
 
-    #if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-    #endif
+#ifdef HFS_SPARSE_DEV
+struct hfs_backingstoreinfo {
+       int  signature;   /* == 3419115 */
+       int  version;     /* version of this struct (1) */
+       int  backingfd;   /* disk image file (on backing fs) */
+       int  bandsize;    /* sparse disk image band size */
+};
 
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
-                (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF, (int)fcb->fcbPLen, 0);
+#define HFSIOC_SETBACKINGSTOREINFO   _IOW('h', 7, struct hfs_backingstoreinfo)
+#define HFSIOC_CLRBACKINGSTOREINFO   _IO('h', 8)
 
-    DBG_VOP_LOCKS_TEST(retval);
-    return (retval);
-}
+#define HFS_SETBACKINGSTOREINFO  IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO)
+#define HFS_CLRBACKINGSTOREINFO  IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO)
 
+#endif /* HFS_SPARSE_DEV */
 
 /*
 
@@ -676,63 +527,174 @@ struct vop_write_args /* {
 /* ARGSUSED */
 int
 hfs_ioctl(ap)
-struct vop_ioctl_args /* {
-    struct vnode *a_vp;
-    int  a_command;
-    caddr_t  a_data;
-    int  a_fflag;
-    struct ucred *a_cred;
-    struct proc *a_p;
-} */ *ap;
+       struct vop_ioctl_args /* {
+               struct vnode *a_vp;
+               int  a_command;
+               caddr_t  a_data;
+               int  a_fflag;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
 {
-    DBG_FUNC_NAME("hfs_ioctl");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
+       switch (ap->a_command) {
+
+#ifdef HFS_SPARSE_DEV
+       case HFS_SETBACKINGSTOREINFO: {
+               struct hfsmount * hfsmp;
+               struct vnode * bsfs_rootvp;
+               struct vnode * di_vp;
+               struct file * di_fp;
+               struct hfs_backingstoreinfo *bsdata;
+               int error = 0;
+               
+               hfsmp = VTOHFS(ap->a_vp);
+               if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+                       return (EALREADY);
+               }
+               if (ap->a_p->p_ucred->cr_uid != 0 &&
+                       ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
+                       return (EACCES); /* must be owner of file system */
+               }
+               bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
+               if (bsdata == NULL) {
+                       return (EINVAL);
+               }
+               if (error = fdgetf(ap->a_p, bsdata->backingfd, &di_fp)) {
+                       return (error);
+               }
+               if (fref(di_fp) == -1) {
+                       return (EBADF);
+               }
+               if (di_fp->f_type != DTYPE_VNODE) {
+                       frele(di_fp);
+                       return (EINVAL);
+               }
+               di_vp = (struct vnode *)di_fp->f_data;
+               if (ap->a_vp->v_mount == di_vp->v_mount) {
+                       frele(di_fp);
+                       return (EINVAL);
+               }
 
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_UNLOCKED, VOPDBG_UNLOCKED, VOPDBG_UNLOCKED, VOPDBG_POS);
+               /*
+                * Obtain the backing fs root vnode and keep a reference
+                * on it.  This reference will be dropped in hfs_unmount.
+                */
+               error = VFS_ROOT(di_vp->v_mount, &bsfs_rootvp);
+               if (error) {
+                       frele(di_fp);
+                       return (error);
+               }
+               VOP_UNLOCK(bsfs_rootvp, 0, ap->a_p);  /* Hold on to the reference */
 
-    switch (ap->a_command) {
-       
-    case 1:
-    {   register struct hfsnode *hp;
-        register struct vnode *vp;
-       register struct radvisory *ra;
-       FCB *fcb;
-       int devBlockSize = 0;
-       int error;
+               hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
+               hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
+               hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
+               hfsmp->hfs_sparsebandblks *= 4;
 
-       vp = ap->a_vp;
+               frele(di_fp);
+               return (0);
+       }
+       case HFS_CLRBACKINGSTOREINFO: {
+               struct hfsmount * hfsmp;
+               struct vnode * tmpvp;
+
+               hfsmp = VTOHFS(ap->a_vp);
+               if (ap->a_p->p_ucred->cr_uid != 0 &&
+                       ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
+                       return (EACCES); /* must be owner of file system */
+               }
+               if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
+                   hfsmp->hfs_backingfs_rootvp) {
+
+                       hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
+                       tmpvp = hfsmp->hfs_backingfs_rootvp;
+                       hfsmp->hfs_backingfs_rootvp = NULLVP;
+                       hfsmp->hfs_sparsebandblks = 0;
+                       vrele(tmpvp);
+               }
+               return (0);
+       }
+#endif /* HFS_SPARSE_DEV */
 
-       VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+       case 6: {
+               int error;
 
-       ra = (struct radvisory *)(ap->a_data);
-       hp = VTOH(vp);
+               ap->a_vp->v_flag |= VFULLFSYNC;
+               error = VOP_FSYNC(ap->a_vp, ap->a_cred, MNT_NOWAIT, ap->a_p);
+               ap->a_vp->v_flag &= ~VFULLFSYNC;
 
-       fcb = HTOFCB(hp);
+               return error;
+       }
+       case 5: {
+               register struct vnode *vp;
+               register struct cnode *cp;
+               struct filefork *fp;
+               int error;
+
+               vp = ap->a_vp;
+               cp = VTOC(vp);
+               fp = VTOF(vp);
+
+               if (vp->v_type != VREG)
+                       return EINVAL;
+               VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
+               error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+               if (error)
+                       return (error);
+
+               /*
+                * used by regression test to determine if 
+                * all the dirty pages (via write) have been cleaned
+                * after a call to 'fsync'.
+                */
+               error = is_file_clean(vp, fp->ff_size);
+               VOP_UNLOCK(vp, 0, ap->a_p);
 
-       if (ra->ra_offset >= fcb->fcbEOF) {
-           VOP_UNLOCK(vp, 0, ap->a_p);
-           DBG_VOP_LOCKS_TEST(EFBIG);
-           return (EFBIG);
+               return (error);
        }
-       VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
 
-       error = advisory_read(vp, fcb->fcbEOF, ra->ra_offset, ra->ra_count, devBlockSize);
-       VOP_UNLOCK(vp, 0, ap->a_p);
+       case 1: {
+               register struct vnode *vp;
+               register struct radvisory *ra;
+               register struct cnode *cp;
+               struct filefork *fp;
+               int devBlockSize = 0;
+               int error;
 
-       DBG_VOP_LOCKS_TEST(error);
-       return (error);
-    }
+               vp = ap->a_vp;
+
+               if (vp->v_type != VREG)
+                       return EINVAL;
+               VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
+               error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+               if (error)
+                       return (error);
+
+               ra = (struct radvisory *)(ap->a_data);
+               cp = VTOC(vp);
+               fp = VTOF(vp);
+
+               if (ra->ra_offset >= fp->ff_size) {
+                       VOP_UNLOCK(vp, 0, ap->a_p);
+                       return (EFBIG);
+               }
+               VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
 
-    case 2: /* F_READBOOTBLOCKS */
-    case 3: /* F_WRITEBOOTBLOCKS */
-      {
+               error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
+               VOP_UNLOCK(vp, 0, ap->a_p);
+
+               return (error);
+       }
+
+        case 2: /* F_READBOOTBLOCKS */
+        case 3: /* F_WRITEBOOTBLOCKS */
+            {
            struct vnode *vp = ap->a_vp;
-           struct hfsnode *hp = VTOH(vp);
+           struct vnode *devvp = NULL;
            struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
-           u_long devBlockSize;
+           int devBlockSize;
            int error;
            struct iovec aiov;
            struct uio auio;
@@ -741,9 +703,10 @@ struct vop_ioctl_args /* {
            u_long xfersize;
            struct buf *bp;
 
-        if ((vp->v_flag & VROOT) == 0) return EINVAL;
-        if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;
+            if ((vp->v_flag & VROOT) == 0) return EINVAL;
+            if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;
            
+           devvp = VTOHFS(vp)->hfs_devvp;
            aiov.iov_base = btd->fbt_buffer;
            aiov.iov_len = btd->fbt_length;
            
@@ -755,141 +718,65 @@ struct vop_ioctl_args /* {
            auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
            auio.uio_procp = ap->a_p;
 
-           VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
+           VOP_DEVBLOCKSIZE(devvp, &devBlockSize);
 
            while (auio.uio_resid > 0) {
              blockNumber = auio.uio_offset / devBlockSize;
-             error = bread(hp->h_meta->h_devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
+             error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
              if (error) {
-              if (bp) brelse(bp);
-              return error;
-          };
+                  if (bp) brelse(bp);
+                  return error;
+                };
 
-          blockOffset = auio.uio_offset % devBlockSize;
+                blockOffset = auio.uio_offset % devBlockSize;
              xfersize = devBlockSize - blockOffset;
              error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
-          if (error) {
-              brelse(bp);
-              return error;
-          };
-          if (auio.uio_rw == UIO_WRITE) {
-              error = VOP_BWRITE(bp);
-              if (error) return error;
-          } else {
-              brelse(bp);
-          };
+                if (error) {
+                  brelse(bp);
+                  return error;
+                };
+                if (auio.uio_rw == UIO_WRITE) {
+                  error = VOP_BWRITE(bp);
+                  if (error) return error;
+                } else {
+                  brelse(bp);
+                };
+            };
         };
-      };
-      return 0;
+        return 0;
+
+        case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
+            {
+            *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
+            return 0;
+            }
 
-    default:
-        DBG_VOP_LOCKS_TEST(ENOTTY);
-        return (ENOTTY);
+        default:
+            return (ENOTTY);
     }
 
-    return 0;
+    /* Should never get here */
+       return 0;
 }
 
 /* ARGSUSED */
 int
 hfs_select(ap)
-struct vop_select_args /* {
-    struct vnode *a_vp;
-    int  a_which;
-    int  a_fflags;
-    struct ucred *a_cred;
-    struct proc *a_p;
-} */ *ap;
-{
-    DBG_FUNC_NAME("hfs_select");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
-
-    /*
-     * We should really check to see if I/O is possible.
-     */
-    DBG_VOP_LOCKS_TEST(1);
-    return (1);
-}
-
-
-
-/*
- * Mmap a file
- *
- * NB Currently unsupported.
-# XXX - not used
-#
- vop_mmap {
-     IN struct vnode *vp;
-     IN int fflags;
-     IN struct ucred *cred;
-     IN struct proc *p;
-
-     */
-
-/* ARGSUSED */
-
-int
-hfs_mmap(ap)
-struct vop_mmap_args /* {
-    struct vnode *a_vp;
-    int  a_fflags;
-    struct ucred *a_cred;
-    struct proc *a_p;
-} */ *ap;
-{
-    DBG_FUNC_NAME("hfs_mmap");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
-
-    DBG_VOP_LOCKS_TEST(EINVAL);
-    return (EINVAL);
-}
-
-
-
-/*
- * Seek on a file
- *
- * Nothing to do, so just return.
-# XXX - not used
-# Needs work: Is newoff right?  What's it mean?
-#
- vop_seek {
-     IN struct vnode *vp;
-     IN off_t oldoff;
-     IN off_t newoff;
-     IN struct ucred *cred;
-     */
-/* ARGSUSED */
-int
-hfs_seek(ap)
-struct vop_seek_args /* {
-    struct vnode *a_vp;
-    off_t  a_oldoff;
-    off_t  a_newoff;
-    struct ucred *a_cred;
-} */ *ap;
+       struct vop_select_args /* {
+               struct vnode *a_vp;
+               int  a_which;
+               int  a_fflags;
+               struct ucred *a_cred;
+               void *a_wql;
+               struct proc *a_p;
+       } */ *ap;
 {
-    DBG_FUNC_NAME("hfs_seek");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
-
-    DBG_VOP_LOCKS_TEST(E_NONE);
-    return (E_NONE);
+       /*
+        * We should really check to see if I/O is possible.
+        */
+       return (1);
 }
 
-
 /*
  * Bmap converts a the logical block number of a file to its physical block
  * number on the disk.
@@ -920,69 +807,95 @@ struct vop_seek_args /* {
 
 int
 hfs_bmap(ap)
-struct vop_bmap_args /* {
-    struct vnode *a_vp;
-    daddr_t a_bn;
-    struct vnode **a_vpp;
-    daddr_t *a_bnp;
-    int *a_runp;
-} */ *ap;
+       struct vop_bmap_args /* {
+               struct vnode *a_vp;
+               daddr_t a_bn;
+               struct vnode **a_vpp;
+               daddr_t *a_bnp;
+               int *a_runp;
+       } */ *ap;
 {
-    struct hfsnode             *hp = VTOH(ap->a_vp);
-    struct hfsmount    *hfsmp = VTOHFS(ap->a_vp);
-    int                                        retval = E_NONE;
+       struct vnode *vp = ap->a_vp;
+       struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       struct hfsmount *hfsmp = VTOHFS(vp);
+   int                                 retval = E_NONE;
     daddr_t                            logBlockSize;
     size_t                             bytesContAvail = 0;
+    off_t blockposition;
     struct proc                        *p = NULL;
     int                                        lockExtBtree;
+    struct rl_entry *invalid_range;
+    enum rl_overlaptype overlaptype;
 
-#define DEBUG_BMAP 0
-#if DEBUG_BMAP
-    DBG_FUNC_NAME("hfs_bmap");
-    DBG_VOP_LOCKS_DECL(2);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-    if (ap->a_vpp != NULL) {
-        DBG_VOP_LOCKS_INIT(1,*ap->a_vpp, VOPDBG_IGNORE, VOPDBG_UNLOCKED, VOPDBG_IGNORE, VOPDBG_POS);
-    } else {
-        DBG_VOP_LOCKS_INIT(1,NULL, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
-       };
-#endif
-
-       DBG_IO(("\tMapped blk %d --> ", ap->a_bn));
-    /*
-     * Check for underlying vnode requests and ensure that logical
-     * to physical mapping is requested.
-     */
-    if (ap->a_vpp != NULL)
-        *ap->a_vpp = VTOH(ap->a_vp)->h_meta->h_devvp;
-    if (ap->a_bnp == NULL)
-        return (0);
-
-    lockExtBtree = hasOverflowExtents(hp);
-    if (lockExtBtree)
-    {
-        p = current_proc();
-        retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p);
-        if (retval)
-            return (retval);
-    }
-
-       logBlockSize = GetLogicalBlockSize(ap->a_vp);
+       /*
+        * Check for underlying vnode requests and ensure that logical
+        * to physical mapping is requested.
+        */
+       if (ap->a_vpp != NULL)
+               *ap->a_vpp = cp->c_devvp;
+       if (ap->a_bnp == NULL)
+               return (0);
+
+       /* Only clustered I/O should have delayed allocations. */
+       DBG_ASSERT(fp->ff_unallocblocks == 0);
+
+       logBlockSize = GetLogicalBlockSize(vp);
+       blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;
+
+       lockExtBtree = overflow_extents(fp);
+       if (lockExtBtree) {
+               p = current_proc();
+               retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
+                               LK_EXCLUSIVE | LK_CANRECURSE, p);
+               if (retval)
+                       return (retval);
+       }
 
        retval = MacToVFSError(
-                               MapFileBlockC (HFSTOVCB(hfsmp),
-                                              HTOFCB(hp),
-                                              MAXPHYSIO,
-                                              (off_t)(ap->a_bn * logBlockSize),
-                                              ap->a_bnp,
-                                              &bytesContAvail));
+                            MapFileBlockC (HFSTOVCB(hfsmp),
+                                            (FCB*)fp,
+                                            MAXPHYSIO,
+                                            blockposition,
+                                            ap->a_bnp,
+                                            &bytesContAvail));
 
     if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
 
     if (retval == E_NONE) {
+        /* Adjust the mapping information for invalid file ranges: */
+        overlaptype = rl_scan(&fp->ff_invalidranges,
+                            blockposition,
+                            blockposition + MAXPHYSIO - 1,
+                            &invalid_range);
+        if (overlaptype != RL_NOOVERLAP) {
+            switch(overlaptype) {
+                case RL_MATCHINGOVERLAP:
+                case RL_OVERLAPCONTAINSRANGE:
+                case RL_OVERLAPSTARTSBEFORE:
+                    /* There's no valid block for this byte offset: */
+                    *ap->a_bnp = (daddr_t)-1;
+                    bytesContAvail = invalid_range->rl_end + 1 - blockposition;
+                    break;
+                
+                case RL_OVERLAPISCONTAINED:
+                case RL_OVERLAPENDSAFTER:
+                    /* The range of interest hits an invalid block before the end: */
+                    if (invalid_range->rl_start == blockposition) {
+                       /* There's actually no valid information to be had starting here: */
+                       *ap->a_bnp = (daddr_t)-1;
+                                               if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
+                                                       (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
+                               bytesContAvail = invalid_range->rl_end + 1 - blockposition;
+                       };
+                    } else {
+                       bytesContAvail = invalid_range->rl_start - blockposition;
+                    };
+                    break;
+            };
+                       if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
+        };
+        
         /* Figure out how many read ahead blocks there are */
         if (ap->a_runp != NULL) {
             if (can_cluster(logBlockSize)) {
@@ -994,21 +907,6 @@ struct vop_bmap_args /* {
         };
     };
 
-    DBG_IO(("%d:%d.\n", *ap->a_bnp, (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1));
-
-#if DEBUG_BMAP
-
-    DBG_VOP_LOCKS_TEST(retval);
-#endif
-
-    if (ap->a_runp) {
-        DBG_ASSERT((*ap->a_runp * logBlockSize) < bytesContAvail);                                                     /* At least *ap->a_runp blocks left and ... */
-        if (can_cluster(logBlockSize)) {
-            DBG_ASSERT(bytesContAvail - (*ap->a_runp * logBlockSize) < (2*logBlockSize));      /* ... at most 1 logical block accounted for by current block */
-                                                                                            /* ... plus some sub-logical block sized piece */
-        };
-    };
-
     return (retval);
 }
 
@@ -1016,11 +914,11 @@ struct vop_bmap_args /* {
 
 int
 hfs_blktooff(ap)
-struct vop_blktooff_args /* {
-    struct vnode *a_vp;
-    daddr_t a_lblkno;  
-    off_t *a_offset;
-} */ *ap;
+       struct vop_blktooff_args /* {
+               struct vnode *a_vp;
+               daddr_t a_lblkno;  
+               off_t *a_offset;
+       } */ *ap;
 {      
        if (ap->a_vp == NULL)
                return (EINVAL);
@@ -1031,14 +929,12 @@ struct vop_blktooff_args /* {
 
 int
 hfs_offtoblk(ap)
-struct vop_offtoblk_args /* {
-       struct vnode *a_vp;
-       off_t a_offset;    
-       daddr_t *a_lblkno;
-} */ *ap;
+       struct vop_offtoblk_args /* {
+               struct vnode *a_vp;
+               off_t a_offset;    
+               daddr_t *a_lblkno;
+       } */ *ap;
 {      
-       long lbsize, boff;
-
        if (ap->a_vp == NULL)
                return (EINVAL);
        *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;
@@ -1048,73 +944,323 @@ struct vop_offtoblk_args /* {
 
 int
 hfs_cmap(ap)
-struct vop_cmap_args /* {
-       struct vnode *a_vp;
-       off_t a_foffset;    
-       size_t a_size;
-       daddr_t *a_bpn;
-       size_t *a_run;
-       void *a_poff;
-} */ *ap;
+       struct vop_cmap_args /* {
+               struct vnode *a_vp;
+               off_t a_foffset;    
+               size_t a_size;
+               daddr_t *a_bpn;
+               size_t *a_run;
+               void *a_poff;
+       } */ *ap;
 {
-    struct hfsnode     *hp = VTOH(ap->a_vp);
-    struct hfsmount    *hfsmp = VTOHFS(ap->a_vp);
+    struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
+    struct filefork *fp = VTOF(ap->a_vp);
     size_t                             bytesContAvail = 0;
     int                        retval = E_NONE;
-    int                                        lockExtBtree;
+    int lockExtBtree = 0;
     struct proc                *p = NULL;
+    struct rl_entry *invalid_range;
+    enum rl_overlaptype overlaptype;
+    int started_tr = 0, grabbed_lock = 0;
+       struct timeval tv;
 
-#define DEBUG_CMAP 0
-#if DEBUG_CMAP
-    DBG_FUNC_NAME("hfs_cmap");
-    DBG_VOP_LOCKS_DECL(2);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);
+       /*
+        * Check for underlying vnode requests and ensure that logical
+        * to physical mapping is requested.
+        */
+       if (ap->a_bpn == NULL)
+               return (0);
 
-    DBG_VOP_LOCKS_INIT(0, ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-#endif
+       p = current_proc();
 
-    DBG_IO(("\tMapped offset %qx --> ", ap->a_foffset));
-    /*
-     * Check for underlying vnode requests and ensure that logical
-     * to physical mapping is requested.
-     */
-    if (ap->a_bpn == NULL)
-        return (0);
-
-    if (lockExtBtree = hasOverflowExtents(hp))
-    {
-        p = current_proc();
-        if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p))
-            return (retval);
-    }
-    retval = MacToVFSError(
-                          MapFileBlockC (HFSTOVCB(hfsmp),
-                                         HTOFCB(hp),
+       if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) {
+               /*
+                * File blocks are getting remapped. Wait until it's finished.
+                */
+               SET(VTOC(ap->a_vp)->c_flag, C_WBLKMAP);
+               (void) tsleep((caddr_t)VTOC(ap->a_vp), PINOD, "hfs_cmap", 0);
+               if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP))
+                       panic("hfs_cmap: no mappable blocks");
+       }       
+
+  retry:
+       if (fp->ff_unallocblocks) {
+               lockExtBtree = 1;
+
+               // XXXdbg
+               hfs_global_shared_lock_acquire(hfsmp);
+               grabbed_lock = 1;
+
+               if (hfsmp->jnl) {
+                       if (journal_start_transaction(hfsmp->jnl) != 0) {
+                               hfs_global_shared_lock_release(hfsmp);
+                               return EINVAL;
+                       } else {
+                               started_tr = 1;
+                       }
+               } 
+
+               if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
+                       if (started_tr) {
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       if (grabbed_lock) {
+                               hfs_global_shared_lock_release(hfsmp);
+                       }
+                       return (retval);
+               }
+       } else if (overflow_extents(fp)) {
+               lockExtBtree = 1;
+               if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
+                       return retval;
+               }
+       }
+
+       /*
+        * Check for any delayed allocations.
+        */
+       if (fp->ff_unallocblocks) {
+               SInt64 reqbytes, actbytes;
+
+               // 
+               // Make sure we have a transaction.  It's possible
+               // that we came in and fp->ff_unallocblocks was zero
+               // but during the time we blocked acquiring the extents
+               // btree, ff_unallocblocks became non-zero and so we
+               // will need to start a transaction.
+               //
+               if (hfsmp->jnl && started_tr == 0) {
+                   if (lockExtBtree) {
+                       (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+                       lockExtBtree = 0;
+                   }
+
+                   goto retry;
+               }
+
+               reqbytes = (SInt64)fp->ff_unallocblocks *
+                            (SInt64)HFSTOVCB(hfsmp)->blockSize;
+               /*
+                * Release the blocks on loan and acquire some real ones.
+                * Note that we can race someone else for these blocks
+                * (and lose) so cmap needs to handle a failure here.
+                * Currently this race can't occur because all allocations
+                * are protected by an exclusive lock on the Extents
+                * Overflow file.
+                */
+               HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
+               FTOC(fp)->c_blocks            -= fp->ff_unallocblocks;
+               fp->ff_blocks                 -= fp->ff_unallocblocks;
+               fp->ff_unallocblocks           = 0;
+
+               /* Files that are changing size are not hot file candidates. */
+               if (hfsmp->hfc_stage == HFC_RECORDING) {
+                       fp->ff_bytesread = 0;
+               }
+               while (retval == 0 && reqbytes > 0) {
+                       retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
+                                       (FCB*)fp, reqbytes, 0,
+                                       kEFAllMask | kEFNoClumpMask, &actbytes));
+                       if (retval == 0 && actbytes == 0)
+                               retval = ENOSPC;
+
+                       if (retval) {
+                               fp->ff_unallocblocks =
+                                       reqbytes / HFSTOVCB(hfsmp)->blockSize;
+                               HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
+                               FTOC(fp)->c_blocks            += fp->ff_unallocblocks;
+                               fp->ff_blocks                 += fp->ff_unallocblocks;
+                       }
+                       reqbytes -= actbytes;
+               }
+
+               if (retval) {
+                       (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+                       VTOC(ap->a_vp)->c_flag |= C_MODIFIED;
+                       if (started_tr) {
+                               tv = time;
+                               VOP_UPDATE(ap->a_vp, &tv, &tv, 1);
+
+                               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       if (grabbed_lock) {
+                               hfs_global_shared_lock_release(hfsmp);
+                       }
+                       return (retval);
+               }
+       }
+
+       retval = MacToVFSError(
+                          MapFileBlockC (HFSTOVCB(hfsmp),
+                                         (FCB *)fp,
                                          ap->a_size,
                                          ap->a_foffset,
                                          ap->a_bpn,
                                          &bytesContAvail));
 
-    if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+       if (lockExtBtree)
+               (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
 
-    if ((retval == E_NONE) && (ap->a_run))
-               *ap->a_run = bytesContAvail;
+       // XXXdbg
+       if (started_tr) {
+               tv = time;
+               retval = VOP_UPDATE(ap->a_vp, &tv, &tv, 1);
 
-    if (ap->a_poff)
-               *(int *)ap->a_poff = 0;
+               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+               journal_end_transaction(hfsmp->jnl);
+               started_tr = 0;
+       }
+       if (grabbed_lock) {
+               hfs_global_shared_lock_release(hfsmp);
+               grabbed_lock = 0;
+       }
+                       
+    if (retval == E_NONE) {
+        /* Adjust the mapping information for invalid file ranges: */
+        overlaptype = rl_scan(&fp->ff_invalidranges,
+                            ap->a_foffset,
+                            ap->a_foffset + (off_t)bytesContAvail - 1,
+                            &invalid_range);
+        if (overlaptype != RL_NOOVERLAP) {
+            switch(overlaptype) {
+                case RL_MATCHINGOVERLAP:
+                case RL_OVERLAPCONTAINSRANGE:
+                case RL_OVERLAPSTARTSBEFORE:
+                    /* There's no valid block for this byte offset: */
+                    *ap->a_bpn = (daddr_t)-1;
+                    
+                    /* There's no point limiting the amount to be returned if the
+                       invalid range that was hit extends all the way to the EOF
+                       (i.e. there's no valid bytes between the end of this range
+                       and the file's EOF):
+                     */
+                    if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
+                                       (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
+                       bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+                    };
+                    break;
+                
+                case RL_OVERLAPISCONTAINED:
+                case RL_OVERLAPENDSAFTER:
+                    /* The range of interest hits an invalid block before the end: */
+                    if (invalid_range->rl_start == ap->a_foffset) {
+                       /* There's actually no valid information to be had starting here: */
+                       *ap->a_bpn = (daddr_t)-1;
+                                               if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
+                                                       (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
+                               bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+                       };
+                    } else {
+                       bytesContAvail = invalid_range->rl_start - ap->a_foffset;
+                    };
+                    break;
+            };
+            if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
+        };
+        
+        if (ap->a_run) *ap->a_run = bytesContAvail;
+    };
 
-    DBG_IO(("%d:%d.\n", *ap->a_bpn, bytesContAvail));
+       if (ap->a_poff)
+               *(int *)ap->a_poff = 0;
 
-#if DEBUG_BMAP
+       return (retval);
+}
 
-    DBG_VOP_LOCKS_TEST(retval);
-#endif
 
-    return (retval);
+/*
+ * Read or write a buffer that is not contiguous on disk.  We loop over
+ * each device block, copying to or from caller's buffer.
+ *
+ * We could be a bit more efficient by transferring as much data as is
+ * contiguous.  But that would be more complicated, and since this routine
+ * should rarely be called, it's best to keep it simple.
+ */
+static int
+hfs_strategy_fragmented(struct buf *bp)
+{
+       register struct vnode *vp = bp->b_vp;
+       register struct cnode *cp = VTOC(vp);
+       register struct vnode *devvp = cp->c_devvp;
+       caddr_t ioaddr;         /* Address of fragment within bp  */
+       struct buf *frag = NULL; /* For reading or writing a single block */
+       int retval = 0;
+       long remaining;         /* Bytes (in bp) left to transfer */
+       off_t offset;           /* Logical offset of current fragment in vp */
+       u_long block_size;      /* Size of one device block (and one I/O) */
+       
+       /* Make sure we redo this mapping for the next I/O */
+       bp->b_blkno = bp->b_lblkno;
+       
+       /* Set up the logical position and number of bytes to read/write */
+       offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
+       block_size = VTOHFS(vp)->hfs_phys_block_size;
+       
+       /* Get an empty buffer to do the deblocking */
+       frag = geteblk(block_size);
+       if (ISSET(bp->b_flags, B_READ))
+               SET(frag->b_flags, B_READ);
+
+       for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
+           ioaddr += block_size, offset += block_size,
+           remaining -= block_size) {
+               frag->b_resid = frag->b_bcount;
+               CLR(frag->b_flags, B_DONE);
+
+               /* Map the current position to a physical block number */
+               retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
+                   NULL, NULL);
+               if (retval != 0)
+                       break;
 
+               /*
+                * Did we try to read a hole?
+                * (Should never happen for metadata!)
+                */
+               if ((long)frag->b_lblkno == -1) {
+                       bzero(ioaddr, block_size);
+                       continue;
+               }
+               
+               /* If writing, copy before I/O */
+               if (!ISSET(bp->b_flags, B_READ))
+                       bcopy(ioaddr, frag->b_data, block_size);
+
+               /* Call the device to do the I/O and wait for it */
+               frag->b_blkno = frag->b_lblkno;
+               frag->b_vp = devvp;  /* Used to dispatch via VOP_STRATEGY */
+               frag->b_dev = devvp->v_rdev;
+               retval = VOP_STRATEGY(frag);
+               frag->b_vp = NULL;
+               if (retval != 0)
+                       break;
+               retval = biowait(frag);
+               if (retval != 0)
+                       break;
+               
+               /* If reading, copy after the I/O */
+               if (ISSET(bp->b_flags, B_READ))
+                       bcopy(frag->b_data, ioaddr, block_size);
+       }
+       
+       frag->b_vp = NULL;
+       //
+       // XXXdbg - in the case that this is a meta-data block, it won't affect
+       //          the journal because this bp is for a physical disk block,
+       //          not a logical block that is part of the catalog or extents
+       //          files.
+       SET(frag->b_flags, B_INVAL);
+       brelse(frag);
+       
+       if ((bp->b_error = retval) != 0)
+               SET(bp->b_flags, B_ERROR);
+       
+       biodone(bp);    /* This I/O is now complete */
+       return retval;
 }
 
+
 /*
  * Calculate the logical to physical mapping if not done already,
  * then call the device strategy routine.
@@ -1124,102 +1270,409 @@ struct vop_cmap_args /* {
     */
 int
 hfs_strategy(ap)
-struct vop_strategy_args /* {
-    struct buf *a_bp;
-} */ *ap;
+       struct vop_strategy_args /* {
+               struct buf *a_bp;
+       } */ *ap;
 {
-    register struct buf *bp = ap->a_bp;
-    register struct vnode *vp = bp->b_vp;
-    register struct hfsnode *hp;
-    int retval = 0;
+       register struct buf *bp = ap->a_bp;
+       register struct vnode *vp = bp->b_vp;
+       register struct cnode *cp = VTOC(vp);
+       int retval = 0;
+       off_t offset;
+       size_t bytes_contig;
+       
+       if ( !(bp->b_flags & B_VECTORLIST)) {
+               if (vp->v_type == VBLK || vp->v_type == VCHR)
+                       panic("hfs_strategy: device vnode passed!");
+
+               if (bp->b_flags & B_PAGELIST) {
+                       /*
+                        * If we have a page list associated with this bp,
+                        * then go through cluster_bp since it knows how to 
+                        * deal with a page request that might span non-
+                        * contiguous physical blocks on the disk...
+                        */
+                       retval = cluster_bp(bp);
+                       vp = cp->c_devvp;
+                       bp->b_dev = vp->v_rdev;
+
+                       return (retval);
+               }
+               
+               /*
+                * If we don't already know the filesystem relative block
+                * number then get it using VOP_BMAP().  If VOP_BMAP()
+                * returns the block number as -1 then we've got a hole in
+                * the file.  Although HFS filesystems don't create files with
+                * holes, invalidation of subranges of the file (lazy zero
+                * filling) may create such a situation.
+                */
+               if (bp->b_blkno == bp->b_lblkno) {
+                       offset = (off_t) bp->b_lblkno *
+                           (off_t) GetLogicalBlockSize(vp);
+
+                       if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
+                           &bp->b_blkno, &bytes_contig, NULL))) {
+                               bp->b_error = retval;
+                               bp->b_flags |= B_ERROR;
+                               biodone(bp);
+                               return (retval);
+                       }
+                       if (bytes_contig < bp->b_bcount)
+                       {
+                               /*
+                                * We were asked to read a block that wasn't
+                                * contiguous, so we have to read each of the
+                                * pieces and copy them into the buffer.
+                                * Since ordinary file I/O goes through
+                                * cluster_io (which won't ask us for
+                                * discontiguous data), this is probably an
+                                * attempt to read or write metadata.
+                                */
+                               return hfs_strategy_fragmented(bp);
+                       }
+                       if ((long)bp->b_blkno == -1)
+                               clrbuf(bp);
+               }
+               if ((long)bp->b_blkno == -1) {
+                       biodone(bp);
+                       return (0);
+               }
+               if (bp->b_validend == 0) {
+                       /*
+                        * Record the exact size of the I/O transfer about to
+                        * be made:
+                        */
+                       bp->b_validend = bp->b_bcount;
+               }
+       }
+       vp = cp->c_devvp;
+       bp->b_dev = vp->v_rdev;
 
-       DBG_FUNC_NAME("hfs_strategy");
+       return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
+}
 
-//     DBG_VOP_PRINT_FUNCNAME();DBG_VOP_CONT(("\n"));
 
-    hp = VTOH(vp);
+static int do_hfs_truncate(ap)
+       struct vop_truncate_args /* {
+               struct vnode *a_vp;
+               off_t a_length;
+               int a_flags;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       off_t length;
+       long vflags;
+       struct timeval tv;
+       int retval;
+       off_t bytesToAdd;
+       off_t actualBytesAdded;
+       off_t filebytes;
+       u_long fileblocks;
+       int blksize;
+       struct hfsmount *hfsmp;
+
+       if (vp->v_type != VREG && vp->v_type != VLNK)
+               return (EISDIR);        /* cannot truncate an HFS directory! */
+
+       length = ap->a_length;
+       blksize = VTOVCB(vp)->blockSize;
+       fileblocks = fp->ff_blocks;
+       filebytes = (off_t)fileblocks * (off_t)blksize;
+
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
+                (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+
+       if (length < 0)
+               return (EINVAL);
 
-    if ( !(bp->b_flags & B_VECTORLIST)) {
+       if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
+               return (EFBIG);
 
-        if (vp->v_type == VBLK || vp->v_type == VCHR)
-           panic("hfs_strategy: device vnode passed!");
+       hfsmp = VTOHFS(vp);
 
-       if (bp->b_flags & B_PAGELIST) {
-           /*
-            * if we have a page list associated with this bp,
-            * then go through cluste_bp since it knows how to 
-            * deal with a page request that might span non-contiguous
-            * physical blocks on the disk...
-            */
-           retval = cluster_bp(bp);
-           vp = hp->h_meta->h_devvp;
-           bp->b_dev = vp->v_rdev;
+       tv = time;
+       retval = E_NONE;
 
-           return (retval);
+       /* Files that are changing size are not hot file candidates. */
+       if (hfsmp->hfc_stage == HFC_RECORDING) {
+               fp->ff_bytesread = 0;
        }
+
+       /* 
+        * We cannot just check if fp->ff_size == length (as an optimization)
+        * since there may be extra physical blocks that also need truncation.
+        */
+#if QUOTA
+       if (retval = hfs_getinoquota(cp))
+               return(retval);
+#endif /* QUOTA */
+
        /*
-        * If we don't already know the filesystem relative block number
-        * then get it using VOP_BMAP().  If VOP_BMAP() returns the block
-        * number as -1 then we've got a hole in the file.  HFS filesystems
-        * don't allow files with holes, so we shouldn't ever see this.
+        * Lengthen the size of the file. We must ensure that the
+        * last byte of the file is allocated. Since the smallest
+        * value of ff_size is 0, length will be at least 1.
         */
-       if (bp->b_blkno == bp->b_lblkno) {
-           if ((retval = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL))) {
-               bp->b_error = retval;
-               bp->b_flags |= B_ERROR;
-               biodone(bp);
-               return (retval);
-           }
-           if ((long)bp->b_blkno == -1)
-               clrbuf(bp);
-       }
-       if ((long)bp->b_blkno == -1) {
-           biodone(bp);
-           return (0);
-       }
-       if (bp->b_validend == 0) {
-           /* Record the exact size of the I/O transfer about to be made: */
-           DBG_ASSERT(bp->b_validoff == 0);
-           bp->b_validend = bp->b_bcount;
-           DBG_ASSERT(bp->b_dirtyoff == 0);
-       };
-    }
-    vp = hp->h_meta->h_devvp;
-    bp->b_dev = vp->v_rdev;
-    DBG_IO(("\t\t>>>%s: continuing w/ vp: 0x%x with logBlk Ox%X and phyBlk Ox%X\n", funcname, (u_int)vp, bp->b_lblkno, bp->b_blkno));
+       if (length > fp->ff_size) {
+#if QUOTA
+               retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
+                               ap->a_cred, 0);
+               if (retval)
+                       goto Err_Exit;
+#endif /* QUOTA */
+               /*
+                * If we don't have enough physical space then
+                * we need to extend the physical size.
+                */
+               if (length > filebytes) {
+                       int eflags;
+                       u_long blockHint = 0;
 
-    return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
-}
+                       /* All or nothing and don't round up to clumpsize. */
+                       eflags = kEFAllMask | kEFNoClumpMask;
 
+                       if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
+                               eflags |= kEFReserveMask;  /* keep a reserve */
 
-/*
-#% reallocblks vp      L L L
-#
- vop_reallocblks {
-     IN struct vnode *vp;
-     IN struct cluster_save *buflist;
+                       /*
+                        * Allocate Journal and Quota files in metadata zone.
+                        */
+                       if (filebytes == 0 &&
+                           hfsmp->hfs_flags & HFS_METADATA_ZONE &&
+                           hfs_virtualmetafile(cp)) {
+                               eflags |= kEFMetadataMask;
+                               blockHint = hfsmp->hfs_metazone_start;
+                       }
+                       // XXXdbg
+                       hfs_global_shared_lock_acquire(hfsmp);
+                       if (hfsmp->jnl) {
+                               if (journal_start_transaction(hfsmp->jnl) != 0) {
+                                       retval = EINVAL;
+                                       goto Err_Exit;
+                               }
+                       }
+
+                       /* lock extents b-tree (also protects volume bitmap) */
+                       retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
+                       if (retval) {
+                               if (hfsmp->jnl) {
+                                       journal_end_transaction(hfsmp->jnl);
+                               } 
+                               hfs_global_shared_lock_release(hfsmp);
+
+                               goto Err_Exit;
+                       }
+
+                       while ((length > filebytes) && (retval == E_NONE)) {
+                               bytesToAdd = length - filebytes;
+                               retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
+                                                    (FCB*)fp,
+                                                    bytesToAdd,
+                                                    blockHint,
+                                                    eflags,
+                                                    &actualBytesAdded));
 
-     */
+                               filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+                               if (actualBytesAdded == 0 && retval == E_NONE) {
+                                       if (length > filebytes)
+                                               length = filebytes;
+                                       break;
+                               }
+                       } /* endwhile */
 
-int
-hfs_reallocblks(ap)
-struct vop_reallocblks_args /* {
-    struct vnode *a_vp;
-    struct cluster_save *a_buflist;
-} */ *ap;
-{
-    DBG_FUNC_NAME("hfs_reallocblks");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
+                       (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
 
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
+                       // XXXdbg
+                       if (hfsmp->jnl) {
+                               tv = time;
+                               VOP_UPDATE(vp, &tv, &tv, 1);
 
-    /* Currently no support for clustering */          /* XXX */
-    DBG_VOP_LOCKS_TEST(ENOSPC);
-    return (ENOSPC);
-}
+                               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                               journal_end_transaction(hfsmp->jnl);
+                       } 
+                       hfs_global_shared_lock_release(hfsmp);
+
+                       if (retval)
+                               goto Err_Exit;
+
+                       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+                               (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+               }
+               if (!(ap->a_flags & IO_NOZEROFILL)) {
+                       if (UBCINFOEXISTS(vp) && retval == E_NONE) {
+                               struct rl_entry *invalid_range;
+                               int devBlockSize;
+                               off_t zero_limit;
+                       
+                               zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+                               if (length < zero_limit) zero_limit = length;
+
+                               if (length > fp->ff_size) {
+                                       /* Extending the file: time to fill out the current last page w. zeroes? */
+                                       if ((fp->ff_size & PAGE_MASK_64) &&
+                                           (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
+                                           fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
+                                               
+                                               /* There's some valid data at the start of the (current) last page
+                                                  of the file, so zero out the remainder of that page to ensure the
+                                                  entire page contains valid data.  Since there is no invalid range
+                                                  possible past the (current) eof, there's no need to remove anything
+                                                  from the invalid range list before calling cluster_write():                                           */
+                                               VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
+                                               retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
+                                                               fp->ff_size, (off_t)0, devBlockSize,
+                                                               (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
+                                               if (retval) goto Err_Exit;
+                                               
+                                               /* Merely invalidate the remaining area, if necessary: */
+                                               if (length > zero_limit) {
+                                                       rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
+                                                       cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
+                                               }
+                                       } else {
+                                       /* The page containing the (current) eof is invalid: just add the
+                                          remainder of the page to the invalid list, along with the area
+                                          being newly allocated:
+                                        */
+                                       rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
+                                       cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
+                                       };
+                               }
+                       } else {
+                                       panic("hfs_truncate: invoked on non-UBC object?!");
+                       };
+               }
+               cp->c_flag |= C_UPDATE;
+               fp->ff_size = length;
+
+               if (UBCISVALID(vp))
+                       ubc_setsize(vp, fp->ff_size);   /* XXX check errors */
+
+       } else { /* Shorten the size of the file */
+
+               if (fp->ff_size > length) {
+                       /*
+                        * Any buffers that are past the truncation point need to be
+                        * invalidated (to maintain buffer cache consistency).  For
+                        * simplicity, we invalidate all the buffers by calling vinvalbuf.
+                        */
+                       if (UBCISVALID(vp))
+                               ubc_setsize(vp, length); /* XXX check errors */
+
+                       vflags = ((length > 0) ? V_SAVE : 0)  | V_SAVEMETA;     
+                       retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
+           
+                       /* Any space previously marked as invalid is now irrelevant: */
+                       rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
+               }
+
+               /* 
+                * Account for any unmapped blocks. Note that the new
+                * file length can still end up with unmapped blocks.
+                */
+               if (fp->ff_unallocblocks > 0) {
+                       u_int32_t finalblks;
+
+                       /* lock extents b-tree */
+                       retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
+                                       LK_EXCLUSIVE, ap->a_p);
+                       if (retval)
+                               goto Err_Exit;
+
+                       VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
+                       cp->c_blocks             -= fp->ff_unallocblocks;
+                       fp->ff_blocks            -= fp->ff_unallocblocks;
+                       fp->ff_unallocblocks      = 0;
+
+                       finalblks = (length + blksize - 1) / blksize;
+                       if (finalblks > fp->ff_blocks) {
+                               /* calculate required unmapped blocks */
+                               fp->ff_unallocblocks      = finalblks - fp->ff_blocks;
+                               VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
+                               cp->c_blocks             += fp->ff_unallocblocks;
+                               fp->ff_blocks            += fp->ff_unallocblocks;
+                       }
+                       (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
+                                       LK_RELEASE, ap->a_p);
+               }
+
+               /*
+                * For a TBE process the deallocation of the file blocks is
+                * delayed until the file is closed.  And hfs_close calls
+                * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
+                * isn't set, we make sure this isn't a TBE process.
+                */
+               if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
+#if QUOTA
+                 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
+#endif /* QUOTA */
+                 // XXXdbg
+                 hfs_global_shared_lock_acquire(hfsmp);
+                       if (hfsmp->jnl) {
+                               if (journal_start_transaction(hfsmp->jnl) != 0) {
+                                       retval = EINVAL;
+                                       goto Err_Exit;
+                               }
+                       }
+
+                       /* lock extents b-tree (also protects volume bitmap) */
+                       retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
+                       if (retval) {
+                               if (hfsmp->jnl) {
+                                       journal_end_transaction(hfsmp->jnl);
+                               }
+                               hfs_global_shared_lock_release(hfsmp);
+                               goto Err_Exit;
+                       }
+                       
+                       if (fp->ff_unallocblocks == 0)
+                               retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
+                                               (FCB*)fp, length, false));
+
+                       (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+
+                       // XXXdbg
+                       if (hfsmp->jnl) {
+                               tv = time;
+                               VOP_UPDATE(vp, &tv, &tv, 1);
+
+                               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       hfs_global_shared_lock_release(hfsmp);
+
+                       filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+                       if (retval)
+                               goto Err_Exit;
+#if QUOTA
+                       /* These are bytes released */
+                       (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
+#endif /* QUOTA */
+               }
+               /* Only set update flag if the logical length changes */
+               if (fp->ff_size != length)
+                       cp->c_flag |= C_UPDATE;
+               fp->ff_size = length;
+       }
+       cp->c_flag |= C_CHANGE;
+       retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
+       if (retval) {
+               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+                    -1, -1, -1, retval, 0);
+       }
+
+Err_Exit:
 
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
+                (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
+
+       return (retval);
+}
 
 
 /*
@@ -1233,253 +1686,69 @@ vop_truncate {
     IN struct ucred *cred;
     IN struct proc *p;
 };
- * Truncate the hfsnode hp to at most length size, freeing (or adding) the
+ * Truncate a cnode to at most length size, freeing (or adding) the
  * disk blocks.
  */
 int hfs_truncate(ap)
-    struct vop_truncate_args /* {
-        struct vnode *a_vp;
-        off_t a_length;
-        int a_flags;
-        struct ucred *a_cred;
-        struct proc *a_p;
-    } */ *ap;
+       struct vop_truncate_args /* {
+               struct vnode *a_vp;
+               off_t a_length;
+               int a_flags;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
 {
-    register struct vnode *vp = ap->a_vp;
-    register struct hfsnode *hp = VTOH(vp);
-    off_t length = ap->a_length;
-    long vflags;
-    struct timeval tv;
-    int retval;
-    FCB *fcb;
-    off_t bytesToAdd;
-    off_t actualBytesAdded;
-    DBG_FUNC_NAME("hfs_truncate");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(ap->a_vp);
-#endif
-
-    fcb = HTOFCB(hp);
-
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
-                (int)length, fcb->fcbEOF, fcb->fcbPLen, 0, 0);
-
-    if (length < 0) {
-        DBG_VOP_LOCKS_TEST(EINVAL);
-        return (EINVAL);
-    }
-
-    if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE)) {        
-        DBG_VOP_LOCKS_TEST(EFBIG);
-       return (EFBIG);
-    }
-
-    if (vp->v_type != VREG && vp->v_type != VLNK) {            
-        DBG_VOP_LOCKS_TEST(EISDIR);
-        return (EISDIR);               /* hfs doesn't support truncating of directories */
-    }
-
-    tv = time;
-    retval = E_NONE;
-       
-    DBG_RW(("%s: truncate from Ox%lX to Ox%X bytes\n", funcname, fcb->fcbPLen, length));
-
-    /* 
-     * we cannot just check if fcb->fcbEOF == length (as an optimization)
-     * since there may be extra physical blocks that also need truncation
-     */
-
-    if (fcb->fcbEOF > fcb->fcbMaxEOF)
-        fcb->fcbMaxEOF = fcb->fcbEOF;
-
-    /*
-     * Lengthen the size of the file. We must ensure that the
-     * last byte of the file is allocated. Since the smallest
-     * value of fcbEOF is 0, length will be at least 1.
-     */
-    if (length > fcb->fcbEOF) {
-        off_t filePosition;
-       daddr_t logBlockNo;
-       long logBlockSize;
-       long blkOffset;
-       off_t bytestoclear;
-       int blockZeroCount;
-       struct buf *bp=NULL;
-
-       /*
-        * If we don't have enough physical space then
-        * we need to extend the physical size.
-        */
-       if (length > fcb->fcbPLen) {
-           /* lock extents b-tree (also protects volume bitmap) */
-           retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
-           if (retval)
-               goto Err_Exit;
-
-           while ((length > fcb->fcbPLen) && (retval == E_NONE)) {
-               bytesToAdd = length - fcb->fcbPLen;
-               retval = MacToVFSError(
-                                       ExtendFileC (HTOVCB(hp),
-                                                    fcb,
-                                                    bytesToAdd,
-                                                    kEFAllMask,        /* allocate all requested bytes or none */
-                                                    &actualBytesAdded));
-
-               if (actualBytesAdded == 0 && retval == E_NONE) {
-                   if (length > fcb->fcbPLen)
-                       length = fcb->fcbPLen;
-                   break;
-               }
-           } 
-           (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
-           if (retval)
-               goto Err_Exit;
-
-           DBG_ASSERT(length <= fcb->fcbPLen);
-           KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
-                        (int)length, fcb->fcbEOF, fcb->fcbPLen, 0, 0);
-       }
-       if (! (ap->a_flags & IO_NOZEROFILL)) {
-
-           if (UBCISVALID(vp) && retval == E_NONE) {
-               u_long    devBlockSize;
-
-               if (length > fcb->fcbMaxEOF) {
-
-                   VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
-               
-                   retval = cluster_write(vp, (struct uio *) 0, fcb->fcbEOF, length, fcb->fcbMaxEOF,
-                                          (off_t)0, devBlockSize, ((ap->a_flags & IO_SYNC) | IO_HEADZEROFILL));
-
-                   if (retval)
-                       goto Err_Exit;
+       register struct vnode *vp = ap->a_vp;
+       register struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       off_t length;
+       off_t filebytes;
+       u_long fileblocks;
+       int blksize, error;
+       u_int64_t nsize;
+
+       if (vp->v_type != VREG && vp->v_type != VLNK)
+               return (EISDIR);        /* cannot truncate an HFS directory! */
+
+       length = ap->a_length;
+       blksize = VTOVCB(vp)->blockSize;
+       fileblocks = fp->ff_blocks;
+       filebytes = (off_t)fileblocks * (off_t)blksize;
+
+       // have to loop truncating or growing files that are
+       // really big because otherwise transactions can get
+       // enormous and consume too many kernel resources.
+       if (length < filebytes && (filebytes - length) > HFS_BIGFILE_SIZE) {
+           while (filebytes > length) {
+               if ((filebytes - length) > HFS_BIGFILE_SIZE) {
+                   filebytes -= HFS_BIGFILE_SIZE;
+               } else {
+                   filebytes = length;
                }
-           } else {
-
-           /*
-            * zero out any new logical space...
-            */
-           bytestoclear = length - fcb->fcbEOF;
-           filePosition = fcb->fcbEOF;
-
-           while (bytestoclear > 0) {
-               logBlockNo   = (daddr_t)(filePosition / PAGE_SIZE_64);
-               blkOffset    = (long)(filePosition & PAGE_MASK_64);  
-
-               if (((off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * (off_t)PAGE_SIZE)) < PAGE_SIZE_64)
-                   logBlockSize = (off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * PAGE_SIZE_64);
-               else
-                   logBlockSize = PAGE_SIZE;
-               
-               if (logBlockSize < blkOffset)
-                   panic("hfs_truncate: bad logBlockSize computed\n");
-                       
-               blockZeroCount = MIN(bytestoclear, logBlockSize - blkOffset);
-
-               if (blkOffset == 0 && ((bytestoclear >= logBlockSize) || filePosition >= fcb->fcbEOF)) {
-                   bp = getblk(vp, logBlockNo, logBlockSize, 0, 0, BLK_WRITE);
-                   retval = 0;
 
+               ap->a_length = filebytes;
+               error = do_hfs_truncate(ap);
+               if (error)
+                   break;
+           }
+       } else if (length > filebytes && (length - filebytes) > HFS_BIGFILE_SIZE) {
+           while (filebytes < length) {
+               if ((length - filebytes) > HFS_BIGFILE_SIZE) {
+                   filebytes += HFS_BIGFILE_SIZE;
                } else {
-                   retval = bread(vp, logBlockNo, logBlockSize, ap->a_cred, &bp);
-                   if (retval) {
-                       brelse(bp);
-                       goto Err_Exit;
-                   }
+                   filebytes = (length - filebytes);
                }
-               bzero((char *)bp->b_data + blkOffset, blockZeroCount);
-                               
-               bp->b_flags |= B_DIRTY | B_AGE;
-
-               if (ap->a_flags & IO_SYNC)
-                   VOP_BWRITE(bp);
-               else if (logBlockNo % 32)
-                   bawrite(bp);
-               else
-                   VOP_BWRITE(bp);     /* wait after we issue 32 requests */
 
-               bytestoclear -= blockZeroCount;
-               filePosition += blockZeroCount;
+               ap->a_length = filebytes;
+               error = do_hfs_truncate(ap);
+               if (error)
+                   break;
            }
-           };
+       } else {
+           error = do_hfs_truncate(ap);
        }
-       fcb->fcbEOF = length;
-
-       if (fcb->fcbEOF > fcb->fcbMaxEOF)
-               fcb->fcbMaxEOF = fcb->fcbEOF;
 
-       if (UBCISVALID(vp))
-               ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
-
-    } else { /* Shorten the size of the file */
-
-        if (fcb->fcbEOF > length) {
-           /*
-            * Any buffers that are past the truncation point need to be
-            * invalidated (to maintain buffer cache consistency).  For
-            * simplicity, we invalidate all the buffers by calling vinvalbuf.
-            */
-           if (UBCISVALID(vp))
-               ubc_setsize(vp, (off_t)length); /* XXX check errors */
-
-           vflags = ((length > 0) ? V_SAVE : 0)  | V_SAVEMETA; 
-           retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
-       }
-
-       /*
-        * For a TBE process the deallocation of the file blocks is
-        * delayed until the file is closed.  And hfs_close calls
-        * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
-        * isn't set, we make sure this isn't a TBE process.
-        */
-       if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
-
-           /* lock extents b-tree (also protects volume bitmap) */
-           retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
-           if (retval)
-               goto Err_Exit;
-           retval = MacToVFSError(
-                               TruncateFileC(  
-                                             HTOVCB(hp),
-                                             fcb,
-                                             length,
-                                             false));
-           (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
-           if (retval)
-               goto Err_Exit;
-
-           fcb->fcbMaxEOF = length;
-       }
-       fcb->fcbEOF = length;
-
-       if (fcb->fcbFlags & fcbModifiedMask)
-           hp->h_nodeflags |= IN_MODIFIED;
-    }
-    hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
-    retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
-    if (retval) {
-        DBG_ERR(("Could not update truncate"));
-
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
-                    -1, -1, -1, retval, 0);
-    }
-Err_Exit:;
-
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(ap->a_vp);
-#endif
-
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
-                (int)length, fcb->fcbEOF, fcb->fcbPLen, retval, 0);
-
-    DBG_VOP_LOCKS_TEST(retval);
-    return (retval);
+       return error;
 }
 
 
@@ -1489,136 +1758,178 @@ Err_Exit:;
 #% allocate    vp      L L L
 #
 vop_allocate {
-    IN struct vnode *vp;
-    IN off_t length;
-    IN int flags;
-    IN struct ucred *cred;
-    IN struct proc *p;
+       IN struct vnode *vp;
+       IN off_t length;
+       IN int flags;
+       OUT off_t *bytesallocated;
+       IN off_t offset;
+       IN struct ucred *cred;
+       IN struct proc *p;
 };
- * allocate the hfsnode hp to at most length size
+ * allocate a cnode to at most length size
  */
 int hfs_allocate(ap)
-    struct vop_allocate_args /* {
-        struct vnode *a_vp;
-        off_t a_length;
-        u_int32_t  a_flags;
-       off_t *a_bytesallocated;
-        struct ucred *a_cred;
-        struct proc *a_p;
-    } */ *ap;
+       struct vop_allocate_args /* {
+               struct vnode *a_vp;
+               off_t a_length;
+               u_int32_t  a_flags;
+               off_t *a_bytesallocated;
+               off_t a_offset;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
 {
-    register struct vnode *vp = ap->a_vp;
-    register struct hfsnode *hp = VTOH(vp);
-    off_t      length = ap->a_length;
-    off_t      startingPEOF;
-    off_t      moreBytesRequested;
-    off_t      actualBytesAdded;
-    long vflags;
-    struct timeval tv;
-    int retval, retval2;
-    FCB *fcb;
-    UInt32 extendFlags =0;   /* For call to ExtendFileC */
-    DBG_FUNC_NAME("hfs_allocate");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-    /* Set the number of bytes allocated to 0 so that the caller will know that we
-       did nothing.  ExtendFileC will fill this in for us if we actually allocate space */
-
-    *(ap->a_bytesallocated) = 0; 
-
-    /* Now for some error checking */
-
-    if (length < (off_t)0) {
-        DBG_VOP_LOCKS_TEST(EINVAL);
-        return (EINVAL);
-    }
-
-    if (vp->v_type != VREG && vp->v_type != VLNK) {
-        DBG_VOP_LOCKS_TEST(EISDIR);
-        return (EISDIR);        /* hfs doesn't support truncating of directories */
-    }
+       struct vnode *vp = ap->a_vp;
+       struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       ExtendedVCB *vcb = VTOVCB(vp);
+       off_t length = ap->a_length;
+       off_t startingPEOF;
+       off_t moreBytesRequested;
+       off_t actualBytesAdded;
+       off_t filebytes;
+       u_long fileblocks;
+       long vflags;
+       struct timeval tv;
+       int retval, retval2;
+       UInt32 blockHint;
+       UInt32 extendFlags;   /* For call to ExtendFileC */
+       struct hfsmount *hfsmp;
+
+       hfsmp = VTOHFS(vp);
+
+       *(ap->a_bytesallocated) = 0;
+       fileblocks = fp->ff_blocks;
+       filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
+
+       if (length < (off_t)0)
+               return (EINVAL);
+       if (vp->v_type != VREG)
+               return (EISDIR);
+       if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes))
+               return (EINVAL);
 
-    /* Fill in the flags word for the call to Extend the file */
+       /* Fill in the flags word for the call to Extend the file */
 
-       if (ap->a_flags & ALLOCATECONTIG) {
+       extendFlags = kEFNoClumpMask;
+       if (ap->a_flags & ALLOCATECONTIG) 
                extendFlags |= kEFContigMask;
-       }
-
-    if (ap->a_flags & ALLOCATEALL) {
+       if (ap->a_flags & ALLOCATEALL)
                extendFlags |= kEFAllMask;
-       }
+       if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
+               extendFlags |= kEFReserveMask;
 
-    fcb = HTOFCB(hp);
-    tv = time;
-    retval = E_NONE;
-    startingPEOF = fcb->fcbPLen;
+       tv = time;
+       retval = E_NONE;
+       blockHint = 0;
+       startingPEOF = filebytes;
 
-    if (ap->a_flags & ALLOCATEFROMPEOF) {
-               length += fcb->fcbPLen;
-       }
-
-    DBG_RW(("%s: allocate from Ox%lX to Ox%X bytes\n", funcname, fcb->fcbPLen, (u_int)length));
+       if (ap->a_flags & ALLOCATEFROMPEOF)
+               length += filebytes;
+       else if (ap->a_flags & ALLOCATEFROMVOL)
+               blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
 
-    /* If no changes are necesary, then we're done */
-    if (fcb->fcbPLen == length)
-       goto Std_Exit;
+       /* If no changes are necessary, then we're done */
+       if (filebytes == length)
+               goto Std_Exit;
 
-    /*
-    * Lengthen the size of the file. We must ensure that the
-    * last byte of the file is allocated. Since the smallest
-    * value of fcbPLen is 0, length will be at least 1.
-    */
-    if (length > fcb->fcbPLen) {
-               moreBytesRequested = length - fcb->fcbPLen;
+       /*
+        * Lengthen the size of the file. We must ensure that the
+        * last byte of the file is allocated. Since the smallest
+        * value of filebytes is 0, length will be at least 1.
+        */
+       if (length > filebytes) {
+               moreBytesRequested = length - filebytes;
                
+#if QUOTA
+               retval = hfs_chkdq(cp,
+                               (int64_t)(roundup(moreBytesRequested, vcb->blockSize)), 
+                               ap->a_cred, 0);
+               if (retval)
+                       return (retval);
+
+#endif /* QUOTA */
+               /*
+                * Metadata zone checks.
+                */
+               if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
+                       /*
+                        * Allocate Journal and Quota files in metadata zone.
+                        */
+                       if (hfs_virtualmetafile(cp)) {
+                               extendFlags |= kEFMetadataMask;
+                               blockHint = hfsmp->hfs_metazone_start;
+                       } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
+                                  (blockHint <= hfsmp->hfs_metazone_end)) {
+                               /*
+                                * Move blockHint outside metadata zone.
+                                */
+                               blockHint = hfsmp->hfs_metazone_end + 1;
+                       }
+               }
+
+               // XXXdbg
+               hfs_global_shared_lock_acquire(hfsmp);
+               if (hfsmp->jnl) {
+                       if (journal_start_transaction(hfsmp->jnl) != 0) {
+                               retval = EINVAL;
+                               goto Err_Exit;
+                       }
+               }
+
                /* lock extents b-tree (also protects volume bitmap) */
-               retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
-               if (retval) goto Err_Exit;
+               retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
+               if (retval) {
+                       if (hfsmp->jnl) {
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       hfs_global_shared_lock_release(hfsmp);
+                       goto Err_Exit;
+               }
 
-               retval = MacToVFSError(
-                                                               ExtendFileC(HTOVCB(hp),
-                                                                                       fcb,
-                                                                                       moreBytesRequested,
-                                                                                       extendFlags,
-                                                                                       &actualBytesAdded));
+               retval = MacToVFSError(ExtendFileC(vcb,
+                                               (FCB*)fp,
+                                               moreBytesRequested,
+                                               blockHint,
+                                               extendFlags,
+                                               &actualBytesAdded));
 
                *(ap->a_bytesallocated) = actualBytesAdded;
+               filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
 
-               (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+               (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
 
-               DBG_ASSERT(length <= fcb->fcbPLen);
+               // XXXdbg
+               if (hfsmp->jnl) {
+                       tv = time;
+                       VOP_UPDATE(vp, &tv, &tv, 1);
+
+                       hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                       journal_end_transaction(hfsmp->jnl);
+               }
+               hfs_global_shared_lock_release(hfsmp);
 
                /*
                 * if we get an error and no changes were made then exit
                 * otherwise we must do the VOP_UPDATE to reflect the changes
                 */
-        if (retval && (startingPEOF == fcb->fcbPLen)) goto Err_Exit;
+               if (retval && (startingPEOF == filebytes))
+                       goto Err_Exit;
         
-        /*
-         * Adjust actualBytesAdded to be allocation block aligned, not
-         * clump size aligned.
-         * NOTE: So what we are reporting does not affect reality
-         * until the file is closed, when we truncate the file to allocation
-         * block size.
-         */
-
-               if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded)) {
-                       u_long                                  blks, blocksize;
-                       
-                       blocksize = VTOVCB(vp)->blockSize;
-                       blks = moreBytesRequested / blocksize;
-                       if ((blks * blocksize) != moreBytesRequested)
-                               blks++;
-                       
-                       *(ap->a_bytesallocated) = blks * blocksize;
-               }
+               /*
+                * Adjust actualBytesAdded to be allocation block aligned, not
+                * clump size aligned.
+                * NOTE: So what we are reporting does not affect reality
+                * until the file is closed, when we truncate the file to allocation
+                * block size.
+                */
+               if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
+                       *(ap->a_bytesallocated) =
+                               roundup(moreBytesRequested, (off_t)vcb->blockSize);
 
-    } else { /* Shorten the size of the file */
+       } else { /* Shorten the size of the file */
 
-       if (fcb->fcbEOF > length) {
+               if (fp->ff_size > length) {
                        /*
                         * Any buffers that are past the truncation point need to be
                         * invalidated (to maintain buffer cache consistency).  For
@@ -1628,52 +1939,77 @@ int hfs_allocate(ap)
                        (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
                }
 
-       /* lock extents b-tree (also protects volume bitmap) */
-        retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
-        if (retval) goto Err_Exit;
+               // XXXdbg
+               hfs_global_shared_lock_acquire(hfsmp);
+               if (hfsmp->jnl) {
+                       if (journal_start_transaction(hfsmp->jnl) != 0) {
+                               retval = EINVAL;
+                               goto Err_Exit;
+                       }
+               }
 
-        retval = MacToVFSError(
+               /* lock extents b-tree (also protects volume bitmap) */
+               retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
+               if (retval) {
+                       if (hfsmp->jnl) {
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       hfs_global_shared_lock_release(hfsmp);
+
+                       goto Err_Exit;
+               }                       
+
+               retval = MacToVFSError(
                             TruncateFileC(
-                                            HTOVCB(hp),
-                                            fcb,
+                                            vcb,
+                                            (FCB*)fp,
                                             length,
                                             false));
-        (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+               (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+               filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+               if (hfsmp->jnl) {
+                       tv = time;
+                       VOP_UPDATE(vp, &tv, &tv, 1);
+
+                       hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                       journal_end_transaction(hfsmp->jnl);
+               }
+               hfs_global_shared_lock_release(hfsmp);
+               
 
                /*
                 * if we get an error and no changes were made then exit
                 * otherwise we must do the VOP_UPDATE to reflect the changes
                 */
-               if (retval && (startingPEOF == fcb->fcbPLen)) goto Err_Exit;
-        if (fcb->fcbFlags & fcbModifiedMask)
-           hp->h_nodeflags |= IN_MODIFIED;
+               if (retval && (startingPEOF == filebytes)) goto Err_Exit;
+#if QUOTA
+               /* These are bytes released */
+               (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
+#endif /* QUOTA */
 
-        DBG_ASSERT(length <= fcb->fcbPLen)  // DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG
-
-        if (fcb->fcbEOF > fcb->fcbPLen) {
-                       fcb->fcbEOF = fcb->fcbPLen;
-                       fcb->fcbMaxEOF = fcb->fcbPLen;
+               if (fp->ff_size > filebytes) {
+                       fp->ff_size = filebytes;
 
                        if (UBCISVALID(vp))
-                               ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
-        }
-    }
+                               ubc_setsize(vp, fp->ff_size); /* XXX check errors */
+               }
+       }
 
 Std_Exit:
-    hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
+       cp->c_flag |= C_CHANGE | C_UPDATE;
        retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
 
-    if (retval == 0) retval = retval2;
-
+       if (retval == 0)
+               retval = retval2;
 Err_Exit:
-    DBG_VOP_LOCKS_TEST(retval);
-    return (retval);
+       return (retval);
 }
 
 
-
-
-/* pagein for HFS filesystem, similar to hfs_read(), but without cluster_read() */
+/*
+ * pagein for HFS filesystem
+ */
 int
 hfs_pagein(ap)
        struct vop_pagein_args /* {
@@ -1686,44 +2022,38 @@ hfs_pagein(ap)
                int           a_flags
        } */ *ap;
 {
-    register struct vnode *vp;
-    struct hfsnode       *hp;
-    FCB                          *fcb;
-    long                  devBlockSize = 0;
-    int                   retval;
-
-    DBG_FUNC_NAME("hfs_pagein");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(vp);DBG_VOP_CONT(("\n"));
-    DBG_VOP_LOCKS_INIT(0,vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-    vp  = ap->a_vp;
-    hp  = VTOH(vp);
-    fcb = HTOFCB(hp);
-
-    if (vp->v_type != VREG && vp->v_type != VLNK)
-       panic("hfs_pagein: vp not UBC type\n");
-
-    DBG_VOP(("\tfile size Ox%X\n", (u_int)fcb->fcbEOF));
-    DBG_VOP(("\tstarting at offset Ox%X of file, length Ox%X\n", (u_int)ap->a_f_offset, (u_int)ap->a_size));
-
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-#endif
+       register struct vnode *vp = ap->a_vp;
+       int devBlockSize = 0;
+       int error;
 
-    VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
+       if (vp->v_type != VREG)
+               panic("hfs_pagein: vp not UBC type\n");
 
-    retval = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
-                        ap->a_size, (off_t)fcb->fcbEOF, devBlockSize,
-                        ap->a_flags);
+       VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);
 
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-#endif
-    DBG_VOP_LOCKS_TEST(retval);
+       error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
+                               ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
+                               ap->a_flags);
+       /*
+        * Keep track blocks read
+        */
+       if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
+               struct cnode *cp;
+               
+               cp = VTOC(vp);          
+               /*
+                * If this file hasn't been seen since the start of
+                * the current sampling period then start over.
+                */
+               if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
+                       VTOF(vp)->ff_bytesread = ap->a_size;
+               else
+                       VTOF(vp)->ff_bytesread += ap->a_size;
 
-    return (retval);
+               cp->c_flag |= C_ACCESS;
+       }
+
+       return (error);
 }
 
 /* 
@@ -1741,47 +2071,45 @@ hfs_pageout(ap)
           int           a_flags
        } */ *ap;
 {
-       struct vnode    *vp = ap->a_vp;
-       struct hfsnode  *hp =  VTOH(vp);
-       FCB             *fcb = HTOFCB(hp);
-       int              retval;
-       long             devBlockSize = 0;
-
-       DBG_FUNC_NAME("hfs_pageout");
-       DBG_VOP_LOCKS_DECL(1);
-       DBG_VOP_PRINT_FUNCNAME();
-       DBG_VOP_PRINT_VNODE_INFO(vp);DBG_VOP_CONT(("\n"));
-       DBG_VOP(("\thfsnode 0x%x (%s)\n", (u_int)hp, H_NAME(hp)));
-       DBG_VOP(("\tstarting at offset Ox%lX of file, length Ox%lX\n", 
-               (UInt32)ap->a_f_offset, (UInt32)ap->a_size));
-
-       DBG_VOP_LOCKS_INIT(0, vp, VOPDBG_LOCKED, 
-               VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-#if HFS_DIAGNOSTIC
-       debug_check_blocksizes(vp);
-#endif
+       struct vnode *vp = ap->a_vp;
+       struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       int retval;
+       int devBlockSize = 0;
+       off_t end_of_range;
+       off_t filesize;
 
        if (UBCINVALID(vp))
                panic("hfs_pageout: Not a  VREG: vp=%x", vp);
 
-       VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
+       VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
+       filesize = fp->ff_size;
+       end_of_range = ap->a_f_offset + ap->a_size - 1;
+
+       if (cp->c_flag & C_RELOCATING) {
+               if (end_of_range < (filesize / 2)) {
+                       return (EBUSY);
+               }
+       }
+
+       if (end_of_range >= filesize)
+               end_of_range = (off_t)(filesize - 1);
+       if (ap->a_f_offset < filesize) {
+               rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
+               cp->c_flag |= C_MODIFIED;  /* leof is dirty */
+       }
 
        retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
-                                (off_t)fcb->fcbEOF, devBlockSize, ap->a_flags);
+                                filesize, devBlockSize, ap->a_flags);
+
        /*
         * If we successfully wrote any data, and we are not the superuser
         * we clear the setuid and setgid bits as a precaution against
         * tampering.
         */
        if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
-               hp->h_meta->h_mode &= ~(ISUID | ISGID);
-
-#if HFS_DIAGNOSTIC
-       debug_check_blocksizes(vp);
-#endif
+               cp->c_mode &= ~(S_ISUID | S_ISGID);
 
-       DBG_VOP_LOCKS_TEST(retval);
        return (retval);
 }
 
@@ -1793,39 +2121,502 @@ hfs_pageout(ap)
  */
/*
 * hfs_bwrite - intercept buffer writes on an HFS volume.
 *
 * On little-endian hosts, B-tree nodes for the extents and catalog
 * files are kept in native byte order while in memory; this routine
 * swaps them back to big-endian (on-disk) order just before the write.
 * It also defends against buffers that still carry B_LOCKED, then
 * hands the buffer to the generic vn_bwrite().
 */
int
hfs_bwrite(ap)
	struct vop_bwrite_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = bp->b_vp;
#if BYTE_ORDER == LITTLE_ENDIAN
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID)) {

		/*
		 * Swap if the B-Tree node is in native byte order.
		 * The last two bytes of a node hold the offset of the
		 * first record (always 0x000e); seeing it in native
		 * order means the node has not been swapped yet.
		 */
		if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = bp->b_data;
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
			block.blockSize = bp->b_bcount;
    
			/* Endian un-swap B-Tree node (to big-endian, on-disk order) */
			SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
		}

		/* We don't check to make sure that it's 0x0e00 because it could be all zeros */
	}
#endif
	/*
	 * This buffer shouldn't be locked anymore but if it is clear it.
	 * With journaling active a locked buffer here indicates a serious
	 * bookkeeping error, so panic rather than silently clearing it.
	 */
	if (ISSET(bp->b_flags, B_LOCKED)) {
	    // XXXdbg
	    if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
	    }
		CLR(bp->b_flags, B_LOCKED);
		printf("hfs_bwrite: called with lock bit set\n");
	}
	retval = vn_bwrite (ap);

	return (retval);
}
 
-    return (retval);
+/*
+ * Relocate a file to a new location on disk
+ *  cnode must be locked on entry
+ *
+ * Relocation occurs by cloning the file's data from its
+ * current set of blocks to a new set of blocks. During
+ * the relocation all of the blocks (old and new) are
+ * owned by the file.
+ *
+ * -----------------
+ * |///////////////|
+ * -----------------
+ * 0               N (file offset)
+ *
+ * -----------------     -----------------
+ * |///////////////|     |               |     STEP 1 (acquire new blocks)
+ * -----------------     -----------------
+ * 0               N     N+1             2N
+ *
+ * -----------------     -----------------
+ * |///////////////|     |///////////////|     STEP 2 (clone data)
+ * -----------------     -----------------
+ * 0               N     N+1             2N
+ *
+ *                       -----------------
+ *                       |///////////////|     STEP 3 (head truncate blocks)
+ *                       -----------------
+ *                       0               N
+ *
+ * During steps 2 and 3 page-outs to file offsets less
+ * than or equal to N are suspended.
+ *
+ * During step 3 page-ins to the file get suspended.
+ */
+__private_extern__
+int
+hfs_relocate(vp, blockHint, cred, p)
+       struct  vnode *vp;
+       u_int32_t  blockHint;
+       struct  ucred *cred;
+       struct  proc *p;
+{
+       struct  filefork *fp;
+       struct  hfsmount *hfsmp;
+       ExtendedVCB *vcb;
+
+       u_int32_t  headblks;
+       u_int32_t  datablks;
+       u_int32_t  blksize;
+       u_int32_t  realsize;
+       u_int32_t  growsize;
+       u_int32_t  nextallocsave;
+       u_int32_t  sector_a;
+       u_int32_t  sector_b;
+       int eflags;
+       u_int32_t  oldstart;  /* debug only */
+       off_t  newbytes;
+       int  retval;
+
+       if (vp->v_type != VREG && vp->v_type != VLNK) {
+               return (EPERM);
+       }
+       
+       hfsmp = VTOHFS(vp);
+       if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
+               return (ENOSPC);
+       }
+
+       fp = VTOF(vp);
+       if (fp->ff_unallocblocks)
+               return (EINVAL);
+       vcb = VTOVCB(vp);
+       blksize = vcb->blockSize;
+       if (blockHint == 0)
+               blockHint = vcb->nextAllocation;
+
+       if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
+           (vp->v_type == VLNK && fp->ff_size > blksize)) {
+               return (EFBIG);
+       }
+
+       headblks = fp->ff_blocks;
+       datablks = howmany(fp->ff_size, blksize);
+       growsize = datablks * blksize;
+       realsize = fp->ff_size;
+       eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
+       if (blockHint >= hfsmp->hfs_metazone_start &&
+           blockHint <= hfsmp->hfs_metazone_end)
+               eflags |= kEFMetadataMask;
+
+       hfs_global_shared_lock_acquire(hfsmp);
+       if (hfsmp->jnl) {
+               if (journal_start_transaction(hfsmp->jnl) != 0) {
+                       return (EINVAL);
+               }
+       }
+
+       /* Lock extents b-tree (also protects volume bitmap) */
+       retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);
+       if (retval)
+               goto out2;
+
+       retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
+       if (retval) {
+               retval = MacToVFSError(retval);
+               goto out;
+       }
+
+       /*
+        * STEP 1 - aquire new allocation blocks.
+        */
+       nextallocsave = vcb->nextAllocation;
+       retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
+       if (eflags & kEFMetadataMask)                   
+               vcb->nextAllocation = nextallocsave;
+
+       retval = MacToVFSError(retval);
+       if (retval == 0) {
+               VTOC(vp)->c_flag |= C_MODIFIED;
+               if (newbytes < growsize) {
+                       retval = ENOSPC;
+                       goto restore;
+               } else if (fp->ff_blocks < (headblks + datablks)) {
+                       printf("hfs_relocate: allocation failed");
+                       retval = ENOSPC;
+                       goto restore;
+               }
+
+               retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, &sector_b, NULL);
+               if (retval) {
+                       retval = MacToVFSError(retval);
+               } else if ((sector_a + 1) == sector_b) {
+                       retval = ENOSPC;
+                       goto restore;
+               } else if ((eflags & kEFMetadataMask) &&
+                          ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
+                             hfsmp->hfs_metazone_end)) {
+                       printf("hfs_relocate: didn't move into metadata zone\n");
+                       retval = ENOSPC;
+                       goto restore;
+               }
+       }
+       if (retval) {
+               /*
+                * Check to see if failure is due to excessive fragmentation.
+                */
+               if (retval == ENOSPC &&
+                   hfs_freeblks(hfsmp, 0) > (datablks * 2)) {
+                       hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
+               }
+               goto out;
+       }
+
+       fp->ff_size = fp->ff_blocks * blksize;
+       if (UBCISVALID(vp))
+               (void) ubc_setsize(vp, fp->ff_size);
+
+       /*
+        * STEP 2 - clone data into the new allocation blocks.
+        */
+
+       if (vp->v_type == VLNK)
+               retval = hfs_clonelink(vp, blksize, cred, p);
+       else if (vp->v_flag & VSYSTEM)
+               retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
+       else
+               retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p);
+
+       if (retval)
+               goto restore;
+       
+       oldstart = fp->ff_extents[0].startBlock;
+
+       /*
+        * STEP 3 - switch to clone and remove old blocks.
+        */
+       SET(VTOC(vp)->c_flag, C_NOBLKMAP);   /* suspend page-ins */
+
+       retval = HeadTruncateFile(vcb, (FCB*)fp, headblks);
+
+       CLR(VTOC(vp)->c_flag, C_NOBLKMAP);   /* resume page-ins */
+       if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP))
+               wakeup(VTOC(vp));
+       if (retval)
+               goto restore;
+
+       fp->ff_size = realsize;
+       if (UBCISVALID(vp)) {
+               (void) ubc_setsize(vp, realsize);
+               (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+       }
+
+       CLR(VTOC(vp)->c_flag, C_RELOCATING);  /* Resume page-outs for this file. */
+out:
+       (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
+
+       retval = VOP_FSYNC(vp, cred, MNT_WAIT, p);
+out2:
+       if (hfsmp->jnl) {
+               if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID)
+                       (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+               else
+                       (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+               journal_end_transaction(hfsmp->jnl);
+       }
+       hfs_global_shared_lock_release(hfsmp);
+
+       return (retval);
+
+restore:
+       /*
+        * Give back any newly allocated space.
+        */
+       if (fp->ff_size != realsize)
+               fp->ff_size = realsize;
+       (void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false);
+       if (UBCISVALID(vp))
+               (void) ubc_setsize(vp, fp->ff_size);
+       CLR(VTOC(vp)->c_flag, C_RELOCATING);
+       goto out;
+}
+
+
+/*
+ * Clone a symlink.
+ *
+ */
+static int
+hfs_clonelink(struct vnode *vp, int blksize, struct ucred *cred, struct proc *p)
+{
+       struct buf *head_bp = NULL;
+       struct buf *tail_bp = NULL;
+       int error;
+
+
+       error = meta_bread(vp, 0, blksize, cred, &head_bp);
+       if (error)
+               goto out;
+
+       tail_bp = getblk(vp, 1, blksize, 0, 0, BLK_META);
+       if (tail_bp == NULL) {
+               error = EIO;
+               goto out;
+       }
+       bcopy(head_bp->b_data, tail_bp->b_data, blksize);
+       error = bwrite(tail_bp);
+out:
+       if (head_bp) {
+               head_bp->b_flags |= B_INVAL;
+               brelse(head_bp);
+       }       
+       (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+
+       return (error);
 }
+
+/*
+ * Clone a file's data within the file.
+ *
+ */
/*
 * hfs_clonefile - copy a regular file's data into its newly
 * allocated tail blocks.
 *
 * Reads the first `blkcnt` blocks of the file and writes them back
 * starting at file offset blkstart * blksize, in chunks of up to 64KB
 * through the cluster layer.  Sets C_RELOCATING so page-outs to the
 * old range stay suspended while both copies exist (the caller clears
 * the flag when relocation completes or is rolled back).
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
              struct ucred *cred, struct proc *p)
{
	caddr_t  bufp;
	size_t  writebase;   /* byte offset where the clone copy begins */
	size_t  bufsize;
	size_t  copysize;    /* total bytes to copy */
	size_t  iosize;      /* bytes per cluster_read/cluster_write pass */
	size_t  filesize;
	size_t  offset;      /* progress through the source range */
	struct uio auio;
	struct iovec aiov;
	int  devblocksize;
	int  didhold;
	int  error;


	/* Push any dirty buffers to disk before reading through the cluster layer. */
	if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0))) {
		printf("hfs_clonefile: vinvalbuf failed - %d\n", error);
		return (error);
	}

	if (!ubc_clean(vp, 1)) {
		printf("hfs_clonefile: not ubc_clean\n");
		return (EIO);  /* XXX error code */
	}

	/*
	 * Suspend page-outs for this file.
	 */
	SET(VTOC(vp)->c_flag, C_RELOCATING);

	/* NOTE(review): ff_size reflects the doubled allocation here, since
	 * the caller grows it before cloning — so writes below land past the
	 * original EOF. */
	filesize = VTOF(vp)->ff_size;
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 4096 * 16);   /* at most 64KB per pass */
	offset = 0;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}	

	VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize);

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_procp = p;

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		/* Read the next chunk from the head of the file. */
		aiov.iov_base = bufp;
		aiov.iov_len = iosize;
		auio.uio_resid = iosize;
		auio.uio_offset = offset;
		auio.uio_rw = UIO_READ;

		error = cluster_read(vp, &auio, copysize, devblocksize, 0);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		/* A short read means the copy would be incomplete — treat as I/O error. */
		if (auio.uio_resid != 0) {
			printf("clonedata: cluster_read: uio_resid = %d\n", (int)auio.uio_resid);
			error = EIO;		
			break;
		}


		/* Write the same chunk into the clone region. */
		aiov.iov_base = bufp;
		aiov.iov_len = iosize;
		auio.uio_resid = iosize;
		auio.uio_offset = writebase + offset;
		auio.uio_rw = UIO_WRITE;

		error = cluster_write(vp, &auio, filesize + offset,
		                      filesize + offset + iosize,
		                      auio.uio_offset, 0, devblocksize, 0);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (auio.uio_resid != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;		
			break;
		}	
		offset += iosize;
	}
	if (error == 0) {
		/* Clean the pages in VM. */
		didhold = ubc_hold(vp);
		if (didhold)
			(void) ubc_clean(vp, 1);
	
		/*
		 * Clean out all associated buffers.
		 */
		(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
	
		if (didhold)
			ubc_rele(vp);
	}
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
	
	return (error);
}
+
+/*
+ * Clone a system (metadata) file.
+ *
+ */
+static int
+hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
+                 struct ucred *cred, struct proc *p)
+{
+       caddr_t  bufp;
+       char * offset;
+       size_t  bufsize;
+       size_t  iosize;
+       struct buf *bp = NULL;
+       daddr_t  blkno;
+       daddr_t  blk;
+       int  breadcnt;
+        int  i;
+       int  error = 0;
+
+
+       iosize = GetLogicalBlockSize(vp);
+       bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
+       breadcnt = bufsize / iosize;
+
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+               return (ENOMEM);
+       }       
+       blkstart = (blkstart * blksize) / iosize;
+       blkcnt = (blkcnt * blksize) / iosize;
+       blkno = 0;
+
+       while (blkno < blkcnt) {
+               /*
+                * Read up to a megabyte
+                */
+               offset = bufp;
+               for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) {
+                       error = meta_bread(vp, blk, iosize, cred, &bp);
+                       if (error) {
+                               printf("hfs_clonesysfile: meta_bread error %d\n", error);
+                               goto out;
+                       }
+                       if (bp->b_bcount != iosize) {
+                               printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount);
+                               goto out;
+                       }
+       
+                       bcopy(bp->b_data, offset, iosize);
+                       bp->b_flags |= B_INVAL;
+                       brelse(bp);
+                       bp = NULL;
+                       offset += iosize;
+               }
+       
+               /*
+                * Write up to a megabyte
+                */
+               offset = bufp;
+               for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) {
+                       bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META);
+                       if (bp == NULL) {
+                               printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno);
+                               error = EIO;
+                               goto out;
+                       }
+                       bcopy(offset, bp->b_data, iosize);
+                       error = bwrite(bp);
+                       bp = NULL;
+                       if (error)
+                               goto out;
+                       offset += iosize;
+               }
+       }
+out:
+       if (bp) {
+               brelse(bp);
+       }
+
+       kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
+
+       error = VOP_FSYNC(vp, cred, MNT_WAIT, p);
+
+       return (error);
+}
+