]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/hfs/hfs_readwrite.c
xnu-517.3.7.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_readwrite.c
index 825ec1ca55976a6144e1589fb3feea05ea17adc8..10b3a271ee3ff7255a6cabd6a7a5d0d7c3141115 100644 (file)
@@ -1,56 +1,33 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
 /*     @(#)hfs_readwrite.c     1.0
  *
- *     (c) 1990, 1992 NeXT Computer, Inc.  All Rights Reserved
- *     (c) 1998       Apple Computer, Inc.  All Rights Reserved
+ *     (c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
  *     
- *
  *     hfs_readwrite.c -- vnode operations to deal with reading and writing files.
  *
- *     MODIFICATION HISTORY:
- *      9-Nov-1999     Scott Roberts   hfs_allocate now returns sizes based on allocation block boundaries (#2398794)
- *      3-Feb-1999     Pat Dirks               Merged in Joe's change to hfs_truncate to skip vinvalbuf if LEOF isn't changing (#2302796)
- *                                                             Removed superfluous (and potentially dangerous) second call to vinvalbuf() in hfs_truncate.
- *      2-Dec-1998     Pat Dirks               Added support for read/write bootstrap ioctls.
- *     10-Nov-1998     Pat Dirks               Changed read/write/truncate logic to optimize block sizes for first extents of a file.
- *                              Changed hfs_strategy to correct I/O sizes from cluser code I/O requests in light of
- *                              different block sizing.  Changed bexpand to handle RELEASE_BUFFER flag.
- *     22-Sep-1998     Don Brady               Changed truncate zero-fill to use bwrite after several bawrites have been queued.
- *     11-Sep-1998     Pat Dirks               Fixed buffering logic to not rely on B_CACHE, which is set for empty buffers that
- *                                                             have been pre-read by cluster_read (use b_validend > 0 instead).
- *  27-Aug-1998        Pat Dirks               Changed hfs_truncate to use cluster_write in place of bawrite where possible.
- *     25-Aug-1998     Pat Dirks               Changed hfs_write to do small device-block aligned writes into buffers without doing
- *                                                             read-ahead of the buffer.  Added bexpand to deal with incomplete [dirty] buffers.
- *                                                             Fixed can_cluster macro to use MAXPHYSIO instead of MAXBSIZE.
- *     19-Aug-1998     Don Brady               Remove optimization in hfs_truncate that prevented extra physical blocks from
- *                                                             being truncated (radar #2265750). Also set fcb->fcbEOF before calling vinvalbuf.
- *      7-Jul-1998     Pat Dirks               Added code to honor IO_NOZEROFILL in hfs_truncate.
- *     16-Jul-1998     Don Brady               In hfs_bmap use MAXPHYSIO instead of MAXBSIZE when calling MapFileBlockC (radar #2263753).
- *     16-Jul-1998     Don Brady               Fix error handling in hfs_allocate (radar #2252265).
- *     04-Jul-1998     chw                             Synchronized options in hfs_allocate with flags in call to ExtendFileC
- *     25-Jun-1998     Don Brady               Add missing blockNo incrementing to zero fill loop in hfs_truncate.
- *     22-Jun-1998     Don Brady               Add bp = NULL assignment after brelse in hfs_read.
- *      4-Jun-1998     Pat Dirks               Split off from hfs_vnodeops.c
  */
 
 #include <sys/param.h>
 #include <sys/resourcevar.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
+#include <sys/filedesc.h>
 #include <sys/stat.h>
 #include <sys/buf.h>
 #include <sys/proc.h>
-//#include <mach/machine/vm_types.h>
 #include <sys/vnode.h>
 #include <sys/uio.h>
 
 #include <sys/kdebug.h>
 
 #include       "hfs.h"
-#include       "hfs_dbg.h"
 #include       "hfs_endian.h"
+#include       "hfs_quota.h"
 #include       "hfscommon/headers/FileMgrInternal.h"
 #include       "hfscommon/headers/BTreesInternal.h"
+#include       "hfs_cnode.h"
+#include       "hfs_dbg.h"
 
+extern int overflow_extents(struct filefork *fp);
 
 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
 
@@ -87,13 +67,10 @@ enum {
 
 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
 
-#if DBG_VOP_TEST_LOCKS
-extern void DbgVopTest(int maxSlots, int retval, VopDbgStoreRec *VopDbgStore, char *funcname);
-#endif
+static int  hfs_clonelink(struct vnode *, int, struct ucred *, struct proc *);
+static int  hfs_clonefile(struct vnode *, int, int, int,  struct ucred *, struct proc *);
+static int  hfs_clonesysfile(struct vnode *, int, int, int, struct ucred *, struct proc *);
 
-#if HFS_DIAGNOSTIC
-void debug_check_blocksizes(struct vnode *vp);
-#endif
 
 /*****************************************************************************
 *
@@ -114,202 +91,72 @@ void debug_check_blocksizes(struct vnode *vp);
 
 int
 hfs_read(ap)
-struct vop_read_args /* {
-    struct vnode *a_vp;
-    struct uio *a_uio;
-    int a_ioflag;
-    struct ucred *a_cred;
-} */ *ap;
+       struct vop_read_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
 {
-    register struct vnode      *vp;
-    struct hfsnode                     *hp;
-    register struct uio        *uio;
-    struct buf                                 *bp;
-    daddr_t                            logBlockNo;
-    u_long                                     fragSize, moveSize, startOffset, ioxfersize;
-    int                                                devBlockSize = 0;
-    off_t                                      bytesRemaining;
-    int                                        retval;
-    u_short                            mode;
-    FCB                                                *fcb;
-
-    DBG_FUNC_NAME("hfs_read");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-    vp = ap->a_vp;
-    hp = VTOH(vp);
-    fcb = HTOFCB(hp);
-    mode = hp->h_meta->h_mode;
-    uio = ap->a_uio;
-
-#if HFS_DIAGNOSTIC
-    if (uio->uio_rw != UIO_READ)
-        panic("%s: mode", funcname);
-#endif
-
-    /* Can only read files */
-    if (ap->a_vp->v_type != VREG && ap->a_vp->v_type != VLNK) {
-        DBG_VOP_LOCKS_TEST(EISDIR);
-        return (EISDIR);
-    }
-    DBG_RW(("\tfile size Ox%X\n", (u_int)fcb->fcbEOF));
-    DBG_RW(("\tstarting at offset Ox%X of file, length Ox%X\n", (u_int)uio->uio_offset, (u_int)uio->uio_resid));
-
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-#endif
-
-    /*
-     * If they didn't ask for any data, then we are done.
-     */
-    if (uio->uio_resid == 0) {
-        DBG_VOP_LOCKS_TEST(E_NONE);
-        return (E_NONE);
-    }
-
-    /* cant read from a negative offset */
-    if (uio->uio_offset < 0) {
-        DBG_VOP_LOCKS_TEST(EINVAL);
-        return (EINVAL);
-    }
-
-    if (uio->uio_offset > fcb->fcbEOF) {
-        if ( (!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
-            retval = EFBIG;
-        else
-            retval = E_NONE;
-
-        DBG_VOP_LOCKS_TEST(retval);
-        return (retval);
-    }
-
-    VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
-
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
-                 (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
-
-    if (UBCISVALID(vp))
-        retval = cluster_read(vp, uio, (off_t)fcb->fcbEOF, devBlockSize, 0);
-    else {
-
-        for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
-
-            if ((bytesRemaining = (fcb->fcbEOF - uio->uio_offset)) <= 0)
-                break;
-
-            logBlockNo  = (daddr_t)(uio->uio_offset / PAGE_SIZE_64);
-            startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64);
-            fragSize    = PAGE_SIZE;
-
-            if (((logBlockNo * PAGE_SIZE) + fragSize) < fcb->fcbEOF)
-                ioxfersize = fragSize;
-            else {
-                ioxfersize = fcb->fcbEOF - (logBlockNo * PAGE_SIZE);
-                ioxfersize = (ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1);
-            }
-            DBG_RW(("\tat logBlockNo Ox%X, with Ox%lX left to read\n", logBlockNo, (UInt32)uio->uio_resid));
-            moveSize = ioxfersize;
-            DBG_RW(("\tmoveSize = Ox%lX; ioxfersize = Ox%lX; startOffset = Ox%lX.\n",
-                    moveSize, ioxfersize, startOffset));
-            DBG_ASSERT(moveSize >= startOffset);
-            moveSize -= startOffset;
-
-            if (bytesRemaining < moveSize)
-                moveSize = bytesRemaining;
-
-            if (uio->uio_resid < moveSize) {
-                moveSize = uio->uio_resid;
-                DBG_RW(("\treducing moveSize to Ox%lX (uio->uio_resid).\n", moveSize));
-            };
-            if (moveSize == 0) {
-                break;
-            };
-
-            DBG_RW(("\tat logBlockNo Ox%X, extent of Ox%lX, xfer of Ox%lX; moveSize = Ox%lX\n", logBlockNo, fragSize, ioxfersize, moveSize));
-
-            if (( uio->uio_offset + fragSize) >= fcb->fcbEOF) {
-                retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
-
-            } else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) {
-                daddr_t nextLogBlockNo = logBlockNo + 1;
-                int nextsize;
-
-                if (((nextLogBlockNo * PAGE_SIZE) +
-                     (daddr_t)fragSize) < fcb->fcbEOF)
-                    nextsize = fragSize;
-                else {
-                    nextsize = fcb->fcbEOF - (nextLogBlockNo * PAGE_SIZE);
-                    nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1);
-                }
-                retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp);
-            } else {
-                retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
-            };
-
-            if (retval != E_NONE) {
-                if (bp) {
-                    brelse(bp);
-                    bp = NULL;
-                }
-                break;
-            };
-            vp->v_lastr = logBlockNo;
-
-            /*
-             * We should only get non-zero b_resid when an I/O retval
-             * has occurred, which should cause us to break above.
-             * However, if the short read did not cause an retval,
-             * then we want to ensure that we do not uiomove bad
-             * or uninitialized data.
-             */
-            ioxfersize -= bp->b_resid;
-
-            if (ioxfersize < moveSize) {                       /* XXX PPD This should take the offset into account, too! */
-                if (ioxfersize == 0)
-                    break;
-                moveSize = ioxfersize;
-            }
-            if ((startOffset + moveSize) > bp->b_bcount)
-                panic("hfs_read: bad startOffset or moveSize\n");
-
-            DBG_RW(("\tcopying Ox%lX bytes from %lX; resid = Ox%lX...\n", moveSize, (char *)bp->b_data + startOffset, bp->b_resid));
-
-            if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio)))
-                break;
-
-            if (S_ISREG(mode) &&
-                (((startOffset + moveSize) == fragSize) || (uio->uio_offset == fcb->fcbEOF))) {
-                bp->b_flags |= B_AGE;
-            };
-
-            DBG_ASSERT(bp->b_bcount == bp->b_validend);
+       register struct uio *uio = ap->a_uio;
+       register struct vnode *vp = ap->a_vp;
+       struct cnode *cp;
+       struct filefork *fp;
+       int devBlockSize = 0;
+       int retval = 0;
+       off_t filesize;
+       off_t filebytes;
+       off_t start_resid = uio->uio_resid;
+
+
+       /* Preflight checks */
+       if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
+               return (EPERM);         /* can only read regular files */
+       if (uio->uio_resid == 0)
+               return (0);             /* Nothing left to do */
+       if (uio->uio_offset < 0)
+               return (EINVAL);        /* can't read from a negative offset */
+
+       cp = VTOC(vp);
+       fp = VTOF(vp);
+       filesize = fp->ff_size;
+       filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
+       if (uio->uio_offset > filesize) {
+               if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
+                       return (EFBIG);
+               else
+                       return (0);
+       }
 
-            brelse(bp);
-            /* Start of loop resets bp to NULL before reaching outside this block... */
-        }
+       VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
 
-        if (bp != NULL) {
-            DBG_ASSERT(bp->b_bcount == bp->b_validend);
-            brelse(bp);
-        };
-    }
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
+               (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);
 
-    if (HTOVCB(hp)->vcbSigWord == kHFSPlusSigWord)
-        hp->h_nodeflags |= IN_ACCESS;
+       retval = cluster_read(vp, uio, filesize, devBlockSize, 0);
 
-    DBG_VOP_LOCKS_TEST(retval);
+       cp->c_flag |= C_ACCESS;
 
-    #if HFS_DIAGNOSTIC
-        debug_check_blocksizes(vp);
-    #endif
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
+               (int)uio->uio_offset, uio->uio_resid, (int)filesize,  (int)filebytes, 0);
 
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
-                (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
+       /*
+        * Keep track of blocks read
+        */
+       if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
+               /*
+                * If this file hasn't been seen since the start of
+                * the current sampling period then start over.
+                */
+               if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
+                       fp->ff_bytesread = start_resid - uio->uio_resid;
+                       cp->c_atime = time.tv_sec;
+               } else {
+                       fp->ff_bytesread += start_resid - uio->uio_resid;
+               }
+       }
 
-    return (retval);
+       return (retval);
 }
 
 /*
@@ -325,146 +172,167 @@ struct vop_read_args /* {
      */
 int
 hfs_write(ap)
-struct vop_write_args /* {
-    struct vnode *a_vp;
-    struct uio *a_uio;
-    int a_ioflag;
-    struct ucred *a_cred;
-} */ *ap;
+       struct vop_write_args /* {
+               struct vnode *a_vp;
+               struct uio *a_uio;
+               int a_ioflag;
+               struct ucred *a_cred;
+       } */ *ap;
 {
-    struct hfsnode             *hp = VTOH(ap->a_vp);
-    struct uio                         *uio = ap->a_uio;
-    struct vnode               *vp = ap->a_vp ;
-    struct vnode               *dev;
-    struct buf                         *bp;
-    struct proc                *p, *cp;
-    struct timeval tv;
-    FCB                                        *fcb = HTOFCB(hp);
-    ExtendedVCB                        *vcb = HTOVCB(hp);
-    int                                        devBlockSize = 0;
-    daddr_t                    logBlockNo;
-    long                               fragSize;
-    off_t                              origFileSize, currOffset, writelimit, bytesToAdd;
-    off_t                              actualBytesAdded;
-    u_long                             blkoffset, resid, xfersize, clearSize;
-    int                                        flags, ioflag;
-    int                                retval;
-    DBG_FUNC_NAME("hfs_write");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-    DBG_RW(("\thfsnode 0x%x (%s)\n", (u_int)hp, H_NAME(hp)));
-    DBG_RW(("\tstarting at offset Ox%lX of file, length Ox%lX\n", (UInt32)uio->uio_offset, (UInt32)uio->uio_resid));
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-    dev = hp->h_meta->h_devvp;
-
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-#endif
-
-    if (uio->uio_offset < 0) {
-        DBG_VOP_LOCKS_TEST(EINVAL);
-        return (EINVAL);
-    }
+       struct vnode *vp = ap->a_vp;
+       struct uio *uio = ap->a_uio;
+       struct cnode *cp;
+       struct filefork *fp;
+       struct proc *p;
+       struct timeval tv;
+       ExtendedVCB *vcb;
+       int devBlockSize = 0;
+       off_t origFileSize, writelimit, bytesToAdd;
+       off_t actualBytesAdded;
+       u_long resid;
+       int eflags, ioflag;
+       int retval;
+       off_t filebytes;
+       struct hfsmount *hfsmp;
+       int started_tr = 0, grabbed_lock = 0;
 
-    if (uio->uio_resid == 0) {
-        DBG_VOP_LOCKS_TEST(E_NONE);
-        return (E_NONE);
-    }
 
-    if (ap->a_vp->v_type != VREG && ap->a_vp->v_type != VLNK) {                /* Can only write files */
-        DBG_VOP_LOCKS_TEST(EISDIR);
-        return (EISDIR);
-    };
+       if (uio->uio_offset < 0)
+               return (EINVAL);
+       if (uio->uio_resid == 0)
+               return (E_NONE);
+       if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
+               return (EPERM);         /* Can only write regular files */
+
+       ioflag = ap->a_ioflag;
+       cp = VTOC(vp);
+       fp = VTOF(vp);
+       vcb = VTOVCB(vp);
+       filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+       if (ioflag & IO_APPEND)
+               uio->uio_offset = fp->ff_size;
+       if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
+               return (EPERM);
+
+       // XXXdbg - don't allow modification of the journal or journal_info_block
+       if (VTOHFS(vp)->jnl && cp->c_datafork) {
+               struct HFSPlusExtentDescriptor *extd;
+
+               extd = &cp->c_datafork->ff_extents[0];
+               if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
+                       return EPERM;
+               }
+       }
 
-#if HFS_DIAGNOSTIC
-       if (uio->uio_rw != UIO_WRITE)
-               panic("%s: mode", funcname);
-#endif
+       writelimit = uio->uio_offset + uio->uio_resid;
 
-    ioflag = ap->a_ioflag;
-    uio = ap->a_uio;
-    vp = ap->a_vp;
+       /*
+        * Maybe this should be above the vnode op call, but so long as
+        * file servers have no limits, I don't think it matters.
+        */
+       p = uio->uio_procp;
+       if (vp->v_type == VREG && p &&
+           writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+               psignal(p, SIGXFSZ);
+               return (EFBIG);
+       }
+       p = current_proc();
 
-    if (ioflag & IO_APPEND) uio->uio_offset = fcb->fcbEOF;
-    if ((hp->h_meta->h_pflags & APPEND) && uio->uio_offset != fcb->fcbEOF)
-       return (EPERM);
+       VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
 
-       writelimit = uio->uio_offset + uio->uio_resid;
+       resid = uio->uio_resid;
+       origFileSize = fp->ff_size;
+       eflags = kEFDeferMask;  /* defer file block allocations */
+       filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
 
-    /*
-    * Maybe this should be above the vnode op call, but so long as
-    * file servers have no limits, I don't think it matters.
-    */
-    p = uio->uio_procp;
-    if (vp->v_type == VREG && p &&
-        writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
-        psignal(p, SIGXFSZ);
-        return (EFBIG);
-    };
-    VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
+               (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
+       retval = 0;
 
-    resid = uio->uio_resid;
-    origFileSize = fcb->fcbEOF;
-    flags = ioflag & IO_SYNC ? B_SYNC : 0;
+       /* Now test if we need to extend the file */
+       /* Doing so will adjust the filebytes for us */
 
-    DBG_RW(("\tLEOF is 0x%lX, PEOF is 0x%lX.\n", fcb->fcbEOF, fcb->fcbPLen));
+#if QUOTA
+       if(writelimit > filebytes) {
+               bytesToAdd = writelimit - filebytes;
 
-    /*
-    NOTE:      In the following loop there are two positions tracked:
-    currOffset is the current I/O starting offset.  currOffset is never >LEOF; the
-    LEOF is nudged along with currOffset as data is zeroed or written.
-    uio->uio_offset is the start of the current I/O operation.  It may be arbitrarily
-    beyond currOffset.
+               retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)), 
+                                  ap->a_cred, 0);
+               if (retval)
+                       return (retval);
+       }
+#endif /* QUOTA */
 
-    The following is true at all times:
+       hfsmp = VTOHFS(vp);
 
-    currOffset <= LEOF <= uio->uio_offset <= writelimit
-    */
-    currOffset = MIN(uio->uio_offset, fcb->fcbEOF);
+#ifdef HFS_SPARSE_DEV
+       /* 
+        * When the underlying device is sparse and space
+        * is low (< 8MB), stop doing delayed allocations
+        * and begin doing synchronous I/O.
+        */
+       if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
+           (hfs_freeblks(hfsmp, 0) < 2048)) {
+               eflags &= ~kEFDeferMask;
+               ioflag |= IO_SYNC;
+       }
+#endif /* HFS_SPARSE_DEV */
 
-    DBG_RW(("\tstarting I/O loop at 0x%lX.\n", (u_long)currOffset));
+       if (writelimit > filebytes) {
+               hfs_global_shared_lock_acquire(hfsmp);
+               grabbed_lock = 1;
+       }
+       if (hfsmp->jnl && (writelimit > filebytes)) {
+               if (journal_start_transaction(hfsmp->jnl) != 0) {
+                       hfs_global_shared_lock_release(hfsmp);
+                       return EINVAL;
+               }
+               started_tr = 1;
+       }
 
-    cp = current_proc();
+       while (writelimit > filebytes) {
+               bytesToAdd = writelimit - filebytes;
+               if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
+                       eflags |= kEFReserveMask;
 
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
-                (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
-    retval = 0;
+               /* lock extents b-tree (also protects volume bitmap) */
+               retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
+               if (retval != E_NONE)
+                       break;
+       
+               /* Files that are changing size are not hot file candidates. */
+               if (hfsmp->hfc_stage == HFC_RECORDING) {
+                       fp->ff_bytesread = 0;
+               }
+               retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
+                               0, eflags, &actualBytesAdded));
+
+               (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
+               if ((actualBytesAdded == 0) && (retval == E_NONE))
+                       retval = ENOSPC;
+               if (retval != E_NONE)
+                       break;
+               filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+               KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
+                       (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size,  (int)filebytes, 0);
+       }
 
-    /* Now test if we need to extend the file */
-    /* Doing so will adjust the fcbPLen for us */
+       // XXXdbg
+       if (started_tr) {
+               tv = time;
+               VOP_UPDATE(vp, &tv, &tv, 1);
 
-    while (writelimit > (off_t)fcb->fcbPLen) {
-       
-        bytesToAdd = writelimit - fcb->fcbPLen;
-        DBG_RW(("\textending file by 0x%lX bytes; 0x%lX blocks free",
-                (unsigned long)bytesToAdd, (unsigned long)vcb->freeBlocks));
-
-        /* lock extents b-tree (also protects volume bitmap) */
-        retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, cp);
-        if (retval != E_NONE)
-            break;
-
-        retval = MacToVFSError(
-                            ExtendFileC (vcb,
-                                            fcb,
-                                            bytesToAdd,
-                                            0,
-                                            kEFContigBit,
-                                            &actualBytesAdded));
-
-        (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, cp);
-        DBG_VOP_CONT(("\tactual bytes added = 0x%lX bytes, retval = %d...\n", actualBytesAdded, retval));
-        if ((actualBytesAdded == 0) && (retval == E_NONE)) retval = ENOSPC;
-        if (retval != E_NONE) break;
-
-        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
-                    (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
-    };
+               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+               journal_end_transaction(hfsmp->jnl);
+               started_tr = 0;
+       }
+       if (grabbed_lock) {
+               hfs_global_shared_lock_release(hfsmp);
+               grabbed_lock = 0;
+       }
 
-       if (UBCISVALID(vp) && retval == E_NONE) {
+       if (retval == E_NONE) {
                off_t filesize;
                off_t zero_off;
                off_t tail_off;
@@ -474,27 +342,28 @@ struct vop_write_args /* {
                int lflag;
                struct rl_entry *invalid_range;
 
-               if (writelimit > fcb->fcbEOF)
+               if (writelimit > fp->ff_size)
                        filesize = writelimit;
                else
-                       filesize = fcb->fcbEOF;
+                       filesize = fp->ff_size;
 
                lflag = (ioflag & IO_SYNC);
 
-               if (uio->uio_offset <= fcb->fcbEOF) {
+               if (uio->uio_offset <= fp->ff_size) {
                        zero_off = uio->uio_offset & ~PAGE_MASK_64;
                        
                        /* Check to see whether the area between the zero_offset and the start
                           of the transfer to see whether is invalid and should be zero-filled
                           as part of the transfer:
                         */
-                       if (rl_scan(&hp->h_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP) {
-                               lflag |= IO_HEADZEROFILL;
-                       };
+                       if (uio->uio_offset > zero_off) {
+                               if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
+                                       lflag |= IO_HEADZEROFILL;
+                       }
                } else {
-                       off_t eof_page_base = fcb->fcbEOF & ~PAGE_MASK_64;
+                       off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
                        
-                       /* The bytes between fcb->fcbEOF and uio->uio_offset must never be
+                       /* The bytes between fp->ff_size and uio->uio_offset must never be
                           read without being zeroed.  The current last block is filled with zeroes
                           if it holds valid data but in all cases merely do a little bookkeeping
                           to track the area from the end of the current last page to the start of
@@ -506,14 +375,14 @@ struct vop_write_args /* {
                           may be past the start of the write, in which case the zeroing
                           will be handled by the cluser_write of the actual data.
                         */
-                       inval_start = (fcb->fcbEOF + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+                       inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
                        inval_end = uio->uio_offset & ~PAGE_MASK_64;
-                       zero_off = fcb->fcbEOF;
+                       zero_off = fp->ff_size;
                        
-                       if ((fcb->fcbEOF & PAGE_MASK_64) &&
-                               (rl_scan(&hp->h_invalidranges,
+                       if ((fp->ff_size & PAGE_MASK_64) &&
+                               (rl_scan(&fp->ff_invalidranges,
                                                        eof_page_base,
-                                                       fcb->fcbEOF - 1,
+                                                       fp->ff_size - 1,
                                                        &invalid_range) != RL_NOOVERLAP)) {
                                /* The page containing the EOF is not valid, so the
                                   entire page must be made inaccessible now.  If the write
@@ -538,14 +407,17 @@ struct vop_write_args /* {
                                           and the actual write will start on a page past inval_end.  Now's the last
                                           chance to zero-fill the page containing the EOF:
                                         */
-                                       retval = cluster_write(vp, (struct uio *) 0, fcb->fcbEOF, inval_start,
-                                                                                       zero_off, (off_t)0, devBlockSize, lflag | IO_HEADZEROFILL);
+                                       retval = cluster_write(vp, (struct uio *) 0,
+                                                       fp->ff_size, inval_start,
+                                                       zero_off, (off_t)0, devBlockSize,
+                                                       lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
                                        if (retval) goto ioerr_exit;
                                };
                                
                                /* Mark the remaining area of the newly allocated space as invalid: */
-                               rl_add(inval_start, inval_end - 1 , &hp->h_invalidranges);
-                               zero_off = fcb->fcbEOF = inval_end;
+                               rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
+                               cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
+                               zero_off = fp->ff_size = inval_end;
                        };
                        
                        if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
@@ -557,7 +429,7 @@ struct vop_write_args /* {
                tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
                if (tail_off > filesize) tail_off = filesize;
                if (tail_off > writelimit) {
-                       if (rl_scan(&hp->h_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
+                       if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
                                lflag |= IO_TAILZEROFILL;
                        };
                };
@@ -574,166 +446,68 @@ struct vop_write_args /* {
                 */
                io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
                io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
-               if (io_start < fcb->fcbEOF) {
-                       rl_remove(io_start, io_end - 1, &hp->h_invalidranges);
+               if (io_start < fp->ff_size) {
+                       rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
                };
-               retval = cluster_write(vp, uio, fcb->fcbEOF, filesize, zero_off, tail_off, devBlockSize, lflag);
+               retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
+                               tail_off, devBlockSize, lflag | IO_NOZERODIRTY);
                                
-               if (uio->uio_offset > fcb->fcbEOF) {
-                       fcb->fcbEOF = uio->uio_offset;
+               if (uio->uio_offset > fp->ff_size) {
+                       fp->ff_size = uio->uio_offset;
 
-                       ubc_setsize(vp, (off_t)fcb->fcbEOF);       /* XXX check errors */
+                       ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
                }
-               if (resid > uio->uio_resid) hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
-
-    } else {
-
-        while (retval == E_NONE && uio->uio_resid > 0) {
-            logBlockNo = currOffset / PAGE_SIZE;
-            blkoffset  = currOffset & PAGE_MASK;
-
-            if (((off_t)(fcb->fcbPLen) - currOffset) < PAGE_SIZE_64)
-                fragSize = (off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * PAGE_SIZE_64);
-            else
-                fragSize = PAGE_SIZE;
-            xfersize = fragSize - blkoffset;
-
-            DBG_RW(("\tcurrOffset = Ox%lX, logBlockNo = Ox%X, blkoffset = Ox%lX, xfersize = Ox%lX, fragSize = Ox%lX.\n",
-                    (unsigned long)currOffset, logBlockNo, blkoffset, xfersize, fragSize));
-
-            /* Make any adjustments for boundary conditions */
-            if (currOffset + (off_t)xfersize > writelimit) {
-                xfersize = writelimit - currOffset;
-                DBG_RW(("\ttrimming xfersize to 0x%lX to match writelimit (uio_resid)...\n", xfersize));
-            };
-
-            /*
-            * There is no need to read into bp if:
-            * We start on a block boundary and will overwrite the whole block
-            *
-            *                                          OR
-            */
-            if ((blkoffset == 0) && (xfersize >= fragSize)) {
-                DBG_RW(("\tRequesting %ld-byte block Ox%lX w/o read...\n", fragSize, (long)logBlockNo));
-
-                bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
-                retval = 0;
-
-                if (bp->b_blkno == -1) {
-                    brelse(bp);
-                    retval = EIO;              /* XXX */
-                    break;
-                }
-            } else {
-
-                if (currOffset == fcb->fcbEOF && blkoffset == 0) {
-                    bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
-                    retval = 0;
-
-                    if (bp->b_blkno == -1) {
-                        brelse(bp);
-                        retval = EIO;          /* XXX */
-                        break;
-                    }
-
-                } else {
-                    /*
-                    * This I/O transfer is not sufficiently aligned, so read the affected block into a buffer:
-                    */
-                    DBG_VOP(("\tRequesting block Ox%X, size = 0x%08lX...\n", logBlockNo, fragSize));
-                    retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp);
-
-                    if (retval != E_NONE) {
-                        if (bp)
-                            brelse(bp);
-                        break;
-                    }
-                }
-            }
-
-            /* See if we are starting to write within file boundaries:
-                If not, then we need to present a "hole" for the area between
-                the current EOF and the start of the current I/O operation:
-
-                Note that currOffset is only less than uio_offset if uio_offset > LEOF...
-                */
-            if (uio->uio_offset > currOffset) {
-                clearSize = MIN(uio->uio_offset - currOffset, xfersize);
-                DBG_RW(("\tzeroing Ox%lX bytes Ox%lX bytes into block Ox%X...\n", clearSize, blkoffset, logBlockNo));
-                bzero(bp->b_data + blkoffset, clearSize);
-                currOffset += clearSize;
-                blkoffset += clearSize;
-                xfersize -= clearSize;
-            };
+               if (resid > uio->uio_resid)
+                       cp->c_flag |= C_CHANGE | C_UPDATE;
+       }
 
-            if (xfersize > 0) {
-                DBG_RW(("\tCopying Ox%lX bytes Ox%lX bytes into block Ox%X... ioflag == 0x%X\n",
-                        xfersize, blkoffset, logBlockNo, ioflag));
-                retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio);
-                currOffset += xfersize;
-            };
-            DBG_ASSERT((bp->b_bcount % devBlockSize) == 0);
-
-            if (ioflag & IO_SYNC) {
-                (void)VOP_BWRITE(bp);
-                //DBG_RW(("\tissuing bwrite\n"));
-            } else if ((xfersize + blkoffset) == fragSize) {
-                //DBG_RW(("\tissuing bawrite\n"));
-                bp->b_flags |= B_AGE;
-                bawrite(bp);
-            } else {
-                //DBG_RW(("\tissuing bdwrite\n"));
-                bdwrite(bp);
-            };
+       HFS_KNOTE(vp, NOTE_WRITE);
 
-            /* Update the EOF if we just extended the file
-                (the PEOF has already been moved out and the block mapping table has been updated): */
-            if (currOffset > fcb->fcbEOF) {
-                DBG_VOP(("\textending EOF to 0x%lX...\n", (UInt32)fcb->fcbEOF));
-                fcb->fcbEOF = currOffset;
+ioerr_exit:
+       /*
+        * If we successfully wrote any data, and we are not the superuser
+        * we clear the setuid and setgid bits as a precaution against
+        * tampering.
+        */
+       if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
+               cp->c_mode &= ~(S_ISUID | S_ISGID);
+
+       if (retval) {
+               if (ioflag & IO_UNIT) {
+                       (void)VOP_TRUNCATE(vp, origFileSize,
+                               ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
+                       uio->uio_offset -= resid - uio->uio_resid;
+                       uio->uio_resid = resid;
+                       filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+               }
+       } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
+               tv = time;
+               retval = VOP_UPDATE(vp, &tv, &tv, 1);
+       }
+       vcb->vcbWrCnt++;
 
-                if (UBCISVALID(vp))
-                    ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
-            };
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
+               (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
 
-            if (retval || (resid == 0))
-                break;
-            hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
-        };
-    };
+       return (retval);
+}
 
-ioerr_exit:
-    /*
-        * If we successfully wrote any data, and we are not the superuser
-     * we clear the setuid and setgid bits as a precaution against
-     * tampering.
-     */
-    if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
-    hp->h_meta->h_mode &= ~(ISUID | ISGID);
-
-    if (retval) {
-        if (ioflag & IO_UNIT) {
-            (void)VOP_TRUNCATE(vp, origFileSize,
-                            ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
-            uio->uio_offset -= resid - uio->uio_resid;
-            uio->uio_resid = resid;
-        }
-    } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
-        tv = time;
-        retval = VOP_UPDATE(vp, &tv, &tv, 1);
-    }
 
-    #if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-    #endif
+#ifdef HFS_SPARSE_DEV
+struct hfs_backingstoreinfo {
+       int  signature;   /* == 3419115 */
+       int  version;     /* version of this struct (1) */
+       int  backingfd;   /* fd (in the calling process) of the disk image file (on backing fs) */
+       int  bandsize;    /* sparse disk image band size (presumably bytes; divided by the volume block size) */
+};
 
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
-                (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF, (int)fcb->fcbPLen, 0);
+#define HFSIOC_SETBACKINGSTOREINFO   _IOW('h', 7, struct hfs_backingstoreinfo)
+#define HFSIOC_CLRBACKINGSTOREINFO   _IO('h', 8)
 
-    DBG_VOP_LOCKS_TEST(retval);
-    return (retval);
-}
+#define HFS_SETBACKINGSTOREINFO  IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO)
+#define HFS_CLRBACKINGSTOREINFO  IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO)
 
+#endif /* HFS_SPARSE_DEV */
 
 /*
 
@@ -753,61 +527,172 @@ ioerr_exit:
 /* ARGSUSED */
 int
 hfs_ioctl(ap)
-struct vop_ioctl_args /* {
-    struct vnode *a_vp;
-    int  a_command;
-    caddr_t  a_data;
-    int  a_fflag;
-    struct ucred *a_cred;
-    struct proc *a_p;
-} */ *ap;
+       struct vop_ioctl_args /* {
+               struct vnode *a_vp;
+               int  a_command;
+               caddr_t  a_data;
+               int  a_fflag;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
 {
-    DBG_FUNC_NAME("hfs_ioctl");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
+       switch (ap->a_command) {
+
+#ifdef HFS_SPARSE_DEV
+       case HFS_SETBACKINGSTOREINFO: {
+               struct hfsmount * hfsmp;
+               struct vnode * bsfs_rootvp;
+               struct vnode * di_vp;
+               struct file * di_fp;
+               struct hfs_backingstoreinfo *bsdata;
+               int error = 0;
+               
+               hfsmp = VTOHFS(ap->a_vp);
+               if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+                       return (EALREADY);
+               }
+               if (ap->a_p->p_ucred->cr_uid != 0 &&
+                       ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
+                       return (EACCES); /* must be owner of file system */
+               }
+               bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
+               if (bsdata == NULL) {
+                       return (EINVAL);
+               }
+               if (error = fdgetf(ap->a_p, bsdata->backingfd, &di_fp)) {
+                       return (error);
+               }
+               if (fref(di_fp) == -1) {
+                       return (EBADF);
+               }
+               if (di_fp->f_type != DTYPE_VNODE) {
+                       frele(di_fp);
+                       return (EINVAL);
+               }
+               di_vp = (struct vnode *)di_fp->f_data;
+               if (ap->a_vp->v_mount == di_vp->v_mount) {
+                       frele(di_fp);
+                       return (EINVAL);
+               }
 
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_UNLOCKED, VOPDBG_UNLOCKED, VOPDBG_UNLOCKED, VOPDBG_POS);
+               /*
+                * Obtain the backing fs root vnode and keep a reference
+                * on it.  This reference will be dropped in hfs_unmount.
+                */
+               error = VFS_ROOT(di_vp->v_mount, &bsfs_rootvp);
+               if (error) {
+                       frele(di_fp);
+                       return (error);
+               }
+               VOP_UNLOCK(bsfs_rootvp, 0, ap->a_p);  /* Hold on to the reference */
 
-    switch (ap->a_command) {
-       
-        case 1:
-    {   register struct hfsnode *hp;
-            register struct vnode *vp;
-       register struct radvisory *ra;
-       FCB *fcb;
-       int devBlockSize = 0;
-       int error;
+               hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
+               hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
+               hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
+               hfsmp->hfs_sparsebandblks *= 4;
 
-       vp = ap->a_vp;
+               frele(di_fp);
+               return (0);
+       }
+       case HFS_CLRBACKINGSTOREINFO: {
+               struct hfsmount * hfsmp;
+               struct vnode * tmpvp;
+
+               hfsmp = VTOHFS(ap->a_vp);
+               if (ap->a_p->p_ucred->cr_uid != 0 &&
+                       ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
+                       return (EACCES); /* must be owner of file system */
+               }
+               if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
+                   hfsmp->hfs_backingfs_rootvp) {
+
+                       hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
+                       tmpvp = hfsmp->hfs_backingfs_rootvp;
+                       hfsmp->hfs_backingfs_rootvp = NULLVP;
+                       hfsmp->hfs_sparsebandblks = 0;
+                       vrele(tmpvp);
+               }
+               return (0);
+       }
+#endif /* HFS_SPARSE_DEV */
 
-       VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+       case 6: {
+               int error;
 
-       ra = (struct radvisory *)(ap->a_data);
-       hp = VTOH(vp);
+               ap->a_vp->v_flag |= VFULLFSYNC;
+               error = VOP_FSYNC(ap->a_vp, ap->a_cred, MNT_NOWAIT, ap->a_p);
+               ap->a_vp->v_flag &= ~VFULLFSYNC;
 
-       fcb = HTOFCB(hp);
+               return error;
+       }
+       case 5: {
+               register struct vnode *vp;
+               register struct cnode *cp;
+               struct filefork *fp;
+               int error;
+
+               vp = ap->a_vp;
+               cp = VTOC(vp);
+               fp = VTOF(vp);
+
+               if (vp->v_type != VREG)
+                       return EINVAL;
+               VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
+               error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+               if (error)
+                       return (error);
+
+               /*
+                * Used by a regression test to determine whether
+                * all the dirty pages (via write) have been cleaned
+                * after a call to 'fsync'.
+                */
+               error = is_file_clean(vp, fp->ff_size);
+               VOP_UNLOCK(vp, 0, ap->a_p);
 
-       if (ra->ra_offset >= fcb->fcbEOF) {
-           VOP_UNLOCK(vp, 0, ap->a_p);
-           DBG_VOP_LOCKS_TEST(EFBIG);
-           return (EFBIG);
+               return (error);
        }
-       VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
 
-       error = advisory_read(vp, fcb->fcbEOF, ra->ra_offset, ra->ra_count, devBlockSize);
-       VOP_UNLOCK(vp, 0, ap->a_p);
+       case 1: {
+               register struct vnode *vp;
+               register struct radvisory *ra;
+               register struct cnode *cp;
+               struct filefork *fp;
+               int devBlockSize = 0;
+               int error;
 
-       DBG_VOP_LOCKS_TEST(error);
-       return (error);
-            }
+               vp = ap->a_vp;
+
+               if (vp->v_type != VREG)
+                       return EINVAL;
+               VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
+               error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+               if (error)
+                       return (error);
+
+               ra = (struct radvisory *)(ap->a_data);
+               cp = VTOC(vp);
+               fp = VTOF(vp);
+
+               if (ra->ra_offset >= fp->ff_size) {
+                       VOP_UNLOCK(vp, 0, ap->a_p);
+                       return (EFBIG);
+               }
+               VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
+
+               error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
+               VOP_UNLOCK(vp, 0, ap->a_p);
+
+               return (error);
+       }
 
         case 2: /* F_READBOOTBLOCKS */
         case 3: /* F_WRITEBOOTBLOCKS */
             {
            struct vnode *vp = ap->a_vp;
-           struct hfsnode *hp = VTOH(vp);
+           struct vnode *devvp = NULL;
            struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
            int devBlockSize;
            int error;
@@ -821,6 +706,7 @@ struct vop_ioctl_args /* {
             if ((vp->v_flag & VROOT) == 0) return EINVAL;
             if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;
            
+           devvp = VTOHFS(vp)->hfs_devvp;
            aiov.iov_base = btd->fbt_buffer;
            aiov.iov_len = btd->fbt_length;
            
@@ -832,11 +718,11 @@ struct vop_ioctl_args /* {
            auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
            auio.uio_procp = ap->a_p;
 
-           VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
+           VOP_DEVBLOCKSIZE(devvp, &devBlockSize);
 
            while (auio.uio_resid > 0) {
              blockNumber = auio.uio_offset / devBlockSize;
-             error = bread(hp->h_meta->h_devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
+             error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
              if (error) {
                   if (bp) brelse(bp);
                   return error;
@@ -866,7 +752,6 @@ struct vop_ioctl_args /* {
             }
 
         default:
-            DBG_VOP_LOCKS_TEST(ENOTTY);
             return (ENOTTY);
     }
 
@@ -877,104 +762,21 @@ struct vop_ioctl_args /* {
 /* ARGSUSED */
 int
 hfs_select(ap)
-struct vop_select_args /* {
-    struct vnode *a_vp;
-    int  a_which;
-    int  a_fflags;
-    struct ucred *a_cred;
-       void *a_wql;
-    struct proc *a_p;
-} */ *ap;
-{
-    DBG_FUNC_NAME("hfs_select");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
-
-    /*
-     * We should really check to see if I/O is possible.
-     */
-    DBG_VOP_LOCKS_TEST(1);
-    return (1);
-}
-
-
-
-/*
- * Mmap a file
- *
- * NB Currently unsupported.
-# XXX - not used
-#
- vop_mmap {
-     IN struct vnode *vp;
-     IN int fflags;
-     IN struct ucred *cred;
-     IN struct proc *p;
-
-     */
-
-/* ARGSUSED */
-
-int
-hfs_mmap(ap)
-struct vop_mmap_args /* {
-    struct vnode *a_vp;
-    int  a_fflags;
-    struct ucred *a_cred;
-    struct proc *a_p;
-} */ *ap;
-{
-    DBG_FUNC_NAME("hfs_mmap");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
-
-    DBG_VOP_LOCKS_TEST(EINVAL);
-    return (EINVAL);
-}
-
-
-
-/*
- * Seek on a file
- *
- * Nothing to do, so just return.
-# XXX - not used
-# Needs work: Is newoff right?  What's it mean?
-#
- vop_seek {
-     IN struct vnode *vp;
-     IN off_t oldoff;
-     IN off_t newoff;
-     IN struct ucred *cred;
-     */
-/* ARGSUSED */
-int
-hfs_seek(ap)
-struct vop_seek_args /* {
-    struct vnode *a_vp;
-    off_t  a_oldoff;
-    off_t  a_newoff;
-    struct ucred *a_cred;
-} */ *ap;
+       struct vop_select_args /* {
+               struct vnode *a_vp;
+               int  a_which;
+               int  a_fflags;
+               struct ucred *a_cred;
+               void *a_wql;
+               struct proc *a_p;
+       } */ *ap;
 {
-    DBG_FUNC_NAME("hfs_seek");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
-
-    DBG_VOP_LOCKS_TEST(E_NONE);
-    return (E_NONE);
+       /*
+        * We should really check to see if I/O is possible.
+        */
+       return (1);
 }
 
-
 /*
  * Bmap converts a the logical block number of a file to its physical block
  * number on the disk.
@@ -1005,17 +807,19 @@ struct vop_seek_args /* {
 
 int
 hfs_bmap(ap)
-struct vop_bmap_args /* {
-    struct vnode *a_vp;
-    daddr_t a_bn;
-    struct vnode **a_vpp;
-    daddr_t *a_bnp;
-    int *a_runp;
-} */ *ap;
+       struct vop_bmap_args /* {
+               struct vnode *a_vp;
+               daddr_t a_bn;
+               struct vnode **a_vpp;
+               daddr_t *a_bnp;
+               int *a_runp;
+       } */ *ap;
 {
-    struct hfsnode             *hp = VTOH(ap->a_vp);
-    struct hfsmount    *hfsmp = VTOHFS(ap->a_vp);
-    int                                        retval = E_NONE;
+       struct vnode *vp = ap->a_vp;
+       struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       struct hfsmount *hfsmp = VTOHFS(vp);
+   int                                 retval = E_NONE;
     daddr_t                            logBlockSize;
     size_t                             bytesContAvail = 0;
     off_t blockposition;
@@ -1024,46 +828,33 @@ struct vop_bmap_args /* {
     struct rl_entry *invalid_range;
     enum rl_overlaptype overlaptype;
 
-#define DEBUG_BMAP 0
-#if DEBUG_BMAP
-    DBG_FUNC_NAME("hfs_bmap");
-    DBG_VOP_LOCKS_DECL(2);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);
-
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-    if (ap->a_vpp != NULL) {
-        DBG_VOP_LOCKS_INIT(1,*ap->a_vpp, VOPDBG_IGNORE, VOPDBG_UNLOCKED, VOPDBG_IGNORE, VOPDBG_POS);
-    } else {
-        DBG_VOP_LOCKS_INIT(1,NULL, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
-       };
-#endif
-
-       DBG_IO(("\tMapped blk %d --> ", ap->a_bn));
-    /*
-     * Check for underlying vnode requests and ensure that logical
-     * to physical mapping is requested.
-     */
-    if (ap->a_vpp != NULL)
-        *ap->a_vpp = VTOH(ap->a_vp)->h_meta->h_devvp;
-    if (ap->a_bnp == NULL)
-        return (0);
-
-    logBlockSize = GetLogicalBlockSize(ap->a_vp);
-    blockposition = (off_t)(ap->a_bn * logBlockSize);
-        
-    lockExtBtree = hasOverflowExtents(hp);
-    if (lockExtBtree)
-    {
-        p = current_proc();
-        retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p);
-        if (retval)
-            return (retval);
-    }
+       /*
+        * Check for underlying vnode requests and ensure that logical
+        * to physical mapping is requested.
+        */
+       if (ap->a_vpp != NULL)
+               *ap->a_vpp = cp->c_devvp;
+       if (ap->a_bnp == NULL)
+               return (0);
+
+       /* Only clustered I/O should have delayed allocations. */
+       DBG_ASSERT(fp->ff_unallocblocks == 0);
+
+       logBlockSize = GetLogicalBlockSize(vp);
+       blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;
+
+       lockExtBtree = overflow_extents(fp);
+       if (lockExtBtree) {
+               p = current_proc();
+               retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
+                               LK_EXCLUSIVE | LK_CANRECURSE, p);
+               if (retval)
+                       return (retval);
+       }
 
-    retval = MacToVFSError(
+       retval = MacToVFSError(
                             MapFileBlockC (HFSTOVCB(hfsmp),
-                                            HTOFCB(hp),
+                                            (FCB*)fp,
                                             MAXPHYSIO,
                                             blockposition,
                                             ap->a_bnp,
@@ -1073,7 +864,7 @@ struct vop_bmap_args /* {
 
     if (retval == E_NONE) {
         /* Adjust the mapping information for invalid file ranges: */
-        overlaptype = rl_scan(&hp->h_invalidranges,
+        overlaptype = rl_scan(&fp->ff_invalidranges,
                             blockposition,
                             blockposition + MAXPHYSIO - 1,
                             &invalid_range);
@@ -1093,7 +884,7 @@ struct vop_bmap_args /* {
                     if (invalid_range->rl_start == blockposition) {
                        /* There's actually no valid information to be had starting here: */
                        *ap->a_bnp = (daddr_t)-1;
-                                               if ((HTOFCB(hp)->fcbEOF > (invalid_range->rl_end + 1)) &&
+                                               if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                                                        (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
                                bytesContAvail = invalid_range->rl_end + 1 - blockposition;
                        };
@@ -1116,21 +907,6 @@ struct vop_bmap_args /* {
         };
     };
 
-    DBG_IO(("%d:%d.\n", *ap->a_bnp, (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1));
-
-#if DEBUG_BMAP
-
-    DBG_VOP_LOCKS_TEST(retval);
-#endif
-
-    if (ap->a_runp) {
-        DBG_ASSERT((*ap->a_runp * logBlockSize) < bytesContAvail);                                                     /* At least *ap->a_runp blocks left and ... */
-        if (can_cluster(logBlockSize)) {
-            DBG_ASSERT(bytesContAvail - (*ap->a_runp * logBlockSize) < (2*logBlockSize));      /* ... at most 1 logical block accounted for by current block */
-                                                                                            /* ... plus some sub-logical block sized piece */
-        };
-    };
-
     return (retval);
 }
 
@@ -1138,11 +914,11 @@ struct vop_bmap_args /* {
 
 int
 hfs_blktooff(ap)
-struct vop_blktooff_args /* {
-    struct vnode *a_vp;
-    daddr_t a_lblkno;  
-    off_t *a_offset;
-} */ *ap;
+       struct vop_blktooff_args /* {
+               struct vnode *a_vp;
+               daddr_t a_lblkno;  
+               off_t *a_offset;
+       } */ *ap;
 {      
        if (ap->a_vp == NULL)
                return (EINVAL);
@@ -1153,14 +929,12 @@ struct vop_blktooff_args /* {
 
 int
 hfs_offtoblk(ap)
-struct vop_offtoblk_args /* {
-       struct vnode *a_vp;
-       off_t a_offset;    
-       daddr_t *a_lblkno;
-} */ *ap;
+       struct vop_offtoblk_args /* {
+               struct vnode *a_vp;
+               off_t a_offset;    
+               daddr_t *a_lblkno;
+       } */ *ap;
 {      
-       long lbsize, boff;
-
        if (ap->a_vp == NULL)
                return (EINVAL);
        *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;
@@ -1170,67 +944,183 @@ struct vop_offtoblk_args /* {
 
 int
 hfs_cmap(ap)
-struct vop_cmap_args /* {
-       struct vnode *a_vp;
-       off_t a_foffset;    
-       size_t a_size;
-       daddr_t *a_bpn;
-       size_t *a_run;
-       void *a_poff;
-} */ *ap;
+       struct vop_cmap_args /* {
+               struct vnode *a_vp;
+               off_t a_foffset;    
+               size_t a_size;
+               daddr_t *a_bpn;
+               size_t *a_run;
+               void *a_poff;
+       } */ *ap;
 {
-    struct hfsnode     *hp = VTOH(ap->a_vp);
-    struct hfsmount    *hfsmp = VTOHFS(ap->a_vp);
-    FCB                                        *fcb = HTOFCB(hp);
+    struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
+    struct filefork *fp = VTOF(ap->a_vp);
     size_t                             bytesContAvail = 0;
     int                        retval = E_NONE;
-    int                                        lockExtBtree;
+    int lockExtBtree = 0;
     struct proc                *p = NULL;
     struct rl_entry *invalid_range;
     enum rl_overlaptype overlaptype;
-    off_t limit;
+    int started_tr = 0, grabbed_lock = 0;
+       struct timeval tv;
 
-#define DEBUG_CMAP 0
-#if DEBUG_CMAP
-    DBG_FUNC_NAME("hfs_cmap");
-    DBG_VOP_LOCKS_DECL(2);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);
+       /*
+        * Check for underlying vnode requests and ensure that logical
+        * to physical mapping is requested.
+        */
+       if (ap->a_bpn == NULL)
+               return (0);
 
-    DBG_VOP_LOCKS_INIT(0, ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-#endif
+       p = current_proc();
 
-    DBG_IO(("\tMapped offset %qx --> ", ap->a_foffset));
-    /*
-     * Check for underlying vnode requests and ensure that logical
-     * to physical mapping is requested.
-     */
-    if (ap->a_bpn == NULL) {
-        return (0);
-    };
+       if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) {
+               /*
+                * File blocks are getting remapped. Wait until it's finished.
+                */
+               SET(VTOC(ap->a_vp)->c_flag, C_WBLKMAP);
+               (void) tsleep((caddr_t)VTOC(ap->a_vp), PINOD, "hfs_cmap", 0);
+               if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP))
+                       panic("hfs_cmap: no mappable blocks");
+       }       
+
+  retry:
+       if (fp->ff_unallocblocks) {
+               lockExtBtree = 1;
+
+               // XXXdbg
+               hfs_global_shared_lock_acquire(hfsmp);
+               grabbed_lock = 1;
+
+               if (hfsmp->jnl) {
+                       if (journal_start_transaction(hfsmp->jnl) != 0) {
+                               hfs_global_shared_lock_release(hfsmp);
+                               return EINVAL;
+                       } else {
+                               started_tr = 1;
+                       }
+               } 
 
-    if (lockExtBtree = hasOverflowExtents(hp))
-    {
-        p = current_proc();
-        if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
-            return (retval);
-        };
-    }
-    retval = MacToVFSError(
-                          MapFileBlockC (HFSTOVCB(hfsmp),
-                                         fcb,
-                                         ap->a_size,
-                                         ap->a_foffset,
-                                         ap->a_bpn,
-                                         &bytesContAvail));
+               if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
+                       if (started_tr) {
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       if (grabbed_lock) {
+                               hfs_global_shared_lock_release(hfsmp);
+                       }
+                       return (retval);
+               }
+       } else if (overflow_extents(fp)) {
+               lockExtBtree = 1;
+               if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
+                       return retval;
+               }
+       }
 
-    if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+       /*
+        * Check for any delayed allocations.
+        */
+       if (fp->ff_unallocblocks) {
+               SInt64 reqbytes, actbytes;
+
+               // 
+               // Make sure we have a transaction.  It's possible
+               // that we came in and fp->ff_unallocblocks was zero
+               // but during the time we blocked acquiring the extents
+               // btree, ff_unallocblocks became non-zero and so we
+               // will need to start a transaction.
+               //
+               if (hfsmp->jnl && started_tr == 0) {
+                   if (lockExtBtree) {
+                       (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+                       lockExtBtree = 0;
+                   }
 
-    if (retval == E_NONE) {
-        /* Adjust the mapping information for invalid file ranges: */
-        overlaptype = rl_scan(&hp->h_invalidranges,
-                            ap->a_foffset,
-                            ap->a_foffset + (off_t)bytesContAvail - 1,
+                   goto retry;
+               }
+
+               reqbytes = (SInt64)fp->ff_unallocblocks *
+                            (SInt64)HFSTOVCB(hfsmp)->blockSize;
+               /*
+                * Release the blocks on loan and acquire some real ones.
+                * Note that we can race someone else for these blocks
+                * (and lose) so cmap needs to handle a failure here.
+                * Currently this race can't occur because all allocations
+                * are protected by an exclusive lock on the Extents
+                * Overflow file.
+                */
+               HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
+               FTOC(fp)->c_blocks            -= fp->ff_unallocblocks;
+               fp->ff_blocks                 -= fp->ff_unallocblocks;
+               fp->ff_unallocblocks           = 0;
+
+               /* Files that are changing size are not hot file candidates. */
+               if (hfsmp->hfc_stage == HFC_RECORDING) {
+                       fp->ff_bytesread = 0;
+               }
+               while (retval == 0 && reqbytes > 0) {
+                       retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
+                                       (FCB*)fp, reqbytes, 0,
+                                       kEFAllMask | kEFNoClumpMask, &actbytes));
+                       if (retval == 0 && actbytes == 0)
+                               retval = ENOSPC;
+
+                       if (retval) {
+                               fp->ff_unallocblocks =
+                                       reqbytes / HFSTOVCB(hfsmp)->blockSize;
+                               HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
+                               FTOC(fp)->c_blocks            += fp->ff_unallocblocks;
+                               fp->ff_blocks                 += fp->ff_unallocblocks;
+                       }
+                       reqbytes -= actbytes;
+               }
+
+               if (retval) {
+                       (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+                       VTOC(ap->a_vp)->c_flag |= C_MODIFIED;
+                       if (started_tr) {
+                               tv = time;
+                               VOP_UPDATE(ap->a_vp, &tv, &tv, 1);
+
+                               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       if (grabbed_lock) {
+                               hfs_global_shared_lock_release(hfsmp);
+                       }
+                       return (retval);
+               }
+       }
+
+       retval = MacToVFSError(
+                          MapFileBlockC (HFSTOVCB(hfsmp),
+                                         (FCB *)fp,
+                                         ap->a_size,
+                                         ap->a_foffset,
+                                         ap->a_bpn,
+                                         &bytesContAvail));
+
+       if (lockExtBtree)
+               (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+
+       // XXXdbg
+       if (started_tr) {
+               tv = time;
+               retval = VOP_UPDATE(ap->a_vp, &tv, &tv, 1);
+
+               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+               journal_end_transaction(hfsmp->jnl);
+               started_tr = 0;
+       }
+       if (grabbed_lock) {
+               hfs_global_shared_lock_release(hfsmp);
+               grabbed_lock = 0;
+       }
+                       
+    if (retval == E_NONE) {
+        /* Adjust the mapping information for invalid file ranges: */
+        overlaptype = rl_scan(&fp->ff_invalidranges,
+                            ap->a_foffset,
+                            ap->a_foffset + (off_t)bytesContAvail - 1,
                             &invalid_range);
         if (overlaptype != RL_NOOVERLAP) {
             switch(overlaptype) {
@@ -1245,7 +1135,7 @@ struct vop_cmap_args /* {
                        (i.e. there's no valid bytes between the end of this range
                        and the file's EOF):
                      */
-                    if ((fcb->fcbEOF > (invalid_range->rl_end + 1)) &&
+                    if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                                        (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                        bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                     };
@@ -1257,7 +1147,7 @@ struct vop_cmap_args /* {
                     if (invalid_range->rl_start == ap->a_foffset) {
                        /* There's actually no valid information to be had starting here: */
                        *ap->a_bpn = (daddr_t)-1;
-                                               if ((fcb->fcbEOF > (invalid_range->rl_end + 1)) &&
+                                               if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                                                        (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                                bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                        };
@@ -1272,19 +1162,105 @@ struct vop_cmap_args /* {
         if (ap->a_run) *ap->a_run = bytesContAvail;
     };
 
-    if (ap->a_poff) *(int *)ap->a_poff = 0;
+       if (ap->a_poff)
+               *(int *)ap->a_poff = 0;
 
-    DBG_IO(("%d:%d.\n", *ap->a_bpn, bytesContAvail));
-
-#if DEBUG_BMAP
+       return (retval);
+}
 
-    DBG_VOP_LOCKS_TEST(retval);
-#endif
 
-    return (retval);
+/*
+ * Read or write a buffer that is not contiguous on disk.  We loop over
+ * each device block, copying to or from caller's buffer.
+ *
+ * We could be a bit more efficient by transferring as much data as is
+ * contiguous, but since this routine should rarely be called and that
+ * would be more complicated, it is best to keep it simple.
+ */
+static int
+hfs_strategy_fragmented(struct buf *bp)
+{
+       register struct vnode *vp = bp->b_vp;
+       register struct cnode *cp = VTOC(vp);
+       register struct vnode *devvp = cp->c_devvp;
+       caddr_t ioaddr;         /* Address of fragment within bp  */
+       struct buf *frag = NULL; /* For reading or writing a single block */
+       int retval = 0;
+       long remaining;         /* Bytes (in bp) left to transfer */
+       off_t offset;           /* Logical offset of current fragment in vp */
+       u_long block_size;      /* Size of one device block (and one I/O) */
+       
+       /* Make sure we redo this mapping for the next I/O */
+       bp->b_blkno = bp->b_lblkno;
+       
+       /* Set up the logical position and number of bytes to read/write */
+       offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
+       block_size = VTOHFS(vp)->hfs_phys_block_size;
+       
+       /* Get an empty buffer to do the deblocking */
+       frag = geteblk(block_size);
+       if (ISSET(bp->b_flags, B_READ))
+               SET(frag->b_flags, B_READ);
+
+       for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
+           ioaddr += block_size, offset += block_size,
+           remaining -= block_size) {
+               frag->b_resid = frag->b_bcount;
+               CLR(frag->b_flags, B_DONE);
+
+               /* Map the current position to a physical block number */
+               retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
+                   NULL, NULL);
+               if (retval != 0)
+                       break;
 
+               /*
+                * Did we try to read a hole?
+                * (Should never happen for metadata!)
+                */
+               if ((long)frag->b_lblkno == -1) {
+                       bzero(ioaddr, block_size);
+                       continue;
+               }
+               
+               /* If writing, copy before I/O */
+               if (!ISSET(bp->b_flags, B_READ))
+                       bcopy(ioaddr, frag->b_data, block_size);
+
+               /* Call the device to do the I/O and wait for it */
+               frag->b_blkno = frag->b_lblkno;
+               frag->b_vp = devvp;  /* Used to dispatch via VOP_STRATEGY */
+               frag->b_dev = devvp->v_rdev;
+               retval = VOP_STRATEGY(frag);
+               frag->b_vp = NULL;
+               if (retval != 0)
+                       break;
+               retval = biowait(frag);
+               if (retval != 0)
+                       break;
+               
+               /* If reading, copy after the I/O */
+               if (ISSET(bp->b_flags, B_READ))
+                       bcopy(frag->b_data, ioaddr, block_size);
+       }
+       
+       frag->b_vp = NULL;
+       //
+       // XXXdbg - in the case that this is a meta-data block, it won't affect
+       //          the journal because this bp is for a physical disk block,
+       //          not a logical block that is part of the catalog or extents
+       //          files.
+       SET(frag->b_flags, B_INVAL);
+       brelse(frag);
+       
+       if ((bp->b_error = retval) != 0)
+               SET(bp->b_flags, B_ERROR);
+       
+       biodone(bp);    /* This I/O is now complete */
+       return retval;
 }
 
+
 /*
  * Calculate the logical to physical mapping if not done already,
  * then call the device strategy routine.
@@ -1294,386 +1270,485 @@ struct vop_cmap_args /* {
     */
 int
 hfs_strategy(ap)
-struct vop_strategy_args /* {
-    struct buf *a_bp;
-} */ *ap;
+       struct vop_strategy_args /* {
+               struct buf *a_bp;
+       } */ *ap;
 {
-    register struct buf *bp = ap->a_bp;
-    register struct vnode *vp = bp->b_vp;
-    register struct hfsnode *hp;
-    int retval = 0;
-
-       DBG_FUNC_NAME("hfs_strategy");
-
-//     DBG_VOP_PRINT_FUNCNAME();DBG_VOP_CONT(("\n"));
-
-    hp = VTOH(vp);
-
-    if ( !(bp->b_flags & B_VECTORLIST)) {
-
-        if (vp->v_type == VBLK || vp->v_type == VCHR)
-           panic("hfs_strategy: device vnode passed!");
+       register struct buf *bp = ap->a_bp;
+       register struct vnode *vp = bp->b_vp;
+       register struct cnode *cp = VTOC(vp);
+       int retval = 0;
+       off_t offset;
+       size_t bytes_contig;
+       
+       if ( !(bp->b_flags & B_VECTORLIST)) {
+               if (vp->v_type == VBLK || vp->v_type == VCHR)
+                       panic("hfs_strategy: device vnode passed!");
 
-       if (bp->b_flags & B_PAGELIST) {
-           /*
-            * if we have a page list associated with this bp,
-            * then go through cluster_bp since it knows how to 
-            * deal with a page request that might span non-contiguous
-            * physical blocks on the disk...
-            */
-           retval = cluster_bp(bp);
-           vp = hp->h_meta->h_devvp;
-           bp->b_dev = vp->v_rdev;
+               if (bp->b_flags & B_PAGELIST) {
+                       /*
+                        * If we have a page list associated with this bp,
+                        * then go through cluster_bp since it knows how to 
+                        * deal with a page request that might span non-
+                        * contiguous physical blocks on the disk...
+                        */
+                       retval = cluster_bp(bp);
+                       vp = cp->c_devvp;
+                       bp->b_dev = vp->v_rdev;
 
-           return (retval);
-       }
-       /*
-        * If we don't already know the filesystem relative block number
-        * then get it using VOP_BMAP().  If VOP_BMAP() returns the block
-        * number as -1 then we've got a hole in the file.  Although HFS
-         * filesystems don't create files with holes, invalidating of
-         * subranges of the file (lazy zero filling) may create such a
-         * situation.
-        */
-       if (bp->b_blkno == bp->b_lblkno) {
-           if ((retval = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL))) {
-               bp->b_error = retval;
-               bp->b_flags |= B_ERROR;
-               biodone(bp);
-               return (retval);
-           }
-           if ((long)bp->b_blkno == -1)
-               clrbuf(bp);
-       }
-       if ((long)bp->b_blkno == -1) {
-           biodone(bp);
-           return (0);
+                       return (retval);
+               }
+               
+               /*
+                * If we don't already know the filesystem relative block
+                * number then get it using VOP_CMAP().  If VOP_CMAP()
+                * returns the block number as -1 then we've got a hole in
+                * the file.  Although HFS filesystems don't create files with
+                * holes, invalidating of subranges of the file (lazy zero
+                * filling) may create such a situation.
+                */
+               if (bp->b_blkno == bp->b_lblkno) {
+                       offset = (off_t) bp->b_lblkno *
+                           (off_t) GetLogicalBlockSize(vp);
+
+                       if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
+                           &bp->b_blkno, &bytes_contig, NULL))) {
+                               bp->b_error = retval;
+                               bp->b_flags |= B_ERROR;
+                               biodone(bp);
+                               return (retval);
+                       }
+                       if (bytes_contig < bp->b_bcount)
+                       {
+                               /*
+                                * We were asked to read a block that wasn't
+                                * contiguous, so we have to read each of the
+                                * pieces and copy them into the buffer.
+                                * Since ordinary file I/O goes through
+                                * cluster_io (which won't ask us for
+                                * discontiguous data), this is probably an
+                                * attempt to read or write metadata.
+                                */
+                               return hfs_strategy_fragmented(bp);
+                       }
+                       if ((long)bp->b_blkno == -1)
+                               clrbuf(bp);
+               }
+               if ((long)bp->b_blkno == -1) {
+                       biodone(bp);
+                       return (0);
+               }
+               if (bp->b_validend == 0) {
+                       /*
+                        * Record the exact size of the I/O transfer about to
+                        * be made:
+                        */
+                       bp->b_validend = bp->b_bcount;
+               }
        }
-       if (bp->b_validend == 0) {
-           /* Record the exact size of the I/O transfer about to be made: */
-           DBG_ASSERT(bp->b_validoff == 0);
-           bp->b_validend = bp->b_bcount;
-           DBG_ASSERT(bp->b_dirtyoff == 0);
-       };
-    }
-    vp = hp->h_meta->h_devvp;
-    bp->b_dev = vp->v_rdev;
-    DBG_IO(("\t\t>>>%s: continuing w/ vp: 0x%x with logBlk Ox%X and phyBlk Ox%X\n", funcname, (u_int)vp, bp->b_lblkno, bp->b_blkno));
+       vp = cp->c_devvp;
+       bp->b_dev = vp->v_rdev;
 
-    return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
+       return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
 }
 
 
-/*
-#% reallocblks vp      L L L
-#
- vop_reallocblks {
-     IN struct vnode *vp;
-     IN struct cluster_save *buflist;
+static int do_hfs_truncate(ap)
+       struct vop_truncate_args /* {
+               struct vnode *a_vp;
+               off_t a_length;
+               int a_flags;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       off_t length;
+       long vflags;
+       struct timeval tv;
+       int retval;
+       off_t bytesToAdd;
+       off_t actualBytesAdded;
+       off_t filebytes;
+       u_long fileblocks;
+       int blksize;
+       struct hfsmount *hfsmp;
+
+       if (vp->v_type != VREG && vp->v_type != VLNK)
+               return (EISDIR);        /* cannot truncate an HFS directory! */
+
+       length = ap->a_length;
+       blksize = VTOVCB(vp)->blockSize;
+       fileblocks = fp->ff_blocks;
+       filebytes = (off_t)fileblocks * (off_t)blksize;
+
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
+                (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+
+       if (length < 0)
+               return (EINVAL);
 
-     */
+       if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
+               return (EFBIG);
 
-int
-hfs_reallocblks(ap)
-struct vop_reallocblks_args /* {
-    struct vnode *a_vp;
-    struct cluster_save *a_buflist;
-} */ *ap;
-{
-    DBG_FUNC_NAME("hfs_reallocblks");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
+       hfsmp = VTOHFS(vp);
 
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
+       tv = time;
+       retval = E_NONE;
 
-    /* Currently no support for clustering */          /* XXX */
-    DBG_VOP_LOCKS_TEST(ENOSPC);
-    return (ENOSPC);
-}
+       /* Files that are changing size are not hot file candidates. */
+       if (hfsmp->hfc_stage == HFC_RECORDING) {
+               fp->ff_bytesread = 0;
+       }
 
+       /* 
+        * We cannot just check if fp->ff_size == length (as an optimization)
+        * since there may be extra physical blocks that also need truncation.
+        */
+#if QUOTA
+       if (retval = hfs_getinoquota(cp))
+               return(retval);
+#endif /* QUOTA */
 
+       /*
+        * Lengthen the size of the file. We must ensure that the
+        * last byte of the file is allocated. Since the smallest
+        * value of ff_size is 0, length will be at least 1.
+        */
+       if (length > fp->ff_size) {
+#if QUOTA
+               retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
+                               ap->a_cred, 0);
+               if (retval)
+                       goto Err_Exit;
+#endif /* QUOTA */
+               /*
+                * If we don't have enough physical space then
+                * we need to extend the physical size.
+                */
+               if (length > filebytes) {
+                       int eflags;
+                       u_long blockHint = 0;
 
-/*
-#
-#% truncate    vp      L L L
-#
-vop_truncate {
-    IN struct vnode *vp;
-    IN off_t length;
-    IN int flags;      (IO_SYNC)
-    IN struct ucred *cred;
-    IN struct proc *p;
-};
- * Truncate the hfsnode hp to at most length size, freeing (or adding) the
- * disk blocks.
- */
-int hfs_truncate(ap)
-    struct vop_truncate_args /* {
-        struct vnode *a_vp;
-        off_t a_length;
-        int a_flags;
-        struct ucred *a_cred;
-        struct proc *a_p;
-    } */ *ap;
-{
-    register struct vnode *vp = ap->a_vp;
-    register struct hfsnode *hp = VTOH(vp);
-    off_t length = ap->a_length;
-    long vflags;
-    struct timeval tv;
-    int retval;
-    FCB *fcb;
-    off_t bytesToAdd;
-    off_t actualBytesAdded;
-    DBG_FUNC_NAME("hfs_truncate");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(ap->a_vp);
-#endif
+                       /* All or nothing and don't round up to clumpsize. */
+                       eflags = kEFAllMask | kEFNoClumpMask;
 
-    fcb = HTOFCB(hp);
+                       if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
+                               eflags |= kEFReserveMask;  /* keep a reserve */
 
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
-                (int)length, fcb->fcbEOF, fcb->fcbPLen, 0, 0);
+                       /*
+                        * Allocate Journal and Quota files in metadata zone.
+                        */
+                       if (filebytes == 0 &&
+                           hfsmp->hfs_flags & HFS_METADATA_ZONE &&
+                           hfs_virtualmetafile(cp)) {
+                               eflags |= kEFMetadataMask;
+                               blockHint = hfsmp->hfs_metazone_start;
+                       }
+                       // XXXdbg
+                       hfs_global_shared_lock_acquire(hfsmp);
+                       if (hfsmp->jnl) {
+                               if (journal_start_transaction(hfsmp->jnl) != 0) {
+                                       retval = EINVAL;
+                                       goto Err_Exit;
+                               }
+                       }
 
-    if (length < 0) {
-        DBG_VOP_LOCKS_TEST(EINVAL);
-        return (EINVAL);
-    }
+                       /* lock extents b-tree (also protects volume bitmap) */
+                       retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
+                       if (retval) {
+                               if (hfsmp->jnl) {
+                                       journal_end_transaction(hfsmp->jnl);
+                               } 
+                               hfs_global_shared_lock_release(hfsmp);
 
-    if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE)) {        
-        DBG_VOP_LOCKS_TEST(EFBIG);
-       return (EFBIG);
-    }
+                               goto Err_Exit;
+                       }
 
-    if (vp->v_type != VREG && vp->v_type != VLNK) {            
-        DBG_VOP_LOCKS_TEST(EISDIR);
-        return (EISDIR);               /* hfs doesn't support truncating of directories */
-    }
+                       while ((length > filebytes) && (retval == E_NONE)) {
+                               bytesToAdd = length - filebytes;
+                               retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
+                                                    (FCB*)fp,
+                                                    bytesToAdd,
+                                                    blockHint,
+                                                    eflags,
+                                                    &actualBytesAdded));
 
-    tv = time;
-    retval = E_NONE;
-       
-    DBG_RW(("%s: truncate from Ox%lX to Ox%X bytes\n", funcname, fcb->fcbPLen, length));
+                               filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+                               if (actualBytesAdded == 0 && retval == E_NONE) {
+                                       if (length > filebytes)
+                                               length = filebytes;
+                                       break;
+                               }
+                       } /* endwhile */
 
-    /* 
-     * we cannot just check if fcb->fcbEOF == length (as an optimization)
-     * since there may be extra physical blocks that also need truncation
-     */
+                       (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
 
-    /*
-     * Lengthen the size of the file. We must ensure that the
-     * last byte of the file is allocated. Since the smallest
-     * value of fcbEOF is 0, length will be at least 1.
-     */
-    if (length > fcb->fcbEOF) {
-               off_t filePosition;
-               daddr_t logBlockNo;
-               long logBlockSize;
-               long blkOffset;
-               off_t bytestoclear;
-               int blockZeroCount;
-               struct buf *bp=NULL;
+                       // XXXdbg
+                       if (hfsmp->jnl) {
+                               tv = time;
+                               VOP_UPDATE(vp, &tv, &tv, 1);
 
-       /*
-        * If we don't have enough physical space then
-        * we need to extend the physical size.
-        */
-       if (length > fcb->fcbPLen) {
-           /* lock extents b-tree (also protects volume bitmap) */
-           retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
-           if (retval)
-               goto Err_Exit;
-
-           while ((length > fcb->fcbPLen) && (retval == E_NONE)) {
-               bytesToAdd = length - fcb->fcbPLen;
-               retval = MacToVFSError(
-                                       ExtendFileC (HTOVCB(hp),
-                                                    fcb,
-                                                    bytesToAdd,
-                                                    0,
-                                                    kEFAllMask,        /* allocate all requested bytes or none */
-                                                    &actualBytesAdded));
+                               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                               journal_end_transaction(hfsmp->jnl);
+                       } 
+                       hfs_global_shared_lock_release(hfsmp);
 
-               if (actualBytesAdded == 0 && retval == E_NONE) {
-                   if (length > fcb->fcbPLen)
-                       length = fcb->fcbPLen;
-                   break;
+                       if (retval)
+                               goto Err_Exit;
+
+                       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+                               (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
                }
-           } 
-           (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
-           if (retval)
-               goto Err_Exit;
-
-           DBG_ASSERT(length <= fcb->fcbPLen);
-           KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
-                        (int)length, fcb->fcbEOF, fcb->fcbPLen, 0, 0);
-       }
  
-       if (! (ap->a_flags & IO_NOZEROFILL)) {
-
-           if (UBCISVALID(vp) && retval == E_NONE) {
-                       struct rl_entry *invalid_range;
-               int devBlockSize;
-                       off_t zero_limit;
+               if (!(ap->a_flags & IO_NOZEROFILL)) {
+                       if (UBCINFOEXISTS(vp) && retval == E_NONE) {
+                               struct rl_entry *invalid_range;
+                               int devBlockSize;
+                               off_t zero_limit;
                        
-                       zero_limit = (fcb->fcbEOF + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
-                       if (length < zero_limit) zero_limit = length;
-
-                       if (length > fcb->fcbEOF) {
-                               /* Extending the file: time to fill out the current last page w. zeroes? */
-                               if ((fcb->fcbEOF & PAGE_MASK_64) &&
-                                       (rl_scan(&hp->h_invalidranges,
-                                                        fcb->fcbEOF & ~PAGE_MASK_64,
-                                                        fcb->fcbEOF - 1,
-                                                        &invalid_range) == RL_NOOVERLAP)) {
+                               zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+                               if (length < zero_limit) zero_limit = length;
+
+                               if (length > fp->ff_size) {
+                                       /* Extending the file: time to fill out the current last page w. zeroes? */
+                                       if ((fp->ff_size & PAGE_MASK_64) &&
+                                           (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
+                                           fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
                                                
                                                /* There's some valid data at the start of the (current) last page
                                                   of the file, so zero out the remainder of that page to ensure the
                                                   entire page contains valid data.  Since there is no invalid range
                                                   possible past the (current) eof, there's no need to remove anything
                                                   from the invalid range list before calling cluster_write():                                           */
-                                               VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
-                                               retval = cluster_write(vp, (struct uio *) 0, fcb->fcbEOF, zero_limit,
-                                                                                               fcb->fcbEOF, (off_t)0, devBlockSize, (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL);
+                                               VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
+                                               retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
+                                                               fp->ff_size, (off_t)0, devBlockSize,
+                                                               (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
                                                if (retval) goto Err_Exit;
                                                
                                                /* Merely invalidate the remaining area, if necessary: */
-                                               if (length > zero_limit) rl_add(zero_limit, length - 1, &hp->h_invalidranges);
-                               } else {
+                                               if (length > zero_limit) {
+                                                       rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
+                                                       cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
+                                               }
+                                       } else {
                                        /* The page containing the (current) eof is invalid: just add the
                                           remainder of the page to the invalid list, along with the area
                                           being newly allocated:
                                         */
-                                       rl_add(fcb->fcbEOF, length - 1, &hp->h_invalidranges);
-                               };
-                       }
-           } else {
-
-#if 0
-                   /*
-                    * zero out any new logical space...
-                    */
-                   bytestoclear = length - fcb->fcbEOF;
-                   filePosition = fcb->fcbEOF;
-
-                   while (bytestoclear > 0) {
-                       logBlockNo   = (daddr_t)(filePosition / PAGE_SIZE_64);
-                       blkOffset    = (long)(filePosition & PAGE_MASK_64);  
-
-                       if (((off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * (off_t)PAGE_SIZE)) < PAGE_SIZE_64)
-                           logBlockSize = (off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * PAGE_SIZE_64);
-                       else
-                           logBlockSize = PAGE_SIZE;
-                       
-                       if (logBlockSize < blkOffset)
-                           panic("hfs_truncate: bad logBlockSize computed\n");
-                               
-                       blockZeroCount = MIN(bytestoclear, logBlockSize - blkOffset);
+                                       rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
+                                       cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
+                                       };
+                               }
+                       } else {
+                                       panic("hfs_truncate: invoked on non-UBC object?!");
+                       };
+               }
+               cp->c_flag |= C_UPDATE;
+               fp->ff_size = length;
 
-                       if (blkOffset == 0 && ((bytestoclear >= logBlockSize) || filePosition >= fcb->fcbEOF)) {
-                           bp = getblk(vp, logBlockNo, logBlockSize, 0, 0, BLK_WRITE);
-                           retval = 0;
+               if (UBCISVALID(vp))
+                       ubc_setsize(vp, fp->ff_size);   /* XXX check errors */
 
-                       } else {
-                           retval = bread(vp, logBlockNo, logBlockSize, ap->a_cred, &bp);
-                           if (retval) {
-                               brelse(bp);
+       } else { /* Shorten the size of the file */
+
+               if (fp->ff_size > length) {
+                       /*
+                        * Any buffers that are past the truncation point need to be
+                        * invalidated (to maintain buffer cache consistency).  For
+                        * simplicity, we invalidate all the buffers by calling vinvalbuf.
+                        */
+                       if (UBCISVALID(vp))
+                               ubc_setsize(vp, length); /* XXX check errors */
+
+                       vflags = ((length > 0) ? V_SAVE : 0)  | V_SAVEMETA;     
+                       retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
+           
+                       /* Any space previously marked as invalid is now irrelevant: */
+                       rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
+               }
+
+               /* 
+                * Account for any unmapped blocks. Note that the new
+                * file length can still end up with unmapped blocks.
+                */
+               if (fp->ff_unallocblocks > 0) {
+                       u_int32_t finalblks;
+
+                       /* lock extents b-tree */
+                       retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
+                                       LK_EXCLUSIVE, ap->a_p);
+                       if (retval)
                                goto Err_Exit;
-                           }
+
+                       VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
+                       cp->c_blocks             -= fp->ff_unallocblocks;
+                       fp->ff_blocks            -= fp->ff_unallocblocks;
+                       fp->ff_unallocblocks      = 0;
+
+                       finalblks = (length + blksize - 1) / blksize;
+                       if (finalblks > fp->ff_blocks) {
+                               /* calculate required unmapped blocks */
+                               fp->ff_unallocblocks      = finalblks - fp->ff_blocks;
+                               VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
+                               cp->c_blocks             += fp->ff_unallocblocks;
+                               fp->ff_blocks            += fp->ff_unallocblocks;
                        }
-                       bzero((char *)bp->b_data + blkOffset, blockZeroCount);
-                                       
-                       bp->b_flags |= B_DIRTY | B_AGE;
-
-                       if (ap->a_flags & IO_SYNC)
-                           VOP_BWRITE(bp);
-                       else if (logBlockNo % 32)
-                           bawrite(bp);
-                       else
-                           VOP_BWRITE(bp);     /* wait after we issue 32 requests */
-
-                       bytestoclear -= blockZeroCount;
-                       filePosition += blockZeroCount;
-                   }
-#else
-                       panic("hfs_truncate: invoked on non-UBC object?!");
-#endif
-           };
-       }
-       fcb->fcbEOF = length;
+                       (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
+                                       LK_RELEASE, ap->a_p);
+               }
 
-       if (UBCISVALID(vp))
-               ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
+               /*
+                * For a TBE process the deallocation of the file blocks is
+                * delayed until the file is closed.  And hfs_close calls
+                * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
+                * isn't set, we make sure this isn't a TBE process.
+                */
+               if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
+#if QUOTA
+                 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
+#endif /* QUOTA */
+                 // XXXdbg
+                 hfs_global_shared_lock_acquire(hfsmp);
+                       if (hfsmp->jnl) {
+                               if (journal_start_transaction(hfsmp->jnl) != 0) {
+                                       retval = EINVAL;
+                                       goto Err_Exit;
+                               }
+                       }
 
-    } else { /* Shorten the size of the file */
+                       /* lock extents b-tree (also protects volume bitmap) */
+                       retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
+                       if (retval) {
+                               if (hfsmp->jnl) {
+                                       journal_end_transaction(hfsmp->jnl);
+                               }
+                               hfs_global_shared_lock_release(hfsmp);
+                               goto Err_Exit;
+                       }
+                       
+                       if (fp->ff_unallocblocks == 0)
+                               retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
+                                               (FCB*)fp, length, false));
 
-        if (fcb->fcbEOF > length) {
-           /*
-            * Any buffers that are past the truncation point need to be
-            * invalidated (to maintain buffer cache consistency).  For
-            * simplicity, we invalidate all the buffers by calling vinvalbuf.
-            */
-           if (UBCISVALID(vp))
-               ubc_setsize(vp, (off_t)length); /* XXX check errors */
+                       (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
 
-           vflags = ((length > 0) ? V_SAVE : 0)  | V_SAVEMETA; 
-           retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
-           
-           /* Any space previously marked as invalid is now irrelevant: */
-           rl_remove(length, fcb->fcbEOF - 1, &hp->h_invalidranges);
-       }
+                       // XXXdbg
+                       if (hfsmp->jnl) {
+                               tv = time;
+                               VOP_UPDATE(vp, &tv, &tv, 1);
 
-       /*
-        * For a TBE process the deallocation of the file blocks is
-        * delayed until the file is closed.  And hfs_close calls
-        * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
-        * isn't set, we make sure this isn't a TBE process.
-        */
-       if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
-
-           /* lock extents b-tree (also protects volume bitmap) */
-           retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
-           if (retval)
-               goto Err_Exit;
-           retval = MacToVFSError(
-                               TruncateFileC(  
-                                             HTOVCB(hp),
-                                             fcb,
-                                             length,
-                                             false));
-           (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
-           if (retval)
-               goto Err_Exit;
-       }
-       fcb->fcbEOF = length;
+                               hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       hfs_global_shared_lock_release(hfsmp);
 
-       if (fcb->fcbFlags & fcbModifiedMask)
-           hp->h_nodeflags |= IN_MODIFIED;
-    }
-    hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
-    retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
-    if (retval) {
-        DBG_ERR(("Could not update truncate"));
+                       filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+                       if (retval)
+                               goto Err_Exit;
+#if QUOTA
+                       /* These are bytesreleased */
+                       (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
+#endif /* QUOTA */
+               }
+               /* Only set update flag if the logical length changes */
+               if (fp->ff_size != length)
+                       cp->c_flag |= C_UPDATE;
+               fp->ff_size = length;
+       }
+       cp->c_flag |= C_CHANGE;
+       retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
+       if (retval) {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
                     -1, -1, -1, retval, 0);
-    }
-Err_Exit:;
+       }
 
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(ap->a_vp);
-#endif
+Err_Exit:
 
-    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
-                (int)length, fcb->fcbEOF, fcb->fcbPLen, retval, 0);
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
+                (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
 
-    DBG_VOP_LOCKS_TEST(retval);
-    return (retval);
+       return (retval);
+}
+
+
+/*
+#
+#% truncate    vp      L L L
+#
+vop_truncate {
+    IN struct vnode *vp;
+    IN off_t length;
+    IN int flags;      (IO_SYNC)
+    IN struct ucred *cred;
+    IN struct proc *p;
+};
+ * Truncate a cnode to at most length size, freeing (or adding) the
+ * disk blocks.
+ *
+ * When the requested length differs from the currently allocated size
+ * (ff_blocks * blockSize) by more than HFS_BIGFILE_SIZE, the change is
+ * applied in steps of at most HFS_BIGFILE_SIZE bytes, one
+ * do_hfs_truncate() call per step.  ap->a_length is overwritten with
+ * each step's target, so after an error it holds the size of the step
+ * that failed rather than the caller's original request.
+ *
+ * Returns EISDIR when vp is neither VREG nor VLNK; otherwise returns
+ * the result of the last do_hfs_truncate() call (stepping stops at the
+ * first non-zero result).
+ */
+int hfs_truncate(ap)
+       struct vop_truncate_args /* {
+               struct vnode *a_vp;
+               off_t a_length;
+               int a_flags;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
+{
+       register struct vnode *vp = ap->a_vp;
+       register struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       off_t length;
+       off_t filebytes;
+       u_long fileblocks;
+       int blksize, error;
+
+       if (vp->v_type != VREG && vp->v_type != VLNK)
+               return (EISDIR);        /* cannot truncate an HFS directory! */
+
+       length = ap->a_length;
+       blksize = VTOVCB(vp)->blockSize;
+       fileblocks = fp->ff_blocks;
+       filebytes = (off_t)fileblocks * (off_t)blksize;
+
+       // have to loop truncating or growing files that are
+       // really big because otherwise transactions can get
+       // enormous and consume too many kernel resources.
+       if (length < filebytes && (filebytes - length) > HFS_BIGFILE_SIZE) {
+           // shrink: walk the target size down towards length
+           while (filebytes > length) {
+               if ((filebytes - length) > HFS_BIGFILE_SIZE) {
+                   filebytes -= HFS_BIGFILE_SIZE;
+               } else {
+                   filebytes = length;
+               }
+
+               ap->a_length = filebytes;
+               error = do_hfs_truncate(ap);
+               if (error)
+                   break;
+           }
+       } else if (length > filebytes && (length - filebytes) > HFS_BIGFILE_SIZE) {
+           // grow: walk the target size up towards length
+           while (filebytes < length) {
+               if ((length - filebytes) > HFS_BIGFILE_SIZE) {
+                   filebytes += HFS_BIGFILE_SIZE;
+               } else {
+                   // final step must land exactly on the requested
+                   // length; using the remaining delta here would
+                   // request the wrong size and keep the loop going.
+                   filebytes = length;
+               }
+
+               ap->a_length = filebytes;
+               error = do_hfs_truncate(ap);
+               if (error)
+                   break;
+           }
+       } else {
+           // small change: a single call covers it
+           error = do_hfs_truncate(ap);
+       }
+
+       return error;
 }
 
 
@@ -1691,131 +1766,170 @@ vop_allocate {
        IN struct ucred *cred;
        IN struct proc *p;
 };
- * allocate the hfsnode hp to at most length size
+ * allocate a cnode to at most length size
  */
 int hfs_allocate(ap)
-    struct vop_allocate_args /* {
-        struct vnode *a_vp;
-        off_t a_length;
-        u_int32_t  a_flags;
-        off_t *a_bytesallocated;
-        off_t a_offset;
-        struct ucred *a_cred;
-        struct proc *a_p;
-    } */ *ap;
+       struct vop_allocate_args /* {
+               struct vnode *a_vp;
+               off_t a_length;
+               u_int32_t  a_flags;
+               off_t *a_bytesallocated;
+               off_t a_offset;
+               struct ucred *a_cred;
+               struct proc *a_p;
+       } */ *ap;
 {
-    register struct vnode *vp = ap->a_vp;
-    register struct hfsnode *hp = VTOH(vp);
-    off_t      length = ap->a_length;
-    off_t      startingPEOF;
-    off_t      moreBytesRequested;
-    off_t      actualBytesAdded;
-    long vflags;
-    struct timeval tv;
-    int retval, retval2;
-    FCB *fcb;
-    UInt32 blockHint;
-    UInt32 extendFlags =0;   /* For call to ExtendFileC */
-    DBG_FUNC_NAME("hfs_allocate");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
-    DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-    /* Set the number of bytes allocated to 0 so that the caller will know that we
-       did nothing.  ExtendFileC will fill this in for us if we actually allocate space */
-
-    *(ap->a_bytesallocated) = 0; 
-    fcb = HTOFCB(hp);
-
-    /* Now for some error checking */
-
-    if (length < (off_t)0) {
-        DBG_VOP_LOCKS_TEST(EINVAL);
-        return (EINVAL);
-    }
-
-    if (vp->v_type != VREG && vp->v_type != VLNK) {
-        DBG_VOP_LOCKS_TEST(EISDIR);
-        return (EISDIR);        /* hfs doesn't support truncating of directories */
-    }
-
-    if ((ap->a_flags & ALLOCATEFROMVOL) && (length <= fcb->fcbPLen))
-        return (EINVAL);
+       struct vnode *vp = ap->a_vp;
+       struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       ExtendedVCB *vcb = VTOVCB(vp);
+       off_t length = ap->a_length;
+       off_t startingPEOF;
+       off_t moreBytesRequested;
+       off_t actualBytesAdded;
+       off_t filebytes;
+       u_long fileblocks;
+       long vflags;
+       struct timeval tv;
+       int retval, retval2;
+       UInt32 blockHint;
+       UInt32 extendFlags;   /* For call to ExtendFileC */
+       struct hfsmount *hfsmp;
+
+       hfsmp = VTOHFS(vp);
+
+       *(ap->a_bytesallocated) = 0;
+       fileblocks = fp->ff_blocks;
+       filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
+
+       if (length < (off_t)0)
+               return (EINVAL);
+       if (vp->v_type != VREG)
+               return (EISDIR);
+       if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes))
+               return (EINVAL);
 
-    /* Fill in the flags word for the call to Extend the file */
+       /* Fill in the flags word for the call to Extend the file */
 
-       if (ap->a_flags & ALLOCATECONTIG) {
+       extendFlags = kEFNoClumpMask;
+       if (ap->a_flags & ALLOCATECONTIG) 
                extendFlags |= kEFContigMask;
-       }
-
-    if (ap->a_flags & ALLOCATEALL) {
+       if (ap->a_flags & ALLOCATEALL)
                extendFlags |= kEFAllMask;
-       }
+       if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
+               extendFlags |= kEFReserveMask;
 
-    tv = time;
-    retval = E_NONE;
-    blockHint = 0;
-    startingPEOF = fcb->fcbPLen;
+       tv = time;
+       retval = E_NONE;
+       blockHint = 0;
+       startingPEOF = filebytes;
 
-    if (ap->a_flags & ALLOCATEFROMPEOF) {
-               length += fcb->fcbPLen;
-       }
+       if (ap->a_flags & ALLOCATEFROMPEOF)
+               length += filebytes;
+       else if (ap->a_flags & ALLOCATEFROMVOL)
+               blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
 
-       if (ap->a_flags & ALLOCATEFROMVOL)
-               blockHint = ap->a_offset / HTOVCB(hp)->blockSize;
+       /* If no changes are necessary, then we're done */
+       if (filebytes == length)
+               goto Std_Exit;
 
-    /* If no changes are necesary, then we're done */
-    if (fcb->fcbPLen == length)
-       goto Std_Exit;
-
-    /*
-    * Lengthen the size of the file. We must ensure that the
-    * last byte of the file is allocated. Since the smallest
-    * value of fcbPLen is 0, length will be at least 1.
-    */
-    if (length > fcb->fcbPLen) {
-               moreBytesRequested = length - fcb->fcbPLen;
+       /*
+        * Lengthen the size of the file. We must ensure that the
+        * last byte of the file is allocated. Since the smallest
+        * value of filebytes is 0, length will be at least 1.
+        */
+       if (length > filebytes) {
+               moreBytesRequested = length - filebytes;
                
+#if QUOTA
+               retval = hfs_chkdq(cp,
+                               (int64_t)(roundup(moreBytesRequested, vcb->blockSize)), 
+                               ap->a_cred, 0);
+               if (retval)
+                       return (retval);
+
+#endif /* QUOTA */
+               /*
+                * Metadata zone checks.
+                */
+               if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
+                       /*
+                        * Allocate Journal and Quota files in metadata zone.
+                        */
+                       if (hfs_virtualmetafile(cp)) {
+                               extendFlags |= kEFMetadataMask;
+                               blockHint = hfsmp->hfs_metazone_start;
+                       } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
+                                  (blockHint <= hfsmp->hfs_metazone_end)) {
+                               /*
+                                * Move blockHint outside metadata zone.
+                                */
+                               blockHint = hfsmp->hfs_metazone_end + 1;
+                       }
+               }
+
+               // XXXdbg
+               hfs_global_shared_lock_acquire(hfsmp);
+               if (hfsmp->jnl) {
+                       if (journal_start_transaction(hfsmp->jnl) != 0) {
+                               retval = EINVAL;
+                               goto Err_Exit;
+                       }
+               }
+
                /* lock extents b-tree (also protects volume bitmap) */
-               retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
-               if (retval) goto Err_Exit;
+               retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
+               if (retval) {
+                       if (hfsmp->jnl) {
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       hfs_global_shared_lock_release(hfsmp);
+                       goto Err_Exit;
+               }
 
-               retval = MacToVFSError(
-                                                               ExtendFileC(HTOVCB(hp),
-                                                                                       fcb,
-                                                                                       moreBytesRequested,
-                                                                                       blockHint,
-                                                                                       extendFlags,
-                                                                                       &actualBytesAdded));
+               retval = MacToVFSError(ExtendFileC(vcb,
+                                               (FCB*)fp,
+                                               moreBytesRequested,
+                                               blockHint,
+                                               extendFlags,
+                                               &actualBytesAdded));
 
                *(ap->a_bytesallocated) = actualBytesAdded;
+               filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
 
-               (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+               (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
 
-               DBG_ASSERT(length <= fcb->fcbPLen);
+               // XXXdbg
+               if (hfsmp->jnl) {
+                       tv = time;
+                       VOP_UPDATE(vp, &tv, &tv, 1);
+
+                       hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                       journal_end_transaction(hfsmp->jnl);
+               }
+               hfs_global_shared_lock_release(hfsmp);
 
                /*
                 * if we get an error and no changes were made then exit
                 * otherwise we must do the VOP_UPDATE to reflect the changes
                 */
-        if (retval && (startingPEOF == fcb->fcbPLen)) goto Err_Exit;
+               if (retval && (startingPEOF == filebytes))
+                       goto Err_Exit;
         
-        /*
-         * Adjust actualBytesAdded to be allocation block aligned, not
-         * clump size aligned.
-         * NOTE: So what we are reporting does not affect reality
-         * until the file is closed, when we truncate the file to allocation
-         * block size.
-         */
-
+               /*
+                * Adjust actualBytesAdded to be allocation block aligned, not
+                * clump size aligned.
+                * NOTE: So what we are reporting does not affect reality
+                * until the file is closed, when we truncate the file to allocation
+                * block size.
+                */
                if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
                        *(ap->a_bytesallocated) =
-                               roundup(moreBytesRequested, (off_t)VTOVCB(vp)->blockSize);
+                               roundup(moreBytesRequested, (off_t)vcb->blockSize);
 
-    } else { /* Shorten the size of the file */
+       } else { /* Shorten the size of the file */
 
-       if (fcb->fcbEOF > length) {
+               if (fp->ff_size > length) {
                        /*
                         * Any buffers that are past the truncation point need to be
                         * invalidated (to maintain buffer cache consistency).  For
@@ -1825,51 +1939,77 @@ int hfs_allocate(ap)
                        (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
                }
 
-       /* lock extents b-tree (also protects volume bitmap) */
-        retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
-        if (retval) goto Err_Exit;
+               // XXXdbg
+               hfs_global_shared_lock_acquire(hfsmp);
+               if (hfsmp->jnl) {
+                       if (journal_start_transaction(hfsmp->jnl) != 0) {
+                               retval = EINVAL;
+                               goto Err_Exit;
+                       }
+               }
+
+               /* lock extents b-tree (also protects volume bitmap) */
+               retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
+               if (retval) {
+                       if (hfsmp->jnl) {
+                               journal_end_transaction(hfsmp->jnl);
+                       }
+                       hfs_global_shared_lock_release(hfsmp);
+
+                       goto Err_Exit;
+               }                       
 
-        retval = MacToVFSError(
+               retval = MacToVFSError(
                             TruncateFileC(
-                                            HTOVCB(hp),
-                                            fcb,
+                                            vcb,
+                                            (FCB*)fp,
                                             length,
                                             false));
-        (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+               (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
+               filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+               if (hfsmp->jnl) {
+                       tv = time;
+                       VOP_UPDATE(vp, &tv, &tv, 1);
+
+                       hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+                       journal_end_transaction(hfsmp->jnl);
+               }
+               hfs_global_shared_lock_release(hfsmp);
+               
 
                /*
                 * if we get an error and no changes were made then exit
                 * otherwise we must do the VOP_UPDATE to reflect the changes
                 */
-               if (retval && (startingPEOF == fcb->fcbPLen)) goto Err_Exit;
-        if (fcb->fcbFlags & fcbModifiedMask)
-           hp->h_nodeflags |= IN_MODIFIED;
+               if (retval && (startingPEOF == filebytes)) goto Err_Exit;
+#if QUOTA
+               /* These are  bytesreleased */
+               (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
+#endif /* QUOTA */
 
-        DBG_ASSERT(length <= fcb->fcbPLen)  // DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG
-
-        if (fcb->fcbEOF > fcb->fcbPLen) {
-                       fcb->fcbEOF = fcb->fcbPLen;
+               if (fp->ff_size > filebytes) {
+                       fp->ff_size = filebytes;
 
                        if (UBCISVALID(vp))
-                               ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
-        }
-    }
+                               ubc_setsize(vp, fp->ff_size); /* XXX check errors */
+               }
+       }
 
 Std_Exit:
-    hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
+       cp->c_flag |= C_CHANGE | C_UPDATE;
        retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
 
-    if (retval == 0) retval = retval2;
-
+       if (retval == 0)
+               retval = retval2;
 Err_Exit:
-    DBG_VOP_LOCKS_TEST(retval);
-    return (retval);
+       return (retval);
 }
 
 
-
-
-/* pagein for HFS filesystem, similar to hfs_read(), but without cluster_read() */
+/*
+ * pagein for HFS filesystem
+ */
 int
 hfs_pagein(ap)
        struct vop_pagein_args /* {
@@ -1882,44 +2022,38 @@ hfs_pagein(ap)
                int           a_flags
        } */ *ap;
 {
-    register struct vnode *vp;
-    struct hfsnode       *hp;
-    FCB                          *fcb;
-    int                                devBlockSize = 0;
-    int                   retval;
-
-    DBG_FUNC_NAME("hfs_pagein");
-    DBG_VOP_LOCKS_DECL(1);
-    DBG_VOP_PRINT_FUNCNAME();
-    DBG_VOP_PRINT_VNODE_INFO(vp);DBG_VOP_CONT(("\n"));
-    DBG_VOP_LOCKS_INIT(0,vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-    vp  = ap->a_vp;
-    hp  = VTOH(vp);
-    fcb = HTOFCB(hp);
-
-    if (vp->v_type != VREG && vp->v_type != VLNK)
-       panic("hfs_pagein: vp not UBC type\n");
-
-    DBG_VOP(("\tfile size Ox%X\n", (u_int)fcb->fcbEOF));
-    DBG_VOP(("\tstarting at offset Ox%X of file, length Ox%X\n", (u_int)ap->a_f_offset, (u_int)ap->a_size));
-
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-#endif
+       register struct vnode *vp = ap->a_vp;
+       int devBlockSize = 0;
+       int error;
 
-    VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
+       if (vp->v_type != VREG)
+               panic("hfs_pagein: vp not UBC type\n");
 
-    retval = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
-                        ap->a_size, (off_t)fcb->fcbEOF, devBlockSize,
-                        ap->a_flags);
+       VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);
 
-#if HFS_DIAGNOSTIC
-    debug_check_blocksizes(vp);
-#endif
-    DBG_VOP_LOCKS_TEST(retval);
+       error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
+                               ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
+                               ap->a_flags);
+       /*
+        * Keep track of blocks read
+        */
+       if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
+               struct cnode *cp;
+               
+               cp = VTOC(vp);          
+               /*
+                * If this file hasn't been seen since the start of
+                * the current sampling period then start over.
+                */
+               if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
+                       VTOF(vp)->ff_bytesread = ap->a_size;
+               else
+                       VTOF(vp)->ff_bytesread += ap->a_size;
 
-    return (retval);
+               cp->c_flag |= C_ACCESS;
+       }
+
+       return (error);
 }
 
 /* 
@@ -1937,43 +2071,36 @@ hfs_pageout(ap)
           int           a_flags
        } */ *ap;
 {
-       struct vnode    *vp = ap->a_vp;
-       struct hfsnode  *hp =  VTOH(vp);
-       FCB             *fcb = HTOFCB(hp);
-       int              retval;
-       int              devBlockSize = 0;
-       off_t            end_of_range;
-
-       DBG_FUNC_NAME("hfs_pageout");
-       DBG_VOP_LOCKS_DECL(1);
-       DBG_VOP_PRINT_FUNCNAME();
-       DBG_VOP_PRINT_VNODE_INFO(vp);DBG_VOP_CONT(("\n"));
-       DBG_VOP(("\thfsnode 0x%x (%s)\n", (u_int)hp, H_NAME(hp)));
-       DBG_VOP(("\tstarting at offset Ox%lX of file, length Ox%lX\n", 
-               (UInt32)ap->a_f_offset, (UInt32)ap->a_size));
-
-       DBG_VOP_LOCKS_INIT(0, vp, VOPDBG_LOCKED, 
-               VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
-
-#if HFS_DIAGNOSTIC
-       debug_check_blocksizes(vp);
-#endif
+       struct vnode *vp = ap->a_vp;
+       struct cnode *cp = VTOC(vp);
+       struct filefork *fp = VTOF(vp);
+       int retval;
+       int devBlockSize = 0;
+       off_t end_of_range;
+       off_t filesize;
 
        if (UBCINVALID(vp))
                panic("hfs_pageout: Not a  VREG: vp=%x", vp);
 
-       VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
-
+       VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
+       filesize = fp->ff_size;
        end_of_range = ap->a_f_offset + ap->a_size - 1;
 
-       if (end_of_range >= (off_t)fcb->fcbEOF)
-               end_of_range = (off_t)(fcb->fcbEOF - 1);
+       if (cp->c_flag & C_RELOCATING) {
+               if (end_of_range < (filesize / 2)) {
+                       return (EBUSY);
+               }
+       }
 
-       if (ap->a_f_offset < (off_t)fcb->fcbEOF)
-               rl_remove(ap->a_f_offset, end_of_range, &hp->h_invalidranges);
+       if (end_of_range >= filesize)
+               end_of_range = (off_t)(filesize - 1);
+       if (ap->a_f_offset < filesize) {
+               rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
+               cp->c_flag |= C_MODIFIED;  /* leof is dirty */
+       }
 
        retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
-                                (off_t)fcb->fcbEOF, devBlockSize, ap->a_flags);
+                                filesize, devBlockSize, ap->a_flags);
 
        /*
         * If we successfully wrote any data, and we are not the superuser
@@ -1981,13 +2108,8 @@ hfs_pageout(ap)
         * tampering.
         */
        if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
-               hp->h_meta->h_mode &= ~(ISUID | ISGID);
-
-#if HFS_DIAGNOSTIC
-       debug_check_blocksizes(vp);
-#endif
+               cp->c_mode &= ~(S_ISUID | S_ISGID);
 
-       DBG_VOP_LOCKS_TEST(retval);
        return (retval);
 }
 
@@ -1999,39 +2121,502 @@ hfs_pageout(ap)
  */
 int
 hfs_bwrite(ap)
-struct vop_bwrite_args /* {
-    struct buf *a_bp;
-} */ *ap;
+       struct vop_bwrite_args /* {
+               struct buf *a_bp;
+       } */ *ap;
 {
-    register struct buf *bp = ap->a_bp;
-    register struct vnode *vp = bp->b_vp;
-    BlockDescriptor block;
-    int retval = 0;
-
-       DBG_FUNC_NAME("hfs_bwrite");
-
+       int retval = 0;
+       register struct buf *bp = ap->a_bp;
+       register struct vnode *vp = bp->b_vp;
 #if BYTE_ORDER == LITTLE_ENDIAN
-    /* Trap B-Tree writes */
-    if ((H_FILEID(VTOH(vp)) == kHFSExtentsFileID) ||
-        (H_FILEID(VTOH(vp)) == kHFSCatalogFileID)) {
-
-        /* Swap if the B-Tree node is in native byte order */
-        if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
-            /* Prepare the block pointer */
-            block.blockHeader = bp;
-            block.buffer = bp->b_data;
-            block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;    /* not found in cache ==> came from disk */
-            block.blockSize = bp->b_bcount;
+       BlockDescriptor block;
+
+       /*
+        * Trap B-Tree writes: only buffers that belong to the extents
+        * file or the catalog file are examined here.
+        */
+       if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
+           (VTOC(vp)->c_fileid == kHFSCatalogFileID)) {
+
+               /*
+                * Swap if the B-Tree node is in native byte order.
+                * The test reads the last 16-bit word of the buffer and
+                * compares it with 0x000e.
+                * NOTE(review): 0x000e is presumably the first-record
+                * offset stored at the end of every node -- confirm.
+                */
+               if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
+                       /* Prepare the block pointer */
+                       block.blockHeader = bp;
+                       block.buffer = bp->b_data;
+                       /* not found in cache ==> came from disk */
+                       block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
+                       block.blockSize = bp->b_bcount;
     
-            /* Endian un-swap B-Tree node */
-            SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), H_FILEID(VTOH(vp)), 1);
-        }
+                       /* Endian un-swap B-Tree node */
+                       SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
+               }
 
-        /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
-    }
+               /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
+       }
 #endif
+       /*
+        * This buffer shouldn't be locked anymore, but if it is, clear
+        * the lock bit.  When the mount has a journal (jnl is set) a
+        * locked buffer here is treated as fatal and we panic; otherwise
+        * the bit is cleared and a message is logged before the write
+        * is handed to vn_bwrite().
+        */
+       if (ISSET(bp->b_flags, B_LOCKED)) {
+           // XXXdbg
+           if (VTOHFS(vp)->jnl) {
+                       panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
+           }
+               CLR(bp->b_flags, B_LOCKED);
+               printf("hfs_bwrite: called with lock bit set\n");
+       }
+       retval = vn_bwrite (ap);
 
-    retval = vn_bwrite (ap);
+       return (retval);
+}
 
-    return (retval);
+/*
+ * Relocate a file to a new location on disk
+ *  cnode must be locked on entry
+ *
+ * vp        - vnode of the file to move; must be VREG or VLNK
+ * blockHint - allocation block near which the new blocks are requested;
+ *             0 means use the volume's nextAllocation
+ * cred, p   - credentials and process handed through to the locking,
+ *             clone, buffer-invalidate and fsync calls
+ *
+ * Returns 0 on success, otherwise an errno value:
+ *   EPERM  - vp is neither a regular file nor a symlink
+ *   ENOSPC - free space is flagged as fragmented, or no suitable
+ *            contiguous allocation could be obtained
+ *   EINVAL - the fork has unallocated (delayed) blocks, or the journal
+ *            transaction could not be started
+ *   EFBIG  - the fork is larger than 0x7fffffff bytes, or a symlink is
+ *            larger than one allocation block
+ *   or any error reported by the mapping, cloning, truncation or
+ *   fsync steps.  The first error seen is the one returned.
+ *
+ * Relocation occurs by cloning the file's data from its
+ * current set of blocks to a new set of blocks. During
+ * the relocation all of the blocks (old and new) are
+ * owned by the file.
+ *
+ * -----------------
+ * |///////////////|
+ * -----------------
+ * 0               N (file offset)
+ *
+ * -----------------     -----------------
+ * |///////////////|     |               |     STEP 1 (acquire new blocks)
+ * -----------------     -----------------
+ * 0               N     N+1             2N
+ *
+ * -----------------     -----------------
+ * |///////////////|     |///////////////|     STEP 2 (clone data)
+ * -----------------     -----------------
+ * 0               N     N+1             2N
+ *
+ *                       -----------------
+ *                       |///////////////|     STEP 3 (head truncate blocks)
+ *                       -----------------
+ *                       0               N
+ *
+ * During steps 2 and 3 page-outs to file offsets less
+ * than or equal to N are suspended.
+ *
+ * During step 3 page-ins to the file get suspended.
+ */
+__private_extern__
+int
+hfs_relocate(vp, blockHint, cred, p)
+       struct  vnode *vp;
+       u_int32_t  blockHint;
+       struct  ucred *cred;
+       struct  proc *p;
+{
+       struct  filefork *fp;
+       struct  hfsmount *hfsmp;
+       ExtendedVCB *vcb;
+
+       u_int32_t  headblks;
+       u_int32_t  datablks;
+       u_int32_t  blksize;
+       u_int32_t  realsize;
+       u_int32_t  growsize;
+       u_int32_t  nextallocsave;
+       u_int32_t  sector_a;
+       u_int32_t  sector_b;
+       int eflags;
+       u_int32_t  oldstart;  /* debug only */
+       off_t  newbytes;
+       int  retval;
+       int  fsync_error;
+
+       if (vp->v_type != VREG && vp->v_type != VLNK) {
+               return (EPERM);
+       }
+
+       hfsmp = VTOHFS(vp);
+       if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
+               return (ENOSPC);
+       }
+
+       fp = VTOF(vp);
+       if (fp->ff_unallocblocks)
+               return (EINVAL);
+       vcb = VTOVCB(vp);
+       blksize = vcb->blockSize;
+       if (blockHint == 0)
+               blockHint = vcb->nextAllocation;
+
+       if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
+           (vp->v_type == VLNK && fp->ff_size > blksize)) {
+               return (EFBIG);
+       }
+
+       headblks = fp->ff_blocks;
+       datablks = howmany(fp->ff_size, blksize);
+       growsize = datablks * blksize;
+       realsize = fp->ff_size;
+       eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
+       if (blockHint >= hfsmp->hfs_metazone_start &&
+           blockHint <= hfsmp->hfs_metazone_end)
+               eflags |= kEFMetadataMask;
+
+       hfs_global_shared_lock_acquire(hfsmp);
+       if (hfsmp->jnl) {
+               if (journal_start_transaction(hfsmp->jnl) != 0) {
+                       /* Do not leak the global lock on this early exit. */
+                       hfs_global_shared_lock_release(hfsmp);
+                       return (EINVAL);
+               }
+       }
+
+       /* Lock extents b-tree (also protects volume bitmap) */
+       retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);
+       if (retval)
+               goto out2;
+
+       /* Remember where the last byte of the current data lives. */
+       retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
+       if (retval) {
+               retval = MacToVFSError(retval);
+               goto out;
+       }
+
+       /*
+        * STEP 1 - acquire new allocation blocks.
+        */
+       nextallocsave = vcb->nextAllocation;
+       retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
+       if (eflags & kEFMetadataMask)
+               vcb->nextAllocation = nextallocsave;
+
+       retval = MacToVFSError(retval);
+       if (retval == 0) {
+               VTOC(vp)->c_flag |= C_MODIFIED;
+               if (newbytes < growsize) {
+                       retval = ENOSPC;
+                       goto restore;
+               } else if (fp->ff_blocks < (headblks + datablks)) {
+                       printf("hfs_relocate: allocation failed");
+                       retval = ENOSPC;
+                       goto restore;
+               }
+
+               retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, &sector_b, NULL);
+               if (retval) {
+                       /* The extension succeeded; give the new blocks back. */
+                       retval = MacToVFSError(retval);
+                       goto restore;
+               } else if ((sector_a + 1) == sector_b) {
+                       /* New blocks simply continue the old ones: nothing moved. */
+                       retval = ENOSPC;
+                       goto restore;
+               } else if ((eflags & kEFMetadataMask) &&
+                          ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
+                             hfsmp->hfs_metazone_end)) {
+                       printf("hfs_relocate: didn't move into metadata zone\n");
+                       retval = ENOSPC;
+                       goto restore;
+               }
+       }
+       if (retval) {
+               /*
+                * Check to see if failure is due to excessive fragmentation.
+                */
+               if (retval == ENOSPC &&
+                   hfs_freeblks(hfsmp, 0) > (datablks * 2)) {
+                       hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
+               }
+               goto out;
+       }
+
+       /* Expose the whole (old + new) allocation while cloning. */
+       fp->ff_size = fp->ff_blocks * blksize;
+       if (UBCISVALID(vp))
+               (void) ubc_setsize(vp, fp->ff_size);
+
+       /*
+        * STEP 2 - clone data into the new allocation blocks.
+        */
+
+       if (vp->v_type == VLNK)
+               retval = hfs_clonelink(vp, blksize, cred, p);
+       else if (vp->v_flag & VSYSTEM)
+               retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
+       else
+               retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p);
+
+       if (retval)
+               goto restore;
+
+       oldstart = fp->ff_extents[0].startBlock;
+
+       /*
+        * STEP 3 - switch to clone and remove old blocks.
+        */
+       SET(VTOC(vp)->c_flag, C_NOBLKMAP);   /* suspend page-ins */
+
+       retval = HeadTruncateFile(vcb, (FCB*)fp, headblks);
+
+       CLR(VTOC(vp)->c_flag, C_NOBLKMAP);   /* resume page-ins */
+       if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP))
+               wakeup(VTOC(vp));
+       if (retval)
+               goto restore;
+
+       fp->ff_size = realsize;
+       if (UBCISVALID(vp)) {
+               (void) ubc_setsize(vp, realsize);
+               (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+       }
+
+       CLR(VTOC(vp)->c_flag, C_RELOCATING);  /* Resume page-outs for this file. */
+out:
+       (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
+
+       /*
+        * Always push the file to disk, but never let a successful
+        * fsync hide an earlier failure from the caller.
+        */
+       fsync_error = VOP_FSYNC(vp, cred, MNT_WAIT, p);
+       if (retval == 0)
+               retval = fsync_error;
+out2:
+       if (hfsmp->jnl) {
+               if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID)
+                       (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+               else
+                       (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+               journal_end_transaction(hfsmp->jnl);
+       }
+       hfs_global_shared_lock_release(hfsmp);
+
+       return (retval);
+
+restore:
+       /*
+        * Give back any newly allocated space.
+        */
+       if (fp->ff_size != realsize)
+               fp->ff_size = realsize;
+       (void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false);
+       if (UBCISVALID(vp))
+               (void) ubc_setsize(vp, fp->ff_size);
+       CLR(VTOC(vp)->c_flag, C_RELOCATING);
+       goto out;
+}
+
+
+/*
+ * Clone a symlink.
+ *
+ * Reads logical block 0 of the link (blksize bytes) and writes a copy
+ * of it to logical block 1.  The source buffer is invalidated and
+ * released, and the vnode's buffers are flushed and invalidated
+ * before returning, whether or not the copy succeeded.
+ *
+ * Returns 0 on success, the meta_bread/bwrite error, or EIO when the
+ * destination buffer cannot be obtained.
+ */
+static int
+hfs_clonelink(struct vnode *vp, int blksize, struct ucred *cred, struct proc *p)
+{
+       struct buf *src_bp = NULL;
+       struct buf *dst_bp = NULL;
+       int error;
+
+       error = meta_bread(vp, 0, blksize, cred, &src_bp);
+       if (error == 0) {
+               dst_bp = getblk(vp, 1, blksize, 0, 0, BLK_META);
+               if (dst_bp != NULL) {
+                       bcopy(src_bp->b_data, dst_bp->b_data, blksize);
+                       error = bwrite(dst_bp);
+               } else {
+                       error = EIO;
+               }
+       }
+
+       /* The source block is stale once cloned (or on failure); drop it. */
+       if (src_bp != NULL) {
+               src_bp->b_flags |= B_INVAL;
+               brelse(src_bp);
+       }
+       (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+
+       return (error);
 }
+
+/*
+ * Clone a file's data within the file.
+ *
+ * Copies blkcnt allocation blocks, read from file offset 0, to the
+ * region that begins at allocation block blkstart of the same file.
+ * Data is staged through a temporary kernel buffer of at most 64K
+ * using cluster_read() and cluster_write().
+ *
+ * C_RELOCATING is set on the cnode here and is not cleared by this
+ * routine.
+ *
+ * Returns 0 on success, otherwise an errno value.
+ */
+static int
+hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
+              struct ucred *cred, struct proc *p)
+{
+       struct iovec iov;
+       struct uio uio;
+       caddr_t  xferbuf;
+       size_t  dstbase;
+       size_t  xfermax;
+       size_t  total;
+       size_t  chunk;
+       size_t  eof;
+       size_t  done;
+       int  devblocksize;
+       int  held;
+       int  error;
+
+       error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+       if (error) {
+               printf("hfs_clonefile: vinvalbuf failed - %d\n", error);
+               return (error);
+       }
+
+       if (!ubc_clean(vp, 1)) {
+               printf("hfs_clonefile: not ubc_clean\n");
+               return (EIO);  /* XXX error code */
+       }
+
+       /* From here on page-outs for this file are held off. */
+       SET(VTOC(vp)->c_flag, C_RELOCATING);
+
+       eof = VTOF(vp)->ff_size;
+       dstbase = blkstart * blksize;
+       total = blkcnt * blksize;
+       xfermax = MIN(total, 4096 * 16);
+       chunk = xfermax;
+
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&xferbuf, xfermax)) {
+               return (ENOMEM);
+       }
+
+       VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize);
+
+       uio.uio_iov = &iov;
+       uio.uio_iovcnt = 1;
+       uio.uio_segflg = UIO_SYSSPACE;
+       uio.uio_procp = p;
+
+       for (done = 0; done < total; done += chunk) {
+               /* The final pass may be shorter than the staging buffer. */
+               if ((total - done) < chunk)
+                       chunk = total - done;
+
+               /* Read one chunk of the original data. */
+               iov.iov_base = xferbuf;
+               iov.iov_len = chunk;
+               uio.uio_resid = chunk;
+               uio.uio_offset = done;
+               uio.uio_rw = UIO_READ;
+
+               error = cluster_read(vp, &uio, total, devblocksize, 0);
+               if (error) {
+                       printf("hfs_clonefile: cluster_read failed - %d\n", error);
+                       break;
+               }
+               if (uio.uio_resid != 0) {
+                       printf("clonedata: cluster_read: uio_resid = %d\n", (int)uio.uio_resid);
+                       error = EIO;
+                       break;
+               }
+
+               /* Write that chunk at the same offset within the clone. */
+               iov.iov_base = xferbuf;
+               iov.iov_len = chunk;
+               uio.uio_resid = chunk;
+               uio.uio_offset = dstbase + done;
+               uio.uio_rw = UIO_WRITE;
+
+               error = cluster_write(vp, &uio, eof + done,
+                                     eof + done + chunk,
+                                     uio.uio_offset, 0, devblocksize, 0);
+               if (error) {
+                       printf("hfs_clonefile: cluster_write failed - %d\n", error);
+                       break;
+               }
+               if (uio.uio_resid != 0) {
+                       printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
+                       error = EIO;
+                       break;
+               }
+       }
+
+       if (error == 0) {
+               /* Clean the pages in VM, then drop all associated buffers. */
+               held = ubc_hold(vp);
+               if (held)
+                       (void) ubc_clean(vp, 1);
+
+               (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+
+               if (held)
+                       ubc_rele(vp);
+       }
+       kmem_free(kernel_map, (vm_offset_t)xferbuf, xfermax);
+
+       return (error);
+}
+
+/*
+ * Clone a system (metadata) file.
+ *
+ * Copies blkcnt allocation blocks, read from the start of the file, to
+ * the region that begins at allocation block blkstart of the same
+ * file.  Block numbers are converted from allocation-block units to
+ * the file's logical block size (GetLogicalBlockSize) and the data is
+ * moved through the buffer cache (meta_bread / getblk / bwrite) in
+ * batches of up to one megabyte.
+ *
+ * The file is always fsync'ed before returning.  The value returned is
+ * the first error encountered while copying, or the fsync result when
+ * the copy itself succeeded, so a failed copy is never reported as
+ * success to the caller.
+ */
+static int
+hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
+                 struct ucred *cred, struct proc *p)
+{
+       caddr_t  bufp;
+       char * offset;
+       size_t  bufsize;
+       size_t  iosize;
+       struct buf *bp = NULL;
+       daddr_t  blkno;
+       daddr_t  blk;
+       int  breadcnt;
+       int  i;
+       int  error = 0;
+       int  fsync_error;
+
+
+       iosize = GetLogicalBlockSize(vp);
+       /* Staging buffer: at most 1 MB, rounded down to whole logical blocks. */
+       bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
+       breadcnt = bufsize / iosize;
+
+       if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+               return (ENOMEM);
+       }
+       /* Switch from allocation-block units to logical-block units. */
+       blkstart = (blkstart * blksize) / iosize;
+       blkcnt = (blkcnt * blksize) / iosize;
+       blkno = 0;
+
+       while (blkno < blkcnt) {
+               /*
+                * Read up to a megabyte
+                */
+               offset = bufp;
+               for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) {
+                       error = meta_bread(vp, blk, iosize, cred, &bp);
+                       if (error) {
+                               printf("hfs_clonesysfile: meta_bread error %d\n", error);
+                               goto out;
+                       }
+                       if (bp->b_bcount != iosize) {
+                               printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount);
+                               /* A short buffer means the copy is incomplete. */
+                               error = EIO;
+                               goto out;
+                       }
+
+                       bcopy(bp->b_data, offset, iosize);
+                       bp->b_flags |= B_INVAL;
+                       brelse(bp);
+                       bp = NULL;
+                       offset += iosize;
+               }
+
+               /*
+                * Write up to a megabyte
+                */
+               offset = bufp;
+               for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) {
+                       bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META);
+                       if (bp == NULL) {
+                               printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno);
+                               error = EIO;
+                               goto out;
+                       }
+                       bcopy(offset, bp->b_data, iosize);
+                       error = bwrite(bp);
+                       bp = NULL;
+                       if (error)
+                               goto out;
+                       offset += iosize;
+               }
+       }
+out:
+       if (bp) {
+               brelse(bp);
+       }
+
+       kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
+
+       /* Still flush, but keep the first error rather than overwriting it. */
+       fsync_error = VOP_FSYNC(vp, cred, MNT_WAIT, p);
+       if (error == 0)
+               error = fsync_error;
+
+       return (error);
+}
+