*
* @APPLE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#define CL_DEV_MEMORY 0x200
#define CL_PRESERVE 0x400
+
struct clios {
- u_int io_completed;
- u_int io_issued;
- off_t io_offset;
- int io_error;
- int io_wanted;
+ u_int io_completed; /* amount of io that has currently completed */
+ u_int io_issued; /* amount of io that was successfully issued */
+ int io_error; /* error code of first error encountered */
+ int io_wanted; /* someone is sleeping waiting for a change in state */
};
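/*
 * a minimal sketch (not code from this file) of the handshake these
 * fields support: the issuing side bumps io_issued as each async buf
 * is sent down and throttles itself by sleeping on io_wanted, while
 * the completion side credits io_completed and wakes any sleeper...
 * the streaming paths below all follow this shape:
 *
 *	while ((iostate.io_issued - iostate.io_completed) > limit) {
 *		iostate.io_wanted = 1;
 *		tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "clios", 0);
 *	}
 *
 * where 'limit' and the "clios" wmesg are placeholders... the nocopy
 * read/write paths use 2 * MAX_UPL_TRANSFER * PAGE_SIZE and their own
 * function names
 */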
static int cluster_phys_write(struct vnode *vp, struct uio *uio,
off_t newEOF, int devblocksize, int flags);
static int cluster_align_phys_io(struct vnode *vp, struct uio *uio,
- vm_offset_t usr_paddr, int xsize, int devblocksize, int flags);
+ addr64_t usr_paddr, int xsize, int devblocksize, int flags);
static int cluster_push_x(struct vnode *vp, off_t EOF, daddr_t first, daddr_t last, int can_delay);
static int cluster_try_push(struct vnode *vp, off_t newEOF, int can_delay, int push_all);
int total_resid;
int upl_offset;
int zero_offset;
- int l_blkno;
upl_t upl;
struct buf *cbp;
struct buf *cbp_head;
real_bp = cbp->b_real_bp;
vp = cbp->b_vp;
zero_offset = cbp->b_validend;
- l_blkno = cbp->b_lblkno;
iostate = (struct clios *)cbp->b_iostate;
while (cbp) {
- if (cbp->b_vectorcount > 1)
- _FREE(cbp->b_vectorlist, M_SEGMENT);
-
if ((cbp->b_flags & B_ERROR) && error == 0)
error = cbp->b_error;
wakeup((caddr_t)&vp->v_numoutput);
}
if (iostate) {
- if (error) {
- off_t error_offset;
-
- error_offset = (off_t)l_blkno * PAGE_SIZE_64;
+ /*
+ * someone has issued multiple I/Os asynchronously
+ * and is waiting for them to complete (streaming)
+ */
+ if (error && iostate->io_error == 0)
+ iostate->io_error = error;
- if (iostate->io_error == 0) {
- iostate->io_error = error;
- iostate->io_offset = error_offset;
- } else {
- if (error_offset < iostate->io_offset)
- iostate->io_offset = error_offset;
- }
- }
iostate->io_completed += total_size;
if (iostate->io_wanted) {
+ /*
+ * someone is waiting for the state of
+ * this io stream to change
+ */
iostate->io_wanted = 0;
wakeup((caddr_t)&iostate->io_wanted);
}
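/*
 * this wakeup is the completion half of the streaming handshake...
 * the matching io_wanted/tsleep side lives in the nocopy read and
 * write paths below.  since only the first error is latched into
 * io_error, a waiter that drains the stream sees the error from the
 * earliest buf that failed
 */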
struct clios *iostate;
{
struct buf *cbp;
- struct iovec *iovp;
u_int size;
u_int io_size;
int io_flags;
zero_offset = upl_offset + non_rounded_size;
}
while (size) {
- int vsize;
- int i;
+ int i;
int pl_index;
int pg_resid;
int num_contig;
real_bp->b_blkno = blkno;
}
- if (pg_count > 1) {
- if (pg_count > max_vectors) {
- io_size -= (pg_count - max_vectors) * PAGE_SIZE;
+ if (pg_count > max_vectors) {
+ io_size -= (pg_count - max_vectors) * PAGE_SIZE;
- if (io_size < 0) {
- io_size = PAGE_SIZE - pg_offset;
- pg_count = 1;
- } else
- pg_count = max_vectors;
- }
- /*
- * we need to allocate space for the vector list
- */
- if (pg_count > 1) {
- iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
- M_SEGMENT, M_NOWAIT);
-
- if (iovp == (struct iovec *) 0) {
- /*
- * if the allocation fails, then throttle down to a single page
- */
- io_size = PAGE_SIZE - pg_offset;
- pg_count = 1;
- }
- }
+ if (io_size < 0) {
+ io_size = PAGE_SIZE - pg_offset;
+ pg_count = 1;
+ } else
+ pg_count = max_vectors;
}
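/*
 * worked example (assuming 4K pages and max_vectors == 32): a request
 * spanning 34 pages has io_size trimmed by 2 * PAGE_SIZE and pg_count
 * clamped to 32... only if the trim would drive io_size negative do we
 * fall all the way back to a single page, which is always issuable
 */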
/* Throttle the speculative IO */
cbp = alloc_io_buf(vp, priv);
- if (pg_count == 1)
- /*
- * we use the io vector that's reserved in the buffer header
- * this insures we can always issue an I/O even in a low memory
- * condition that prevents the _MALLOC from succeeding... this
- * is necessary to prevent deadlocks with the pager
- */
- iovp = (struct iovec *)(&cbp->b_vects[0]);
-
- cbp->b_vectorlist = (void *)iovp;
- cbp->b_vectorcount = pg_count;
-
- if (flags & CL_DEV_MEMORY) {
-
- iovp->iov_len = io_size;
- iovp->iov_base = (caddr_t)upl_phys_page(pl, 0);
-
- if (iovp->iov_base == (caddr_t) 0) {
- free_io_buf(cbp);
- error = EINVAL;
- } else
- iovp->iov_base += upl_offset;
- } else {
-
- for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {
- int psize;
-
- psize = PAGE_SIZE - pg_offset;
-
- if (psize > vsize)
- psize = vsize;
- iovp->iov_len = psize;
- iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);
-
- if (iovp->iov_base == (caddr_t) 0) {
- if (pg_count > 1)
- _FREE(cbp->b_vectorlist, M_SEGMENT);
- free_io_buf(cbp);
-
- error = EINVAL;
- break;
- }
- iovp->iov_base += pg_offset;
- pg_offset = 0;
-
- if (flags & CL_PAGEOUT) {
+ if (flags & CL_PAGEOUT) {
+ for (i = 0; i < pg_count; i++) {
int s;
struct buf *bp;
}
splx(s);
}
- vsize -= psize;
- }
}
- if (error)
- break;
-
if (flags & CL_ASYNC) {
cbp->b_flags |= (B_CALL | B_ASYNC);
cbp->b_iodone = (void *)cluster_iodone;
cbp->b_trans_next = (struct buf *)0;
if (cbp->b_iostate = (void *)iostate)
+ /*
+ * caller wants to track the state of this
+ * io... bump the amount issued against this stream
+ */
iostate->io_issued += io_size;
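/*
 * the assignment inside the 'if' above is intentional... b_iostate is
 * set and tested for non-NULL in one step.  the io_issued bump pairs
 * with cluster_iodone(), which credits the same io_size back through
 * io_completed when the buf finishes, so the waiter's
 * (io_issued - io_completed) test converges to zero exactly when every
 * buf charged here has completed
 */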
if (flags & CL_READ)
for (cbp = cbp_head; cbp;) {
struct buf * cbp_next;
- if (cbp->b_vectorcount > 1)
- _FREE(cbp->b_vectorlist, M_SEGMENT);
upl_offset -= cbp->b_bcount;
size += cbp->b_bcount;
io_size += cbp->b_bcount;
cbp = cbp_next;
}
if (iostate) {
- if (iostate->io_error == 0) {
+ /*
+ * update the error condition for this stream...
+ * since we never really issued the io,
+ * just go ahead and adjust the issued count back down
+ */
+ if (iostate->io_error == 0)
iostate->io_error = error;
- iostate->io_offset = f_offset - (off_t)io_size;
- }
iostate->io_issued -= io_size;
if (iostate->io_wanted) {
+ /*
+ * someone is waiting for the state of
+ * this io stream to change
+ */
iostate->io_wanted = 0;
wakeup((caddr_t)&iostate->io_wanted);
}
vm_offset_t upl_offset;
off_t max_io_size;
int io_size;
+ int io_flag;
int upl_size;
int upl_needed_size;
int pages_in_pl;
kern_return_t kret;
struct iovec *iov;
int i;
+ int first = 1;
int force_data_sync;
int error = 0;
+ struct clios iostate;
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
(int)uio->uio_offset, (int)uio->uio_resid,
*/
cluster_try_push(vp, newEOF, 0, 1);
+ iostate.io_completed = 0;
+ iostate.io_issued = 0;
+ iostate.io_error = 0;
+ iostate.io_wanted = 0;
+
iov = uio->uio_iov;
while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
- io_size = uio->uio_resid;
-
- if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
- io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
+ io_size = uio->uio_resid;
- upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
- upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
- (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);
-
- for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
- {
- pages_in_pl = 0;
- upl_size = upl_needed_size;
- upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
- UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
+ if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
+ io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
- kret = vm_map_get_upl(current_map(),
- (vm_offset_t)iov->iov_base & ~PAGE_MASK,
- &upl_size,
- &upl,
- NULL,
- &pages_in_pl,
- &upl_flags,
- force_data_sync);
+ if (first) {
+ if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE) / 4)
+ io_size = (MAX_UPL_TRANSFER * PAGE_SIZE) / 8;
+ first = 0;
+ }
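/*
 * arithmetic of the first-pass clamp (assuming MAX_UPL_TRANSFER == 256
 * and 4K pages): the full window is 1MB, so a first request larger
 * than 256K is cut to 128K... the first I/O gets to the disk quickly
 * and later passes widen back out to the full window
 */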
+ upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
+ upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
+ (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);
+
+ for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
+ pages_in_pl = 0;
+ upl_size = upl_needed_size;
+ upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
+ UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
+
+ kret = vm_map_get_upl(current_map(),
+ (vm_offset_t)iov->iov_base & ~PAGE_MASK,
+ &upl_size,
+ &upl,
+ NULL,
+ &pages_in_pl,
+ &upl_flags,
+ force_data_sync);
+
+ if (kret != KERN_SUCCESS) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
+ 0, 0, 0, kret, 0);
- if (kret != KERN_SUCCESS)
- {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
- 0, 0, 0, kret, 0);
+ /*
+ * cluster_nocopy_write: failed to get pagelist
+ *
+ * we may have already spun some portion of this request
+ * off as async requests... we need to wait for the I/O
+ * to complete before returning
+ */
+ goto wait_for_writes;
+ }
+ pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
+ pages_in_pl = upl_size / PAGE_SIZE;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
- (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);
+ for (i = 0; i < pages_in_pl; i++) {
+ if (!upl_valid_page(pl, i))
+ break;
+ }
+ if (i == pages_in_pl)
+ break;
- /* cluster_nocopy_write: failed to get pagelist */
- /* do not return kret here */
- return(0);
+ /*
+ * didn't get all the pages back that we
+ * needed... release this upl and try again
+ */
+ ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
+ UPL_ABORT_FREE_ON_EMPTY);
}
+ if (force_data_sync >= 3) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
+ i, pages_in_pl, upl_size, kret, 0);
- pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
- pages_in_pl = upl_size / PAGE_SIZE;
-
- for(i=0; i < pages_in_pl; i++)
- {
- if (!upl_valid_page(pl, i))
- break;
+ /*
+ * for some reason, we couldn't acquire a hold on all
+ * the pages needed in the user's address space
+ *
+ * we may have already spun some portion of this request
+ * off as async requests... we need to wait for the I/O
+ * to complete before returning
+ */
+ goto wait_for_writes;
}
- if (i == pages_in_pl)
- break;
-
- ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
- UPL_ABORT_FREE_ON_EMPTY);
- }
-
- if (force_data_sync >= 3)
- {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
- i, pages_in_pl, upl_size, kret, 0);
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
- (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);
- return(0);
- }
-
- /*
- * Consider the possibility that upl_size wasn't satisfied.
- */
- if (upl_size != upl_needed_size)
- io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
- (int)upl_offset, upl_size, (int)iov->iov_base, io_size, 0);
-
- if (io_size == 0)
- {
- ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
- UPL_ABORT_FREE_ON_EMPTY);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
- (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);
+ /*
+ * Consider the possibility that upl_size wasn't satisfied.
+ */
+ if (upl_size != upl_needed_size)
+ io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
- return(0);
- }
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
+ (int)upl_offset, upl_size, (int)iov->iov_base, io_size, 0);
- /*
- * Now look for pages already in the cache
- * and throw them away.
- */
+ if (io_size == 0) {
+ ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
+ UPL_ABORT_FREE_ON_EMPTY);
- upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
- max_io_size = io_size;
+ /*
+ * we may have already spun some portion of this request
+ * off as async requests... we need to wait for the I/O
+ * to complete before returning
+ */
+ goto wait_for_writes;
+ }
+ /*
+ * Now look for pages already in the cache
+ * and throw them away.
+ */
- while (max_io_size) {
+ upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
+ max_io_size = io_size;
- /*
- * Flag UPL_POP_DUMP says if the page is found
- * in the page cache it must be thrown away.
- */
- ubc_page_op(vp,
- upl_f_offset,
- UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
- 0, 0);
- max_io_size -= PAGE_SIZE;
- upl_f_offset += PAGE_SIZE;
- }
-
- /*
- * issue a synchronous write to cluster_io
- */
+ while (max_io_size) {
+ /*
+ * Flag UPL_POP_DUMP says if the page is found
+ * in the page cache it must be thrown away.
+ */
+ ubc_page_op(vp,
+ upl_f_offset,
+ UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
+ 0, 0);
+ max_io_size -= PAGE_SIZE_64;
+ upl_f_offset += PAGE_SIZE_64;
+ }
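/*
 * dumping any cached copies keeps the page cache coherent with this
 * write... the data is about to go from the user's pages straight to
 * disk, so a copy left in the cache would be stale the moment the I/O
 * completes
 */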
+ /*
+ * we want to push out these writes asynchronously so that we can overlap
+ * the preparation of the next I/O...
+ * if there are already too many outstanding writes,
+ * wait until some complete before issuing the next
+ */
+ while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) {
+ iostate.io_wanted = 1;
+ tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_write", 0);
+ }
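/*
 * under the same 256 page / 4K assumption as above, this in-flight cap
 * works out to 2MB... i.e. at most two full windows of writes can be
 * outstanding before the issuer is put to sleep
 */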
+ if (iostate.io_error) {
+ /*
+ * one of the earlier writes we issued ran into a hard error
+ * don't issue any more writes, cleanup the UPL
+ * that was just created but not used, then
+ * go wait for all writes that are part of this stream
+ * to complete before returning the error to the caller
+ */
+ ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
+ UPL_ABORT_FREE_ON_EMPTY);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
- (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);
+ goto wait_for_writes;
+ }
+ io_flag = CL_ASYNC | CL_PRESERVE | CL_COMMIT;
- error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
- io_size, devblocksize, 0, (struct buf *)0, (struct clios *)0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
+ (int)upl_offset, (int)uio->uio_offset, io_size, io_flag, 0);
- if (error == 0) {
- /*
- * The cluster_io write completed successfully,
- * update the uio structure.
- */
- iov->iov_base += io_size;
- iov->iov_len -= io_size;
- uio->uio_resid -= io_size;
- uio->uio_offset += io_size;
- }
- /*
- * always 'commit' the I/O via the abort primitive whether the I/O
- * succeeded cleanly or not... this is necessary to insure that
- * we preserve the state of the DIRTY flag on the pages used to
- * provide the data for the I/O... the state of this flag SHOULD
- * NOT be changed by a write
- */
- ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
- UPL_ABORT_FREE_ON_EMPTY);
+ error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
+ io_size, devblocksize, io_flag, (struct buf *)0, &iostate);
+ iov->iov_len -= io_size;
+ iov->iov_base += io_size;
+ uio->uio_resid -= io_size;
+ uio->uio_offset += io_size;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
- (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
+ (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
} /* end while */
+wait_for_writes:
+ /*
+ * make sure all async writes issued as part of this stream
+ * have completed before we return
+ */
+ while (iostate.io_issued != iostate.io_completed) {
+ iostate.io_wanted = 1;
+ tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_write", 0);
+ }
+ if (iostate.io_error)
+ error = iostate.io_error;
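/*
 * once io_issued == io_completed the stream is quiescent, so io_error
 * read here reflects the final state of every write issued above
 */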
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
(int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
int flags;
{
upl_page_info_t *pl;
- vm_offset_t src_paddr;
+ addr64_t src_paddr;
upl_t upl;
vm_offset_t upl_offset;
int tail_size;
}
pl = ubc_upl_pageinfo(upl);
- src_paddr = (vm_offset_t)upl_phys_page(pl, 0) + ((vm_offset_t)iov->iov_base & PAGE_MASK);
+ src_paddr = (((addr64_t)(int)upl_phys_page(pl, 0)) << 12) + ((addr64_t)iov->iov_base & PAGE_MASK);
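/*
 * upl_phys_page() returns a physical page number, so shifting it left
 * by 12 (the 4K page shift) forms the 64 bit physical byte address...
 * adding the sub-page offset of iov_base then points src_paddr at the
 * user's first byte
 */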
while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) {
int head_size;
(int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
while (io_size && retval == 0) {
- int xsize;
- vm_offset_t paddr;
+ int xsize;
+ ppnum_t paddr;
if (ubc_page_op(vp,
upl_f_offset,
if (xsize > io_size)
xsize = io_size;
- retval = uiomove((caddr_t)(paddr + start_offset), xsize, uio);
+ retval = uiomove64((addr64_t)(((addr64_t)paddr << 12) + start_offset), xsize, uio);
ubc_page_op(vp, upl_f_offset,
UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
while (val_size && retval == 0) {
int csize;
int i;
- caddr_t paddr;
+ addr64_t paddr;
i = offset / PAGE_SIZE;
csize = min(PAGE_SIZE - start_offset, val_size);
- paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;
+ paddr = ((addr64_t)upl_phys_page(pl, i) << 12) + start_offset;
- retval = uiomove(paddr, csize, uio);
+ retval = uiomove64(paddr, csize, uio);
val_size -= csize;
offset += csize;
int upl_size;
int upl_needed_size;
int pages_in_pl;
- vm_offset_t paddr;
+ ppnum_t paddr;
int upl_flags;
kern_return_t kret;
int segflg;
struct iovec *iov;
int i;
int force_data_sync;
- int error = 0;
int retval = 0;
+ int first = 1;
+ struct clios iostate;
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
(int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
* -- the resid will not exceed iov_len
*/
+ iostate.io_completed = 0;
+ iostate.io_issued = 0;
+ iostate.io_error = 0;
+ iostate.io_wanted = 0;
+
iov = uio->uio_iov;
+
while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
- max_io_size = filesize - uio->uio_offset;
+ max_io_size = filesize - uio->uio_offset;
- if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
- io_size = max_io_size;
- else
- io_size = uio->uio_resid;
+ if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
+ io_size = max_io_size;
+ else
+ io_size = uio->uio_resid;
- /*
- * We don't come into this routine unless
- * UIO_USERSPACE is set.
- */
- segflg = uio->uio_segflg;
+ /*
+ * We don't come into this routine unless
+ * UIO_USERSPACE is set.
+ */
+ segflg = uio->uio_segflg;
- uio->uio_segflg = UIO_PHYS_USERSPACE;
+ uio->uio_segflg = UIO_PHYS_USERSPACE;
- /*
- * First look for pages already in the cache
- * and move them to user space.
- */
- while (io_size && (retval == 0)) {
- upl_f_offset = uio->uio_offset;
+ /*
+ * First look for pages already in the cache
+ * and move them to user space.
+ */
+ while (io_size && (retval == 0)) {
+ upl_f_offset = uio->uio_offset;
- /*
- * If this call fails, it means the page is not
- * in the page cache.
- */
- if (ubc_page_op(vp, upl_f_offset,
- UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
- break;
+ /*
+ * If this call fails, it means the page is not
+ * in the page cache.
+ */
+ if (ubc_page_op(vp, upl_f_offset,
+ UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
+ break;
- retval = uiomove((caddr_t)(paddr), PAGE_SIZE, uio);
+ retval = uiomove64((addr64_t)paddr << 12, PAGE_SIZE, uio);
- ubc_page_op(vp, upl_f_offset,
- UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
+ ubc_page_op(vp, upl_f_offset,
+ UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
- io_size -= PAGE_SIZE;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
- (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
- }
-
- uio->uio_segflg = segflg;
+ io_size -= PAGE_SIZE;
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
+ (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
+ }
+ uio->uio_segflg = segflg;
- if (retval)
- {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
- (int)uio->uio_offset, uio->uio_resid, 2, retval, 0);
- return(retval);
- }
-
- /* If we are already finished with this read, then return */
- if (io_size == 0)
- {
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
- (int)uio->uio_offset, uio->uio_resid, 3, io_size, 0);
- return(0);
- }
-
- max_io_size = io_size;
- if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
- max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
-
- start_upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
- upl_f_offset = start_upl_f_offset;
- io_size = 0;
-
- while(io_size < max_io_size)
- {
-
- if(ubc_page_op(vp, upl_f_offset,
- UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS)
- {
- ubc_page_op(vp, upl_f_offset,
- UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
- break;
- }
-
- /*
- * Build up the io request parameters.
- */
-
- io_size += PAGE_SIZE;
- upl_f_offset += PAGE_SIZE;
+ if (retval) {
+ /*
+ * we may have already spun some portion of this request
+ * off as async requests... we need to wait for the I/O
+ * to complete before returning
+ */
+ goto wait_for_reads;
}
+ /*
+ * If we are already finished with this read, then return
+ */
+ if (io_size == 0) {
+ /*
+ * we may have already spun some portion of this request
+ * off as async requests... we need to wait for the I/O
+ * to complete before returning
+ */
+ goto wait_for_reads;
+ }
+ max_io_size = io_size;
+
+ if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
+ max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
+ if (first) {
+ if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE) / 4)
+ max_io_size = (MAX_UPL_TRANSFER * PAGE_SIZE) / 8;
+ first = 0;
+ }
+ start_upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
+ upl_f_offset = start_upl_f_offset;
+ io_size = 0;
- if (io_size == 0)
- return(retval);
+ while (io_size < max_io_size) {
+ if (ubc_page_op(vp, upl_f_offset,
+ UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS) {
+ ubc_page_op(vp, upl_f_offset,
+ UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
+ break;
+ }
+ /*
+ * Build up the io request parameters.
+ */
+ io_size += PAGE_SIZE_64;
+ upl_f_offset += PAGE_SIZE_64;
+ }
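/*
 * the loop above grows the request one page at a time and stops at the
 * first page already present in the cache... the direct read therefore
 * never overlaps data that the cache-copy pass at the top of this loop
 * would have handled
 */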
+ if (io_size == 0)
+ /*
+ * we may have already spun some portion of this request
+ * off as async requests... we need to wait for the I/O
+ * to complete before returning
+ */
+ goto wait_for_reads;
- upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
- upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
+ upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
+ upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
- (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
+ (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);
- for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
- {
- pages_in_pl = 0;
- upl_size = upl_needed_size;
- upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
+ for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
+ pages_in_pl = 0;
+ upl_size = upl_needed_size;
+ upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
- kret = vm_map_get_upl(current_map(),
- (vm_offset_t)iov->iov_base & ~PAGE_MASK,
- &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);
+ kret = vm_map_get_upl(current_map(),
+ (vm_offset_t)iov->iov_base & ~PAGE_MASK,
+ &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);
- if (kret != KERN_SUCCESS)
- {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
- (int)upl_offset, upl_size, io_size, kret, 0);
+ if (kret != KERN_SUCCESS) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
+ (int)upl_offset, upl_size, io_size, kret, 0);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
- (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);
-
- /* cluster_nocopy_read: failed to get pagelist */
- /* do not return kret here */
- return(retval);
- }
+ /*
+ * cluster_nocopy_read: failed to get pagelist
+ *
+ * we may have already spun some portion of this request
+ * off as async requests... we need to wait for the I/O
+ * to complete before returning
+ */
+ goto wait_for_reads;
+ }
+ pages_in_pl = upl_size / PAGE_SIZE;
+ pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
- pages_in_pl = upl_size / PAGE_SIZE;
- pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
+ for (i = 0; i < pages_in_pl; i++) {
+ if (!upl_valid_page(pl, i))
+ break;
+ }
+ if (i == pages_in_pl)
+ break;
- for(i=0; i < pages_in_pl; i++)
- {
- if (!upl_valid_page(pl, i))
- break;
+ ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
+ UPL_ABORT_FREE_ON_EMPTY);
}
- if (i == pages_in_pl)
- break;
-
- ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
- UPL_ABORT_FREE_ON_EMPTY);
- }
-
- if (force_data_sync >= 3)
- {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
- (int)upl_offset, upl_size, io_size, kret, 0);
+ if (force_data_sync >= 3) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
+ (int)upl_offset, upl_size, io_size, kret, 0);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
- (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);
- return(retval);
- }
- /*
- * Consider the possibility that upl_size wasn't satisfied.
- */
- if (upl_size != upl_needed_size)
- io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
-
- if (io_size == 0)
- {
- ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
- UPL_ABORT_FREE_ON_EMPTY);
- return(retval);
- }
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
- (int)upl_offset, upl_size, io_size, kret, 0);
-
- /*
- * issue a synchronous read to cluster_io
- */
+ goto wait_for_reads;
+ }
+ /*
+ * Consider the possibility that upl_size wasn't satisfied.
+ */
+ if (upl_size != upl_needed_size)
+ io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
- (int)upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);
+ if (io_size == 0) {
+ ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
+ UPL_ABORT_FREE_ON_EMPTY);
+ goto wait_for_reads;
+ }
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
+ (int)upl_offset, upl_size, io_size, kret, 0);
- error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
- io_size, devblocksize, CL_READ| CL_NOZERO, (struct buf *)0, (struct clios *)0);
+ /*
+ * issue the request asynchronously so that we can overlap
+ * the preparation of the next I/O...
+ * if there are already too many outstanding reads,
+ * wait until some have completed before issuing the next read
+ */
+ while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) {
+ iostate.io_wanted = 1;
+ tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_read", 0);
+ }
+ if (iostate.io_error) {
+ /*
+ * one of the earlier reads we issued ran into a hard error
+ * don't issue any more reads, cleanup the UPL
+ * that was just created but not used, then
+ * go wait for any other reads to complete before
+ * returning the error to the caller
+ */
+ ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
+ UPL_ABORT_FREE_ON_EMPTY);
- if (error == 0) {
- /*
- * The cluster_io read completed successfully,
- * update the uio structure and commit.
- */
+ goto wait_for_reads;
+ }
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
+ (int)upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);
- ubc_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
-
- iov->iov_base += io_size;
- iov->iov_len -= io_size;
- uio->uio_resid -= io_size;
- uio->uio_offset += io_size;
- }
- else {
- ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
- UPL_ABORT_FREE_ON_EMPTY);
- }
+ retval = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
+ io_size, devblocksize,
+ CL_PRESERVE | CL_COMMIT | CL_READ | CL_ASYNC | CL_NOZERO,
+ (struct buf *)0, &iostate);
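/*
 * CL_COMMIT (together with CL_PRESERVE) asks cluster_io to commit or
 * abort the UPL itself when the I/O completes... that is why the
 * explicit ubc_upl_commit_range() of the old synchronous path is gone
 * from this one
 */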
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
- (int)upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
+ /*
+ * update the uio structure
+ */
+ iov->iov_base += io_size;
+ iov->iov_len -= io_size;
+ uio->uio_resid -= io_size;
+ uio->uio_offset += io_size;
- if (retval == 0)
- retval = error;
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
+ (int)upl, (int)uio->uio_offset, (int)uio->uio_resid, retval, 0);
} /* end while */
+wait_for_reads:
+ /*
+ * make sure all async reads that are part of this stream
+ * have completed before we return
+ */
+ while (iostate.io_issued != iostate.io_completed) {
+ iostate.io_wanted = 1;
+ tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_read", 0);
+ }
+ if (iostate.io_error)
+ retval = iostate.io_error;
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
(int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);
}
-
static int
cluster_phys_read(vp, uio, filesize, devblocksize, flags)
struct vnode *vp;
upl_page_info_t *pl;
upl_t upl;
vm_offset_t upl_offset;
- vm_offset_t dst_paddr;
+ addr64_t dst_paddr;
off_t max_size;
int io_size;
int tail_size;
}
pl = ubc_upl_pageinfo(upl);
- dst_paddr = (vm_offset_t)upl_phys_page(pl, 0) + ((vm_offset_t)iov->iov_base & PAGE_MASK);
+ dst_paddr = (((addr64_t)(int)upl_phys_page(pl, 0)) << 12) + ((addr64_t)iov->iov_base & PAGE_MASK);
while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) {
int head_size;
* the commit after all the I/O has completed
* since its all issued against the same UPL
* if there are already too many outstanding reads
- * throttle back until we reach a more reasonable level
+ * wait until some have completed before issuing the next
*/
while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) {
iostate.io_wanted = 1;
}
}
/*
- * make sure any async reads have completed before
- * we proceed
+ * make sure all async reads that are part of this stream
+ * have completed before we proceed
*/
while (iostate.io_issued != iostate.io_completed) {
iostate.io_wanted = 1;
static int
-cluster_align_phys_io(struct vnode *vp, struct uio *uio, vm_offset_t usr_paddr, int xsize, int devblocksize, int flags)
+cluster_align_phys_io(struct vnode *vp, struct uio *uio, addr64_t usr_paddr, int xsize, int devblocksize, int flags)
{
struct iovec *iov;
upl_page_info_t *pl;
upl_t upl;
- vm_offset_t ubc_paddr;
+ addr64_t ubc_paddr;
kern_return_t kret;
int error = 0;
return(error);
}
}
- ubc_paddr = (vm_offset_t)upl_phys_page(pl, 0) + (int)(uio->uio_offset & PAGE_MASK_64);
-
- if (flags & CL_READ)
- copyp2p(ubc_paddr, usr_paddr, xsize, 2);
- else
- copyp2p(usr_paddr, ubc_paddr, xsize, 1);
-
- if ( !(flags & CL_READ) || upl_dirty_page(pl, 0)) {
- /*
- * issue a synchronous write to cluster_io
- */
- error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, devblocksize,
- 0, (struct buf *)0, (struct clios *)0);
- }
- if (error == 0) {
- uio->uio_offset += xsize;
- iov->iov_base += xsize;
- iov->iov_len -= xsize;
- uio->uio_resid -= xsize;
- }
- ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
+ ubc_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + (addr64_t)(uio->uio_offset & PAGE_MASK_64);
- return (error);
+/*
+ * NOTE: There is no prototype for the following in BSD. It, and the
+ * definitions of the cppvPsrc, cppvPsnk, cppvFsnk, and cppvFsrc flags, will be found in
+ * osfmk/ppc/mappings.h. They are not included here because there appears to be no
+ * way to do so without exporting them to kexts as well.
+ */
+ if (flags & CL_READ)
+// copypv(ubc_paddr, usr_paddr, xsize, cppvPsrc | cppvPsnk | cppvFsnk); /* Copy physical to physical and flush the destination */
+ copypv(ubc_paddr, usr_paddr, xsize, 2 | 1 | 4); /* Copy physical to physical and flush the destination */
+ else
+// copypv(usr_paddr, ubc_paddr, xsize, cppvPsrc | cppvPsnk | cppvFsrc); /* Copy physical to physical and flush the source */
+ copypv(usr_paddr, ubc_paddr, xsize, 2 | 1 | 8); /* Copy physical to physical and flush the source */
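/*
 * the literal flag values follow from the commented-out forms above:
 * cppvPsnk == 1, cppvPsrc == 2, cppvFsnk == 4, cppvFsrc == 8...
 * 1 | 2 marks both addresses as physical, and the remaining bit picks
 * which side of the copy gets its cache lines flushed
 */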
+
+ if ( !(flags & CL_READ) || upl_dirty_page(pl, 0)) {
+ /*
+ * issue a synchronous write to cluster_io
+ */
+ error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, devblocksize,
+ 0, (struct buf *)0, (struct clios *)0);
+ }
+ if (error == 0) {
+ uio->uio_offset += xsize;
+ iov->iov_base += xsize;
+ iov->iov_len -= xsize;
+ uio->uio_resid -= xsize;
+ }
+ ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
+
+ return (error);
}