]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/vfs/vfs_cluster.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_cluster.c
index ee241e7aedd3da292e07e12f3bf26a89cadc9e01..70eecc5ff44759d87818dc61b6ea475697195ea0 100644 (file)
@@ -212,10 +212,10 @@ static void       cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t files
 
 static int     cluster_push_now(vnode_t vp, struct cl_extent *, off_t EOF, int flags, int (*)(buf_t, void *), void *callback_arg);
 
-static int     cluster_try_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int push_flag, int flags, int (*)(buf_t, void *), void *callback_arg);
+static int     cluster_try_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int push_flag, int flags, int (*)(buf_t, void *), void *callback_arg, int *err);
 
 static void    sparse_cluster_switch(struct cl_writebehind *, vnode_t vp, off_t EOF, int (*)(buf_t, void *), void *callback_arg);
-static void    sparse_cluster_push(void **cmapp, vnode_t vp, off_t EOF, int push_flag, int io_flags, int (*)(buf_t, void *), void *callback_arg);
+static int     sparse_cluster_push(void **cmapp, vnode_t vp, off_t EOF, int push_flag, int io_flags, int (*)(buf_t, void *), void *callback_arg);
 static void    sparse_cluster_add(void **cmapp, vnode_t vp, struct cl_extent *, off_t EOF, int (*)(buf_t, void *), void *callback_arg);
 
 static kern_return_t vfs_drt_mark_pages(void **cmapp, off_t offset, u_int length, u_int *setcountp);
@@ -479,7 +479,7 @@ cluster_syncup(vnode_t vp, off_t newEOF, int (*callback)(buf_t, void *), void *c
                if (wbp->cl_number) {
                        lck_mtx_lock(&wbp->cl_lockw);
 
-                       cluster_try_push(wbp, vp, newEOF, PUSH_ALL | flags, 0, callback, callback_arg);
+                       cluster_try_push(wbp, vp, newEOF, PUSH_ALL | flags, 0, callback, callback_arg, NULL);
 
                        lck_mtx_unlock(&wbp->cl_lockw);
                }
@@ -1360,47 +1360,69 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
                                pageout_flags |= UPL_NOCOMMIT;
 
                        if (cbp_head) {
-                               buf_t last_cbp;
+                               buf_t prev_cbp;
+                               int   bytes_in_last_page;
 
                                /*
                                 * first we have to wait for the the current outstanding I/Os
                                 * to complete... EOT hasn't been set yet on this transaction
-                                * so the pages won't be released just because all of the current
-                                * I/O linked to this transaction has completed...
+                                * so the pages won't be released
                                 */
                                cluster_wait_IO(cbp_head, (flags & CL_ASYNC));
 
-                               /*
-                                * we've got a transcation that
-                                * includes the page we're about to push out through vnode_pageout...
-                                * find the last bp in the list which will be the one that
-                                * includes the head of this page and round it's iosize down
-                                * to a page boundary...
-                                */
-                                for (last_cbp = cbp = cbp_head; cbp->b_trans_next; cbp = cbp->b_trans_next)
-                                       last_cbp = cbp;
-
-                               cbp->b_bcount &= ~PAGE_MASK;
-
-                               if (cbp->b_bcount == 0) {
-                                       /*
-                                        * this buf no longer has any I/O associated with it
+                               bytes_in_last_page = cbp_head->b_uploffset & PAGE_MASK;
+                               for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)
+                                       bytes_in_last_page += cbp->b_bcount;
+                               bytes_in_last_page &= PAGE_MASK;
+                               
+                               while (bytes_in_last_page) {
+                                       /*
+                                        * we've got a transaction that
+                                        * includes the page we're about to push out through vnode_pageout...
+                                        * find the bp's in the list which intersect this page and either
+                                        * remove them entirely from the transaction (there could be multiple bp's), or
+                                        * round its iosize down to the page boundary (there can only be one)...
+                                        *
+                                        * find the last bp in the list and act on it
                                         */
-                                       free_io_buf(cbp);
+                                       for (prev_cbp = cbp = cbp_head; cbp->b_trans_next; cbp = cbp->b_trans_next)
+                                               prev_cbp = cbp;
 
-                                       if (cbp == cbp_head) {
-                                               /*
-                                                * the buf we just freed was the only buf in
-                                                * this transaction... so there's no I/O to do
+                                       if (bytes_in_last_page >= cbp->b_bcount) {
+                                               /*
+                                                * this buf no longer has any I/O associated with it
                                                 */
-                                               cbp_head = NULL;
+                                               bytes_in_last_page -= cbp->b_bcount;
+                                               cbp->b_bcount = 0;
+
+                                               free_io_buf(cbp);
+
+                                               if (cbp == cbp_head) {
+                                                       assert(bytes_in_last_page == 0);
+                                                       /*
+                                                        * the buf we just freed was the only buf in
+                                                        * this transaction... so there's no I/O to do
+                                                        */
+                                                       cbp_head = NULL;
+                                                       cbp_tail = NULL;
+                                               } else {
+                                                       /*
+                                                        * remove the buf we just freed from
+                                                        * the transaction list
+                                                        */
+                                                       prev_cbp->b_trans_next = NULL;
+                                                       cbp_tail = prev_cbp;
+                                               }
                                        } else {
-                                               /*
-                                                * remove the buf we just freed from
-                                                * the transaction list
+                                               /*
+                                                * this is the last bp that has I/O
+                                                * intersecting the page of interest
+                                                * only some of the I/O is in the intersection
+                                                * so clip the size but keep it in the transaction list
                                                 */
-                                               last_cbp->b_trans_next = NULL;
-                                               cbp_tail = last_cbp;
+                                               cbp->b_bcount -= bytes_in_last_page;
+                                               cbp_tail = cbp;
+                                               bytes_in_last_page = 0;
                                        }
                                }
                                if (cbp_head) {
@@ -3519,7 +3541,7 @@ check_cluster:
                                                n = WRITE_BEHIND;
                                }
                                while (n--)
-                                       cluster_try_push(wbp, vp, newEOF, 0, 0, callback, callback_arg);
+                                       cluster_try_push(wbp, vp, newEOF, 0, 0, callback, callback_arg, NULL);
                        }
                        if (wbp->cl_number < MAX_CLUSTERS) {
                                /*
@@ -3546,7 +3568,7 @@ check_cluster:
                         */
                        if (!((unsigned int)vfs_flags(vp->v_mount) & MNT_DEFWRITE)) {
                                
-                               ret_cluster_try_push = cluster_try_push(wbp, vp, newEOF, (flags & IO_NOCACHE) ? 0 : PUSH_DELAY, 0, callback, callback_arg);
+                               ret_cluster_try_push = cluster_try_push(wbp, vp, newEOF, (flags & IO_NOCACHE) ? 0 : PUSH_DELAY, 0, callback, callback_arg, NULL);
                        }
 
                        /*
@@ -5315,11 +5337,21 @@ cluster_push(vnode_t vp, int flags)
 
 int
 cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *callback_arg)
+{
+       return cluster_push_err(vp, flags, callback, callback_arg, NULL);
+}
+
+/* write errors via err, but return the number of clusters written */
+int
+cluster_push_err(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *callback_arg, int *err)
 {
         int    retval;
        int     my_sparse_wait = 0;
        struct  cl_writebehind *wbp;
 
+       if (err)
+               *err = 0;
+
        if ( !UBCINFOEXISTS(vp)) {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, kdebug_vnode(vp), flags, 0, -1, 0);
                return (0);
@@ -5385,7 +5417,7 @@ cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *ca
 
                        lck_mtx_unlock(&wbp->cl_lockw);
 
-                       sparse_cluster_push(&scmap, vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg);
+                       retval = sparse_cluster_push(&scmap, vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg);
 
                        lck_mtx_lock(&wbp->cl_lockw);
 
@@ -5394,11 +5426,13 @@ cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *ca
                        if (wbp->cl_sparse_wait && wbp->cl_sparse_pushes == 0)
                                wakeup((caddr_t)&wbp->cl_sparse_pushes);
                } else {
-                       sparse_cluster_push(&(wbp->cl_scmap), vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg);
+                       retval = sparse_cluster_push(&(wbp->cl_scmap), vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg);
                }
+               if (err)
+                       *err = retval;
                retval = 1;
-       } else  {
-               retval = cluster_try_push(wbp, vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg);
+       } else {
+               retval = cluster_try_push(wbp, vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg, err);
        }
        lck_mtx_unlock(&wbp->cl_lockw);
 
@@ -5459,7 +5493,7 @@ cluster_release(struct ubc_info *ubc)
 
 
 static int
-cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_flag, int io_flags, int (*callback)(buf_t, void *), void *callback_arg)
+cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_flag, int io_flags, int (*callback)(buf_t, void *), void *callback_arg, int *err)
 {
         int cl_index;
        int cl_index1;
@@ -5468,7 +5502,7 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla
        int cl_pushed = 0;
        struct cl_wextent l_clusters[MAX_CLUSTERS];
        u_int  max_cluster_pgcount;
-
+       int error = 0;
 
        max_cluster_pgcount = MAX_CLUSTER_SIZE(vp) / PAGE_SIZE;
        /*
@@ -5543,6 +5577,7 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla
        for (cl_index = 0; cl_index < cl_len; cl_index++) {
                int     flags;
                struct  cl_extent cl;
+               int retval;
 
                flags = io_flags & (IO_PASSIVE|IO_CLOSE);
 
@@ -5561,7 +5596,10 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla
                cl.b_addr = l_clusters[cl_index].b_addr;
                cl.e_addr = l_clusters[cl_index].e_addr;
 
-               cluster_push_now(vp, &cl, EOF, flags, callback, callback_arg);
+               retval = cluster_push_now(vp, &cl, EOF, flags, callback, callback_arg);
+
+               if (error == 0 && retval)
+                       error = retval;
 
                l_clusters[cl_index].b_addr = 0;
                l_clusters[cl_index].e_addr = 0;
@@ -5571,6 +5609,9 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla
                if ( !(push_flag & PUSH_ALL) )
                        break;
        }
+       if (err)
+               *err = error;
+
 dont_try:
        if (cl_len > cl_pushed) {
               /*
@@ -5845,12 +5886,13 @@ sparse_cluster_switch(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int (*c
  * still associated with the write-behind context... however, if the scmap has been disassociated
  * from the write-behind context (the cluster_push case), the wb lock is not held
  */
-static void
+static int
 sparse_cluster_push(void **scmap, vnode_t vp, off_t EOF, int push_flag, int io_flags, int (*callback)(buf_t, void *), void *callback_arg)
 {
         struct cl_extent cl;
         off_t          offset;
        u_int           length;
+       int error = 0;
 
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_START, kdebug_vnode(vp), (*scmap), 0, push_flag, 0);
 
@@ -5858,18 +5900,23 @@ sparse_cluster_push(void **scmap, vnode_t vp, off_t EOF, int push_flag, int io_f
                vfs_drt_control(scmap, 1);
 
        for (;;) {
+               int retval;
                if (vfs_drt_get_cluster(scmap, &offset, &length) != KERN_SUCCESS)
                        break;
 
                cl.b_addr = (daddr64_t)(offset / PAGE_SIZE_64);
                cl.e_addr = (daddr64_t)((offset + length) / PAGE_SIZE_64);
 
-               cluster_push_now(vp, &cl, EOF, io_flags & (IO_PASSIVE|IO_CLOSE), callback, callback_arg);
+               retval = cluster_push_now(vp, &cl, EOF, io_flags & (IO_PASSIVE|IO_CLOSE), callback, callback_arg);
+               if (error == 0 && retval)
+                       error = retval;
 
                if ( !(push_flag & PUSH_ALL) )
                        break;
        }
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_END, kdebug_vnode(vp), (*scmap), 0, 0, 0);
+
+       return error;
 }