+ }
+
+ return bytes_to_zero;
+}
+
+
+/*
+ * cluster_update_state
+ *
+ * Describe the byte range [s_offset, e_offset) of 'vp' as a page-granular
+ * extent and hand it to cluster_update_state_internal().
+ *
+ * Both offsets must be page aligned and s_offset must be strictly below
+ * e_offset (asserted).  The internal routine is called with flags == 0,
+ * defer_writes == TRUE, no callback / callback_arg, and with
+ * vp->v_un.vu_ubcinfo->ui_size supplied as newEOF.  'vm_initiated' is
+ * passed through unchanged.
+ */
+void
+cluster_update_state(vnode_t vp, vm_object_offset_t s_offset, vm_object_offset_t e_offset, boolean_t vm_initiated)
+{
+ struct cl_extent extent;
+ boolean_t is_first_pass = TRUE;
+ int range_bytes;
+
+ assert(s_offset < e_offset);
+ assert((s_offset & PAGE_MASK_64) == 0);
+ assert((e_offset & PAGE_MASK_64) == 0);
+
+ /*
+ * express the start and end of the range in units of pages
+ */
+ extent.b_addr = (daddr64_t)(s_offset / PAGE_SIZE_64);
+ extent.e_addr = (daddr64_t)(e_offset / PAGE_SIZE_64);
+
+ /*
+ * NOTE(review): the length is narrowed to int because the internal
+ * routine takes write_cnt as an int... presumably callers keep the
+ * range small enough for that to be lossless -- confirm
+ */
+ range_bytes = (int)(e_offset - s_offset);
+
+ cluster_update_state_internal(vp, &extent, 0, TRUE, &is_first_pass, s_offset, range_bytes,
+ vp->v_un.vu_ubcinfo->ui_size, NULL, NULL, vm_initiated);
+}
+
+
+static void
+cluster_update_state_internal(vnode_t vp, struct cl_extent *cl, int flags, boolean_t defer_writes,
+ boolean_t *first_pass, off_t write_off, int write_cnt, off_t newEOF,
+ int (*callback)(buf_t, void *), void *callback_arg, boolean_t vm_initiated)
+{
+ struct cl_writebehind *wbp;
+ int cl_index;
+ int ret_cluster_try_push;
+ u_int max_cluster_pgcount;
+
+
+ max_cluster_pgcount = MAX_CLUSTER_SIZE(vp) / PAGE_SIZE;
+
+ /*
+ * take the lock to protect our accesses
+ * of the writebehind and sparse cluster state
+ */
+ wbp = cluster_get_wbp(vp, CLW_ALLOCATE | CLW_RETURNLOCKED);
+
+ if (wbp->cl_scmap) {
+ if (!(flags & IO_NOCACHE)) {
+ /*
+ * we've fallen into the sparse
+ * cluster method of delaying dirty pages
+ */
+ sparse_cluster_add(wbp, &(wbp->cl_scmap), vp, cl, newEOF, callback, callback_arg, vm_initiated);
+
+ lck_mtx_unlock(&wbp->cl_lockw);
+ return;
+ }
+ /*
+ * must have done cached writes that fell into
+ * the sparse cluster mechanism... we've switched
+ * to uncached writes on the file, so go ahead
+ * and push whatever's in the sparse map
+ * and switch back to normal clustering
+ */
+ wbp->cl_number = 0;
+
+ sparse_cluster_push(wbp, &(wbp->cl_scmap), vp, newEOF, PUSH_ALL, 0, callback, callback_arg, vm_initiated);
+ /*
+ * no clusters of either type present at this point
+ * so just go directly to start_new_cluster since
+ * we know we need to delay this I/O since we've
+ * already released the pages back into the cache
+ * to avoid the deadlock with sparse_cluster_push
+ */
+ goto start_new_cluster;
+ }
+ if (*first_pass == TRUE) {
+ if (write_off == wbp->cl_last_write) {
+ wbp->cl_seq_written += write_cnt;
+ } else {
+ wbp->cl_seq_written = write_cnt;
+ }
+
+ wbp->cl_last_write = write_off + write_cnt;
+
+ *first_pass = FALSE;
+ }
+ if (wbp->cl_number == 0) {
+ /*
+ * no clusters currently present
+ */
+ goto start_new_cluster;
+ }
+
+ for (cl_index = 0; cl_index < wbp->cl_number; cl_index++) {
+ /*
+ * check each cluster that we currently hold
+ * try to merge some or all of this write into
+ * one or more of the existing clusters... if
+ * any portion of the write remains, start a
+ * new cluster
+ */
+ if (cl->b_addr >= wbp->cl_clusters[cl_index].b_addr) {
+ /*
+ * the current write starts at or after the current cluster
+ */
+ if (cl->e_addr <= (wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount)) {
+ /*
+ * we have a write that fits entirely
+ * within the existing cluster limits
+ */
+ if (cl->e_addr > wbp->cl_clusters[cl_index].e_addr) {
+ /*
+ * update our idea of where the cluster ends
+ */
+ wbp->cl_clusters[cl_index].e_addr = cl->e_addr;
+ }
+ break;
+ }
+ if (cl->b_addr < (wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount)) {
+ /*
+ * we have a write that starts in the middle of the current cluster
+ * but extends beyond the cluster's limit... we know this because
+ * of the previous checks
+ * we'll extend the current cluster to the max
+ * and update the b_addr for the current write to reflect that
+ * the head of it was absorbed into this cluster...
+ * note that we'll always have a leftover tail in this case since
+ * full absorption would have occurred in the clause above
+ */
+ wbp->cl_clusters[cl_index].e_addr = wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount;
+
+ cl->b_addr = wbp->cl_clusters[cl_index].e_addr;
+ }
+ /*
+ * we come here for the case where the current write starts
+ * beyond the limit of the existing cluster or we have a leftover
+ * tail after a partial absorption
+ *
+ * in either case, we'll check the remaining clusters before
+ * starting a new one
+ */
+ } else {
+ /*
+ * the current write starts in front of the cluster we're currently considering
+ */
+ if ((wbp->cl_clusters[cl_index].e_addr - cl->b_addr) <= max_cluster_pgcount) {
+ /*
+ * we can just merge the new request into
+ * this cluster and leave it in the cache
+ * since the resulting cluster is still
+ * less than the maximum allowable size
+ */
+ wbp->cl_clusters[cl_index].b_addr = cl->b_addr;
+
+ if (cl->e_addr > wbp->cl_clusters[cl_index].e_addr) {
+ /*
+ * the current write completely
+ * envelops the existing cluster and since
+ * each write is limited to at most max_cluster_pgcount pages
+ * we can just use the start and last blocknos of the write
+ * to generate the cluster limits
+ */
+ wbp->cl_clusters[cl_index].e_addr = cl->e_addr;
+ }
+ break;
+ }
+ /*
+ * if we were to combine this write with the current cluster
+ * we would exceed the cluster size limit.... so,
+ * let's see if there's any overlap of the new I/O with
+ * the cluster we're currently considering... in fact, we'll
+ * stretch the cluster out to its full limit and see if we
+ * get an intersection with the current write
+ *
+ */
+ if (cl->e_addr > wbp->cl_clusters[cl_index].e_addr - max_cluster_pgcount) {
+ /*
+ * the current write extends into the proposed cluster
+ * clip the length of the current write after first combining its
+ * tail with the newly shaped cluster
+ */
+ wbp->cl_clusters[cl_index].b_addr = wbp->cl_clusters[cl_index].e_addr - max_cluster_pgcount;
+
+ cl->e_addr = wbp->cl_clusters[cl_index].b_addr;
+ }
+ /*
+ * if we get here, there was no way to merge
+ * any portion of this write with this cluster
+ * or we could only merge part of it which
+ * will leave a tail...
+ * we'll check the remaining clusters before starting a new one
+ */
+ }
+ }
+ if (cl_index < wbp->cl_number) {
+ /*
+ * we found an existing cluster(s) that we
+ * could entirely merge this I/O into
+ */
+ goto delay_io;
+ }
+
+ if (defer_writes == FALSE &&
+ wbp->cl_number == MAX_CLUSTERS &&
+ wbp->cl_seq_written >= (MAX_CLUSTERS * (max_cluster_pgcount * PAGE_SIZE))) {
+ uint32_t n;
+
+ if (vp->v_mount->mnt_minsaturationbytecount) {
+ n = vp->v_mount->mnt_minsaturationbytecount / MAX_CLUSTER_SIZE(vp);