+ return bytes_to_zero;
+}
+
+
+void
+cluster_update_state(vnode_t vp, vm_object_offset_t s_offset, vm_object_offset_t e_offset, boolean_t vm_initiated)
+{
+ struct cl_extent cl;
+ boolean_t first_pass = TRUE;
+
+ assert(s_offset < e_offset);
+ assert((s_offset & PAGE_MASK_64) == 0);
+ assert((e_offset & PAGE_MASK_64) == 0);
+
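+ /*
+ * express the page-aligned byte range as a page-sized extent...
+ * e_addr is the first page beyond the range being updated
+ */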
+ cl.b_addr = (daddr64_t)(s_offset / PAGE_SIZE_64);
+ cl.e_addr = (daddr64_t)(e_offset / PAGE_SIZE_64);
+
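+ /*
+ * defer_writes is TRUE here, so cluster_try_push is never called
+ * from this path... the dirty extent is only recorded for a later push
+ */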
+ cluster_update_state_internal(vp, &cl, 0, TRUE, &first_pass, s_offset, (int)(e_offset - s_offset),
+ vp->v_un.vu_ubcinfo->ui_size, NULL, NULL, vm_initiated);
+}
+
+
+static void
+cluster_update_state_internal(vnode_t vp, struct cl_extent *cl, int flags, boolean_t defer_writes,
+ boolean_t *first_pass, off_t write_off, int write_cnt, off_t newEOF,
+ int (*callback)(buf_t, void *), void *callback_arg, boolean_t vm_initiated)
+{
+ struct cl_writebehind *wbp;
+ int cl_index;
+ int ret_cluster_try_push;
+ u_int max_cluster_pgcount;
+
+
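+ /*
+ * maximum number of pages a single delayed-write cluster may span
+ */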
+ max_cluster_pgcount = MAX_CLUSTER_SIZE(vp) / PAGE_SIZE;
+
+ /*
+ * take the lock to protect our accesses
+ * of the writebehind and sparse cluster state
+ */
+ wbp = cluster_get_wbp(vp, CLW_ALLOCATE | CLW_RETURNLOCKED);
+
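+ /*
+ * a non-NULL sparse cluster map means this file has already
+ * overflowed the fixed set of write-behind clusters
+ */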
+ if (wbp->cl_scmap) {
+ if (!(flags & IO_NOCACHE)) {
+ /*
+ * we've fallen into the sparse
+ * cluster method of delaying dirty pages
+ */
+ sparse_cluster_add(wbp, &(wbp->cl_scmap), vp, cl, newEOF, callback, callback_arg, vm_initiated);
+
+ lck_mtx_unlock(&wbp->cl_lockw);
+ return;
+ }
+ /*
+ * must have done cached writes that fell into
+ * the sparse cluster mechanism... we've switched
+ * to uncached writes on the file, so go ahead
+ * and push whatever's in the sparse map
+ * and switch back to normal clustering
+ */
+ wbp->cl_number = 0;
+
+ sparse_cluster_push(wbp, &(wbp->cl_scmap), vp, newEOF, PUSH_ALL, 0, callback, callback_arg, vm_initiated);
+ /*
+ * no clusters of either type present at this point
+ * so just go directly to start_new_cluster since
+ * we know we need to delay this I/O since we've
+ * already released the pages back into the cache
+ * to avoid the deadlock with sparse_cluster_push
+ */
+ goto start_new_cluster;
+ }
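+ /*
+ * on the first pass, track how much of the file has been
+ * written sequentially... a write that starts where the previous
+ * one ended extends the sequential run, anything else restarts it
+ */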
+ if (*first_pass == TRUE) {
+ if (write_off == wbp->cl_last_write) {
+ wbp->cl_seq_written += write_cnt;
+ } else {
+ wbp->cl_seq_written = write_cnt;
+ }
+
+ wbp->cl_last_write = write_off + write_cnt;
+
+ *first_pass = FALSE;
+ }
+ if (wbp->cl_number == 0) {
+ /*
+ * no clusters currently present
+ */
+ goto start_new_cluster;
+ }
+
+ for (cl_index = 0; cl_index < wbp->cl_number; cl_index++) {
+ /*
+ * check each cluster that we currently hold
+ * try to merge some or all of this write into
+ * one or more of the existing clusters... if
+ * any portion of the write remains, start a
+ * new cluster
+ */
+ if (cl->b_addr >= wbp->cl_clusters[cl_index].b_addr) {
+ /*
+ * the current write starts at or after the current cluster
+ */
+ if (cl->e_addr <= (wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount)) {
+ /*
+ * we have a write that fits entirely
+ * within the existing cluster limits
+ */
+ if (cl->e_addr > wbp->cl_clusters[cl_index].e_addr) {
+ /*
+ * update our idea of where the cluster ends
+ */
+ wbp->cl_clusters[cl_index].e_addr = cl->e_addr;
+ }
+ break;
+ }
+ if (cl->b_addr < (wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount)) {
+ /*
+ * we have a write that starts in the middle of the current cluster
+ * but extends beyond the cluster's limit... we know this because
+ * of the previous checks
+ * we'll extend the current cluster to the max
+ * and update the b_addr for the current write to reflect that
+ * the head of it was absorbed into this cluster...
+ * note that we'll always have a leftover tail in this case since
+ * full absorption would have occurred in the clause above
+ */
+ wbp->cl_clusters[cl_index].e_addr = wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount;
+
+ cl->b_addr = wbp->cl_clusters[cl_index].e_addr;
+ }
+ /*
+ * we come here for the case where the current write starts
+ * beyond the limit of the existing cluster or we have a leftover
+ * tail after a partial absorption
+ *
+ * in either case, we'll check the remaining clusters before
+ * starting a new one
+ */
+ } else {
+ /*
+ * the current write starts in front of the cluster we're currently considering
+ */
+ if ((wbp->cl_clusters[cl_index].e_addr - cl->b_addr) <= max_cluster_pgcount) {
+ /*
+ * we can just merge the new request into
+ * this cluster and leave it in the cache
+ * since the resulting cluster is still
+ * less than the maximum allowable size
+ */
+ wbp->cl_clusters[cl_index].b_addr = cl->b_addr;
+
+ if (cl->e_addr > wbp->cl_clusters[cl_index].e_addr) {
+ /*
+ * the current write completely
+ * envelops the existing cluster and since
+ * each write is limited to at most max_cluster_pgcount pages
+ * we can just use the start and last blocknos of the write
+ * to generate the cluster limits
+ */
+ wbp->cl_clusters[cl_index].e_addr = cl->e_addr;
+ }
+ break;
+ }
+ /*
+ * if we were to combine this write with the current cluster
+ * we would exceed the cluster size limit... so,
+ * let's see if there's any overlap of the new I/O with
+ * the cluster we're currently considering... in fact, we'll
+ * stretch the cluster out to its full limit and see if we
+ * get an intersection with the current write
+ *
+ */
+ if (cl->e_addr > wbp->cl_clusters[cl_index].e_addr - max_cluster_pgcount) {
+ /*
+ * the current write extends into the proposed cluster
+ * clip the length of the current write after first combining its
+ * tail with the newly shaped cluster
+ */
+ wbp->cl_clusters[cl_index].b_addr = wbp->cl_clusters[cl_index].e_addr - max_cluster_pgcount;
+
+ cl->e_addr = wbp->cl_clusters[cl_index].b_addr;
+ }
+ /*
+ * if we get here, there was no way to merge
+ * any portion of this write with this cluster
+ * or we could only merge part of it which
+ * will leave a tail...
+ * we'll check the remaining clusters before starting a new one
+ */
+ }
+ }
+ if (cl_index < wbp->cl_number) {
+ /*
+ * we found one or more existing clusters that we
+ * could entirely merge this I/O into
+ */
+ goto delay_io;
+ }
+
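+ /*
+ * if we're not being asked to defer writes, all cluster slots are
+ * in use, and the file has been written sequentially for at least
+ * MAX_CLUSTERS worth of maximum-sized clusters, proactively push
+ * some of the delayed clusters (i.e. do write-behind) to keep
+ * slots available
+ */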
+ if (defer_writes == FALSE &&
+ wbp->cl_number == MAX_CLUSTERS &&
+ wbp->cl_seq_written >= (MAX_CLUSTERS * (max_cluster_pgcount * PAGE_SIZE))) {
+ uint32_t n;
+
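+ /*
+ * derive the number of clusters to push from the mount's
+ * minimum saturation byte count if one is set, otherwise
+ * fall back to the SSD/HDD write-behind defaults below
+ */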
+ if (vp->v_mount->mnt_minsaturationbytecount) {
+ n = vp->v_mount->mnt_minsaturationbytecount / MAX_CLUSTER_SIZE(vp);
+
+ if (n > MAX_CLUSTERS) {
+ n = MAX_CLUSTERS;
+ }
+ } else {
+ n = 0;
+ }
+
+ if (n == 0) {
+ if (disk_conditioner_mount_is_ssd(vp->v_mount)) {
+ n = WRITE_BEHIND_SSD;
+ } else {
+ n = WRITE_BEHIND;
+ }
+ }
+ while (n--) {
+ cluster_try_push(wbp, vp, newEOF, 0, 0, callback, callback_arg, NULL, vm_initiated);
+ }
+ }
+ if (wbp->cl_number < MAX_CLUSTERS) {
+ /*
+ * we didn't find an existing cluster to
+ * merge into, but there's room to start
+ * a new one
+ */
+ goto start_new_cluster;
+ }
+ /*
+ * no existing cluster to merge with and no
+ * room to start a new one... we'll try
+ * pushing one of the existing ones... if none of
+ * them are able to be pushed, we'll switch
+ * to the sparse cluster mechanism
+ * cluster_try_push updates cl_number to the
+ * number of remaining clusters... and
+ * returns the number of currently unused clusters
+ */
+ ret_cluster_try_push = 0;
+
+ /*
+ * if writes are not deferred, call cluster push immediately
+ */
+ if (defer_writes == FALSE) {
+ ret_cluster_try_push = cluster_try_push(wbp, vp, newEOF, (flags & IO_NOCACHE) ? 0 : PUSH_DELAY, 0, callback, callback_arg, NULL, vm_initiated);
+ }
+ /*
+ * the following executes regardless of whether writes are deferred
+ */
+ if (ret_cluster_try_push == 0) {
+ /*
+ * no more room in the normal cluster mechanism
+ * so let's switch to the more expansive but expensive
+ * sparse mechanism....
+ */
+ sparse_cluster_switch(wbp, vp, newEOF, callback, callback_arg, vm_initiated);
+ sparse_cluster_add(wbp, &(wbp->cl_scmap), vp, cl, newEOF, callback, callback_arg, vm_initiated);
+
+ lck_mtx_unlock(&wbp->cl_lockw);
+ return;
+ }
+start_new_cluster:
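+ /*
+ * record this extent in the next free cluster slot and tag it
+ * with the caching/throttling attributes of the current I/O
+ */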
+ wbp->cl_clusters[wbp->cl_number].b_addr = cl->b_addr;
+ wbp->cl_clusters[wbp->cl_number].e_addr = cl->e_addr;
+
+ wbp->cl_clusters[wbp->cl_number].io_flags = 0;
+
+ if (flags & IO_NOCACHE) {
+ wbp->cl_clusters[wbp->cl_number].io_flags |= CLW_IONOCACHE;
+ }
+
+ if (flags & IO_PASSIVE) {
+ wbp->cl_clusters[wbp->cl_number].io_flags |= CLW_IOPASSIVE;
+ }
+
+ wbp->cl_number++;
+delay_io:
+ lck_mtx_unlock(&wbp->cl_lockw);
+ return;