- }
- xfer_resid = io_size;
- io_offset = start_offset;
-
- while (zero_cnt && xfer_resid) {
-
- if (zero_cnt < (long long)xfer_resid)
- bytes_to_zero = zero_cnt;
- else
- bytes_to_zero = xfer_resid;
-
- if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
- cluster_zero(upl, io_offset, bytes_to_zero, NULL);
- } else {
- int zero_pg_index;
-
- bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
- zero_pg_index = (int)((zero_off - upl_f_offset) / PAGE_SIZE_64);
-
- if ( !upl_valid_page(pl, zero_pg_index)) {
- cluster_zero(upl, io_offset, bytes_to_zero, NULL);
-
- } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
- !upl_dirty_page(pl, zero_pg_index)) {
- cluster_zero(upl, io_offset, bytes_to_zero, NULL);
- }
- }
- xfer_resid -= bytes_to_zero;
- zero_cnt -= bytes_to_zero;
- zero_off += bytes_to_zero;
- io_offset += bytes_to_zero;
- }
- if (xfer_resid && io_resid) {
- bytes_to_move = min(io_resid, xfer_resid);
-
- retval = cluster_copy_upl_data(uio, upl, io_offset, bytes_to_move);
-
- if (retval) {
-
- ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
- (int)upl, 0, 0, retval, 0);
- } else {
- io_resid -= bytes_to_move;
- xfer_resid -= bytes_to_move;
- io_offset += bytes_to_move;
- }
- }
- while (xfer_resid && zero_cnt1 && retval == 0) {
-
- if (zero_cnt1 < (long long)xfer_resid)
- bytes_to_zero = zero_cnt1;
- else
- bytes_to_zero = xfer_resid;
-
- if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
- cluster_zero(upl, io_offset, bytes_to_zero, NULL);
- } else {
- int zero_pg_index;
-
- bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
- zero_pg_index = (int)((zero_off1 - upl_f_offset) / PAGE_SIZE_64);
-
- if ( !upl_valid_page(pl, zero_pg_index)) {
- cluster_zero(upl, io_offset, bytes_to_zero, NULL);
- } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
- !upl_dirty_page(pl, zero_pg_index)) {
- cluster_zero(upl, io_offset, bytes_to_zero, NULL);
- }
- }
- xfer_resid -= bytes_to_zero;
- zero_cnt1 -= bytes_to_zero;
- zero_off1 += bytes_to_zero;
- io_offset += bytes_to_zero;
- }
-
- if (retval == 0) {
- int cl_index;
- int can_delay;
-
- io_size += start_offset;
-
- if ((upl_f_offset + io_size) >= newEOF && io_size < upl_size) {
- /*
- * if we're extending the file with this write
- * we'll zero fill the rest of the page so that
- * if the file gets extended again in such a way as to leave a
- * hole starting at this EOF, we'll have zeros in the correct spot
- */
- cluster_zero(upl, io_size, upl_size - io_size, NULL);
- }
- if (flags & IO_SYNC)
- /*
- * if the IO_SYNC flag is set then we need to
- * bypass any clusters and immediately issue
- * the I/O
- */
- goto issue_io;
-check_cluster:
- /*
- * take the lock to protect our accesses
- * of the writebehind and sparse cluster state
- */
- wbp = cluster_get_wbp(vp, CLW_ALLOCATE | CLW_RETURNLOCKED);
-
- /*
- * calculate the last logical block number
- * that this delayed I/O encompassed
- */
- cl.e_addr = (daddr64_t)((upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64);
-
- if (wbp->cl_scmap) {
-
- if ( !(flags & IO_NOCACHE)) {
- /*
- * we've fallen into the sparse
- * cluster method of delaying dirty pages
- * first, we need to release the upl if we hold one
- * since pages in it may be present in the sparse cluster map
- * and may span 2 separate buckets there... if they do and
- * we happen to have to flush a bucket to make room and it intersects
- * this upl, a deadlock may result on page BUSY
- */
- if (upl_size)
- ubc_upl_commit_range(upl, 0, upl_size,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-
- sparse_cluster_add(wbp, vp, &cl, newEOF);
-
- lck_mtx_unlock(&wbp->cl_lockw);
-
- continue;
- }
- /*
- * must have done cached writes that fell into
- * the sparse cluster mechanism... we've switched
- * to uncached writes on the file, so go ahead
- * and push whatever's in the sparse map
- * and switch back to normal clustering
- *
- * see the comment above concerning a possible deadlock...
- */
- if (upl_size) {
- ubc_upl_commit_range(upl, 0, upl_size,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
- /*
- * setting upl_size to 0 keeps us from committing a
- * second time in the start_new_cluster path
- */
- upl_size = 0;
- }
- sparse_cluster_push(wbp, vp, newEOF, 1);
-
- wbp->cl_number = 0;
- /*
- * no clusters of either type present at this point
- * so just go directly to start_new_cluster since
- * we know we need to delay this I/O since we've
- * already released the pages back into the cache
- * to avoid the deadlock with sparse_cluster_push
- */
- goto start_new_cluster;
- }
- upl_offset = 0;
-
- if (wbp->cl_number == 0)
- /*
- * no clusters currently present
- */
- goto start_new_cluster;
-
- for (cl_index = 0; cl_index < wbp->cl_number; cl_index++) {
- /*
- * check each cluster that we currently hold
- * try to merge some or all of this write into
- * one or more of the existing clusters... if
- * any portion of the write remains, start a
- * new cluster
- */
- if (cl.b_addr >= wbp->cl_clusters[cl_index].b_addr) {
- /*
- * the current write starts at or after the current cluster
- */
- if (cl.e_addr <= (wbp->cl_clusters[cl_index].b_addr + MAX_UPL_TRANSFER)) {
- /*
- * we have a write that fits entirely
- * within the existing cluster limits
- */
- if (cl.e_addr > wbp->cl_clusters[cl_index].e_addr)
- /*
- * update our idea of where the cluster ends
- */
- wbp->cl_clusters[cl_index].e_addr = cl.e_addr;
- break;
- }
- if (cl.b_addr < (wbp->cl_clusters[cl_index].b_addr + MAX_UPL_TRANSFER)) {
- /*
- * we have a write that starts in the middle of the current cluster
- * but extends beyond the cluster's limit... we know this because
- * of the previous checks
- * we'll extend the current cluster to the max
- * and update the b_addr for the current write to reflect that
- * the head of it was absorbed into this cluster...
- * note that we'll always have a leftover tail in this case since
- * full absorption would have occurred in the clause above
- */
- wbp->cl_clusters[cl_index].e_addr = wbp->cl_clusters[cl_index].b_addr + MAX_UPL_TRANSFER;
-
- if (upl_size) {
- daddr64_t start_pg_in_upl;
-
- start_pg_in_upl = (daddr64_t)(upl_f_offset / PAGE_SIZE_64);
-
- if (start_pg_in_upl < wbp->cl_clusters[cl_index].e_addr) {
- intersection = (int)((wbp->cl_clusters[cl_index].e_addr - start_pg_in_upl) * PAGE_SIZE);
-
- ubc_upl_commit_range(upl, upl_offset, intersection,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
- upl_f_offset += intersection;
- upl_offset += intersection;
- upl_size -= intersection;
- }
- }
- cl.b_addr = wbp->cl_clusters[cl_index].e_addr;
- }
- /*
- * we come here for the case where the current write starts
- * beyond the limit of the existing cluster or we have a leftover
- * tail after a partial absorption
- *
- * in either case, we'll check the remaining clusters before
- * starting a new one
- */
- } else {
- /*
- * the current write starts in front of the cluster we're currently considering
- */
- if ((wbp->cl_clusters[cl_index].e_addr - cl.b_addr) <= MAX_UPL_TRANSFER) {
- /*
- * we can just merge the new request into
- * this cluster and leave it in the cache
- * since the resulting cluster is still
- * less than the maximum allowable size
- */
- wbp->cl_clusters[cl_index].b_addr = cl.b_addr;
-
- if (cl.e_addr > wbp->cl_clusters[cl_index].e_addr) {
- /*
- * the current write completely
- * envelops the existing cluster and since
- * each write is limited to at most MAX_UPL_TRANSFER bytes
- * we can just use the start and last blocknos of the write
- * to generate the cluster limits
- */
- wbp->cl_clusters[cl_index].e_addr = cl.e_addr;
- }
- break;
- }
-
- /*
- * if we were to combine this write with the current cluster
- * we would exceed the cluster size limit.... so,
- * let's see if there's any overlap of the new I/O with
- * the cluster we're currently considering... in fact, we'll
- * stretch the cluster out to its full limit and see if we
- * get an intersection with the current write
- *
- */
- if (cl.e_addr > wbp->cl_clusters[cl_index].e_addr - MAX_UPL_TRANSFER) {
- /*
- * the current write extends into the proposed cluster
- * clip the length of the current write after first combining its
- * tail with the newly shaped cluster
- */
- wbp->cl_clusters[cl_index].b_addr = wbp->cl_clusters[cl_index].e_addr - MAX_UPL_TRANSFER;
-
- if (upl_size) {
- intersection = (int)((cl.e_addr - wbp->cl_clusters[cl_index].b_addr) * PAGE_SIZE);
-
- if (intersection > upl_size)
- /*
- * because the current write may consist of a number of pages found in the cache
- * which are not part of the UPL, we may have an intersection that exceeds
- * the size of the UPL that is also part of this write
- */
- intersection = upl_size;
-
- ubc_upl_commit_range(upl, upl_offset + (upl_size - intersection), intersection,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
- upl_size -= intersection;
- }
- cl.e_addr = wbp->cl_clusters[cl_index].b_addr;
- }
- /*
- * if we get here, there was no way to merge
- * any portion of this write with this cluster
- * or we could only merge part of it which
- * will leave a tail...
- * we'll check the remaining clusters before starting a new one
- */
- }
- }
- if (cl_index < wbp->cl_number)
- /*
- * we found an existing cluster(s) that we
- * could entirely merge this I/O into
- */
- goto delay_io;