#include <sys/kdebug.h>
#define CL_READ 0x01
-#define CL_ASYNC 0x02
-#define CL_COMMIT 0x04
+#define CL_WRITE 0x02
+#define CL_ASYNC 0x04
+#define CL_COMMIT 0x08
#define CL_PAGEOUT 0x10
#define CL_AGE 0x20
#define CL_NOZERO 0x40
* can represent it in a 32 bit int
*/
#define MAX_IO_REQUEST_SIZE (1024 * 1024 * 256)
-#define MAX_IO_CONTIG_SIZE (1024 * 1024 * 8)
-#define MAX_VECTS 16
-
-/*
- * note: MAX_CLUSTER_SIZE CANNOT be larger than MAX_UPL_TRANSFER
- */
-#define MAX_CLUSTER_SIZE (MAX_UPL_TRANSFER)
-#define MAX_PREFETCH (MAX_CLUSTER_SIZE * PAGE_SIZE * 2)
+#define MAX_IO_CONTIG_SIZE (MAX_UPL_SIZE * PAGE_SIZE)
+#define MAX_VECTS 16
#define MIN_DIRECT_WRITE_SIZE (4 * PAGE_SIZE)
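+/*
+ * cluster and prefetch limits are no longer fixed constants... they are
+ * derived per-mount from the device's reported transfer limits via
+ * cluster_max_io_size() (prefetch is allowed to run up to 3 times the
+ * max read size)
+ */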
+#define MAX_CLUSTER_SIZE(vp) (cluster_max_io_size(vp->v_mount, CL_WRITE))
+#define MAX_PREFETCH(vp) (cluster_max_io_size(vp->v_mount, CL_READ) * 3)
+
+
int speculative_reads_disabled = 0;
/*
}
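+/*
+ * return the largest I/O (in bytes) the cluster layer should issue against
+ * this mount for the given direction (CL_READ, CL_WRITE, or the smaller of
+ * the two limits for any other type)... the result is bounded by the segment
+ * count and max transfer count the driver reports, clamped to the largest
+ * UPL we can build, and never allowed to drop below the old fixed
+ * MAX_UPL_TRANSFER limit... e.g. with 4K pages, a driver reporting 32 read
+ * segments and a 1 MB max read count yields min(32 * 4K, 1 MB) = 128K,
+ * which is then raised to the MAX_UPL_TRANSFER floor if that is larger
+ */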
+uint32_t
+cluster_max_io_size(mount_t mp, int type)
+{
+ uint32_t max_io_size;
+ uint32_t segcnt;
+ uint32_t maxcnt;
+
+ switch(type) {
+
+ case CL_READ:
+ segcnt = mp->mnt_segreadcnt;
+ maxcnt = mp->mnt_maxreadcnt;
+ break;
+ case CL_WRITE:
+ segcnt = mp->mnt_segwritecnt;
+ maxcnt = mp->mnt_maxwritecnt;
+ break;
+ default:
+ segcnt = min(mp->mnt_segreadcnt, mp->mnt_segwritecnt);
+ maxcnt = min(mp->mnt_maxreadcnt, mp->mnt_maxwritecnt);
+ break;
+ }
+ if (segcnt > MAX_UPL_SIZE) {
+ /*
+ * don't allow a size beyond the max UPL size we can create
+ */
+ segcnt = MAX_UPL_SIZE;
+ }
+ max_io_size = min((segcnt * PAGE_SIZE), maxcnt);
+
+ if (max_io_size < (MAX_UPL_TRANSFER * PAGE_SIZE)) {
+ /*
+ * don't allow a size smaller than the old fixed limit
+ */
+ max_io_size = (MAX_UPL_TRANSFER * PAGE_SIZE);
+ } else {
+ /*
+ * make sure the size specified is a multiple of PAGE_SIZE
+ */
+ max_io_size &= ~PAGE_MASK;
+ }
+ return (max_io_size);
+}
+
+
+
#define CLW_ALLOCATE 0x01
#define CLW_RETURNLOCKED 0x02
async_throttle = VNODE_ASYNC_THROTTLE;
else {
u_int max_cluster;
+ u_int max_cluster_size;
+ u_int max_prefetch;
+
+ max_cluster_size = MAX_CLUSTER_SIZE(vp);
+ max_prefetch = MAX_PREFETCH(vp);
- if (max_iosize > (MAX_CLUSTER_SIZE * PAGE_SIZE))
- max_cluster = (MAX_CLUSTER_SIZE * PAGE_SIZE);
+ if (max_iosize > max_cluster_size)
+ max_cluster = max_cluster_size;
else
max_cluster = max_iosize;
if (size < max_cluster)
max_cluster = size;
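+ /*
+ * limit the number of async I/Os we keep in flight so that the
+ * total outstanding read-ahead stays within the per-mount prefetch
+ * limit computed above
+ */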
- async_throttle = min(VNODE_ASYNC_THROTTLE, (MAX_PREFETCH / max_cluster) - 1);
+ async_throttle = min(VNODE_ASYNC_THROTTLE, (max_prefetch / max_cluster) - 1);
}
}
}
daddr64_t r_addr;
off_t f_offset;
int size_of_prefetch;
+ u_int max_prefetch;
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
return;
}
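+ /*
+ * the prefetch ceiling is now per-mount (3x this device's max read
+ * size) rather than the old fixed MAX_PREFETCH constant
+ */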
+ max_prefetch = MAX_PREFETCH(vp);
+
if (extent->e_addr < rap->cl_maxra) {
- if ((rap->cl_maxra - extent->e_addr) > ((MAX_PREFETCH / PAGE_SIZE) / 4)) {
+ if ((rap->cl_maxra - extent->e_addr) > ((max_prefetch / PAGE_SIZE) / 4)) {
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 2, 0);
if (f_offset < filesize) {
daddr64_t read_size;
- rap->cl_ralen = rap->cl_ralen ? min(MAX_PREFETCH / PAGE_SIZE, rap->cl_ralen << 1) : 1;
+ rap->cl_ralen = rap->cl_ralen ? min(max_prefetch / PAGE_SIZE, rap->cl_ralen << 1) : 1;
read_size = (extent->e_addr + 1) - extent->b_addr;
if (read_size > rap->cl_ralen) {
- if (read_size > MAX_PREFETCH / PAGE_SIZE)
- rap->cl_ralen = MAX_PREFETCH / PAGE_SIZE;
+ if (read_size > max_prefetch / PAGE_SIZE)
+ rap->cl_ralen = max_prefetch / PAGE_SIZE;
else
rap->cl_ralen = read_size;
}
u_int32_t io_req_size;
u_int32_t offset_in_file;
u_int32_t offset_in_iovbase;
- int io_size;
+ u_int32_t io_size;
int io_flag;
int bflag;
vm_size_t upl_size;
user_addr_t iov_base;
u_int32_t mem_alignment_mask;
u_int32_t devblocksize;
+ u_int32_t max_upl_size;
+
+
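+ /*
+ * size direct writes to the largest UPL this mount can handle
+ * rather than the old fixed MAX_UPL_TRANSFER limit
+ */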
+ max_upl_size = cluster_max_io_size(vp->v_mount, CL_WRITE);
if (flags & IO_PASSIVE)
- bflag = CL_PASSIVE;
+ bflag = CL_PASSIVE;
else
- bflag = 0;
+ bflag = 0;
/*
* When we enter this routine, we know
io_size = io_req_size & ~PAGE_MASK;
iov_base = uio_curriovbase(uio);
- if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
- io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
+ if (io_size > max_upl_size)
+ io_size = max_upl_size;
upl_offset = (vm_offset_t)((u_int32_t)iov_base & PAGE_MASK);
upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
*/
lck_mtx_lock(cl_mtxp);
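+ /*
+ * don't allow more than two full-size UPLs worth of direct-write I/O
+ * to be outstanding before waiting for some of it to complete... the
+ * tracepoints bracket the time spent waiting
+ */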
- while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) {
+ while ((iostate.io_issued - iostate.io_completed) > (2 * max_upl_size)) {
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+ iostate.io_issued, iostate.io_completed, 2 * max_upl_size, 0, 0);
+
iostate.io_wanted = 1;
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_direct", NULL);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+ iostate.io_issued, iostate.io_completed, 2 * max_upl_size, 0, 0);
}
lck_mtx_unlock(cl_mtxp);
lck_mtx_lock(cl_mtxp);
while (iostate.io_issued != iostate.io_completed) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
+
iostate.io_wanted = 1;
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_direct", NULL);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
}
lck_mtx_unlock(cl_mtxp);
}
lck_mtx_lock(cl_mtxp);
while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_IO_CONTIG_SIZE)) {
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+ iostate.io_issued, iostate.io_completed, 2 * MAX_IO_CONTIG_SIZE, 0, 0);
+
iostate.io_wanted = 1;
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_contig", NULL);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+ iostate.io_issued, iostate.io_completed, 2 * MAX_IO_CONTIG_SIZE, 0, 0);
}
lck_mtx_unlock(cl_mtxp);
}
io_size -= xsize;
}
}
- if (error == 0 && iostate.io_error == 0 && tail_size == 0) {
+ if (error == 0 && iostate.io_error == 0 && tail_size == 0 && num_upl < MAX_VECTS) {
error = cluster_io_type(uio, write_type, write_length, 0);
lck_mtx_lock(cl_mtxp);
while (iostate.io_issued != iostate.io_completed) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
+
iostate.io_wanted = 1;
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_contig", NULL);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
}
lck_mtx_unlock(cl_mtxp);
long long zero_cnt1;
off_t zero_off1;
struct cl_extent cl;
- int intersection;
struct cl_writebehind *wbp;
int bflag;
+ u_int max_cluster_pgcount;
+ u_int max_io_size;
if (flags & IO_PASSIVE)
- bflag = CL_PASSIVE;
+ bflag = CL_PASSIVE;
else
- bflag = 0;
+ bflag = 0;
if (uio) {
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
zero_off = 0;
zero_off1 = 0;
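+ /*
+ * per-mount limits: the max number of pages a write-behind cluster
+ * may span and the max amount we'll bring into the cache in one pass
+ */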
+ max_cluster_pgcount = MAX_CLUSTER_SIZE(vp) / PAGE_SIZE;
+ max_io_size = cluster_max_io_size(vp->v_mount, CL_WRITE);
+
if (flags & IO_HEADZEROFILL) {
/*
* some filesystems (HFS is one) don't support unallocated holes within a file...
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
(int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);
- if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
- total_size = MAX_UPL_TRANSFER * PAGE_SIZE;
+ if (total_size > max_io_size)
+ total_size = max_io_size;
cl.b_addr = (daddr64_t)(upl_f_offset / PAGE_SIZE_64);
* assumption... total_size <= io_resid
* because IO_HEADZEROFILL and IO_TAILZEROFILL not set
*/
- if ((start_offset + total_size) > (MAX_UPL_TRANSFER * PAGE_SIZE))
+ if ((start_offset + total_size) > max_io_size)
total_size -= start_offset;
xfer_resid = total_size;
*/
upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
- if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
- upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
+ if (upl_size > max_io_size)
+ upl_size = max_io_size;
pages_in_upl = upl_size / PAGE_SIZE;
io_size = upl_size - start_offset;
* to release the rest of the pages in the upl without modifying
* their state and mark the failed page in error
*/
- ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
+ ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
if (upl_size > PAGE_SIZE)
ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
* need to release the rest of the pages in the upl without
* modifying their state and mark the failed page in error
*/
- ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
+ ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
if (upl_size > PAGE_SIZE)
ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
*/
cluster_zero(upl, io_size, upl_size - io_size, NULL);
}
+ /*
+ * release the upl now if we hold one since...
+ * 1) pages in it may be present in the sparse cluster map
+ * and may span 2 separate buckets there... if they do and
+ * we happen to have to flush a bucket to make room and it intersects
+ * this upl, a deadlock may result on page BUSY
+ * 2) we're delaying the I/O... from this point forward we're just updating
+ * the cluster state... no need to hold the pages, so commit them
+ * 3) IO_SYNC is set...
+ * because we had to ask for a UPL that provides currently non-present pages, the
+ * UPL has been automatically set to clear the dirty flags (both software and hardware)
+ * upon committing it... this is not the behavior we want since it's possible for
+ * pages currently present as part of a mapped file to be dirtied while the I/O is in flight.
+ * we'll pick these pages back up later with the correct behavior specified.
+ * 4) we don't want to hold pages busy in a UPL and then block on the cluster lock... if a flush
+ * of this vnode is in progress, we will deadlock if the pages being flushed intersect the pages
+ * we hold since the flushing context is holding the cluster lock.
+ */
+ ubc_upl_commit_range(upl, 0, upl_size,
+ UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
+check_cluster:
+ /*
+ * calculate the last logical block number
+ * that this delayed I/O encompassed
+ */
+ cl.e_addr = (daddr64_t)((upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64);
+
if (flags & IO_SYNC)
/*
* if the IO_SYNC flag is set than we need to
* the I/O
*/
goto issue_io;
-check_cluster:
+
/*
* take the lock to protect our accesses
* of the writebehind and sparse cluster state
*/
wbp = cluster_get_wbp(vp, CLW_ALLOCATE | CLW_RETURNLOCKED);
- /*
- * calculate the last logical block number
- * that this delayed I/O encompassed
- */
- cl.e_addr = (daddr64_t)((upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64);
-
if (wbp->cl_scmap) {
if ( !(flags & IO_NOCACHE)) {
/*
* we've fallen into the sparse
* cluster method of delaying dirty pages
- * first, we need to release the upl if we hold one
- * since pages in it may be present in the sparse cluster map
- * and may span 2 separate buckets there... if they do and
- * we happen to have to flush a bucket to make room and it intersects
- * this upl, a deadlock may result on page BUSY
*/
- if (upl_size)
- ubc_upl_commit_range(upl, 0, upl_size,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-
sparse_cluster_add(wbp, vp, &cl, newEOF, callback, callback_arg);
lck_mtx_unlock(&wbp->cl_lockw);
* to uncached writes on the file, so go ahead
* and push whatever's in the sparse map
* and switch back to normal clustering
- *
- * see the comment above concerning a possible deadlock...
*/
- if (upl_size) {
- ubc_upl_commit_range(upl, 0, upl_size,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
- /*
- * setting upl_size to 0 keeps us from committing a
- * second time in the start_new_cluster path
- */
- upl_size = 0;
- }
- sparse_cluster_push(wbp, vp, newEOF, PUSH_ALL, callback, callback_arg);
-
wbp->cl_number = 0;
+
+ sparse_cluster_push(wbp, vp, newEOF, PUSH_ALL, callback, callback_arg);
/*
* no clusters of either type present at this point
* so just go directly to start_new_cluster since
*/
goto start_new_cluster;
}
- upl_offset = 0;
-
if (wbp->cl_number == 0)
/*
* no clusters currently present
/*
* the current write starts at or after the current cluster
*/
- if (cl.e_addr <= (wbp->cl_clusters[cl_index].b_addr + MAX_CLUSTER_SIZE)) {
+ if (cl.e_addr <= (wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount)) {
/*
* we have a write that fits entirely
* within the existing cluster limits
wbp->cl_clusters[cl_index].e_addr = cl.e_addr;
break;
}
- if (cl.b_addr < (wbp->cl_clusters[cl_index].b_addr + MAX_CLUSTER_SIZE)) {
+ if (cl.b_addr < (wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount)) {
/*
* we have a write that starts in the middle of the current cluster
* but extends beyond the cluster's limit... we know this because
* note that we'll always have a leftover tail in this case since
* full absorption would have occurred in the clause above
*/
- wbp->cl_clusters[cl_index].e_addr = wbp->cl_clusters[cl_index].b_addr + MAX_CLUSTER_SIZE;
-
- if (upl_size) {
- daddr64_t start_pg_in_upl;
-
- start_pg_in_upl = (daddr64_t)(upl_f_offset / PAGE_SIZE_64);
-
- if (start_pg_in_upl < wbp->cl_clusters[cl_index].e_addr) {
- intersection = (int)((wbp->cl_clusters[cl_index].e_addr - start_pg_in_upl) * PAGE_SIZE);
-
- ubc_upl_commit_range(upl, upl_offset, intersection,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
- upl_f_offset += intersection;
- upl_offset += intersection;
- upl_size -= intersection;
- }
- }
+ wbp->cl_clusters[cl_index].e_addr = wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount;
+
cl.b_addr = wbp->cl_clusters[cl_index].e_addr;
}
/*
/*
* the current write starts in front of the cluster we're currently considering
*/
- if ((wbp->cl_clusters[cl_index].e_addr - cl.b_addr) <= MAX_CLUSTER_SIZE) {
+ if ((wbp->cl_clusters[cl_index].e_addr - cl.b_addr) <= max_cluster_pgcount) {
/*
* we can just merge the new request into
* this cluster and leave it in the cache
/*
* the current write completely
* envelops the existing cluster and since
- * each write is limited to at most MAX_CLUSTER_SIZE pages
+ * each write is limited to at most max_cluster_pgcount pages
* we can just use the start and last blocknos of the write
* to generate the cluster limits
*/
* get an intersection with the current write
*
*/
- if (cl.e_addr > wbp->cl_clusters[cl_index].e_addr - MAX_CLUSTER_SIZE) {
+ if (cl.e_addr > wbp->cl_clusters[cl_index].e_addr - max_cluster_pgcount) {
/*
* the current write extends into the proposed cluster
* clip the length of the current write after first combining its
* tail with the newly shaped cluster
*/
- wbp->cl_clusters[cl_index].b_addr = wbp->cl_clusters[cl_index].e_addr - MAX_CLUSTER_SIZE;
-
- if (upl_size) {
- intersection = (int)((cl.e_addr - wbp->cl_clusters[cl_index].b_addr) * PAGE_SIZE);
-
- if ((u_int)intersection > upl_size)
- /*
- * because the current write may consist of a number of pages found in the cache
- * which are not part of the UPL, we may have an intersection that exceeds
- * the size of the UPL that is also part of this write
- */
- intersection = upl_size;
-
- ubc_upl_commit_range(upl, upl_offset + (upl_size - intersection), intersection,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
- upl_size -= intersection;
- }
+ wbp->cl_clusters[cl_index].b_addr = wbp->cl_clusters[cl_index].e_addr - max_cluster_pgcount;
+
cl.e_addr = wbp->cl_clusters[cl_index].b_addr;
}
/*
* no more room in the normal cluster mechanism
* so let's switch to the more expansive but expensive
* sparse mechanism....
- * first, we need to release the upl if we hold one
- * since pages in it may be present in the sparse cluster map (after the cluster_switch)
- * and may span 2 separate buckets there... if they do and
- * we happen to have to flush a bucket to make room and it intersects
- * this upl, a deadlock may result on page BUSY
*/
- if (upl_size)
- ubc_upl_commit_range(upl, upl_offset, upl_size,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-
sparse_cluster_switch(wbp, vp, newEOF, callback, callback_arg);
sparse_cluster_add(wbp, vp, &cl, newEOF, callback, callback_arg);
wbp->cl_number++;
delay_io:
- if (upl_size)
- ubc_upl_commit_range(upl, upl_offset, upl_size,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-
lck_mtx_unlock(&wbp->cl_lockw);
continue;
issue_io:
/*
- * we don't hold the vnode lock at this point
+ * we don't hold the lock at this point
*
- * because we had to ask for a UPL that provides currenty non-present pages, the
- * UPL has been automatically set to clear the dirty flags (both software and hardware)
- * upon committing it... this is not the behavior we want since it's possible for
- * pages currently present as part of a mapped file to be dirtied while the I/O is in flight.
- * in order to maintain some semblance of coherency with mapped writes
- * we need to drop the current upl and pick it back up with COPYOUT_FROM set
+ * we've already dropped the current upl, so pick it back up with COPYOUT_FROM set
* so that we correctly deal with a change in state of the hardware modify bit...
* we do this via cluster_push_now... by passing along the IO_SYNC flag, we force
* cluster_push_now to wait until all the I/Os have completed... cluster_push_now is also
* responsible for generating the correct sized I/O(s)
*/
- ubc_upl_commit_range(upl, 0, upl_size,
- UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-
- cl.e_addr = (upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64;
-
retval = cluster_push_now(vp, &cl, newEOF, flags, callback, callback_arg);
}
}
upl_page_info_t *pl;
upl_t upl;
vm_offset_t upl_offset;
- int upl_size;
+ u_int32_t upl_size;
off_t upl_f_offset;
int start_offset;
int start_pg;
u_int32_t size_of_prefetch;
u_int32_t xsize;
u_int32_t io_size;
- u_int32_t max_rd_size = MAX_PREFETCH;
+ u_int32_t max_rd_size;
+ u_int32_t max_io_size;
+ u_int32_t max_prefetch;
u_int rd_ahead_enabled = 1;
u_int prefetch_enabled = 1;
struct cl_readahead * rap;
take_reference = 0;
if (flags & IO_PASSIVE)
- bflag = CL_PASSIVE;
+ bflag = CL_PASSIVE;
else
- bflag = 0;
+ bflag = 0;
+
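+ /*
+ * per-mount limits for this file: the prefetch ceiling, the current
+ * read-ahead cap, and the max size of a single cached read
+ */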
+ max_prefetch = MAX_PREFETCH(vp);
+ max_rd_size = max_prefetch;
+ max_io_size = cluster_max_io_size(vp->v_mount, CL_READ);
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
(int)uio->uio_offset, io_req_size, (int)filesize, flags, 0);
* we can notice that our I/O pipe is running dry and
* get the next I/O issued before it does go dry
*/
- if (last_ioread_offset && io_size > ((MAX_UPL_TRANSFER * PAGE_SIZE) / 4))
- io_resid = ((MAX_UPL_TRANSFER * PAGE_SIZE) / 4);
+ if (last_ioread_offset && io_size > (max_io_size / 4))
+ io_resid = (max_io_size / 4);
else
io_resid = io_size;
upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
if (flags & IO_NOCACHE) {
- if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
- upl_size = (MAX_UPL_TRANSFER * PAGE_SIZE);
+ if (upl_size > max_io_size)
+ upl_size = max_io_size;
} else {
- if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE) / 4)
- upl_size = (MAX_UPL_TRANSFER * PAGE_SIZE) / 4;
+ if (upl_size > max_io_size / 4)
+ upl_size = max_io_size / 4;
}
pages_in_upl = upl_size / PAGE_SIZE;
lck_mtx_lock(cl_mtxp);
while (iostate.io_issued != iostate.io_completed) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
+
iostate.io_wanted = 1;
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_copy", NULL);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
}
lck_mtx_unlock(cl_mtxp);
rd_ahead_enabled = 1;
prefetch_enabled = 1;
- max_rd_size = MAX_PREFETCH;
+ max_rd_size = max_prefetch;
last_ioread_offset = 0;
}
}
u_int32_t xsize;
u_int32_t devblocksize;
u_int32_t mem_alignment_mask;
- u_int32_t max_rd_size = MAX_UPL_TRANSFER * PAGE_SIZE;
- u_int32_t max_rd_ahead = MAX_PREFETCH;
+ u_int32_t max_upl_size;
+ u_int32_t max_rd_size;
+ u_int32_t max_rd_ahead;
+
+
+ max_upl_size = cluster_max_io_size(vp->v_mount, CL_READ);
+
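+ /*
+ * start with the per-mount read limit and allow up to two full-size
+ * reads to be outstanding at once
+ */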
+ max_rd_size = max_upl_size;
+ max_rd_ahead = max_rd_size * 2;
+
if (flags & IO_PASSIVE)
- bflag = CL_PASSIVE;
+ bflag = CL_PASSIVE;
else
- bflag = 0;
+ bflag = 0;
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
(int)uio->uio_offset, (int)filesize, *read_type, *read_length, 0);
max_rd_size = HARD_THROTTLE_MAXSIZE;
max_rd_ahead = HARD_THROTTLE_MAXSIZE - 1;
} else {
- max_rd_size = MAX_UPL_TRANSFER * PAGE_SIZE;
- max_rd_ahead = MAX_PREFETCH;
+ max_rd_size = max_upl_size;
+ max_rd_ahead = max_rd_size * 2;
}
io_start = io_size = io_req_size;
lck_mtx_lock(cl_mtxp);
while ((iostate.io_issued - iostate.io_completed) > max_rd_ahead) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+ iostate.io_issued, iostate.io_completed, max_rd_ahead, 0, 0);
+
iostate.io_wanted = 1;
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_direct", NULL);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+ iostate.io_issued, iostate.io_completed, max_rd_ahead, 0, 0);
}
lck_mtx_unlock(cl_mtxp);
lck_mtx_lock(cl_mtxp);
while (iostate.io_issued != iostate.io_completed) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
+
iostate.io_wanted = 1;
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_direct", NULL);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
}
lck_mtx_unlock(cl_mtxp);
}
int bflag;
if (flags & IO_PASSIVE)
- bflag = CL_PASSIVE;
+ bflag = CL_PASSIVE;
else
- bflag = 0;
+ bflag = 0;
/*
* When we enter this routine, we know
if (iostate.io_issued) {
lck_mtx_lock(cl_mtxp);
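+ /*
+ * cap outstanding contig reads at twice MAX_IO_CONTIG_SIZE, matching
+ * the limit used on the contig write path
+ */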
- while ((iostate.io_issued - iostate.io_completed) > (3 * MAX_IO_CONTIG_SIZE)) {
+ while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_IO_CONTIG_SIZE)) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+ iostate.io_issued, iostate.io_completed, 2 * MAX_IO_CONTIG_SIZE, 0, 0);
+
iostate.io_wanted = 1;
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_contig", NULL);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+ iostate.io_issued, iostate.io_completed, 2 * MAX_IO_CONTIG_SIZE, 0, 0);
}
lck_mtx_unlock(cl_mtxp);
}
lck_mtx_lock(cl_mtxp);
while (iostate.io_issued != iostate.io_completed) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
+
iostate.io_wanted = 1;
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_contig", NULL);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+ iostate.io_issued, iostate.io_completed, 0, 0, 0);
}
lck_mtx_unlock(cl_mtxp);
int retval = 0;
int issued_io;
int skip_range;
-
+ uint32_t max_io_size;
+
+
if ( !UBCINFOEXISTS(vp))
return(EINVAL);
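+ /*
+ * a negative residual count makes no sense... reject it up front
+ */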
+ if (resid < 0)
+ return(EINVAL);
+
+ max_io_size = cluster_max_io_size(vp->v_mount, CL_READ);
+
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
- (int)f_offset, resid, (int)filesize, 0, 0);
+ (int)f_offset, resid, (int)filesize, 0, 0);
while (resid && f_offset < filesize && retval == 0) {
/*
io_size = max_size;
upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
- if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
- upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
+ if ((uint32_t)upl_size > max_io_size)
+ upl_size = max_io_size;
skip_range = 0;
/*
int cl_len;
int cl_pushed = 0;
struct cl_wextent l_clusters[MAX_CLUSTERS];
-
+ u_int max_cluster_pgcount;
+
+
+ max_cluster_pgcount = MAX_CLUSTER_SIZE(vp) / PAGE_SIZE;
/*
* the write behind context exists and has
* already been locked...
* of order... if this occurs at the tail of the last cluster, we don't want to fall into the sparse cluster world...
*/
for (i = 0; i < MAX_CLUSTERS - 1; i++) {
- if ((l_clusters[i].e_addr - l_clusters[i].b_addr) != MAX_CLUSTER_SIZE)
+ if ((l_clusters[i].e_addr - l_clusters[i].b_addr) != max_cluster_pgcount)
goto dont_try;
if (l_clusters[i].e_addr != l_clusters[i+1].b_addr)
goto dont_try;
}
}
- /*
- * drop the lock while we're firing off the I/Os...
- * this is safe since I'm working off of a private sorted copy
- * of the clusters, and I'm going to re-evaluate the public
- * state after I retake the lock
- *
- * we need to drop it to avoid a lock inversion when trying to
- * grab pages into the UPL... another thread in 'write' may
- * have these pages in its UPL and be blocked trying to
- * gain the write-behind lock for this vnode
- */
- lck_mtx_unlock(&wbp->cl_lockw);
-
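+ /*
+ * note that we no longer drop the write-behind lock around the pushes
+ * below... cluster_write_copy now commits its UPL before taking this
+ * lock, so a thread in 'write' can no longer be blocked here while
+ * holding pages busy, and the old lock inversion can't occur
+ */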
for (cl_index = 0; cl_index < cl_len; cl_index++) {
int flags;
struct cl_extent cl;
if ( !(push_flag & PUSH_ALL) )
break;
}
- lck_mtx_lock(&wbp->cl_lockw);
-
dont_try:
if (cl_len > cl_pushed) {
/*
wbp->cl_scdirty -= (int)(cl.e_addr - cl.b_addr);
- /*
- * drop the lock while we're firing off the I/Os...
- * this is safe since I've already updated the state
- * this lock is protecting and I'm going to re-evaluate
- * the public state after I retake the lock
- *
- * we need to drop it to avoid a lock inversion when trying to
- * grab pages into the UPL... another thread in 'write' may
- * have these pages in its UPL and be blocked trying to
- * gain the write-behind lock for this vnode
- */
- lck_mtx_unlock(&wbp->cl_lockw);
-
cluster_push_now(vp, &cl, EOF, push_flag & IO_PASSIVE, callback, callback_arg);
- lck_mtx_lock(&wbp->cl_lockw);
-
if ( !(push_flag & PUSH_ALL) )
break;
}