* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
-#define CL_READ 0x01
-#define CL_WRITE 0x02
-#define CL_ASYNC 0x04
-#define CL_COMMIT 0x08
-#define CL_PAGEOUT 0x10
-#define CL_AGE 0x20
-#define CL_NOZERO 0x40
-#define CL_PAGEIN 0x80
-#define CL_DEV_MEMORY 0x100
-#define CL_PRESERVE 0x200
-#define CL_THROTTLE 0x400
-#define CL_KEEPCACHED 0x800
-#define CL_DIRECT_IO 0x1000
-#define CL_PASSIVE 0x2000
-#define CL_IOSTREAMING 0x4000
-#define CL_CLOSE 0x8000
-#define CL_ENCRYPTED 0x10000
-#define CL_RAW_ENCRYPTED 0x20000
-#define CL_NOCACHE 0x40000
-
-#define MAX_VECTOR_UPL_ELEMENTS 8
-#define MAX_VECTOR_UPL_SIZE (2 * MAX_UPL_SIZE_BYTES)
-
-#define CLUSTER_IO_WAITING ((buf_t)1)
+#define CL_READ 0x01
+#define CL_WRITE 0x02
+#define CL_ASYNC 0x04
+#define CL_COMMIT 0x08
+#define CL_PAGEOUT 0x10
+#define CL_AGE 0x20
+#define CL_NOZERO 0x40
+#define CL_PAGEIN 0x80
+#define CL_DEV_MEMORY 0x100
+#define CL_PRESERVE 0x200
+#define CL_THROTTLE 0x400
+#define CL_KEEPCACHED 0x800
+#define CL_DIRECT_IO 0x1000
+#define CL_PASSIVE 0x2000
+#define CL_IOSTREAMING 0x4000
+#define CL_CLOSE 0x8000
+#define CL_ENCRYPTED 0x10000
+#define CL_RAW_ENCRYPTED 0x20000
+#define CL_NOCACHE 0x40000
+
+#define MAX_VECTOR_UPL_ELEMENTS 8
+#define MAX_VECTOR_UPL_SIZE (2 * MAX_UPL_SIZE_BYTES)
+
+#define CLUSTER_IO_WAITING ((buf_t)1)
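(For orientation: these CL_* bits are OR'd together into the flags argument that callers hand to cluster_io(); the combination below is only an illustrative sketch, not a line from this diff.)

	/* sketch: a typical asynchronous, committing, throttle-aware buffered write */
	int io_flags = CL_WRITE | CL_ASYNC | CL_COMMIT | CL_THROTTLE;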
- u_int io_completed; /* amount of io that has currently completed */
- u_int io_issued; /* amount of io that was successfully issued */
- int io_error; /* error code of first error encountered */
- int io_wanted; /* someone is sleeping waiting for a change in state */
+ u_int io_completed; /* amount of io that has currently completed */
+ u_int io_issued; /* amount of io that was successfully issued */
+ int io_error; /* error code of first error encountered */
+ int io_wanted; /* someone is sleeping waiting for a change in state */
static int cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t min_length);
static int cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int non_rounded_size,
static int cluster_iodone(buf_t bp, void *callback_arg);
static int cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags, vnode_t vp);
static int cluster_is_throttled(vnode_t vp);
static void cluster_read_upl_release(upl_t upl, int start_pg, int last_pg, int take_reference);
static int cluster_copy_ubc_data_internal(vnode_t vp, struct uio *uio, int *io_resid, int mark_dirty, int take_reference);
-static int cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t filesize, int flags,
- int (*)(buf_t, void *), void *callback_arg);
+static int cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t filesize, int flags,
+ int (*)(buf_t, void *), void *callback_arg);
static void cluster_update_state_internal(vnode_t vp, struct cl_extent *cl, int flags, boolean_t defer_writes, boolean_t *first_pass,
- off_t write_off, int write_cnt, off_t newEOF, int (*callback)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
+ off_t write_off, int write_cnt, off_t newEOF, int (*callback)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
static int cluster_align_phys_io(vnode_t vp, struct uio *uio, addr64_t usr_paddr, u_int32_t xsize, int flags, int (*)(buf_t, void *), void *callback_arg);
-static int cluster_read_prefetch(vnode_t vp, off_t f_offset, u_int size, off_t filesize, int (*callback)(buf_t, void *), void *callback_arg, int bflag);
-static void cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct cl_readahead *ra,
- int (*callback)(buf_t, void *), void *callback_arg, int bflag);
+static int cluster_read_prefetch(vnode_t vp, off_t f_offset, u_int size, off_t filesize, int (*callback)(buf_t, void *), void *callback_arg, int bflag);
+static void cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct cl_readahead *ra,
+ int (*callback)(buf_t, void *), void *callback_arg, int bflag);
-static int cluster_push_now(vnode_t vp, struct cl_extent *, off_t EOF, int flags, int (*)(buf_t, void *), void *callback_arg, boolean_t vm_ioitiated);
+static int cluster_push_now(vnode_t vp, struct cl_extent *, off_t EOF, int flags, int (*)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
-static int cluster_try_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int push_flag, int flags, int (*)(buf_t, void *),
- void *callback_arg, int *err, boolean_t vm_initiated);
+static int cluster_try_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int push_flag, int flags, int (*)(buf_t, void *),
+ void *callback_arg, int *err, boolean_t vm_initiated);
-static int sparse_cluster_switch(struct cl_writebehind *, vnode_t vp, off_t EOF, int (*)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
-static int sparse_cluster_push(struct cl_writebehind *, void **cmapp, vnode_t vp, off_t EOF, int push_flag,
- int io_flags, int (*)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
-static int sparse_cluster_add(struct cl_writebehind *, void **cmapp, vnode_t vp, struct cl_extent *, off_t EOF,
- int (*)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
+static int sparse_cluster_switch(struct cl_writebehind *, vnode_t vp, off_t EOF, int (*)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
+static int sparse_cluster_push(struct cl_writebehind *, void **cmapp, vnode_t vp, off_t EOF, int push_flag,
+ int io_flags, int (*)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
+static int sparse_cluster_add(struct cl_writebehind *, void **cmapp, vnode_t vp, struct cl_extent *, off_t EOF,
+ int (*)(buf_t, void *), void *callback_arg, boolean_t vm_initiated);
static kern_return_t vfs_drt_mark_pages(void **cmapp, off_t offset, u_int length, u_int *setcountp);
static kern_return_t vfs_drt_get_cluster(void **cmapp, off_t *offsetp, u_int *lengthp);
static kern_return_t vfs_drt_control(void **cmapp, int op_type);
-#define IO_SCALE(vp, base) (vp->v_mount->mnt_ioscale * (base))
-#define MAX_CLUSTER_SIZE(vp) (cluster_max_io_size(vp->v_mount, CL_WRITE))
-#define MAX_PREFETCH(vp, size, is_ssd) (size * IO_SCALE(vp, ((is_ssd) ? PREFETCH_SSD : PREFETCH)))
+#define IO_SCALE(vp, base) (vp->v_mount->mnt_ioscale * (base))
+#define MAX_CLUSTER_SIZE(vp) (cluster_max_io_size(vp->v_mount, CL_WRITE))
+#define MAX_PREFETCH(vp, size, is_ssd) (size * IO_SCALE(vp, ((is_ssd) ? PREFETCH_SSD : PREFETCH)))
- /*
- * don't allow a size beyond the max UPL size we can create
- */
- segcnt = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
- }
- max_io_size = min((segcnt * PAGE_SIZE), maxcnt);
-
- if (max_io_size < MAX_UPL_TRANSFER_BYTES) {
- /*
- * don't allow a size smaller than the old fixed limit
- */
- max_io_size = MAX_UPL_TRANSFER_BYTES;
- } else {
- /*
- * make sure the size specified is a multiple of PAGE_SIZE
- */
- max_io_size &= ~PAGE_MASK;
- }
- return (max_io_size);
+ /*
+ * don't allow a size beyond the max UPL size we can create
+ */
+ segcnt = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
+ }
+ max_io_size = min((segcnt * PAGE_SIZE), maxcnt);
+
+ if (max_io_size < MAX_UPL_TRANSFER_BYTES) {
+ /*
+ * don't allow a size smaller than the old fixed limit
+ */
+ max_io_size = MAX_UPL_TRANSFER_BYTES;
+ } else {
+ /*
+ * make sure the size specified is a multiple of PAGE_SIZE
+ */
+ max_io_size &= ~PAGE_MASK;
+ }
+ return max_io_size;
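(A quick worked example of the clamping above, with made-up numbers: if the driver limits report segcnt = 32 and maxcnt = 512 KB, then min(32 * PAGE_SIZE, maxcnt) is 128 KB on 4 KB pages; if that falls below MAX_UPL_TRANSFER_BYTES it is raised to that floor, otherwise it is trimmed to a PAGE_SIZE multiple. The same arithmetic as a standalone sketch, assuming 4 KB pages and a hypothetical 256 KB floor:)

	/* sketch only -- the real bounds come from the driver/UPL limits */
	uint32_t pages   = 32;                      /* hypothetical segcnt  */
	uint32_t maxcnt  = 512 * 1024;              /* hypothetical maxcnt  */
	uint32_t floor_b = 256 * 1024;              /* hypothetical floor   */
	uint32_t max_io  = (pages * 4096 < maxcnt) ? pages * 4096 : maxcnt;  /* 128 KB */
	if (max_io < floor_b)
		max_io = floor_b;                   /* raised to the floor  */
	else
		max_io &= ~(4096 - 1);              /* page-align downward  */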
-#define CLW_ALLOCATE 0x01
-#define CLW_RETURNLOCKED 0x02
-#define CLW_IONOCACHE 0x04
-#define CLW_IOPASSIVE 0x08
+#define CLW_ALLOCATE 0x01
+#define CLW_RETURNLOCKED 0x02
+#define CLW_IONOCACHE 0x04
+#define CLW_IOPASSIVE 0x08
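(These CLW_* bits form the flags argument of the per-vnode write-behind lookup; the CLW_ALLOCATE branch appears further down in this diff. A minimal sketch of a caller, hedged since the call site itself is not in this hunk:)

	/* sketch: fetch the write-behind context, creating and returning it locked */
	struct cl_writebehind *wbp = cluster_get_wbp(vp, CLW_ALLOCATE | CLW_RETURNLOCKED);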
* once the context is present, try to grab (but don't block on)
* the lock associated with it... if someone
* else currently owns it, than the read
- if ((rap = ubc->cl_rahead) == NULL) {
- MALLOC_ZONE(rap, struct cl_readahead *, sizeof *rap, M_CLRDAHEAD, M_WAITOK);
+ if ((rap = ubc->cl_rahead) == NULL) {
+ MALLOC_ZONE(rap, struct cl_readahead *, sizeof *rap, M_CLRDAHEAD, M_WAITOK);
-
- if (ubc->cl_rahead == NULL)
- ubc->cl_rahead = rap;
- else {
- lck_mtx_destroy(&rap->cl_lockr, cl_mtx_grp);
- FREE_ZONE((void *)rap, sizeof *rap, M_CLRDAHEAD);
+
+ if (ubc->cl_rahead == NULL) {
+ ubc->cl_rahead = rap;
+ } else {
+ lck_mtx_destroy(&rap->cl_lockr, cl_mtx_grp);
+ FREE_ZONE(rap, sizeof *rap, M_CLRDAHEAD);
- if ( !(flags & CLW_ALLOCATE))
- return ((struct cl_writebehind *)NULL);
-
- MALLOC_ZONE(wbp, struct cl_writebehind *, sizeof *wbp, M_CLWRBEHIND, M_WAITOK);
+ MALLOC_ZONE(wbp, struct cl_writebehind *, sizeof *wbp, M_CLWRBEHIND, M_WAITOK);
-
- if (ubc->cl_wbehind == NULL)
- ubc->cl_wbehind = wbp;
- else {
- lck_mtx_destroy(&wbp->cl_lockw, cl_mtx_grp);
- FREE_ZONE((void *)wbp, sizeof *wbp, M_CLWRBEHIND);
+
+ if (ubc->cl_wbehind == NULL) {
+ ubc->cl_wbehind = wbp;
+ } else {
+ lck_mtx_destroy(&wbp->cl_lockw, cl_mtx_grp);
+ FREE_ZONE(wbp, sizeof *wbp, M_CLWRBEHIND);
cluster_try_push(wbp, vp, newEOF, PUSH_ALL | flags, 0, callback, callback_arg, NULL, FALSE);
- if (VNOP_BLOCKMAP(vp, f_offset, PAGE_SIZE, &blkno, &io_size, NULL, VNODE_READ | VNODE_BLOCKMAP_NO_TRACK, NULL))
- return(0);
+ if (VNOP_BLOCKMAP(vp, f_offset, PAGE_SIZE, &blkno, &io_size, NULL, VNODE_READ | VNODE_BLOCKMAP_NO_TRACK, NULL)) {
+ return 0;
+ }
iostate->io_wanted = 1;
msleep((caddr_t)&iostate->io_wanted, &iostate->io_mtxp, PRIBIO + 1, wait_name, NULL);
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
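(The msleep() above parks the thread on &iostate->io_wanted until the completion side changes the clios state; that wake-up half is not part of this hunk, so the following is only a sketch built from the io_completed/io_wanted/io_mtxp fields shown earlier:)

	/* sketch: completion side of the sleep/wake handshake */
	lck_mtx_lock(&iostate->io_mtxp);
	iostate->io_completed += transferred;    /* 'transferred' is hypothetical */
	if (iostate->io_wanted) {
		iostate->io_wanted = 0;
		wakeup(&iostate->io_wanted);     /* same channel msleep() slept on */
	}
	lck_mtx_unlock(&iostate->io_mtxp);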
* direct/uncached write, we want to dump the pages too.
*/
kern_return_t kr = upl_abort_range(associated_upl, upl_offset, size,
- ubc_upl_commit_range(upl, upl_offset, abort_size, UPL_COMMIT_FREE_ON_EMPTY);
- else {
- if (io_flags & B_PAGEIO) {
- if (io_flags & B_READ)
- page_in = 1;
- else
- page_out = 1;
- }
- if (io_flags & B_CACHE)
- /*
+ ubc_upl_commit_range(upl, upl_offset, abort_size, UPL_COMMIT_FREE_ON_EMPTY);
+ } else {
+ if (io_flags & B_PAGEIO) {
+ if (io_flags & B_READ) {
+ page_in = 1;
+ } else {
+ page_out = 1;
+ }
+ }
+ if (io_flags & B_CACHE) {
+ /*
}
lck_mtx_unlock(cl_transaction_mtxp);
if (transaction_complete == FALSE) {
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
* compute the overall size of the transaction
* in case we created one that has 'holes' in it
* 'total_size' represents the amount of I/O we
* did, not the span of the transaction w/r to the UPL
*/
transaction_size = cbp->b_uploffset + cbp->b_bcount - upl_offset;
upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, commit_size, error, b_flags, vp);
} else {
upl_flags = UPL_COMMIT_FREE_ON_EMPTY;
}
/*
* we've already waited on all of the I/Os in this transaction,
* so mark all of the buf_t's in this transaction as B_TDONE
* so that cluster_iodone sees the transaction as completed
*/
- "is greater than the maximum allowed size of "
- "%d bytes (the system PAGE_SIZE).\n",
- __FUNCTION__, non_rounded_size, PAGE_SIZE);
+ "is greater than the maximum allowed size of "
+ "%d bytes (the system PAGE_SIZE).\n",
+ __FUNCTION__, non_rounded_size, PAGE_SIZE);
+ }
* round the requested size up so that this I/O ends on a
* page boundary in case this is a 'write'... if the filesystem
* has blocks allocated to back the page beyond the EOF, we want to
* then we are going to end up
* with a page that we can't complete (the file size wasn't a multiple
* of PAGE_SIZE and we're trying to read to the end of the file
* so we'll go ahead and zero out the portion of the page we can't
* read in from the file
*/
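(A concrete instance of the zeroing described above, with made-up numbers: if the file ends at byte 10300 and PAGE_SIZE is 4096, the last page spans 8192..12287, so the 1988 bytes from 10300 through 12287 cannot be read from the file and are zero filled. The same arithmetic as a sketch, assuming 4 KB pages:)

	/* sketch: how much of the final page must be zero filled */
	off_t eof        = 10300;                                /* hypothetical */
	off_t page_base  = eof & ~((off_t)PAGE_MASK);            /* 8192 */
	u_int zero_bytes = PAGE_SIZE - (u_int)(eof & PAGE_MASK); /* 1988 */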
-
- if ((error = VNOP_BLOCKMAP(vp, f_offset, io_size, &blkno, &io_size_tmp, NULL, bmap_flags, NULL)))
+
+ if ((error = VNOP_BLOCKMAP(vp, f_offset, io_size, &blkno, &io_size_tmp, NULL, bmap_flags, NULL))) {
* vnop_blockmap didn't return an error... however, it did
* return an extent size of 0 which means we can't
* make forward progress on this I/O... a hole in the
* file would be returned as a blkno of -1 with a non-zero io_size
* a real extent is returned with a blkno != -1 and a non-zero io_size
*/
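(Restating the VNOP_BLOCKMAP convention spelled out above as a sketch, using the blkno and io_size_tmp variables from the call a few lines earlier:)

	/* sketch: interpreting the blockmap result */
	if (io_size_tmp == 0) {
		/* extent size of 0: no forward progress possible on this I/O */
	} else if (blkno == -1) {
		/* a hole in the file: zero fill rather than issue a read */
	} else {
		/* a real extent backed by storage: safe to issue the I/O */
	}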
- if ( !(flags & CL_ASYNC))
- pageout_flags |= UPL_IOSYNC;
- if ( !(flags & CL_COMMIT))
- pageout_flags |= UPL_NOCOMMIT;
+ if (!(flags & CL_ASYNC)) {
+ pageout_flags |= UPL_IOSYNC;
+ }
+ if (!(flags & CL_COMMIT)) {
+ pageout_flags |= UPL_NOCOMMIT;
+ }
* there was more to the current transaction
* than just the page we are pushing out via vnode_pageout...
* mark it as finished and complete it... we've already
* waited for the I/Os to complete above in the call to cluster_wait_IO
*/
}
}
if (vnode_pageout(vp, upl, trunc_page(upl_offset), trunc_page_64(f_offset), PAGE_SIZE, pageout_flags, NULL) != PAGER_SUCCESS) {
* we've transferred all of the data in the original
* request, but we were unable to complete the tail
* of the last page because the file didn't have
* an allocation to back that portion... this is ok.
*/
* if we're reading and blkno == -1, then we've got a
* 'hole' in the file that we need to deal with by zeroing
* out the affected area in the upl
*/
if (io_size >= (u_int)non_rounded_size) {
* if this upl contains the EOF and it is not a multiple of PAGE_SIZE
* than 'zero_offset' will be non-zero
* if the 'hole' returned by vnop_blockmap extends all the way to the eof
* (indicated by the io_size finishing off the I/O request for this UPL)
* than we're not going to issue an I/O for the
* last page in this upl... we need to zero both the hole and the tail
* if there is a current I/O chain pending
* then the first page of the group we just zero'd
* will be handled by the I/O completion if the zero
* fill started in the middle of the page
*/
* pages that are beyond it
* plus the last page if its partial
* and we have no more I/O to issue...
* otherwise a partial page is left
* to begin the next I/O
*/
- if ( (flags & CL_COMMIT) && pg_count) {
- ubc_upl_commit_range(upl, commit_offset, pg_count * PAGE_SIZE,
- UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
+ if ((flags & CL_COMMIT) && pg_count) {
+ ubc_upl_commit_range(upl, commit_offset, pg_count * PAGE_SIZE,
+ UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
* we've transferred all of the data in the original
* request, but we were unable to complete the tail
* of the last page because the file didn't have
* an allocation to back that portion... this is ok.
*/
* if we're not targeting a virtual device i.e. a disk image
* it's safe to dip into the reserve pool since real devices
* can complete this I/O request without requiring additional
* bufs from the alloc_io_buf pool
*/
priv = 1;
* we've transferred all of the data in the original
* request, but we were unable to complete the tail
* of the last page because the file didn't have
* an allocation to back that portion... this is ok.
*/
* we've already accumulated more than 8 I/O's into
* this transaction so mark it as complete so that
* it can finish asynchronously or via the cluster_complete_transaction
* below if the request is synchronous
*/
- if ( !(flags & CL_ASYNC))
- cluster_complete_transaction(&cbp_head, callback_arg, &retval, flags, 1);
+ if (!(flags & CL_ASYNC)) {
+ cluster_complete_transaction(&cbp_head, callback_arg, &retval, flags, 1);
+ }
if (ISSET(flags, CL_COMMIT)) {
cluster_handle_associated_upl(iostate, upl, upl_offset,
pg_offset = upl_offset & PAGE_MASK;
abort_size = (upl_end_offset - upl_offset + PAGE_MASK) & ~PAGE_MASK;
upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, abort_size, error, io_flags, vp);
}
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END, (int)f_offset, size, upl_offset, retval, 0);
- if(io_flag & CL_READ) {
- if(vector_upl_offset == 0 && ((vector_upl_iosize & PAGE_MASK)==0))
- io_flag &= ~CL_PRESERVE; /*don't zero fill*/
- else
- io_flag |= CL_PRESERVE; /*zero fill*/
- }
- return (cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, real_bp, iostate, callback, callback_arg));
-
+ if (io_flag & CL_READ) {
+ if (vector_upl_offset == 0 && ((vector_upl_iosize & PAGE_MASK) == 0)) {
+ io_flag &= ~CL_PRESERVE; /*don't zero fill*/
+ } else {
+ io_flag |= CL_PRESERVE; /*zero fill*/
+ }
+ }
+ return cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, real_bp, iostate, callback, callback_arg);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
- (int)f_offset, 0, 0, 0, 0);
- return(0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
+ (int)f_offset, 0, 0, 0, 0);
+ return 0;
+ }
+ if ((off_t)size > (filesize - f_offset)) {
+ size = filesize - f_offset;
pages_in_prefetch = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;
advisory_read_ext(vp, filesize, f_offset, size, callback, callback_arg, bflag);
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
}
static void
cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct cl_readahead *rap, int (*callback)(buf_t, void *), void *callback_arg,
- if ((rap->cl_maxra - extent->e_addr) > (rap->cl_ralen / 4)) {
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
- rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 2, 0);
+ if ((rap->cl_maxra - extent->e_addr) > (rap->cl_ralen / 4)) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
+ rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 2, 0);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
- rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 3, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
+ rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 3, 0);
}
size_of_prefetch = cluster_read_prefetch(vp, f_offset, rap->cl_ralen * PAGE_SIZE, filesize, callback, callback_arg, bflag);
- return cluster_pageout_ext(vp, upl, upl_offset, f_offset, size, filesize, flags, NULL, NULL);
-
+ return cluster_pageout_ext(vp, upl, upl_offset, f_offset, size, filesize, flags, NULL, NULL);
- if (vp->v_mount->mnt_flag & MNT_RDONLY) {
- if (local_flags & CL_COMMIT)
- ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
- return (EROFS);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY) {
+ if (local_flags & CL_COMMIT) {
+ ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
+ }
+ return EROFS;
- return (cluster_io(vp, upl, upl_offset, f_offset, io_size,
- local_flags, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg));
+ return cluster_io(vp, upl, upl_offset, f_offset, io_size,
+ local_flags, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
- return cluster_pagein_ext(vp, upl, upl_offset, f_offset, size, filesize, flags, NULL, NULL);
+ return cluster_pagein_ext(vp, upl, upl_offset, f_offset, size, filesize, flags, NULL, NULL);
- (f_offset & PAGE_MASK_64) || (size & PAGE_MASK) || (upl_offset & PAGE_MASK)) {
- if (local_flags & CL_COMMIT)
- ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
- return (EINVAL);
+ (f_offset & PAGE_MASK_64) || (size & PAGE_MASK) || (upl_offset & PAGE_MASK)) {
+ if (local_flags & CL_COMMIT) {
+ ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
+ }
+ return EINVAL;
- return (cluster_io(bp->b_vp, bp->b_upl, 0, f_offset, bp->b_bcount, flags, bp, (struct clios *)NULL, callback, callback_arg));
+ return cluster_io(bp->b_vp, bp->b_upl, 0, f_offset, bp->b_bcount, flags, bp, (struct clios *)NULL, callback, callback_arg);
int
cluster_write(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, off_t headOff, off_t tailOff, int xflags)
{
- return cluster_write_ext(vp, uio, oldEOF, newEOF, headOff, tailOff, xflags, NULL, NULL);
+ return cluster_write_ext(vp, uio, oldEOF, newEOF, headOff, tailOff, xflags, NULL, NULL);
}
int
cluster_write_ext(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, off_t headOff, off_t tailOff,
- retval = cluster_write_copy(vp, NULL, (u_int32_t)0, oldEOF, newEOF, headOff, tailOff, flags, callback, callback_arg);
-
- return(retval);
- }
- /*
- * do a write through the cache if one of the following is true....
- * NOCACHE is not true or NODIRECT is true
- * the uio request doesn't target USERSPACE
- * otherwise, find out if we want the direct or contig variant for
- * the first vector in the uio request
- */
- if ( ((flags & (IO_NOCACHE | IO_NODIRECT)) == IO_NOCACHE) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg) )
- retval = cluster_io_type(uio, &write_type, &write_length, MIN_DIRECT_WRITE_SIZE);
-
- if ( (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)) && write_type == IO_DIRECT)
- /*
+ retval = cluster_write_copy(vp, NULL, (u_int32_t)0, oldEOF, newEOF, headOff, tailOff, flags, callback, callback_arg);
+
+ return retval;
+ }
+ /*
+ * do a write through the cache if one of the following is true....
+ * NOCACHE is not true or NODIRECT is true
+ * the uio request doesn't target USERSPACE
+ * otherwise, find out if we want the direct or contig variant for
+ * the first vector in the uio request
+ */
+ if (((flags & (IO_NOCACHE | IO_NODIRECT)) == IO_NOCACHE) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) {
+ retval = cluster_io_type(uio, &write_type, &write_length, MIN_DIRECT_WRITE_SIZE);
+ }
+
+ if ((flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)) && write_type == IO_DIRECT) {
+ /*
write_length = (u_int32_t)cur_resid;
}
retval = cluster_write_copy(vp, uio, write_length, oldEOF, newEOF, headOff, tailOff, zflags, callback, callback_arg);
break;
case IO_CONTIG:
}
retval = cluster_write_contig(vp, uio, newEOF, &write_type, &write_length, callback, callback_arg, bflag);
if (retval == 0 && (flags & IO_TAILZEROFILL) && uio_resid(uio) == 0) {
* we're done with the data from the user specified buffer(s)
* and we've been requested to zero fill at the tail
* treat this as an IO_HEADZEROFILL which doesn't require a uio
* by rearranging the args and passing in IO_HEADZEROFILL
*/
- retval = cluster_write_copy(vp, (struct uio *)0, (u_int32_t)0, (off_t)0, tailOff, uio->uio_offset,
- (off_t)0, zflags | IO_HEADZEROFILL | IO_SYNC, callback, callback_arg);
+ retval = cluster_write_copy(vp, (struct uio *)0, (u_int32_t)0, (off_t)0, tailOff, uio->uio_offset,
+ (off_t)0, zflags | IO_HEADZEROFILL | IO_SYNC, callback, callback_arg);
}
static int
cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, int *write_type, u_int32_t *write_length,
- u_int32_t vector_upl_iosize = 0;
- int issueVectorUPL = 0,useVectorUPL = (uio->uio_iovcnt > 1);
- off_t v_upl_uio_offset = 0;
- int vector_upl_index=0;
- upl_t vector_upl = NULL;
+ u_int32_t vector_upl_iosize = 0;
+ int issueVectorUPL = 0, useVectorUPL = (uio->uio_iovcnt > 1);
+ off_t v_upl_uio_offset = 0;
+ int vector_upl_index = 0;
+ upl_t vector_upl = NULL;
- /*
- * the AFP client advertises a devblocksize of 1
- * however, its BLOCKMAP routine maps to physical
- * blocks that are PAGE_SIZE in size...
- * therefore we can't ask for I/Os that aren't page aligned
- * or aren't multiples of PAGE_SIZE in size
- * by setting devblocksize to PAGE_SIZE, we re-instate
- * the old behavior we had before the mem_alignment_mask
- * changes went in...
- */
- devblocksize = PAGE_SIZE;
+ /*
+ * the AFP client advertises a devblocksize of 1
+ * however, its BLOCKMAP routine maps to physical
+ * blocks that are PAGE_SIZE in size...
+ * therefore we can't ask for I/Os that aren't page aligned
+ * or aren't multiples of PAGE_SIZE in size
+ * by setting devblocksize to PAGE_SIZE, we re-instate
+ * the old behavior we had before the mem_alignment_mask
+ * changes went in...
+ */
+ devblocksize = PAGE_SIZE;
task_update_logical_writes(current_task(), (io_req_size & ~PAGE_MASK), TASK_WRITE_IMMEDIATE, vp);
while (io_req_size >= PAGE_SIZE && uio->uio_offset < newEOF && retval == 0) {
retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
reset_vector_run_state();
}
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
- i, pages_in_pl, upl_size, kret, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
+ i, pages_in_pl, upl_size, kret, 0);
* one of the earlier writes we issued ran into a hard error
* don't issue any more writes, cleanup the UPL
* that was just created but not used, then
* go wait for all writes that are part of this stream
* to complete before returning the error to the caller
*/
- io_size, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
-
- else {
- if(!vector_upl_index) {
+ io_size, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
+ } else {
+ if (!vector_upl_index) {
vector_upl_set_iostate(vector_upl, upl, vector_upl_size, upl_size);
vector_upl_index++;
vector_upl_iosize += io_size;
vector_upl_size += upl_size;
retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
reset_vector_run_state();
}
- if (retval == 0 && iostate.io_error == 0 && io_req_size == 0) {
-
- retval = cluster_io_type(uio, write_type, write_length, MIN_DIRECT_WRITE_SIZE);
+ if (retval == 0 && iostate.io_error == 0 && io_req_size == 0) {
+ retval = cluster_io_type(uio, write_type, write_length, MIN_DIRECT_WRITE_SIZE);
wait_for_dwrites:
if (retval == 0 && iostate.io_error == 0 && useVectorUPL && vector_upl_index) {
retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
* we couldn't handle the tail of this request in DIRECT mode
* so fire it through the copy path
*
* note that flags will never have IO_HEADZEROFILL or IO_TAILZEROFILL set
* so we can just pass 0 in for the headOff and tailOff
*/
- retval = cluster_write_copy(vp, uio, io_req_size, oldEOF, newEOF, (off_t)0, (off_t)0, flags, callback, callback_arg);
+ retval = cluster_write_copy(vp, uio, io_req_size, oldEOF, newEOF, (off_t)0, (off_t)0, flags, callback, callback_arg);
- (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
- &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, VM_KERN_MEMORY_FILE, 0);
+ (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
+ &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, VM_KERN_MEMORY_FILE, 0);
error = cluster_align_phys_io(vp, uio, src_paddr, head_size, 0, callback, callback_arg);
* request doesn't set up on a memory boundary
* the underlying DMA engine can handle...
* return an error instead of going through
* the slow copy path since the intent of this
* path is direct I/O from device memory
*/
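(The rejection described above boils down to an alignment test against the device's DMA constraints; a minimal sketch using the iov_base/io_size/mem_alignment_mask names declared elsewhere in this diff:)

	/* sketch: reject buffers the DMA engine cannot address directly,
	 * rather than silently falling back to the slow copy path */
	if ((iov_base & mem_alignment_mask) || (io_size & mem_alignment_mask)) {
		error = EINVAL;
	}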
- if (iostate.io_error) {
- /*
- * one of the earlier writes we issued ran into a hard error
- * don't issue any more writes...
- * go wait for all writes that are part of this stream
- * to complete before returning the error to the caller
- */
- goto wait_for_cwrites;
+ if (iostate.io_error) {
+ /*
+ * one of the earlier writes we issued ran into a hard error
+ * don't issue any more writes...
+ * go wait for all writes that are part of this stream
+ * to complete before returning the error to the caller
+ */
+ goto wait_for_cwrites;
- error = cluster_io(vp, upl[cur_upl], upl_offset, uio->uio_offset,
- xsize, CL_DEV_MEMORY | CL_ASYNC | bflag, (buf_t)NULL, (struct clios *)&iostate, callback, callback_arg);
+ error = cluster_io(vp, upl[cur_upl], upl_offset, uio->uio_offset,
+ xsize, CL_DEV_MEMORY | CL_ASYNC | bflag, (buf_t)NULL, (struct clios *)&iostate, callback, callback_arg);
- if (error == 0 && iostate.io_error == 0 && tail_size == 0 && num_upl < MAX_VECTS) {
-
- error = cluster_io_type(uio, write_type, write_length, 0);
+ if (error == 0 && iostate.io_error == 0 && tail_size == 0 && num_upl < MAX_VECTS) {
+ error = cluster_io_type(uio, write_type, write_length, 0);
- if (error == 0 && tail_size)
- error = cluster_align_phys_io(vp, uio, src_paddr, tail_size, 0, callback, callback_arg);
+ if (error == 0 && tail_size) {
+ error = cluster_align_phys_io(vp, uio, src_paddr, tail_size, 0, callback, callback_arg);
+ }
cl.e_addr = (daddr64_t)(e_offset / PAGE_SIZE_64);
cluster_update_state_internal(vp, &cl, 0, TRUE, &first_pass, s_offset, (int)(e_offset - s_offset),
- boolean_t *first_pass, off_t write_off, int write_cnt, off_t newEOF,
- int (*callback)(buf_t, void *), void *callback_arg, boolean_t vm_initiated)
+ boolean_t *first_pass, off_t write_off, int write_cnt, off_t newEOF,
+ int (*callback)(buf_t, void *), void *callback_arg, boolean_t vm_initiated)
- sparse_cluster_add(wbp, &(wbp->cl_scmap), vp, cl, newEOF, callback, callback_arg, vm_initiated);
+ sparse_cluster_add(wbp, &(wbp->cl_scmap), vp, cl, newEOF, callback, callback_arg, vm_initiated);
* the cluster we're currently considering... in fact, we'll
* stretch the cluster out to it's full limit and see if we
* get an intersection with the current write
-
- ret_cluster_try_push = cluster_try_push(wbp, vp, newEOF, (flags & IO_NOCACHE) ? 0 : PUSH_DELAY, 0, callback, callback_arg, NULL, vm_initiated);
+ ret_cluster_try_push = cluster_try_push(wbp, vp, newEOF, (flags & IO_NOCACHE) ? 0 : PUSH_DELAY, 0, callback, callback_arg, NULL, vm_initiated);
sparse_cluster_add(wbp, &(wbp->cl_scmap), vp, cl, newEOF, callback, callback_arg, vm_initiated);
static int
cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t oldEOF, off_t newEOF, off_t headOff,
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
- (int)uio->uio_offset, io_req_size, (int)oldEOF, (int)newEOF, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
+ (int)uio->uio_offset, io_req_size, (int)oldEOF, (int)newEOF, 0);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
- 0, 0, (int)oldEOF, (int)newEOF, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
+ 0, 0, (int)oldEOF, (int)newEOF, 0);
* some filesystems (HFS is one) don't support unallocated holes within a file...
* so we zero fill the intervening space between the old EOF and the offset
* where the next chunk of real data begins.... ftruncate will also use this
* routine to zero fill to the new EOF when growing a file... in this case, the
* uio structure will not be provided
*/
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
- (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
+ (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);
* the requested write... limit each call to cluster_io
* to the maximum UPL size... cluster_io will clip if
* this exceeds the maximum io_size for the device,
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_START, upl_size, io_size, total_size, 0, 0);
- retval = cluster_io(vp, upl, 0, upl_f_offset, read_size,
- CL_READ | bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
+ retval = cluster_io(vp, upl, 0, upl_f_offset, read_size,
+ CL_READ | bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
* the last offset we're writing to in this upl does not end on a page
* boundary... if it's not beyond the old EOF, then we'll also need to
* pre-read this page in if it isn't already valid
*/
- retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size,
- CL_READ | bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
+ retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size,
+ CL_READ | bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
bytes_to_zero = cluster_zero_range(upl, pl, flags, io_offset, zero_off, upl_f_offset, bytes_to_zero);
ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
bytes_to_zero = cluster_zero_range(upl, pl, flags, io_offset, zero_off1, upl_f_offset, bytes_to_zero);
/*
* if we're extending the file with this write
* we'll zero fill the rest of the page so that
* if the file gets extended again in such a way as to leave a
* hole starting at this EOF, we'll have zero's in the correct spot
*/
* we happen to have to flush a bucket to make room and it intersects
* this upl, a deadlock may result on page BUSY
* 2) we're delaying the I/O... from this point forward we're just updating
- retval = cluster_push_now(vp, &cl, newEOF, flags, callback, callback_arg, FALSE);
+ retval = cluster_push_now(vp, &cl, newEOF, flags, callback, callback_arg, FALSE);
}
}
}
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END, retval, 0, io_resid, 0, 0);
}
int
cluster_read_ext(vnode_t vp, struct uio *uio, off_t filesize, int xflags, int (*callback)(buf_t, void *), void *callback_arg)
{
retval = cluster_io_type(uio, &read_type, &read_length, 0);
}
while ((cur_resid = uio_resid(uio)) && uio->uio_offset < filesize && retval == 0) {
retval = cluster_read_copy(vp, uio, io_size, filesize, flags, callback, callback_arg);
break;
case IO_DIRECT:
- retval = cluster_read_direct(vp, uio, filesize, &read_type, &read_length, flags, callback, callback_arg);
+ retval = cluster_read_direct(vp, uio, filesize, &read_type, &read_length, flags, callback, callback_arg);
- retval = cluster_read_contig(vp, uio, filesize, &read_type, &read_length, callback, callback_arg, flags);
+ retval = cluster_read_contig(vp, uio, filesize, &read_type, &read_length, callback, callback_arg, flags);
max_io_size = cluster_max_io_size(vp->v_mount, CL_READ);
max_prefetch = MAX_PREFETCH(vp, max_io_size, disk_conditioner_mount_is_ssd(vp->v_mount));
* determine if we already have a read-ahead in the pipe courtesy of the
* last read systemcall that was issued...
* if so, pick up it's extent to determine where we should start
- if (last_ioread_offset < uio->uio_offset)
- last_ioread_offset = (off_t)0;
- else if (last_ioread_offset > last_request_offset)
- last_ioread_offset = last_request_offset;
- } else
- last_ioread_offset = (off_t)0;
+ if (last_ioread_offset < uio->uio_offset) {
+ last_ioread_offset = (off_t)0;
+ } else if (last_ioread_offset > last_request_offset) {
+ last_ioread_offset = last_request_offset;
+ }
+ } else {
+ last_ioread_offset = (off_t)0;
+ }
* we've already issued I/O for this request and
* there's still work to do and
* our prefetch stream is running dry, so issue a
* pre-fetch I/O... the I/O latency will overlap
* with the copying of the data
*/
- size_of_prefetch = cluster_read_prefetch(vp, last_ioread_offset, size_of_prefetch, filesize, callback, callback_arg, bflag);
+ size_of_prefetch = cluster_read_prefetch(vp, last_ioread_offset, size_of_prefetch, filesize, callback, callback_arg, bflag);
- cluster_read_ahead(vp, &extent, filesize, rap, callback, callback_arg, bflag);
+ cluster_read_ahead(vp, &extent, filesize, rap, callback, callback_arg, bflag);
* the requested read... limit each call to cluster_io
* to the maximum UPL size... cluster_io will clip if
* this exceeds the maximum io_size for the device,
/*
* issue an asynchronous read to cluster_io
*/
error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
- if (extent.e_addr < rap->cl_maxra) {
- /*
- * we've just issued a read for a block that should have been
- * in the cache courtesy of the read-ahead engine... something
- * has gone wrong with the pipeline, so reset the read-ahead
- * logic which will cause us to restart from scratch
- */
- rap->cl_maxra = 0;
- }
- }
+ if (extent.e_addr < rap->cl_maxra) {
+ /*
+ * we've just issued a read for a block that should have been
+ * in the cache courtesy of the read-ahead engine... something
+ * has gone wrong with the pipeline, so reset the read-ahead
+ * logic which will cause us to restart from scratch
+ */
+ rap->cl_maxra = 0;
+ }
+ }
* if the read completed successfully, or there was no I/O request
* issued, than copy the data into user land via 'cluster_upl_copy_data'
* we'll first add on any 'valid'
* there were some invalid pages beyond the valid pages
* that we didn't issue an I/O for, just release them
* unchanged now, so that any prefetch/readahed can
* include them
*/
* if there's still I/O left to do for this request, and...
* we're not in hard throttle mode, and...
* we're close to using up the previous prefetch, then issue a
* new pre-fetch I/O... the I/O latency will overlap
* with the copying of the data
*/
size_of_prefetch = cluster_read_prefetch(vp, last_ioread_offset, size_of_prefetch, filesize, callback, callback_arg, bflag);
last_ioread_offset += (off_t)(size_of_prefetch * PAGE_SIZE);
cluster_read_ahead(vp, &extent, filesize, rap, callback, callback_arg, bflag);
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START, upl, start_pg * PAGE_SIZE, io_size, error, 0);
- if (error || (flags & IO_NOCACHE))
- ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
- UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
- else {
- int commit_flags = UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY;
+ if (error || (flags & IO_NOCACHE)) {
+ ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
+ UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
+ } else {
+ int commit_flags = UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY;
}
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END, upl, start_pg * PAGE_SIZE, io_size, error, 0);
}
if ((last_pg - start_pg) < pages_in_upl) {
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END, upl, -1, -1, 0, 0);
}
}
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
- (int)uio->uio_offset, io_req_size, rap->cl_lastr, retval, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
+ (int)uio->uio_offset, io_req_size, rap->cl_lastr, retval, 0);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
- (int)uio->uio_offset, io_req_size, 0, retval, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
+ (int)uio->uio_offset, io_req_size, 0, retval, 0);
- user_addr_t iov_base;
- u_int32_t io_req_size;
- u_int32_t offset_in_file;
- u_int32_t offset_in_iovbase;
- u_int32_t io_size;
- u_int32_t io_min;
- u_int32_t xsize;
- u_int32_t devblocksize;
- u_int32_t mem_alignment_mask;
- u_int32_t max_upl_size;
+ user_addr_t iov_base;
+ u_int32_t io_req_size;
+ u_int32_t offset_in_file;
+ u_int32_t offset_in_iovbase;
+ u_int32_t io_size;
+ u_int32_t io_min;
+ u_int32_t xsize;
+ u_int32_t devblocksize;
+ u_int32_t mem_alignment_mask;
+ u_int32_t max_upl_size;
- u_int32_t vector_upl_iosize = 0;
- int issueVectorUPL = 0,useVectorUPL = (uio->uio_iovcnt > 1);
- off_t v_upl_uio_offset = 0;
- int vector_upl_index=0;
- upl_t vector_upl = NULL;
+ u_int32_t vector_upl_iosize = 0;
+ int issueVectorUPL = 0, useVectorUPL = (uio->uio_iovcnt > 1);
+ off_t v_upl_uio_offset = 0;
+ int vector_upl_index = 0;
+ upl_t vector_upl = NULL;
- /*
- * the AFP client advertises a devblocksize of 1
- * however, its BLOCKMAP routine maps to physical
- * blocks that are PAGE_SIZE in size...
- * therefore we can't ask for I/Os that aren't page aligned
- * or aren't multiples of PAGE_SIZE in size
- * by setting devblocksize to PAGE_SIZE, we re-instate
- * the old behavior we had before the mem_alignment_mask
- * changes went in...
- */
- devblocksize = PAGE_SIZE;
+ /*
+ * the AFP client advertises a devblocksize of 1
+ * however, its BLOCKMAP routine maps to physical
+ * blocks that are PAGE_SIZE in size...
+ * therefore we can't ask for I/Os that aren't page aligned
+ * or aren't multiples of PAGE_SIZE in size
+ * by setting devblocksize to PAGE_SIZE, we re-instate
+ * the old behavior we had before the mem_alignment_mask
+ * changes went in...
+ */
+ devblocksize = PAGE_SIZE;
retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
reset_vector_run_state();
}
* multiple, we avoid asking the drive for the same physical
* blocks twice.. once for the partial page at the end of the
* request and a 2nd time for the page we read into the cache
* either an error or we only have the tail left to
* complete via the copy path...
* we may have already spun some portion of this request
* off as async requests... we need to wait for the I/O
* to complete before returning
*/
- (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
- &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, VM_KERN_MEMORY_FILE);
+ (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
+ &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, VM_KERN_MEMORY_FILE);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
- (int)upl_offset, upl_size, io_size, kret, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
+ (int)upl_offset, upl_size, io_size, kret, 0);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
- (int)upl_offset, upl_size, io_size, kret, 0);
-
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
+ (int)upl_offset, upl_size, io_size, kret, 0);
+
* one of the earlier reads we issued ran into a hard error
* don't issue any more reads, cleanup the UPL
* that was just created but not used, then
- upl, (int)upl_offset, (int)uio->uio_offset, io_size, 0);
-
- if(!useVectorUPL) {
- if (no_zero_fill)
- io_flag &= ~CL_PRESERVE;
- else
- io_flag |= CL_PRESERVE;
-
- retval = cluster_io(vp, upl, upl_offset, uio->uio_offset, io_size, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
+ upl, (int)upl_offset, (int)uio->uio_offset, io_size, 0);
- if(!vector_upl_index) {
+ retval = cluster_io(vp, upl, upl_offset, uio->uio_offset, io_size, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
+ } else {
+ if (!vector_upl_index) {
vector_upl_set_iostate(vector_upl, upl, vector_upl_size, upl_size);
vector_upl_index++;
vector_upl_size += upl_size;
vector_upl_iosize += io_size;
-
- if(issueVectorUPL || vector_upl_index == MAX_VECTOR_UPL_ELEMENTS || vector_upl_size >= max_vector_size) {
- retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
- reset_vector_run_state();
+
+ if (issueVectorUPL || vector_upl_index == MAX_VECTOR_UPL_ELEMENTS || vector_upl_size >= max_vector_size) {
+ retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
+ reset_vector_run_state();
-
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_NONE,
- (int)uio->uio_offset, (int)filesize, *read_type, *read_length, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_NONE,
+ (int)uio->uio_offset, (int)filesize, *read_type, *read_length, 0);
- if(retval == 0 && iostate.io_error == 0 && useVectorUPL && vector_upl_index) {
- retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
+ if (retval == 0 && iostate.io_error == 0 && useVectorUPL && vector_upl_index) {
+ retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
- (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
- &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, VM_KERN_MEMORY_FILE, 0);
+ (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
+ &upl_size, &upl[cur_upl], NULL, &pages_in_pl, &upl_flags, VM_KERN_MEMORY_FILE, 0);
error = cluster_align_phys_io(vp, uio, dst_paddr, head_size, CL_READ, callback, callback_arg);
* request doesn't set up on a memory boundary
* the underlying DMA engine can handle...
* return an error instead of going through
* the slow copy path since the intent of this
* path is direct I/O to device memory
*/
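To illustrate the decision described in the comment above, here is a minimal sketch that rejects a transfer whose buffer does not sit on a boundary the DMA engine can handle, instead of falling back to a copy; the alignment mask and the use of EINVAL are assumptions made for the example, not taken from this file.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* hypothetical device constraint: buffers must start on a 16-byte boundary */
#define DEV_MEM_ALIGN_MASK 0xfull

static int
check_dev_memory_alignment(uintptr_t user_addr)
{
	if (user_addr & DEV_MEM_ALIGN_MASK) {
		/* direct I/O to device memory: fail fast instead of copying */
		return EINVAL;
	}
	return 0;
}

int
main(void)
{
	printf("%d\n", check_dev_memory_alignment(0x1000));	/* 0 */
	printf("%d\n", check_dev_memory_alignment(0x1003));	/* EINVAL */
	return 0;
}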
* one of the earlier reads we issued ran into a hard error
* don't issue any more reads...
* go wait for any other reads to complete before
* returning the error to the caller
*/
- error = cluster_io(vp, upl[cur_upl], upl_offset, uio->uio_offset, xsize,
- CL_READ | CL_NOZERO | CL_DEV_MEMORY | CL_ASYNC | bflag,
- (buf_t)NULL, &iostate, callback, callback_arg);
- /*
+ error = cluster_io(vp, upl[cur_upl], upl_offset, uio->uio_offset, xsize,
+ CL_READ | CL_NOZERO | CL_DEV_MEMORY | CL_ASYNC | bflag,
+ (buf_t)NULL, &iostate, callback, callback_arg);
+ /*
- if (error == 0 && tail_size)
- error = cluster_align_phys_io(vp, uio, dst_paddr, tail_size, CL_READ, callback, callback_arg);
+ if (error == 0 && tail_size) {
+ error = cluster_align_phys_io(vp, uio, dst_paddr, tail_size, CL_READ, callback, callback_arg);
+ }
iov_len = uio_curriovlen(uio);
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 94)) | DBG_FUNC_START, uio, (int)iov_len, 0, 0, 0);
if (iov_len) {
* make sure the size of the vector isn't too big...
* internally, we want to handle all of the I/O in
* chunk sizes that fit in a 32 bit int
*/
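A sketch of the clamping the comment describes, with an invented next_chunk helper and an illustrative 2 GB cap: a vector longer than what fits in a 32-bit length is simply consumed in multiple passes.

#include <stdint.h>
#include <stdio.h>

/* clamp the remaining byte count to a chunk that fits in 32 bits */
static uint32_t
next_chunk(uint64_t remaining, uint32_t max_chunk)
{
	return (remaining > max_chunk) ? max_chunk : (uint32_t)remaining;
}

int
main(void)
{
	uint64_t iov_len = 6ULL * 1024 * 1024 * 1024;	/* 6 GB request */
	uint32_t max_chunk = 0x80000000u;		/* illustrative 2 GB cap */
	uint64_t done = 0;
	int passes = 0;

	while (done < iov_len) {
		done += next_chunk(iov_len - done, max_chunk);
		passes++;
	}
	printf("%d passes\n", passes);			/* 3 passes */
	return 0;
}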
- (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
- &upl_size, &upl, NULL, NULL, &upl_flags, VM_KERN_MEMORY_FILE, 0)) != KERN_SUCCESS) {
- /*
+ (vm_map_offset_t)(iov_base & ~((user_addr_t)PAGE_MASK)),
+ &upl_size, &upl, NULL, NULL, &upl_flags, VM_KERN_MEMORY_FILE, 0)) != KERN_SUCCESS) {
+ /*
*io_type = IO_UNKNOWN;
}
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 94)) | DBG_FUNC_END, iov_base, *io_type, *io_length, retval, 0);
- return advisory_read_ext(vp, filesize, f_offset, resid, NULL, NULL, CL_PASSIVE);
+ return advisory_read_ext(vp, filesize, f_offset, resid, NULL, NULL, CL_PASSIVE);
* the requested read... limit each call to cluster_io
* to the maximum UPL size... cluster_io will clip if
* this exceeds the maximum io_size for the device,
* a starting offset that's not page aligned
*/
start_offset = (int)(f_offset & PAGE_MASK_64);
upl_f_offset = f_offset - (off_t)start_offset;
max_size = filesize - f_offset;
* scan from the beginning of the upl looking for the first
* page that is present.... this will become the first page in
* the request we're going to make to 'cluster_io'... if all
* of the pages are absent, we won't call through to 'cluster_io'
*/
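A toy model of the scan described above, with the page list reduced to a boolean "present" array (upl_page_present is the real check): if nothing is resident the returned index equals the page count and the caller can skip cluster_io entirely.

#include <stdbool.h>
#include <stdio.h>

/*
 * walk the page list from the front; return the index of the first
 * resident page, or page_count if every page is absent
 */
static int
first_present_page(const bool *present, int page_count)
{
	int pg;

	for (pg = 0; pg < page_count; pg++) {
		if (present[pg]) {
			break;
		}
	}
	return pg;
}

int
main(void)
{
	bool pl[6] = { false, false, true, true, false, true };

	printf("%d\n", first_present_page(pl, 6));	/* 2 */
	return 0;
}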
* find one, then it will terminate the range of pages being
* presented to 'cluster_io'
*/
for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
* we found a range of pages that must be filled
* if the last page in this range is the last page of the file
* we may have to clip the size of it to keep from reading past
* the end of the last physical block associated with the file
*/
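A small sketch of the clipping the comment describes (offsets and sizes are invented for the example): the last range of pages is shortened so the read ends at the file's last byte rather than at the end of the last page.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096u

/* limit the size of the final range so it never extends past filesize */
static uint32_t
clip_to_eof(uint64_t range_offset, uint32_t range_size, uint64_t filesize)
{
	if (range_offset + range_size > filesize) {
		range_size = (uint32_t)(filesize - range_offset);
	}
	return range_size;
}

int
main(void)
{
	/* the range covers 3 pages but the file ends 1000 bytes into it */
	printf("%u\n", clip_to_eof(8 * PAGE_SZ, 3 * PAGE_SZ, 8 * PAGE_SZ + 1000));	/* 1000 */
	return 0;
}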
/*
* issue an asynchronous read to cluster_io
*/
retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size,
- if ( !UBCINFOEXISTS(vp)) {
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, kdebug_vnode(vp), flags, 0, -1, 0);
- return (0);
+ if (!UBCINFOEXISTS(vp)) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, kdebug_vnode(vp), flags, 0, -1, 0);
+ return 0;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, kdebug_vnode(vp), flags, 0, -2, 0);
- return (0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, kdebug_vnode(vp), flags, 0, -2, 0);
+ return 0;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, kdebug_vnode(vp), flags, 0, -3, 0);
- return(0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, kdebug_vnode(vp), flags, 0, -3, 0);
+ return 0;
- retval = sparse_cluster_push(wbp, &(wbp->cl_scmap), vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg, FALSE);
+ retval = sparse_cluster_push(wbp, &(wbp->cl_scmap), vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg, FALSE);
retval = 1;
} else {
retval = cluster_try_push(wbp, vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg, &local_err, FALSE);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, ubc, 0, 0, 0, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, ubc, 0, 0, 0, 0);
static int
cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_flag, int io_flags, int (*callback)(buf_t, void *), void *callback_arg, int *err, boolean_t vm_initiated)
{
- for (min_index = -1, cl_index1 = 0; cl_index1 < wbp->cl_number; cl_index1++) {
- if (wbp->cl_clusters[cl_index1].b_addr == wbp->cl_clusters[cl_index1].e_addr)
- continue;
- if (min_index == -1)
- min_index = cl_index1;
- else if (wbp->cl_clusters[cl_index1].b_addr < wbp->cl_clusters[min_index].b_addr)
- min_index = cl_index1;
- }
- if (min_index == -1)
- break;
-
- l_clusters[cl_index].b_addr = wbp->cl_clusters[min_index].b_addr;
+ for (min_index = -1, cl_index1 = 0; cl_index1 < wbp->cl_number; cl_index1++) {
+ if (wbp->cl_clusters[cl_index1].b_addr == wbp->cl_clusters[cl_index1].e_addr) {
+ continue;
+ }
+ if (min_index == -1) {
+ min_index = cl_index1;
+ } else if (wbp->cl_clusters[cl_index1].b_addr < wbp->cl_clusters[min_index].b_addr) {
+ min_index = cl_index1;
+ }
+ }
+ if (min_index == -1) {
+ break;
+ }
+
+ l_clusters[cl_index].b_addr = wbp->cl_clusters[min_index].b_addr;
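The loop above is one pass of a selection sort over the write-behind clusters: slots whose b_addr equals their e_addr are empty, and each pass picks the lowest remaining starting block so the local copy comes out ordered. A user-space sketch of that pass, with a simplified cluster type:

#include <stdio.h>

struct cl {
	long long b_addr;	/* first block of the cluster */
	long long e_addr;	/* one past the last block; == b_addr when empty */
};

static int
pick_min_cluster(const struct cl *clusters, int count)
{
	int i, min_index = -1;

	for (i = 0; i < count; i++) {
		if (clusters[i].b_addr == clusters[i].e_addr) {
			continue;	/* empty slot */
		}
		if (min_index == -1 || clusters[i].b_addr < clusters[min_index].b_addr) {
			min_index = i;
		}
	}
	return min_index;		/* -1 when nothing is left to push */
}

int
main(void)
{
	struct cl c[3] = { { 40, 48 }, { 8, 8 }, { 16, 24 } };

	printf("%d\n", pick_min_cluster(c, 3));		/* 2 (b_addr 16) */
	return 0;
}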
* so we can just make a simple pass through, up to, but not including the last one...
* note that e_addr is not inclusive, so it will be equal to the b_addr of the next cluster if they
* are sequential
* we let the last one be partial as long as it was adjacent to the previous one...
* we need to do this to deal with multi-threaded servers that might write an I/O or 2 out
* of order... if this occurs at the tail of the last cluster, we don't want to fall into the sparse cluster world...
*/
for (i = 0; i < MAX_CLUSTERS - 1; i++) {
- /*
- * we didn't push all of the clusters, so
- * lets try to merge them back in to the vnode
- */
- if ((MAX_CLUSTERS - wbp->cl_number) < (cl_len - cl_pushed)) {
- /*
+ /*
+ * we didn't push all of the clusters, so
+ * let's try to merge them back into the vnode
+ */
+ if ((MAX_CLUSTERS - wbp->cl_number) < (cl_len - cl_pushed)) {
+ /*
- for (cl_index = 0, cl_index1 = 0; cl_index < cl_len; cl_index++) {
- if (l_clusters[cl_index].b_addr == l_clusters[cl_index].e_addr)
- continue;
- wbp->cl_clusters[cl_index1].b_addr = l_clusters[cl_index].b_addr;
+ for (cl_index = 0, cl_index1 = 0; cl_index < cl_len; cl_index++) {
+ if (l_clusters[cl_index].b_addr == l_clusters[cl_index].e_addr) {
+ continue;
+ }
+ wbp->cl_clusters[cl_index1].b_addr = l_clusters[cl_index].b_addr;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 0, 0, 0, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 0, 0, 0, 0);
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 1, 0, 0, 0);
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 1, 0, 0, 0);
- vnode_pageout(vp, NULL, (upl_offset_t)0, upl_f_offset, (upl_size_t)upl_size,
- UPL_MSYNC | UPL_VNODE_PAGER | UPL_KEEPCACHED, &error);
+ vnode_pageout(vp, NULL, (upl_offset_t)0, upl_f_offset, (upl_size_t)upl_size,
+ UPL_MSYNC | UPL_VNODE_PAGER | UPL_KEEPCACHED, &error);
}
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_START, upl_size, size, 0, 0, 0);
/*
* by asking for UPL_COPYOUT_FROM and UPL_RET_ONLY_DIRTY, we get the following desirable behavior
* - only pages that are currently dirty are returned... these are the ones we need to clean
* - the hardware dirty bit is cleared when the page is gathered into the UPL... the software dirty bit is set
* - if we have to abort the I/O for some reason, the software dirty bit is left set since we didn't clean the page
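As a rough, purely illustrative model of the dirty-bit bookkeeping listed above (field and function names are invented here): gathering a dirty page clears its hardware dirty bit and sets a software one, so an aborted clean still leaves the page marked as needing to be written.

#include <stdbool.h>
#include <stdio.h>

struct page {
	bool hw_dirty;	/* set by writes, cleared when the page is gathered */
	bool sw_dirty;	/* set when gathered, cleared only on a successful clean */
};

static bool
gather_if_dirty(struct page *p)
{
	if (!p->hw_dirty && !p->sw_dirty) {
		return false;		/* clean pages are not returned */
	}
	p->hw_dirty = false;
	p->sw_dirty = true;
	return true;
}

int
main(void)
{
	struct page p = { .hw_dirty = true, .sw_dirty = false };

	if (gather_if_dirty(&p)) {
		/* pretend the I/O was aborted: the software bit stays set */
		printf("still needs cleaning: %d\n", p.sw_dirty);	/* 1 */
	}
	return 0;
}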
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_END, upl, upl_f_offset, 0, 0, 0);
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 2, 0, 0, 0);
size -= io_size;
}
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 3, error, 0, 0);
static int
sparse_cluster_switch(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int (*callback)(buf_t, void *), void *callback_arg, boolean_t vm_initiated)
{
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 78)) | DBG_FUNC_START, kdebug_vnode(vp), wbp->cl_scmap, wbp->cl_number, 0, 0);
for (cl_index = 0; cl_index < wbp->cl_number; cl_index++) {
- for (cl.b_addr = wbp->cl_clusters[cl_index].b_addr; cl.b_addr < wbp->cl_clusters[cl_index].e_addr; cl.b_addr++) {
-
- if (ubc_page_op(vp, (off_t)(cl.b_addr * PAGE_SIZE_64), 0, NULL, &flags) == KERN_SUCCESS) {
- if (flags & UPL_POP_DIRTY) {
- cl.e_addr = cl.b_addr + 1;
+ for (cl.b_addr = wbp->cl_clusters[cl_index].b_addr; cl.b_addr < wbp->cl_clusters[cl_index].e_addr; cl.b_addr++) {
+ if (ubc_page_op(vp, (off_t)(cl.b_addr * PAGE_SIZE_64), 0, NULL, &flags) == KERN_SUCCESS) {
+ if (flags & UPL_POP_DIRTY) {
+ cl.e_addr = cl.b_addr + 1;
- error = sparse_cluster_add(wbp, &(wbp->cl_scmap), vp, &cl, EOF, callback, callback_arg, vm_initiated);
+ error = sparse_cluster_add(wbp, &(wbp->cl_scmap), vp, &cl, EOF, callback, callback_arg, vm_initiated);
void *l_scmap;
int error = 0;
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_START, kdebug_vnode(vp), (*scmap), 0, push_flag, 0);
cl.b_addr = (daddr64_t)(offset / PAGE_SIZE_64);
cl.e_addr = (daddr64_t)((offset + length) / PAGE_SIZE_64);
retval = cluster_push_now(vp, &cl, EOF, io_flags, callback, callback_arg, vm_initiated);
}
}
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_END, kdebug_vnode(vp), (*scmap), error, 0, 0);
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 80)) | DBG_FUNC_START, (*scmap), 0, cl->b_addr, (int)cl->e_addr, 0);
length = ((u_int)(cl->e_addr - cl->b_addr)) * PAGE_SIZE;
while (vfs_drt_mark_pages(scmap, offset, length, &new_dirty) != KERN_SUCCESS) {
- error = sparse_cluster_push(wbp, scmap, vp, EOF, 0, 0, callback, callback_arg, vm_initiated);
+
+ if (vfs_get_scmap_push_behavior_internal(scmap, &push_flag)) {
+ push_flag = 0;
+ }
+
+ error = sparse_cluster_push(wbp, scmap, vp, EOF, push_flag, 0, callback, callback_arg, vm_initiated);
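A toy model of the retry loop above (capacities and names are invented): marking a range in a bounded dirty map can fail when the map is full, in which case dirty state is pushed out to make room and the mark is retried.

#include <stdbool.h>
#include <stdio.h>

#define MAP_CAPACITY 4

static int map_used;

static bool
mark_range(void)
{
	if (map_used == MAP_CAPACITY) {
		return false;		/* no room: caller must push first */
	}
	map_used++;
	return true;
}

static void
push_some(void)
{
	map_used = 0;			/* model of pushing the whole map */
}

int
main(void)
{
	int pushes = 0;

	for (int i = 0; i < 10; i++) {
		while (!mark_range()) {
			push_some();
			pushes++;
		}
	}
	printf("pushes: %d\n", pushes);	/* 2 */
	return 0;
}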
static int
cluster_align_phys_io(vnode_t vp, struct uio *uio, addr64_t usr_paddr, u_int32_t xsize, int flags, int (*callback)(buf_t, void *), void *callback_arg)
{
* indicate that there is no need to pull the
* mapping for this page... we're only going
* to read from it, not modify it.
*/
upl_flags |= UPL_FILE_IO;
}
- kret = ubc_create_upl_kernel(vp,
- uio->uio_offset & ~PAGE_MASK_64,
- PAGE_SIZE,
- &upl,
- &pl,
- upl_flags,
- VM_KERN_MEMORY_FILE);
-
- if (kret != KERN_SUCCESS)
- return(EINVAL);
-
- if (!upl_valid_page(pl, 0)) {
- /*
- * issue a synchronous read to cluster_io
- */
- error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE,
- CL_READ | bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
- if (error) {
- ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
-
- return(error);
- }
+ kret = ubc_create_upl_kernel(vp,
+ uio->uio_offset & ~PAGE_MASK_64,
+ PAGE_SIZE,
+ &upl,
+ &pl,
+ upl_flags,
+ VM_KERN_MEMORY_FILE);
+
+ if (kret != KERN_SUCCESS) {
+ return EINVAL;
+ }
+
+ if (!upl_valid_page(pl, 0)) {
+ /*
+ * issue a synchronous read to cluster_io
+ */
+ error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE,
+ CL_READ | bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
+ if (error) {
+ ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
+
+ return error;
+ }
- copypv(usr_paddr, ubc_paddr, xsize, 2 | 1 | 8); /* Copy physical to physical and flush the source */
-
- if ( !(flags & CL_READ) || (upl_valid_page(pl, 0) && upl_dirty_page(pl, 0))) {
- /*
+ copypv(usr_paddr, ubc_paddr, xsize, 2 | 1 | 8); /* Copy physical to physical and flush the source */
+ }
+ if (!(flags & CL_READ) || (upl_valid_page(pl, 0) && upl_dirty_page(pl, 0))) {
+ /*
* issue a synchronous write to cluster_io
*/
error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE,
- bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
+ bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
+ }
+ if (error == 0) {
+ uio_update(uio, (user_size_t)xsize);
task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED, upl_lookup_vnode(upl));
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
control = ubc_getobject(vp, UBC_FLAGS_NONE);
if (control == MEMORY_OBJECT_CONTROL_NULL) {
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
- if (ubc_page_op(vp, f_offset, 0, NULL, &flags) == KERN_SUCCESS) {
- if (flags & UPL_POP_DIRTY) {
- total_dirty++;
+ if (ubc_page_op(vp, f_offset, 0, NULL, &flags) == KERN_SUCCESS) {
+ if (flags & UPL_POP_DIRTY) {
+ total_dirty++;
-#define DRT_HASH_GET_ADDRESS(scm, i) ((scm)->scm_hashtable[(i)].dhe_control & DRT_ADDRESS_MASK)
-#define DRT_HASH_SET_ADDRESS(scm, i, a) \
- do { \
- (scm)->scm_hashtable[(i)].dhe_control = \
- ((scm)->scm_hashtable[(i)].dhe_control & ~DRT_ADDRESS_MASK) | DRT_ALIGN_ADDRESS(a); \
+#define DRT_HASH_GET_ADDRESS(scm, i) ((scm)->scm_hashtable[(i)].dhe_control & DRT_ADDRESS_MASK)
+#define DRT_HASH_SET_ADDRESS(scm, i, a) \
+ do { \
+ (scm)->scm_hashtable[(i)].dhe_control = \
+ ((scm)->scm_hashtable[(i)].dhe_control & ~DRT_ADDRESS_MASK) | DRT_ALIGN_ADDRESS(a); \
-#define DRT_HASH_COUNT_MASK 0x1ff
-#define DRT_HASH_GET_COUNT(scm, i) ((scm)->scm_hashtable[(i)].dhe_control & DRT_HASH_COUNT_MASK)
-#define DRT_HASH_SET_COUNT(scm, i, c) \
- do { \
- (scm)->scm_hashtable[(i)].dhe_control = \
- ((scm)->scm_hashtable[(i)].dhe_control & ~DRT_HASH_COUNT_MASK) | ((c) & DRT_HASH_COUNT_MASK); \
+#define DRT_HASH_COUNT_MASK 0x1ff
+#define DRT_HASH_GET_COUNT(scm, i) ((scm)->scm_hashtable[(i)].dhe_control & DRT_HASH_COUNT_MASK)
+#define DRT_HASH_SET_COUNT(scm, i, c) \
+ do { \
+ (scm)->scm_hashtable[(i)].dhe_control = \
+ ((scm)->scm_hashtable[(i)].dhe_control & ~DRT_HASH_COUNT_MASK) | ((c) & DRT_HASH_COUNT_MASK); \
-#define DRT_HASH_VACATE(scm, i) DRT_HASH_SET_COUNT((scm), (i), DRT_HASH_COUNT_MASK)
-#define DRT_HASH_VACANT(scm, i) (DRT_HASH_GET_COUNT((scm), (i)) == DRT_HASH_COUNT_MASK)
-#define DRT_HASH_COPY(oscm, oi, scm, i) \
- do { \
- (scm)->scm_hashtable[(i)].dhe_control = (oscm)->scm_hashtable[(oi)].dhe_control; \
- DRT_BITVECTOR_COPY(oscm, oi, scm, i); \
+#define DRT_HASH_VACATE(scm, i) DRT_HASH_SET_COUNT((scm), (i), DRT_HASH_COUNT_MASK)
+#define DRT_HASH_VACANT(scm, i) (DRT_HASH_GET_COUNT((scm), (i)) == DRT_HASH_COUNT_MASK)
+#define DRT_HASH_COPY(oscm, oi, scm, i) \
+ do { \
+ (scm)->scm_hashtable[(i)].dhe_control = (oscm)->scm_hashtable[(oi)].dhe_control; \
+ DRT_BITVECTOR_COPY(oscm, oi, scm, i); \
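A sketch of the control-word layout the macros above manipulate, under the simplifying assumption that the address occupies everything above the 9-bit count field (the real DRT_ADDRESS_MASK and alignment differ; the helpers here are only illustrative): the low bits carry the bucket's page count, the high bits its aligned base address, and an all-ones count marks the bucket vacant.

#include <stdint.h>
#include <stdio.h>

#define COUNT_MASK	0x1ffull		/* mirrors DRT_HASH_COUNT_MASK */
#define ADDRESS_MASK	(~COUNT_MASK)		/* simplified address field */

static uint64_t
set_address(uint64_t control, uint64_t aligned_addr)
{
	return (control & ~ADDRESS_MASK) | (aligned_addr & ADDRESS_MASK);
}

static uint64_t
set_count(uint64_t control, uint64_t count)
{
	return (control & ~COUNT_MASK) | (count & COUNT_MASK);
}

int
main(void)
{
	uint64_t control = 0;

	control = set_address(control, 0x123000);
	control = set_count(control, 5);
	printf("addr=0x%llx count=%llu vacant=%d\n",
	    (unsigned long long)(control & ADDRESS_MASK),
	    (unsigned long long)(control & COUNT_MASK),
	    (control & COUNT_MASK) == COUNT_MASK);

	control = set_count(control, COUNT_MASK);	/* vacate the bucket */
	printf("vacant=%d\n", (control & COUNT_MASK) == COUNT_MASK);
	return 0;
}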
-* dhe_bitvector was declared as dhe_bitvector[DRT_BITVECTOR_PAGES / 32];
-* DRT_BITVECTOR_PAGES is defined as ((1024 * 256) / PAGE_SIZE)
-* Since PAGE_SIZE is only known at boot time,
-* -define MAX_DRT_BITVECTOR_PAGES for smallest supported page size (4k)
-* -declare dhe_bitvector array for largest possible length
-*/
+ * dhe_bitvector was declared as dhe_bitvector[DRT_BITVECTOR_PAGES / 32];
+ * DRT_BITVECTOR_PAGES is defined as ((1024 * 256) / PAGE_SIZE)
+ * Since PAGE_SIZE is only known at boot time,
+ * -define MAX_DRT_BITVECTOR_PAGES for smallest supported page size (4k)
+ * -declare dhe_bitvector array for largest possible length
+ */
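The arithmetic behind that comment, worked out as a runnable example (the 16 KB entry is just a second illustration): one bit per page over a 256 KB region means the smallest supported page size needs the longest bitvector, which is why the array is declared for that worst case.

#include <stdio.h>

int
main(void)
{
	const unsigned region_bytes = 1024 * 256;
	const unsigned page_sizes[] = { 4096, 16384 };

	for (unsigned i = 0; i < 2; i++) {
		unsigned pages = region_bytes / page_sizes[i];
		printf("page size %5u -> %2u bits -> %u x u_int32_t\n",
		    page_sizes[i], pages, (pages + 31) / 32);
	}
	/* 4 KB pages: 64 bits, 2 words; 16 KB pages: 16 bits, 1 word */
	return 0;
}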
-#define DRT_BITVECTOR_COPY(oscm, oi, scm, i) \
- bcopy(&(oscm)->scm_hashtable[(oi)].dhe_bitvector[0], \
- &(scm)->scm_hashtable[(i)].dhe_bitvector[0], \
+#define DRT_BITVECTOR_COPY(oscm, oi, scm, i) \
+ bcopy(&(oscm)->scm_hashtable[(oi)].dhe_bitvector[0], \
+ &(scm)->scm_hashtable[(i)].dhe_bitvector[0], \
- u_int32_t scm_magic; /* sanity/detection */
-#define DRT_SCM_MAGIC 0x12020003
- u_int32_t scm_modulus; /* current ring size */
- u_int32_t scm_buckets; /* number of occupied buckets */
- u_int32_t scm_lastclean; /* last entry we cleaned */
- u_int32_t scm_iskips; /* number of slot skips */
+ u_int32_t scm_magic; /* sanity/detection */
+#define DRT_SCM_MAGIC 0x12020003
+ u_int32_t scm_modulus; /* current ring size */
+ u_int32_t scm_buckets; /* number of occupied buckets */
+ u_int32_t scm_lastclean; /* last entry we cleaned */
+ u_int32_t scm_iskips; /* number of slot skips */
-#define DRT_DEBUG_EMPTYFREE (FSDBG_CODE(DBG_FSRW, 82)) /* nil */
-#define DRT_DEBUG_RETCLUSTER (FSDBG_CODE(DBG_FSRW, 83)) /* offset, length */
-#define DRT_DEBUG_ALLOC (FSDBG_CODE(DBG_FSRW, 84)) /* copycount */
-#define DRT_DEBUG_INSERT (FSDBG_CODE(DBG_FSRW, 85)) /* offset, iskip */
-#define DRT_DEBUG_MARK (FSDBG_CODE(DBG_FSRW, 86)) /* offset, length,
- * dirty */
- /* 0, setcount */
- /* 1 (clean, no map) */
- /* 2 (map alloc fail) */
- /* 3, resid (partial) */
-#define DRT_DEBUG_6 (FSDBG_CODE(DBG_FSRW, 87))
-#define DRT_DEBUG_SCMDATA (FSDBG_CODE(DBG_FSRW, 88)) /* modulus, buckets,
- * lastclean, iskips */
-
-
-static kern_return_t vfs_drt_alloc_map(struct vfs_drt_clustermap **cmapp);
-static kern_return_t vfs_drt_free_map(struct vfs_drt_clustermap *cmap);
-static kern_return_t vfs_drt_search_index(struct vfs_drt_clustermap *cmap,
- u_int64_t offset, int *indexp);
-static kern_return_t vfs_drt_get_index(struct vfs_drt_clustermap **cmapp,
- u_int64_t offset,
- int *indexp,
- int recursed);
-static kern_return_t vfs_drt_do_mark_pages(
- void **cmapp,
- u_int64_t offset,
- u_int length,
- u_int *setcountp,
- int dirty);
-static void vfs_drt_trace(
+#define DRT_DEBUG_EMPTYFREE (FSDBG_CODE(DBG_FSRW, 82)) /* nil */
+#define DRT_DEBUG_RETCLUSTER (FSDBG_CODE(DBG_FSRW, 83)) /* offset, length */
+#define DRT_DEBUG_ALLOC (FSDBG_CODE(DBG_FSRW, 84)) /* copycount */
+#define DRT_DEBUG_INSERT (FSDBG_CODE(DBG_FSRW, 85)) /* offset, iskip */
+#define DRT_DEBUG_MARK (FSDBG_CODE(DBG_FSRW, 86)) /* offset, length,
+ * dirty */
+ /* 0, setcount */
+ /* 1 (clean, no map) */
+ /* 2 (map alloc fail) */
+ /* 3, resid (partial) */
+#define DRT_DEBUG_6 (FSDBG_CODE(DBG_FSRW, 87))
+#define DRT_DEBUG_SCMDATA (FSDBG_CODE(DBG_FSRW, 88)) /* modulus, buckets,
+ * lastclean, iskips */
+
+
+static kern_return_t vfs_drt_alloc_map(struct vfs_drt_clustermap **cmapp);
+static kern_return_t vfs_drt_free_map(struct vfs_drt_clustermap *cmap);
+static kern_return_t vfs_drt_search_index(struct vfs_drt_clustermap *cmap,
+ u_int64_t offset, int *indexp);
+static kern_return_t vfs_drt_get_index(struct vfs_drt_clustermap **cmapp,
+ u_int64_t offset,
+ int *indexp,
+ int recursed);
+static kern_return_t vfs_drt_do_mark_pages(
+ void **cmapp,
+ u_int64_t offset,
+ u_int length,
+ u_int *setcountp,
+ int dirty);
+static void vfs_drt_trace(
- struct vfs_drt_clustermap *cmap, *ocmap;
- kern_return_t kret;
- u_int64_t offset;
- u_int32_t i;
- int nsize, active_buckets, index, copycount;
+ struct vfs_drt_clustermap *cmap = NULL, *ocmap = NULL;
+ kern_return_t kret = KERN_SUCCESS;
+ u_int64_t offset = 0;
+ u_int32_t i = 0;
+ int modulus_size = 0, map_size = 0, active_buckets = 0, index = 0, copycount = 0;
} else {
/* count the number of active buckets in the old map */
active_buckets = 0;
for (i = 0; i < ocmap->scm_modulus; i++) {
if (!DRT_HASH_VACANT(ocmap, i) &&
}
/*
* If we're currently using the small allocation, check to
* see whether we should grow to the large one.
*/
if (ocmap->scm_modulus == DRT_HASH_SMALL_MODULUS) {
* If the ring is nearly full and we are allowed to
* use the large modulus, upgrade.
*/
if ((active_buckets > (DRT_HASH_SMALL_MODULUS - 5)) &&
(max_mem >= DRT_HASH_LARGE_MEMORY_REQUIRED)) {
- nsize = DRT_HASH_LARGE_MODULUS;
+ modulus_size = DRT_HASH_LARGE_MODULUS;
+ map_size = DRT_LARGE_ALLOCATION;
+ } else {
+ modulus_size = DRT_HASH_SMALL_MODULUS;
+ map_size = DRT_SMALL_ALLOCATION;
+ }
+ } else if (ocmap->scm_modulus == DRT_HASH_LARGE_MODULUS) {
+ if ((active_buckets > (DRT_HASH_LARGE_MODULUS - 5)) &&
+ (max_mem >= DRT_HASH_XLARGE_MEMORY_REQUIRED)) {
+ modulus_size = DRT_HASH_XLARGE_MODULUS;
+ map_size = DRT_XLARGE_ALLOCATION;
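A sketch of the grow decision this hunk implements: step up to the next ring size only when the current one is nearly full and the machine has enough memory for it. The numeric ring sizes and the plenty_of_memory flag are invented stand-ins for the DRT_HASH_*_MODULUS and memory-threshold constants.

#include <stdio.h>

enum ring { RING_SMALL = 16, RING_LARGE = 128, RING_XLARGE = 1024 };

static enum ring
choose_ring(enum ring current, int active_buckets, int plenty_of_memory)
{
	if (current == RING_SMALL &&
	    active_buckets > RING_SMALL - 5 && plenty_of_memory) {
		return RING_LARGE;
	}
	if (current == RING_LARGE &&
	    active_buckets > RING_LARGE - 5 && plenty_of_memory) {
		return RING_XLARGE;
	}
	return current;			/* stay at the current size */
}

int
main(void)
{
	printf("%d\n", choose_ring(RING_SMALL, 14, 1));		/* 128: nearly full, grow */
	printf("%d\n", choose_ring(RING_LARGE, 50, 1));		/* 128: plenty of room */
	printf("%d\n", choose_ring(RING_LARGE, 126, 1));	/* 1024: grow to xlarge */
	return 0;
}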
- kret = kmem_alloc(kernel_map, (vm_offset_t *)&cmap,
- (nsize == DRT_HASH_SMALL_MODULUS) ? DRT_SMALL_ALLOCATION : DRT_LARGE_ALLOCATION, VM_KERN_MEMORY_FILE);
- if (kret != KERN_SUCCESS)
- return(kret);
+ kret = kmem_alloc(kernel_map, (vm_offset_t *)&cmap, map_size, VM_KERN_MEMORY_FILE);
+ if (kret != KERN_SUCCESS) {
+ return kret;
+ }
/* get new index */
offset = DRT_HASH_GET_ADDRESS(ocmap, i);
kret = vfs_drt_get_index(&cmap, offset, &index, 1);
/* log what we've done */
vfs_drt_trace(cmap, DRT_DEBUG_ALLOC, copycount, 0, 0, 0);
- kmem_free(kernel_map, (vm_offset_t)cmap,
- (cmap->scm_modulus == DRT_HASH_SMALL_MODULUS) ? DRT_SMALL_ALLOCATION : DRT_LARGE_ALLOCATION);
- return(KERN_SUCCESS);
+ vm_size_t map_size = 0;
+
+ if (cmap->scm_modulus == DRT_HASH_SMALL_MODULUS) {
+ map_size = DRT_SMALL_ALLOCATION;
+ } else if (cmap->scm_modulus == DRT_HASH_LARGE_MODULUS) {
+ map_size = DRT_LARGE_ALLOCATION;
+ } else if (cmap->scm_modulus == DRT_HASH_XLARGE_MODULUS) {
+ map_size = DRT_XLARGE_ALLOCATION;
+ } else {
+ panic("vfs_drt_free_map: Invalid modulus %d\n", cmap->scm_modulus);
+ }
+
+ kmem_free(kernel_map, (vm_offset_t)cmap, map_size);
+ return KERN_SUCCESS;
offset = DRT_ALIGN_ADDRESS(offset);
index = DRT_HASH(cmap, offset);
/* traverse the hashtable */
for (i = 0; i < cmap->scm_modulus; i++) {
DRT_HASH_SET_ADDRESS(cmap, index, offset);
DRT_HASH_SET_COUNT(cmap, index, 0);
DRT_BITVECTOR_CLEAR(cmap, index);
*indexp = index;
vfs_drt_trace(cmap, DRT_DEBUG_INSERT, (int)offset, i, 0, 0);
kret = vfs_drt_alloc_map(cmapp);
if (kret == KERN_SUCCESS) {
/* now try to insert again */
kret = vfs_drt_get_index(cmapp, offset, indexp, 1);
}
cmapp = (struct vfs_drt_clustermap **)private;
cmap = *cmapp;
vfs_drt_trace(cmap, DRT_DEBUG_MARK | DBG_FUNC_START, (int)offset, (int)length, dirty, 0);
/* allocate a cluster map if we don't already have one */
if (cmap == NULL) {
/* no cluster map, nothing to clean */
if (!dirty) {
vfs_drt_trace(cmap, DRT_DEBUG_MARK | DBG_FUNC_END, 1, 0, 0, 0);
}
kret = vfs_drt_alloc_map(cmapp);
if (kret != KERN_SUCCESS) {
vfs_drt_trace(cmap, DRT_DEBUG_MARK | DBG_FUNC_END, 2, 0, 0, 0);
- if (ecount >= DRT_BITVECTOR_PAGES)
- panic("ecount >= DRT_BITVECTOR_PAGES, cmap = %p, index = %d, bit = %d", cmap, index, pgoff+i);
+ if (ecount >= DRT_BITVECTOR_PAGES) {
+ panic("ecount >= DRT_BITVECTOR_PAGES, cmap = %p, index = %d, bit = %d", cmap, index, pgoff + i);
+ }
DRT_HASH_SET_BIT(cmap, index, pgoff + i);
ecount++;
setcount++;
}
} else {
if (DRT_HASH_TEST_BIT(cmap, index, pgoff + i)) {
- if (ecount <= 0)
- panic("ecount <= 0, cmap = %p, index = %d, bit = %d", cmap, index, pgoff+i);
- assert(ecount > 0);
+ if (ecount <= 0) {
+ panic("ecount <= 0, cmap = %p, index = %d, bit = %d", cmap, index, pgoff + i);
+ }
+ assert(ecount > 0);
vfs_drt_mark_pages(void **cmapp, off_t offset, u_int length, u_int *setcountp)
{
/* XXX size unused, drop from interface */
- /* didn't find any bits set */
- panic("vfs_drt: entry summary count > 0 but no bits set in map, cmap = %p, index = %d, count = %lld",
- cmap, index, DRT_HASH_GET_COUNT(cmap, index));
+ /* didn't find any bits set */
+ panic("vfs_drt: entry summary count > 0 but no bits set in map, cmap = %p, index = %d, count = %lld",
+ cmap, index, DRT_HASH_GET_COUNT(cmap, index));
-vfs_drt_trace(__unused struct vfs_drt_clustermap *cmap, __unused int code,
- __unused int arg1, __unused int arg2, __unused int arg3,
- __unused int arg4)
+vfs_drt_trace(__unused struct vfs_drt_clustermap *cmap, __unused int code,
+ __unused int arg1, __unused int arg2, __unused int arg3,
+ __unused int arg4)
- if (bits_on != DRT_HASH_GET_COUNT(cmap, index))
- panic("bits_on = %d, index = %d\n", bits_on, index);
- }
+ if (bits_on != DRT_HASH_GET_COUNT(cmap, index)) {
+ panic("bits_on = %d, index = %d\n", bits_on, index);
+ }
+ }
+
+/*
+ * Internal interface only.
+ */
+static kern_return_t
+vfs_get_scmap_push_behavior_internal(void **cmapp, int *push_flag)
+{
+ struct vfs_drt_clustermap *cmap;
+
+ /* sanity */
+ if ((cmapp == NULL) || (*cmapp == NULL) || (push_flag == NULL)) {
+ return KERN_FAILURE;
+ }
+ cmap = *cmapp;
+
+ if (cmap->scm_modulus == DRT_HASH_XLARGE_MODULUS) {
+ /*
+ * If we have a full xlarge sparse cluster,
+ * we push it out all at once so the cluster
+ * map can be available to absorb more I/Os.
+ * This is done on large memory configs so
+ * the small I/Os don't interfere with the
+ * pro workloads.
+ */
+ *push_flag = PUSH_ALL;
+ }
+ return KERN_SUCCESS;
+}