/* apple/xnu.git: bsd/vfs/vfs_cluster.c (blob ec2eaf7f499bbbb8bfa39d58c86f861c4ef77711) */
/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cluster.c	8.10 (Berkeley) 3/28/95
 */
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

#include <sys/ubc.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>
#define CL_READ       0x01
#define CL_ASYNC      0x02
#define CL_COMMIT     0x04
#define CL_PAGEOUT    0x10
#define CL_AGE        0x20
#define CL_DUMP       0x40
#define CL_NOZERO     0x80
#define CL_PAGEIN     0x100
#define CL_DEV_MEMORY 0x200
#define CL_PRESERVE   0x400
struct clios {
    u_int  io_completed;    /* amount of io that has currently completed */
    u_int  io_issued;       /* amount of io that was successfully issued */
    int    io_error;        /* error code of first error encountered */
    int    io_wanted;       /* someone is sleeping waiting for a change in state */
};
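/*
 * A 'struct clios' tracks the state of a stream of asynchronous I/Os:
 * the issuer (cluster_nocopy_write, for example) bumps io_issued as it
 * hands each chunk to cluster_io, and cluster_iodone adds to io_completed
 * (and records the first error seen) as transactions finish.  A caller
 * that needs to wait for the stream to drain sets io_wanted and sleeps
 * on &iostate.io_wanted; cluster_iodone clears the flag and wakes it.
 */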
static void cluster_zero(upl_t upl, vm_offset_t upl_offset,
                int size, struct buf *bp);
static int  cluster_read_x(struct vnode *vp, struct uio *uio,
                off_t filesize, int devblocksize, int flags);
static int  cluster_write_x(struct vnode *vp, struct uio *uio,
                off_t oldEOF, off_t newEOF, off_t headOff,
                off_t tailOff, int devblocksize, int flags);
static int  cluster_nocopy_read(struct vnode *vp, struct uio *uio,
                off_t filesize, int devblocksize, int flags);
static int  cluster_nocopy_write(struct vnode *vp, struct uio *uio,
                off_t newEOF, int devblocksize, int flags);
static int  cluster_phys_read(struct vnode *vp, struct uio *uio,
                off_t filesize, int devblocksize, int flags);
static int  cluster_phys_write(struct vnode *vp, struct uio *uio,
                off_t newEOF, int devblocksize, int flags);
static int  cluster_align_phys_io(struct vnode *vp, struct uio *uio,
                addr64_t usr_paddr, int xsize, int devblocksize, int flags);
static int  cluster_push_x(struct vnode *vp, off_t EOF, daddr_t first, daddr_t last, int can_delay);
static int  cluster_try_push(struct vnode *vp, off_t newEOF, int can_delay, int push_all);
/*
 * throttle the number of async writes that
 * can be outstanding on a single vnode
 * before we issue a synchronous write
 */
#define ASYNC_THROTTLE  9
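/*
 * The throttle is a simple handshake: a writer that finds vp->v_numoutput
 * at or above ASYNC_THROTTLE sets VTHROTTLED and tsleeps on
 * &vp->v_numoutput (see cluster_pageout and cluster_write_x below);
 * cluster_iodone clears VTHROTTLED and wakes the sleepers once the
 * outstanding count has dropped to ASYNC_THROTTLE / 3.
 */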
static int
cluster_iodone(bp)
    struct buf *bp;
{
    int          b_flags;
    int          error;
    int          total_size;
    int          total_resid;
    int          upl_offset;
    int          zero_offset;
    int          pg_offset;
    int          commit_size;
    upl_t        upl;
    struct buf   *cbp;
    struct buf   *cbp_head;
    struct buf   *cbp_next;
    struct buf   *real_bp;
    struct vnode *vp;
    struct clios *iostate;

    cbp_head = (struct buf *)(bp->b_trans_head);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
                 (int)cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);

    for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
        /*
         * all I/O requests that are part of this transaction
         * have to complete before we can process it
         */
        if ( !(cbp->b_flags & B_DONE)) {

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                         (int)cbp_head, (int)cbp, cbp->b_bcount, cbp->b_flags, 0);

            return 0;
        }
    }
    error       = 0;
    total_size  = 0;
    total_resid = 0;

    cbp         = cbp_head;
    upl_offset  = cbp->b_uploffset;
    upl         = cbp->b_pagelist;
    b_flags     = cbp->b_flags;
    real_bp     = cbp->b_real_bp;
    vp          = cbp->b_vp;
    zero_offset = cbp->b_validend;
    iostate     = (struct clios *)cbp->b_iostate;

    while (cbp) {
        if ((cbp->b_flags & B_ERROR) && error == 0)
            error = cbp->b_error;

        total_resid += cbp->b_resid;
        total_size  += cbp->b_bcount;

        cbp_next = cbp->b_trans_next;

        free_io_buf(cbp);

        cbp = cbp_next;
    }
    if (zero_offset)
        cluster_zero(upl, zero_offset, PAGE_SIZE - (zero_offset & PAGE_MASK), real_bp);

    if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= (ASYNC_THROTTLE / 3))) {
        vp->v_flag &= ~VTHROTTLED;
        wakeup((caddr_t)&vp->v_numoutput);
    }
    if (iostate) {
        /*
         * someone has issued multiple I/Os asynchronously
         * and is waiting for them to complete (streaming)
         */
        if (error && iostate->io_error == 0)
            iostate->io_error = error;

        iostate->io_completed += total_size;

        if (iostate->io_wanted) {
            /*
             * someone is waiting for the state of
             * this io stream to change
             */
            iostate->io_wanted = 0;
            wakeup((caddr_t)&iostate->io_wanted);
        }
    }
    if ((b_flags & B_NEED_IODONE) && real_bp) {
        if (error) {
            real_bp->b_flags |= B_ERROR;
            real_bp->b_error  = error;
        }
        real_bp->b_resid = total_resid;

        biodone(real_bp);
    }
    if (error == 0 && total_resid)
        error = EIO;

    if (b_flags & B_COMMIT_UPL) {
        pg_offset   = upl_offset & PAGE_MASK;
        commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;

        if (error || (b_flags & B_NOCACHE) || ((b_flags & B_PHYS) && !(b_flags & B_READ))) {
            int upl_abort_code;

            if (b_flags & B_PHYS)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if ((b_flags & B_PAGEOUT) && (error != ENXIO)) /* transient error */
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if (b_flags & B_PGIN)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
            else
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

            ubc_upl_abort_range(upl, upl_offset - pg_offset, commit_size, upl_abort_code);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                         (int)upl, upl_offset - pg_offset, commit_size,
                         0x80000000|upl_abort_code, 0);
        } else {
            int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;

            if (b_flags & B_PHYS)
                upl_commit_flags |= UPL_COMMIT_SET_DIRTY;
            else if ( !(b_flags & B_PAGEOUT))
                upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;
            if (b_flags & B_AGE)
                upl_commit_flags |= UPL_COMMIT_INACTIVATE;

            ubc_upl_commit_range(upl, upl_offset - pg_offset, commit_size, upl_commit_flags);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                         (int)upl, upl_offset - pg_offset, commit_size, upl_commit_flags, 0);
        }
    } else
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                     (int)upl, upl_offset, 0, error, 0);

    return (error);
}
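/*
 * cluster_zero zeroes 'size' bytes starting at 'upl_offset' within the
 * given upl.  If the passed buf has no data pointer (bp == NULL or
 * bp->b_data == NULL) the upl is temporarily mapped with ubc_upl_map and
 * unmapped again afterwards; otherwise the buffer's existing mapping is
 * used directly.
 */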
static void
cluster_zero(upl, upl_offset, size, bp)
    upl_t        upl;
    vm_offset_t  upl_offset;
    int          size;
    struct buf   *bp;
{
    vm_offset_t   io_addr = 0;
    int           must_unmap = 0;
    kern_return_t kret;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
                 upl_offset, size, (int)bp, 0, 0);

    if (bp == NULL || bp->b_data == NULL) {
        kret = ubc_upl_map(upl, &io_addr);

        if (kret != KERN_SUCCESS)
            panic("cluster_zero: ubc_upl_map() failed with (%d)", kret);
        if (io_addr == 0)
            panic("cluster_zero: ubc_upl_map() mapped 0");

        must_unmap = 1;
    } else
        io_addr = (vm_offset_t)bp->b_data;

    bzero((caddr_t)(io_addr + upl_offset), size);

    if (must_unmap) {
        kret = ubc_upl_unmap(upl);

        if (kret != KERN_SUCCESS)
            panic("cluster_zero: kernel_upl_unmap failed");
    }
}
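/*
 * cluster_io is the common back end for the entry points in this file.
 * It carves the request into chunks no larger than the device's maximum
 * transfer (vfs_io_attributes), maps each chunk to a physical block with
 * VOP_CMAP, and strings the resulting buf headers together into a
 * transaction (b_trans_head / b_trans_next) that is issued through
 * VOP_STRATEGY.  Read 'holes' (blkno == -1) are zero-filled in place,
 * asynchronous requests complete through cluster_iodone, and the optional
 * 'iostate' argument lets the caller track a stream of async I/Os.
 */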
static int
cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, real_bp, iostate)
    struct vnode *vp;
    upl_t        upl;
    vm_offset_t  upl_offset;
    off_t        f_offset;
    int          non_rounded_size;
    int          devblocksize;
    int          flags;
    struct buf   *real_bp;
    struct clios *iostate;
{
    struct buf   *cbp;
    struct buf   *cbp_head = 0;
    struct buf   *cbp_tail = 0;
    upl_page_info_t *pl;
    int          size;
    int          io_size;
    int          io_flags;
    int          error = 0;
    int          retval = 0;
    int          buf_count = 0;
    int          max_iosize;
    int          max_vectors;
    int          pg_count;
    int          pg_offset;
    int          pg_resid;
    int          pl_index;
    int          zero_offset = 0;
    int          priv;
    int          i;
    daddr_t      blkno;
    daddr_t      lblkno;

    if (flags & CL_READ) {
        io_flags = (B_VECTORLIST | B_READ);

        vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
    } else {
        io_flags = (B_VECTORLIST | B_WRITEINPROG);

        vfs_io_attributes(vp, B_WRITE, &max_iosize, &max_vectors);
    }
    pl = ubc_upl_pageinfo(upl);

    if (flags & CL_DUMP)
        io_flags |= B_NOCACHE;
    if (flags & CL_PAGEIN)
        io_flags |= B_PGIN;
    if (flags & CL_PAGEOUT)
        io_flags |= B_PAGEOUT;
    if (flags & CL_COMMIT)
        io_flags |= B_COMMIT_UPL;
    if (flags & CL_PRESERVE)
        io_flags |= B_PHYS;

    if (devblocksize)
        size = (non_rounded_size + (devblocksize - 1)) & ~(devblocksize - 1);
    else
        size = non_rounded_size;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
                 (int)f_offset, size, upl_offset, flags, 0);

    if ((flags & CL_READ) && ((upl_offset + non_rounded_size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
        /*
         * then we are going to end up
         * with a page that we can't complete (the file size wasn't a multiple
         * of PAGE_SIZE and we're trying to read to the end of the file)
         * so we'll go ahead and zero out the portion of the page we can't
         * read in from the file
         */
        zero_offset = upl_offset + non_rounded_size;
    }
    while (size) {
        if (size > max_iosize)
            io_size = max_iosize;
        else
            io_size = size;

        if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, (size_t *)&io_size, NULL)) {
            if (error == EOPNOTSUPP)
                panic("VOP_CMAP Unimplemented");
            break;
        }
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
                     (int)f_offset, (int)blkno, io_size, zero_offset, 0);

        if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {
            if (flags & CL_PAGEOUT) {
                error = EINVAL;
                break;
            }
            /*
             * Try paging out the page individually before
             * giving up entirely and dumping it (it could
             * be mapped in a "hole" and require allocation
             * before the I/O can be issued)
             */
            ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE_64, UPL_ABORT_FREE_ON_EMPTY);
            if (ubc_pushdirty_range(vp, f_offset, PAGE_SIZE_64) == 0) {
                error = EINVAL;
                break;
            }
            upl_offset += PAGE_SIZE_64;
            f_offset   += PAGE_SIZE_64;
            size       -= PAGE_SIZE_64;

            continue;
        }
        lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);
        /*
         * we have now figured out how much I/O we can do - this is in 'io_size'
         * pl_index represents the first page in the 'upl' that the I/O will occur for
         * pg_offset is the starting point in the first page for the I/O
         * pg_count is the number of full and partial pages that 'io_size' encompasses
         */
        pl_index  = upl_offset / PAGE_SIZE;
        pg_offset = upl_offset & PAGE_MASK;
        pg_count  = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;

        if (flags & CL_DEV_MEMORY) {
            /*
             * currently, can't deal with reading 'holes' in file
             */
            if ((long)blkno == -1) {
                error = EINVAL;
                break;
            }
            /*
             * treat physical requests as one 'giant' page
             */
            pg_count = 1;
        }
        if ((flags & CL_READ) && (long)blkno == -1) {
            int bytes_to_zero;

            /*
             * if we're reading and blkno == -1, then we've got a
             * 'hole' in the file that we need to deal with by zeroing
             * out the affected area in the upl
             */
            if (zero_offset && io_size == size) {
                /*
                 * if this upl contains the EOF and it is not a multiple of PAGE_SIZE
                 * then 'zero_offset' will be non-zero
                 * if the 'hole' returned by VOP_CMAP extends all the way to the eof
                 * (indicated by the io_size finishing off the I/O request for this UPL)
                 * then we're not going to issue an I/O for the
                 * last page in this upl... we need to zero both the hole and the tail
                 * of the page beyond the EOF, since the delayed zero-fill won't kick in
                 */
                bytes_to_zero = (((upl_offset + io_size) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - upl_offset;
            } else
                bytes_to_zero = io_size;

            cluster_zero(upl, upl_offset, bytes_to_zero, real_bp);

            if (cbp_head) {
                /*
                 * if there is a current I/O chain pending
                 * then the first page of the group we just zero'd
                 * will be handled by the I/O completion if the zero
                 * fill started in the middle of the page
                 */
                pg_count = (io_size - pg_offset) / PAGE_SIZE;
            } else {
                /*
                 * no pending I/O to pick up that first page
                 * so, we have to make sure it gets committed
                 * when we commit the rest of the pages...
                 * set the pg_offset to 0 so that the upl_commit_range
                 * starts with this page
                 */
                pg_count  = (io_size + pg_offset) / PAGE_SIZE;
                pg_offset = 0;
            }
            if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))
                /*
                 * if we're done with the request for this UPL
                 * then we have to make sure to commit the last page
                 * even if we only partially zero-filled it
                 */
                pg_count++;

            if (pg_count) {
                if (pg_offset)
                    pg_resid = PAGE_SIZE - pg_offset;
                else
                    pg_resid = 0;

                if (flags & CL_COMMIT)
                    ubc_upl_commit_range(upl,
                                         (upl_offset + pg_resid) & ~PAGE_MASK,
                                         pg_count * PAGE_SIZE,
                                         UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
            }
            upl_offset += io_size;
            f_offset   += io_size;
            size       -= io_size;

            if (cbp_head && pg_count)
                goto start_io;
            continue;

        } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
            real_bp->b_blkno = blkno;
        }
        if (pg_count > max_vectors) {
            io_size -= (pg_count - max_vectors) * PAGE_SIZE;

            if (io_size < 0) {
                io_size  = PAGE_SIZE - pg_offset;
                pg_count = 1;
            } else
                pg_count = max_vectors;
        }
        /* Throttle the speculative IO */
        if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT))
            priv = 0;
        else
            priv = 1;

        cbp = alloc_io_buf(vp, priv);

        if (flags & CL_PAGEOUT) {
            for (i = 0; i < pg_count; i++) {
                struct buf *bp;

                if (bp = incore(vp, lblkno + i)) {
                    if (!ISSET(bp->b_flags, B_BUSY)) {
                        bremfree(bp);
                        SET(bp->b_flags, (B_BUSY | B_INVAL));
                        brelse(bp);
                    } else
                        panic("BUSY bp found in cluster_io");
                }
            }
        }
        if (flags & CL_ASYNC) {
            cbp->b_flags |= (B_CALL | B_ASYNC);
            cbp->b_iodone = (void *)cluster_iodone;
        }
        cbp->b_flags |= io_flags;

        cbp->b_lblkno     = lblkno;
        cbp->b_blkno      = blkno;
        cbp->b_bcount     = io_size;
        cbp->b_pagelist   = upl;
        cbp->b_uploffset  = upl_offset;
        cbp->b_trans_next = (struct buf *)0;

        if (cbp->b_iostate = (void *)iostate)
            /*
             * caller wants to track the state of this
             * io... bump the amount issued against this stream
             */
            iostate->io_issued += io_size;

        if (flags & CL_READ)
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
                         cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
        else
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
                         cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);

        if (cbp_head) {
            cbp_tail->b_trans_next = cbp;
            cbp_tail = cbp;
        } else {
            cbp_head = cbp;
            cbp_tail = cbp;
        }
        (struct buf *)(cbp->b_trans_head) = cbp_head;
        buf_count++;

        upl_offset += io_size;
        f_offset   += io_size;
        size       -= io_size;

        if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY) && ((flags & CL_ASYNC) || buf_count > 8)) || size == 0) {
            /*
             * if we have no more I/O to issue or
             * the current I/O we've prepared fully
             * completes the last page in this request
             * and it's either an ASYNC request or
             * we've already accumulated more than 8 I/O's into
             * this transaction and it's not an I/O directed to
             * special DEVICE memory
             * then go ahead and issue the I/O
             */
start_io:
            if (real_bp) {
                cbp_head->b_flags |= B_NEED_IODONE;
                cbp_head->b_real_bp = real_bp;
            } else
                cbp_head->b_real_bp = (struct buf *)NULL;

            if (flags & CL_READ)
                /*
                 * we're about to issue the last I/O for this upl
                 * if this was a read to the eof and the eof doesn't
                 * finish on a page boundary, then we need to zero-fill
                 * the rest of the page....
                 */
                cbp_head->b_validend = zero_offset;
            else
                cbp_head->b_validend = 0;

            for (cbp = cbp_head; cbp;) {
                struct buf * cbp_next;

                if (io_flags & B_WRITEINPROG)
                    cbp->b_vp->v_numoutput++;

                cbp_next = cbp->b_trans_next;

                (void) VOP_STRATEGY(cbp);
                cbp = cbp_next;
            }
            if ( !(flags & CL_ASYNC)) {
                for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)
                    biowait(cbp);

                if (error = cluster_iodone(cbp_head)) {
                    if ((flags & CL_PAGEOUT) && (error == ENXIO))
                        retval = 0;    /* drop the error */
                    else
                        retval = error;
                    error = 0;
                }
            }
            cbp_head = (struct buf *)0;
            cbp_tail = (struct buf *)0;

            buf_count = 0;
        }
    }
    if (error) {
        int abort_size;

        io_size = 0;

        for (cbp = cbp_head; cbp;) {
            struct buf * cbp_next;

            upl_offset -= cbp->b_bcount;
            size       += cbp->b_bcount;
            io_size    += cbp->b_bcount;

            cbp_next = cbp->b_trans_next;
            free_io_buf(cbp);
            cbp = cbp_next;
        }
        if (iostate) {
            /*
             * update the error condition for this stream
             * since we never really issued the io
             * just go ahead and adjust it back
             */
            if (iostate->io_error == 0)
                iostate->io_error = error;
            iostate->io_issued -= io_size;

            if (iostate->io_wanted) {
                /*
                 * someone is waiting for the state of
                 * this io stream to change
                 */
                iostate->io_wanted = 0;
                wakeup((caddr_t)&iostate->io_wanted);
            }
        }
        pg_offset  = upl_offset & PAGE_MASK;
        abort_size = ((size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;

        if (flags & CL_COMMIT) {
            int upl_abort_code;

            if (flags & CL_PRESERVE)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if ((flags & CL_PAGEOUT) && (error != ENXIO)) /* transient error */
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if (flags & CL_PAGEIN)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
            else
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

            ubc_upl_abort_range(upl, upl_offset - pg_offset, abort_size, upl_abort_code);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
                         (int)upl, upl_offset - pg_offset, abort_size, error, 0);
        }
        if (real_bp) {
            real_bp->b_flags |= B_ERROR;
            real_bp->b_error = error;

            biodone(real_bp);
        }
        if (retval == 0)
            retval = error;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
                 (int)f_offset, size, upl_offset, retval, 0);

    return (retval);
}
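/*
 * cluster_rd_prefetch kicks off a speculative read of up to
 * MAX_UPL_TRANSFER pages starting at 'f_offset', clipped to the end of
 * the file.  Pages already resident (per ubc_page_op) are skipped; if
 * anything is left, advisory_read is used to bring it in.  Returns the
 * number of pages the prefetch covered.
 */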
static int
cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize)
    struct vnode *vp;
    off_t        f_offset;
    u_int        size;
    off_t        filesize;
    int          devblocksize;
{
    int          pages_to_fetch;
    int          skipped_pages;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
                 (int)f_offset, size, (int)filesize, 0, 0);

    if (f_offset >= filesize) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
                     (int)f_offset, 0, 0, 0, 0);
        return(0);
    }
    if (size > (MAX_UPL_TRANSFER * PAGE_SIZE))
        size = MAX_UPL_TRANSFER * PAGE_SIZE;
    else
        size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);

    if ((off_t)size > (filesize - f_offset))
        size = filesize - f_offset;

    pages_to_fetch = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;

    for (skipped_pages = 0; skipped_pages < pages_to_fetch; skipped_pages++) {
        if (ubc_page_op(vp, f_offset, 0, 0, 0) != KERN_SUCCESS)
            break;
        f_offset += PAGE_SIZE;
        size     -= PAGE_SIZE;
    }
    if (skipped_pages < pages_to_fetch)
        advisory_read(vp, filesize, f_offset, size, devblocksize);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
                 (int)f_offset + (pages_to_fetch * PAGE_SIZE), skipped_pages, 0, 1, 0);

    return (pages_to_fetch);
}
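/*
 * cluster_rd_ahead implements the sequential read-ahead policy: if the
 * current request follows on from the previous one (tracked through
 * vp->v_lastr and vp->v_maxra), the read-ahead window vp->v_ralen is
 * doubled (up to MAX_UPL_TRANSFER pages) and cluster_rd_prefetch is
 * called for the blocks just beyond the current request; otherwise the
 * read-ahead state is reset.
 */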
static void
cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize)
    struct vnode *vp;
    daddr_t      b_lblkno;
    daddr_t      e_lblkno;
    off_t        filesize;
    int          devblocksize;
{
    daddr_t      r_lblkno;
    off_t        f_offset;
    int          size_of_prefetch;
    int          max_pages;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
                 b_lblkno, e_lblkno, vp->v_lastr, 0, 0);

    if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                     vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);
        return;
    }
    if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) &&
                              (b_lblkno != (vp->v_maxra + 1) || vp->v_ralen == 0))) {
        vp->v_ralen = 0;
        vp->v_maxra = 0;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                     vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);

        return;
    }
    max_pages = MAX_UPL_TRANSFER;

    vp->v_ralen = vp->v_ralen ? min(max_pages, vp->v_ralen << 1) : 1;

    if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
        vp->v_ralen = min(max_pages, (e_lblkno + 1) - b_lblkno);

    if (e_lblkno < vp->v_maxra) {
        if ((vp->v_maxra - e_lblkno) > max(max_pages / 16, 4)) {

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                         vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);
            return;
        }
    }
    r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
    f_offset = (off_t)r_lblkno * PAGE_SIZE_64;

    if (f_offset < filesize) {
        size_of_prefetch = cluster_rd_prefetch(vp, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);

        if (size_of_prefetch)
            vp->v_maxra = (r_lblkno + size_of_prefetch) - 1;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                 vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
}
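/*
 * cluster_pageout is the VM pageout path into this file: it validates the
 * request (read-only mounts, negative or unaligned offsets, EOF), trims it
 * to the file size, throttles against ASYNC_THROTTLE, and then hands the
 * write to cluster_io with CL_PAGEOUT set.
 */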
int
cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    struct vnode *vp;
    upl_t        upl;
    vm_offset_t  upl_offset;
    off_t        f_offset;
    int          size;
    off_t        filesize;
    int          devblocksize;
    int          flags;
{
    int          io_size;
    int          pg_size;
    off_t        max_size;
    int          local_flags = CL_PAGEOUT;

    if ((flags & UPL_IOSYNC) == 0)
        local_flags |= CL_ASYNC;
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags |= CL_COMMIT;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
                 (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * If they didn't specify any I/O, then we are done...
     * we can't issue an abort because we don't know how
     * big the upl really is
     */
    if (size <= 0)
        return (EINVAL);

    if (vp->v_mount->mnt_flag & MNT_RDONLY) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
        return (EROFS);
    }
    /*
     * can't page-out to a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
       (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
        return (EINVAL);
    }
    max_size = filesize - f_offset;

    if (size < max_size)
        io_size = size;
    else
        io_size = max_size;

    pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (size > pg_size) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
                                UPL_ABORT_FREE_ON_EMPTY);
    }
    while (vp->v_numoutput >= ASYNC_THROTTLE) {
        vp->v_flag |= VTHROTTLED;
        tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_pageout", 0);
    }

    return (cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
                       local_flags, (struct buf *)0, (struct clios *)0));
}
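/*
 * cluster_pagein is the VM page-fault read path: after sanity checks
 * analogous to cluster_pageout's it issues the read through cluster_io
 * with CL_READ | CL_PAGEIN, and on success may trigger sequential
 * read-ahead via cluster_rd_ahead.
 */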
int
cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    struct vnode *vp;
    upl_t        upl;
    vm_offset_t  upl_offset;
    off_t        f_offset;
    int          size;
    off_t        filesize;
    int          devblocksize;
    int          flags;
{
    u_int        io_size;
    int          rounded_size;
    off_t        max_size;
    int          retval;
    int          local_flags = 0;

    if (upl == NULL || size < 0)
        panic("cluster_pagein: NULL upl passed in");

    if ((flags & UPL_IOSYNC) == 0)
        local_flags |= CL_ASYNC;
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags |= CL_COMMIT;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
                 (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * can't page-in from a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
       (f_offset & PAGE_MASK_64) || (size & PAGE_MASK) || (upl_offset & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
        return (EINVAL);
    }
    max_size = filesize - f_offset;

    if (size < max_size)
        io_size = size;
    else
        io_size = max_size;

    rounded_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (size > rounded_size && (local_flags & CL_COMMIT))
        ubc_upl_abort_range(upl, upl_offset + rounded_size,
                            size - (upl_offset + rounded_size), UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);

    retval = cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
                        local_flags | CL_READ | CL_PAGEIN, (struct buf *)0, (struct clios *)0);

    if (retval == 0) {
        int b_lblkno;
        int e_lblkno;

        b_lblkno = (int)(f_offset / PAGE_SIZE_64);
        e_lblkno = (int)
            ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);

        if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF) && rounded_size == PAGE_SIZE) {
            /*
             * we haven't read the last page of the file yet
             * so let's try to read ahead if we're in
             * a sequential access pattern
             */
            cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
        }
        vp->v_lastr = e_lblkno;
    }
    return (retval);
}
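/*
 * cluster_bp pushes a conventional struct buf (which must already carry
 * a upl in b_pagelist) through cluster_io as a single asynchronous
 * request.
 */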
int
cluster_bp(bp)
    struct buf *bp;
{
    off_t  f_offset;
    int    flags;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 19)) | DBG_FUNC_START,
                 (int)bp, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);

    if (bp->b_pagelist == (upl_t) 0)
        panic("cluster_bp: can't handle NULL upl yet\n");
    if (bp->b_flags & B_READ)
        flags = CL_ASYNC | CL_READ;
    else
        flags = CL_ASYNC;

    f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);

    return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, 0, flags, bp, (struct clios *)0));
}
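/*
 * cluster_write is the top-level write entry point.  For cached files (or
 * non-user-space uios) it simply calls cluster_write_x.  For files marked
 * VNOCACHE_DATA it walks the iovec list and picks a strategy per segment:
 * physically contiguous targets go to cluster_phys_write, small or
 * unaligned segments fall back to cluster_write_x, and large page-aligned
 * segments use the zero-copy cluster_nocopy_write path.
 */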
int
cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t        oldEOF;
    off_t        newEOF;
    off_t        headOff;
    off_t        tailOff;
    int          devblocksize;
    int          flags;
{
    int          prev_resid;
    int          clip_size;
    off_t        max_io_size;
    struct iovec *iov;
    vm_offset_t  upl_offset;
    int          upl_size;
    int          pages_in_pl;
    int          upl_flags;
    upl_t        upl;
    upl_page_info_t *pl;
    int          retval = 0;

    if ( (!(vp->v_flag & VNOCACHE_DATA)) || (!uio) || (uio->uio_segflg != UIO_USERSPACE)) {
        retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
        return(retval);
    }

    while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0) {
        /* we know we have a resid, so this is safe */
        iov = uio->uio_iov;
        while (iov->iov_len == 0) {
            uio->uio_iov++;
            uio->uio_iovcnt--;
            iov = uio->uio_iov;
        }
        /*
         * We check every vector target and if it is physically
         * contiguous space, we skip the sanity checks.
         */
        upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
        upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
        pages_in_pl = 0;
        upl_flags = UPL_QUERY_OBJECT_TYPE;
        if ((vm_map_get_upl(current_map(),
                            (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                            &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS) {
            /*
             * the user app must have passed in an invalid address
             */
            return (EFAULT);
        }

        if (upl_flags & UPL_PHYS_CONTIG) {
            if (flags & IO_HEADZEROFILL) {
                flags &= ~IO_HEADZEROFILL;

                if (retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, devblocksize, IO_HEADZEROFILL))
                    return(retval);
            }

            retval = cluster_phys_write(vp, uio, newEOF, devblocksize, flags);

            if (uio->uio_resid == 0 && (flags & IO_TAILZEROFILL)) {
                retval = cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, devblocksize, IO_HEADZEROFILL);
                return(retval);
            }
        } else if ((uio->uio_resid < 4 * PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL))) {
            /*
             * We set a threshold of 4 pages to decide if the nocopy
             * write loop is worth the trouble...
             * we also come here if we're trying to zero the head and/or tail
             * of a partially written page, and the user source is not a physically contiguous region
             */
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            return(retval);
        } else if (uio->uio_offset & PAGE_MASK_64) {
            /* Bring the file offset write up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;
            /*
             * Fake the resid going into the cluster_write_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        } else if ((int)iov->iov_base & PAGE_MASK_64) {
            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        } else {
            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */
            max_io_size = newEOF - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = max_io_size;

            if (clip_size < PAGE_SIZE) {
                /*
                 * Take care of tail end of write in this vector
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            } else {
                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags);
                if ((retval == 0) && uio->uio_resid)
                    retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
        }
    }
    return(retval);
}
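/*
 * cluster_nocopy_write writes directly from the user's pages: it wires
 * the source buffer with vm_map_get_upl (retrying with increasing
 * force_data_sync), evicts any cached pages for the target range with
 * UPL_POP_DUMP, and issues the writes asynchronously through cluster_io,
 * using a local struct clios to bound the number outstanding and to wait
 * for the whole stream to complete before returning.
 */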
static int
cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t        newEOF;
    int          devblocksize;
    int          flags;
{
    upl_t            upl;
    upl_page_info_t  *pl;
    off_t            upl_f_offset;
    vm_offset_t      upl_offset;
    off_t            max_io_size;
    int              io_size;
    int              io_flag;
    int              upl_size;
    int              upl_needed_size;
    int              pages_in_pl;
    int              upl_flags;
    kern_return_t    kret;
    struct iovec     *iov;
    int              i;
    int              force_data_sync;
    int              error = 0;
    struct clios     iostate;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
                 (int)uio->uio_offset, (int)uio->uio_resid,
                 (int)newEOF, devblocksize, 0);

    /*
     * When we enter this routine, we know
     *  -- the offset into the file is on a pagesize boundary
     *  -- the resid is a page multiple
     *  -- the resid will not exceed iov_len
     */
    cluster_try_push(vp, newEOF, 0, 1);

    iostate.io_completed = 0;
    iostate.io_issued = 0;
    iostate.io_error = 0;
    iostate.io_wanted = 0;

    iov = uio->uio_iov;

    while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
        io_size = uio->uio_resid;

        if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            io_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE) / 4)
            io_size = (MAX_UPL_TRANSFER * PAGE_SIZE) / 8;

        upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
        upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
                     (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);

        for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
            pages_in_pl = 0;
            upl_size = upl_needed_size;
            upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
                        UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

            kret = vm_map_get_upl(current_map(),
                                  (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                                  &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);

            if (kret != KERN_SUCCESS) {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
                             0, 0, 0, kret, 0);

                /*
                 * cluster_nocopy_write: failed to get pagelist
                 *
                 * we may have already spun some portion of this request
                 * off as async requests... we need to wait for the I/O
                 * to complete before returning
                 */
                goto wait_for_writes;
            }
            pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
            pages_in_pl = upl_size / PAGE_SIZE;

            for (i = 0; i < pages_in_pl; i++) {
                if (!upl_valid_page(pl, i))
                    break;
            }
            if (i == pages_in_pl)
                break;

            /*
             * didn't get all the pages back that we
             * needed... release this upl and try again
             */
            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        if (force_data_sync >= 3) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
                         i, pages_in_pl, upl_size, kret, 0);

            /*
             * for some reason, we couldn't acquire a hold on all
             * the pages needed in the user's address space
             *
             * we may have already spun some portion of this request
             * off as async requests... we need to wait for the I/O
             * to complete before returning
             */
            goto wait_for_writes;
        }
        /*
         * Consider the possibility that upl_size wasn't satisfied.
         */
        if (upl_size != upl_needed_size)
            io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
                     (int)upl_offset, upl_size, (int)iov->iov_base, io_size, 0);

        if (io_size == 0) {
            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
            /*
             * we may have already spun some portion of this request
             * off as async requests... we need to wait for the I/O
             * to complete before returning
             */
            goto wait_for_writes;
        }
        /*
         * Now look for pages already in the cache
         * and throw them away.
         */
        upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
        max_io_size = io_size;

        while (max_io_size) {
            /*
             * Flag UPL_POP_DUMP says if the page is found
             * in the page cache it must be thrown away.
             */
            ubc_page_op(vp, upl_f_offset,
                        UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
                        0, 0);
            max_io_size  -= PAGE_SIZE_64;
            upl_f_offset += PAGE_SIZE_64;
        }
        /*
         * we want push out these writes asynchronously so that we can overlap
         * the preparation of the next I/O
         * if there are already too many outstanding writes
         * wait until some complete before issuing the next
         */
        while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) {
            iostate.io_wanted = 1;
            tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_write", 0);
        }
        if (iostate.io_error) {
            /*
             * one of the earlier writes we issued ran into a hard error
             * don't issue any more writes, cleanup the UPL
             * that was just created but not used, then
             * go wait for all writes that are part of this stream
             * to complete before returning the error to the caller
             */
            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);

            goto wait_for_writes;
        }
        io_flag = CL_ASYNC | CL_PRESERVE | CL_COMMIT;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
                     (int)upl_offset, (int)uio->uio_offset, io_size, io_flag, 0);

        error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
                           io_size, devblocksize, io_flag, (struct buf *)0, &iostate);

        iov->iov_len    -= io_size;
        iov->iov_base   += io_size;
        uio->uio_resid  -= io_size;
        uio->uio_offset += io_size;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
                     (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
    }

wait_for_writes:
    /*
     * make sure all async writes issued as part of this stream
     * have completed before we return
     */
    while (iostate.io_issued != iostate.io_completed) {
        iostate.io_wanted = 1;
        tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_write", 0);
    }
    if (iostate.io_error)
        error = iostate.io_error;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                 (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);

    return (error);
}
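/*
 * cluster_phys_write handles a write whose source is a single physically
 * contiguous region: it wires the region, uses cluster_align_phys_io for
 * any leading or trailing fragment that is not devblocksize aligned, and
 * issues the aligned middle portion as one synchronous CL_DEV_MEMORY
 * request through cluster_io.
 */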
static int
cluster_phys_write(vp, uio, newEOF, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t        newEOF;
    int          devblocksize;
    int          flags;
{
    upl_page_info_t *pl;
    addr64_t         src_paddr;
    upl_t            upl;
    vm_offset_t      upl_offset;
    int              io_size;
    int              head_size;
    int              tail_size;
    int              upl_size;
    int              upl_needed_size;
    int              pages_in_pl;
    int              upl_flags;
    kern_return_t    kret;
    struct iovec     *iov;
    int              error = 0;

    /*
     * When we enter this routine, we know
     *  -- the resid will not exceed iov_len
     *  -- the vector target address is physically contiguous
     */
    cluster_try_push(vp, newEOF, 0, 1);

    iov = uio->uio_iov;
    io_size = iov->iov_len;
    upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
    upl_needed_size = upl_offset + io_size;

    pages_in_pl = 0;
    upl_size = upl_needed_size;
    upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
                UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

    kret = vm_map_get_upl(current_map(),
                          (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                          &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);

    if (kret != KERN_SUCCESS) {
        /*
         * cluster_phys_write: failed to get pagelist
         * note: return kret here
         */
        return(EINVAL);
    }
    /*
     * Consider the possibility that upl_size wasn't satisfied.
     * This is a failure in the physical memory case.
     */
    if (upl_size < upl_needed_size) {
        kernel_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
        return(EINVAL);
    }
    pl = ubc_upl_pageinfo(upl);

    src_paddr = (((addr64_t)(int)upl_phys_page(pl, 0)) << 12) + ((addr64_t)iov->iov_base & PAGE_MASK);

    while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) {
        head_size = devblocksize - (int)(uio->uio_offset & (devblocksize - 1));

        if (head_size > io_size)
            head_size = io_size;

        error = cluster_align_phys_io(vp, uio, src_paddr, head_size, devblocksize, 0);

        if (error) {
            ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

            return(EINVAL);
        }
        upl_offset += head_size;
        src_paddr  += head_size;
        io_size    -= head_size;
    }
    tail_size = io_size & (devblocksize - 1);
    io_size  -= tail_size;

    if (io_size) {
        /*
         * issue a synchronous write to cluster_io
         */
        error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
                           io_size, 0, CL_DEV_MEMORY, (struct buf *)0, (struct clios *)0);
    }
    if (error == 0) {
        /*
         * The cluster_io write completed successfully,
         * update the uio structure
         */
        uio->uio_resid  -= io_size;
        iov->iov_len    -= io_size;
        iov->iov_base   += io_size;
        uio->uio_offset += io_size;
        src_paddr       += io_size;

        if (tail_size)
            error = cluster_align_phys_io(vp, uio, src_paddr, tail_size, devblocksize, 0);
    }
    /*
     * just release our hold on the physically contiguous
     * region without changing any state
     */
    ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

    return (error);
}
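/*
 * cluster_write_x is the buffered write path.  Each pass builds a upl
 * over the affected page range, pre-reads any partially overwritten pages
 * that are not yet valid, copies or zero-fills the data with the upl
 * mapped into the kernel, and then either issues the I/O immediately
 * (IO_SYNC) or records the dirty page run in the vnode's cluster list
 * (v_clusters) for a later push, falling back to the VHASDIRTY mechanism
 * when the list is full and nothing can be pushed.
 */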
static int
cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t        oldEOF;
    off_t        newEOF;
    off_t        headOff;
    off_t        tailOff;
    int          devblocksize;
    int          flags;
{
    upl_page_info_t *pl;
    upl_t        upl;
    vm_offset_t  upl_offset;
    int          upl_size;
    off_t        upl_f_offset;
    int          pages_in_upl;
    int          start_offset;
    int          xfer_resid;
    int          io_size;
    int          io_flags;
    vm_offset_t  io_address;
    int          io_offset;
    int          bytes_to_zero;
    int          bytes_to_move;
    kern_return_t kret;
    int          retval = 0;
    int          uio_resid;
    long long    total_size;
    long long    zero_cnt;
    off_t        zero_off;
    long long    zero_cnt1;
    off_t        zero_off1;
    daddr_t      start_blkno;
    daddr_t      last_blkno;

    if (uio) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
                     (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);

        uio_resid = uio->uio_resid;
    } else {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
                     0, 0, (int)oldEOF, (int)newEOF, 0);

        uio_resid = 0;
    }
    zero_cnt  = 0;
    zero_cnt1 = 0;

    if (flags & IO_HEADZEROFILL) {
        /*
         * some filesystems (HFS is one) don't support unallocated holes within a file...
         * so we zero fill the intervening space between the old EOF and the offset
         * where the next chunk of real data begins.... ftruncate will also use this
         * routine to zero fill to the new EOF when growing a file... in this case, the
         * uio structure will not be provided
         */
        if (uio) {
            if (headOff < uio->uio_offset) {
                zero_cnt = uio->uio_offset - headOff;
                zero_off = headOff;
            }
        } else if (headOff < newEOF) {
            zero_cnt = newEOF - headOff;
            zero_off = headOff;
        }
    }
    if (flags & IO_TAILZEROFILL) {
        if (uio) {
            zero_off1 = uio->uio_offset + uio->uio_resid;

            if (zero_off1 < tailOff)
                zero_cnt1 = tailOff - zero_off1;
        }
    }
    if (zero_cnt == 0 && uio == (struct uio *) 0) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
                     retval, 0, 0, 0, 0);
        return (0);
    }

    while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
        /*
         * for this iteration of the loop, figure out where our starting point is
         */
        if (zero_cnt) {
            start_offset = (int)(zero_off & PAGE_MASK_64);
            upl_f_offset = zero_off - start_offset;
        } else if (uio_resid) {
            start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
            upl_f_offset = uio->uio_offset - start_offset;
        } else {
            start_offset = (int)(zero_off1 & PAGE_MASK_64);
            upl_f_offset = zero_off1 - start_offset;
        }
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
                     (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);

        if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            total_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        /*
         * compute the size of the upl needed to encompass
         * the requested write... limit each call to cluster_io
         * to the maximum UPL size... cluster_io will clip if
         * this exceeds the maximum io_size for the device,
         * make sure to account for
         * a starting offset that's not page aligned
         */
        upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

        if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        pages_in_upl = upl_size / PAGE_SIZE;
        io_size      = upl_size - start_offset;

        if ((long long)io_size > total_size)
            io_size = total_size;

        start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
        last_blkno  = start_blkno + pages_in_upl;

        kret = ubc_create_upl(vp,
                              upl_f_offset,
                              upl_size,
                              &upl,
                              &pl,
                              UPL_FLAGS_NONE);
        if (kret != KERN_SUCCESS)
            panic("cluster_write: failed to get pagelist");

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
                     (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        if (start_offset && !upl_valid_page(pl, 0)) {
            int read_size;

            /*
             * we're starting in the middle of the first page of the upl
             * and the page isn't currently valid, so we're going to have
             * to read it in first... this is a synchronous operation
             */
            read_size = PAGE_SIZE;

            if ((upl_f_offset + read_size) > newEOF)
                read_size = newEOF - upl_f_offset;

            retval = cluster_io(vp, upl, 0, upl_f_offset, read_size, devblocksize,
                                CL_READ, (struct buf *)0, (struct clios *)0);
            if (retval) {
                /*
                 * we had an error during the read which causes us to abort
                 * the current cluster_write request... before we do, we need
                 * to release the rest of the pages in the upl without modifying
                 * their state and mark the failed page in error
                 */
                ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                             (int)upl, 0, 0, retval, 0);
                break;
            }
        }
        if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
            /*
             * the last offset we're writing to in this upl does not end on a page
             * boundary... if it's not beyond the old EOF, then we'll also need to
             * pre-read this page in if it isn't already valid
             */
            upl_offset = upl_size - PAGE_SIZE;

            if ((upl_f_offset + start_offset + io_size) < oldEOF &&
                !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {
                int read_size;

                read_size = PAGE_SIZE;

                if ((upl_f_offset + upl_offset + read_size) > newEOF)
                    read_size = newEOF - (upl_f_offset + upl_offset);

                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size, devblocksize,
                                    CL_READ, (struct buf *)0, (struct clios *)0);
                if (retval) {
                    /*
                     * we had an error during the read which causes us to abort
                     * the current cluster_write request... before we do, we
                     * need to release the rest of the pages in the upl without
                     * modifying their state and mark the failed page in error
                     */
                    ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
                    ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                                 (int)upl, 0, 0, retval, 0);
                    break;
                }
            }
        }
        if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
            panic("cluster_write: ubc_upl_map failed\n");

        xfer_resid = io_size;
        io_offset = start_offset;

        while (zero_cnt && xfer_resid) {

            if (zero_cnt < (long long)xfer_resid)
                bytes_to_zero = zero_cnt;
            else
                bytes_to_zero = xfer_resid;

            if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                             (int)upl_f_offset + io_offset, bytes_to_zero,
                             (int)io_offset, xfer_resid, 0);
            } else {
                int zero_pg_index;

                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
                zero_pg_index = (int)((zero_off - upl_f_offset) / PAGE_SIZE_64);

                if ( !upl_valid_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                                 (int)upl_f_offset + io_offset, bytes_to_zero,
                                 (int)io_offset, xfer_resid, 0);

                } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
                           !upl_dirty_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                                 (int)upl_f_offset + io_offset, bytes_to_zero,
                                 (int)io_offset, xfer_resid, 0);
                }
            }
            xfer_resid -= bytes_to_zero;
            zero_cnt   -= bytes_to_zero;
            zero_off   += bytes_to_zero;
            io_offset  += bytes_to_zero;
        }
        if (xfer_resid && uio_resid) {
            bytes_to_move = min(uio_resid, xfer_resid);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
                         (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);

            retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);

            if (retval) {
                if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                    panic("cluster_write: kernel_upl_unmap failed\n");

                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                             (int)upl, 0, 0, retval, 0);
            } else {
                uio_resid  -= bytes_to_move;
                xfer_resid -= bytes_to_move;
                io_offset  += bytes_to_move;
            }
        }
        while (xfer_resid && zero_cnt1 && retval == 0) {

            if (zero_cnt1 < (long long)xfer_resid)
                bytes_to_zero = zero_cnt1;
            else
                bytes_to_zero = xfer_resid;

            if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                             (int)upl_f_offset + io_offset,
                             bytes_to_zero, (int)io_offset, xfer_resid, 0);
            } else {
                int zero_pg_index;

                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
                zero_pg_index = (int)((zero_off1 - upl_f_offset) / PAGE_SIZE_64);

                if ( !upl_valid_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                                 (int)upl_f_offset + io_offset,
                                 bytes_to_zero, (int)io_offset, xfer_resid, 0);

                } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
                           !upl_dirty_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                                 (int)upl_f_offset + io_offset,
                                 bytes_to_zero, (int)io_offset, xfer_resid, 0);
                }
            }
            xfer_resid -= bytes_to_zero;
            zero_cnt1  -= bytes_to_zero;
            zero_off1  += bytes_to_zero;
            io_offset  += bytes_to_zero;
        }

        if (retval == 0) {
            int cl_index;
            int can_delay;

            io_size += start_offset;

            if ((upl_f_offset + io_size) >= newEOF && io_size < upl_size) {
                /*
                 * if we're extending the file with this write
                 * we'll zero fill the rest of the page so that
                 * if the file gets extended again in such a way as to leave a
                 * hole starting at this EOF, we'll have zero's in the correct spot
                 */
                bzero((caddr_t)(io_address + io_size), upl_size - io_size);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                             (int)upl_f_offset + io_size,
                             upl_size - io_size, 0, 0, 0);
            }
            if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                panic("cluster_write: kernel_upl_unmap failed\n");

            if (flags & IO_SYNC)
                /*
                 * if the IO_SYNC flag is set then we need to
                 * bypass any clusters and immediately issue
                 * the I/O
                 */
                goto issue_io;

            if (vp->v_clen == 0)
                /*
                 * no clusters currently present
                 */
                goto start_new_cluster;

            /*
             * keep track of the overall dirty page
             * range we've developed
             * in case we have to fall back to the
             * VHASDIRTY method of flushing
             */
            if (vp->v_flag & VHASDIRTY)
                goto delay_io;

            for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
                /*
                 * we have an existing cluster... see if this write will extend it nicely
                 */
                if (start_blkno >= vp->v_clusters[cl_index].start_pg) {
                    /*
                     * the current write starts at or after the current cluster
                     */
                    if (last_blkno <= (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
                        /*
                         * we have a write that fits entirely
                         * within the existing cluster limits
                         */
                        if (last_blkno > vp->v_clusters[cl_index].last_pg)
                            /*
                             * update our idea of where the cluster ends
                             */
                            vp->v_clusters[cl_index].last_pg = last_blkno;
                        break;
                    }
                    if (start_blkno < (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
                        /*
                         * we have a write that starts in the middle of the current cluster
                         * but extends beyond the cluster's limit
                         * we'll clip the current cluster if we actually
                         * overlap with the new write
                         * and start a new cluster with the current write
                         */
                        if (vp->v_clusters[cl_index].last_pg > start_blkno)
                            vp->v_clusters[cl_index].last_pg = start_blkno;
                    }
                    /*
                     * we also get here for the case where the current write starts
                     * beyond the limit of the existing cluster
                     *
                     * in either case, we'll check the remaining clusters before
                     * starting a new one
                     */
                } else {
                    /*
                     * the current write starts in front of the current cluster
                     */
                    if ((vp->v_clusters[cl_index].last_pg - start_blkno) <= MAX_UPL_TRANSFER) {
                        /*
                         * we can just merge the old cluster
                         * with the new request and leave it
                         * in the cache
                         */
                        vp->v_clusters[cl_index].start_pg = start_blkno;

                        if (last_blkno > vp->v_clusters[cl_index].last_pg) {
                            /*
                             * the current write completely
                             * envelops the existing cluster
                             */
                            vp->v_clusters[cl_index].last_pg = last_blkno;
                        }
                        break;
                    }
                    /*
                     * if we were to combine this write with the current cluster
                     * we would exceed the cluster size limit.... so,
                     * let's see if there's any overlap of the new I/O with
                     * the existing cluster...
                     */
                    if (last_blkno > vp->v_clusters[cl_index].start_pg)
                        /*
                         * the current write extends into the existing cluster
                         * clip the current cluster by moving the start position
                         * to where the current write ends
                         */
                        vp->v_clusters[cl_index].start_pg = last_blkno;
                    /*
                     * if we get here, there was no way to merge
                     * the new I/O with this cluster and
                     * keep it under our maximum cluster length
                     * we'll check the remaining clusters before starting a new one
                     */
                }
            }
            if (cl_index < vp->v_clen)
                /*
                 * we found an existing cluster that we
                 * could merge this I/O into
                 */
                goto delay_io;

            if (vp->v_clen < MAX_CLUSTERS && !(vp->v_flag & VNOCACHE_DATA))
                /*
                 * we didn't find an existing cluster to
                 * merge into, but there's room to start
                 * a new one
                 */
                goto start_new_cluster;

            /*
             * no existing cluster to merge with and no
             * room to start a new one... we'll try
             * pushing the existing ones... if none of
             * them are able to be pushed, we'll have
             * to fall back on the VHASDIRTY mechanism
             * cluster_try_push will set v_clen to the
             * number of remaining clusters if it is
             * unable to push all of them
             */
            if (vp->v_flag & VNOCACHE_DATA)
                can_delay = 0;
            else
                can_delay = 1;

            if (cluster_try_push(vp, newEOF, 0, 0) == 0) {
                vp->v_flag |= VHASDIRTY;
                goto delay_io;
            }
start_new_cluster:
            if (vp->v_clen == 0) {
                vp->v_ciosiz = devblocksize;
                vp->v_cstart = start_blkno;
                vp->v_lastw  = last_blkno;
            }
            vp->v_clusters[vp->v_clen].start_pg = start_blkno;
            vp->v_clusters[vp->v_clen].last_pg  = last_blkno;
            vp->v_clen++;
delay_io:
            /*
             * make sure we keep v_cstart and v_lastw up to
             * date in case we have to fall back on the
             * V_HASDIRTY mechanism (or we've already entered it)
             */
            if (start_blkno < vp->v_cstart)
                vp->v_cstart = start_blkno;
            if (last_blkno > vp->v_lastw)
                vp->v_lastw = last_blkno;

            ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
            continue;
issue_io:
            /*
             * in order to maintain some semblance of coherency with mapped writes
             * we need to write the cluster back out as a multiple of the PAGESIZE
             * unless the cluster encompasses the last page of the file... in this
             * case we'll round out to the nearest device block boundary
             */
            io_size = upl_size;

            if ((upl_f_offset + io_size) > newEOF) {
                io_size = newEOF - upl_f_offset;
                io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
            }
            if (flags & IO_SYNC)
                io_flags = CL_COMMIT | CL_AGE;
            else
                io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;

            if (vp->v_flag & VNOCACHE_DATA)
                io_flags |= CL_DUMP;

            while (vp->v_numoutput >= ASYNC_THROTTLE) {
                vp->v_flag |= VTHROTTLED;
                tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_write", 0);
            }
            retval = cluster_io(vp, upl, 0, upl_f_offset, io_size, devblocksize,
                                io_flags, (struct buf *)0, (struct clios *)0);
        }
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
                 retval, 0, 0, 0, 0);

    return (retval);
}
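/*
 * cluster_read mirrors cluster_write: cached or non-user-space requests
 * go straight to cluster_read_x, while VNOCACHE_DATA requests are
 * examined per iovec segment and routed to cluster_phys_read,
 * cluster_read_x, or the zero-copy cluster_nocopy_read path depending on
 * size and alignment.
 */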
int
cluster_read(vp, uio, filesize, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t        filesize;
    int          devblocksize;
    int          flags;
{
    int          prev_resid;
    int          clip_size;
    off_t        max_io_size;
    struct iovec *iov;
    vm_offset_t  upl_offset;
    int          upl_size;
    int          pages_in_pl;
    int          upl_flags;
    upl_t        upl;
    upl_page_info_t *pl;
    int          retval = 0;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
                 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);

    /*
     * We set a threshold of 4 pages to decide if the nocopy
     * read loop is worth the trouble...
     */
    if (!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE))) {
        retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
                     (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
        return(retval);
    }

    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
        /* we know we have a resid, so this is safe */
        iov = uio->uio_iov;
        while (iov->iov_len == 0) {
            uio->uio_iov++;
            uio->uio_iovcnt--;
            iov = uio->uio_iov;
        }
        /*
         * We check every vector target and if it is physically
         * contiguous space, we skip the sanity checks.
         */
        upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
        upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
        pages_in_pl = 0;
        upl_flags = UPL_QUERY_OBJECT_TYPE;
        if((vm_map_get_upl(current_map(),
                           (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                           &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS) {
            /*
             * the user app must have passed in an invalid address
             */
            return (EFAULT);
        }

        if (upl_flags & UPL_PHYS_CONTIG) {
            retval = cluster_phys_read(vp, uio, filesize, devblocksize, flags);
        } else if (uio->uio_resid < 4 * PAGE_SIZE) {
            /*
             * We set a threshold of 4 pages to decide if the nocopy
             * read loop is worth the trouble...
             */
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
            return(retval);
        } else if (uio->uio_offset & PAGE_MASK_64) {
            /* Bring the file offset read up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;
            /*
             * Fake the resid going into the cluster_read_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        } else if ((int)iov->iov_base & PAGE_MASK_64) {
            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        } else {
            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */
            max_io_size = filesize - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = (int)max_io_size;

            if (clip_size < PAGE_SIZE) {
                /*
                 * Take care of the tail end of the read in this vector.
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            } else {
                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_read(vp, uio, filesize, devblocksize, flags);
                if ((retval == 0) && uio->uio_resid)
                    retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
        }
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
                 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);

    return(retval);
}
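/*
 * cluster_read_x is the buffered read path: resident pages are copied
 * straight to the user with uiomove64 while busied via ubc_page_op, the
 * remaining ranges are brought in through cluster_io on a upl, and
 * sequential access triggers read-ahead through cluster_rd_ahead.
 */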
2184 cluster_read_x(vp, uio, filesize, devblocksize, flags)
2191         upl_page_info_t *pl;
2193         vm_offset_t      upl_offset;
2203         vm_offset_t      io_address;
2211         b_lblkno = (int)(uio->uio_offset / PAGE_SIZE_64);
2213         while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2215                  * compute the size of the upl needed to encompass
2216                  * the requested read... limit each call to cluster_io
2217                  * to the maximum UPL size... cluster_io will clip if
2218                  * this exceeds the maximum io_size for the device,
2219                  * make sure to account for
2220                  * a starting offset that's not page aligned
2222                 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
2223                 upl_f_offset = uio->uio_offset - (off_t)start_offset;
2224                 max_size     = filesize - uio->uio_offset;
2226                 if ((off_t)((unsigned int)uio->uio_resid) < max_size)
2227                         io_size = uio->uio_resid;
2231                 if (uio->uio_segflg == UIO_USERSPACE && !(vp->v_flag & VNOCACHE_DATA)) {
2232                         segflg = uio->uio_segflg;
2234                         uio->uio_segflg = UIO_PHYS_USERSPACE;
2236                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2237                                      (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2239                         while (io_size && retval == 0) {
2245                                                 UPL_POP_SET | UPL_POP_BUSY,
2246                                                 &paddr, 0) != KERN_SUCCESS)
2249                                 xsize = PAGE_SIZE - start_offset;
2251                                 if (xsize > io_size)
2254                                 retval = uiomove64((addr64_t)(((addr64_t)paddr << 12) + start_offset), xsize, uio);
2256                                 ubc_page_op(vp, upl_f_offset,
2257                                             UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2260                                 start_offset = (int)
2261                                                (uio->uio_offset & PAGE_MASK_64);
2262                                 upl_f_offset = uio->uio_offset - start_offset;
2264                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2265                                      (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2267                         uio->uio_segflg = segflg;
2274                                  * we're already finished with this read request
2275                                  * let's see if we should do a read-ahead
2278                                            ((uio->uio_offset - 1) / PAGE_SIZE_64);
2280                                 if (!(vp->v_flag & VRAOFF))
2282                                          * let's try to read ahead if we're in
2283                                          * a sequential access pattern
2285                                         cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2286                                 vp->v_lastr = e_lblkno;
2290                         max_size = filesize - uio->uio_offset;
2292                 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2293                 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2294                         upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2295                 pages_in_upl = upl_size / PAGE_SIZE;
2297                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START,
2298                              (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
2300                 kret = ubc_create_upl(vp,
2306                 if (kret != KERN_SUCCESS)
2307                         panic("cluster_read: failed to get pagelist");
2309                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END,
2310                              (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
2313                  * scan from the beginning of the upl looking for the first
2314                  * non-valid page.... this will become the first page in
2315                  * the request we're going to make to 'cluster_io'... if all
2316                  * of the pages are valid, we won't call through to 'cluster_io'
2318                 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
2319                         if (!upl_valid_page(pl, start_pg))
2324                  * scan from the starting invalid page looking for a valid
2325                  * page before the end of the upl is reached, if we
2326                  * find one, then it will be the last page of the request to
2329                 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2330                         if (upl_valid_page(pl, last_pg))
2334                 if (start_pg < last_pg) {
2336                          * we found a range of 'invalid' pages that must be filled
2337                          * if the last page in this range is the last page of the file
2338                          * we may have to clip the size of it to keep from reading past
2339                          * the end of the last physical block associated with the file
2341                         upl_offset = start_pg * PAGE_SIZE;
2342                         io_size    = (last_pg - start_pg) * PAGE_SIZE;
2344                         if ((upl_f_offset + upl_offset + io_size) > filesize)
2345                                 io_size = filesize - (upl_f_offset + upl_offset);
2348                          * issue a synchronous read to cluster_io
2351                         error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
2352                                            io_size, devblocksize, CL_READ, (struct buf *)0, (struct clios *)0);
2356                  * if the read completed successfully, or there was no I/O request
2357                  * issued, then map the upl into kernel address space and
2358                  * move the data into user land.... we'll first add on any 'valid'
2359                  * pages that were present in the upl when we acquired it.
2362                         u_int  size_of_prefetch;
2364                         for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
2365                                 if (!upl_valid_page(pl, uio_last))
2369                          * compute size to transfer this round, if uio->uio_resid is
2370                          * still non-zero after this uiomove, we'll loop around and
2371                          * set up for another I/O.
2373                         val_size = (uio_last * PAGE_SIZE) - start_offset;
2375                         if (max_size < val_size)
2376                                 val_size = max_size;
2378                         if (uio->uio_resid < val_size)
2379                                 val_size = uio->uio_resid;
2381                         e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);
2383                         if (size_of_prefetch = (uio->uio_resid - val_size)) {
2385                                  * if there's still I/O left to do for this request, then issue a
2386                                  * pre-fetch I/O... the I/O wait time will overlap
2387                                  * with the copying of the data
2389                                 cluster_rd_prefetch(vp, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);
2391                         if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
2393                                  * let's try to read ahead if we're in
2394                                  * a sequential access pattern
2396                                 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2397                         vp->v_lastr = e_lblkno;
2399                         if (uio->uio_segflg == UIO_USERSPACE) {
2402                                 segflg = uio->uio_segflg;
2404                                 uio->uio_segflg = UIO_PHYS_USERSPACE;
2407                                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2408                                              (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2410                                 offset = start_offset;
2412                                 while (val_size && retval == 0) {
2417                                         i     = offset / PAGE_SIZE;
2418                                         csize = min(PAGE_SIZE - start_offset, val_size);
2420                                         paddr = ((addr64_t)upl_phys_page(pl, i) << 12) + start_offset;
2422                                         retval = uiomove64(paddr, csize, uio);
2426                                         start_offset = offset & PAGE_MASK;
2428                                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2429                                              (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2431                                 uio->uio_segflg = segflg;
2435                                 if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
2436                                         panic("cluster_read: ubc_upl_map() failed\n");
2438                                 retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);
2440                                 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
2441                                         panic("cluster_read: ubc_upl_unmap() failed\n");
2444                 if (start_pg < last_pg) {
2446                          * compute the range of pages that we actually issued an I/O for
2447                          * and either commit them as valid if the I/O succeeded
2448                          * or abort them if the I/O failed
2450                         io_size = (last_pg - start_pg) * PAGE_SIZE;
2452                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2453                                      (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);
2455                         if (error || (vp->v_flag & VNOCACHE_DATA))
2456                                 ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
2457                                                     UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2459                                 ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size,
2460                                                      UPL_COMMIT_CLEAR_DIRTY
2461                                                      | UPL_COMMIT_FREE_ON_EMPTY
2462                                                      | UPL_COMMIT_INACTIVATE);
2464                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2465                                      (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);
2467                 if ((last_pg - start_pg) < pages_in_upl) {
2472                          * the set of pages that we issued an I/O for did not encompass
2473                          * the entire upl... so just release these without modifying
2477                                 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2479                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2480                                      (int)upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);
2484                                  * we found some already valid pages at the beginning of
2485                                  * the upl commit these back to the inactive list with
2488                                 for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
2489                                         commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2490                                                        | UPL_COMMIT_INACTIVATE;
2492                                         if (upl_dirty_page(pl, cur_pg))
2493                                                 commit_flags |= UPL_COMMIT_SET_DIRTY;
2495                                         if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2496                                                 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2497                                                                     UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2499                                                 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2500                                                                      PAGE_SIZE, commit_flags);
2503                         if (last_pg < uio_last) {
2505                                  * we found some already valid pages immediately after the
2506                                  * pages we issued I/O for, commit these back to the
2507                                  * inactive list with reference cleared
2509                                 for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
2510                                         commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2511                                                        | UPL_COMMIT_INACTIVATE;
2513                                         if (upl_dirty_page(pl, cur_pg))
2514                                                 commit_flags |= UPL_COMMIT_SET_DIRTY;
2516                                         if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2517                                                 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2518                                                                     UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2520                                                 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2521                                                                      PAGE_SIZE, commit_flags);
2524                         if (uio_last < pages_in_upl) {
2526                                  * there were some invalid pages beyond the valid pages
2527                                  * that we didn't issue an I/O for, just release them
2530                                 ubc_upl_abort_range(upl, uio_last * PAGE_SIZE,
2531                                                     (pages_in_upl - uio_last) * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
2534                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2535                                      (int)upl, -1, -1, 0, 0);
2547 cluster_nocopy_read(vp, uio, filesize, devblocksize, flags)
2555         upl_page_info_t *pl;
2557         vm_offset_t      upl_offset;
2558         off_t            start_upl_f_offset;
2562         int              upl_needed_size;
2570         int              force_data_sync;
2573         struct clios     iostate;
2575         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
2576                      (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
2579          * When we enter this routine, we know
2580          *  -- the offset into the file is on a pagesize boundary
2581          *  -- the resid is a page multiple
2582          *  -- the resid will not exceed iov_len
2585         iostate.io_completed = 0;
2586         iostate.io_issued = 0;
2587         iostate.io_error = 0;
2588         iostate.io_wanted = 0;
2592         while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2594                 max_io_size = filesize - uio->uio_offset;
2596                 if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
2597                         io_size = max_io_size;
2599                         io_size = uio->uio_resid;
2602                  * We don't come into this routine unless
2603                  * UIO_USERSPACE is set.
2605                 segflg = uio->uio_segflg;
2607                 uio->uio_segflg = UIO_PHYS_USERSPACE;
2610                  * First look for pages already in the cache
2611                  * and move them to user space.
2613                 while (io_size && (retval == 0)) {
2614                         upl_f_offset = uio->uio_offset;
2617                          * If this call fails, it means the page is not
2618                          * in the page cache.
2620                         if (ubc_page_op(vp, upl_f_offset,
2621                                         UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
2624                         retval = uiomove64((addr64_t)paddr << 12, PAGE_SIZE, uio);
2626                         ubc_page_op(vp, upl_f_offset,
2627                                     UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2629                         io_size -= PAGE_SIZE;
2630                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
2631                                      (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2633                 uio->uio_segflg = segflg;
2637                          * we may have already spun some portion of this request
2638                          * off as async requests... we need to wait for the I/O
2639                          * to complete before returning
2641                         goto wait_for_reads;
2644                  * If we are already finished with this read, then return
2648                          * we may have already spun some portion of this request
2649                          * off as async requests... we need to wait for the I/O
2650                          * to complete before returning
2652                         goto wait_for_reads;
2654                 max_io_size = io_size;
2656                 if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2657                         max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2659                 if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE) / 4)
2660                         max_io_size = (MAX_UPL_TRANSFER * PAGE_SIZE) / 8;
2663                 start_upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
2664                 upl_f_offset = start_upl_f_offset;
2667                 while (io_size < max_io_size) {
2668                         if (ubc_page_op(vp, upl_f_offset,
2669                                         UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS) {
2670                                 ubc_page_op(vp, upl_f_offset,
2671                                             UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2675                          * Build up the io request parameters.
2677                         io_size      += PAGE_SIZE_64;
2678                         upl_f_offset += PAGE_SIZE_64;
2682                          * we may have already spun some portion of this request
2683                          * off as async requests... we need to wait for the I/O
2684                          * to complete before returning
2686                         goto wait_for_reads;
2688                 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2689                 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2691                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
2692                              (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);
2694                 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
2696                         upl_size = upl_needed_size;
2697                         upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2699                         kret = vm_map_get_upl(current_map(),
2700                                               (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2701                                               &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);
2703                         if (kret != KERN_SUCCESS) {
2704                                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2705                                              (int)upl_offset, upl_size, io_size, kret, 0);
2708                                  * cluster_nocopy_read: failed to get pagelist
2710                                  * we may have already spun some portion of this request
2711                                  * off as async requests... we need to wait for the I/O
2712                                  * to complete before returning
2714                                 goto wait_for_reads;
2716                         pages_in_pl = upl_size / PAGE_SIZE;
2717                         pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2719                         for (i = 0; i < pages_in_pl; i++) {
2720                                 if (!upl_valid_page(pl, i))
2723                         if (i == pages_in_pl)
2726                         ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2727                                             UPL_ABORT_FREE_ON_EMPTY);
2729                 if (force_data_sync >= 3) {
2730                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2731                                      (int)upl_offset, upl_size, io_size, kret, 0);
2733                         goto wait_for_reads;
2736                  * Consider the possibility that upl_size wasn't satisfied.
2738                 if (upl_size != upl_needed_size)
2739                         io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
2742                         ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2743                                             UPL_ABORT_FREE_ON_EMPTY);
2744                         goto wait_for_reads;
2746                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2747                              (int)upl_offset, upl_size, io_size, kret, 0);
2750                  * request asynchronously so that we can overlap
2751                  * the preparation of the next I/O
2752                  * if there are already too many outstanding reads
2753                  * wait until some have completed before issuing the next read
2755                 while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) {
2756                         iostate.io_wanted = 1;
2757                         tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_read", 0);
2759                 if (iostate.io_error) {
2761                          * one of the earlier reads we issued ran into a hard error
2762                          * don't issue any more reads, cleanup the UPL
2763                          * that was just created but not used, then
2764                          * go wait for any other reads to complete before
2765                          * returning the error to the caller
2767                         ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2768                                             UPL_ABORT_FREE_ON_EMPTY);
2770                         goto wait_for_reads;
2772                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
2773                              (int)upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);
2775                 retval = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
2776                                     io_size, devblocksize,
2777                                     CL_PRESERVE | CL_COMMIT | CL_READ | CL_ASYNC | CL_NOZERO,
2778                                     (struct buf *)0, &iostate);
2781                  * update the uio structure
2783                 iov->iov_base   += io_size;
2784                 iov->iov_len    -= io_size;
2785                 uio->uio_resid  -= io_size;
2786                 uio->uio_offset += io_size;
2788                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
2789                              (int)upl, (int)uio->uio_offset, (int)uio->uio_resid, retval, 0);
2795          * make sure all async reads that are part of this stream
2796          * have completed before we return
2798         while (iostate.io_issued != iostate.io_completed) {
2799                 iostate.io_wanted = 1;
2800                 tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_read", 0);
2802         if (iostate.io_error)
2803                 retval = iostate.io_error;
2805         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2806                      (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);
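/*
 * Editorial sketch (not part of the original file): cluster_nocopy_read keeps
 * issuing asynchronous reads but bounds the amount in flight by comparing
 * iostate.io_issued against iostate.io_completed; when the gap exceeds the
 * limit it sets io_wanted and sleeps until a completion wakes it.  A minimal
 * illustration of just that bookkeeping, with assumed sketch_* names (the real
 * code sleeps with tsleep() and is woken from the I/O completion path):
 */
struct sketch_iostate {
        unsigned int io_issued;      /* bytes of I/O handed to the device      */
        unsigned int io_completed;   /* bytes of I/O that have finished        */
        int          io_wanted;      /* issuer is waiting for completions      */
        int          io_error;       /* first hard error seen, if any          */
};

static int
sketch_must_wait(const struct sketch_iostate *s, unsigned int limit)
{
        /* true when too much I/O is outstanding to issue another request */
        return ((s->io_issued - s->io_completed) > limit);
}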
2813 cluster_phys_read(vp, uio, filesize, devblocksize, flags)
2820         upl_page_info_t *pl;
2822         vm_offset_t      upl_offset;
2828         int              upl_needed_size;
2833         struct clios     iostate;
2837          * When we enter this routine, we know
2838          *  -- the resid will not exceed iov_len
2839          *  -- the target address is physically contiguous
2844         max_size = filesize - uio->uio_offset;
2846         if (max_size > (off_t)((unsigned int)iov->iov_len))
2847                 io_size = iov->iov_len;
2851         upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2852         upl_needed_size = upl_offset + io_size;
2856         upl_size = upl_needed_size;
2857         upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2859         kret = vm_map_get_upl(current_map(),
2860                               (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2861                               &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);
2863         if (kret != KERN_SUCCESS) {
2865                  * cluster_phys_read: failed to get pagelist
2869         if (upl_size < upl_needed_size) {
2871                  * The upl_size wasn't satisfied.
2873                 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2877         pl = ubc_upl_pageinfo(upl);
2879         dst_paddr = (((addr64_t)(int)upl_phys_page(pl, 0)) << 12) + ((addr64_t)iov->iov_base & PAGE_MASK);
2881         while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) {
2884                 head_size = devblocksize - (int)(uio->uio_offset & (devblocksize - 1));
2886                 if (head_size > io_size)
2887                         head_size = io_size;
2889                 error = cluster_align_phys_io(vp, uio, dst_paddr, head_size, devblocksize, CL_READ);
2892                         ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2896                 upl_offset += head_size;
2897                 dst_paddr  += head_size;
2898                 io_size    -= head_size;
2900         tail_size = io_size & (devblocksize - 1);
2901         io_size  -= tail_size;
2903         iostate.io_completed = 0;
2904         iostate.io_issued = 0;
2905         iostate.io_error = 0;
2906         iostate.io_wanted = 0;
2908         while (io_size && error == 0) {
2911                 if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2912                         xsize = MAX_UPL_TRANSFER * PAGE_SIZE;
2916                  * request asynchronously so that we can overlap
2917                  * the preparation of the next I/O... we'll do
2918                  * the commit after all the I/O has completed
2919                  * since it's all issued against the same UPL
2920                  * if there are already too many outstanding reads
2921                  * wait until some have completed before issuing the next
2923                 while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) {
2924                         iostate.io_wanted = 1;
2925                         tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_phys_read", 0);
2928                 error = cluster_io(vp, upl, upl_offset, uio->uio_offset, xsize, 0,
2929                                    CL_READ | CL_NOZERO | CL_DEV_MEMORY | CL_ASYNC,
2930                                    (struct buf *)0, &iostate);
2932                  * The cluster_io read was issued successfully,
2933                  * update the uio structure
2936                 uio->uio_resid  -= xsize;
2937                 iov->iov_len    -= xsize;
2938                 iov->iov_base   += xsize;
2939                 uio->uio_offset += xsize;
2941                 upl_offset      += xsize;
2946          * make sure all async reads that are part of this stream
2947          * have completed before we proceed
2949         while (iostate.io_issued != iostate.io_completed) {
2950                 iostate.io_wanted = 1;
2951                 tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_phys_read", 0);
2953         if (iostate.io_error) {
2954                 error = iostate.io_error;
2956         if (error == 0 && tail_size)
2957                 error = cluster_align_phys_io(vp, uio, dst_paddr, tail_size, devblocksize, CL_READ);
2960          * just release our hold on the physically contiguous
2961          * region without changing any state
2963         ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
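/*
 * Editorial sketch (not part of the original file): cluster_phys_read splits a
 * request into a misaligned head (handed to cluster_align_phys_io), a middle
 * that is a whole number of device blocks (issued directly as CL_DEV_MEMORY
 * I/O), and a misaligned tail (again handed to cluster_align_phys_io).  The
 * original handles sub-block-sized requests inside the same loop; this sketch
 * only shows the arithmetic, assumes devblocksize is a power of two, and uses
 * hypothetical sketch_* names:
 */
#include <stdint.h>

struct sketch_split {
        unsigned int head_size;   /* bytes needed to reach a devblock boundary */
        unsigned int mid_size;    /* whole device blocks, issued directly       */
        unsigned int tail_size;   /* leftover bytes past the last whole block   */
};

static struct sketch_split
sketch_split_io(uint64_t file_offset, unsigned int io_size, unsigned int devblocksize)
{
        struct sketch_split s = { 0, 0, 0 };

        if (file_offset & (devblocksize - 1)) {
                /* bytes up to the next device-block boundary */
                s.head_size = devblocksize - (unsigned int)(file_offset & (devblocksize - 1));
                if (s.head_size > io_size)
                        s.head_size = io_size;
                io_size -= s.head_size;
        }
        s.tail_size = io_size & (devblocksize - 1);
        s.mid_size  = io_size - s.tail_size;

        return (s);
}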
2970  * generate advisory I/O's in the largest chunks possible
2971  * the completed pages will be released into the VM cache
2974 advisory_read(vp, filesize, f_offset, resid, devblocksize)
2981         upl_page_info_t *pl;
2983         vm_offset_t      upl_offset;
2996         if (!UBCINFOEXISTS(vp))
2999         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
3000                      (int)f_offset, resid, (int)filesize, devblocksize, 0);
3002         while (resid && f_offset < filesize && retval == 0) {
3004                  * compute the size of the upl needed to encompass
3005                  * the requested read... limit each call to cluster_io
3006                  * to the maximum UPL size... cluster_io will clip if
3007                  * this exceeds the maximum io_size for the device,
3008                  * make sure to account for
3009                  * a starting offset that's not page aligned
3011                 start_offset = (int)(f_offset & PAGE_MASK_64);
3012                 upl_f_offset = f_offset - (off_t)start_offset;
3013                 max_size     = filesize - f_offset;
3015                 if (resid < max_size)
3020                 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
3021                 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
3022                         upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
3023                 pages_in_upl = upl_size / PAGE_SIZE;
3025                 kret = ubc_create_upl(vp,
3030                                       UPL_RET_ONLY_ABSENT);
3031                 if (kret != KERN_SUCCESS)
3036                  * before we start marching forward, we must make sure we end on
3037                  * a present page, otherwise we will be working with a freed
3040                 for (last_pg = pages_in_upl - 1; last_pg >= 0; last_pg--) {
3041                         if (upl_page_present(pl, last_pg))
3044                 pages_in_upl = last_pg + 1;
3047                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
3048                              (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
3051                 for (last_pg = 0; last_pg < pages_in_upl; ) {
3053                          * scan from the beginning of the upl looking for the first
3054                          * page that is present.... this will become the first page in
3055                          * the request we're going to make to 'cluster_io'... if all
3056                          * of the pages are absent, we won't call through to 'cluster_io'
3058                         for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
3059                                 if (upl_page_present(pl, start_pg))
3064                          * scan from the starting present page looking for an absent
3065                          * page before the end of the upl is reached, if we
3066                          * find one, then it will terminate the range of pages being
3067                          * presented to 'cluster_io'
3069                         for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
3070                                 if (!upl_page_present(pl, last_pg))
3074                         if (last_pg > start_pg) {
3076                                  * we found a range of pages that must be filled
3077                                  * if the last page in this range is the last page of the file
3078                                  * we may have to clip the size of it to keep from reading past
3079                                  * the end of the last physical block associated with the file
3081                                 upl_offset = start_pg * PAGE_SIZE;
3082                                 io_size    = (last_pg - start_pg) * PAGE_SIZE;
3084                                 if ((upl_f_offset + upl_offset + io_size) > filesize)
3085                                         io_size = filesize - (upl_f_offset + upl_offset);
3088                                  * issue an asynchronous read to cluster_io
3090                                 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, devblocksize,
3091                                                     CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0, (struct clios *)0);
3097                         ubc_upl_abort(upl, 0);
3099                 io_size = upl_size - start_offset;
3101                 if (io_size > resid)
3103                 f_offset += io_size;
3107         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
3108                      (int)f_offset, resid, retval, 0, 0);
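/*
 * Editorial sketch (not part of the original file): before advisory_read walks
 * its UPL it trims pages_in_upl so the scan ends on a page that is actually
 * contained in the UPL (per the comment above, to avoid touching a freed
 * page).  A minimal illustration over a plain array of "present" flags, using
 * hypothetical sketch_* names:
 */
static int
sketch_trim_to_last_present(const int *present, int pages_in_upl)
{
        int last_pg;

        /* walk backwards to the last page the UPL actually contains */
        for (last_pg = pages_in_upl - 1; last_pg >= 0; last_pg--)
                if (present[last_pg])
                        break;

        /* returns 0 when no page is present; the caller then skips the UPL */
        return (last_pg + 1);
}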
3120         if (!UBCINFOEXISTS(vp) || vp->v_clen == 0) {
3121                 vp->v_flag &= ~VHASDIRTY;
3125         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_START,
3126                      vp->v_flag & VHASDIRTY, vp->v_clen, 0, 0, 0);
3128         if (vp->v_flag & VHASDIRTY) {
3133                 start_pg = vp->v_cstart;
3134                 end_pg   = vp->v_lastw;
3136                 vp->v_flag &= ~VHASDIRTY;
3139                 while (start_pg < end_pg) {
3140                         last_pg = start_pg + MAX_UPL_TRANSFER;
3142                         if (last_pg > end_pg)
3145                         cluster_push_x(vp, ubc_getsize(vp), start_pg, last_pg, 0);
3151         retval = cluster_try_push(vp, ubc_getsize(vp), 0, 1);
3153         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_END,
3154                      vp->v_flag & VHASDIRTY, vp->v_clen, retval, 0, 0);
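/*
 * Editorial sketch (not part of the original file): when the vnode is in the
 * VHASDIRTY state, cluster_push walks the dirty page range [v_cstart, v_lastw)
 * in chunks of at most MAX_UPL_TRANSFER pages, handing each chunk to
 * cluster_push_x.  A minimal illustration of that chunking loop, with a
 * hypothetical callback, an assumed MAX_UPL_TRANSFER value, and sketch_* names:
 */
#define SKETCH_MAX_UPL_TRANSFER 256   /* pages; assumed value for the sketch */

static void
sketch_push_dirty_range(int start_pg, int end_pg,
                        void (*push_one)(int first_pg, int last_pg))
{
        while (start_pg < end_pg) {
                int last_pg = start_pg + SKETCH_MAX_UPL_TRANSFER;

                /* clip the final chunk to the end of the dirty range */
                if (last_pg > end_pg)
                        last_pg = end_pg;

                push_one(start_pg, last_pg);
                start_pg = last_pg;
        }
}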
3161 cluster_try_push(vp, EOF, can_delay, push_all)
3173         struct v_cluster l_clusters[MAX_CLUSTERS];
3176          * make a local 'sorted' copy of the clusters
3177          * and clear vp->v_clen so that new clusters can
3180         for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
3181                 for (min_index = -1, cl_index1 = 0; cl_index1 < vp->v_clen; cl_index1++) {
3182                         if (vp->v_clusters[cl_index1].start_pg == vp->v_clusters[cl_index1].last_pg)
3184                         if (min_index == -1)
3185                                 min_index = cl_index1;
3186                         else if (vp->v_clusters[cl_index1].start_pg < vp->v_clusters[min_index].start_pg)
3187                                 min_index = cl_index1;
3189                 if (min_index == -1)
3191                 l_clusters[cl_index].start_pg = vp->v_clusters[min_index].start_pg;
3192                 l_clusters[cl_index].last_pg  = vp->v_clusters[min_index].last_pg;
3194                 vp->v_clusters[min_index].start_pg = vp->v_clusters[min_index].last_pg;
3199         for (cl_pushed = 0, cl_index = 0; cl_index < cl_len; cl_index++) {
3201                  * try to push each cluster in turn... cluster_push_x may not
3202                  * push the cluster if can_delay is TRUE and the cluster doesn't
3203                  * meet the criteria for an immediate push
3205                 if (cluster_push_x(vp, EOF, l_clusters[cl_index].start_pg, l_clusters[cl_index].last_pg, can_delay)) {
3206                         l_clusters[cl_index].start_pg = 0;
3207                         l_clusters[cl_index].last_pg  = 0;
3215         if (cl_len > cl_pushed) {
3217                  * we didn't push all of the clusters, so
3218                  * let's try to merge them back in to the vnode
3220                 if ((MAX_CLUSTERS - vp->v_clen) < (cl_len - cl_pushed)) {
3222                          * we picked up some new clusters while we were trying to
3223                          * push the old ones (I don't think this can happen because
3224                          * I'm holding the lock, but just in case)... the sum of the
3225                          * leftovers plus the new cluster count exceeds our ability
3226                          * to represent them, so fall back to the VHASDIRTY mechanism
3228                         for (cl_index = 0; cl_index < cl_len; cl_index++) {
3229                                 if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
3232                                 if (l_clusters[cl_index].start_pg < vp->v_cstart)
3233                                         vp->v_cstart = l_clusters[cl_index].start_pg;
3234                                 if (l_clusters[cl_index].last_pg > vp->v_lastw)
3235                                         vp->v_lastw = l_clusters[cl_index].last_pg;
3237                         vp->v_flag |= VHASDIRTY;
3240                          * we've got room to merge the leftovers back in
3241                          * just append them starting at the next 'hole'
3242                          * represented by vp->v_clen
3244                         for (cl_index = 0, cl_index1 = vp->v_clen; cl_index < cl_len; cl_index++) {
3245                                 if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
3248                                 vp->v_clusters[cl_index1].start_pg = l_clusters[cl_index].start_pg;
3249                                 vp->v_clusters[cl_index1].last_pg  = l_clusters[cl_index].last_pg;
3251                                 if (cl_index1 == 0) {
3252                                         vp->v_cstart = l_clusters[cl_index].start_pg;
3253                                         vp->v_lastw  = l_clusters[cl_index].last_pg;
3255                                         if (l_clusters[cl_index].start_pg < vp->v_cstart)
3256                                                 vp->v_cstart = l_clusters[cl_index].start_pg;
3257                                         if (l_clusters[cl_index].last_pg > vp->v_lastw)
3258                                                 vp->v_lastw = l_clusters[cl_index].last_pg;
3263                          * update the cluster count
3265                         vp->v_clen = cl_index1;
3268         return(MAX_CLUSTERS - vp->v_clen);
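/*
 * Editorial sketch (not part of the original file): cluster_try_push builds
 * its local l_clusters[] copy by repeatedly selecting the pending cluster with
 * the smallest start_pg (a simple selection pass) and emptying the slot it came
 * from, so clusters are pushed in ascending file order.  A minimal illustration
 * of one selection pass over an array of sketch clusters:
 */
struct sketch_cluster {
        int start_pg;
        int last_pg;     /* start_pg == last_pg marks an empty slot */
};

/* returns the index of the non-empty cluster with the lowest start_pg,
 * or -1 if every slot is empty */
static int
sketch_pick_lowest_cluster(const struct sketch_cluster *c, int v_clen)
{
        int i, min_index = -1;

        for (i = 0; i < v_clen; i++) {
                if (c[i].start_pg == c[i].last_pg)
                        continue;                      /* empty slot */
                if (min_index == -1 || c[i].start_pg < c[min_index].start_pg)
                        min_index = i;
        }
        return (min_index);
}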
3274 cluster_push_x(vp, EOF, first, last, can_delay)
3281         upl_page_info_t *pl;
3283         vm_offset_t      upl_offset;
3295         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_START,
3296                      vp->v_clen, first, last, EOF, 0);
3298         if ((pages_in_upl = last - first) == 0) {
3299                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 0, 0, 0, 0);
3303         upl_size = pages_in_upl * PAGE_SIZE;
3304         upl_f_offset = ((off_t)first) * PAGE_SIZE_64;
3306         if (upl_f_offset + upl_size >= EOF) {
3308                 if (upl_f_offset >= EOF) {
3310                          * must have truncated the file and missed
3311                          * clearing a dangling cluster (i.e. it's completely
3312                          * beyond the new EOF)
3314                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 1, 0, 0, 0);
3318                 size = EOF - upl_f_offset;
3320                 upl_size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
3321                 pages_in_upl = upl_size / PAGE_SIZE;
3323         if (can_delay && (pages_in_upl < (MAX_UPL_TRANSFER - (MAX_UPL_TRANSFER / 2))))
3327         kret = ubc_create_upl(vp,
3332                               UPL_RET_ONLY_DIRTY);
3333         if (kret != KERN_SUCCESS)
3334                 panic("cluster_push: failed to get pagelist");
3339                 for (num_of_dirty = 0, start_pg = 0; start_pg < pages_in_upl; start_pg++) {
3340                         if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
3343                 if (num_of_dirty < pages_in_upl / 2) {
3344                         ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
3346                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 0, 2, num_of_dirty, (pages_in_upl / 2), 0);
3355                 for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
3356                         if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
3359                 if (start_pg > last_pg) {
3360                         io_size = (start_pg - last_pg) * PAGE_SIZE;
3362                         ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size,
3363                                             UPL_ABORT_FREE_ON_EMPTY);
3370                 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
3371                         if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
3374                 upl_offset = start_pg * PAGE_SIZE;
3376                 io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);
3378                 if (vp->v_flag & VNOCACHE_DATA)
3379                         io_flags = CL_COMMIT | CL_AGE | CL_ASYNC | CL_DUMP;
3381                         io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
3383                 while (vp->v_numoutput >= ASYNC_THROTTLE) {
3384                         vp->v_flag |= VTHROTTLED;
3385                         tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_push", 0);
3387                 cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, vp->v_ciosiz, io_flags, (struct buf *)0, (struct clios *)0);
3391         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 3, 0, 0, 0);
3399 cluster_align_phys_io(struct vnode *vp, struct uio *uio, addr64_t usr_paddr, int xsize, int devblocksize, int flags)
3402         upl_page_info_t *pl;
3410         kret = ubc_create_upl(vp,
3411                               uio->uio_offset & ~PAGE_MASK_64,
3417         if (kret != KERN_SUCCESS)
3420         if (!upl_valid_page(pl, 0)) {
3422                  * issue a synchronous read to cluster_io
3424                 error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, devblocksize,
3425                                    CL_READ, (struct buf *)0, (struct clios *)0);
3427                         ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
3432         ubc_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + (addr64_t)(uio->uio_offset & PAGE_MASK_64);
3435          * NOTE:  There is no prototype for the following in BSD. It, and the definitions
3436          * of the defines for cppvPsrc, cppvPsnk, cppvFsnk, and cppvFsrc will be found in
3437          * osfmk/ppc/mappings.h.  They are not included here because there appears to be no
3438          * way to do so without exporting them to kexts as well.
3440         if (flags & CL_READ)
3441 //              copypv(ubc_paddr, usr_paddr, xsize, cppvPsrc | cppvPsnk | cppvFsnk);   /* Copy physical to physical and flush the destination */
3442                 copypv(ubc_paddr, usr_paddr, xsize, 2 | 1 | 4);         /* Copy physical to physical and flush the destination */
3444 //              copypv(ubc_paddr, usr_paddr, xsize, cppvPsrc | cppvPsnk | cppvFsrc);   /* Copy physical to physical and flush the source */
3445                 copypv(ubc_paddr, usr_paddr, xsize, 2 | 1 | 8);         /* Copy physical to physical and flush the source */
3447         if ( !(flags & CL_READ) || upl_dirty_page(pl, 0)) {
3449                  * issue a synchronous write to cluster_io
3451                 error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, devblocksize,
3452                                    0, (struct buf *)0, (struct clios *)0);
3455         uio->uio_offset += xsize;
3456         iov->iov_base   += xsize;
3457         iov->iov_len    -= xsize;
3458         uio->uio_resid  -= xsize;
3460         ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
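/*
 * Editorial sketch (not part of the original file): the magic numbers passed
 * to copypv() above stand in for the cppv* flags this file cannot include (see
 * the NOTE above).  From the commented-out calls we can infer the grouping,
 * though not which of the two low bits is cppvPsrc versus cppvPsnk; the names
 * below are assumptions recorded for readability only:
 */
#define SKETCH_CPPV_PHYS_BITS (2 | 1)   /* cppvPsrc | cppvPsnk: both addresses are physical */
#define SKETCH_CPPV_FLUSH_SNK 4         /* cppvFsnk: flush the destination after the copy   */
#define SKETCH_CPPV_FLUSH_SRC 8         /* cppvFsrc: flush the source after the copy        */

/* read path:  copypv(ubc_paddr, usr_paddr, xsize, SKETCH_CPPV_PHYS_BITS | SKETCH_CPPV_FLUSH_SNK); */
/* write path: copypv(ubc_paddr, usr_paddr, xsize, SKETCH_CPPV_PHYS_BITS | SKETCH_CPPV_FLUSH_SRC); */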