/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cluster.c	8.10 (Berkeley) 3/28/95
 */
#include <sys/param.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

#include <vm/vm_pageout.h>

#include <sys/kdebug.h>
#define CL_COMMIT     0x04
#define CL_PAGEOUT    0x10
#define CL_NOZERO     0x80
#define CL_PAGEIN     0x100
#define CL_DEV_MEMORY 0x200
#define CL_PRESERVE   0x400
static void cluster_zero(upl_t upl, vm_offset_t upl_offset,
		int size, struct buf *bp);
static int cluster_read_x(struct vnode *vp, struct uio *uio,
		off_t filesize, int devblocksize, int flags);
static int cluster_write_x(struct vnode *vp, struct uio *uio,
		off_t oldEOF, off_t newEOF, off_t headOff,
		off_t tailOff, int devblocksize, int flags);
static int cluster_nocopy_read(struct vnode *vp, struct uio *uio,
		off_t filesize, int devblocksize, int flags);
static int cluster_nocopy_write(struct vnode *vp, struct uio *uio,
		off_t newEOF, int devblocksize, int flags);
static int cluster_phys_read(struct vnode *vp, struct uio *uio,
		off_t filesize, int devblocksize, int flags);
static int cluster_phys_write(struct vnode *vp, struct uio *uio,
		off_t newEOF, int devblocksize, int flags);
static int cluster_align_phys_io(struct vnode *vp, struct uio *uio,
		vm_offset_t usr_paddr, int xsize, int devblocksize, int flags);
static int cluster_push_x(struct vnode *vp, off_t EOF, daddr_t first, daddr_t last, int can_delay);
static int cluster_try_push(struct vnode *vp, off_t newEOF, int can_delay, int push_all);
/*
 * throttle the number of async writes that
 * can be outstanding on a single vnode
 * before we issue a synchronous write
 */
#define ASYNC_THROTTLE  9
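/*
 * Note: with ASYNC_THROTTLE at 9, writers that find v_numoutput at or
 * above 9 mark the vnode VTHROTTLED and sleep; cluster_iodone only wakes
 * them once the outstanding count has drained to ASYNC_THROTTLE / 3
 * (i.e. 3) or fewer, so waiters are released in batches rather than
 * one per completion.
 */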
static int
cluster_iodone(bp)
    struct buf *bp;
{
    struct buf   *cbp_head;
    struct buf   *cbp_next;
    struct clios *iostate;

    cbp_head = (struct buf *)(bp->b_trans_head);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
        (int)cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);

    for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
        /*
         * all I/O requests that are part of this transaction
         * have to complete before we can process it
         */
        if ( !(cbp->b_flags & B_DONE)) {

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                (int)cbp_head, (int)cbp, cbp->b_bcount, cbp->b_flags, 0);
            /* ... */
        }
    }

    upl_offset  = cbp->b_uploffset;
    upl         = cbp->b_pagelist;
    b_flags     = cbp->b_flags;
    real_bp     = cbp->b_real_bp;
    zero_offset = cbp->b_validend;
    l_blkno     = cbp->b_lblkno;
    iostate     = (struct clios *)cbp->b_iostate;

    if (cbp->b_vectorcount > 1)
        _FREE(cbp->b_vectorlist, M_SEGMENT);

    if ((cbp->b_flags & B_ERROR) && error == 0)
        error = cbp->b_error;

    total_resid += cbp->b_resid;
    total_size  += cbp->b_bcount;

    cbp_next = cbp->b_trans_next;

    cluster_zero(upl, zero_offset, PAGE_SIZE - (zero_offset & PAGE_MASK), real_bp);

    if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= (ASYNC_THROTTLE / 3))) {
        vp->v_flag &= ~VTHROTTLED;
        wakeup((caddr_t)&vp->v_numoutput);
    }

    error_offset = (off_t)l_blkno * PAGE_SIZE_64;

    if (iostate->io_error == 0) {
        iostate->io_error  = error;
        iostate->io_offset = error_offset;
    } else if (error_offset < iostate->io_offset)
        iostate->io_offset = error_offset;

    iostate->io_completed += total_size;

    if (iostate->io_wanted) {
        iostate->io_wanted = 0;
        wakeup((caddr_t)&iostate->io_wanted);
    }

    if ((b_flags & B_NEED_IODONE) && real_bp) {
        if (error) {
            real_bp->b_flags |= B_ERROR;
            real_bp->b_error  = error;
        }
        real_bp->b_resid = total_resid;
    }
    if (error == 0 && total_resid)
        /* ... */;

    if (b_flags & B_COMMIT_UPL) {
        pg_offset   = upl_offset & PAGE_MASK;
        commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
        if (error || (b_flags & B_NOCACHE) || ((b_flags & B_PHYS) && !(b_flags & B_READ))) {

            if (b_flags & B_PHYS)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if ((b_flags & B_PAGEOUT) && (error != ENXIO)) /* transient error */
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if (b_flags & B_PGIN)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
            else
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

            ubc_upl_abort_range(upl, upl_offset - pg_offset, commit_size,
                                upl_abort_code);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                (int)upl, upl_offset - pg_offset, commit_size,
                0x80000000|upl_abort_code, 0);
        } else {
            int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;

            if (b_flags & B_PHYS)
                upl_commit_flags |= UPL_COMMIT_SET_DIRTY;
            else if ( !(b_flags & B_PAGEOUT))
                upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;

                upl_commit_flags |= UPL_COMMIT_INACTIVATE;

            ubc_upl_commit_range(upl, upl_offset - pg_offset, commit_size,
                                 upl_commit_flags);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                (int)upl, upl_offset - pg_offset, commit_size,
                upl_commit_flags, 0);
        }
    } else {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
            (int)upl, upl_offset, 0, error, 0);
    }
}
static void
cluster_zero(upl, upl_offset, size, bp)
    vm_offset_t   upl_offset;
{
    vm_offset_t   io_addr = 0;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
        upl_offset, size, (int)bp, 0, 0);

    if (bp == NULL || bp->b_data == NULL) {
        kret = ubc_upl_map(upl, &io_addr);

        if (kret != KERN_SUCCESS)
            panic("cluster_zero: ubc_upl_map() failed with (%d)", kret);
        if (io_addr == 0)
            panic("cluster_zero: ubc_upl_map() mapped 0");
    } else
        io_addr = (vm_offset_t)bp->b_data;

    bzero((caddr_t)(io_addr + upl_offset), size);

    kret = ubc_upl_unmap(upl);

    if (kret != KERN_SUCCESS)
        panic("cluster_zero: kernel_upl_unmap failed");
}
static int
cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, real_bp, iostate)
    vm_offset_t   upl_offset;
    int           non_rounded_size;
    struct clios *iostate;
{
    struct buf   *cbp_head = 0;
    struct buf   *cbp_tail = 0;

    if (flags & CL_READ) {
        io_flags = (B_VECTORLIST | B_READ);

        vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
    } else {
        io_flags = (B_VECTORLIST | B_WRITEINPROG);

        vfs_io_attributes(vp, B_WRITE, &max_iosize, &max_vectors);
    }
    pl = ubc_upl_pageinfo(upl);

        io_flags |= B_NOCACHE;
    if (flags & CL_PAGEIN)
        io_flags |= B_PGIN;
    if (flags & CL_PAGEOUT)
        io_flags |= B_PAGEOUT;
    if (flags & CL_COMMIT)
        io_flags |= B_COMMIT_UPL;
    if (flags & CL_PRESERVE)
        /* ... */;

    if (devblocksize)
        size = (non_rounded_size + (devblocksize - 1)) & ~(devblocksize - 1);
    else
        size = non_rounded_size;
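    /*
     * e.g. with devblocksize = 512, a non_rounded_size of 1000 becomes
     * (1000 + 511) & ~511 = 1024, i.e. the transfer is padded up to the
     * next device block boundary before being split into chunks below.
     */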
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
        (int)f_offset, size, upl_offset, flags, 0);

    if ((flags & CL_READ) && ((upl_offset + non_rounded_size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
        /*
         * then we are going to end up
         * with a page that we can't complete (the file size wasn't a multiple
         * of PAGE_SIZE and we're trying to read to the end of the file
         * so we'll go ahead and zero out the portion of the page we can't
         * read in from the file
         */
        zero_offset = upl_offset + non_rounded_size;
    }

        if (size > max_iosize)
            io_size = max_iosize;

        if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, (size_t *)&io_size, NULL)) {
            if (error == EOPNOTSUPP)
                panic("VOP_CMAP Unimplemented");
        }

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
            (int)f_offset, (int)blkno, io_size, zero_offset, 0);

        if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {
            if (flags & CL_PAGEOUT) {
                /* Try paging out the page individually before
                   giving up entirely and dumping it (it could
                   be mapped in a "hole" and require allocation) */
                ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE_64, UPL_ABORT_FREE_ON_EMPTY);
                if (ubc_pushdirty_range(vp, f_offset, PAGE_SIZE_64) == 0) {
                    /* ... */
                }
            }
            upl_offset += PAGE_SIZE_64;
            f_offset   += PAGE_SIZE_64;
            size       -= PAGE_SIZE_64;
        }
        lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);
        /*
         * we have now figured out how much I/O we can do - this is in 'io_size'
         * pl_index represents the first page in the 'upl' that the I/O will occur for
         * pg_offset is the starting point in the first page for the I/O
         * pg_count is the number of full and partial pages that 'io_size' encompasses
         */
        pl_index  = upl_offset / PAGE_SIZE;
        pg_offset = upl_offset & PAGE_MASK;
        pg_count  = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;
        if (flags & CL_DEV_MEMORY) {
            /*
             * currently, can't deal with reading 'holes' in file
             */
            if ((long)blkno == -1) {
                /* ... */
            }
            /*
             * treat physical requests as one 'giant' page
             */
        }
        if ((flags & CL_READ) && (long)blkno == -1) {
            /*
             * if we're reading and blkno == -1, then we've got a
             * 'hole' in the file that we need to deal with by zeroing
             * out the affected area in the upl
             */
            if (zero_offset && io_size == size) {
                /*
                 * if this upl contains the EOF and it is not a multiple of PAGE_SIZE
                 * then 'zero_offset' will be non-zero
                 * if the 'hole' returned by VOP_CMAP extends all the way to the eof
                 * (indicated by the io_size finishing off the I/O request for this UPL)
                 * then we're not going to issue an I/O for the
                 * last page in this upl... we need to zero both the hole and the tail
                 * of the page beyond the EOF, since the delayed zero-fill won't kick in
                 */
                bytes_to_zero = (((upl_offset + io_size) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - upl_offset;
            } else
                bytes_to_zero = io_size;

            cluster_zero(upl, upl_offset, bytes_to_zero, real_bp);

            if (cbp_head) {
                /*
                 * if there is a current I/O chain pending
                 * then the first page of the group we just zero'd
                 * will be handled by the I/O completion if the zero
                 * fill started in the middle of the page
                 */
                pg_count = (io_size - pg_offset) / PAGE_SIZE;
            } else {
                /*
                 * no pending I/O to pick up that first page
                 * so, we have to make sure it gets committed
                 * set the pg_offset to 0 so that the upl_commit_range
                 * starts with this page
                 */
                pg_count = (io_size + pg_offset) / PAGE_SIZE;
            }
            if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))
                /*
                 * if we're done with the request for this UPL
                 * then we have to make sure to commit the last page
                 * even if we only partially zero-filled it
                 */
                /* ... */;

                pg_resid = PAGE_SIZE - pg_offset;

            if (flags & CL_COMMIT)
                ubc_upl_commit_range(upl,
                        (upl_offset + pg_resid) & ~PAGE_MASK,
                        pg_count * PAGE_SIZE,
                        UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);

            upl_offset += io_size;

            if (cbp_head && pg_count)
                /* ... */;

        } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
            real_bp->b_blkno = blkno;
        }

        if (pg_count > max_vectors) {
            io_size -= (pg_count - max_vectors) * PAGE_SIZE;

            if (io_size < 0) {
                io_size  = PAGE_SIZE - pg_offset;
                /* ... */
            } else
                pg_count = max_vectors;
        }
        /*
         * we need to allocate space for the vector list
         */
        iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
                                       M_SEGMENT, M_NOWAIT);

        if (iovp == (struct iovec *) 0) {
            /*
             * if the allocation fails, then throttle down to a single page
             */
            io_size = PAGE_SIZE - pg_offset;
        }

        /* Throttle the speculative IO */
        if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT))
            /* ... */;

        cbp = alloc_io_buf(vp, priv);

            /*
             * we use the io vector that's reserved in the buffer header
             * this ensures we can always issue an I/O even in a low memory
             * condition that prevents the _MALLOC from succeeding... this
             * is necessary to prevent deadlocks with the pager
             */
            iovp = (struct iovec *)(&cbp->b_vects[0]);

        cbp->b_vectorlist  = (void *)iovp;
        cbp->b_vectorcount = pg_count;

        if (flags & CL_DEV_MEMORY) {

            iovp->iov_len  = io_size;
            iovp->iov_base = (caddr_t)upl_phys_page(pl, 0);

            if (iovp->iov_base == (caddr_t) 0) {
                /* ... */
            }
            iovp->iov_base += upl_offset;
        } else {

            for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {

                psize = PAGE_SIZE - pg_offset;

                iovp->iov_len  = psize;
                iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);

                if (iovp->iov_base == (caddr_t) 0) {
                    _FREE(cbp->b_vectorlist, M_SEGMENT);
                    /* ... */
                }
                iovp->iov_base += pg_offset;
            }
        }
        if (flags & CL_PAGEOUT) {

            if (bp = incore(vp, lblkno + i)) {
                if (!ISSET(bp->b_flags, B_BUSY)) {
                    SET(bp->b_flags, (B_BUSY | B_INVAL));
                } else
                    panic("BUSY bp found in cluster_io");
            }
        }
        if (flags & CL_ASYNC) {
            cbp->b_flags |= (B_CALL | B_ASYNC);
            cbp->b_iodone = (void *)cluster_iodone;
        }
        cbp->b_flags |= io_flags;

        cbp->b_lblkno     = lblkno;
        cbp->b_blkno      = blkno;
        cbp->b_bcount     = io_size;
        cbp->b_pagelist   = upl;
        cbp->b_uploffset  = upl_offset;
        cbp->b_trans_next = (struct buf *)0;

        if (cbp->b_iostate = (void *)iostate)
            iostate->io_issued += io_size;

        if (flags & CL_READ)
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
                cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
        else
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
                cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);

            cbp_tail->b_trans_next = cbp;

        (struct buf *)(cbp->b_trans_head) = cbp_head;

        upl_offset += io_size;

        if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY) && ((flags & CL_ASYNC) || buf_count > 8)) || size == 0) {
            /*
             * if we have no more I/O to issue or
             * the current I/O we've prepared fully
             * completes the last page in this request
             * and it's either an ASYNC request or
             * we've already accumulated more than 8 I/O's into
             * this transaction and it's not an I/O directed to
             * special DEVICE memory
             * then go ahead and issue the I/O
             */
                cbp_head->b_flags |= B_NEED_IODONE;
                cbp_head->b_real_bp = real_bp;
            } else
                cbp_head->b_real_bp = (struct buf *)NULL;

                /*
                 * we're about to issue the last I/O for this upl
                 * if this was a read to the eof and the eof doesn't
                 * finish on a page boundary, then we need to zero-fill
                 * the rest of the page....
                 */
                cbp_head->b_validend = zero_offset;
            } else
                cbp_head->b_validend = 0;

            for (cbp = cbp_head; cbp;) {
                struct buf * cbp_next;

                if (io_flags & B_WRITEINPROG)
                    cbp->b_vp->v_numoutput++;

                cbp_next = cbp->b_trans_next;

                (void) VOP_STRATEGY(cbp);
            }
            if ( !(flags & CL_ASYNC)) {
                for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)
                    biowait(cbp);

                if (error = cluster_iodone(cbp_head)) {
                    if ((flags & CL_PAGEOUT) && (error == ENXIO))
                        retval = 0;    /* drop the error */
                }
            }
            cbp_head = (struct buf *)0;
            cbp_tail = (struct buf *)0;
        }
    }

        for (cbp = cbp_head; cbp;) {
            struct buf * cbp_next;

            if (cbp->b_vectorcount > 1)
                _FREE(cbp->b_vectorlist, M_SEGMENT);
            upl_offset -= cbp->b_bcount;
            size       += cbp->b_bcount;
            io_size    += cbp->b_bcount;

            cbp_next = cbp->b_trans_next;
        }

            if (iostate->io_error == 0) {
                iostate->io_error = error;
                iostate->io_offset = f_offset - (off_t)io_size;
            }
            iostate->io_issued -= io_size;

            if (iostate->io_wanted) {
                iostate->io_wanted = 0;
                wakeup((caddr_t)&iostate->io_wanted);
            }

        pg_offset  = upl_offset & PAGE_MASK;
        abort_size = ((size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;

        if (flags & CL_COMMIT) {

            if (flags & CL_PRESERVE)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if ((flags & CL_PAGEOUT) && (error != ENXIO)) /* transient error */
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if (flags & CL_PAGEIN)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
            else
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

            ubc_upl_abort_range(upl, upl_offset - pg_offset, abort_size,
                                upl_abort_code);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
                (int)upl, upl_offset - pg_offset, abort_size, error, 0);
        }
            real_bp->b_flags |= B_ERROR;
            real_bp->b_error = error;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
        (int)f_offset, size, upl_offset, retval, 0);
}
static int
cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize)
{
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
        (int)f_offset, size, (int)filesize, 0, 0);

    if (f_offset >= filesize) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
            (int)f_offset, 0, 0, 0, 0);
    }
    if (size > (MAX_UPL_TRANSFER * PAGE_SIZE))
        size = MAX_UPL_TRANSFER * PAGE_SIZE;
    else
        size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);

    if ((off_t)size > (filesize - f_offset))
        size = filesize - f_offset;

    pages_to_fetch = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;
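    /*
     * e.g. a 10000-byte request with 4096-byte pages rounds to
     * pages_to_fetch = (10000 + 4095) / 4096 = 3; the loop below then
     * skips over leading pages that are already in the cache before
     * handing the remainder to advisory_read().
     */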
    for (skipped_pages = 0; skipped_pages < pages_to_fetch; skipped_pages++) {
        if (ubc_page_op(vp, f_offset, 0, 0, 0) != KERN_SUCCESS)
            break;
        f_offset += PAGE_SIZE;
    }
    if (skipped_pages < pages_to_fetch)
        advisory_read(vp, filesize, f_offset, size, devblocksize);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
        (int)f_offset + (pages_to_fetch * PAGE_SIZE), skipped_pages, 0, 1, 0);

    return (pages_to_fetch);
}
static void
cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize)
{
    int size_of_prefetch;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
        b_lblkno, e_lblkno, vp->v_lastr, 0, 0);

    if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
            vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);
    }
    if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) &&
                             (b_lblkno != (vp->v_maxra + 1) || vp->v_ralen == 0))) {

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
            vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);
    }
    max_pages = MAX_UPL_TRANSFER;

    vp->v_ralen = vp->v_ralen ? min(max_pages, vp->v_ralen << 1) : 1;

    if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
        vp->v_ralen = min(max_pages, (e_lblkno + 1) - b_lblkno);
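    /*
     * the read-ahead window therefore grows geometrically -- 1, 2, 4, ...
     * pages on each sequential hit, capped at MAX_UPL_TRANSFER -- and is
     * bumped straight to the span of the current request whenever a
     * single read already covers more than the window.
     */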
    if (e_lblkno < vp->v_maxra) {
        if ((vp->v_maxra - e_lblkno) > max(max_pages / 16, 4)) {

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);
        }
    }
    r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
    f_offset = (off_t)r_lblkno * PAGE_SIZE_64;

    if (f_offset < filesize) {
        size_of_prefetch = cluster_rd_prefetch(vp, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);

        if (size_of_prefetch)
            vp->v_maxra = (r_lblkno + size_of_prefetch) - 1;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
        vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
}
int
cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    vm_offset_t   upl_offset;
{
    int local_flags = CL_PAGEOUT;

    if ((flags & UPL_IOSYNC) == 0)
        local_flags |= CL_ASYNC;
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags |= CL_COMMIT;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
        (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * If they didn't specify any I/O, then we are done...
     * we can't issue an abort because we don't know how
     * big the upl really is
     */
    if (vp->v_mount->mnt_flag & MNT_RDONLY) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
    }
    /*
     * can't page-in from a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
       (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
    }
    max_size = filesize - f_offset;

    pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (size > pg_size) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
                    UPL_ABORT_FREE_ON_EMPTY);
    }
    while (vp->v_numoutput >= ASYNC_THROTTLE) {
        vp->v_flag |= VTHROTTLED;
        tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_pageout", 0);
    }

    return (cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
                       local_flags, (struct buf *)0, (struct clios *)0));
}
int
cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    vm_offset_t   upl_offset;
{
    if (upl == NULL || size < 0)
        panic("cluster_pagein: NULL upl passed in");

    if ((flags & UPL_IOSYNC) == 0)
        local_flags |= CL_ASYNC;
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags |= CL_COMMIT;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
        (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * can't page-in from a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
       (f_offset & PAGE_MASK_64) || (size & PAGE_MASK) || (upl_offset & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
    }
    max_size = filesize - f_offset;

    if (size < max_size)
        /* ... */;

    rounded_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (size > rounded_size && (local_flags & CL_COMMIT))
        ubc_upl_abort_range(upl, upl_offset + rounded_size,
            size - (upl_offset + rounded_size), UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);

    retval = cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
                        local_flags | CL_READ | CL_PAGEIN, (struct buf *)0, (struct clios *)0);

        b_lblkno = (int)(f_offset / PAGE_SIZE_64);
        e_lblkno = (int)
            ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);

        if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF) && rounded_size == PAGE_SIZE) {
            /*
             * we haven't read the last page in of the file yet
             * so let's try to read ahead if we're in
             * a sequential access pattern
             */
            cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
        }
        vp->v_lastr = e_lblkno;
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 19)) | DBG_FUNC_START,
        (int)bp, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);

    if (bp->b_pagelist == (upl_t) 0)
        panic("cluster_bp: can't handle NULL upl yet\n");
    if (bp->b_flags & B_READ)
        flags = CL_ASYNC | CL_READ;

    f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);

    return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, 0, flags, bp, (struct clios *)0));
}
int
cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
{
    vm_offset_t      upl_offset;
    upl_page_info_t *pl;

    if ( (!(vp->v_flag & VNOCACHE_DATA)) || (!uio) || (uio->uio_segflg != UIO_USERSPACE))
    {
        retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
        return(retval);
    }

    while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0)
    {
        /* we know we have a resid, so this is safe */
        while (iov->iov_len == 0) {
            /* ... */
        }
        /*
         * We check every vector target and if it is physically
         * contiguous space, we skip the sanity checks.
         */
        upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
        upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;

        upl_flags = UPL_QUERY_OBJECT_TYPE;
        if ((vm_map_get_upl(current_map(),
                            (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                            &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
        {
            /*
             * the user app must have passed in an invalid address
             */
        }

        if (upl_flags & UPL_PHYS_CONTIG)
        {
            if (flags & IO_HEADZEROFILL)
            {
                flags &= ~IO_HEADZEROFILL;

                if (retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, devblocksize, IO_HEADZEROFILL))
                    return(retval);
            }

            retval = cluster_phys_write(vp, uio, newEOF, devblocksize, flags);

            if (uio->uio_resid == 0 && (flags & IO_TAILZEROFILL))
            {
                retval = cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, devblocksize, IO_HEADZEROFILL);
            }
        }
        else if ((uio->uio_resid < 4 * PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)))
        {
            /*
             * We set a threshold of 4 pages to decide if the nocopy
             * write loop is worth the trouble...
             * we also come here if we're trying to zero the head and/or tail
             * of a partially written page, and the user source is not a physically contiguous region
             */
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
        }
        else if (uio->uio_offset & PAGE_MASK_64)
        {
            /* Bring the file offset write up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;
            /*
             * Fake the resid going into the cluster_write_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        }
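        /*
         * e.g. a write whose current offset sits 0x200 bytes into a page
         * gets clip_size = PAGE_SIZE - 0x200 = 0xe00 above (assuming 4KB
         * pages), so one pass through cluster_write_x brings the offset
         * up to a page boundary and the now-aligned remainder can take
         * the nocopy path below.
         */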
        else if ((int)iov->iov_base & PAGE_MASK_64)
        {
            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        }
        else
        {
            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */
            max_io_size = newEOF - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = max_io_size;

            if (clip_size < PAGE_SIZE)
            {
                /*
                 * Take care of tail end of write in this vector
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
            else
            {
                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags);
                if ((retval == 0) && uio->uio_resid)
                    retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
        }
    }
}
static int
cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags)
{
    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    int              upl_needed_size;
    int              force_data_sync;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
        (int)uio->uio_offset, (int)uio->uio_resid,
        (int)newEOF, devblocksize, 0);

    /*
     * When we enter this routine, we know
     *  -- the offset into the file is on a pagesize boundary
     *  -- the resid is a page multiple
     *  -- the resid will not exceed iov_len
     */
    cluster_try_push(vp, newEOF, 0, 1);

    while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
        io_size = uio->uio_resid;

        if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            io_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
        upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
            (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);

        for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
        {
            upl_size = upl_needed_size;
            upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
                        UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

            kret = vm_map_get_upl(current_map(),
                                  (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                                  &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);

            if (kret != KERN_SUCCESS)
            {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
                    0, 0, 0, kret, 0);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                    (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);

                /* cluster_nocopy_write: failed to get pagelist */
                /* do not return kret here */
            }

            pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
            pages_in_pl = upl_size / PAGE_SIZE;

            for(i=0; i < pages_in_pl; i++)
            {
                if (!upl_valid_page(pl, i))
                    break;
            }
            if (i == pages_in_pl)
                break;

            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        if (force_data_sync >= 3)
        {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
                i, pages_in_pl, upl_size, kret, 0);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);
        }
        /*
         * Consider the possibility that upl_size wasn't satisfied.
         */
        if (upl_size != upl_needed_size)
            io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
            (int)upl_offset, upl_size, (int)iov->iov_base, io_size, 0);

            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);

        /*
         * Now look for pages already in the cache
         * and throw them away.
         */
        upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
        max_io_size = io_size;

        while (max_io_size) {
            /*
             * Flag UPL_POP_DUMP says if the page is found
             * in the page cache it must be thrown away.
             */
            ubc_page_op(vp, upl_f_offset,
                        UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
                        0, 0);
            max_io_size  -= PAGE_SIZE;
            upl_f_offset += PAGE_SIZE;
        }
        /*
         * issue a synchronous write to cluster_io
         */
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
            (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);

        error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
                           io_size, devblocksize, 0, (struct buf *)0, (struct clios *)0);

            /*
             * The cluster_io write completed successfully,
             * update the uio structure.
             */
            iov->iov_base   += io_size;
            iov->iov_len    -= io_size;
            uio->uio_resid  -= io_size;
            uio->uio_offset += io_size;

        /*
         * always 'commit' the I/O via the abort primitive whether the I/O
         * succeeded cleanly or not... this is necessary to ensure that
         * we preserve the state of the DIRTY flag on the pages used to
         * provide the data for the I/O... the state of this flag SHOULD
         * NOT be changed by a write
         */
        ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                            UPL_ABORT_FREE_ON_EMPTY);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
            (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
        (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
}
static int
cluster_phys_write(vp, uio, newEOF, devblocksize, flags)
{
    upl_page_info_t *pl;
    vm_offset_t      src_paddr;
    vm_offset_t      upl_offset;
    int              upl_needed_size;

    /*
     * When we enter this routine, we know
     *  -- the resid will not exceed iov_len
     *  -- the vector target address is physically contiguous
     */
    cluster_try_push(vp, newEOF, 0, 1);

    io_size = iov->iov_len;
    upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
    upl_needed_size = upl_offset + io_size;

    upl_size = upl_needed_size;
    upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
                UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

    kret = vm_map_get_upl(current_map(),
                          (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                          &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);

    if (kret != KERN_SUCCESS) {
        /*
         * cluster_phys_write: failed to get pagelist
         * note: return kret here
         */
    }
    /*
     * Consider the possibility that upl_size wasn't satisfied.
     * This is a failure in the physical memory case.
     */
    if (upl_size < upl_needed_size) {
        kernel_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
    }
    pl = ubc_upl_pageinfo(upl);

    src_paddr = (vm_offset_t)upl_phys_page(pl, 0) + ((vm_offset_t)iov->iov_base & PAGE_MASK);

    while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) {

        head_size = devblocksize - (int)(uio->uio_offset & (devblocksize - 1));

        if (head_size > io_size)
            head_size = io_size;

        error = cluster_align_phys_io(vp, uio, src_paddr, head_size, devblocksize, 0);

            ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

        upl_offset += head_size;
        src_paddr  += head_size;
        io_size    -= head_size;
    }
    tail_size = io_size & (devblocksize - 1);
    io_size  -= tail_size;
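    /*
     * e.g. with devblocksize = 512: a transfer that starts 100 bytes past
     * a device block boundary first pushes a 412-byte head through
     * cluster_align_phys_io above; if 3000 bytes then remain, tail_size
     * becomes 3000 & 511 = 440 and the middle 2560 bytes go out below as
     * a single CL_DEV_MEMORY transfer.
     */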
        /*
         * issue a synchronous write to cluster_io
         */
        error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
                           io_size, 0, CL_DEV_MEMORY, (struct buf *)0, (struct clios *)0);

            /*
             * The cluster_io write completed successfully,
             * update the uio structure
             */
            uio->uio_resid  -= io_size;
            iov->iov_len    -= io_size;
            iov->iov_base   += io_size;
            uio->uio_offset += io_size;
            src_paddr       += io_size;

            error = cluster_align_phys_io(vp, uio, src_paddr, tail_size, devblocksize, 0);

    /*
     * just release our hold on the physically contiguous
     * region without changing any state
     */
    ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
}
static int
cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
{
    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    vm_offset_t      io_address;
    long long        total_size;
    long long        zero_cnt1;
    daddr_t          start_blkno;

    if (uio) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
            (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);

        uio_resid = uio->uio_resid;
    } else {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
            0, 0, (int)oldEOF, (int)newEOF, 0);
    }

    if (flags & IO_HEADZEROFILL) {
        /*
         * some filesystems (HFS is one) don't support unallocated holes within a file...
         * so we zero fill the intervening space between the old EOF and the offset
         * where the next chunk of real data begins.... ftruncate will also use this
         * routine to zero fill to the new EOF when growing a file... in this case, the
         * uio structure will not be provided
         */
        if (headOff < uio->uio_offset) {
            zero_cnt = uio->uio_offset - headOff;
        } else if (headOff < newEOF) {
            zero_cnt = newEOF - headOff;
        }
    }
    if (flags & IO_TAILZEROFILL) {

        zero_off1 = uio->uio_offset + uio->uio_resid;

        if (zero_off1 < tailOff)
            zero_cnt1 = tailOff - zero_off1;
    }
    if (zero_cnt == 0 && uio == (struct uio *) 0)
    {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
            retval, 0, 0, 0, 0);
    }

    while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
        /*
         * for this iteration of the loop, figure out where our starting point is
         */
        if (zero_cnt) {
            start_offset = (int)(zero_off & PAGE_MASK_64);
            upl_f_offset = zero_off - start_offset;
        } else if (uio_resid) {
            start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
            upl_f_offset = uio->uio_offset - start_offset;
        } else {
            start_offset = (int)(zero_off1 & PAGE_MASK_64);
            upl_f_offset = zero_off1 - start_offset;
        }
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
            (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);

        if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            total_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        /*
         * compute the size of the upl needed to encompass
         * the requested write... limit each call to cluster_io
         * to the maximum UPL size... cluster_io will clip if
         * this exceeds the maximum io_size for the device,
         * make sure to account for
         * a starting offset that's not page aligned
         */
        upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

        if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        pages_in_upl = upl_size / PAGE_SIZE;
        io_size      = upl_size - start_offset;

        if ((long long)io_size > total_size)
            io_size = total_size;

        start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
        last_blkno  = start_blkno + pages_in_upl;
        kret = ubc_create_upl(vp,

        if (kret != KERN_SUCCESS)
            panic("cluster_write: failed to get pagelist");

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
            (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        if (start_offset && !upl_valid_page(pl, 0)) {
            /*
             * we're starting in the middle of the first page of the upl
             * and the page isn't currently valid, so we're going to have
             * to read it in first... this is a synchronous operation
             */
            read_size = PAGE_SIZE;

            if ((upl_f_offset + read_size) > newEOF)
                read_size = newEOF - upl_f_offset;

            retval = cluster_io(vp, upl, 0, upl_f_offset, read_size, devblocksize,
                                CL_READ, (struct buf *)0, (struct clios *)0);

                /*
                 * we had an error during the read which causes us to abort
                 * the current cluster_write request... before we do, we need
                 * to release the rest of the pages in the upl without modifying
                 * their state and mark the failed page in error
                 */
                ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                    (int)upl, 0, 0, retval, 0);
        }
        if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
            /*
             * the last offset we're writing to in this upl does not end on a page
             * boundary... if it's not beyond the old EOF, then we'll also need to
             * pre-read this page in if it isn't already valid
             */
            upl_offset = upl_size - PAGE_SIZE;

            if ((upl_f_offset + start_offset + io_size) < oldEOF &&
                !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {

                read_size = PAGE_SIZE;

                if ((upl_f_offset + upl_offset + read_size) > newEOF)
                    read_size = newEOF - (upl_f_offset + upl_offset);

                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size, devblocksize,
                                    CL_READ, (struct buf *)0, (struct clios *)0);

                    /*
                     * we had an error during the read which causes us to abort
                     * the current cluster_write request... before we do, we
                     * need to release the rest of the pages in the upl without
                     * modifying their state and mark the failed page in error
                     */
                    ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
                    ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                        (int)upl, 0, 0, retval, 0);
            }
        }
        if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
            panic("cluster_write: ubc_upl_map failed\n");
        xfer_resid = io_size;
        io_offset = start_offset;

        while (zero_cnt && xfer_resid) {

            if (zero_cnt < (long long)xfer_resid)
                bytes_to_zero = zero_cnt;
            else
                bytes_to_zero = xfer_resid;

            if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                    (int)upl_f_offset + io_offset, bytes_to_zero,
                    (int)io_offset, xfer_resid, 0);
            } else {
                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
                zero_pg_index = (int)((zero_off - upl_f_offset) / PAGE_SIZE_64);

                if ( !upl_valid_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset, bytes_to_zero,
                        (int)io_offset, xfer_resid, 0);
                } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
                           !upl_dirty_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset, bytes_to_zero,
                        (int)io_offset, xfer_resid, 0);
                }
            }
            xfer_resid -= bytes_to_zero;
            zero_cnt   -= bytes_to_zero;
            zero_off   += bytes_to_zero;
            io_offset  += bytes_to_zero;
        }
        if (xfer_resid && uio_resid) {
            bytes_to_move = min(uio_resid, xfer_resid);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
                (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);

            retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);

                if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                    panic("cluster_write: kernel_upl_unmap failed\n");

                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                    (int)upl, 0, 0, retval, 0);

                uio_resid  -= bytes_to_move;
                xfer_resid -= bytes_to_move;
                io_offset  += bytes_to_move;
        }
        while (xfer_resid && zero_cnt1 && retval == 0) {

            if (zero_cnt1 < (long long)xfer_resid)
                bytes_to_zero = zero_cnt1;
            else
                bytes_to_zero = xfer_resid;

            if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                    (int)upl_f_offset + io_offset,
                    bytes_to_zero, (int)io_offset, xfer_resid, 0);
            } else {
                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
                zero_pg_index = (int)((zero_off1 - upl_f_offset) / PAGE_SIZE_64);

                if ( !upl_valid_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset,
                        bytes_to_zero, (int)io_offset, xfer_resid, 0);
                } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
                           !upl_dirty_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset,
                        bytes_to_zero, (int)io_offset, xfer_resid, 0);
                }
            }
            xfer_resid -= bytes_to_zero;
            zero_cnt1  -= bytes_to_zero;
            zero_off1  += bytes_to_zero;
            io_offset  += bytes_to_zero;
        }

            io_size += start_offset;

            if ((upl_f_offset + io_size) >= newEOF && io_size < upl_size) {
                /*
                 * if we're extending the file with this write
                 * we'll zero fill the rest of the page so that
                 * if the file gets extended again in such a way as to leave a
                 * hole starting at this EOF, we'll have zero's in the correct spot
                 */
                bzero((caddr_t)(io_address + io_size), upl_size - io_size);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                    (int)upl_f_offset + io_size,
                    upl_size - io_size, 0, 0, 0);
            }
            if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                panic("cluster_write: kernel_upl_unmap failed\n");

            if (flags & IO_SYNC)
                /*
                 * if the IO_SYNC flag is set then we need to
                 * bypass any clusters and immediately issue
                 * the I/O
                 */
                goto issue_io;
            if (vp->v_clen == 0)
                /*
                 * no clusters currently present
                 */
                goto start_new_cluster;

            /*
             * keep track of the overall dirty page
             * range we've developed
             * in case we have to fall back to the
             * VHASDIRTY method of flushing
             */
            if (vp->v_flag & VHASDIRTY)
                /* ... */;

            for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
                /*
                 * we have an existing cluster... see if this write will extend it nicely
                 */
                if (start_blkno >= vp->v_clusters[cl_index].start_pg) {
                    /*
                     * the current write starts at or after the current cluster
                     */
                    if (last_blkno <= (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
                        /*
                         * we have a write that fits entirely
                         * within the existing cluster limits
                         */
                        if (last_blkno > vp->v_clusters[cl_index].last_pg)
                            /*
                             * update our idea of where the cluster ends
                             */
                            vp->v_clusters[cl_index].last_pg = last_blkno;
                    }
                    if (start_blkno < (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
                        /*
                         * we have a write that starts in the middle of the current cluster
                         * but extends beyond the cluster's limit
                         * we'll clip the current cluster if we actually
                         * overlap with the new write
                         * and start a new cluster with the current write
                         */
                        if (vp->v_clusters[cl_index].last_pg > start_blkno)
                            vp->v_clusters[cl_index].last_pg = start_blkno;
                    }
                    /*
                     * we also get here for the case where the current write starts
                     * beyond the limit of the existing cluster
                     *
                     * in either case, we'll check the remaining clusters before
                     * starting a new one
                     */
                } else {
                    /*
                     * the current write starts in front of the current cluster
                     */
                    if ((vp->v_clusters[cl_index].last_pg - start_blkno) <= MAX_UPL_TRANSFER) {
                        /*
                         * we can just merge the old cluster
                         * with the new request and leave it
                         * in the cache
                         */
                        vp->v_clusters[cl_index].start_pg = start_blkno;

                        if (last_blkno > vp->v_clusters[cl_index].last_pg) {
                            /*
                             * the current write completely
                             * envelops the existing cluster
                             */
                            vp->v_clusters[cl_index].last_pg = last_blkno;
                        }
                    }
                    /*
                     * if we were to combine this write with the current cluster
                     * we would exceed the cluster size limit.... so,
                     * let's see if there's any overlap of the new I/O with
                     * the existing cluster...
                     */
                    if (last_blkno > vp->v_clusters[cl_index].start_pg)
                        /*
                         * the current write extends into the existing cluster
                         * clip the current cluster by moving the start position
                         * to where the current write ends
                         */
                        vp->v_clusters[cl_index].start_pg = last_blkno;
                    /*
                     * if we get here, there was no way to merge
                     * the new I/O with this cluster and
                     * keep it under our maximum cluster length
                     * we'll check the remaining clusters before starting a new one
                     */
                }
            }
            if (cl_index < vp->v_clen)
                /*
                 * we found an existing cluster that we
                 * could merge this I/O into
                 */
                continue;

            if (vp->v_clen < MAX_CLUSTERS && !(vp->v_flag & VNOCACHE_DATA))
                /*
                 * we didn't find an existing cluster to
                 * merge into, but there's room to start
                 * a new one
                 */
                goto start_new_cluster;

            /*
             * no existing cluster to merge with and no
             * room to start a new one... we'll try
             * pushing the existing ones... if none of
             * them are able to be pushed, we'll have
             * to fall back on the VHASDIRTY mechanism
             * cluster_try_push will set v_clen to the
             * number of remaining clusters if it is
             * unable to push all of them
             */
            if (vp->v_flag & VNOCACHE_DATA)
                /* ... */;

            if (cluster_try_push(vp, newEOF, 0, 0) == 0) {
                vp->v_flag |= VHASDIRTY;
            }

            if (vp->v_clen == 0) {
                vp->v_ciosiz = devblocksize;
                vp->v_cstart = start_blkno;
                vp->v_lastw  = last_blkno;
            }
            vp->v_clusters[vp->v_clen].start_pg = start_blkno;
            vp->v_clusters[vp->v_clen].last_pg  = last_blkno;

            /*
             * make sure we keep v_cstart and v_lastw up to
             * date in case we have to fall back on the
             * V_HASDIRTY mechanism (or we've already entered it)
             */
            if (start_blkno < vp->v_cstart)
                vp->v_cstart = start_blkno;
            if (last_blkno > vp->v_lastw)
                vp->v_lastw = last_blkno;

            ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);

            /*
             * in order to maintain some semblance of coherency with mapped writes
             * we need to write the cluster back out as a multiple of the PAGESIZE
             * unless the cluster encompasses the last page of the file... in this
             * case we'll round out to the nearest device block boundary
             */
            if ((upl_f_offset + io_size) > newEOF) {
                io_size = newEOF - upl_f_offset;
                io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
            }

            if (flags & IO_SYNC)
                io_flags = CL_COMMIT | CL_AGE;
            else
                io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;

            if (vp->v_flag & VNOCACHE_DATA)
                io_flags |= CL_DUMP;

            while (vp->v_numoutput >= ASYNC_THROTTLE) {
                vp->v_flag |= VTHROTTLED;
                tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_write", 0);
            }
            retval = cluster_io(vp, upl, 0, upl_f_offset, io_size, devblocksize,
                                io_flags, (struct buf *)0, (struct clios *)0);
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
        retval, 0, 0, 0, 0);
}
int
cluster_read(vp, uio, filesize, devblocksize, flags)
{
    vm_offset_t      upl_offset;
    upl_page_info_t *pl;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);

    /*
     * We set a threshold of 4 pages to decide if the nocopy
     * read loop is worth the trouble...
     */
    if (!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE)))
    {
        retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
            (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);

        return(retval);
    }

    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0)
    {
        /* we know we have a resid, so this is safe */
        while (iov->iov_len == 0) {
            /* ... */
        }
        /*
         * We check every vector target and if it is physically
         * contiguous space, we skip the sanity checks.
         */
        upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
        upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;

        upl_flags = UPL_QUERY_OBJECT_TYPE;
        if((vm_map_get_upl(current_map(),
                           (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                           &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
        {
            /*
             * the user app must have passed in an invalid address
             */
        }

        if (upl_flags & UPL_PHYS_CONTIG)
        {
            retval = cluster_phys_read(vp, uio, filesize, devblocksize, flags);
        }
        else if (uio->uio_resid < 4 * PAGE_SIZE)
        {
            /*
             * We set a threshold of 4 pages to decide if the nocopy
             * read loop is worth the trouble...
             */
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
                (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);

            return(retval);
        }
        else if (uio->uio_offset & PAGE_MASK_64)
        {
            /* Bring the file offset read up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;
            /*
             * Fake the resid going into the cluster_read_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        }
        else if ((int)iov->iov_base & PAGE_MASK_64)
        {
            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        }
        else
        {
            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */
            max_io_size = filesize - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = (int)max_io_size;

            if (clip_size < PAGE_SIZE)
            {
                /*
                 * Take care of the tail end of the read in this vector.
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
            else
            {
                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_read(vp, uio, filesize, devblocksize, flags);
                if ((retval==0) && uio->uio_resid)
                    retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
        }
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
        (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
}
static int
cluster_read_x(vp, uio, filesize, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t         filesize;
    int           devblocksize;
    int           flags;
{
    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    vm_offset_t      io_address;
    /* ... */

    b_lblkno = (int)(uio->uio_offset / PAGE_SIZE_64);

    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
        /*
         * compute the size of the upl needed to encompass
         * the requested read... limit each call to cluster_io
         * to the maximum UPL size... cluster_io will clip if
         * this exceeds the maximum io_size for the device,
         * make sure to account for
         * a starting offset that's not page aligned
         */
        start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
        upl_f_offset = uio->uio_offset - (off_t)start_offset;
        max_size     = filesize - uio->uio_offset;

        if ((off_t)((unsigned int)uio->uio_resid) < max_size)
            io_size = uio->uio_resid;
        else
            io_size = max_size;

        if (uio->uio_segflg == UIO_USERSPACE && !(vp->v_flag & VNOCACHE_DATA)) {
            segflg = uio->uio_segflg;

            uio->uio_segflg = UIO_PHYS_USERSPACE;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
                         (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);

            while (io_size && retval == 0) {
                if (ubc_page_op(vp, upl_f_offset,
                                UPL_POP_SET | UPL_POP_BUSY,
                                &paddr, 0) != KERN_SUCCESS)
                    break;

                xsize = PAGE_SIZE - start_offset;

                if (xsize > io_size)
                    xsize = io_size;

                retval = uiomove((caddr_t)(paddr + start_offset), xsize, uio);

                ubc_page_op(vp, upl_f_offset,
                            UPL_POP_CLR | UPL_POP_BUSY, 0, 0);

                io_size     -= xsize;
                start_offset = (int)
                               (uio->uio_offset & PAGE_MASK_64);
                upl_f_offset = uio->uio_offset - start_offset;
            }
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
                         (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);

            uio->uio_segflg = segflg;

            if (retval)
                break;

            if (io_size == 0) {
                /*
                 * we're already finished with this read request
                 * let's see if we should do a read-ahead
                 */
                e_lblkno = (int)
                           ((uio->uio_offset - 1) / PAGE_SIZE_64);

                if (!(vp->v_flag & VRAOFF))
                    /*
                     * let's try to read ahead if we're in
                     * a sequential access pattern
                     */
                    cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
                vp->v_lastr = e_lblkno;

                break;
            }
            max_size = filesize - uio->uio_offset;
        }
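        /*
         * Whatever is still outstanding at this point was either not
         * resident in the cache or skipped the fast path above (non
         * UIO_USERSPACE segment, or VNOCACHE_DATA set), so build a UPL
         * covering the rest of this chunk; start_offset and upl_f_offset
         * already describe where the next transfer begins.
         */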
        upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
        if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
        pages_in_upl = upl_size / PAGE_SIZE;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START,
                     (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        kret = ubc_create_upl(vp,
                              /* ... */);
        if (kret != KERN_SUCCESS)
            panic("cluster_read: failed to get pagelist");

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END,
                     (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        /*
         * scan from the beginning of the upl looking for the first
         * non-valid page.... this will become the first page in
         * the request we're going to make to 'cluster_io'... if all
         * of the pages are valid, we won't call through to 'cluster_io'
         */
        for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
            if (!upl_valid_page(pl, start_pg))
                break;
        }
        /*
         * scan from the starting invalid page looking for a valid
         * page before the end of the upl is reached, if we
         * find one, then it will be the last page of the request to
         * 'cluster_io'
         */
        for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
            if (upl_valid_page(pl, last_pg))
                break;
        }

        if (start_pg < last_pg) {
            /*
             * we found a range of 'invalid' pages that must be filled
             * if the last page in this range is the last page of the file
             * we may have to clip the size of it to keep from reading past
             * the end of the last physical block associated with the file
             */
            upl_offset = start_pg * PAGE_SIZE;
            io_size    = (last_pg - start_pg) * PAGE_SIZE;

            if ((upl_f_offset + upl_offset + io_size) > filesize)
                io_size = filesize - (upl_f_offset + upl_offset);

            /*
             * issue a synchronous read to cluster_io
             */
            error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
                               io_size, devblocksize, CL_READ, (struct buf *)0, (struct clios *)0);
        }
        /*
         * if the read completed successfully, or there was no I/O request
         * issued, then map the upl into kernel address space and
         * move the data into user land.... we'll first add on any 'valid'
         * pages that were present in the upl when we acquired it.
         */
        if (error == 0) {
            u_int  size_of_prefetch;

            for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
                if (!upl_valid_page(pl, uio_last))
                    break;
            }
            /*
             * compute size to transfer this round, if uio->uio_resid is
             * still non-zero after this uiomove, we'll loop around and
             * set up for another I/O.
             */
            val_size = (uio_last * PAGE_SIZE) - start_offset;

            if (max_size < val_size)
                val_size = max_size;

            if (uio->uio_resid < val_size)
                val_size = uio->uio_resid;

            e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);

            if (size_of_prefetch = (uio->uio_resid - val_size)) {
                /*
                 * if there's still I/O left to do for this request, then issue a
                 * pre-fetch I/O... the I/O wait time will overlap
                 * with the copying of the data
                 */
                cluster_rd_prefetch(vp, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);
            }
            if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
                /*
                 * let's try to read ahead if we're in
                 * a sequential access pattern
                 */
                cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
            vp->v_lastr = e_lblkno;

            if (uio->uio_segflg == UIO_USERSPACE) {
                segflg = uio->uio_segflg;

                uio->uio_segflg = UIO_PHYS_USERSPACE;

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
                             (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);

                offset = start_offset;

                while (val_size && retval == 0) {
                    i     = offset / PAGE_SIZE;
                    csize = min(PAGE_SIZE - start_offset, val_size);

                    paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;

                    retval = uiomove(paddr, csize, uio);

                    val_size    -= csize;
                    offset      += csize;
                    start_offset = offset & PAGE_MASK;
                }
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
                             (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);

                uio->uio_segflg = segflg;
            } else {
                if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
                    panic("cluster_read: ubc_upl_map() failed\n");

                retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);

                if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                    panic("cluster_read: ubc_upl_unmap() failed\n");
            }
        }
        if (start_pg < last_pg) {
            /*
             * compute the range of pages that we actually issued an I/O for
             * and either commit them as valid if the I/O succeeded
             * or abort them if the I/O failed
             */
            io_size = (last_pg - start_pg) * PAGE_SIZE;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
                         (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);

            if (error || (vp->v_flag & VNOCACHE_DATA))
                ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
                                    UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
            else
                ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size,
                                     UPL_COMMIT_CLEAR_DIRTY
                                     | UPL_COMMIT_FREE_ON_EMPTY
                                     | UPL_COMMIT_INACTIVATE);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
                         (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);
        }
        if ((last_pg - start_pg) < pages_in_upl) {
            int cur_pg;

            /*
             * the set of pages that we issued an I/O for did not encompass
             * the entire upl... so just release these without modifying
             * their state
             */
            if (error)
                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
            else {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
                             (int)upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);

                if (start_pg) {
                    /*
                     * we found some already valid pages at the beginning of
                     * the upl commit these back to the inactive list with
                     * reference cleared
                     */
                    for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
                        commit_flags = UPL_COMMIT_FREE_ON_EMPTY
                                       | UPL_COMMIT_INACTIVATE;

                        if (upl_dirty_page(pl, cur_pg))
                            commit_flags |= UPL_COMMIT_SET_DIRTY;

                        if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
                            ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
                                                UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
                        else
                            ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
                                                 PAGE_SIZE, commit_flags);
                    }
                }
                if (last_pg < uio_last) {
                    /*
                     * we found some already valid pages immediately after the
                     * pages we issued I/O for, commit these back to the
                     * inactive list with reference cleared
                     */
                    for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
                        commit_flags = UPL_COMMIT_FREE_ON_EMPTY
                                       | UPL_COMMIT_INACTIVATE;

                        if (upl_dirty_page(pl, cur_pg))
                            commit_flags |= UPL_COMMIT_SET_DIRTY;

                        if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
                            ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
                                                UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
                        else
                            ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
                                                 PAGE_SIZE, commit_flags);
                    }
                }
                if (uio_last < pages_in_upl) {
                    /*
                     * there were some invalid pages beyond the valid pages
                     * that we didn't issue an I/O for, just release them
                     * unchanged
                     */
                    ubc_upl_abort_range(upl, uio_last * PAGE_SIZE,
                                        (pages_in_upl - uio_last) * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
                }
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
                             (int)upl, -1, -1, 0, 0);
            }
        }
    }
    return (retval);
}
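/*
 * cluster_nocopy_read transfers page-aligned, page-multiple requests
 * directly into the caller's buffer: the user buffer itself is turned
 * into a UPL with vm_map_get_upl() so cluster_io() can read straight
 * into it, avoiding a copy through the cache.  Pages that are already
 * resident in the UBC are still copied out first via
 * ubc_page_op()/uiomove().
 */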
static int
cluster_nocopy_read(vp, uio, filesize, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t         filesize;
    int           devblocksize;
    int           flags;
{
    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    off_t            start_upl_f_offset;
    int              upl_needed_size;
    int              force_data_sync;
    /* ... */

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
                 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);

    /*
     * When we enter this routine, we know
     *  -- the offset into the file is on a pagesize boundary
     *  -- the resid is a page multiple
     *  -- the resid will not exceed iov_len
     */
    iov = uio->uio_iov;

    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {

        max_io_size = filesize - uio->uio_offset;

        if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
            io_size = max_io_size;
        else
            io_size = uio->uio_resid;

        /*
         * We don't come into this routine unless
         * UIO_USERSPACE is set.
         */
        segflg = uio->uio_segflg;

        uio->uio_segflg = UIO_PHYS_USERSPACE;

        /*
         * First look for pages already in the cache
         * and move them to user space.
         */
        while (io_size && (retval == 0)) {
            upl_f_offset = uio->uio_offset;

            /*
             * If this call fails, it means the page is not
             * in the page cache.
             */
            if (ubc_page_op(vp, upl_f_offset,
                            UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
                break;

            retval = uiomove((caddr_t)(paddr), PAGE_SIZE, uio);

            ubc_page_op(vp, upl_f_offset,
                        UPL_POP_CLR | UPL_POP_BUSY, 0, 0);

            io_size -= PAGE_SIZE;
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
                         (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
        }
        uio->uio_segflg = segflg;
        if (retval) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, 2, retval, 0);
            return (retval);
        }
        /* If we are already finished with this read, then return */
        if (io_size == 0) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, 3, io_size, 0);
            return (0);
        }
        max_io_size = io_size;
        if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        start_upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
        upl_f_offset = start_upl_f_offset;
        io_size = 0;

        while (io_size < max_io_size) {
            if (ubc_page_op(vp, upl_f_offset,
                            UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS) {
                ubc_page_op(vp, upl_f_offset,
                            UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
                break;
            }
            /*
             * Build up the io request parameters.
             */
            io_size      += PAGE_SIZE;
            upl_f_offset += PAGE_SIZE;
        }

        upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
        upl_needed_size = (upl_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
                     (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);

        for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
            upl_size = upl_needed_size;
            upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

            kret = vm_map_get_upl(current_map(),
                                  (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                                  &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);

            if (kret != KERN_SUCCESS) {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
                             (int)upl_offset, upl_size, io_size, kret, 0);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                             (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);

                /* cluster_nocopy_read: failed to get pagelist */
                /* do not return kret here */
                return (retval);
            }
            pages_in_pl = upl_size / PAGE_SIZE;
            pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

            for (i = 0; i < pages_in_pl; i++) {
                if (!upl_valid_page(pl, i))
                    break;
            }
            if (i == pages_in_pl)
                break;

            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        if (force_data_sync >= 3) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
                         (int)upl_offset, upl_size, io_size, kret, 0);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);
            return (retval);
        }
        /*
         * Consider the possibility that upl_size wasn't satisfied.
         */
        if (upl_size != upl_needed_size)
            io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;

        if (io_size == 0) {
            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
            return (retval);
        }
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
                     (int)upl_offset, upl_size, io_size, kret, 0);

        /*
         * issue a synchronous read to cluster_io
         */
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
                     (int)upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);

        error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
                           io_size, devblocksize, CL_READ | CL_NOZERO, (struct buf *)0, (struct clios *)0);

        if (error == 0) {
            /*
             * The cluster_io read completed successfully,
             * update the uio structure and commit.
             */
            ubc_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);

            iov->iov_base   += io_size;
            iov->iov_len    -= io_size;
            uio->uio_resid  -= io_size;
            uio->uio_offset += io_size;
        } else {
            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
                     (int)upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);

        if (retval == 0)
            retval = error;

    } /* end while */

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                 (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);

    return (retval);
}
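/*
 * cluster_phys_read handles reads whose target buffer is physically
 * contiguous (device memory): the buffer is wired with vm_map_get_upl(),
 * any head or tail that is not devblocksize-aligned goes through
 * cluster_align_phys_io(), and the aligned middle is issued as
 * CL_DEV_MEMORY | CL_ASYNC transfers tracked by a struct clios.
 */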
static int
cluster_phys_read(vp, uio, filesize, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t         filesize;
    int           devblocksize;
    int           flags;
{
    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    vm_offset_t      dst_paddr;
    int              upl_needed_size;
    struct clios     iostate;
    /* ... */

    /*
     * When we enter this routine, we know
     *  -- the resid will not exceed iov_len
     *  -- the target address is physically contiguous
     */
    iov = uio->uio_iov;

    max_size = filesize - uio->uio_offset;

    if (max_size > (off_t)((unsigned int)iov->iov_len))
        io_size = iov->iov_len;
    else
        io_size = max_size;

    upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
    upl_needed_size = upl_offset + io_size;

    upl_size = upl_needed_size;
    upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

    kret = vm_map_get_upl(current_map(),
                          (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                          &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);

    if (kret != KERN_SUCCESS) {
        /*
         * cluster_phys_read: failed to get pagelist
         */
        return (EINVAL);
    }
    if (upl_size < upl_needed_size) {
        /*
         * The upl_size wasn't satisfied.
         */
        ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

        return (EINVAL);
    }
    pl = ubc_upl_pageinfo(upl);

    dst_paddr = (vm_offset_t)upl_phys_page(pl, 0) + ((vm_offset_t)iov->iov_base & PAGE_MASK);

    while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) {
        int head_size;

        head_size = devblocksize - (int)(uio->uio_offset & (devblocksize - 1));

        if (head_size > io_size)
            head_size = io_size;

        error = cluster_align_phys_io(vp, uio, dst_paddr, head_size, devblocksize, CL_READ);

        if (error) {
            ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

            return (error);
        }
        upl_offset += head_size;
        dst_paddr  += head_size;
        io_size    -= head_size;
    }
    tail_size = io_size & (devblocksize - 1);
    io_size  -= tail_size;

    iostate.io_completed = 0;
    iostate.io_issued = 0;
    iostate.io_error = 0;
    iostate.io_wanted = 0;
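    /*
     * iostate tracks the asynchronous transfers issued below:
     * io_issued and io_completed count the bytes queued and finished,
     * io_error latches any failure reported on completion, and
     * io_wanted marks that this thread is sleeping on &iostate.io_wanted
     * waiting for outstanding I/O to drain.
     */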
    while (io_size && error == 0) {

        if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            xsize = MAX_UPL_TRANSFER * PAGE_SIZE;
        else
            xsize = io_size;
        /*
         * request asynchronously so that we can overlap
         * the preparation of the next I/O... we'll do
         * the commit after all the I/O has completed
         * since it's all issued against the same UPL
         * if there are already too many outstanding reads
         * throttle back until we reach a more reasonable level
         */
        while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) {
            iostate.io_wanted = 1;
            tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_phys_read", 0);
        }

        error = cluster_io(vp, upl, upl_offset, uio->uio_offset, xsize, 0,
                           CL_READ | CL_NOZERO | CL_DEV_MEMORY | CL_ASYNC,
                           (struct buf *)0, &iostate);

        if (error == 0) {
            /*
             * The cluster_io read was issued successfully,
             * update the uio structure
             */
            uio->uio_resid  -= xsize;
            iov->iov_len    -= xsize;
            iov->iov_base   += xsize;
            uio->uio_offset += xsize;
            dst_paddr       += xsize;
            upl_offset      += xsize;
            io_size         -= xsize;
        }
    }
    /*
     * make sure any async reads have completed before
     * we return
     */
    while (iostate.io_issued != iostate.io_completed) {
        iostate.io_wanted = 1;
        tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_phys_read", 0);
    }
    if (iostate.io_error) {
        error = iostate.io_error;
    }
    if (error == 0 && tail_size)
        error = cluster_align_phys_io(vp, uio, dst_paddr, tail_size, devblocksize, CL_READ);

    /*
     * just release our hold on the physically contiguous
     * region without changing any state
     */
    ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

    return (error);
}
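/*
 * advisory_read pre-populates the cache without copying anything to the
 * caller: each UPL is created with UPL_RET_ONLY_ABSENT so only pages not
 * already resident are returned, and every run of absent pages is handed
 * to cluster_io() as an asynchronous CL_READ | CL_COMMIT | CL_AGE request
 * whose pages are committed into the VM cache on completion.
 */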
/*
 * generate advisory I/O's in the largest chunks possible
 * the completed pages will be released into the VM cache
 */
static int
advisory_read(vp, filesize, f_offset, resid, devblocksize)
    struct vnode *vp;
    off_t         filesize;
    off_t         f_offset;
    int           resid;
    int           devblocksize;
{
    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    /* ... */

    if (!UBCINFOEXISTS(vp))
        return (EINVAL);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
                 (int)f_offset, resid, (int)filesize, devblocksize, 0);

    while (resid && f_offset < filesize && retval == 0) {
        /*
         * compute the size of the upl needed to encompass
         * the requested read... limit each call to cluster_io
         * to the maximum UPL size... cluster_io will clip if
         * this exceeds the maximum io_size for the device,
         * make sure to account for
         * a starting offset that's not page aligned
         */
        start_offset = (int)(f_offset & PAGE_MASK_64);
        upl_f_offset = f_offset - (off_t)start_offset;
        max_size     = filesize - f_offset;

        if (resid < max_size)
            io_size = resid;
        else
            io_size = max_size;

        upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
        if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
        pages_in_upl = upl_size / PAGE_SIZE;

        kret = ubc_create_upl(vp,
                              /* ... */
                              UPL_RET_ONLY_ABSENT);
        if (kret != KERN_SUCCESS)
            return (retval);

        /*
         * before we start marching forward, we must make sure we end on
         * a present page, otherwise we will be working with a freed
         * upl
         */
        for (last_pg = pages_in_upl - 1; last_pg >= 0; last_pg--) {
            if (upl_page_present(pl, last_pg))
                break;
        }
        pages_in_upl = last_pg + 1;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
                     (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        for (last_pg = 0; last_pg < pages_in_upl; ) {
            /*
             * scan from the beginning of the upl looking for the first
             * page that is present.... this will become the first page in
             * the request we're going to make to 'cluster_io'... if all
             * of the pages are absent, we won't call through to 'cluster_io'
             */
            for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
                if (upl_page_present(pl, start_pg))
                    break;
            }
            /*
             * scan from the starting present page looking for an absent
             * page before the end of the upl is reached, if we
             * find one, then it will terminate the range of pages being
             * presented to 'cluster_io'
             */
            for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
                if (!upl_page_present(pl, last_pg))
                    break;
            }

            if (last_pg > start_pg) {
                /*
                 * we found a range of pages that must be filled
                 * if the last page in this range is the last page of the file
                 * we may have to clip the size of it to keep from reading past
                 * the end of the last physical block associated with the file
                 */
                upl_offset = start_pg * PAGE_SIZE;
                io_size    = (last_pg - start_pg) * PAGE_SIZE;

                if ((upl_f_offset + upl_offset + io_size) > filesize)
                    io_size = filesize - (upl_f_offset + upl_offset);

                /*
                 * issue an asynchronous read to cluster_io
                 */
                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, devblocksize,
                                    CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0, (struct clios *)0);
            }
        }
        ubc_upl_abort(upl, 0);

        io_size = upl_size - start_offset;

        if (io_size > resid)
            io_size = resid;
        f_offset += io_size;
        resid    -= io_size;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
                 (int)f_offset, resid, retval, 0, 0);

    return (retval);
}
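/*
 * cluster_push flushes the delayed-write clusters held on the vnode.
 * If the VHASDIRTY mechanism is active it walks the dirty range
 * v_cstart..v_lastw in MAX_UPL_TRANSFER-sized pieces through
 * cluster_push_x(); otherwise it hands the per-vnode cluster list to
 * cluster_try_push().
 */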
int
cluster_push(vp)
    struct vnode *vp;
{
    int retval;

    if (!UBCINFOEXISTS(vp) || vp->v_clen == 0) {
        vp->v_flag &= ~VHASDIRTY;
        return (0);
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_START,
                 vp->v_flag & VHASDIRTY, vp->v_clen, 0, 0, 0);

    if (vp->v_flag & VHASDIRTY) {
        daddr_t start_pg;
        daddr_t last_pg;
        daddr_t end_pg;

        start_pg = vp->v_cstart;
        end_pg   = vp->v_lastw;

        vp->v_flag &= ~VHASDIRTY;
        vp->v_clen = 0;

        while (start_pg < end_pg) {
            last_pg = start_pg + MAX_UPL_TRANSFER;

            if (last_pg > end_pg)
                last_pg = end_pg;

            cluster_push_x(vp, ubc_getsize(vp), start_pg, last_pg, 0);

            start_pg = last_pg;
        }
        return (1);
    }
    retval = cluster_try_push(vp, ubc_getsize(vp), 0, 1);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_END,
                 vp->v_flag & VHASDIRTY, vp->v_clen, retval, 0, 0);

    return (retval);
}
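/*
 * cluster_try_push makes a sorted local copy of the vnode's dirty
 * clusters and tries to push each one via cluster_push_x(); clusters
 * that could not be pushed (or that arrive while pushing) are merged
 * back into vp->v_clusters, falling back to the VHASDIRTY range if the
 * table would overflow.  The return value is the number of free cluster
 * slots, MAX_CLUSTERS - vp->v_clen.
 */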
static int
cluster_try_push(vp, EOF, can_delay, push_all)
    struct vnode *vp;
    off_t         EOF;
    int           can_delay;
    int           push_all;
{
    int cl_index;
    int cl_index1;
    int min_index;
    int cl_len;
    int cl_pushed;
    struct v_cluster l_clusters[MAX_CLUSTERS];

    /*
     * make a local 'sorted' copy of the clusters
     * and clear vp->v_clen so that new clusters can
     * be developed
     */
    for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
        for (min_index = -1, cl_index1 = 0; cl_index1 < vp->v_clen; cl_index1++) {
            if (vp->v_clusters[cl_index1].start_pg == vp->v_clusters[cl_index1].last_pg)
                continue;
            if (min_index == -1)
                min_index = cl_index1;
            else if (vp->v_clusters[cl_index1].start_pg < vp->v_clusters[min_index].start_pg)
                min_index = cl_index1;
        }
        if (min_index == -1)
            break;
        l_clusters[cl_index].start_pg = vp->v_clusters[min_index].start_pg;
        l_clusters[cl_index].last_pg  = vp->v_clusters[min_index].last_pg;

        vp->v_clusters[min_index].start_pg = vp->v_clusters[min_index].last_pg;
    }
    cl_len = cl_index;
    vp->v_clen = 0;

    for (cl_pushed = 0, cl_index = 0; cl_index < cl_len; cl_index++) {
        /*
         * try to push each cluster in turn... cluster_push_x may not
         * push the cluster if can_delay is TRUE and the cluster doesn't
         * meet the criteria for an immediate push
         */
        if (cluster_push_x(vp, EOF, l_clusters[cl_index].start_pg, l_clusters[cl_index].last_pg, can_delay)) {
            l_clusters[cl_index].start_pg = 0;
            l_clusters[cl_index].last_pg  = 0;

            cl_pushed++;

            if (push_all == 0)
                break;
        }
    }
    if (cl_len > cl_pushed) {
        /*
         * we didn't push all of the clusters, so
         * lets try to merge them back in to the vnode
         */
        if ((MAX_CLUSTERS - vp->v_clen) < (cl_len - cl_pushed)) {
            /*
             * we picked up some new clusters while we were trying to
             * push the old ones (I don't think this can happen because
             * I'm holding the lock, but just in case)... the sum of the
             * leftovers plus the new cluster count exceeds our ability
             * to represent them, so fall back to the VHASDIRTY mechanism
             */
            for (cl_index = 0; cl_index < cl_len; cl_index++) {
                if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
                    continue;

                if (l_clusters[cl_index].start_pg < vp->v_cstart)
                    vp->v_cstart = l_clusters[cl_index].start_pg;
                if (l_clusters[cl_index].last_pg > vp->v_lastw)
                    vp->v_lastw = l_clusters[cl_index].last_pg;
            }
            vp->v_flag |= VHASDIRTY;
        } else {
            /*
             * we've got room to merge the leftovers back in
             * just append them starting at the next 'hole'
             * represented by vp->v_clen
             */
            for (cl_index = 0, cl_index1 = vp->v_clen; cl_index < cl_len; cl_index++) {
                if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
                    continue;

                vp->v_clusters[cl_index1].start_pg = l_clusters[cl_index].start_pg;
                vp->v_clusters[cl_index1].last_pg  = l_clusters[cl_index].last_pg;

                if (cl_index1 == 0) {
                    vp->v_cstart = l_clusters[cl_index].start_pg;
                    vp->v_lastw  = l_clusters[cl_index].last_pg;
                } else {
                    if (l_clusters[cl_index].start_pg < vp->v_cstart)
                        vp->v_cstart = l_clusters[cl_index].start_pg;
                    if (l_clusters[cl_index].last_pg > vp->v_lastw)
                        vp->v_lastw = l_clusters[cl_index].last_pg;
                }
                cl_index1++;
            }
            /*
             * update the cluster count
             */
            vp->v_clen = cl_index1;
        }
    }
    return (MAX_CLUSTERS - vp->v_clen);
}
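/*
 * cluster_push_x writes out one cluster of dirty pages [first, last).
 * It clips the range to EOF, may decline small or mostly-clean clusters
 * when can_delay is set, and otherwise issues throttled asynchronous
 * CL_COMMIT writes for each run of valid, dirty pages in the UPL,
 * aborting the clean runs in between.  A non-zero return tells
 * cluster_try_push that the cluster no longer needs to be tracked.
 */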
static int
cluster_push_x(vp, EOF, first, last, can_delay)
    struct vnode *vp;
    off_t         EOF;
    daddr_t       first;
    daddr_t       last;
    int           can_delay;
{
    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    /* ... */

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_START,
                 vp->v_clen, first, last, EOF, 0);

    if ((pages_in_upl = last - first) == 0) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 0, 0, 0, 0);

        return (1);
    }
    upl_size = pages_in_upl * PAGE_SIZE;
    upl_f_offset = ((off_t)first) * PAGE_SIZE_64;

    if (upl_f_offset + upl_size >= EOF) {

        if (upl_f_offset >= EOF) {
            /*
             * must have truncated the file and missed
             * clearing a dangling cluster (i.e. it's completely
             * beyond the new EOF
             */
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 1, 0, 0, 0);

            return (1);
        }
        size = EOF - upl_f_offset;

        upl_size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
        pages_in_upl = upl_size / PAGE_SIZE;
    } else
        size = upl_size;

    if (can_delay && (pages_in_upl < (MAX_UPL_TRANSFER - (MAX_UPL_TRANSFER / 2))))
        return (0);

    kret = ubc_create_upl(vp,
                          /* ... */
                          UPL_RET_ONLY_DIRTY);
    if (kret != KERN_SUCCESS)
        panic("cluster_push: failed to get pagelist");

    if (can_delay) {
        int num_of_dirty;

        for (num_of_dirty = 0, start_pg = 0; start_pg < pages_in_upl; start_pg++) {
            if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
                num_of_dirty++;
        }
        if (num_of_dirty < pages_in_upl / 2) {
            ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 0, 2, num_of_dirty, (pages_in_upl / 2), 0);

            return (0);
        }
    }
    last_pg = 0;

    while (size) {
        /*
         * find the next valid, dirty page to push; clean pages in front
         * of it are released untouched
         */
        for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
            if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
                break;
        }
        if (start_pg > last_pg) {
            io_size = (start_pg - last_pg) * PAGE_SIZE;

            ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size,
                                UPL_ABORT_FREE_ON_EMPTY);

            if (io_size < size)
                size -= io_size;
            else
                break;
        }
        for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
            if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
                break;
        }
        upl_offset = start_pg * PAGE_SIZE;

        io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);

        if (vp->v_flag & VNOCACHE_DATA)
            io_flags = CL_COMMIT | CL_AGE | CL_ASYNC | CL_DUMP;
        else
            io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;

        while (vp->v_numoutput >= ASYNC_THROTTLE) {
            vp->v_flag |= VTHROTTLED;
            tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_push", 0);
        }
        cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, vp->v_ciosiz, io_flags, (struct buf *)0, (struct clios *)0);

        size -= io_size;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 3, 0, 0, 0);

    return (1);
}
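/*
 * cluster_align_phys_io moves a sub-devblocksize head or tail of a
 * physically contiguous transfer through a single UBC page: the page is
 * read in with cluster_io() if it is not already valid, the partial data
 * is copied between the user's physical page and the UBC page with
 * copyp2p(), and for writes (or if the page is dirty) it is pushed back
 * out synchronously before the page is released.
 */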
static int
cluster_align_phys_io(struct vnode *vp, struct uio *uio, vm_offset_t usr_paddr, int xsize, int devblocksize, int flags)
{
    struct iovec    *iov;
    upl_page_info_t *pl;
    vm_offset_t      ubc_paddr;
    /* ... */

    iov = uio->uio_iov;

    kret = ubc_create_upl(vp,
                          uio->uio_offset & ~PAGE_MASK_64,
                          /* ... */);
    if (kret != KERN_SUCCESS)
        return (EINVAL);

    if (!upl_valid_page(pl, 0)) {
        /*
         * issue a synchronous read to cluster_io
         */
        error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, devblocksize,
                           CL_READ, (struct buf *)0, (struct clios *)0);
        if (error) {
            ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);

            return (error);
        }
    }
    ubc_paddr = (vm_offset_t)upl_phys_page(pl, 0) + (int)(uio->uio_offset & PAGE_MASK_64);

    if (flags & CL_READ)
        copyp2p(ubc_paddr, usr_paddr, xsize, 2);
    else
        copyp2p(usr_paddr, ubc_paddr, xsize, 1);

    if ( !(flags & CL_READ) || upl_dirty_page(pl, 0)) {
        /*
         * issue a synchronous write to cluster_io
         */
        error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, devblocksize,
                           0, (struct buf *)0, (struct clios *)0);
    }
    uio->uio_offset += xsize;
    iov->iov_base   += xsize;
    iov->iov_len    -= xsize;
    uio->uio_resid  -= xsize;

    ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);