/* apple/xnu -- bsd/vfs/vfs_cluster.c */
/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cluster.c	8.10 (Berkeley) 3/28/95
 */
#include <sys/param.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

#include <vm/vm_pageout.h>

#include <sys/kdebug.h>
#define CL_COMMIT     0x04
#define CL_PAGEOUT    0x10
#define CL_NOZERO     0x80
#define CL_PAGEIN     0x100
#define CL_DEV_MEMORY 0x200
static void cluster_zero(upl_t upl, vm_offset_t upl_offset, int size, struct buf *bp);
static int  cluster_read_x(struct vnode *vp, struct uio *uio, off_t filesize, int devblocksize, int flags);
static int  cluster_write_x(struct vnode *vp, struct uio *uio, off_t oldEOF, off_t newEOF, off_t headOff,
                            off_t tailOff, int devblocksize, int flags);
static int  cluster_nocopy_read(struct vnode *vp, struct uio *uio, off_t filesize, int devblocksize, int flags);
static int  cluster_nocopy_write(struct vnode *vp, struct uio *uio, off_t newEOF, int devblocksize, int flags);
static int  cluster_phys_read(struct vnode *vp, struct uio *uio, off_t filesize);
static int  cluster_phys_write(struct vnode *vp, struct uio *uio);
static int  cluster_push_x(struct vnode *vp, off_t EOF, daddr_t first, daddr_t last, int can_delay);
static int  cluster_try_push(struct vnode *vp, off_t newEOF, int can_delay, int push_all);
/*
 * throttle the number of async writes that
 * can be outstanding on a single vnode
 * before we issue a synchronous write
 */
#define ASYNC_THROTTLE  9
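/*
 * cluster_iodone() -- I/O completion handler for a cluster transaction.
 * It walks the chain of component buffers linked through b_trans_head /
 * b_trans_next, frees any allocated iovec lists, accumulates error and
 * resid totals, wakes up writers throttled on v_numoutput, and then
 * commits or aborts the UPL that backed the transfer.
 */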
    struct buf  *cbp_head;
    struct buf  *cbp_next;

    cbp_head = (struct buf *)(bp->b_trans_head);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
        (int)cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);

    for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
        /*
         * all I/O requests that are part of this transaction
         * have to complete before we can process it
         */
        if ( !(cbp->b_flags & B_DONE)) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                (int)cbp_head, (int)cbp, cbp->b_bcount, cbp->b_flags, 0);
        }
    }
    upl_offset  = cbp->b_uploffset;
    upl         = cbp->b_pagelist;
    b_flags     = cbp->b_flags;
    real_bp     = cbp->b_real_bp;
    zero_offset = cbp->b_validend;

    if (cbp->b_vectorcount > 1)
        _FREE(cbp->b_vectorlist, M_SEGMENT);

    if ((cbp->b_flags & B_ERROR) && error == 0)
        error = cbp->b_error;

    total_resid += cbp->b_resid;
    total_size  += cbp->b_bcount;

    cbp_next = cbp->b_trans_next;

    if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= (ASYNC_THROTTLE / 3))) {
        vp->v_flag &= ~VTHROTTLED;
        wakeup((caddr_t)&vp->v_numoutput);
    }
    cluster_zero(upl, zero_offset, PAGE_SIZE - (zero_offset & PAGE_MASK), real_bp);

    if ((b_flags & B_NEED_IODONE) && real_bp) {
        real_bp->b_flags |= B_ERROR;
        real_bp->b_error  = error;

        real_bp->b_resid = total_resid;
    }
    if (error == 0 && total_resid)

    if (b_flags & B_COMMIT_UPL) {
        pg_offset   = upl_offset & PAGE_MASK;
        commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;

        if (error || (b_flags & B_NOCACHE)) {
            if ((b_flags & B_PAGEOUT) && (error != ENXIO)) /* transient error */
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if (b_flags & B_PGIN)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
            else
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

            ubc_upl_abort_range(upl, upl_offset - pg_offset, commit_size,
                upl_abort_code);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                (int)upl, upl_offset - pg_offset, commit_size,
                0x80000000|upl_abort_code, 0);
        } else {
            int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;

            if ( !(b_flags & B_PAGEOUT))
                upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;

            upl_commit_flags |= UPL_COMMIT_INACTIVATE;

            ubc_upl_commit_range(upl, upl_offset - pg_offset, commit_size,
                upl_commit_flags);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                (int)upl, upl_offset - pg_offset, commit_size,
                upl_commit_flags, 0);
        }
    } else
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
            (int)upl, upl_offset, 0, error, 0);
cluster_zero(upl, upl_offset, size, bp)
    vm_offset_t   upl_offset;
{
    vm_offset_t   io_addr = 0;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
        upl_offset, size, (int)bp, 0, 0);

    if (bp == NULL || bp->b_data == NULL) {
        kret = ubc_upl_map(upl, &io_addr);

        if (kret != KERN_SUCCESS)
            panic("cluster_zero: ubc_upl_map() failed with (%d)", kret);
        if (io_addr == 0)
            panic("cluster_zero: ubc_upl_map() mapped 0");
    } else
        io_addr = (vm_offset_t)bp->b_data;

    bzero((caddr_t)(io_addr + upl_offset), size);

    kret = ubc_upl_unmap(upl);

    if (kret != KERN_SUCCESS)
        panic("cluster_zero: kernel_upl_unmap failed");
cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, real_bp)
    vm_offset_t   upl_offset;
    int           non_rounded_size;
{
    struct buf   *cbp_head = 0;
    struct buf   *cbp_tail = 0;

    if (flags & CL_READ) {
        io_flags = (B_VECTORLIST | B_READ);

        vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
    } else {
        io_flags = (B_VECTORLIST | B_WRITEINPROG);

        vfs_io_attributes(vp, B_WRITE, &max_iosize, &max_vectors);
    }
    pl = ubc_upl_pageinfo(upl);

    if (flags & CL_ASYNC)
        io_flags |= (B_CALL | B_ASYNC);

    io_flags |= B_NOCACHE;
    if (flags & CL_PAGEIN)

    size = (non_rounded_size + (devblocksize - 1)) & ~(devblocksize - 1);

    size = non_rounded_size;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
        (int)f_offset, size, upl_offset, flags, 0);

    if ((flags & CL_READ) && ((upl_offset + non_rounded_size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
        /*
         * then we are going to end up
         * with a page that we can't complete (the file size wasn't a multiple
         * of PAGE_SIZE and we're trying to read to the end of the file
         * so we'll go ahead and zero out the portion of the page we can't
         * read in from the file
         */
        zero_offset = upl_offset + non_rounded_size;
    }
    if (size > max_iosize)
        io_size = max_iosize;

    if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, &io_size, NULL)) {
        if (error == EOPNOTSUPP)
            panic("VOP_CMAP Unimplemented");
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
        (int)f_offset, (int)blkno, io_size, zero_offset, 0);

    if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {
        if (flags & CL_PAGEOUT) {

            /* Try paging out the page individually before
               giving up entirely and dumping it (it could
               be mapped in a "hole" and require allocation
             */
            ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE_64, UPL_ABORT_FREE_ON_EMPTY);
            if (ubc_pushdirty_range(vp, f_offset, PAGE_SIZE_64) == 0) {

            upl_offset += PAGE_SIZE_64;
            f_offset   += PAGE_SIZE_64;
            size       -= PAGE_SIZE_64;
        }
    }
    lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);
    /*
     * we have now figured out how much I/O we can do - this is in 'io_size'
     * pl_index represents the first page in the 'upl' that the I/O will occur for
     * pg_offset is the starting point in the first page for the I/O
     * pg_count is the number of full and partial pages that 'io_size' encompasses
     */
    pl_index  = upl_offset / PAGE_SIZE;
    pg_offset = upl_offset & PAGE_MASK;
    pg_count  = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;

    if (flags & CL_DEV_MEMORY) {
        /*
         * currently, can't deal with reading 'holes' in file
         */
        if ((long)blkno == -1) {

        /*
         * treat physical requests as one 'giant' page
         */
    }
    if ((flags & CL_READ) && (long)blkno == -1) {
        /*
         * if we're reading and blkno == -1, then we've got a
         * 'hole' in the file that we need to deal with by zeroing
         * out the affected area in the upl
         */
        if (zero_offset && io_size == size) {
            /*
             * if this upl contains the EOF and it is not a multiple of PAGE_SIZE
             * than 'zero_offset' will be non-zero
             * if the 'hole' returned by VOP_CMAP extends all the way to the eof
             * (indicated by the io_size finishing off the I/O request for this UPL)
             * than we're not going to issue an I/O for the
             * last page in this upl... we need to zero both the hole and the tail
             * of the page beyond the EOF, since the delayed zero-fill won't kick in
             */
            bytes_to_zero = (((upl_offset + io_size) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - upl_offset;
        } else
            bytes_to_zero = io_size;

        cluster_zero(upl, upl_offset, bytes_to_zero, real_bp);

        /*
         * if there is a current I/O chain pending
         * then the first page of the group we just zero'd
         * will be handled by the I/O completion if the zero
         * fill started in the middle of the page
         */
        pg_count = (io_size - pg_offset) / PAGE_SIZE;
        /*
         * no pending I/O to pick up that first page
         * so, we have to make sure it gets committed
         *
         * set the pg_offset to 0 so that the upl_commit_range
         * starts with this page
         */
        pg_count = (io_size + pg_offset) / PAGE_SIZE;

        if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))
            /*
             * if we're done with the request for this UPL
             * then we have to make sure to commit the last page
             * even if we only partially zero-filled it
             */
            pg_resid = PAGE_SIZE - pg_offset;

        if (flags & CL_COMMIT)
            ubc_upl_commit_range(upl,
                (upl_offset + pg_resid) & ~PAGE_MASK,
                pg_count * PAGE_SIZE,
                UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);

        upl_offset += io_size;

        if (cbp_head && pg_count)

    } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
        real_bp->b_blkno = blkno;
    }
    if (pg_count > max_vectors) {
        io_size -= (pg_count - max_vectors) * PAGE_SIZE;

        io_size = PAGE_SIZE - pg_offset;

        pg_count = max_vectors;
    }
    /*
     * we need to allocate space for the vector list
     */
    iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
        M_SEGMENT, M_NOWAIT);

    if (iovp == (struct iovec *) 0) {
        /*
         * if the allocation fails, then throttle down to a single page
         */
        io_size = PAGE_SIZE - pg_offset;
    }
    /* Throttle the speculative IO */
    if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT))

    cbp = alloc_io_buf(vp, priv);

    /*
     * we use the io vector that's reserved in the buffer header
     * this insures we can always issue an I/O even in a low memory
     * condition that prevents the _MALLOC from succeeding... this
     * is necessary to prevent deadlocks with the pager
     */
    iovp = (struct iovec *)(&cbp->b_vects[0]);

    cbp->b_vectorlist  = (void *)iovp;
    cbp->b_vectorcount = pg_count;

    if (flags & CL_DEV_MEMORY) {

        iovp->iov_len  = io_size;
        iovp->iov_base = (caddr_t)upl_phys_page(pl, 0);

        if (iovp->iov_base == (caddr_t) 0) {

        iovp->iov_base += upl_offset;
    } else {
        for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {

            psize = PAGE_SIZE - pg_offset;

            iovp->iov_len  = psize;
            iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);

            if (iovp->iov_base == (caddr_t) 0) {
                _FREE(cbp->b_vectorlist, M_SEGMENT);
            }
            iovp->iov_base += pg_offset;

            if (flags & CL_PAGEOUT) {
                if (bp = incore(vp, lblkno + i)) {
                    if (!ISSET(bp->b_flags, B_BUSY)) {
                        SET(bp->b_flags, (B_BUSY | B_INVAL));
                    } else
                        panic("BUSY bp found in cluster_io");
                }
            }
        }
    }
    if (flags & CL_ASYNC)
        cbp->b_iodone = (void *)cluster_iodone;
    cbp->b_flags |= io_flags;

    cbp->b_lblkno     = lblkno;
    cbp->b_blkno      = blkno;
    cbp->b_bcount     = io_size;
    cbp->b_pagelist   = upl;
    cbp->b_uploffset  = upl_offset;
    cbp->b_trans_next = (struct buf *)0;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
        cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
        cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);

    cbp_tail->b_trans_next = cbp;

    (struct buf *)(cbp->b_trans_head) = cbp_head;

    upl_offset += io_size;

    if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY) && ((flags & CL_ASYNC) || buf_count > 8)) || size == 0) {
        /*
         * if we have no more I/O to issue or
         * the current I/O we've prepared fully
         * completes the last page in this request
         * and it's either an ASYNC request or
         * we've already accumulated more than 8 I/O's into
         * this transaction and it's not an I/O directed to
         * special DEVICE memory
         * then go ahead and issue the I/O
         */
        if (flags & CL_COMMIT)
            cbp_head->b_flags |= B_COMMIT_UPL;
        if (flags & CL_PAGEOUT)
            cbp_head->b_flags |= B_PAGEOUT;
        if (flags & CL_PAGEIN)
            cbp_head->b_flags |= B_PGIN;

        cbp_head->b_flags |= B_NEED_IODONE;
        cbp_head->b_real_bp = real_bp;

        cbp_head->b_real_bp = (struct buf *)NULL;

        /*
         * we're about to issue the last I/O for this upl
         * if this was a read to the eof and the eof doesn't
         * finish on a page boundary, than we need to zero-fill
         * the rest of the page....
         */
        cbp_head->b_validend = zero_offset;

        cbp_head->b_validend = 0;

        for (cbp = cbp_head; cbp;) {
            struct buf * cbp_next;

            if (io_flags & B_WRITEINPROG)
                cbp->b_vp->v_numoutput++;

            cbp_next = cbp->b_trans_next;

            (void) VOP_STRATEGY(cbp);
        }
        if ( !(flags & CL_ASYNC)) {
            for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)

            if (error = cluster_iodone(cbp_head)) {
                if ((flags & CL_PAGEOUT) && (error == ENXIO))
                    retval = 0;     /* drop the error */
            }
        }
        cbp_head = (struct buf *)0;
        cbp_tail = (struct buf *)0;
    }

    for (cbp = cbp_head; cbp;) {
        struct buf * cbp_next;

        if (cbp->b_vectorcount > 1)
            _FREE(cbp->b_vectorlist, M_SEGMENT);
        upl_offset -= cbp->b_bcount;
        size       += cbp->b_bcount;

        cbp_next = cbp->b_trans_next;
    }
    pg_offset  = upl_offset & PAGE_MASK;
    abort_size = ((size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;

    if (flags & CL_COMMIT) {

        if ((flags & CL_PAGEOUT) && (error != ENXIO)) /* transient error */
            upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
        else if (flags & CL_PAGEIN)
            upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
        else
            upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

        ubc_upl_abort_range(upl, upl_offset - pg_offset, abort_size,
            upl_abort_code);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
            (int)upl, upl_offset - pg_offset, abort_size, error, 0);
    }
    real_bp->b_flags |= B_ERROR;
    real_bp->b_error  = error;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
        (int)f_offset, size, upl_offset, retval, 0);
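/*
 * cluster_rd_prefetch() -- issue an advisory read for up to MAX_UPL_TRANSFER
 * pages starting at f_offset, skipping pages that are already resident in
 * the cache.  Returns the number of pages considered for the prefetch.
 */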
cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize)
{
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
        (int)f_offset, size, (int)filesize, 0, 0);

    if (f_offset >= filesize) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
            (int)f_offset, 0, 0, 0, 0);
    }
    if (size > (MAX_UPL_TRANSFER * PAGE_SIZE))
        size = MAX_UPL_TRANSFER * PAGE_SIZE;

    size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);

    if ((off_t)size > (filesize - f_offset))
        size = filesize - f_offset;

    pages_to_fetch = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;

    for (skipped_pages = 0; skipped_pages < pages_to_fetch; skipped_pages++) {
        if (ubc_page_op(vp, f_offset, 0, 0, 0) != KERN_SUCCESS)
            break;
        f_offset += PAGE_SIZE;
    }
    if (skipped_pages < pages_to_fetch)
        advisory_read(vp, filesize, f_offset, size, devblocksize);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
        (int)f_offset + (pages_to_fetch * PAGE_SIZE), skipped_pages, 0, 1, 0);

    return (pages_to_fetch);
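/*
 * cluster_rd_ahead() -- sequential read-ahead.  It tracks the access pattern
 * in v_lastr/v_maxra/v_ralen, grows the read-ahead window on sequential
 * hits, and calls cluster_rd_prefetch() for the blocks beyond the current
 * request.
 */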
cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize)
{
    int           size_of_prefetch;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
        b_lblkno, e_lblkno, vp->v_lastr, 0, 0);

    if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
            vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);
    }
    if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) &&
        (b_lblkno != (vp->v_maxra + 1) || vp->v_ralen == 0))) {

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
            vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);
    }
    max_pages = MAX_UPL_TRANSFER;

    vp->v_ralen = vp->v_ralen ? min(max_pages, vp->v_ralen << 1) : 1;

    if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
        vp->v_ralen = min(max_pages, (e_lblkno + 1) - b_lblkno);

    if (e_lblkno < vp->v_maxra) {
        if ((vp->v_maxra - e_lblkno) > max(max_pages / 16, 4)) {

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);
        }
    }
    r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
    f_offset = (off_t)r_lblkno * PAGE_SIZE_64;

    if (f_offset < filesize) {
        size_of_prefetch = cluster_rd_prefetch(vp, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);

        if (size_of_prefetch)
            vp->v_maxra = (r_lblkno + size_of_prefetch) - 1;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
        vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
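/*
 * cluster_pageout() -- VOP_PAGEOUT support.  It validates the request
 * against the EOF and page-alignment constraints, throttles against
 * ASYNC_THROTTLE, and pushes the pages through cluster_io() with
 * CL_PAGEOUT set.
 */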
cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    vm_offset_t   upl_offset;
{
    int           local_flags = CL_PAGEOUT;

    if ((flags & UPL_IOSYNC) == 0)
        local_flags |= CL_ASYNC;
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags |= CL_COMMIT;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
        (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * If they didn't specify any I/O, then we are done...
     * we can't issue an abort because we don't know how
     * big the upl really is
     */
    if (vp->v_mount->mnt_flag & MNT_RDONLY) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
    }
    /*
     * can't page-out to a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
        (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
    }
    max_size = filesize - f_offset;

    pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (size > pg_size) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
                UPL_ABORT_FREE_ON_EMPTY);
    }
    while (vp->v_numoutput >= ASYNC_THROTTLE) {
        vp->v_flag |= VTHROTTLED;
        tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_pageout", 0);
    }
    return (cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
        local_flags, (struct buf *)0));
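/*
 * cluster_pagein() -- VOP_PAGEIN support.  It validates the request, issues
 * the read through cluster_io() with CL_READ | CL_PAGEIN, and kicks off
 * read-ahead when the access pattern looks sequential.
 */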
cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    vm_offset_t   upl_offset;
{
    if (upl == NULL || size < 0)
        panic("cluster_pagein: NULL upl passed in");

    if ((flags & UPL_IOSYNC) == 0)
        local_flags |= CL_ASYNC;
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags |= CL_COMMIT;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
        (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * can't page-in from a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
        (f_offset & PAGE_MASK_64) || (size & PAGE_MASK) || (upl_offset & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
    }
    max_size = filesize - f_offset;

    rounded_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (size > rounded_size && (local_flags & CL_COMMIT))
        ubc_upl_abort_range(upl, upl_offset + rounded_size,
            size - (upl_offset + rounded_size), UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);

    retval = cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
        local_flags | CL_READ | CL_PAGEIN, (struct buf *)0);

    b_lblkno = (int)(f_offset / PAGE_SIZE_64);
    e_lblkno = (int)
        ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);

    if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF) && rounded_size == PAGE_SIZE) {
        /*
         * we haven't read the last page in of the file yet
         * so let's try to read ahead if we're in
         * a sequential access pattern
         */
        cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);

        vp->v_lastr = e_lblkno;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 19)) | DBG_FUNC_START,
        (int)bp, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);

    if (bp->b_pagelist == (upl_t) 0)
        panic("cluster_bp: can't handle NULL upl yet\n");
    if (bp->b_flags & B_READ)
        flags = CL_ASYNC | CL_READ;

    f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);

    return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, 0, flags, bp));
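/*
 * cluster_write() -- top-level write entry point.  Cached writes go through
 * cluster_write_x(); uncached (VNOCACHE_DATA) user-space writes are steered
 * to cluster_phys_write() for physically contiguous buffers, or to
 * cluster_nocopy_write() once the offset and base are page aligned.
 */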
cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
{
    vm_offset_t      upl_offset;
    upl_page_info_t *pl;

    if ((!uio) || (uio->uio_segflg != UIO_USERSPACE) || (!(vp->v_flag & VNOCACHE_DATA)))
    {
        retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
    }
    while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0)
    {
        /* we know we have a resid, so this is safe */
        while (iov->iov_len == 0) {
        }
        /*
         * We check every vector target and if it is physically
         * contiguous space, we skip the sanity checks.
         */
        upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
        upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE-1)) & ~PAGE_MASK;

        upl_flags = UPL_QUERY_OBJECT_TYPE;
        if ((vm_map_get_upl(current_map(),
                (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
        {
            /*
             * the user app must have passed in an invalid address
             */
        }

        if (upl_flags & UPL_PHYS_CONTIG)
        {
            /*
             * since the interface to the IOKit below us uses physical block #'s and
             * block counts to specify the I/O, we can't handle anything that isn't
             * devblocksize aligned
             */
            if ((uio->uio_offset & (devblocksize - 1)) || (uio->uio_resid & (devblocksize - 1)))

            if (flags & IO_HEADZEROFILL)
            {
                flags &= ~IO_HEADZEROFILL;

                if (retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, devblocksize, IO_HEADZEROFILL))
            }
            retval = cluster_phys_write(vp, uio);

            if (uio->uio_resid == 0 && (flags & IO_TAILZEROFILL))
            {
                retval = cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, devblocksize, IO_HEADZEROFILL);
            }
        }
        else if ((uio->uio_resid < 4 * PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)))
        {
            /*
             * We set a threshold of 4 pages to decide if the nocopy
             * write loop is worth the trouble...
             * we also come here if we're trying to zero the head and/or tail
             * of a partially written page, and the user source is not a physically contiguous region
             */
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
        }
        else if (uio->uio_offset & PAGE_MASK_64)
        {
            /* Bring the file offset write up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;
            /*
             * Fake the resid going into the cluster_write_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        }
        else if ((int)iov->iov_base & PAGE_MASK_64)
        {
            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        }
        else
        {
            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */
            max_io_size = newEOF - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = max_io_size;

            if (clip_size < PAGE_SIZE)
            {
                /*
                 * Take care of tail end of write in this vector
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
            else
            {
                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags);
                if ((retval == 0) && uio->uio_resid)
                    retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
        }
    }
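/*
 * cluster_nocopy_write() -- direct (copy-free) write path.  It wires the
 * user buffer with vm_map_get_upl(), throws away any overlapping pages
 * already in the cache, and writes directly from the user pages via a
 * synchronous cluster_io().
 */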
cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags)
{
    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    int              upl_needed_size;
    int              force_data_sync;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
        (int)uio->uio_offset, (int)uio->uio_resid,
        (int)newEOF, devblocksize, 0);

    /*
     * When we enter this routine, we know
     *  -- the offset into the file is on a pagesize boundary
     *  -- the resid is a page multiple
     *  -- the resid will not exceed iov_len
     */
    while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
        io_size = uio->uio_resid;

        if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            io_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
        upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
            (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);

        for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
        {
            upl_size = upl_needed_size;
            upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
                        UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

            kret = vm_map_get_upl(current_map(),
                (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);

            if (kret != KERN_SUCCESS)
            {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
                    0, 0, 0, kret, 0);
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                    (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);

                /* cluster_nocopy_write: failed to get pagelist */
                /* do not return kret here */
            }
            pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
            pages_in_pl = upl_size / PAGE_SIZE;

            for(i=0; i < pages_in_pl; i++)
            {
                if (!upl_valid_page(pl, i))
                    break;
            }
            if (i == pages_in_pl)
                break;

            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                UPL_ABORT_FREE_ON_EMPTY);
        }
        if (force_data_sync >= 3)
        {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
                i, pages_in_pl, upl_size, kret, 0);
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);
        }
        /*
         * Consider the possibility that upl_size wasn't satisfied.
         */
        if (upl_size != upl_needed_size)
            io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
            (int)upl_offset, upl_size, (int)iov->iov_base, io_size, 0);

        ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
            UPL_ABORT_FREE_ON_EMPTY);
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
            (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);

        /*
         * Now look for pages already in the cache
         * and throw them away.
         */
        upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
        max_io_size = io_size;

        while (max_io_size) {
            /*
             * Flag UPL_POP_DUMP says if the page is found
             * in the page cache it must be thrown away.
             */
            ubc_page_op(vp, upl_f_offset,
                UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
                0, 0);
            max_io_size  -= PAGE_SIZE;
            upl_f_offset += PAGE_SIZE;
        }
        /*
         * issue a synchronous write to cluster_io
         */
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
            (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);

        error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
            io_size, devblocksize, 0, (struct buf *)0);

        /*
         * The cluster_io write completed successfully,
         * update the uio structure.
         */
        iov->iov_base   += io_size;
        iov->iov_len    -= io_size;
        uio->uio_resid  -= io_size;
        uio->uio_offset += io_size;

        /*
         * always 'commit' the I/O via the abort primitive whether the I/O
         * succeeded cleanly or not... this is necessary to insure that
         * we preserve the state of the DIRTY flag on the pages used to
         * provide the data for the I/O... the state of this flag SHOULD
         * NOT be changed by a write
         */
        ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
            UPL_ABORT_FREE_ON_EMPTY);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
            (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
        (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
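/*
 * cluster_phys_write() -- write from a physically contiguous user buffer,
 * issued as a single CL_DEV_MEMORY transfer through cluster_io().
 */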
cluster_phys_write(vp, uio)
{
    vm_offset_t      upl_offset;
    int              upl_needed_size;

    /*
     * When we enter this routine, we know
     *  -- the resid will not exceed iov_len
     *  -- the vector target address is physically contiguous
     */
    io_size = iov->iov_len;
    upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
    upl_needed_size = upl_offset + io_size;

    upl_size = upl_needed_size;
    upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
                UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

    kret = vm_map_get_upl(current_map(),
        (vm_offset_t)iov->iov_base & ~PAGE_MASK,
        &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);

    if (kret != KERN_SUCCESS)
    {
        /* cluster_phys_write: failed to get pagelist */
        /* note: return kret here */
    }
    /*
     * Consider the possibility that upl_size wasn't satisfied.
     * This is a failure in the physical memory case.
     */
    if (upl_size < upl_needed_size)
    {
        kernel_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
    }
    /*
     * issue a synchronous write to cluster_io
     */
    error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
        io_size, 0, CL_DEV_MEMORY, (struct buf *)0);

    /*
     * The cluster_io write completed successfully,
     * update the uio structure and commit.
     */
    ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);

    iov->iov_base   += io_size;
    iov->iov_len    -= io_size;
    uio->uio_resid  -= io_size;
    uio->uio_offset += io_size;

    ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
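/*
 * cluster_write_x() -- the buffered write path.  It maps a UPL over the
 * affected file range, pre-reads any partially valid edge pages, zero-fills
 * head/tail areas as requested, copies the user data in with uiomove(), and
 * then either merges the dirty range into the vnode's cluster list or pushes
 * it out through cluster_io().
 */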
cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
{
    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    vm_offset_t      io_address;
    long long        total_size;
    long long        zero_cnt1;
    daddr_t          start_blkno;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);

    uio_resid = uio->uio_resid;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
        0, 0, (int)oldEOF, (int)newEOF, 0);

    if (flags & IO_HEADZEROFILL) {
        /*
         * some filesystems (HFS is one) don't support unallocated holes within a file...
         * so we zero fill the intervening space between the old EOF and the offset
         * where the next chunk of real data begins.... ftruncate will also use this
         * routine to zero fill to the new EOF when growing a file... in this case, the
         * uio structure will not be provided
         */
        if (headOff < uio->uio_offset) {
            zero_cnt = uio->uio_offset - headOff;
        } else if (headOff < newEOF) {
            zero_cnt = newEOF - headOff;
        }
    }
    if (flags & IO_TAILZEROFILL) {
        zero_off1 = uio->uio_offset + uio->uio_resid;

        if (zero_off1 < tailOff)
            zero_cnt1 = tailOff - zero_off1;
    }
    if (zero_cnt == 0 && uio == (struct uio *) 0)
    {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
            retval, 0, 0, 0, 0);
    }
    while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
        /*
         * for this iteration of the loop, figure out where our starting point is
         */
        start_offset = (int)(zero_off & PAGE_MASK_64);
        upl_f_offset = zero_off - start_offset;
        } else if (uio_resid) {
            start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
            upl_f_offset = uio->uio_offset - start_offset;
        } else {
            start_offset = (int)(zero_off1 & PAGE_MASK_64);
            upl_f_offset = zero_off1 - start_offset;
        }
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
            (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);

        if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            total_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        /*
         * compute the size of the upl needed to encompass
         * the requested write... limit each call to cluster_io
         * to the maximum UPL size... cluster_io will clip if
         * this exceeds the maximum io_size for the device,
         * make sure to account for
         * a starting offset that's not page aligned
         */
        upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

        if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        pages_in_upl = upl_size / PAGE_SIZE;
        io_size      = upl_size - start_offset;

        if ((long long)io_size > total_size)
            io_size = total_size;

        start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
        last_blkno  = start_blkno + pages_in_upl;

        kret = ubc_create_upl(vp,

        if (kret != KERN_SUCCESS)
            panic("cluster_write: failed to get pagelist");

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
            (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        if (start_offset && !upl_valid_page(pl, 0)) {
            /*
             * we're starting in the middle of the first page of the upl
             * and the page isn't currently valid, so we're going to have
             * to read it in first... this is a synchronous operation
             */
            read_size = PAGE_SIZE;

            if ((upl_f_offset + read_size) > newEOF)
                read_size = newEOF - upl_f_offset;

            retval = cluster_io(vp, upl, 0, upl_f_offset, read_size, devblocksize,
                CL_READ, (struct buf *)0);
            /*
             * we had an error during the read which causes us to abort
             * the current cluster_write request... before we do, we need
             * to release the rest of the pages in the upl without modifying
             * their state and mark the failed page in error
             */
            ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
            ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                (int)upl, 0, 0, retval, 0);
        }
        if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
            /*
             * the last offset we're writing to in this upl does not end on a page
             * boundary... if it's not beyond the old EOF, then we'll also need to
             * pre-read this page in if it isn't already valid
             */
            upl_offset = upl_size - PAGE_SIZE;

            if ((upl_f_offset + start_offset + io_size) < oldEOF &&
                !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {

                read_size = PAGE_SIZE;

                if ((upl_f_offset + upl_offset + read_size) > newEOF)
                    read_size = newEOF - (upl_f_offset + upl_offset);

                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size, devblocksize,
                    CL_READ, (struct buf *)0);
                /*
                 * we had an error during the read which causes us to abort
                 * the current cluster_write request... before we do, we
                 * need to release the rest of the pages in the upl without
                 * modifying their state and mark the failed page in error
                 */
                ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                    (int)upl, 0, 0, retval, 0);
            }
        }
        if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
            panic("cluster_write: ubc_upl_map failed\n");
        xfer_resid = io_size;
        io_offset  = start_offset;

        while (zero_cnt && xfer_resid) {

            if (zero_cnt < (long long)xfer_resid)
                bytes_to_zero = zero_cnt;
            else
                bytes_to_zero = xfer_resid;

            if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                    (int)upl_f_offset + io_offset, bytes_to_zero,
                    (int)io_offset, xfer_resid, 0);
            } else {
                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
                zero_pg_index = (int)((zero_off - upl_f_offset) / PAGE_SIZE_64);

                if ( !upl_valid_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset, bytes_to_zero,
                        (int)io_offset, xfer_resid, 0);

                } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
                    !upl_dirty_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset, bytes_to_zero,
                        (int)io_offset, xfer_resid, 0);
                }
            }
            xfer_resid -= bytes_to_zero;
            zero_cnt   -= bytes_to_zero;
            zero_off   += bytes_to_zero;
            io_offset  += bytes_to_zero;
        }
        if (xfer_resid && uio_resid) {
            bytes_to_move = min(uio_resid, xfer_resid);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
                (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);

            retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);

            if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                panic("cluster_write: kernel_upl_unmap failed\n");

            ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                (int)upl, 0, 0, retval, 0);

            uio_resid  -= bytes_to_move;
            xfer_resid -= bytes_to_move;
            io_offset  += bytes_to_move;
        }
        while (xfer_resid && zero_cnt1 && retval == 0) {

            if (zero_cnt1 < (long long)xfer_resid)
                bytes_to_zero = zero_cnt1;
            else
                bytes_to_zero = xfer_resid;

            if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                    (int)upl_f_offset + io_offset,
                    bytes_to_zero, (int)io_offset, xfer_resid, 0);
            } else {
                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
                zero_pg_index = (int)((zero_off1 - upl_f_offset) / PAGE_SIZE_64);

                if ( !upl_valid_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset,
                        bytes_to_zero, (int)io_offset, xfer_resid, 0);

                } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
                    !upl_dirty_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset,
                        bytes_to_zero, (int)io_offset, xfer_resid, 0);
                }
            }
            xfer_resid -= bytes_to_zero;
            zero_cnt1  -= bytes_to_zero;
            zero_off1  += bytes_to_zero;
            io_offset  += bytes_to_zero;
        }

        io_size += start_offset;

        if ((upl_f_offset + io_size) >= newEOF && io_size < upl_size) {
            /*
             * if we're extending the file with this write
             * we'll zero fill the rest of the page so that
             * if the file gets extended again in such a way as to leave a
             * hole starting at this EOF, we'll have zero's in the correct spot
             */
            bzero((caddr_t)(io_address + io_size), upl_size - io_size);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                (int)upl_f_offset + io_size,
                upl_size - io_size, 0, 0, 0);
        }
        if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
            panic("cluster_write: kernel_upl_unmap failed\n");

        if (flags & IO_SYNC)
            /*
             * if the IO_SYNC flag is set then we need to
             * bypass any clusters and immediately issue
             */

        if (vp->v_clen == 0)
            /*
             * no clusters currently present
             */
            goto start_new_cluster;

        /*
         * keep track of the overall dirty page
         * range we've developed
         * in case we have to fall back to the
         * VHASDIRTY method of flushing
         */
        if (vp->v_flag & VHASDIRTY)

        for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
            /*
             * we have an existing cluster... see if this write will extend it nicely
             */
            if (start_blkno >= vp->v_clusters[cl_index].start_pg) {
                /*
                 * the current write starts at or after the current cluster
                 */
                if (last_blkno <= (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
                    /*
                     * we have a write that fits entirely
                     * within the existing cluster limits
                     */
                    if (last_blkno > vp->v_clusters[cl_index].last_pg)
                        /*
                         * update our idea of where the cluster ends
                         */
                        vp->v_clusters[cl_index].last_pg = last_blkno;
                }
                if (start_blkno < (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
                    /*
                     * we have a write that starts in the middle of the current cluster
                     * but extends beyond the cluster's limit
                     * we'll clip the current cluster if we actually
                     * overlap with the new write
                     * and start a new cluster with the current write
                     */
                    if (vp->v_clusters[cl_index].last_pg > start_blkno)
                        vp->v_clusters[cl_index].last_pg = start_blkno;
                }
                /*
                 * we also get here for the case where the current write starts
                 * beyond the limit of the existing cluster
                 *
                 * in either case, we'll check the remaining clusters before
                 * starting a new one
                 */
            } else {
                /*
                 * the current write starts in front of the current cluster
                 */
                if ((vp->v_clusters[cl_index].last_pg - start_blkno) <= MAX_UPL_TRANSFER) {
                    /*
                     * we can just merge the old cluster
                     * with the new request and leave it
                     */
                    vp->v_clusters[cl_index].start_pg = start_blkno;

                    if (last_blkno > vp->v_clusters[cl_index].last_pg) {
                        /*
                         * the current write completely
                         * envelops the existing cluster
                         */
                        vp->v_clusters[cl_index].last_pg = last_blkno;
                    }
                } else {
                    /*
                     * if we were to combine this write with the current cluster
                     * we would exceed the cluster size limit.... so,
                     * let's see if there's any overlap of the new I/O with
                     * the existing cluster...
                     */
                    if (last_blkno > vp->v_clusters[cl_index].start_pg)
                        /*
                         * the current write extends into the existing cluster
                         * clip the current cluster by moving the start position
                         * to where the current write ends
                         */
                        vp->v_clusters[cl_index].start_pg = last_blkno;
                    /*
                     * if we get here, there was no way to merge
                     * the new I/O with this cluster and
                     * keep it under our maximum cluster length
                     * we'll check the remaining clusters before starting a new one
                     */
                }
            }
        }
        if (cl_index < vp->v_clen)
            /*
             * we found an existing cluster that we
             * could merge this I/O into
             */

        if (vp->v_clen < MAX_CLUSTERS && !(vp->v_flag & VNOCACHE_DATA))
            /*
             * we didn't find an existing cluster to
             * merge into, but there's room to start
             * a new one
             */
            goto start_new_cluster;

        /*
         * no existing cluster to merge with and no
         * room to start a new one... we'll try
         * pushing the existing ones... if none of
         * them are able to be pushed, we'll have
         * to fall back on the VHASDIRTY mechanism
         * cluster_try_push will set v_clen to the
         * number of remaining clusters if it is
         * unable to push all of them
         */
        if (vp->v_flag & VNOCACHE_DATA)

        if (cluster_try_push(vp, newEOF, can_delay, 0) == 0) {
            vp->v_flag |= VHASDIRTY;
        }
        if (vp->v_clen == 0) {
            vp->v_ciosiz = devblocksize;
            vp->v_cstart = start_blkno;
            vp->v_lastw  = last_blkno;
        }
        vp->v_clusters[vp->v_clen].start_pg = start_blkno;
        vp->v_clusters[vp->v_clen].last_pg  = last_blkno;

        /*
         * make sure we keep v_cstart and v_lastw up to
         * date in case we have to fall back on the
         * V_HASDIRTY mechanism (or we've already entered it)
         */
        if (start_blkno < vp->v_cstart)
            vp->v_cstart = start_blkno;
        if (last_blkno > vp->v_lastw)
            vp->v_lastw = last_blkno;

        ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);

        /*
         * in order to maintain some semblance of coherency with mapped writes
         * we need to write the cluster back out as a multiple of the PAGESIZE
         * unless the cluster encompasses the last page of the file... in this
         * case we'll round out to the nearest device block boundary
         */
        if ((upl_f_offset + io_size) > newEOF) {
            io_size = newEOF - upl_f_offset;
            io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
        }
        if (flags & IO_SYNC)
            io_flags = CL_COMMIT | CL_AGE;
        else
            io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;

        if (vp->v_flag & VNOCACHE_DATA)
            io_flags |= CL_DUMP;

        while (vp->v_numoutput >= ASYNC_THROTTLE) {
            vp->v_flag |= VTHROTTLED;
            tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_write", 0);
        }
        retval = cluster_io(vp, upl, 0, upl_f_offset, io_size, devblocksize,
            io_flags, (struct buf *)0);
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
        retval, 0, 0, 0, 0);
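/*
 * cluster_read() -- top-level read entry point.  Cached reads go through
 * cluster_read_x(); uncached (VNOCACHE_DATA) user-space reads are steered
 * to cluster_phys_read() or cluster_nocopy_read() once the request is page
 * aligned and large enough to be worth it.
 */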
cluster_read(vp, uio, filesize, devblocksize, flags)
{
    vm_offset_t      upl_offset;
    upl_page_info_t *pl;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);

    /*
     * We set a threshold of 4 pages to decide if the nocopy
     * read loop is worth the trouble...
     */
    if (!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE)))
    {
        retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
            (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
    }
    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0)
    {
        /* we know we have a resid, so this is safe */
        while (iov->iov_len == 0) {
        }
        /*
         * We check every vector target and if it is physically
         * contiguous space, we skip the sanity checks.
         */
        upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
        upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE-1)) & ~PAGE_MASK;

        upl_flags = UPL_QUERY_OBJECT_TYPE;
        if((vm_map_get_upl(current_map(),
                (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
        {
            /*
             * the user app must have passed in an invalid address
             */
        }

        if (upl_flags & UPL_PHYS_CONTIG)
        {
            retval = cluster_phys_read(vp, uio, filesize);
        }
        else if (uio->uio_resid < 4 * PAGE_SIZE)
        {
            /*
             * We set a threshold of 4 pages to decide if the nocopy
             * read loop is worth the trouble...
             */
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
                (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
        }
        else if (uio->uio_offset & PAGE_MASK_64)
        {
            /* Bring the file offset read up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;
            /*
             * Fake the resid going into the cluster_read_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        }
        else if ((int)iov->iov_base & PAGE_MASK_64)
        {
            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
        }
        else
        {
            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */
            max_io_size = filesize - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = (int)max_io_size;

            if (clip_size < PAGE_SIZE)
            {
                /*
                 * Take care of the tail end of the read in this vector.
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
            else
            {
                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_read(vp, uio, filesize, devblocksize, flags);
                if ((retval==0) && uio->uio_resid)
                    retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
            }
        }
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
        (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
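/*
 * cluster_read_x() -- the buffered read path.  It copies data directly for
 * pages already resident in the cache, otherwise builds a UPL over the
 * missing range, reads it in through cluster_io(), starts prefetch and
 * read-ahead where appropriate, and then commits or aborts the UPL pages.
 */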
2120 cluster_read_x(vp
, uio
, filesize
, devblocksize
, flags
)
2127 upl_page_info_t
*pl
;
2129 vm_offset_t upl_offset
;
2139 vm_offset_t io_address
;
2147 b_lblkno
= (int)(uio
->uio_offset
/ PAGE_SIZE_64
);
2149 while (uio
->uio_resid
&& uio
->uio_offset
< filesize
&& retval
== 0) {
2151 * compute the size of the upl needed to encompass
2152 * the requested read... limit each call to cluster_io
2153 * to the maximum UPL size... cluster_io will clip if
2154 * this exceeds the maximum io_size for the device,
2155 * make sure to account for
2156 * a starting offset that's not page aligned
2158 start_offset
= (int)(uio
->uio_offset
& PAGE_MASK_64
);
2159 upl_f_offset
= uio
->uio_offset
- (off_t
)start_offset
;
2160 max_size
= filesize
- uio
->uio_offset
;
2162 if ((off_t
)((unsigned int)uio
->uio_resid
) < max_size
)
2163 io_size
= uio
->uio_resid
;
2167 if (uio
->uio_segflg
== UIO_USERSPACE
&& !(vp
->v_flag
& VNOCACHE_DATA
)) {
2168 segflg
= uio
->uio_segflg
;
2170 uio
->uio_segflg
= UIO_PHYS_USERSPACE
;
2172 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 34)) | DBG_FUNC_START
,
2173 (int)uio
->uio_offset
, io_size
, uio
->uio_resid
, 0, 0);
2175 while (io_size
&& retval
== 0) {
2181 UPL_POP_SET
| UPL_POP_BUSY
,
2182 &paddr
, 0) != KERN_SUCCESS
)
2185 xsize
= PAGE_SIZE
- start_offset
;
2187 if (xsize
> io_size
)
2190 retval
= uiomove((caddr_t
)(paddr
+ start_offset
), xsize
, uio
);
2192 ubc_page_op(vp
, upl_f_offset
,
2193 UPL_POP_CLR
| UPL_POP_BUSY
, 0, 0);
2196 start_offset
= (int)
2197 (uio
->uio_offset
& PAGE_MASK_64
);
2198 upl_f_offset
= uio
->uio_offset
- start_offset
;
2200 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 34)) | DBG_FUNC_END
,
2201 (int)uio
->uio_offset
, io_size
, uio
->uio_resid
, 0, 0);
2203 uio
->uio_segflg
= segflg
;
2210 * we're already finished with this read request
2211 * let's see if we should do a read-ahead
2214 ((uio
->uio_offset
- 1) / PAGE_SIZE_64
);
2216 if (!(vp
->v_flag
& VRAOFF
))
2218 * let's try to read ahead if we're in
2219 * a sequential access pattern
2221 cluster_rd_ahead(vp
, b_lblkno
, e_lblkno
, filesize
, devblocksize
);
2222 vp
->v_lastr
= e_lblkno
;
2226 max_size
= filesize
- uio
->uio_offset
;
        upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
        if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
                upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
        pages_in_upl = upl_size / PAGE_SIZE;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START,
                     (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        kret = ubc_create_upl(vp,
                              upl_f_offset,
                              upl_size,
                              &upl,
                              &pl,
                              UPL_FLAGS_NONE);
        if (kret != KERN_SUCCESS)
                panic("cluster_read: failed to get pagelist");

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END,
                     (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        /*
         * scan from the beginning of the upl looking for the first
         * non-valid page.... this will become the first page in
         * the request we're going to make to 'cluster_io'... if all
         * of the pages are valid, we won't call through to 'cluster_io'
         */
        for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
            if (!upl_valid_page(pl, start_pg))
                break;
        }

        /*
         * scan from the starting invalid page looking for a valid
         * page before the end of the upl is reached, if we
         * find one, then it will be the last page of the request to
         * 'cluster_io'
         */
        for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
            if (upl_valid_page(pl, last_pg))
                break;
        }

        if (start_pg < last_pg) {
            /*
             * we found a range of 'invalid' pages that must be filled
             * if the last page in this range is the last page of the file
             * we may have to clip the size of it to keep from reading past
             * the end of the last physical block associated with the file
             */
            upl_offset = start_pg * PAGE_SIZE;
            io_size    = (last_pg - start_pg) * PAGE_SIZE;

            if ((upl_f_offset + upl_offset + io_size) > filesize)
                io_size = filesize - (upl_f_offset + upl_offset);

            /*
             * issue a synchronous read to cluster_io
             */
            error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
                               io_size, devblocksize, CL_READ, (struct buf *)0);
        }
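        /*
         * at this point any missing pages have been filled in by the
         * synchronous cluster_io above (barring an error)... what follows
         * copies the data out to the caller, extending the copy over any
         * trailing pages that were already valid when the UPL was acquired
         */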
        if (error == 0) {
            /*
             * if the read completed successfully, or there was no I/O request
             * issued, then map the upl into kernel address space and
             * move the data into user land.... we'll first add on any 'valid'
             * pages that were present in the upl when we acquired it.
             */
            u_int  val_size;
            u_int  size_of_prefetch;

            for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
                if (!upl_valid_page(pl, uio_last))
                    break;
            }
            /*
             * compute size to transfer this round, if uio->uio_resid is
             * still non-zero after this uiomove, we'll loop around and
             * set up for another I/O.
             */
            val_size = (uio_last * PAGE_SIZE) - start_offset;

            if (max_size < val_size)
                val_size = max_size;

            if (uio->uio_resid < val_size)
                val_size = uio->uio_resid;

            e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);

            if (size_of_prefetch = (uio->uio_resid - val_size)) {
                /*
                 * if there's still I/O left to do for this request, then issue a
                 * pre-fetch I/O... the I/O wait time will overlap
                 * with the copying of the data
                 */
                cluster_rd_prefetch(vp, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);
            } else {
                if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
                    /*
                     * let's try to read ahead if we're in
                     * a sequential access pattern
                     */
                    cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
                vp->v_lastr = e_lblkno;
            }
            if (uio->uio_segflg == UIO_USERSPACE) {
                int  offset;

                segflg = uio->uio_segflg;

                uio->uio_segflg = UIO_PHYS_USERSPACE;

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
                             (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);

                offset = start_offset;

                while (val_size && retval == 0) {
                    int      csize;
                    int      i;
                    caddr_t  paddr;

                    i = offset / PAGE_SIZE;
                    csize = min(PAGE_SIZE - start_offset, val_size);

                    paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;

                    retval = uiomove(paddr, csize, uio);

                    val_size    -= csize;
                    offset      += csize;
                    start_offset = offset & PAGE_MASK;
                }
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
                             (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);

                uio->uio_segflg = segflg;
            } else {
                if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
                    panic("cluster_read: ubc_upl_map() failed\n");

                retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);

                if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                    panic("cluster_read: ubc_upl_unmap() failed\n");
            }
        }
        if (start_pg < last_pg) {
            /*
             * compute the range of pages that we actually issued an I/O for
             * and either commit them as valid if the I/O succeeded
             * or abort them if the I/O failed
             */
            io_size = (last_pg - start_pg) * PAGE_SIZE;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
                         (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);

            if (error || (vp->v_flag & VNOCACHE_DATA))
                ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
                                    UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
            else
                ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size,
                                     UPL_COMMIT_CLEAR_DIRTY
                                     | UPL_COMMIT_FREE_ON_EMPTY
                                     | UPL_COMMIT_INACTIVATE);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
                         (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);
        }
        if ((last_pg - start_pg) < pages_in_upl) {
            int cur_pg;
            int commit_flags;

            /*
             * the set of pages that we issued an I/O for did not encompass
             * the entire upl... so just release these without modifying
             * their state
             */
            if (error)
                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
            else {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
                             (int)upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);

                if (start_pg) {
                    /*
                     * we found some already valid pages at the beginning of
                     * the upl commit these back to the inactive list with
                     * reference cleared
                     */
                    for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
                        commit_flags = UPL_COMMIT_FREE_ON_EMPTY
                                       | UPL_COMMIT_INACTIVATE;

                        if (upl_dirty_page(pl, cur_pg))
                            commit_flags |= UPL_COMMIT_SET_DIRTY;

                        if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
                            ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
                                                UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
                        else
                            ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
                                                 PAGE_SIZE, commit_flags);
                    }
                }
                if (last_pg < uio_last) {
                    /*
                     * we found some already valid pages immediately after the
                     * pages we issued I/O for, commit these back to the
                     * inactive list with reference cleared
                     */
                    for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
                        commit_flags = UPL_COMMIT_FREE_ON_EMPTY
                                       | UPL_COMMIT_INACTIVATE;

                        if (upl_dirty_page(pl, cur_pg))
                            commit_flags |= UPL_COMMIT_SET_DIRTY;

                        if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
                            ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
                                                UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
                        else
                            ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
                                                 PAGE_SIZE, commit_flags);
                    }
                }
                if (uio_last < pages_in_upl) {
                    /*
                     * there were some invalid pages beyond the valid pages
                     * that we didn't issue an I/O for, just release them
                     * unchanged
                     */
                    ubc_upl_abort_range(upl, uio_last * PAGE_SIZE,
                                        (pages_in_upl - uio_last) * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
                }
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
                             (int)upl, -1, -1, 0, 0);
            }
        }
        if (retval == 0)
            retval = error;
    }

    return (retval);
}
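/*
 * cluster_nocopy_read is the no-copy (direct) read path... the caller's
 * buffer is described by a UPL obtained from vm_map_get_upl and the
 * device read (CL_READ | CL_NOZERO) lands directly in user memory,
 * bypassing a copy through the buffer cache.  pages that already happen
 * to be resident are copied out first with ubc_page_op/uiomove so the
 * cache and the user's view stay coherent.
 */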
static int
cluster_nocopy_read(vp, uio, filesize, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t         filesize;
    int           devblocksize;
    int           flags;
{
    upl_page_info_t  *pl;
    upl_t            upl;
    vm_offset_t      upl_offset;
    vm_offset_t      paddr;
    off_t            start_upl_f_offset;
    off_t            upl_f_offset;
    off_t            max_io_size;
    int              io_size;
    int              upl_size;
    int              upl_needed_size;
    int              pages_in_pl;
    int              upl_flags;
    int              force_data_sync;
    int              segflg;
    int              i;
    kern_return_t    kret;
    struct iovec     *iov;
    int              retval = 0;
    int              error  = 0;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
                 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);

    /*
     * When we enter this routine, we know
     *  -- the offset into the file is on a pagesize boundary
     *  -- the resid is a page multiple
     *  -- the resid will not exceed iov_len
     */
    iov = uio->uio_iov;

    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {

        max_io_size = filesize - uio->uio_offset;

        if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
            io_size = max_io_size;
        else
            io_size = uio->uio_resid;

        /*
         * We don't come into this routine unless
         * UIO_USERSPACE is set.
         */
        segflg = uio->uio_segflg;

        uio->uio_segflg = UIO_PHYS_USERSPACE;

        /*
         * First look for pages already in the cache
         * and move them to user space.
         */
        while (io_size && (retval == 0)) {
            upl_f_offset = uio->uio_offset;

            /*
             * If this call fails, it means the page is not
             * in the page cache.
             */
            if (ubc_page_op(vp, upl_f_offset,
                            UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
                break;

            retval = uiomove((caddr_t)(paddr), PAGE_SIZE, uio);

            ubc_page_op(vp, upl_f_offset,
                        UPL_POP_CLR | UPL_POP_BUSY, 0, 0);

            io_size -= PAGE_SIZE;
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
                         (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
        }

        uio->uio_segflg = segflg;

        if (retval) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, 2, retval, 0);
            return(retval);
        }

        /* If we are already finished with this read, then return */
        if (io_size == 0) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, 3, io_size, 0);
            return(0);
        }
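        /*
         * whatever couldn't be satisfied from the cache is read directly
         * into the user's buffer below... the request is sized up to the
         * first page that is already resident (so the direct I/O never
         * overlaps cached data), a UPL describing the user pages is
         * obtained, and the read is handed to cluster_io
         */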
        max_io_size = io_size;
        if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        start_upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
        upl_f_offset = start_upl_f_offset;
        io_size = 0;

        while (io_size < max_io_size) {
            if (ubc_page_op(vp, upl_f_offset,
                            UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS) {
                ubc_page_op(vp, upl_f_offset,
                            UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
                break;
            }
            /*
             * Build up the io request parameters.
             */
            io_size      += PAGE_SIZE;
            upl_f_offset += PAGE_SIZE;
        }

        upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
        upl_needed_size = (upl_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
                     (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);

        for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
            upl_size = upl_needed_size;
            upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

            kret = vm_map_get_upl(current_map(),
                                  (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                                  &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);

            if (kret != KERN_SUCCESS) {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
                             (int)upl_offset, upl_size, io_size, kret, 0);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                             (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);

                /* cluster_nocopy_read: failed to get pagelist */
                /* do not return kret here */
                return(retval);
            }
            pages_in_pl = upl_size / PAGE_SIZE;
            pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

            for (i = 0; i < pages_in_pl; i++) {
                if (!upl_valid_page(pl, i))
                    break;
            }
            if (i == pages_in_pl)
                break;

            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        if (force_data_sync >= 3) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
                         (int)upl_offset, upl_size, io_size, kret, 0);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);
            return(retval);
        }
        /*
         * Consider the possibility that upl_size wasn't satisfied.
         */
        if (upl_size != upl_needed_size)
            io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;

        if (io_size == 0) {
            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
            return(retval);
        }

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
                     (int)upl_offset, upl_size, io_size, kret, 0);
        /*
         * issue a synchronous read to cluster_io
         */
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
                     (int)upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);

        error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
                           io_size, devblocksize, CL_READ | CL_NOZERO, (struct buf *)0);

        if (error == 0) {
            /*
             * The cluster_io read completed successfully,
             * update the uio structure and commit.
             */
            ubc_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);

            iov->iov_base   += io_size;
            iov->iov_len    -= io_size;
            uio->uio_resid  -= io_size;
            uio->uio_offset += io_size;
        } else {
            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
                     (int)upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);

        if (retval == 0)
            retval = error;

    } /* end while */

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                 (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);

    return (retval);
}
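/*
 * cluster_phys_read handles a read whose target address is physically
 * contiguous (e.g. device memory)... the whole transfer is described by
 * a single UPL over the caller's buffer and issued to cluster_io as one
 * CL_READ | CL_NOZERO | CL_DEV_MEMORY request, with no page-level
 * scanning or read-ahead
 */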
static int
cluster_phys_read(vp, uio, filesize)
    struct vnode *vp;
    struct uio   *uio;
    off_t         filesize;
{
    upl_t            upl;
    vm_offset_t      upl_offset;
    off_t            max_size;
    int              io_size;
    int              upl_size;
    int              upl_needed_size;
    int              pages_in_pl;
    int              upl_flags;
    kern_return_t    kret;
    struct iovec     *iov;
    int              error;

    /*
     * When we enter this routine, we know
     *  -- the resid will not exceed iov_len
     *  -- the target address is physically contiguous
     */
    iov = uio->uio_iov;

    max_size = filesize - uio->uio_offset;

    if (max_size < (off_t)((unsigned int)iov->iov_len))
        io_size = max_size;
    else
        io_size = iov->iov_len;

    upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
    upl_needed_size = upl_offset + io_size;

    pages_in_pl = 0;
    upl_size = upl_needed_size;
    upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

    kret = vm_map_get_upl(current_map(),
                          (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                          &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);

    if (kret != KERN_SUCCESS) {
        /* cluster_phys_read: failed to get pagelist */
        return(EINVAL);
    }
    /*
     * Consider the possibility that upl_size wasn't satisfied.
     */
    if (upl_size < upl_needed_size) {
        ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
        return(EINVAL);
    }

    /*
     * issue a synchronous read to cluster_io
     */
    error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
                       io_size, 0, CL_READ | CL_NOZERO | CL_DEV_MEMORY, (struct buf *)0);

    if (error == 0) {
        /*
         * The cluster_io read completed successfully,
         * update the uio structure and commit.
         */
        ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);

        iov->iov_base   += io_size;
        iov->iov_len    -= io_size;
        uio->uio_resid  -= io_size;
        uio->uio_offset += io_size;
    } else
        ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

    return (error);
}
/*
 * generate advisory I/O's in the largest chunks possible
 * the completed pages will be released into the VM cache
 */
int
advisory_read(vp, filesize, f_offset, resid, devblocksize)
    struct vnode *vp;
    off_t         filesize;
    off_t         f_offset;
    int           resid;
    int           devblocksize;
{
    upl_page_info_t  *pl;
    upl_t            upl;
    vm_offset_t      upl_offset;
    int              upl_size;
    off_t            upl_f_offset;
    int              start_offset;
    int              start_pg;
    int              last_pg;
    int              pages_in_upl;
    off_t            max_size;
    int              io_size;
    kern_return_t    kret;
    int              retval = 0;

    if (!UBCINFOEXISTS(vp))
        return(EINVAL);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
                 (int)f_offset, resid, (int)filesize, devblocksize, 0);

    while (resid && f_offset < filesize && retval == 0) {
        /*
         * compute the size of the upl needed to encompass
         * the requested read... limit each call to cluster_io
         * to the maximum UPL size... cluster_io will clip if
         * this exceeds the maximum io_size for the device,
         * make sure to account for a starting offset that's
         * not page aligned
         */
        start_offset = (int)(f_offset & PAGE_MASK_64);
        upl_f_offset = f_offset - (off_t)start_offset;
        max_size     = filesize - f_offset;

        if (resid < max_size)
            io_size = resid;
        else
            io_size = max_size;

        upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
        if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
        pages_in_upl = upl_size / PAGE_SIZE;

        kret = ubc_create_upl(vp,
                              upl_f_offset,
                              upl_size,
                              &upl,
                              &pl,
                              UPL_RET_ONLY_ABSENT);
        if (kret != KERN_SUCCESS)
            return(retval);

        /*
         * before we start marching forward, we must make sure we end on
         * a present page, otherwise we will be working with a freed
         * upl
         */
        for (last_pg = pages_in_upl - 1; last_pg >= 0; last_pg--) {
            if (upl_page_present(pl, last_pg))
                break;
        }
        pages_in_upl = last_pg + 1;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
                     (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        for (last_pg = 0; last_pg < pages_in_upl; ) {
            /*
             * scan from the beginning of the upl looking for the first
             * page that is present.... this will become the first page in
             * the request we're going to make to 'cluster_io'... if all
             * of the pages are absent, we won't call through to 'cluster_io'
             */
            for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
                if (upl_page_present(pl, start_pg))
                    break;
            }
            /*
             * scan from the starting present page looking for an absent
             * page before the end of the upl is reached, if we
             * find one, then it will terminate the range of pages being
             * presented to 'cluster_io'
             */
            for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
                if (!upl_page_present(pl, last_pg))
                    break;
            }

            if (last_pg > start_pg) {
                /*
                 * we found a range of pages that must be filled
                 * if the last page in this range is the last page of the file
                 * we may have to clip the size of it to keep from reading past
                 * the end of the last physical block associated with the file
                 */
                upl_offset = start_pg * PAGE_SIZE;
                io_size    = (last_pg - start_pg) * PAGE_SIZE;

                if ((upl_f_offset + upl_offset + io_size) > filesize)
                    io_size = filesize - (upl_f_offset + upl_offset);

                /*
                 * issue an asynchronous read to cluster_io
                 */
                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, devblocksize,
                                    CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0);
            }
        }
        ubc_upl_abort(upl, 0);

        io_size = upl_size - start_offset;

        if (io_size > resid)
            io_size = resid;
        f_offset += io_size;
        resid    -= io_size;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
                 (int)f_offset, resid, retval, 0, 0);

    return(retval);
}
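/*
 * cluster_push flushes the delayed-write state held on the vnode...
 * if the vnode has fallen back to the VHASDIRTY mechanism the entire
 * dirty range from v_cstart to v_lastw is pushed in MAX_UPL_TRANSFER
 * sized chunks, otherwise the individual clusters are handed to
 * cluster_try_push
 */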
int
cluster_push(vp)
    struct vnode *vp;
{
    int  retval;

    if (!UBCINFOEXISTS(vp) || vp->v_clen == 0) {
        vp->v_flag &= ~VHASDIRTY;
        return(0);
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_START,
                 vp->v_flag & VHASDIRTY, vp->v_clen, 0, 0, 0);

    if (vp->v_flag & VHASDIRTY) {
        daddr_t start_pg;
        daddr_t last_pg;
        daddr_t end_pg;

        start_pg = vp->v_cstart;
        end_pg   = vp->v_lastw;

        vp->v_flag &= ~VHASDIRTY;
        vp->v_clen = 0;

        while (start_pg < end_pg) {
            last_pg = start_pg + MAX_UPL_TRANSFER;

            if (last_pg > end_pg)
                last_pg = end_pg;

            cluster_push_x(vp, ubc_getsize(vp), start_pg, last_pg, 0);

            start_pg = last_pg;
        }
        return (1);
    }
    retval = cluster_try_push(vp, ubc_getsize(vp), 0, 1);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_END,
                 vp->v_flag & VHASDIRTY, vp->v_clen, retval, 0, 0);

    return (retval);
}
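/*
 * cluster_try_push makes a local copy of the vnode's clusters, sorted
 * by starting page, and attempts to push each one via cluster_push_x...
 * clusters that were not pushed are merged back into vp->v_clusters or,
 * if there is no longer room to represent them, the vnode is flipped to
 * the VHASDIRTY mechanism... returns the number of free cluster slots
 * (MAX_CLUSTERS - vp->v_clen)
 */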
static int
cluster_try_push(vp, EOF, can_delay, push_all)
    struct vnode *vp;
    off_t         EOF;
    int           can_delay;
    int           push_all;
{
    int cl_index;
    int cl_index1;
    int min_index;
    int cl_len;
    int cl_pushed;
    struct v_cluster l_clusters[MAX_CLUSTERS];

    /*
     * make a local 'sorted' copy of the clusters
     * and clear vp->v_clen so that new clusters can
     * be developed
     */
    for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
        for (min_index = -1, cl_index1 = 0; cl_index1 < vp->v_clen; cl_index1++) {
            if (vp->v_clusters[cl_index1].start_pg == vp->v_clusters[cl_index1].last_pg)
                continue;
            if (min_index == -1)
                min_index = cl_index1;
            else if (vp->v_clusters[cl_index1].start_pg < vp->v_clusters[min_index].start_pg)
                min_index = cl_index1;
        }
        if (min_index == -1)
            break;
        l_clusters[cl_index].start_pg = vp->v_clusters[min_index].start_pg;
        l_clusters[cl_index].last_pg  = vp->v_clusters[min_index].last_pg;

        vp->v_clusters[min_index].start_pg = vp->v_clusters[min_index].last_pg;
    }
    cl_len = cl_index;
    vp->v_clen = 0;

    for (cl_pushed = 0, cl_index = 0; cl_index < cl_len; cl_index++) {
        /*
         * try to push each cluster in turn... cluster_push_x may not
         * push the cluster if can_delay is TRUE and the cluster doesn't
         * meet the criteria for an immediate push
         */
        if (cluster_push_x(vp, EOF, l_clusters[cl_index].start_pg, l_clusters[cl_index].last_pg, can_delay)) {
            l_clusters[cl_index].start_pg = 0;
            l_clusters[cl_index].last_pg  = 0;

            cl_pushed++;

            if (push_all == 0)
                break;
        }
    }
    if (cl_len > cl_pushed) {
        /*
         * we didn't push all of the clusters, so
         * let's try to merge them back in to the vnode
         */
        if ((MAX_CLUSTERS - vp->v_clen) < (cl_len - cl_pushed)) {
            /*
             * we picked up some new clusters while we were trying to
             * push the old ones (I don't think this can happen because
             * I'm holding the lock, but just in case)... the sum of the
             * leftovers plus the new cluster count exceeds our ability
             * to represent them, so fall back to the VHASDIRTY mechanism
             */
            for (cl_index = 0; cl_index < cl_len; cl_index++) {
                if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
                    continue;

                if (l_clusters[cl_index].start_pg < vp->v_cstart)
                    vp->v_cstart = l_clusters[cl_index].start_pg;
                if (l_clusters[cl_index].last_pg > vp->v_lastw)
                    vp->v_lastw = l_clusters[cl_index].last_pg;
            }
            vp->v_flag |= VHASDIRTY;
        } else {
            /*
             * we've got room to merge the leftovers back in
             * just append them starting at the next 'hole'
             * represented by vp->v_clen
             */
            for (cl_index = 0, cl_index1 = vp->v_clen; cl_index < cl_len; cl_index++) {
                if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
                    continue;

                vp->v_clusters[cl_index1].start_pg = l_clusters[cl_index].start_pg;
                vp->v_clusters[cl_index1].last_pg  = l_clusters[cl_index].last_pg;

                if (cl_index1 == 0) {
                    vp->v_cstart = l_clusters[cl_index].start_pg;
                    vp->v_lastw  = l_clusters[cl_index].last_pg;
                } else {
                    if (l_clusters[cl_index].start_pg < vp->v_cstart)
                        vp->v_cstart = l_clusters[cl_index].start_pg;
                    if (l_clusters[cl_index].last_pg > vp->v_lastw)
                        vp->v_lastw = l_clusters[cl_index].last_pg;
                }
                cl_index1++;
            }
            /*
             * update the cluster count
             */
            vp->v_clen = cl_index1;
        }
    }
    return(MAX_CLUSTERS - vp->v_clen);
}
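/*
 * cluster_push_x writes out the dirty pages in the page range
 * [first, last), clipped to EOF... when can_delay is non-zero the push
 * may be skipped if the range is small or if fewer than half of its
 * pages are dirty... otherwise runs of valid and dirty pages are
 * gathered and issued asynchronously through cluster_io, throttled
 * against ASYNC_THROTTLE outstanding writes
 */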
static int
cluster_push_x(vp, EOF, first, last, can_delay)
    struct vnode *vp;
    off_t         EOF;
    daddr_t       first;
    daddr_t       last;
    int           can_delay;
{
    upl_page_info_t  *pl;
    upl_t            upl;
    vm_offset_t      upl_offset;
    int              upl_size;
    off_t            upl_f_offset;
    int              pages_in_upl;
    int              start_pg;
    int              last_pg;
    int              io_size;
    int              io_flags;
    int              size;
    kern_return_t    kret;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_START,
                 vp->v_clen, first, last, EOF, 0);

    if ((pages_in_upl = last - first) == 0) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 0, 0, 0, 0);
        return (1);
    }
    upl_size = pages_in_upl * PAGE_SIZE;
    upl_f_offset = ((off_t)first) * PAGE_SIZE_64;

    if (upl_f_offset + upl_size >= EOF) {

        if (upl_f_offset >= EOF) {
            /*
             * must have truncated the file and missed
             * clearing a dangling cluster (i.e. it's completely
             * beyond the new EOF
             */
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 1, 0, 0, 0);
            return(1);
        }
        size = EOF - upl_f_offset;

        upl_size = (size + (PAGE_SIZE - 1) ) & ~(PAGE_SIZE - 1);
        pages_in_upl = upl_size / PAGE_SIZE;
    } else
        size = upl_size;

    if (can_delay && (pages_in_upl < (MAX_UPL_TRANSFER - (MAX_UPL_TRANSFER / 2))))
        return(0);

    kret = ubc_create_upl(vp,
                          upl_f_offset,
                          upl_size,
                          &upl,
                          &pl,
                          UPL_RET_ONLY_DIRTY);
    if (kret != KERN_SUCCESS)
        panic("cluster_push: failed to get pagelist");

    if (can_delay) {
        int  num_of_dirty;

        for (num_of_dirty = 0, start_pg = 0; start_pg < pages_in_upl; start_pg++) {
            if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
                num_of_dirty++;
        }
        if (num_of_dirty < pages_in_upl / 2) {
            ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 0, 2, num_of_dirty, (pages_in_upl / 2), 0);
            return(0);
        }
    }
    last_pg = 0;

    while (size) {
        /*
         * find the next run of valid and dirty pages to push,
         * releasing any clean pages that precede it
         */
        for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
            if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
                break;
        }
        if (start_pg > last_pg) {
            io_size = (start_pg - last_pg) * PAGE_SIZE;

            ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size,
                                UPL_ABORT_FREE_ON_EMPTY);

            if (io_size < size)
                size -= io_size;
            else
                break;
        }
        for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
            if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
                break;
        }
        upl_offset = start_pg * PAGE_SIZE;

        io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);

        if (vp->v_flag & VNOCACHE_DATA)
            io_flags = CL_COMMIT | CL_AGE | CL_ASYNC | CL_DUMP;
        else
            io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;

        while (vp->v_numoutput >= ASYNC_THROTTLE) {
            vp->v_flag |= VTHROTTLED;
            tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_push", 0);
        }
        cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, vp->v_ciosiz, io_flags, (struct buf *)0);

        size -= io_size;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 3, 0, 0, 0);

    return(1);
}