/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cluster.c	8.10 (Berkeley) 3/28/95
 */
#include <sys/param.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

#include <vm/vm_pageout.h>

#include <sys/kdebug.h>

#define CL_COMMIT      0x04
#define CL_PAGEOUT     0x10
#define CL_NOZERO      0x80
#define CL_PAGEIN      0x100
#define CL_DEV_MEMORY  0x200
static void cluster_zero(upl_t upl, vm_offset_t upl_offset,
        int size, struct buf *bp);
static int cluster_read_x(struct vnode *vp, struct uio *uio,
        off_t filesize, int devblocksize, int flags);
static int cluster_write_x(struct vnode *vp, struct uio *uio,
        off_t oldEOF, off_t newEOF, off_t headOff,
        off_t tailOff, int devblocksize, int flags);
static int cluster_nocopy_read(struct vnode *vp, struct uio *uio,
        off_t filesize, int devblocksize, int flags);
static int cluster_nocopy_write(struct vnode *vp, struct uio *uio,
        off_t newEOF, int devblocksize, int flags);
static int cluster_phys_read(struct vnode *vp, struct uio *uio,
        off_t filesize);
static int cluster_phys_write(struct vnode *vp, struct uio *uio, off_t newEOF);
static int cluster_push_x(struct vnode *vp, off_t EOF, daddr_t first, daddr_t last, int can_delay);
static int cluster_try_push(struct vnode *vp, off_t newEOF, int can_delay, int push_all);

/*
 * throttle the number of async writes that
 * can be outstanding on a single vnode
 * before we issue a synchronous write
 */
#define ASYNC_THROTTLE  9
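
/*
 * cluster_iodone:  completion handler for a clustered I/O transaction.
 * Processes the transaction only once every component buffer chained through
 * b_trans_next is done; it then accumulates byte counts, residuals and error
 * status, frees any allocated I/O vector lists, zero-fills the tail of the
 * last page when a read ends at an unaligned EOF, wakes up writers throttled
 * on v_numoutput, and finally commits or aborts the pages of the backing UPL.
 */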
    struct buf   *cbp_head;
    struct buf   *cbp_next;

    cbp_head = (struct buf *)(bp->b_trans_head);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
             (int)cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);

    for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
        /*
         * all I/O requests that are part of this transaction
         * have to complete before we can process it
         */
        if ( !(cbp->b_flags & B_DONE)) {

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                     (int)cbp_head, (int)cbp, cbp->b_bcount, cbp->b_flags, 0);

    upl_offset  = cbp->b_uploffset;
    upl         = cbp->b_pagelist;
    b_flags     = cbp->b_flags;
    real_bp     = cbp->b_real_bp;

    zero_offset = cbp->b_validend;

        if (cbp->b_vectorcount > 1)
            _FREE(cbp->b_vectorlist, M_SEGMENT);

        if ((cbp->b_flags & B_ERROR) && error == 0)
            error = cbp->b_error;

        total_resid += cbp->b_resid;
        total_size  += cbp->b_bcount;

        cbp_next = cbp->b_trans_next;

    if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= (ASYNC_THROTTLE / 3))) {
        vp->v_flag &= ~VTHROTTLED;
        wakeup((caddr_t)&vp->v_numoutput);

        cluster_zero(upl, zero_offset, PAGE_SIZE - (zero_offset & PAGE_MASK), real_bp);

    if ((b_flags & B_NEED_IODONE) && real_bp) {
            real_bp->b_flags |= B_ERROR;
            real_bp->b_error = error;

        real_bp->b_resid = total_resid;

    if (error == 0 && total_resid)

    if (b_flags & B_COMMIT_UPL) {
        pg_offset   = upl_offset & PAGE_MASK;
        commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;

        if (error || (b_flags & B_NOCACHE)) {

            if ((b_flags & B_PAGEOUT) && (error != ENXIO))  /* transient error */
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
            else if (b_flags & B_PGIN)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;

                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

            ubc_upl_abort_range(upl, upl_offset - pg_offset, commit_size,

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                     (int)upl, upl_offset - pg_offset, commit_size,
                     0x80000000|upl_abort_code, 0);

            int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;

            if ( !(b_flags & B_PAGEOUT))
                upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;

                upl_commit_flags |= UPL_COMMIT_INACTIVATE;

            ubc_upl_commit_range(upl, upl_offset - pg_offset, commit_size,

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                     (int)upl, upl_offset - pg_offset, commit_size,
                     upl_commit_flags, 0);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                 (int)upl, upl_offset, 0, error, 0);
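
/*
 * cluster_zero:  zero 'size' bytes of the UPL starting at 'upl_offset'.
 * If the accompanying buffer does not already provide a mapped data address,
 * the UPL is temporarily mapped into the kernel with ubc_upl_map() and
 * unmapped again once the bzero() is complete.
 */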
cluster_zero(upl, upl_offset, size, bp)
    vm_offset_t   upl_offset;

    vm_offset_t   io_addr = 0;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
             upl_offset, size, (int)bp, 0, 0);

    if (bp == NULL || bp->b_data == NULL) {
        kret = ubc_upl_map(upl, &io_addr);

        if (kret != KERN_SUCCESS)
            panic("cluster_zero: ubc_upl_map() failed with (%d)", kret);

            panic("cluster_zero: ubc_upl_map() mapped 0");

        io_addr = (vm_offset_t)bp->b_data;

    bzero((caddr_t)(io_addr + upl_offset), size);

        kret = ubc_upl_unmap(upl);

        if (kret != KERN_SUCCESS)
            panic("cluster_zero: kernel_upl_unmap failed");
cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, real_bp)
    vm_offset_t   upl_offset;
    int           non_rounded_size;

    struct buf   *cbp_head = 0;
    struct buf   *cbp_tail = 0;

    if (flags & CL_READ) {
        io_flags = (B_VECTORLIST | B_READ);

        vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);

        io_flags = (B_VECTORLIST | B_WRITEINPROG);

        vfs_io_attributes(vp, B_WRITE, &max_iosize, &max_vectors);

    pl = ubc_upl_pageinfo(upl);

    if (flags & CL_ASYNC)
        io_flags |= (B_CALL | B_ASYNC);

        io_flags |= B_NOCACHE;
    if (flags & CL_PAGEIN)

        size = (non_rounded_size + (devblocksize - 1)) & ~(devblocksize - 1);

        size = non_rounded_size;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
             (int)f_offset, size, upl_offset, flags, 0);

    if ((flags & CL_READ) && ((upl_offset + non_rounded_size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
        /*
         * then we are going to end up
         * with a page that we can't complete (the file size wasn't a multiple
         * of PAGE_SIZE and we're trying to read to the end of the file
         * so we'll go ahead and zero out the portion of the page we can't
         * read in from the file
         */
        zero_offset = upl_offset + non_rounded_size;

        if (size > max_iosize)
            io_size = max_iosize;

        if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, &io_size, NULL)) {
            if (error == EOPNOTSUPP)
                panic("VOP_CMAP Unimplemented");

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
                 (int)f_offset, (int)blkno, io_size, zero_offset, 0);

        if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {
            if (flags & CL_PAGEOUT) {

                /* Try paging out the page individually before
                   giving up entirely and dumping it (it could
                   be mapped in a "hole" and require allocation
                */
                ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE_64, UPL_ABORT_FREE_ON_EMPTY);
                if (ubc_pushdirty_range(vp, f_offset, PAGE_SIZE_64) == 0) {

            upl_offset += PAGE_SIZE_64;
            f_offset   += PAGE_SIZE_64;
            size       -= PAGE_SIZE_64;

        lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);
        /*
         * we have now figured out how much I/O we can do - this is in 'io_size'
         * pl_index represents the first page in the 'upl' that the I/O will occur for
         * pg_offset is the starting point in the first page for the I/O
         * pg_count is the number of full and partial pages that 'io_size' encompasses
         */
        pl_index  = upl_offset / PAGE_SIZE;
        pg_offset = upl_offset & PAGE_MASK;
        pg_count  = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;

        if (flags & CL_DEV_MEMORY) {
            /*
             * currently, can't deal with reading 'holes' in file
             */
            if ((long)blkno == -1) {

            /*
             * treat physical requests as one 'giant' page
             */

        if ((flags & CL_READ) && (long)blkno == -1) {
            /*
             * if we're reading and blkno == -1, then we've got a
             * 'hole' in the file that we need to deal with by zeroing
             * out the affected area in the upl
             */
            if (zero_offset && io_size == size) {
                /*
                 * if this upl contains the EOF and it is not a multiple of PAGE_SIZE
                 * then 'zero_offset' will be non-zero
                 * if the 'hole' returned by VOP_CMAP extends all the way to the eof
                 * (indicated by the io_size finishing off the I/O request for this UPL)
                 * then we're not going to issue an I/O for the
                 * last page in this upl... we need to zero both the hole and the tail
                 * of the page beyond the EOF, since the delayed zero-fill won't kick in
                 */
                bytes_to_zero = (((upl_offset + io_size) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - upl_offset;

                bytes_to_zero = io_size;

            cluster_zero(upl, upl_offset, bytes_to_zero, real_bp);

                /*
                 * if there is a current I/O chain pending
                 * then the first page of the group we just zero'd
                 * will be handled by the I/O completion if the zero
                 * fill started in the middle of the page
                 */
                pg_count = (io_size - pg_offset) / PAGE_SIZE;

                /*
                 * no pending I/O to pick up that first page
                 * so, we have to make sure it gets committed
                 *
                 * set the pg_offset to 0 so that the upl_commit_range
                 * starts with this page
                 */
                pg_count = (io_size + pg_offset) / PAGE_SIZE;

            if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))
                /*
                 * if we're done with the request for this UPL
                 * then we have to make sure to commit the last page
                 * even if we only partially zero-filled it
                 */

                    pg_resid = PAGE_SIZE - pg_offset;

            if (flags & CL_COMMIT)
                ubc_upl_commit_range(upl,
                        (upl_offset + pg_resid) & ~PAGE_MASK,
                        pg_count * PAGE_SIZE,
                        UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);

            upl_offset += io_size;

            if (cbp_head && pg_count)

        } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
            real_bp->b_blkno = blkno;

        if (pg_count > max_vectors) {
            io_size -= (pg_count - max_vectors) * PAGE_SIZE;

                io_size = PAGE_SIZE - pg_offset;

                pg_count = max_vectors;

        /*
         * we need to allocate space for the vector list
         */
            iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
                            M_SEGMENT, M_NOWAIT);

            if (iovp == (struct iovec *) 0) {
                /*
                 * if the allocation fails, then throttle down to a single page
                 */
                io_size = PAGE_SIZE - pg_offset;

        /* Throttle the speculative IO */
        if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT))

        cbp = alloc_io_buf(vp, priv);

            /*
             * we use the io vector that's reserved in the buffer header
             * this ensures we can always issue an I/O even in a low memory
             * condition that prevents the _MALLOC from succeeding... this
             * is necessary to prevent deadlocks with the pager
             */
            iovp = (struct iovec *)(&cbp->b_vects[0]);

        cbp->b_vectorlist  = (void *)iovp;
        cbp->b_vectorcount = pg_count;

        if (flags & CL_DEV_MEMORY) {

            iovp->iov_len  = io_size;
            iovp->iov_base = (caddr_t)upl_phys_page(pl, 0);

            if (iovp->iov_base == (caddr_t) 0) {

            iovp->iov_base += upl_offset;

            for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {

                    psize = PAGE_SIZE - pg_offset;

                iovp->iov_len  = psize;
                iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);

                if (iovp->iov_base == (caddr_t) 0) {
                        _FREE(cbp->b_vectorlist, M_SEGMENT);

                iovp->iov_base += pg_offset;

        if (flags & CL_PAGEOUT) {

                if (bp = incore(vp, lblkno + i)) {
                    if (!ISSET(bp->b_flags, B_BUSY)) {

                        SET(bp->b_flags, (B_BUSY | B_INVAL));

                        panic("BUSY bp found in cluster_io");

        if (flags & CL_ASYNC)
            cbp->b_iodone = (void *)cluster_iodone;
        cbp->b_flags |= io_flags;

        cbp->b_lblkno     = lblkno;
        cbp->b_blkno      = blkno;
        cbp->b_bcount     = io_size;
        cbp->b_pagelist   = upl;
        cbp->b_uploffset  = upl_offset;
        cbp->b_trans_next = (struct buf *)0;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
                     cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
                     cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);

            cbp_tail->b_trans_next = cbp;

            (struct buf *)(cbp->b_trans_head) = cbp_head;

        upl_offset += io_size;

        if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY) && ((flags & CL_ASYNC) || buf_count > 8)) || size == 0) {
            /*
             * if we have no more I/O to issue or
             * the current I/O we've prepared fully
             * completes the last page in this request
             * and it's either an ASYNC request or
             * we've already accumulated more than 8 I/O's into
             * this transaction and it's not an I/O directed to
             * special DEVICE memory
             * then go ahead and issue the I/O
             */
            if (flags & CL_COMMIT)
                cbp_head->b_flags |= B_COMMIT_UPL;
            if (flags & CL_PAGEOUT)
                cbp_head->b_flags |= B_PAGEOUT;
            if (flags & CL_PAGEIN)
                cbp_head->b_flags |= B_PGIN;

                cbp_head->b_flags |= B_NEED_IODONE;
                cbp_head->b_real_bp = real_bp;

                cbp_head->b_real_bp = (struct buf *)NULL;

                /*
                 * we're about to issue the last I/O for this upl
                 * if this was a read to the eof and the eof doesn't
                 * finish on a page boundary, then we need to zero-fill
                 * the rest of the page....
                 */
                cbp_head->b_validend = zero_offset;

                cbp_head->b_validend = 0;

            for (cbp = cbp_head; cbp;) {
                struct buf * cbp_next;

                if (io_flags & B_WRITEINPROG)
                    cbp->b_vp->v_numoutput++;

                cbp_next = cbp->b_trans_next;

                (void) VOP_STRATEGY(cbp);

            if ( !(flags & CL_ASYNC)) {
                for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)

                if (error = cluster_iodone(cbp_head)) {
                    if ((flags & CL_PAGEOUT) && (error == ENXIO))
                        retval = 0;    /* drop the error */

            cbp_head = (struct buf *)0;
            cbp_tail = (struct buf *)0;

    for (cbp = cbp_head; cbp;) {
        struct buf * cbp_next;

        if (cbp->b_vectorcount > 1)
            _FREE(cbp->b_vectorlist, M_SEGMENT);
        upl_offset -= cbp->b_bcount;
        size       += cbp->b_bcount;

        cbp_next = cbp->b_trans_next;

    pg_offset  = upl_offset & PAGE_MASK;
    abort_size = ((size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;

    if (flags & CL_COMMIT) {

        if ((flags & CL_PAGEOUT) && (error != ENXIO))  /* transient error */
            upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
        else if (flags & CL_PAGEIN)
            upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;

            upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

        ubc_upl_abort_range(upl, upl_offset - pg_offset, abort_size,

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
                 (int)upl, upl_offset - pg_offset, abort_size, error, 0);

        real_bp->b_flags |= B_ERROR;
        real_bp->b_error  = error;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
             (int)f_offset, size, upl_offset, retval, 0);
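
/*
 * cluster_rd_prefetch:  speculatively read up to MAX_UPL_TRANSFER pages
 * starting at f_offset, clipped to the end of the file.  Leading pages that
 * are already resident are skipped; if anything in the range is missing,
 * advisory_read() is issued for it.  Returns the number of pages covered.
 */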
cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize)

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
             (int)f_offset, size, (int)filesize, 0, 0);

    if (f_offset >= filesize) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
                 (int)f_offset, 0, 0, 0, 0);

    if (size > (MAX_UPL_TRANSFER * PAGE_SIZE))
        size = MAX_UPL_TRANSFER * PAGE_SIZE;

        size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);

    if ((off_t)size > (filesize - f_offset))
        size = filesize - f_offset;

    pages_to_fetch = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;

    for (skipped_pages = 0; skipped_pages < pages_to_fetch; skipped_pages++) {
        if (ubc_page_op(vp, f_offset, 0, 0, 0) != KERN_SUCCESS)

        f_offset += PAGE_SIZE;

    if (skipped_pages < pages_to_fetch)
        advisory_read(vp, filesize, f_offset, size, devblocksize);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
             (int)f_offset + (pages_to_fetch * PAGE_SIZE), skipped_pages, 0, 1, 0);

    return (pages_to_fetch);
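
/*
 * cluster_rd_ahead:  sequential read-ahead heuristic.
 * When the blocks just read follow the previous request (v_lastr), the
 * read-ahead window v_ralen is doubled (up to MAX_UPL_TRANSFER pages) and
 * cluster_rd_prefetch() is issued for the region beyond v_maxra; otherwise
 * the routine returns without prefetching.
 */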
cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize)

    int   size_of_prefetch;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
             b_lblkno, e_lblkno, vp->v_lastr, 0, 0);

    if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                 vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);

    if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) &&
       (b_lblkno != (vp->v_maxra + 1) || vp->v_ralen == 0))) {

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                 vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);

    max_pages = MAX_UPL_TRANSFER;

    vp->v_ralen = vp->v_ralen ? min(max_pages, vp->v_ralen << 1) : 1;

    if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
        vp->v_ralen = min(max_pages, (e_lblkno + 1) - b_lblkno);

    if (e_lblkno < vp->v_maxra) {
        if ((vp->v_maxra - e_lblkno) > max(max_pages / 16, 4)) {

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                     vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);

    r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
    f_offset = (off_t)r_lblkno * PAGE_SIZE_64;

    if (f_offset < filesize) {
        size_of_prefetch = cluster_rd_prefetch(vp, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);

        if (size_of_prefetch)
            vp->v_maxra = (r_lblkno + size_of_prefetch) - 1;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
             vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
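
/*
 * cluster_pageout:  push the dirty pages described by a UPL out to the file.
 * UPL_IOSYNC / UPL_NOCOMMIT translate into CL_ASYNC / CL_COMMIT; requests on
 * read-only mounts, or that are misaligned or beyond EOF, are aborted.  The
 * I/O is clipped to the end of the file, async writes are throttled against
 * ASYNC_THROTTLE, and the work is handed to cluster_io() with CL_PAGEOUT.
 */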
cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    vm_offset_t   upl_offset;

    int   local_flags = CL_PAGEOUT;

    if ((flags & UPL_IOSYNC) == 0)
        local_flags |= CL_ASYNC;
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags |= CL_COMMIT;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
             (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * If they didn't specify any I/O, then we are done...
     * we can't issue an abort because we don't know how
     * big the upl really is
     */

    if (vp->v_mount->mnt_flag & MNT_RDONLY) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);

    /*
     * can't page-in from a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
       (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);

    max_size = filesize - f_offset;

    pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (size > pg_size) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
                        UPL_ABORT_FREE_ON_EMPTY);

    while (vp->v_numoutput >= ASYNC_THROTTLE) {
        vp->v_flag |= VTHROTTLED;
        tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_pageout", 0);

    return (cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
               local_flags, (struct buf *)0));
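
/*
 * cluster_pagein:  bring the pages described by a UPL in from the file.
 * After validating alignment and EOF, the request (rounded to page size) is
 * issued through cluster_io() with CL_READ | CL_PAGEIN; any excess UPL pages
 * are aborted, and a single-page read may trigger the sequential read-ahead
 * logic in cluster_rd_ahead().
 */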
cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    vm_offset_t   upl_offset;

    if (upl == NULL || size < 0)
        panic("cluster_pagein: NULL upl passed in");

    if ((flags & UPL_IOSYNC) == 0)
        local_flags |= CL_ASYNC;
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags |= CL_COMMIT;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
             (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * can't page-in from a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
       (f_offset & PAGE_MASK_64) || (size & PAGE_MASK) || (upl_offset & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);

    max_size = filesize - f_offset;

    rounded_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (size > rounded_size && (local_flags & CL_COMMIT))
        ubc_upl_abort_range(upl, upl_offset + rounded_size,
                    size - (upl_offset + rounded_size), UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);

    retval = cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
                local_flags | CL_READ | CL_PAGEIN, (struct buf *)0);

    b_lblkno = (int)(f_offset / PAGE_SIZE_64);
    e_lblkno = (int)
            ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);

    if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF) && rounded_size == PAGE_SIZE) {
        /*
         * we haven't read the last page in of the file yet
         * so let's try to read ahead if we're in
         * a sequential access pattern
         */
        cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);

    vp->v_lastr = e_lblkno;
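
/*
 * cluster_bp:  issue the I/O described by an ordinary struct buf through the
 * cluster layer: the logical block is converted to a file offset with
 * ubc_blktooff() and the buffer's page list is passed to cluster_io().
 */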
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 19)) | DBG_FUNC_START,
             (int)bp, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);

    if (bp->b_pagelist == (upl_t) 0)
        panic("cluster_bp: can't handle NULL upl yet\n");
    if (bp->b_flags & B_READ)
        flags = CL_ASYNC | CL_READ;

    f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);

    return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, 0, flags, bp));
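
/*
 * cluster_write:  top-level write entry point.
 * Cached files (and non user-space uios) go straight to cluster_write_x().
 * For VNOCACHE_DATA files each iovec is examined: physically contiguous
 * targets use cluster_phys_write(), small or unaligned requests fall back to
 * cluster_write_x(), and large page-aligned requests take the direct
 * cluster_nocopy_write() path, with uio_resid clipped and restored around
 * each call.
 */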
cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)

    vm_offset_t      upl_offset;

    upl_page_info_t  *pl;

    if ((!uio) || (uio->uio_segflg != UIO_USERSPACE) || (!(vp->v_flag & VNOCACHE_DATA)))

        retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);

    while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0)

        /* we know we have a resid, so this is safe */

        while (iov->iov_len == 0) {

        /*
         * We check every vector target and if it is physically
         * contiguous space, we skip the sanity checks.
         */
        upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
        upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE-1)) & ~PAGE_MASK;

        upl_flags = UPL_QUERY_OBJECT_TYPE;
        if ((vm_map_get_upl(current_map(),
                   (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                   &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)

            /*
             * the user app must have passed in an invalid address
             */

        if (upl_flags & UPL_PHYS_CONTIG)

            /*
             * since the interface to the IOKit below us uses physical block #'s and
             * block counts to specify the I/O, we can't handle anything that isn't
             * devblocksize aligned
             */
            if ((uio->uio_offset & (devblocksize - 1)) || (uio->uio_resid & (devblocksize - 1)))

            if (flags & IO_HEADZEROFILL)

                flags &= ~IO_HEADZEROFILL;

                if (retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, devblocksize, IO_HEADZEROFILL))

            retval = cluster_phys_write(vp, uio, newEOF);

            if (uio->uio_resid == 0 && (flags & IO_TAILZEROFILL))

                retval = cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, devblocksize, IO_HEADZEROFILL);

        else if ((uio->uio_resid < 4 * PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)))

            /*
             * We set a threshold of 4 pages to decide if the nocopy
             * write loop is worth the trouble...
             * we also come here if we're trying to zero the head and/or tail
             * of a partially written page, and the user source is not a physically contiguous region
             */
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);

        else if (uio->uio_offset & PAGE_MASK_64)

            /* Bring the file offset write up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;
            /*
             * Fake the resid going into the cluster_write_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

        else if ((int)iov->iov_base & PAGE_MASK_64)

            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */

            max_io_size = newEOF - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = max_io_size;

            if (clip_size < PAGE_SIZE)

                /*
                 * Take care of tail end of write in this vector
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags);
                if ((retval == 0) && uio->uio_resid)
                    retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
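
/*
 * cluster_nocopy_write:  direct (uncached) write path.
 * The user buffer is wired with vm_map_get_upl() (retrying with increasing
 * force_data_sync), pages for the same file range that are already in the
 * cache are dumped via UPL_POP_DUMP, and the data is written synchronously
 * through cluster_io().  The UPL is then released with an abort so the dirty
 * state of the source pages is left untouched.
 */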
cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags)

    upl_page_info_t  *pl;

    vm_offset_t      upl_offset;

    int              upl_needed_size;

    int              force_data_sync;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
             (int)uio->uio_offset, (int)uio->uio_resid,
             (int)newEOF, devblocksize, 0);

    /*
     * When we enter this routine, we know
     *  -- the offset into the file is on a pagesize boundary
     *  -- the resid is a page multiple
     *  -- the resid will not exceed iov_len
     */
    cluster_try_push(vp, newEOF, 0, 1);

    while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
        io_size = uio->uio_resid;

        if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            io_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
        upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
                 (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);

        for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)

            upl_size = upl_needed_size;
            upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
                    UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

            kret = vm_map_get_upl(current_map(),
                          (vm_offset_t)iov->iov_base & ~PAGE_MASK,

            if (kret != KERN_SUCCESS)

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                         (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);

                /* cluster_nocopy_write: failed to get pagelist */
                /* do not return kret here */

            pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
            pages_in_pl = upl_size / PAGE_SIZE;

            for(i=0; i < pages_in_pl; i++)

                if (!upl_valid_page(pl, i))

            if (i == pages_in_pl)

            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                        UPL_ABORT_FREE_ON_EMPTY);

        if (force_data_sync >= 3)

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
                     i, pages_in_pl, upl_size, kret, 0);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                     (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);

        /*
         * Consider the possibility that upl_size wasn't satisfied.
         */
        if (upl_size != upl_needed_size)
            io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
                 (int)upl_offset, upl_size, (int)iov->iov_base, io_size, 0);

            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                        UPL_ABORT_FREE_ON_EMPTY);
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                     (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);

        /*
         * Now look for pages already in the cache
         * and throw them away.
         */

        upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
        max_io_size = io_size;

        while (max_io_size) {

            /*
             * Flag UPL_POP_DUMP says if the page is found
             * in the page cache it must be thrown away.
             */
                    UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,

            max_io_size  -= PAGE_SIZE;
            upl_f_offset += PAGE_SIZE;

        /*
         * issue a synchronous write to cluster_io
         */

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
                 (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);

        error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
                   io_size, devblocksize, 0, (struct buf *)0);

            /*
             * The cluster_io write completed successfully,
             * update the uio structure.
             */
            iov->iov_base   += io_size;
            iov->iov_len    -= io_size;
            uio->uio_resid  -= io_size;
            uio->uio_offset += io_size;

        /*
         * always 'commit' the I/O via the abort primitive whether the I/O
         * succeeded cleanly or not... this is necessary to ensure that
         * we preserve the state of the DIRTY flag on the pages used to
         * provide the data for the I/O... the state of this flag SHOULD
         * NOT be changed by a write
         */
        ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                    UPL_ABORT_FREE_ON_EMPTY);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
                 (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
             (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
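
/*
 * cluster_phys_write:  write from a physically contiguous user buffer.
 * The buffer is wired with vm_map_get_upl() and written with a single
 * synchronous cluster_io() call tagged CL_DEV_MEMORY; the UPL is committed
 * on success and aborted on failure.
 */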
cluster_phys_write(vp, uio, newEOF)

    vm_offset_t   upl_offset;

    int           upl_needed_size;

    /*
     * When we enter this routine, we know
     *  -- the resid will not exceed iov_len
     *  -- the vector target address is physically contiguous
     */
    cluster_try_push(vp, newEOF, 0, 1);

    io_size = iov->iov_len;
    upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
    upl_needed_size = upl_offset + io_size;

    upl_size = upl_needed_size;
    upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
            UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

    kret = vm_map_get_upl(current_map(),
                  (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                  &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);

    if (kret != KERN_SUCCESS)

        /* cluster_phys_write: failed to get pagelist */
        /* note: return kret here */

    /*
     * Consider the possibility that upl_size wasn't satisfied.
     * This is a failure in the physical memory case.
     */
    if (upl_size < upl_needed_size)

        kernel_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

    /*
     * issue a synchronous write to cluster_io
     */

    error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
               io_size, 0, CL_DEV_MEMORY, (struct buf *)0);

        /*
         * The cluster_io write completed successfully,
         * update the uio structure and commit.
         */

        ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);

        iov->iov_base   += io_size;
        iov->iov_len    -= io_size;
        uio->uio_resid  -= io_size;
        uio->uio_offset += io_size;

        ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
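
/*
 * cluster_write_x:  cached write path, also used for zero-filling when
 * IO_HEADZEROFILL / IO_TAILZEROFILL are set (in which case no uio may be
 * supplied).  For each chunk it creates a UPL over the affected range,
 * pre-reads any partially written page that is not yet valid, zero-fills the
 * head and tail areas, copies the user data in with uiomove(), and then
 * either records/extends a dirty cluster on the vnode for a later push or
 * writes the UPL out immediately (IO_SYNC, VNOCACHE_DATA, or no free
 * cluster slot).
 */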
cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)

    upl_page_info_t  *pl;

    vm_offset_t      upl_offset;

    vm_offset_t      io_address;

    long long        total_size;

    long long        zero_cnt1;

    daddr_t          start_blkno;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
             (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);

        uio_resid = uio->uio_resid;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
                 0, 0, (int)oldEOF, (int)newEOF, 0);

    if (flags & IO_HEADZEROFILL) {
        /*
         * some filesystems (HFS is one) don't support unallocated holes within a file...
         * so we zero fill the intervening space between the old EOF and the offset
         * where the next chunk of real data begins.... ftruncate will also use this
         * routine to zero fill to the new EOF when growing a file... in this case, the
         * uio structure will not be provided
         */
        if (headOff < uio->uio_offset) {
            zero_cnt = uio->uio_offset - headOff;

        } else if (headOff < newEOF) {
            zero_cnt = newEOF - headOff;

    if (flags & IO_TAILZEROFILL) {

        zero_off1 = uio->uio_offset + uio->uio_resid;

        if (zero_off1 < tailOff)
            zero_cnt1 = tailOff - zero_off1;

    if (zero_cnt == 0 && uio == (struct uio *) 0)

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
                 retval, 0, 0, 0, 0);

    while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
        /*
         * for this iteration of the loop, figure out where our starting point is
         */
            start_offset = (int)(zero_off & PAGE_MASK_64);
            upl_f_offset = zero_off - start_offset;
        } else if (uio_resid) {
            start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
            upl_f_offset = uio->uio_offset - start_offset;

            start_offset = (int)(zero_off1 & PAGE_MASK_64);
            upl_f_offset = zero_off1 - start_offset;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
                 (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);

        if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            total_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        /*
         * compute the size of the upl needed to encompass
         * the requested write... limit each call to cluster_io
         * to the maximum UPL size... cluster_io will clip if
         * this exceeds the maximum io_size for the device,
         * make sure to account for
         * a starting offset that's not page aligned
         */
        upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

        if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        pages_in_upl = upl_size / PAGE_SIZE;
        io_size      = upl_size - start_offset;

        if ((long long)io_size > total_size)
            io_size = total_size;

        start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
        last_blkno  = start_blkno + pages_in_upl;

        kret = ubc_create_upl(vp,

        if (kret != KERN_SUCCESS)
            panic("cluster_write: failed to get pagelist");

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
                 (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        if (start_offset && !upl_valid_page(pl, 0)) {

            /*
             * we're starting in the middle of the first page of the upl
             * and the page isn't currently valid, so we're going to have
             * to read it in first... this is a synchronous operation
             */
            read_size = PAGE_SIZE;

            if ((upl_f_offset + read_size) > newEOF)
                read_size = newEOF - upl_f_offset;

            retval = cluster_io(vp, upl, 0, upl_f_offset, read_size, devblocksize,
                        CL_READ, (struct buf *)0);

                /*
                 * we had an error during the read which causes us to abort
                 * the current cluster_write request... before we do, we need
                 * to release the rest of the pages in the upl without modifying
                 * their state and mark the failed page in error
                 */
                ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
                ubc_upl_abort_range(upl, 0, upl_size,  UPL_ABORT_FREE_ON_EMPTY);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                         (int)upl, 0, 0, retval, 0);

        if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
            /*
             * the last offset we're writing to in this upl does not end on a page
             * boundary... if it's not beyond the old EOF, then we'll also need to
             * pre-read this page in if it isn't already valid
             */
            upl_offset = upl_size - PAGE_SIZE;

            if ((upl_f_offset + start_offset + io_size) < oldEOF &&
                !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {

                read_size = PAGE_SIZE;

                if ((upl_f_offset + upl_offset + read_size) > newEOF)
                    read_size = newEOF - (upl_f_offset + upl_offset);

                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size, devblocksize,
                            CL_READ, (struct buf *)0);

                    /*
                     * we had an error during the read which causes us to abort
                     * the current cluster_write request... before we do, we
                     * need to release the rest of the pages in the upl without
                     * modifying their state and mark the failed page in error
                     */
                    ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
                    ubc_upl_abort_range(upl, 0,          upl_size,  UPL_ABORT_FREE_ON_EMPTY);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                             (int)upl, 0, 0, retval, 0);

        if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
            panic("cluster_write: ubc_upl_map failed\n");
        xfer_resid = io_size;
        io_offset  = start_offset;

        while (zero_cnt && xfer_resid) {

            if (zero_cnt < (long long)xfer_resid)
                bytes_to_zero = zero_cnt;

                bytes_to_zero = xfer_resid;

            if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                         (int)upl_f_offset + io_offset, bytes_to_zero,
                         (int)io_offset, xfer_resid, 0);

                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
                zero_pg_index = (int)((zero_off - upl_f_offset) / PAGE_SIZE_64);

                if ( !upl_valid_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                             (int)upl_f_offset + io_offset, bytes_to_zero,
                             (int)io_offset, xfer_resid, 0);

                } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
                       !upl_dirty_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                             (int)upl_f_offset + io_offset, bytes_to_zero,
                             (int)io_offset, xfer_resid, 0);

            xfer_resid -= bytes_to_zero;
            zero_cnt   -= bytes_to_zero;
            zero_off   += bytes_to_zero;
            io_offset  += bytes_to_zero;

        if (xfer_resid && uio_resid) {
            bytes_to_move = min(uio_resid, xfer_resid);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
                     (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);

            retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);

                if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                    panic("cluster_write: kernel_upl_unmap failed\n");

                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                         (int)upl, 0, 0, retval, 0);

                uio_resid  -= bytes_to_move;
                xfer_resid -= bytes_to_move;
                io_offset  += bytes_to_move;

        while (xfer_resid && zero_cnt1 && retval == 0) {

            if (zero_cnt1 < (long long)xfer_resid)
                bytes_to_zero = zero_cnt1;

                bytes_to_zero = xfer_resid;

            if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                         (int)upl_f_offset + io_offset,
                         bytes_to_zero, (int)io_offset, xfer_resid, 0);

                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
                zero_pg_index = (int)((zero_off1 - upl_f_offset) / PAGE_SIZE_64);

                if ( !upl_valid_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                             (int)upl_f_offset + io_offset,
                             bytes_to_zero, (int)io_offset, xfer_resid, 0);

                } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
                       !upl_dirty_page(pl, zero_pg_index)) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                             (int)upl_f_offset + io_offset,
                             bytes_to_zero, (int)io_offset, xfer_resid, 0);

            xfer_resid -= bytes_to_zero;
            zero_cnt1  -= bytes_to_zero;
            zero_off1  += bytes_to_zero;
            io_offset  += bytes_to_zero;

            io_size += start_offset;

            if ((upl_f_offset + io_size) >= newEOF && io_size < upl_size) {
                /*
                 * if we're extending the file with this write
                 * we'll zero fill the rest of the page so that
                 * if the file gets extended again in such a way as to leave a
                 * hole starting at this EOF, we'll have zero's in the correct spot
                 */
                bzero((caddr_t)(io_address + io_size), upl_size - io_size);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                         (int)upl_f_offset + io_size,
                         upl_size - io_size, 0, 0, 0);

            if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                panic("cluster_write: kernel_upl_unmap failed\n");

            if (flags & IO_SYNC)
                /*
                 * if the IO_SYNC flag is set then we need to
                 * bypass any clusters and immediately issue
                 */

            if (vp->v_clen == 0)
                /*
                 * no clusters currently present
                 */
                goto start_new_cluster;

            /*
             * keep track of the overall dirty page
             * range we've developed
             * in case we have to fall back to the
             * VHASDIRTY method of flushing
             */
            if (vp->v_flag & VHASDIRTY)

            for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
                /*
                 * we have an existing cluster... see if this write will extend it nicely
                 */
                if (start_blkno >= vp->v_clusters[cl_index].start_pg) {
                    /*
                     * the current write starts at or after the current cluster
                     */
                    if (last_blkno <= (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
                        /*
                         * we have a write that fits entirely
                         * within the existing cluster limits
                         */
                        if (last_blkno > vp->v_clusters[cl_index].last_pg)
                            /*
                             * update our idea of where the cluster ends
                             */
                            vp->v_clusters[cl_index].last_pg = last_blkno;

                    if (start_blkno < (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
                        /*
                         * we have a write that starts in the middle of the current cluster
                         * but extends beyond the cluster's limit
                         * we'll clip the current cluster if we actually
                         * overlap with the new write
                         * and start a new cluster with the current write
                         */
                        if (vp->v_clusters[cl_index].last_pg > start_blkno)
                            vp->v_clusters[cl_index].last_pg = start_blkno;

                    /*
                     * we also get here for the case where the current write starts
                     * beyond the limit of the existing cluster
                     *
                     * in either case, we'll check the remaining clusters before
                     * starting a new one
                     */

                    /*
                     * the current write starts in front of the current cluster
                     */
                    if ((vp->v_clusters[cl_index].last_pg - start_blkno) <= MAX_UPL_TRANSFER) {
                        /*
                         * we can just merge the old cluster
                         * with the new request and leave it
                         */
                        vp->v_clusters[cl_index].start_pg = start_blkno;

                        if (last_blkno > vp->v_clusters[cl_index].last_pg) {
                            /*
                             * the current write completely
                             * envelops the existing cluster
                             */
                            vp->v_clusters[cl_index].last_pg = last_blkno;

                    /*
                     * if we were to combine this write with the current cluster
                     * we would exceed the cluster size limit.... so,
                     * let's see if there's any overlap of the new I/O with
                     * the existing cluster...
                     */
                    if (last_blkno > vp->v_clusters[cl_index].start_pg)
                        /*
                         * the current write extends into the existing cluster
                         * clip the current cluster by moving the start position
                         * to where the current write ends
                         */
                        vp->v_clusters[cl_index].start_pg = last_blkno;

                    /*
                     * if we get here, there was no way to merge
                     * the new I/O with this cluster and
                     * keep it under our maximum cluster length
                     * we'll check the remaining clusters before starting a new one
                     */

            if (cl_index < vp->v_clen)
                /*
                 * we found an existing cluster that we
                 * could merge this I/O into
                 */

            if (vp->v_clen < MAX_CLUSTERS && !(vp->v_flag & VNOCACHE_DATA))
                /*
                 * we didn't find an existing cluster to
                 * merge into, but there's room to start
                 */
                goto start_new_cluster;

            /*
             * no existing cluster to merge with and no
             * room to start a new one... we'll try
             * pushing the existing ones... if none of
             * them are able to be pushed, we'll have
             * to fall back on the VHASDIRTY mechanism
             * cluster_try_push will set v_clen to the
             * number of remaining clusters if it is
             * unable to push all of them
             */
            if (vp->v_flag & VNOCACHE_DATA)

            if (cluster_try_push(vp, newEOF, 0, 0) == 0) {
                vp->v_flag |= VHASDIRTY;

            if (vp->v_clen == 0) {
                vp->v_ciosiz = devblocksize;
                vp->v_cstart = start_blkno;
                vp->v_lastw  = last_blkno;

            vp->v_clusters[vp->v_clen].start_pg = start_blkno;
            vp->v_clusters[vp->v_clen].last_pg  = last_blkno;

            /*
             * make sure we keep v_cstart and v_lastw up to
             * date in case we have to fall back on the
             * V_HASDIRTY mechanism (or we've already entered it)
             */
            if (start_blkno < vp->v_cstart)
                vp->v_cstart = start_blkno;
            if (last_blkno > vp->v_lastw)
                vp->v_lastw = last_blkno;

            ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);

            /*
             * in order to maintain some semblance of coherency with mapped writes
             * we need to write the cluster back out as a multiple of the PAGESIZE
             * unless the cluster encompasses the last page of the file... in this
             * case we'll round out to the nearest device block boundary
             */

            if ((upl_f_offset + io_size) > newEOF) {
                io_size = newEOF - upl_f_offset;
                io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);

            if (flags & IO_SYNC)
                io_flags = CL_COMMIT | CL_AGE;

                io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;

            if (vp->v_flag & VNOCACHE_DATA)
                io_flags |= CL_DUMP;

            while (vp->v_numoutput >= ASYNC_THROTTLE) {
                vp->v_flag |= VTHROTTLED;
                tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_write", 0);

            retval = cluster_io(vp, upl, 0, upl_f_offset, io_size, devblocksize,
                        io_flags, (struct buf *)0);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
             retval, 0, 0, 0, 0);
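
/*
 * cluster_read:  top-level read entry point, mirroring cluster_write().
 * Cached files use cluster_read_x(); for VNOCACHE_DATA user-space reads,
 * physically contiguous targets use cluster_phys_read(), small or unaligned
 * requests fall back to cluster_read_x(), and large page-aligned requests
 * take the direct cluster_nocopy_read() path.
 */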
cluster_read(vp, uio, filesize, devblocksize, flags)

    vm_offset_t      upl_offset;

    upl_page_info_t  *pl;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
             (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);

    /*
     * We set a threshold of 4 pages to decide if the nocopy
     * read loop is worth the trouble...
     */
    if (!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE)))

        retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
                 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);

    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0)

        /* we know we have a resid, so this is safe */

        while (iov->iov_len == 0) {

        /*
         * We check every vector target and if it is physically
         * contiguous space, we skip the sanity checks.
         */
        upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
        upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE-1)) & ~PAGE_MASK;

        upl_flags = UPL_QUERY_OBJECT_TYPE;
        if((vm_map_get_upl(current_map(),
                  (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                  &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)

            /*
             * the user app must have passed in an invalid address
             */

        if (upl_flags & UPL_PHYS_CONTIG)

            retval = cluster_phys_read(vp, uio, filesize);

        else if (uio->uio_resid < 4 * PAGE_SIZE)

            /*
             * We set a threshold of 4 pages to decide if the nocopy
             * read loop is worth the trouble...
             */
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
                     (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);

        else if (uio->uio_offset & PAGE_MASK_64)

            /* Bring the file offset read up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;
            /*
             * Fake the resid going into the cluster_read_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

        else if ((int)iov->iov_base & PAGE_MASK_64)

            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */

            max_io_size = filesize - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = (int)max_io_size;

            if (clip_size < PAGE_SIZE)

                /*
                 * Take care of the tail end of the read in this vector.
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_read(vp, uio, filesize, devblocksize, flags);
                if ((retval==0) && uio->uio_resid)
                    retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
             (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
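
/*
 * cluster_read_x:  cached read path.
 * Pages already valid in the cache are copied straight to the user buffer
 * via ubc_page_op()/uiomove(); otherwise a UPL is created over the request,
 * the run of invalid pages is read synchronously with cluster_io(), the data
 * is copied out (page by page for user-space uios), prefetch/read-ahead is
 * scheduled for the remainder, and the pages just read are committed or
 * aborted depending on the outcome.
 */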
2123 cluster_read_x(vp
, uio
, filesize
, devblocksize
, flags
)
2130 upl_page_info_t
*pl
;
2132 vm_offset_t upl_offset
;
2142 vm_offset_t io_address
;
2150 b_lblkno
= (int)(uio
->uio_offset
/ PAGE_SIZE_64
);
2152 while (uio
->uio_resid
&& uio
->uio_offset
< filesize
&& retval
== 0) {
2154 * compute the size of the upl needed to encompass
2155 * the requested read... limit each call to cluster_io
2156 * to the maximum UPL size... cluster_io will clip if
2157 * this exceeds the maximum io_size for the device,
2158 * make sure to account for
2159 * a starting offset that's not page aligned
2161 start_offset
= (int)(uio
->uio_offset
& PAGE_MASK_64
);
2162 upl_f_offset
= uio
->uio_offset
- (off_t
)start_offset
;
2163 max_size
= filesize
- uio
->uio_offset
;
2165 if ((off_t
)((unsigned int)uio
->uio_resid
) < max_size
)
2166 io_size
= uio
->uio_resid
;
2170 if (uio
->uio_segflg
== UIO_USERSPACE
&& !(vp
->v_flag
& VNOCACHE_DATA
)) {
2171 segflg
= uio
->uio_segflg
;
2173 uio
->uio_segflg
= UIO_PHYS_USERSPACE
;
2175 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 34)) | DBG_FUNC_START
,
2176 (int)uio
->uio_offset
, io_size
, uio
->uio_resid
, 0, 0);
2178 while (io_size
&& retval
== 0) {
2184 UPL_POP_SET
| UPL_POP_BUSY
,
2185 &paddr
, 0) != KERN_SUCCESS
)
2188 xsize
= PAGE_SIZE
- start_offset
;
2190 if (xsize
> io_size
)
2193 retval
= uiomove((caddr_t
)(paddr
+ start_offset
), xsize
, uio
);
2195 ubc_page_op(vp
, upl_f_offset
,
2196 UPL_POP_CLR
| UPL_POP_BUSY
, 0, 0);
2199 start_offset
= (int)
2200 (uio
->uio_offset
& PAGE_MASK_64
);
2201 upl_f_offset
= uio
->uio_offset
- start_offset
;
2203 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 34)) | DBG_FUNC_END
,
2204 (int)uio
->uio_offset
, io_size
, uio
->uio_resid
, 0, 0);
2206 uio
->uio_segflg
= segflg
;
2213 * we're already finished with this read request
2214 * let's see if we should do a read-ahead
2217 ((uio
->uio_offset
- 1) / PAGE_SIZE_64
);
2219 if (!(vp
->v_flag
& VRAOFF
))
2221 * let's try to read ahead if we're in
2222 * a sequential access pattern
2224 cluster_rd_ahead(vp
, b_lblkno
, e_lblkno
, filesize
, devblocksize
);
2225 vp
->v_lastr
= e_lblkno
;
2229 max_size
= filesize
- uio
->uio_offset
;
2231 upl_size
= (start_offset
+ io_size
+ (PAGE_SIZE
- 1)) & ~PAGE_MASK
;
2232 if (upl_size
> (MAX_UPL_TRANSFER
* PAGE_SIZE
))
2233 upl_size
= MAX_UPL_TRANSFER
* PAGE_SIZE
;
2234 pages_in_upl
= upl_size
/ PAGE_SIZE
;
2236 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 33)) | DBG_FUNC_START
,
2237 (int)upl
, (int)upl_f_offset
, upl_size
, start_offset
, 0);
2239 kret
= ubc_create_upl(vp
,
2245 if (kret
!= KERN_SUCCESS
)
2246 panic("cluster_read: failed to get pagelist");
2248 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 33)) | DBG_FUNC_END
,
2249 (int)upl
, (int)upl_f_offset
, upl_size
, start_offset
, 0);
        /*
         * scan from the beginning of the upl looking for the first
         * non-valid page.... this will become the first page in
         * the request we're going to make to 'cluster_io'... if all
         * of the pages are valid, we won't call through to 'cluster_io'
         */
        for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
            if (!upl_valid_page(pl, start_pg))
                break;
        }

        /*
         * scan from the starting invalid page looking for a valid
         * page before the end of the upl is reached, if we
         * find one, then it will be the last page of the request to
         * 'cluster_io'
         */
        for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
            if (upl_valid_page(pl, last_pg))
                break;
        }

        if (start_pg < last_pg) {
            /*
             * we found a range of 'invalid' pages that must be filled
             * if the last page in this range is the last page of the file
             * we may have to clip the size of it to keep from reading past
             * the end of the last physical block associated with the file
             */
            upl_offset = start_pg * PAGE_SIZE;
            io_size    = (last_pg - start_pg) * PAGE_SIZE;

            if ((upl_f_offset + upl_offset + io_size) > filesize)
                io_size = filesize - (upl_f_offset + upl_offset);

            /*
             * issue a synchronous read to cluster_io
             */
            error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
                               io_size, devblocksize, CL_READ, (struct buf *)0);
        }
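        /*
         * Illustrative note (not part of the original file): with a page
         * validity map of  V V I I I V V V  the first scan above stops at
         * start_pg = 2 and the second at last_pg = 5, so cluster_io is asked
         * to fill only pages 2..4; the surrounding valid pages are copied out
         * of the cache below and then committed or released, never re-read.
         */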
        if (error == 0) {
            /*
             * if the read completed successfully, or there was no I/O request
             * issued, then map the upl into kernel address space and
             * move the data into user land.... we'll first add on any 'valid'
             * pages that were present in the upl when we acquired it.
             */
            u_int  val_size;
            u_int  size_of_prefetch;

            for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
                if (!upl_valid_page(pl, uio_last))
                    break;
            }

            /*
             * compute size to transfer this round, if uio->uio_resid is
             * still non-zero after this uiomove, we'll loop around and
             * set up for another I/O.
             */
            val_size = (uio_last * PAGE_SIZE) - start_offset;

            if (max_size < val_size)
                val_size = max_size;

            if (uio->uio_resid < val_size)
                val_size = uio->uio_resid;

            e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);

            if (size_of_prefetch = (uio->uio_resid - val_size)) {
                /*
                 * if there's still I/O left to do for this request, then issue a
                 * pre-fetch I/O... the I/O wait time will overlap
                 * with the copying of the data
                 */
                cluster_rd_prefetch(vp, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);
            }
            if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
                /*
                 * let's try to read ahead if we're in
                 * a sequential access pattern
                 */
                cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
            vp->v_lastr = e_lblkno;
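            /*
             * Illustrative note (not part of the original file): val_size is
             * the number of bytes that can be copied out of this UPL right
             * now, i.e. the minimum of the valid bytes past start_offset, the
             * bytes left before EOF, and uio_resid.  For example, with 1MB
             * still outstanding and only 192KB usable from this UPL, the
             * remaining 832KB is handed to cluster_rd_prefetch() first, so
             * the disk works on the next chunk while uiomove() copies this
             * one.
             */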
            if (uio->uio_segflg == UIO_USERSPACE) {
                int offset;

                segflg = uio->uio_segflg;

                uio->uio_segflg = UIO_PHYS_USERSPACE;

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
                             (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);

                offset = start_offset;

                while (val_size && retval == 0) {
                    int     csize;
                    int     i;
                    caddr_t paddr;

                    i = offset / PAGE_SIZE;
                    csize = min(PAGE_SIZE - start_offset, val_size);

                    paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;

                    retval = uiomove(paddr, csize, uio);

                    val_size    -= csize;
                    offset      += csize;
                    start_offset = offset & PAGE_MASK;
                }
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
                             (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);

                uio->uio_segflg = segflg;
            } else {
                if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
                    panic("cluster_read: ubc_upl_map() failed\n");

                retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);

                if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
                    panic("cluster_read: ubc_upl_unmap() failed\n");
            }
        }
        if (start_pg < last_pg) {
            /*
             * compute the range of pages that we actually issued an I/O for
             * and either commit them as valid if the I/O succeeded
             * or abort them if the I/O failed
             */
            io_size = (last_pg - start_pg) * PAGE_SIZE;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
                         (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);

            if (error || (vp->v_flag & VNOCACHE_DATA))
                ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
                                    UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
            else
                ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size,
                                     UPL_COMMIT_CLEAR_DIRTY
                                     | UPL_COMMIT_FREE_ON_EMPTY
                                     | UPL_COMMIT_INACTIVATE);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
                         (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);
        }
        if ((last_pg - start_pg) < pages_in_upl) {
            int cur_pg;
            int commit_flags;

            /*
             * the set of pages that we issued an I/O for did not encompass
             * the entire upl... so just release these without modifying
             * their state
             */
            if (error)
                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
            else {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
                             (int)upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);

                if (start_pg) {
                    /*
                     * we found some already valid pages at the beginning of
                     * the upl commit these back to the inactive list with
                     * reference cleared
                     */
                    for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
                        commit_flags = UPL_COMMIT_FREE_ON_EMPTY
                                       | UPL_COMMIT_INACTIVATE;

                        if (upl_dirty_page(pl, cur_pg))
                            commit_flags |= UPL_COMMIT_SET_DIRTY;

                        if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
                            ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
                                                UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
                        else
                            ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
                                                 PAGE_SIZE, commit_flags);
                    }
                }
                if (last_pg < uio_last) {
                    /*
                     * we found some already valid pages immediately after the
                     * pages we issued I/O for, commit these back to the
                     * inactive list with reference cleared
                     */
                    for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
                        commit_flags = UPL_COMMIT_FREE_ON_EMPTY
                                       | UPL_COMMIT_INACTIVATE;

                        if (upl_dirty_page(pl, cur_pg))
                            commit_flags |= UPL_COMMIT_SET_DIRTY;

                        if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
                            ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
                                                UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
                        else
                            ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
                                                 PAGE_SIZE, commit_flags);
                    }
                }
                if (uio_last < pages_in_upl) {
                    /*
                     * there were some invalid pages beyond the valid pages
                     * that we didn't issue an I/O for, just release them
                     * unchanged
                     */
                    ubc_upl_abort_range(upl, uio_last * PAGE_SIZE,
                                        (pages_in_upl - uio_last) * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
                }

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
                             (int)upl, -1, -1, 0, 0);
            }
        }
        if (retval == 0)
            retval = error;
    }

    return (retval);
}
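/*
 * Illustrative sketch (not part of the original file): the commit/abort
 * decision applied to each already-valid page in the loops above, pulled
 * out into a helper for clarity.  The helper name is hypothetical; the
 * original code open-codes this logic inline.
 */
static void
cluster_doc_commit_one_page(struct vnode *vp, upl_t upl, upl_page_info_t *pl, int cur_pg)
{
    int commit_flags;

    /* pages go back to the inactive queue with their reference cleared */
    commit_flags = UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE;

    if (upl_dirty_page(pl, cur_pg))
        commit_flags |= UPL_COMMIT_SET_DIRTY;

    if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
        /* a clean page on a no-cache vnode is simply tossed */
        ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
                            UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
    else
        ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
                             PAGE_SIZE, commit_flags);
}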
static int
cluster_nocopy_read(vp, uio, filesize, devblocksize, flags)
    struct vnode *vp;
    struct uio   *uio;
    off_t         filesize;
    int           devblocksize;
    int           flags;
{
    upl_t            upl;
    upl_page_info_t *pl;
    off_t            upl_f_offset;
    vm_offset_t      upl_offset;
    off_t            start_upl_f_offset;
    off_t            max_io_size;
    int              io_size;
    int              upl_size;
    int              upl_needed_size;
    int              pages_in_pl;
    vm_offset_t      paddr;
    int              upl_flags;
    kern_return_t    kret;
    int              segflg;
    struct iovec    *iov;
    int              i;
    int              force_data_sync;
    int              error  = 0;
    int              retval = 0;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
                 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);

    /*
     * When we enter this routine, we know
     *  -- the offset into the file is on a pagesize boundary
     *  -- the resid is a page multiple
     *  -- the resid will not exceed iov_len
     */

    iov = uio->uio_iov;

    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {

        max_io_size = filesize - uio->uio_offset;

        if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
            io_size = max_io_size;
        else
            io_size = uio->uio_resid;

        /*
         * We don't come into this routine unless
         * UIO_USERSPACE is set.
         */
        segflg = uio->uio_segflg;

        uio->uio_segflg = UIO_PHYS_USERSPACE;

        /*
         * First look for pages already in the cache
         * and move them to user space.
         */
        while (io_size && (retval == 0)) {
            upl_f_offset = uio->uio_offset;

            /*
             * If this call fails, it means the page is not
             * in the page cache.
             */
            if (ubc_page_op(vp, upl_f_offset,
                            UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
                break;

            retval = uiomove((caddr_t)(paddr), PAGE_SIZE, uio);

            ubc_page_op(vp, upl_f_offset,
                        UPL_POP_CLR | UPL_POP_BUSY, 0, 0);

            io_size -= PAGE_SIZE;
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
                         (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
        }

        uio->uio_segflg = segflg;

        if (retval) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, 2, retval, 0);
            return (retval);
        }

        /* If we are already finished with this read, then return */
        if (io_size == 0) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, 3, io_size, 0);
            return (0);
        }

        max_io_size = io_size;
        if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;

        start_upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
        upl_f_offset = start_upl_f_offset;
        io_size = 0;

        while (io_size < max_io_size) {

            if (ubc_page_op(vp, upl_f_offset,
                            UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS) {
                ubc_page_op(vp, upl_f_offset,
                            UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
                break;
            }

            /*
             * Build up the io request parameters.
             */
            io_size      += PAGE_SIZE;
            upl_f_offset += PAGE_SIZE;
        }

        upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
        upl_needed_size = (upl_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
                     (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);

        for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {

            upl_size = upl_needed_size;
            upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

            kret = vm_map_get_upl(current_map(),
                                  (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                                  &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);

            if (kret != KERN_SUCCESS) {
                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
                             (int)upl_offset, upl_size, io_size, kret, 0);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                             (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);

                /* cluster_nocopy_read: failed to get pagelist */
                /* do not return kret here */
                return (retval);
            }

            pages_in_pl = upl_size / PAGE_SIZE;
            pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

            for (i = 0; i < pages_in_pl; i++) {
                if (!upl_valid_page(pl, i))
                    break;
            }
            if (i == pages_in_pl)
                break;

            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        if (force_data_sync >= 3) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
                         (int)upl_offset, upl_size, io_size, kret, 0);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                         (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);

            return (retval);
        }
        /*
         * Consider the possibility that upl_size wasn't satisfied.
         */
        if (upl_size != upl_needed_size)
            io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;

        if (io_size == 0) {
            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
            return (retval);
        }

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
                     (int)upl_offset, upl_size, io_size, kret, 0);

        /*
         * issue a synchronous read to cluster_io
         */

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
                     (int)upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);

        error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
                           io_size, devblocksize, CL_READ | CL_NOZERO, (struct buf *)0);

        if (error == 0) {
            /*
             * The cluster_io read completed successfully,
             * update the uio structure and commit.
             */
            ubc_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);

            iov->iov_base   += io_size;
            iov->iov_len    -= io_size;
            uio->uio_resid  -= io_size;
            uio->uio_offset += io_size;
        } else {
            ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
                     (int)upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);

        if (retval == 0)
            retval = error;
    }

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
                 (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);

    return (retval);
}
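/*
 * Illustrative sketch (not part of the original file): the retry pattern used
 * above to wire down the user buffer.  vm_map_get_upl() is retried with an
 * increasing force_data_sync value until every page in the returned page list
 * is valid, or three attempts have failed.  The helper name and the
 * 'wired_upl' out-parameter shape are hypothetical.
 */
static kern_return_t
cluster_doc_wire_user_buffer(vm_offset_t usr_addr, int upl_needed_size, upl_t *wired_upl)
{
    upl_page_info_t *pl;
    int              pages_in_pl;
    int              upl_size;
    int              upl_flags;
    int              force_data_sync;
    int              i;
    kern_return_t    kret;

    for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) {
        pages_in_pl = 0;
        upl_size    = upl_needed_size;
        upl_flags   = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

        kret = vm_map_get_upl(current_map(), usr_addr & ~PAGE_MASK,
                              &upl_size, wired_upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);
        if (kret != KERN_SUCCESS)
            return (kret);

        pl = UPL_GET_INTERNAL_PAGE_LIST(*wired_upl);

        for (i = 0; i < upl_size / PAGE_SIZE; i++) {
            if (!upl_valid_page(pl, i))
                break;
        }
        if (i == upl_size / PAGE_SIZE)
            return (KERN_SUCCESS);
        /*
         * some pages didn't make it... throw this upl away and try again,
         * asking vm_map_get_upl to push harder
         */
        ubc_upl_abort_range(*wired_upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
    }
    return (KERN_FAILURE);
}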
static int
cluster_phys_read(vp, uio, filesize)
    struct vnode *vp;
    struct uio   *uio;
    off_t         filesize;
{
    upl_t            upl;
    vm_offset_t      upl_offset;
    off_t            max_size;
    int              io_size;
    int              upl_size;
    int              upl_needed_size;
    int              pages_in_pl;
    int              upl_flags;
    kern_return_t    kret;
    struct iovec    *iov;
    int              error;

    /*
     * When we enter this routine, we know
     *  -- the resid will not exceed iov_len
     *  -- the target address is physically contiguous
     */

    iov = uio->uio_iov;

    max_size = filesize - uio->uio_offset;

    if (max_size < (off_t)((unsigned int)iov->iov_len))
        io_size = max_size;
    else
        io_size = iov->iov_len;

    upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
    upl_needed_size = upl_offset + io_size;

    pages_in_pl = 0;
    upl_size = upl_needed_size;
    upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

    kret = vm_map_get_upl(current_map(),
                          (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                          &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);

    if (kret != KERN_SUCCESS) {
        /* cluster_phys_read: failed to get pagelist */
        return (EINVAL);
    }

    /*
     * Consider the possibility that upl_size wasn't satisfied.
     */
    if (upl_size < upl_needed_size) {
        ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

        return (EINVAL);
    }

    /*
     * issue a synchronous read to cluster_io
     */

    error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
                       io_size, 0, CL_READ | CL_NOZERO | CL_DEV_MEMORY, (struct buf *)0);

    if (error == 0) {
        /*
         * The cluster_io read completed successfully,
         * update the uio structure and commit.
         */
        ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);

        iov->iov_base   += io_size;
        iov->iov_len    -= io_size;
        uio->uio_resid  -= io_size;
        uio->uio_offset += io_size;
    } else
        ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

    return (error);
}
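/*
 * Worked example for the physically contiguous case above (illustrative only,
 * not part of the original file): an iov_base of 0x5604 and an io_size of
 * 0x1800 give upl_offset = 0x604 and upl_needed_size = 0x1e04, so
 * vm_map_get_upl() is asked for the pages starting at address 0x5000;
 * anything short of upl_needed_size means the buffer couldn't be wired in
 * full, and the request is aborted rather than issued short.
 */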
/*
 * generate advisory I/O's in the largest chunks possible
 * the completed pages will be released into the VM cache
 */
int
advisory_read(vp, filesize, f_offset, resid, devblocksize)
    struct vnode *vp;
    off_t         filesize;
    off_t         f_offset;
    int           resid;
    int           devblocksize;
{
    upl_page_info_t *pl;
    upl_t            upl;
    vm_offset_t      upl_offset;
    int              upl_size;
    off_t            upl_f_offset;
    int              start_offset;
    int              start_pg;
    int              last_pg;
    int              pages_in_upl;
    off_t            max_size;
    int              io_size;
    kern_return_t    kret;
    int              issued_io;
    int              retval = 0;

    if (!UBCINFOEXISTS(vp))
        return (EINVAL);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
                 (int)f_offset, resid, (int)filesize, devblocksize, 0);

    while (resid && f_offset < filesize && retval == 0) {
        /*
         * compute the size of the upl needed to encompass
         * the requested read... limit each call to cluster_io
         * to the maximum UPL size... cluster_io will clip if
         * this exceeds the maximum io_size for the device,
         * make sure to account for
         * a starting offset that's not page aligned
         */
        start_offset = (int)(f_offset & PAGE_MASK_64);
        upl_f_offset = f_offset - (off_t)start_offset;
        max_size     = filesize - f_offset;

        if (resid < max_size)
            io_size = resid;
        else
            io_size = max_size;

        upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
        if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
            upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
        pages_in_upl = upl_size / PAGE_SIZE;

        kret = ubc_create_upl(vp,
                              upl_f_offset,
                              upl_size,
                              &upl,
                              &pl,
                              UPL_RET_ONLY_ABSENT);
        if (kret != KERN_SUCCESS)
            return (retval);

        issued_io = 0;

        /*
         * before we start marching forward, we must make sure we end on
         * a present page, otherwise we will be working with a freed
         * upl
         */
        for (last_pg = pages_in_upl - 1; last_pg >= 0; last_pg--) {
            if (upl_page_present(pl, last_pg))
                break;
        }
        pages_in_upl = last_pg + 1;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
                     (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);

        for (last_pg = 0; last_pg < pages_in_upl; ) {
            /*
             * scan from the beginning of the upl looking for the first
             * page that is present.... this will become the first page in
             * the request we're going to make to 'cluster_io'... if all
             * of the pages are absent, we won't call through to 'cluster_io'
             */
            for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
                if (upl_page_present(pl, start_pg))
                    break;
            }

            /*
             * scan from the starting present page looking for an absent
             * page before the end of the upl is reached, if we
             * find one, then it will terminate the range of pages being
             * presented to 'cluster_io'
             */
            for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
                if (!upl_page_present(pl, last_pg))
                    break;
            }

            if (last_pg > start_pg) {
                /*
                 * we found a range of pages that must be filled
                 * if the last page in this range is the last page of the file
                 * we may have to clip the size of it to keep from reading past
                 * the end of the last physical block associated with the file
                 */
                upl_offset = start_pg * PAGE_SIZE;
                io_size    = (last_pg - start_pg) * PAGE_SIZE;

                if ((upl_f_offset + upl_offset + io_size) > filesize)
                    io_size = filesize - (upl_f_offset + upl_offset);

                /*
                 * issue an asynchronous read to cluster_io
                 */
                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, devblocksize,
                                    CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0);

                issued_io = 1;
            }
        }
        if (issued_io == 0)
            ubc_upl_abort(upl, 0);

        io_size = upl_size - start_offset;

        if (io_size > resid)
            io_size = resid;
        f_offset += io_size;
        resid    -= io_size;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
                 (int)f_offset, resid, retval, 0, 0);

    return (retval);
}
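/*
 * Illustrative sketch (not part of the original file): the run detection used
 * by advisory_read above, expressed as a helper.  Given a page list created
 * with UPL_RET_ONLY_ABSENT, it finds the next run [*start_pg, *last_pg) of
 * pages that are actually present in the upl and therefore need an I/O;
 * holes in the upl are pages already resident in the cache.  The helper name
 * is hypothetical, and the first call is expected to pass *last_pg = 0.
 */
static int
cluster_doc_next_absent_run(upl_page_info_t *pl, int pages_in_upl, int *start_pg, int *last_pg)
{
    int pg;

    /* skip over the leading hole (pages already resident in the cache) */
    for (pg = *last_pg; pg < pages_in_upl; pg++) {
        if (upl_page_present(pl, pg))
            break;
    }
    *start_pg = pg;

    /* extend the run until the next hole or the end of the upl */
    for ( ; pg < pages_in_upl; pg++) {
        if (!upl_page_present(pl, pg))
            break;
    }
    *last_pg = pg;

    return (*last_pg > *start_pg);
}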
int
cluster_push(vp)
    struct vnode *vp;
{
    int  retval;
    int  start_pg;
    int  last_pg;
    int  end_pg;

    if (!UBCINFOEXISTS(vp) || vp->v_clen == 0) {
        vp->v_flag &= ~VHASDIRTY;
        return (0);
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_START,
                 vp->v_flag & VHASDIRTY, vp->v_clen, 0, 0, 0);

    if (vp->v_flag & VHASDIRTY) {
        start_pg = vp->v_cstart;
        end_pg   = vp->v_lastw;

        vp->v_flag &= ~VHASDIRTY;
        vp->v_clen = 0;

        while (start_pg < end_pg) {
            last_pg = start_pg + MAX_UPL_TRANSFER;

            if (last_pg > end_pg)
                last_pg = end_pg;

            cluster_push_x(vp, ubc_getsize(vp), start_pg, last_pg, 0);

            start_pg = last_pg;
        }
        return (1);
    }
    retval = cluster_try_push(vp, ubc_getsize(vp), 0, 1);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_END,
                 vp->v_flag & VHASDIRTY, vp->v_clen, retval, 0, 0);

    return (retval);
}
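/*
 * Illustrative note (not part of the original file): in the VHASDIRTY case
 * above the dirty range is walked in MAX_UPL_TRANSFER-page chunks.  Assuming
 * MAX_UPL_TRANSFER is 256, a range with v_cstart = 0 and v_lastw = 600 is
 * pushed as three calls covering pages [0,256), [256,512) and [512,600).
 */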
static int
cluster_try_push(vp, EOF, can_delay, push_all)
    struct vnode *vp;
    off_t         EOF;
    int           can_delay;
    int           push_all;
{
    int cl_index;
    int cl_index1;
    int min_index;
    int cl_len;
    int cl_pushed;
    struct v_cluster l_clusters[MAX_CLUSTERS];

    /*
     * make a local 'sorted' copy of the clusters
     * and clear vp->v_clen so that new clusters can
     * be developed
     */
    for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
        for (min_index = -1, cl_index1 = 0; cl_index1 < vp->v_clen; cl_index1++) {
            if (vp->v_clusters[cl_index1].start_pg == vp->v_clusters[cl_index1].last_pg)
                continue;
            if (min_index == -1)
                min_index = cl_index1;
            else if (vp->v_clusters[cl_index1].start_pg < vp->v_clusters[min_index].start_pg)
                min_index = cl_index1;
        }
        if (min_index == -1)
            break;
        l_clusters[cl_index].start_pg = vp->v_clusters[min_index].start_pg;
        l_clusters[cl_index].last_pg  = vp->v_clusters[min_index].last_pg;

        vp->v_clusters[min_index].start_pg = vp->v_clusters[min_index].last_pg;
    }
    cl_len = cl_index;
    vp->v_clen = 0;

    for (cl_pushed = 0, cl_index = 0; cl_index < cl_len; cl_index++) {
        /*
         * try to push each cluster in turn... cluster_push_x may not
         * push the cluster if can_delay is TRUE and the cluster doesn't
         * meet the criteria for an immediate push
         */
        if (cluster_push_x(vp, EOF, l_clusters[cl_index].start_pg, l_clusters[cl_index].last_pg, can_delay)) {
            l_clusters[cl_index].start_pg = 0;
            l_clusters[cl_index].last_pg  = 0;

            cl_pushed++;

            if (push_all == 0)
                break;
        }
    }
    if (cl_len > cl_pushed) {
        /*
         * we didn't push all of the clusters, so
         * let's try to merge them back in to the vnode
         */
        if ((MAX_CLUSTERS - vp->v_clen) < (cl_len - cl_pushed)) {
            /*
             * we picked up some new clusters while we were trying to
             * push the old ones (I don't think this can happen because
             * I'm holding the lock, but just in case)... the sum of the
             * leftovers plus the new cluster count exceeds our ability
             * to represent them, so fall back to the VHASDIRTY mechanism
             */
            for (cl_index = 0; cl_index < cl_len; cl_index++) {
                if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
                    continue;

                if (l_clusters[cl_index].start_pg < vp->v_cstart)
                    vp->v_cstart = l_clusters[cl_index].start_pg;
                if (l_clusters[cl_index].last_pg > vp->v_lastw)
                    vp->v_lastw = l_clusters[cl_index].last_pg;
            }
            vp->v_flag |= VHASDIRTY;
        } else {
            /*
             * we've got room to merge the leftovers back in
             * just append them starting at the next 'hole'
             * represented by vp->v_clen
             */
            for (cl_index = 0, cl_index1 = vp->v_clen; cl_index < cl_len; cl_index++) {
                if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
                    continue;

                vp->v_clusters[cl_index1].start_pg = l_clusters[cl_index].start_pg;
                vp->v_clusters[cl_index1].last_pg  = l_clusters[cl_index].last_pg;

                if (cl_index1 == 0) {
                    vp->v_cstart = l_clusters[cl_index].start_pg;
                    vp->v_lastw  = l_clusters[cl_index].last_pg;
                } else {
                    if (l_clusters[cl_index].start_pg < vp->v_cstart)
                        vp->v_cstart = l_clusters[cl_index].start_pg;
                    if (l_clusters[cl_index].last_pg > vp->v_lastw)
                        vp->v_lastw = l_clusters[cl_index].last_pg;
                }
                cl_index1++;
            }
            /*
             * update the cluster count
             */
            vp->v_clen = cl_index1;
        }
    }
    return (MAX_CLUSTERS - vp->v_clen);
}
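/*
 * Illustrative sketch (not part of the original file): the selection sort
 * that cluster_try_push performs on the vnode's cluster list, shown on a
 * plain array.  Empty clusters (start_pg == last_pg) are skipped, and each
 * source slot is emptied as it is consumed, exactly as the loop above empties
 * vp->v_clusters.  The struct and function names are hypothetical.
 */
struct doc_cluster {
    daddr_t start_pg;
    daddr_t last_pg;
};

static int
cluster_doc_sort_clusters(struct doc_cluster *src, int nclusters, struct doc_cluster *dst)
{
    int out;
    int i;
    int min_index;

    for (out = 0; out < nclusters; out++) {
        for (min_index = -1, i = 0; i < nclusters; i++) {
            if (src[i].start_pg == src[i].last_pg)
                continue;                       /* empty slot */
            if (min_index == -1 || src[i].start_pg < src[min_index].start_pg)
                min_index = i;
        }
        if (min_index == -1)
            break;                              /* nothing left to copy */

        dst[out] = src[min_index];
        src[min_index].start_pg = src[min_index].last_pg;   /* mark consumed */
    }
    return (out);                               /* number of live clusters copied */
}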
static int
cluster_push_x(vp, EOF, first, last, can_delay)
    struct vnode *vp;
    off_t         EOF;
    daddr_t       first;
    daddr_t       last;
    int           can_delay;
{
    upl_page_info_t *pl;
    upl_t            upl;
    vm_offset_t      upl_offset;
    int              upl_size;
    off_t            upl_f_offset;
    int              pages_in_upl;
    int              start_pg;
    int              last_pg;
    int              io_size;
    int              io_flags;
    int              size;
    int              num_of_dirty;
    kern_return_t    kret;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_START,
                 vp->v_clen, first, last, EOF, 0);

    if ((pages_in_upl = last - first) == 0) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 0, 0, 0, 0);

        return (1);
    }
    upl_size = pages_in_upl * PAGE_SIZE;
    upl_f_offset = ((off_t)first) * PAGE_SIZE_64;
    size = upl_size;

    if (upl_f_offset + upl_size >= EOF) {

        if (upl_f_offset >= EOF) {
            /*
             * must have truncated the file and missed
             * clearing a dangling cluster (i.e. it's completely
             * beyond the new EOF)
             */
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 1, 0, 0, 0);

            return (1);
        }
        size = EOF - upl_f_offset;

        upl_size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
        pages_in_upl = upl_size / PAGE_SIZE;
    }
    if (can_delay && (pages_in_upl < (MAX_UPL_TRANSFER - (MAX_UPL_TRANSFER / 2))))
        return (0);

    kret = ubc_create_upl(vp,
                          upl_f_offset,
                          upl_size,
                          &upl,
                          &pl,
                          UPL_RET_ONLY_DIRTY);
    if (kret != KERN_SUCCESS)
        panic("cluster_push: failed to get pagelist");

    if (can_delay) {
        for (num_of_dirty = 0, start_pg = 0; start_pg < pages_in_upl; start_pg++) {
            if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
                num_of_dirty++;
        }
        if (num_of_dirty < pages_in_upl / 2) {
            ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 0, 2, num_of_dirty, (pages_in_upl / 2), 0);

            return (0);
        }
    }
    last_pg = 0;

    while (size) {

        for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
            if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
                break;
        }
        if (start_pg > last_pg) {
            io_size = (start_pg - last_pg) * PAGE_SIZE;

            ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size,
                                UPL_ABORT_FREE_ON_EMPTY);

            if (io_size < size)
                size -= io_size;
            else
                break;
        }
        for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
            if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
                break;
        }
        upl_offset = start_pg * PAGE_SIZE;

        io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);

        if (vp->v_flag & VNOCACHE_DATA)
            io_flags = CL_COMMIT | CL_AGE | CL_ASYNC | CL_DUMP;
        else
            io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;

        while (vp->v_numoutput >= ASYNC_THROTTLE) {
            vp->v_flag |= VTHROTTLED;
            tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_push", 0);
        }
        cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, vp->v_ciosiz, io_flags, (struct buf *)0);

        size -= io_size;
    }
    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 3, 0, 0, 0);
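    /*
     * Illustrative note (not part of the original file): with can_delay set,
     * the push is skipped while the cluster is still small (pages_in_upl less
     * than MAX_UPL_TRANSFER - MAX_UPL_TRANSFER / 2, i.e. under half a maximal
     * UPL) and skipped again if fewer than half of its pages turned out to be
     * dirty, so delayed pushes only go to disk once a reasonably large and
     * reasonably dense run of dirty pages has accumulated.
     */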