/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1993
 *    The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the University of
 *    California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *    @(#)vfs_cluster.c    8.10 (Berkeley) 3/28/95
 */
#include <sys/param.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

#include <vm/vm_pageout.h>
#include <mach/memory_object_types.h>

#include <sys/kdebug.h>
#define CL_COMMIT    0x04
#define CL_PAGEOUT   0x10
#define CL_NOZERO    0x80
#define CL_PAGEIN    0x100

/*
 * throttle the number of async writes that
 * can be outstanding on a single vnode
 * before we issue a synchronous write
 */
#define ASYNC_THROTTLE  3
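/*
 * cluster_iodone:
 * I/O completion handler for a clustered transaction.  The code below walks
 * the chain of component buffers reached through b_trans_head, waits until
 * every one of them is marked B_DONE, accumulates the total byte count,
 * residual and error state, releases any separately allocated vector list,
 * propagates the result into the originating "real" buffer when
 * B_NEED_IODONE is set, and then commits or aborts the covering upl range
 * according to the error and B_COMMIT_UPL/B_PAGEOUT/B_NOCACHE state.
 */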
    struct buf   *cbp_head;
    struct buf   *cbp_next;

    cbp_head = (struct buf *)(bp->b_trans_head);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
        cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);

    for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
        /*
         * all I/O requests that are part of this transaction
         * have to complete before we can process it
         */
        if ( !(cbp->b_flags & B_DONE)) {
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0);

    upl_offset = cbp->b_uploffset;
    upl        = cbp->b_pagelist;
    b_flags    = cbp->b_flags;
    real_bp    = cbp->b_real_bp;

    if (cbp->b_vectorcount > 1)
        _FREE(cbp->b_vectorlist, M_SEGMENT);

    if ((cbp->b_flags & B_ERROR) && error == 0)
        error = cbp->b_error;

    total_resid += cbp->b_resid;
    total_size  += cbp->b_bcount;

    cbp_next = cbp->b_trans_next;

    if ((b_flags & B_NEED_IODONE) && real_bp) {
        real_bp->b_flags |= B_ERROR;
        real_bp->b_error  = error;

        real_bp->b_resid = total_resid;

    if (error == 0 && total_resid)

    if (b_flags & B_COMMIT_UPL) {
        pg_offset   = upl_offset & PAGE_MASK;
        commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;

        if (error || (b_flags & B_NOCACHE)) {
            if (b_flags & B_PAGEOUT)
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
                upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

            kernel_upl_abort_range(upl, upl_offset - pg_offset, commit_size, upl_abort_code);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                upl, upl_offset - pg_offset, commit_size,
                0x80000000|upl_abort_code, 0);

            int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;

            if ( !(b_flags & B_PAGEOUT))
                upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;
            upl_commit_flags |= UPL_COMMIT_INACTIVATE;

            kernel_upl_commit_range(upl, upl_offset - pg_offset,
                commit_size, upl_commit_flags,
                UPL_GET_INTERNAL_PAGE_LIST(upl),

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
                upl, upl_offset - pg_offset, commit_size,
                upl_commit_flags, 0);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
            upl, upl_offset, 0, error, 0);
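/*
 * cluster_zero:
 * zero 'size' bytes starting at 'upl_offset' within the given upl.  Unless
 * CL_NOMAP is passed, the upl is temporarily mapped into the kernel map
 * with kernel_upl_map()/kernel_upl_unmap(); with CL_NOMAP the caller's
 * buffer mapping (bp->b_data) is used directly for the bzero.
 */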
cluster_zero(upl, upl_offset, size, flags, bp)
    vm_offset_t   upl_offset;

    vm_offset_t   io_addr = 0;

    if ( !(flags & CL_NOMAP)) {
        kret = kernel_upl_map(kernel_map, upl, &io_addr);

        if (kret != KERN_SUCCESS)
            panic("cluster_zero: kernel_upl_map() failed with (%d)", kret);
            panic("cluster_zero: kernel_upl_map mapped 0");
        io_addr = (vm_offset_t)bp->b_data;

    bzero((caddr_t)(io_addr + upl_offset), size);

    if ( !(flags & CL_NOMAP)) {
        kret = kernel_upl_unmap(kernel_map, upl);

        if (kret != KERN_SUCCESS)
            panic("cluster_zero: kernel_upl_unmap failed");
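/*
 * cluster_io:
 * core I/O engine shared by the cluster read, write, pagein and pageout
 * paths.  It maps the file range with VOP_CMAP, zero-fills any holes
 * (blkno == -1) or a trailing partial page on a read, carves the transfer
 * into one or more buffers with per-page iovec lists built from the upl,
 * chains them through b_trans_next/b_trans_head, and hands them to
 * VOP_STRATEGY.  For synchronous requests it then waits in cluster_iodone;
 * on failure it aborts the untouched portion of the upl according to the
 * CL_COMMIT/CL_PAGEOUT/CL_PAGEIN flags.
 */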
cluster_io(vp, upl, upl_offset, f_offset, size, flags, real_bp)
    vm_offset_t   upl_offset;

    struct buf   *cbp_head = 0;
    struct buf   *cbp_tail = 0;

        io_flags = (B_VECTORLIST | B_READ);
        io_flags = (B_VECTORLIST | B_WRITEINPROG);

    pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

    if (flags & CL_ASYNC)
        io_flags |= (B_CALL | B_ASYNC);
        io_flags |= B_NOCACHE;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
        (int)f_offset, size, upl_offset, flags, 0);

    if ((flags & CL_READ) && ((upl_offset + size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
        /*
         * then we are going to end up
         * with a page that we can't complete (the file size wasn't a multiple
         * of PAGE_SIZE and we're trying to read to the end of the file
         * so we'll go ahead and zero out the portion of the page we can't
         * read in from the file
         */
        cluster_zero(upl, upl_offset + size, PAGE_SIZE - ((upl_offset + size) & PAGE_MASK), flags, real_bp);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
            upl_offset + size, PAGE_SIZE - ((upl_offset + size) & PAGE_MASK),

    if (size > MAXPHYSIO)

    if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, &io_size, NULL)) {
        if (error == EOPNOTSUPP)
            panic("VOP_CMAP Unimplemented");

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
        (int)f_offset, (int)blkno, io_size, 0, 0);

    if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {

    lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);

    /*
     * we have now figured out how much I/O we can do - this is in 'io_size'
     * pl_index represents the first page in the 'upl' that the I/O will occur for
     * pg_offset is the starting point in the first page for the I/O
     * pg_count is the number of full and partial pages that 'io_size' encompasses
     */
    pl_index  = upl_offset / PAGE_SIZE;
    pg_offset = upl_offset & PAGE_MASK;
    pg_count  = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;

    if ((flags & CL_READ) && (long)blkno == -1) {
        /*
         * if we're reading and blkno == -1, then we've got a
         * 'hole' in the file that we need to deal with by zeroing
         * out the affected area in the upl
         */
        cluster_zero(upl, upl_offset, io_size, flags, real_bp);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
            upl_offset, io_size, flags, real_bp, 0);

        pg_count = (io_size - pg_offset) / PAGE_SIZE;

        if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))

            pg_resid = PAGE_SIZE - pg_offset;

        if (flags & CL_COMMIT)
            kernel_upl_commit_range(upl,
                upl_offset + pg_resid,
                pg_count * PAGE_SIZE,
                UPL_COMMIT_CLEAR_DIRTY
                | UPL_COMMIT_FREE_ON_EMPTY,
                pl, MAX_UPL_TRANSFER);

        upl_offset += io_size;

        if (cbp_head && pg_count)

    } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
        real_bp->b_blkno = blkno;

    /*
     * we need to allocate space for the vector list
     */
    iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
        M_SEGMENT, M_NOWAIT);

    if (iovp == (struct iovec *) 0) {
        /*
         * if the allocation fails, then throttle down to a single page
         */
        io_size = PAGE_SIZE - pg_offset;

    cbp = alloc_io_buf(vp);

        /*
         * we use the io vector that's reserved in the buffer header
         * this insures we can always issue an I/O even in a low memory
         * condition that prevents the _MALLOC from succeeding... this
         * is necessary to prevent deadlocks with the pager
         */
        iovp = (struct iovec *)(&cbp->b_vects[0]);

    cbp->b_vectorlist  = (void *)iovp;
    cbp->b_vectorcount = pg_count;

    for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {

            psize = PAGE_SIZE - pg_offset;

        iovp->iov_len  = psize;
        iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);

        if (iovp->iov_base == (caddr_t) 0) {
            _FREE(cbp->b_vectorlist, M_SEGMENT);

        iovp->iov_base += pg_offset;

        if (flags & CL_PAGEOUT) {
            if (bp = incore(vp, lblkno + i)) {
                if (!ISSET(bp->b_flags, B_BUSY)) {
                    SET(bp->b_flags, (B_BUSY | B_INVAL));
                    panic("BUSY bp found in cluster_io");

    if (flags & CL_ASYNC)
        cbp->b_iodone = (void *)cluster_iodone;
    cbp->b_flags |= io_flags;

    cbp->b_lblkno     = lblkno;
    cbp->b_blkno      = blkno;
    cbp->b_bcount     = io_size;
    cbp->b_pagelist   = upl;
    cbp->b_uploffset  = upl_offset;
    cbp->b_trans_next = (struct buf *)0;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
            cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
            cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);

        cbp_tail->b_trans_next = cbp;

    (struct buf *)(cbp->b_trans_head) = cbp_head;

    upl_offset += io_size;

    if ( !(upl_offset & PAGE_MASK) || size == 0) {
        /*
         * if we have no more I/O to issue or
         * the current I/O we've prepared fully
         * completes the last page in this request
         * or it's been completed via a zero-fill
         * due to a 'hole' in the file
         * then go ahead and issue the I/O
         */
        if (flags & CL_COMMIT)
            cbp_head->b_flags |= B_COMMIT_UPL;
        if (flags & CL_PAGEOUT)
            cbp_head->b_flags |= B_PAGEOUT;

            cbp_head->b_flags |= B_NEED_IODONE;
            cbp_head->b_real_bp = real_bp;

        for (cbp = cbp_head; cbp;) {
            struct buf * cbp_next;

            if (io_flags & B_WRITEINPROG)
                cbp->b_vp->v_numoutput++;

            cbp_next = cbp->b_trans_next;

            (void) VOP_STRATEGY(cbp);

        if ( !(flags & CL_ASYNC)) {
            for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)

            if (error = cluster_iodone(cbp_head)) {

        cbp_head = (struct buf *)0;
        cbp_tail = (struct buf *)0;

    for (cbp = cbp_head; cbp;) {
        struct buf * cbp_next;

        if (cbp->b_vectorcount > 1)
            _FREE(cbp->b_vectorlist, M_SEGMENT);
        cbp_next = cbp->b_trans_next;

    pg_offset = upl_offset & PAGE_MASK;
    pg_count  = (size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;

    if (flags & CL_COMMIT) {
        if (flags & CL_PAGEOUT)
            upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
        else if (flags & CL_PAGEIN)
            upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
            upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;

        kernel_upl_abort_range(upl, upl_offset - pg_offset, pg_count * PAGE_SIZE, upl_abort_code);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
            upl, upl_offset - pg_offset, pg_count * PAGE_SIZE, error, 0);

        real_bp->b_flags |= B_ERROR;
        real_bp->b_error  = error;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
        (int)f_offset, size, upl_offset, retval, 0);
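/*
 * cluster_rd_prefetch:
 * issue an asynchronous read-ahead for up to 'size' bytes starting at
 * f_offset, clipped to the file size and to MAXPHYSIO.  Pages that are
 * already valid at the head or tail of the upl are released untouched;
 * only the invalid range in between is passed to cluster_io with
 * CL_READ | CL_COMMIT | CL_ASYNC | CL_AGE.
 */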
cluster_rd_prefetch(vp, object, f_offset, size, filesize, devblocksize)

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
        (int)f_offset, size, (int)filesize, 0, 0);

    if (f_offset >= filesize) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
            (int)f_offset, 0, 0, 0, 0);

    if (memory_object_page_op(object, (vm_offset_t)f_offset, 0, 0, 0) == KERN_SUCCESS) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
            (int)f_offset, 0, 0, 0, 0);

    if (size > MAXPHYSIO)

    size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);

    if ((off_t)size > (filesize - f_offset))
        size = ((filesize - f_offset) + (devblocksize - 1)) & ~(devblocksize - 1);

    pages_in_upl = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;

    vm_fault_list_request(object, (vm_object_offset_t)f_offset, pages_in_upl * PAGE_SIZE, &upl, NULL, 0,
        UPL_CLEAN_IN_PLACE | UPL_NO_SYNC | UPL_SET_INTERNAL);
    if (upl == (upl_t) 0)

    pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

    /*
     * scan from the beginning of the upl looking for the first
     * non-valid page.... this will become the first page in
     * the request we're going to make to 'cluster_io'... if all
     * of the pages are valid, we won't call through to 'cluster_io'
     */
    for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
        if (!upl_valid_page(pl, start_pg))

    /*
     * scan from the starting invalid page looking for a valid
     * page before the end of the upl is reached, if we
     * find one, then it will be the last page of the request to
     */
    for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
        if (upl_valid_page(pl, last_pg))

    /*
     * if we find any more free valid pages at the tail of the upl
     * then update maxra accordingly....
     */
    for (last_valid = last_pg; last_valid < pages_in_upl; last_valid++) {
        if (!upl_valid_page(pl, last_valid))

    if (start_pg < last_pg) {
        vm_offset_t   upl_offset;

        /*
         * we found a range of 'invalid' pages that must be filled
         * 'size' has already been clipped to the LEOF
         * make sure it's at least a multiple of the device block size
         */
        upl_offset = start_pg * PAGE_SIZE;
        io_size    = (last_pg - start_pg) * PAGE_SIZE;

        if ((upl_offset + io_size) > size) {
            io_size = size - upl_offset;

        KERNEL_DEBUG(0xd001000, upl_offset, size, io_size, 0, 0);

        cluster_io(vp, upl, upl_offset, f_offset + upl_offset, io_size,
            CL_READ | CL_COMMIT | CL_ASYNC | CL_AGE, (struct buf *)0);

        /*
         * start_pg of non-zero indicates we found some already valid pages
         * at the beginning of the upl.... we need to release these without
         * modifying their state
         */
        kernel_upl_abort_range(upl, 0, start_pg * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 50)) | DBG_FUNC_NONE,
            upl, 0, start_pg * PAGE_SIZE, 0, 0);

    if (last_pg < pages_in_upl) {
        /*
         * the set of pages that we issued an I/O for did not extend all the
         * way to the end of the upl... so just release them without modifying
         */
        kernel_upl_abort_range(upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE,
            UPL_ABORT_FREE_ON_EMPTY);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 50)) | DBG_FUNC_NONE,
            upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE, 0, 0);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
        (int)f_offset + (last_valid * PAGE_SIZE), 0, 0, 0, 0);
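/*
 * cluster_rd_ahead:
 * sequential-access read-ahead policy.  Using v_lastr/v_maxra/v_ralen kept
 * in the vnode, it detects a sequential pattern, grows the read-ahead
 * window (doubling it, up to MAXPHYSIO/PAGE_SIZE pages), and calls
 * cluster_rd_prefetch for the blocks just beyond the larger of e_lblkno
 * and v_maxra, recording the new high-water mark in v_maxra.
 */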
cluster_rd_ahead(vp, object, b_lblkno, e_lblkno, filesize, devblocksize)

    int           size_of_prefetch;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
        b_lblkno, e_lblkno, vp->v_lastr, 0, 0);

    if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
            vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);

    if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) && b_lblkno != (vp->v_maxra + 1))) {

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
            vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);

    vp->v_ralen = vp->v_ralen ? min(MAXPHYSIO/PAGE_SIZE, vp->v_ralen << 1) : 1;

    if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
        vp->v_ralen = min(MAXPHYSIO/PAGE_SIZE, (e_lblkno + 1) - b_lblkno);

    if (e_lblkno < vp->v_maxra) {
        if ((vp->v_maxra - e_lblkno) > ((MAXPHYSIO/PAGE_SIZE) / 4)) {

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
                vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);

    r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
    f_offset = (off_t)r_lblkno * PAGE_SIZE_64;

    size_of_prefetch = cluster_rd_prefetch(vp, object, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);

    if (size_of_prefetch)
        vp->v_maxra = r_lblkno + (size_of_prefetch - 1);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
        vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
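/*
 * cluster_pageout:
 * VM pageout entry point.  It validates the request (non-negative, page
 * aligned, within the file, not on a read-only mount), trims it to the
 * file size rounded to a device block, aborts any excess upl range, and
 * issues the write through cluster_io with CL_PAGEOUT plus CL_ASYNC /
 * CL_COMMIT as dictated by the UPL_IOSYNC and UPL_NOCOMMIT flags.
 */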
cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    vm_offset_t   upl_offset;

    int           local_flags = CL_PAGEOUT;

    if ((flags & UPL_IOSYNC) == 0)
        local_flags |= CL_ASYNC;
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags |= CL_COMMIT;

    if (upl == (upl_t) 0)
        panic("cluster_pageout: can't handle NULL upl yet\n");

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
        (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * If they didn't specify any I/O, then we are done...
     * we can't issue an abort because we don't know how
     * big the upl really is
     */
    if (vp->v_mount->mnt_flag & MNT_RDONLY) {
        if (local_flags & CL_COMMIT)
            kernel_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);

    /*
     * can't page-in from a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
        (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            kernel_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);

    max_size = filesize - f_offset;

    io_size = (max_size + (devblocksize - 1)) & ~(devblocksize - 1);

    pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (size > pg_size) {
        if (local_flags & CL_COMMIT)
            kernel_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
                UPL_ABORT_FREE_ON_EMPTY);

    return (cluster_io(vp, upl, upl_offset, f_offset, io_size,
        local_flags, (struct buf *)0));
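/*
 * cluster_pagein:
 * VM pagein entry point.  After the same offset/size validation as the
 * pageout path, it creates a upl via vm_fault_list_request when the caller
 * did not supply one, issues the read through cluster_io with
 * CL_READ | CL_PAGEIN, and, unless UPL_NORDAHEAD or VRAOFF is set, kicks
 * off sequential read-ahead and records the last block read in v_lastr.
 */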
cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
    vm_offset_t   upl_offset;

    /*
     * If they didn't ask for any data, then we are done...
     * we can't issue an abort because we don't know how
     * big the upl really is
     */
    if ((flags & UPL_NOCOMMIT) == 0)
        local_flags = CL_COMMIT;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
        (int)f_offset, size, (int)filesize, local_flags, 0);

    /*
     * can't page-in from a negative offset
     * or if we're starting beyond the EOF
     * or if the file offset isn't page aligned
     * or the size requested isn't a multiple of PAGE_SIZE
     */
    if (f_offset < 0 || f_offset >= filesize ||
        (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
        if (local_flags & CL_COMMIT)
            kernel_upl_abort_range(upl, upl_offset, size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);

    max_size = filesize - f_offset;

    io_size = (max_size + (devblocksize - 1)) & ~(devblocksize - 1);

    pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

    if (upl == (upl_t) 0) {
        object = ubc_getobject(vp, UBC_PAGINGOP|UBC_NOREACTIVATE);
        if (object == (void *)NULL)
            panic("cluster_pagein: ubc_getobject failed");

        vm_fault_list_request(object, (vm_offset_t)f_offset, pg_size, &upl, NULL, 0,
            UPL_CLEAN_IN_PLACE | UPL_NO_SYNC | UPL_SET_INTERNAL);
        if (upl == (upl_t) 0)

        upl_offset = (vm_offset_t)0;

    if (size > pg_size) {
        if (local_flags & CL_COMMIT)
            kernel_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
                UPL_ABORT_FREE_ON_EMPTY);

    retval = cluster_io(vp, upl, upl_offset, f_offset, io_size,
        local_flags | CL_READ | CL_PAGEIN, (struct buf *)0);

    b_lblkno = (int)(f_offset / PAGE_SIZE_64);
    e_lblkno = (int)
        ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);

    if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF)) {
        if (object == (void *)0) {
            object = ubc_getobject(vp, UBC_PAGINGOP|UBC_NOREACTIVATE);
            if (object == (void *)NULL)
                panic("cluster_pagein: ubc_getobject failed");

        /*
         * we haven't read the last page in of the file yet
         * so let's try to read ahead if we're in
         * a sequential access pattern
         */
        cluster_rd_ahead(vp, object, b_lblkno, e_lblkno, filesize, devblocksize);

    vp->v_lastr = e_lblkno;
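/*
 * cluster_bp:
 * issue the I/O described by a conventional buffer whose pages are already
 * attached as a upl (b_pagelist).  The buffer's logical block is converted
 * to a file offset with ubc_blktooff and the request is passed to
 * cluster_io asynchronously with CL_NOMAP (plus CL_READ for reads).
 */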
    if (bp->b_pagelist == (upl_t) 0)
        panic("cluster_bp: can't handle NULL upl yet\n");
    if (bp->b_flags & B_READ)
        flags = CL_ASYNC | CL_NOMAP | CL_READ;
        flags = CL_ASYNC | CL_NOMAP;

    f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);

    return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, flags, bp));
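/*
 * cluster_write:
 * top-level write entry point.  Small, zero-filling, or non-page-aligned
 * requests (and anything not coming from user space or not marked
 * VNOCACHE_DATA) go straight to the buffered path, cluster_write_x.
 * Larger cache-bypassing writes are trimmed to page-aligned pieces and fed
 * to cluster_nocopy_write, with cluster_write_x handling the unaligned
 * head and tail of each iovec.
 */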
cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)

    object = ubc_getobject(vp, UBC_NOREACTIVATE);
    if (object == (void *)NULL)
        panic("cluster_write: ubc_getobject failed");

    /*
     * We set a threshold of 4 pages to decide if the nocopy
     * write loop is worth the trouble...
     */
    if ((!uio) || (uio->uio_resid < 4 * PAGE_SIZE) ||
        (flags & IO_TAILZEROFILL) || (flags & IO_HEADZEROFILL) ||
        (uio->uio_segflg != UIO_USERSPACE) || (!(vp->v_flag & VNOCACHE_DATA)))

        retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);

    while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0)

        /* we know we have a resid, so this is safe */
        while (iov->iov_len == 0) {

        if (uio->uio_offset & PAGE_MASK_64)

            /* Bring the file offset write up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;

            /*
             * Fake the resid going into the cluster_write_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

        else if ((int)iov->iov_base & PAGE_MASK_64)

            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */
            max_io_size = newEOF - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = max_io_size;

            if (clip_size < PAGE_SIZE)

                /*
                 * Take care of tail end of write in this vector
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_write(object, vp, uio, newEOF, devblocksize, flags);
                if ((retval == 0) && uio->uio_resid)
                    retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
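/*
 * cluster_nocopy_write:
 * write directly from the user's buffer without copying through the page
 * cache.  Each iteration wires the user pages with vm_map_get_upl
 * (retrying with increasing force_data_sync), evicts any pages for the
 * same file range that are already cached (UPL_POP_DUMP), issues a
 * synchronous cluster_io write, and then advances the uio/iovec by the
 * amount written, committing or aborting the upl as appropriate.
 */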
cluster_nocopy_write(object, vp, uio, newEOF, devblocksize, flags)

    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    int              upl_needed_size;
    int              force_data_sync;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
        (int)uio->uio_offset, (int)uio->uio_resid,
        (int)newEOF, devblocksize, 0);

    /*
     * When we enter this routine, we know
     *  -- the offset into the file is on a pagesize boundary
     *  -- the resid is a page multiple
     *  -- the resid will not exceed iov_len
     */
    while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {

        io_size = uio->uio_resid;
        if (io_size > MAXPHYSIO)
            io_size = MAXPHYSIO;

        upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
        upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
            (int)upl_offset, upl_needed_size, iov->iov_base, io_size, 0);

        for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)

            upl_size = upl_needed_size;
            upl_flags = UPL_COPYOUT_FROM | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

            kret = vm_map_get_upl(current_map(),
                (vm_offset_t)iov->iov_base & ~PAGE_MASK,
                &upl_size, &upl, &pl, &pages_in_pl, &upl_flags, force_data_sync);

            pages_in_pl = upl_size / PAGE_SIZE;

            if (kret != KERN_SUCCESS)

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                    (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);

                /* cluster_nocopy_write: failed to get pagelist */
                /* do not return kret here */

            for(i=0; i < pages_in_pl; i++)

                if (!upl_valid_page(pl, i))

            if (i == pages_in_pl)

            kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                UPL_ABORT_FREE_ON_EMPTY);

        if (force_data_sync >= 3)

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);

        /*
         * Consider the possibility that upl_size wasn't satisfied.
         */
        if (upl_size != upl_needed_size)
            io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
            (int)upl_offset, upl_size, iov->iov_base, io_size, 0);

            kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                UPL_ABORT_FREE_ON_EMPTY);
            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
                (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);

        /*
         * Now look for pages already in the cache
         * and throw them away.
         */
        upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
        max_io_size = io_size;

        while (max_io_size) {

            /*
             * Flag UPL_POP_DUMP says if the page is found
             * in the page cache it must be thrown away.
             */
            memory_object_page_op(object, (vm_offset_t)upl_f_offset,
                UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,

            max_io_size  -= PAGE_SIZE;
            upl_f_offset += PAGE_SIZE;

        /*
         * issue a synchronous write to cluster_io
         */
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
            (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);

        error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
            io_size, 0, (struct buf *)0);

            /*
             * The cluster_io write completed successfully,
             * update the uio structure and commit.
             */
            kernel_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY,
                pl, MAX_UPL_TRANSFER);

            iov->iov_base   += io_size;
            iov->iov_len    -= io_size;
            uio->uio_resid  -= io_size;
            uio->uio_offset += io_size;

            kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
                UPL_ABORT_FREE_ON_EMPTY);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
            (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
        (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
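/*
 * cluster_write_x:
 * buffered write path.  For each MAXPHYSIO-sized chunk it builds a upl over
 * the affected file range, pre-reads the first and last pages when the
 * write does not start or end on a page boundary, zero-fills any
 * IO_HEADZEROFILL/IO_TAILZEROFILL ranges, copies the user data in with
 * uiomove, and then either leaves the dirty pages in the delayed-write
 * cluster tracked by v_cstart/v_lastw/v_clen/v_ciosiz or pushes them out
 * through cluster_io (synchronously when IO_SYNC is set or the vnode has
 * more than ASYNC_THROTTLE writes outstanding).
 */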
cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)

    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    int              io_size_before_rounding;
    vm_offset_t      io_address;
    long long        total_size;
    long long        zero_cnt1;
    daddr_t          start_blkno;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);

    uio_resid = uio->uio_resid;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
        0, 0, (int)oldEOF, (int)newEOF, 0);

    if (flags & IO_HEADZEROFILL) {
        /*
         * some filesystems (HFS is one) don't support unallocated holes within a file...
         * so we zero fill the intervening space between the old EOF and the offset
         * where the next chunk of real data begins.... ftruncate will also use this
         * routine to zero fill to the new EOF when growing a file... in this case, the
         * uio structure will not be provided
         */
        if (headOff < uio->uio_offset) {
            zero_cnt = uio->uio_offset - headOff;

        } else if (headOff < newEOF) {
            zero_cnt = newEOF - headOff;

    if (flags & IO_TAILZEROFILL) {
        zero_off1 = uio->uio_offset + uio->uio_resid;

        if (zero_off1 < tailOff)
            zero_cnt1 = tailOff - zero_off1;

    if (zero_cnt == 0 && uio == (struct uio *) 0)
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
            retval, 0, 0, 0, 0);

    while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
        /*
         * for this iteration of the loop, figure out where our starting point is
         */
            start_offset = (int)(zero_off & PAGE_MASK_64);
            upl_f_offset = zero_off - start_offset;
        } else if (uio_resid) {
            start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
            upl_f_offset = uio->uio_offset - start_offset;

            start_offset = (int)(zero_off1 & PAGE_MASK_64);
            upl_f_offset = zero_off1 - start_offset;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
            (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);

        if (total_size > (long long)MAXPHYSIO)
            total_size = MAXPHYSIO;

        /*
         * compute the size of the upl needed to encompass
         * the requested write... limit each call to cluster_io
         * to at most MAXPHYSIO, make sure to account for
         * a starting offset that's not page aligned
         */
        upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

        if (upl_size > MAXPHYSIO)
            upl_size = MAXPHYSIO;

        pages_in_upl = upl_size / PAGE_SIZE;
        io_size      = upl_size - start_offset;

        if ((long long)io_size > total_size)
            io_size = total_size;

        start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
        last_blkno  = start_blkno + pages_in_upl;

        kret = vm_fault_list_request(object,
            (vm_object_offset_t)upl_f_offset, upl_size, &upl, NULL, 0,
            (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL));

        if (kret != KERN_SUCCESS)
            panic("cluster_write: failed to get pagelist");

        pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
            upl, (int)upl_f_offset, upl_size, start_offset, 0);

        if (start_offset && !upl_valid_page(pl, 0)) {

            /*
             * we're starting in the middle of the first page of the upl
             * and the page isn't currently valid, so we're going to have
             * to read it in first... this is a synchronous operation
             */
            read_size = PAGE_SIZE;

            if ((upl_f_offset + read_size) > newEOF) {
                read_size = newEOF - upl_f_offset;
                read_size = (read_size + (devblocksize - 1)) & ~(devblocksize - 1);

            retval = cluster_io(vp, upl, 0, upl_f_offset, read_size,
                CL_READ, (struct buf *)0);

                /*
                 * we had an error during the read which causes us to abort
                 * the current cluster_write request... before we do, we need
                 * to release the rest of the pages in the upl without modifying
                 * their state and mark the failed page in error
                 */
                kernel_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
                kernel_upl_abort(upl, 0);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                    upl, 0, 0, retval, 0);

        if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
            /*
             * the last offset we're writing to in this upl does not end on a page
             * boundary... if it's not beyond the old EOF, then we'll also need to
             * pre-read this page in if it isn't already valid
             */
            upl_offset = upl_size - PAGE_SIZE;

            if ((upl_f_offset + start_offset + io_size) < oldEOF &&
                !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {

                read_size = PAGE_SIZE;

                if ((upl_f_offset + upl_offset + read_size) > newEOF) {
                    read_size = newEOF - (upl_f_offset + upl_offset);
                    read_size = (read_size + (devblocksize - 1)) & ~(devblocksize - 1);

                retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size,
                    CL_READ, (struct buf *)0);

                    /*
                     * we had an error during the read which causes us to abort
                     * the current cluster_write request... before we do, we need
                     * to release the rest of the pages in the upl without modifying
                     * their state and mark the failed page in error
                     */
                    kernel_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
                    kernel_upl_abort(upl, 0);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                        upl, 0, 0, retval, 0);

        if ((kret = kernel_upl_map(kernel_map, upl, &io_address)) != KERN_SUCCESS)
            panic("cluster_write: kernel_upl_map failed\n");
        xfer_resid = io_size;
        io_offset  = start_offset;

        while (zero_cnt && xfer_resid) {

            if (zero_cnt < (long long)xfer_resid)
                bytes_to_zero = zero_cnt;
                bytes_to_zero = xfer_resid;

            if ( !(flags & IO_NOZEROVALID)) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                    (int)upl_f_offset + io_offset, bytes_to_zero,
                    (int)zero_cnt, xfer_resid, 0);

                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));

                if ( !upl_valid_page(pl, (int)(zero_off / PAGE_SIZE_64))) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset, bytes_to_zero,
                        (int)zero_cnt, xfer_resid, 0);

            xfer_resid -= bytes_to_zero;
            zero_cnt   -= bytes_to_zero;
            zero_off   += bytes_to_zero;
            io_offset  += bytes_to_zero;

        if (xfer_resid && uio_resid) {
            bytes_to_move = min(uio_resid, xfer_resid);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
                (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);

            retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);

                if ((kret = kernel_upl_unmap(kernel_map, upl)) != KERN_SUCCESS)
                    panic("cluster_write: kernel_upl_unmap failed\n");
                kernel_upl_abort(upl, UPL_ABORT_DUMP_PAGES);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
                    upl, 0, 0, retval, 0);

                uio_resid  -= bytes_to_move;
                xfer_resid -= bytes_to_move;
                io_offset  += bytes_to_move;

        while (xfer_resid && zero_cnt1 && retval == 0) {

            if (zero_cnt1 < (long long)xfer_resid)
                bytes_to_zero = zero_cnt1;
                bytes_to_zero = xfer_resid;

            if ( !(flags & IO_NOZEROVALID)) {
                bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                    (int)upl_f_offset + io_offset,
                    bytes_to_zero, (int)zero_cnt1, xfer_resid, 0);

                bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
                if ( !upl_valid_page(pl, (int)(zero_off1 / PAGE_SIZE_64))) {
                    bzero((caddr_t)(io_address + io_offset), bytes_to_zero);

                    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                        (int)upl_f_offset + io_offset,
                        bytes_to_zero, (int)zero_cnt1, xfer_resid, 0);

            xfer_resid -= bytes_to_zero;
            zero_cnt1  -= bytes_to_zero;
            zero_off1  += bytes_to_zero;
            io_offset  += bytes_to_zero;

        io_size += start_offset;

        if ((upl_f_offset + io_size) == newEOF && io_size < upl_size) {
            /*
             * if we're extending the file with this write
             * we'll zero fill the rest of the page so that
             * if the file gets extended again in such a way as to leave a
             * hole starting at this EOF, we'll have zeros in the correct spot
             */
            bzero((caddr_t)(io_address + io_size), upl_size - io_size);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
                (int)upl_f_offset + io_size,
                upl_size - io_size, 0, 0, 0);

        if ((kret = kernel_upl_unmap(kernel_map, upl)) != KERN_SUCCESS)
            panic("cluster_write: kernel_upl_unmap failed\n");

        io_size_before_rounding = io_size;

        if (io_size & (devblocksize - 1))
            io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);

            /*
             * we have an existing cluster... see if this write will extend it nicely
             */
            if (start_blkno >= vp->v_cstart) {
                if (last_blkno <= (vp->v_cstart + vp->v_clen)) {
                    /*
                     * we have a write that fits entirely
                     * within the existing cluster limits
                     */
                    if (last_blkno >= vp->v_lastw) {
                        /*
                         * if we're extending the dirty region within the cluster
                         * we need to update the cluster info... we check for blkno
                         * equality because we may be extending the file with a
                         * partial write.... this in turn changes our idea of how
                         * much data to write out (v_ciosiz) for the last page
                         */
                        vp->v_lastw = last_blkno;
                        newsize = io_size + ((start_blkno - vp->v_cstart) * PAGE_SIZE);

                        if (newsize > vp->v_ciosiz)
                            vp->v_ciosiz = newsize;

                if (start_blkno < (vp->v_cstart + vp->v_clen)) {
                    /*
                     * we have a write that starts in the middle of the current cluster
                     * but extends beyond the cluster's limit
                     * we'll clip the current cluster if we actually
                     * overlap with the new write and then push it out
                     * and start a new cluster with the current write
                     */
                    if (vp->v_lastw > start_blkno) {
                        vp->v_lastw = start_blkno;
                        vp->v_ciosiz = (vp->v_lastw - vp->v_cstart) * PAGE_SIZE;

                /*
                 * we also get here for the case where the current write starts
                 * beyond the limit of the existing cluster
                 */

                /*
                 * the current write starts in front of the current cluster
                 */
                if (last_blkno > vp->v_cstart) {
                    /*
                     * the current write extends into the existing cluster
                     */
                    if ((vp->v_lastw - start_blkno) > vp->v_clen) {
                        /*
                         * if we were to combine this write with the current cluster
                         * we would exceed the cluster size limit....
                         * clip the current cluster by moving the start position
                         * to where the current write ends, and then push it
                         */
                        vp->v_ciosiz -= (last_blkno - vp->v_cstart) * PAGE_SIZE;
                        vp->v_cstart  = last_blkno;

                    /*
                     * round up the io_size to the nearest page size
                     * since we've coalesced with at least 1 pre-existing
                     * page in the current cluster... this write may have ended in the
                     * middle of the page which would cause io_size to give us an
                     * inaccurate view of how much I/O we actually need to do
                     */
                    io_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

                    /*
                     * we can coalesce the current write with the existing cluster
                     * adjust the cluster info to reflect this
                     */
                    if (last_blkno > vp->v_lastw) {
                        /*
                         * the current write completely overlaps
                         * the existing cluster
                         */
                        vp->v_lastw  = last_blkno;
                        vp->v_ciosiz = io_size;

                        vp->v_ciosiz += (vp->v_cstart - start_blkno) * PAGE_SIZE;

                        if (io_size > vp->v_ciosiz)
                            vp->v_ciosiz = io_size;

                    vp->v_cstart = start_blkno;

            /*
             * this I/O range is entirely in front of the current cluster
             * so we need to push the current cluster out before beginning
             */

        if (io_size_before_rounding < MAXPHYSIO && !(flags & IO_SYNC)) {
            vp->v_clen   = MAXPHYSIO / PAGE_SIZE;
            vp->v_cstart = start_blkno;
            vp->v_lastw  = last_blkno;
            vp->v_ciosiz = io_size;

            kernel_upl_commit_range(upl, 0, upl_size,
                UPL_COMMIT_SET_DIRTY
                | UPL_COMMIT_FREE_ON_EMPTY,
                pl, MAX_UPL_TRANSFER);

        if ((flags & IO_SYNC) || (vp->v_numoutput > ASYNC_THROTTLE))
            io_flags = CL_COMMIT | CL_AGE;
            io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;

        if (vp->v_flag & VNOCACHE_DATA)
            io_flags |= CL_DUMP;

        retval = cluster_io(vp, upl, 0, upl_f_offset, io_size,
            io_flags, (struct buf *)0);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
        retval, 0, 0, 0, 0);
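/*
 * cluster_read:
 * top-level read entry point.  Requests smaller than four pages, or not
 * coming from user space, or on vnodes without VNOCACHE_DATA, are handled
 * by the buffered path, cluster_read_x.  Otherwise the request is clipped
 * to page-aligned pieces and passed to cluster_nocopy_read, with
 * cluster_read_x picking up the unaligned head and tail.
 */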
cluster_read(vp, uio, filesize, devblocksize, flags)

    object = ubc_getobject(vp, UBC_NOREACTIVATE);
    if (object == (void *)NULL)
        panic("cluster_read: ubc_getobject failed");

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);

    /*
     * We set a threshold of 4 pages to decide if the nocopy
     * read loop is worth the trouble...
     */
    if ((!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE)))
        || (uio->uio_resid < 4 * PAGE_SIZE))

        retval = cluster_read_x(object, vp, uio, filesize, devblocksize, flags);
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
            (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);

    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0)

        /* we know we have a resid, so this is safe */
        while (iov->iov_len == 0) {

        if (uio->uio_offset & PAGE_MASK_64)

            /* Bring the file offset read up to a pagesize boundary */
            clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
            if (uio->uio_resid < clip_size)
                clip_size = uio->uio_resid;

            /*
             * Fake the resid going into the cluster_read_x call
             * and restore it on the way out.
             */
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(object, vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

        else if ((int)iov->iov_base & PAGE_MASK_64)

            clip_size = iov->iov_len;
            prev_resid = uio->uio_resid;
            uio->uio_resid = clip_size;
            retval = cluster_read_x(object, vp, uio, filesize, devblocksize, flags);
            uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

            /*
             * If we come in here, we know the offset into
             * the file is on a pagesize boundary
             */
            max_io_size = filesize - uio->uio_offset;
            clip_size = uio->uio_resid;
            if (iov->iov_len < clip_size)
                clip_size = iov->iov_len;
            if (max_io_size < clip_size)
                clip_size = (int)max_io_size;

            if (clip_size < PAGE_SIZE)

                /*
                 * Take care of the tail end of the read in this vector.
                 */
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_read_x(object, vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

                /* round clip_size down to a multiple of pagesize */
                clip_size = clip_size & ~(PAGE_MASK);
                prev_resid = uio->uio_resid;
                uio->uio_resid = clip_size;
                retval = cluster_nocopy_read(object, vp, uio, filesize, devblocksize, flags);
                if ((retval == 0) && uio->uio_resid)
                    retval = cluster_read_x(object, vp, uio, filesize, devblocksize, flags);
                uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
        (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
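/*
 * cluster_read_x:
 * buffered read path.  Pages already resident are copied to the user with
 * uiomove directly from their physical addresses; for the rest a upl is
 * built over the missing range, a synchronous cluster_io read fills the
 * invalid pages, the data is copied out, and the pages are committed back
 * to the inactive queue (or dumped for VNOCACHE_DATA vnodes).  Read-ahead
 * and prefetch are started whenever a sequential pattern is detected.
 */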
cluster_read_x(object, vp, uio, filesize, devblocksize, flags)

    upl_page_info_t *pl;
    vm_offset_t      upl_offset;
    vm_offset_t      io_address;

    b_lblkno = (int)(uio->uio_offset / PAGE_SIZE_64);

    while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
        /*
         * compute the size of the upl needed to encompass
         * the requested read... limit each call to cluster_io
         * to at most MAXPHYSIO, make sure to account for
         * a starting offset that's not page aligned
         */
        start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
        upl_f_offset = uio->uio_offset - (off_t)start_offset;
        max_size     = filesize - uio->uio_offset;

        if (uio->uio_resid < max_size)
            io_size = uio->uio_resid;

        if (uio->uio_segflg == UIO_USERSPACE && !(vp->v_flag & VNOCACHE_DATA)) {
            segflg = uio->uio_segflg;

            uio->uio_segflg = UIO_PHYS_USERSPACE;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
                (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);

            while (io_size && retval == 0) {

                if (memory_object_page_op(object, (vm_offset_t)upl_f_offset, UPL_POP_SET | UPL_POP_BUSY,
                    &paddr, 0) != KERN_SUCCESS)

                xsize = PAGE_SIZE - start_offset;

                if (xsize > io_size)

                retval = uiomove((caddr_t)(paddr + start_offset), xsize, uio);

                memory_object_page_op(object, (vm_offset_t)upl_f_offset, UPL_POP_CLR | UPL_POP_BUSY, 0, 0);

                start_offset = (int)
                    (uio->uio_offset & PAGE_MASK_64);
                upl_f_offset = uio->uio_offset - start_offset;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
                (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);

            uio->uio_segflg = segflg;

                /*
                 * we're already finished with this read request
                 * let's see if we should do a read-ahead
                 */
                e_lblkno = (int)
                    ((uio->uio_offset - 1) / PAGE_SIZE_64);

                if (!(vp->v_flag & VRAOFF))
                    /*
                     * let's try to read ahead if we're in
                     * a sequential access pattern
                     */
                    cluster_rd_ahead(vp, object, b_lblkno, e_lblkno, filesize, devblocksize);
                vp->v_lastr = e_lblkno;

        max_size = filesize - uio->uio_offset;

        upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
        if (upl_size > MAXPHYSIO)
            upl_size = MAXPHYSIO;
        pages_in_upl = upl_size / PAGE_SIZE;

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START,
            upl, (int)upl_f_offset, upl_size, start_offset, 0);

        kret = vm_fault_list_request(object,
            (vm_object_offset_t)upl_f_offset, upl_size, &upl, NULL, 0,
            (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL));

        if (kret != KERN_SUCCESS)
            panic("cluster_read: failed to get pagelist");

        pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END,
            upl, (int)upl_f_offset, upl_size, start_offset, 0);

        /*
         * scan from the beginning of the upl looking for the first
         * non-valid page.... this will become the first page in
         * the request we're going to make to 'cluster_io'... if all
         * of the pages are valid, we won't call through to 'cluster_io'
         */
        for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
            if (!upl_valid_page(pl, start_pg))

        /*
         * scan from the starting invalid page looking for a valid
         * page before the end of the upl is reached, if we
         * find one, then it will be the last page of the request to
         */
        for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
            if (upl_valid_page(pl, last_pg))

        if (start_pg < last_pg) {
            /*
             * we found a range of 'invalid' pages that must be filled
             * if the last page in this range is the last page of the file
             * we may have to clip the size of it to keep from reading past
             * the end of the last physical block associated with the file
             */
            upl_offset = start_pg * PAGE_SIZE;
            io_size    = (last_pg - start_pg) * PAGE_SIZE;

            if ((upl_f_offset + upl_offset + io_size) > filesize) {
                io_size = filesize - (upl_f_offset + upl_offset);
                io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);

            /*
             * issue a synchronous read to cluster_io
             */
            error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
                io_size, CL_READ, (struct buf *)0);

            /*
             * if the read completed successfully, or there was no I/O request
             * issued, then map the upl into kernel address space and
             * move the data into user land.... we'll first add on any 'valid'
             * pages that were present in the upl when we acquired it.
             */
            u_int  size_of_prefetch;

            for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
                if (!upl_valid_page(pl, uio_last))

            /*
             * compute size to transfer this round, if uio->uio_resid is
             * still non-zero after this uiomove, we'll loop around and
             * set up for another I/O.
             */
            val_size = (uio_last * PAGE_SIZE) - start_offset;

            if (max_size < val_size)
                val_size = max_size;

            if (uio->uio_resid < val_size)
                val_size = uio->uio_resid;

            e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);

            if (size_of_prefetch = (uio->uio_resid - val_size)) {
                /*
                 * if there's still I/O left to do for this request, then issue a
                 * pre-fetch I/O... the I/O wait time will overlap
                 * with the copying of the data
                 */
                cluster_rd_prefetch(vp, object, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);

            if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
                /*
                 * let's try to read ahead if we're in
                 * a sequential access pattern
                 */
                cluster_rd_ahead(vp, object, b_lblkno, e_lblkno, filesize, devblocksize);
            vp->v_lastr = e_lblkno;

            if (uio->uio_segflg == UIO_USERSPACE) {

                segflg = uio->uio_segflg;

                uio->uio_segflg = UIO_PHYS_USERSPACE;

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
                    (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);

                offset = start_offset;

                while (val_size && retval == 0) {

                    i     = offset / PAGE_SIZE;
                    csize = min(PAGE_SIZE - start_offset, val_size);

                    paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;

                    retval = uiomove(paddr, csize, uio);

                    start_offset = offset & PAGE_MASK;

                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
                    (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);

                uio->uio_segflg = segflg;

                if ((kret = kernel_upl_map(kernel_map, upl, &io_address)) != KERN_SUCCESS)
                    panic("cluster_read: kernel_upl_map failed\n");

                retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);

                if ((kret = kernel_upl_unmap(kernel_map, upl)) != KERN_SUCCESS)
                    panic("cluster_read: kernel_upl_unmap failed\n");

        if (start_pg < last_pg) {
            /*
             * compute the range of pages that we actually issued an I/O for
             * and either commit them as valid if the I/O succeeded
             * or abort them if the I/O failed
             */
            io_size = (last_pg - start_pg) * PAGE_SIZE;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
                upl, start_pg * PAGE_SIZE, io_size, error, 0);

            if (error || (vp->v_flag & VNOCACHE_DATA))
                kernel_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
                    UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);

                kernel_upl_commit_range(upl,
                    start_pg * PAGE_SIZE, io_size,
                    UPL_COMMIT_CLEAR_DIRTY
                    | UPL_COMMIT_FREE_ON_EMPTY
                    | UPL_COMMIT_INACTIVATE,
                    pl, MAX_UPL_TRANSFER);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
                upl, start_pg * PAGE_SIZE, io_size, error, 0);

        if ((last_pg - start_pg) < pages_in_upl) {
            /*
             * the set of pages that we issued an I/O for did not encompass
             * the entire upl... so just release these without modifying
             */
            kernel_upl_abort(upl, 0);

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
                upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);

            /*
             * we found some already valid pages at the beginning of the upl
             * commit these back to the inactive list with reference cleared
             */
            for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
                commit_flags = UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE;

                if (upl_dirty_page(pl, cur_pg))
                    commit_flags |= UPL_COMMIT_SET_DIRTY;

                if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
                    kernel_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
                        UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);

                    kernel_upl_commit_range(upl, cur_pg * PAGE_SIZE,
                        PAGE_SIZE, commit_flags, pl, MAX_UPL_TRANSFER);

            if (last_pg < uio_last) {
                /*
                 * we found some already valid pages immediately after the pages we issued
                 * I/O for, commit these back to the inactive list with reference cleared
                 */
                for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
                    commit_flags = UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE;

                    if (upl_dirty_page(pl, cur_pg))
                        commit_flags |= UPL_COMMIT_SET_DIRTY;

                    if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
                        kernel_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
                            UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);

                        kernel_upl_commit_range(upl, cur_pg * PAGE_SIZE,
                            PAGE_SIZE, commit_flags, pl, MAX_UPL_TRANSFER);

            if (uio_last < pages_in_upl) {
                /*
                 * there were some invalid pages beyond the valid pages that we didn't
                 * issue an I/O for, just release them unchanged
                 */
                kernel_upl_abort(upl, 0);

        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
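/*
 * cluster_nocopy_read:
 * read directly into the user's buffer without copying through the page
 * cache.  Pages already resident are moved out with uiomove first; the
 * remaining range is sized to MAXPHYSIO, the user pages are wired with
 * vm_map_get_upl (retrying with increasing force_data_sync), and a
 * synchronous cluster_io read with CL_READ | CL_NOZERO fills them before
 * the uio/iovec is advanced and the upl is committed or aborted.
 */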
2168 cluster_nocopy_read(object
, vp
, uio
, filesize
, devblocksize
, flags
)
2177 upl_page_info_t
*pl
;
2179 vm_offset_t upl_offset
;
2180 off_t start_upl_f_offset
;
2184 int upl_needed_size
;
2192 int force_data_sync
;
2196 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 70)) | DBG_FUNC_START
,
2197 (int)uio
->uio_offset
, uio
->uio_resid
, (int)filesize
, devblocksize
, 0);
2200 * When we enter this routine, we know
2201 * -- the offset into the file is on a pagesize boundary
2202 * -- the resid is a page multiple
2203 * -- the resid will not exceed iov_len
2207 while (uio
->uio_resid
&& uio
->uio_offset
< filesize
&& retval
== 0) {
2209 io_size
= uio
->uio_resid
;
2212 * We don't come into this routine unless
2213 * UIO_USERSPACE is set.
2215 segflg
= uio
->uio_segflg
;
2217 uio
->uio_segflg
= UIO_PHYS_USERSPACE
;
2220 * First look for pages already in the cache
2221 * and move them to user space.
2223 while (io_size
&& retval
== 0) {
2225 upl_f_offset
= uio
->uio_offset
;
2228 * If this call fails, it means the page is not
2229 * in the page cache.
2231 if (memory_object_page_op(object
, (vm_offset_t
)upl_f_offset
,
2232 UPL_POP_SET
| UPL_POP_BUSY
,
2233 &paddr
, 0) != KERN_SUCCESS
)
2236 retval
= uiomove((caddr_t
)(paddr
), PAGE_SIZE
, uio
);
2238 memory_object_page_op(object
, (vm_offset_t
)upl_f_offset
,
2239 UPL_POP_CLR
| UPL_POP_BUSY
, 0, 0);
2241 io_size
-= PAGE_SIZE
;
2242 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 71)) | DBG_FUNC_NONE
,
2243 (int)uio
->uio_offset
, io_size
, uio
->uio_resid
, 0, 0);
2246 uio
->uio_segflg
= segflg
;
2250 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 70)) | DBG_FUNC_END
,
2251 (int)uio
->uio_offset
, uio
->uio_resid
, 2, retval
, 0);
2255 /* If we are already finished with this read, then return */
2259 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 70)) | DBG_FUNC_END
,
2260 (int)uio
->uio_offset
, uio
->uio_resid
, 3, io_size
, 0);
2264 max_io_size
= io_size
;
2265 if (max_io_size
> MAXPHYSIO
)
2266 max_io_size
= MAXPHYSIO
;
	  start_upl_f_offset = uio->uio_offset;   /* this is page aligned in the file */
	  upl_f_offset = start_upl_f_offset;
	  io_size = 0;

	  while (io_size < max_io_size)
	    {
	      /*
	       * stop building up the request as soon as we hit a page that
	       * is already resident in the cache
	       */
	      if (memory_object_page_op(object, (vm_offset_t)upl_f_offset,
					UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS)
		{
		  memory_object_page_op(object, (vm_offset_t)upl_f_offset,
					UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
		  break;
		}

	      /*
	       * Build up the io request parameters.
	       */
	      io_size += PAGE_SIZE;
	      upl_f_offset += PAGE_SIZE;
	    }
	  upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
	  upl_needed_size = (upl_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;

	  KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
		       (int)upl_offset, upl_needed_size, iov->iov_base, io_size, 0);
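	  /*
	   * Illustration of the sizing above (example numbers only, assuming
	   * a 4 KB page size): with iov_base == 0x7fff1234 and io_size ==
	   * 0x3000, upl_offset comes out to 0x234 and upl_needed_size to
	   * (0x234 + 0x3000 + 0xfff) & ~0xfff == 0x4000, i.e. four pages are
	   * needed to cover the user buffer.
	   *
	   * The loop below then asks vm_map_get_upl() for that range up to
	   * three times, with an increasing force_data_sync value, until it
	   * gets back a page list in which every page is valid.
	   */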
	  for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
	    {
	      upl_size = upl_needed_size;
	      upl_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;

	      kret = vm_map_get_upl(current_map(),
				    (vm_offset_t)iov->iov_base & ~PAGE_MASK,
				    &upl_size, &upl, &pl, &pages_in_pl, &upl_flags, force_data_sync);

	      pages_in_pl = upl_size / PAGE_SIZE;

	      if (kret != KERN_SUCCESS)
		{
		  KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
			       (int)upl_offset, upl_size, io_size, kret, 0);

		  KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
			       (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);

		  /* cluster_nocopy_read: failed to get pagelist */
		  /* do not return kret here */
		  return(retval);
		}
	      /*
	       * make sure every page in the returned page list came back
	       * valid before using it... otherwise release the upl and retry
	       */
	      for (i = 0; i < pages_in_pl; i++)
		{
		  if (!upl_valid_page(pl, i))
		    break;
		}
	      if (i == pages_in_pl)
		break;

	      kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
				     UPL_ABORT_FREE_ON_EMPTY);
	    }
	  if (force_data_sync >= 3)
	    {
	      KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
			   (int)upl_offset, upl_size, io_size, kret, 0);

	      KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
			   (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);

	      /* all three attempts to get the pagelist failed... give up,
		 but do not return kret here */
	      return(retval);
	    }
	  /*
	   * Consider the possibility that upl_size wasn't satisfied.
	   */
	  if (upl_size != upl_needed_size)
	    io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
	  if (io_size == 0)
	    {
	      kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
				     UPL_ABORT_FREE_ON_EMPTY);
	      return(retval);
	    }

	  KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
		       (int)upl_offset, upl_size, io_size, kret, 0);
	  /*
	   * issue a synchronous read to cluster_io
	   */
	  KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
		       upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);

	  error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
			     io_size, CL_READ | CL_NOZERO, (struct buf *)0);

	  if (error == 0)
	    {
	      /*
	       * The cluster_io read completed successfully,
	       * update the uio structure and commit.
	       */
	      kernel_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
				      UPL_COMMIT_SET_DIRTY
				      | UPL_COMMIT_FREE_ON_EMPTY,
				      pl, MAX_UPL_TRANSFER);

	      iov->iov_base   += io_size;
	      iov->iov_len    -= io_size;
	      uio->uio_resid  -= io_size;
	      uio->uio_offset += io_size;
	    }
	  else
	    {
	      kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
				     UPL_ABORT_FREE_ON_EMPTY);
	    }
	  KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
		       upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);

	  if (retval == 0)
	    retval = error;

	} /* end while */

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
		     (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);

	return (retval);
}
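/*
 * Illustrative caller sketch (not part of this file's control flow): a
 * routine like cluster_read would be expected to divert to
 * cluster_nocopy_read only when the vnode is marked VNOCACHE_DATA and the
 * request is page aligned, along the lines of
 *
 *	if ((vp->v_flag & VNOCACHE_DATA) &&
 *	    (uio->uio_offset & PAGE_MASK_64) == 0 &&
 *	    (uio->uio_resid & PAGE_MASK) == 0)
 *		retval = cluster_nocopy_read(object, vp, uio, filesize,
 *					     devblocksize, flags);
 *
 * The exact gating lives in the caller and may differ from this sketch.
 */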
/*
 * generate advisory I/O's in the largest chunks possible
 * the completed pages will be released into the VM cache
 */
advisory_read(vp, filesize, f_offset, resid, devblocksize)
{
	upl_page_info_t *pl;
	vm_offset_t      upl_offset;

	if (!UBCINFOEXISTS(vp))
		return(EINVAL);

	object = ubc_getobject(vp, UBC_NOREACTIVATE);
	if (object == (void *)NULL)
		panic("advisory_read: ubc_getobject failed");

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
		     (int)f_offset, resid, (int)filesize, devblocksize, 0);
	while (resid && f_offset < filesize && retval == 0) {
		/*
		 * compute the size of the upl needed to encompass
		 * the requested read... limit each call to cluster_io
		 * to at most MAXPHYSIO, make sure to account for
		 * a starting offset that's not page aligned
		 */
		start_offset = (int)(f_offset & PAGE_MASK_64);
		upl_f_offset = f_offset - (off_t)start_offset;
		max_size     = filesize - f_offset;

		if (resid < max_size)
			io_size = resid;
		else
			io_size = max_size;

		upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
		if (upl_size > MAXPHYSIO)
			upl_size = MAXPHYSIO;
		pages_in_upl = upl_size / PAGE_SIZE;
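		/*
		 * Example of the sizing above (illustrative values only,
		 * assuming 4 KB pages): f_offset == 0x11200 and io_size ==
		 * 0x5000 give start_offset == 0x200, upl_f_offset == 0x11000
		 * and upl_size == (0x200 + 0x5000 + 0xfff) & ~0xfff == 0x6000,
		 * i.e. a six page upl beginning on the page that contains
		 * f_offset.
		 */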
		kret = vm_fault_list_request(object,
			(vm_object_offset_t)upl_f_offset, upl_size, &upl, NULL, 0,
			(UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL));
		if (kret != KERN_SUCCESS)
			panic("advisory_read: failed to get pagelist");

		pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
			upl, (int)upl_f_offset, upl_size, start_offset, 0);
		/*
		 * scan from the beginning of the upl looking for the first
		 * non-valid page.... this will become the first page in
		 * the request we're going to make to 'cluster_io'... if all
		 * of the pages are valid, we won't call through to 'cluster_io'
		 */
		for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
			if (!upl_valid_page(pl, start_pg))
				break;
		}

		/*
		 * scan from the starting invalid page looking for a valid
		 * page before the end of the upl is reached, if we
		 * find one, then it will be the last page of the request to
		 * 'cluster_io'
		 */
		for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
			if (upl_valid_page(pl, last_pg))
				break;
		}
		if (start_pg < last_pg) {
			/*
			 * we found a range of 'invalid' pages that must be filled
			 * if the last page in this range is the last page of the file
			 * we may have to clip the size of it to keep from reading past
			 * the end of the last physical block associated with the file
			 */
			upl_offset = start_pg * PAGE_SIZE;
			io_size    = (last_pg - start_pg) * PAGE_SIZE;

			if ((upl_f_offset + upl_offset + io_size) > filesize) {
				io_size = filesize - (upl_f_offset + upl_offset);
				io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
			}

			/*
			 * issue an asynchronous read to cluster_io
			 */
			retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size,
					    CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0);
		}
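		/*
		 * The round-up to devblocksize in the clipping case above keeps
		 * the transfer a whole number of device blocks; for example
		 * (illustrative values only), with devblocksize == 512 a clipped
		 * io_size of 0x1234 becomes (0x1234 + 0x1ff) & ~0x1ff == 0x1400.
		 */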
		if (start_pg) {
			/*
			 * start_pg of non-zero indicates we found some already valid pages
			 * at the beginning of the upl.... we need to release these without
			 * modifying their state
			 */
			kernel_upl_abort_range(upl, 0, start_pg * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 62)) | DBG_FUNC_NONE,
				     upl, 0, start_pg * PAGE_SIZE, 0, 0);
		}
		if (last_pg < pages_in_upl) {
			/*
			 * the set of pages that we issued an I/O for did not extend all the
			 * way to the end of the upl... so just release them without modifying
			 * their state
			 */
			kernel_upl_abort_range(upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE,
					       UPL_ABORT_FREE_ON_EMPTY);

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 63)) | DBG_FUNC_NONE,
				     upl, last_pg * PAGE_SIZE,
				     (pages_in_upl - last_pg) * PAGE_SIZE, 0, 0);
		}
		io_size = (last_pg * PAGE_SIZE) - start_offset;

		if (io_size > resid)
			io_size = resid;
		f_offset += io_size;
		resid    -= io_size;
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
		     (int)f_offset, resid, retval, 0, 0);

	return(retval);
}
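/*
 * cluster_push: flush the vnode's currently accumulated write cluster.
 * The cluster is described by v_cstart (first page), v_lastw (page past
 * the last one written) and v_ciosiz (byte count); the dirty pages in
 * that range are gathered into a upl and pushed to disk via cluster_io.
 */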
cluster_push(vp)
	struct vnode *vp;
{
	upl_page_info_t *pl;
	vm_offset_t      upl_offset;
	if (!UBCINFOEXISTS(vp))
		return(0);

	if (vp->v_clen == 0 || (pages_in_upl = vp->v_lastw - vp->v_cstart) == 0)
		return(0);
	upl_size = pages_in_upl * PAGE_SIZE;
	upl_f_offset = ((off_t)vp->v_cstart) * PAGE_SIZE_64;
	size = vp->v_ciosiz;

	if (size > upl_size || (upl_size - size) > PAGE_SIZE)
		panic("cluster_push: v_ciosiz doesn't match size of cluster\n");

	object = ubc_getobject(vp, UBC_NOREACTIVATE);
	if (object == (void *)NULL)
		panic("cluster_push: ubc_getobject failed");
	kret = vm_fault_list_request(object,
		(vm_object_offset_t)upl_f_offset, upl_size, &upl, NULL, 0,
		(UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL));
	if (kret != KERN_SUCCESS)
		panic("cluster_push: failed to get pagelist");

	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
	/*
	 * skip over any leading pages in the upl that are not both valid
	 * and dirty... they don't need to be written, so just release them
	 */
	for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
		if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
			break;
	}
	if (start_pg > last_pg) {
		io_size = (start_pg - last_pg) * PAGE_SIZE;

		kernel_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size, UPL_ABORT_FREE_ON_EMPTY);
	}
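	/*
	 * The loop below finds the run of contiguous pages, starting at
	 * start_pg, that are both valid and dirty; that run becomes the next
	 * write handed to cluster_io.  CL_ASYNC is dropped when
	 * vp->v_numoutput exceeds ASYNC_THROTTLE, and CL_DUMP is added when
	 * the vnode is marked VNOCACHE_DATA.
	 */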
	for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
		if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
			break;
	}
	upl_offset = start_pg * PAGE_SIZE;

	io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);

	if (vp->v_numoutput > ASYNC_THROTTLE)
		io_flags = CL_COMMIT | CL_AGE;
	else
		io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;

	if (vp->v_flag & VNOCACHE_DATA)
		io_flags |= CL_DUMP;

	cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, io_flags, (struct buf *)0);