]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_cluster.c
xnu-124.1.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_cluster.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23/*
24 * Copyright (c) 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)vfs_cluster.c 8.10 (Berkeley) 3/28/95
56 */
57
58#include <sys/param.h>
59#include <sys/proc.h>
60#include <sys/buf.h>
61#include <sys/vnode.h>
62#include <sys/mount.h>
63#include <sys/trace.h>
64#include <sys/malloc.h>
65#include <sys/resourcevar.h>
66#include <libkern/libkern.h>
67
68#include <sys/ubc.h>
69#include <vm/vm_pageout.h>
70#include <mach/memory_object_types.h>
71
72#include <sys/kdebug.h>
73
74
/*
 * Internal request flags for cluster_io(); they are also handed through
 * to cluster_zero().
 */
75#define CL_READ 0x01	/* read request: bufs get B_READ rather than B_WRITEINPROG */
76#define CL_ASYNC 0x02	/* asynchronous: bufs get B_CALL | B_ASYNC and cluster_iodone as b_iodone */
77#define CL_COMMIT 0x04	/* commit (or abort on error) the affected upl range on behalf of the caller */
78#define CL_NOMAP 0x08	/* cluster_zero() zeroes through bp->b_data instead of mapping the upl */
79#define CL_PAGEOUT 0x10	/* pageout: head buf gets B_PAGEOUT; matching incore bufs are invalidated */
80#define CL_AGE 0x20	/* bufs get B_AGE */
81#define CL_DUMP 0x40	/* bufs get B_NOCACHE */
82#define CL_NOZERO 0x80	/* skip zero-filling the unread tail of the last page of a read */
83#define CL_PAGEIN 0x100	/* pagein: a failed request aborts the upl with UPL_ABORT_ERROR */
84
85/*
86 * throttle the number of async writes that
87 * can be outstanding on a single vnode
88 * before we issue a synchronous write
89 */
90#define ASYNC_THROTTLE 3
91
92static int
93cluster_iodone(bp)
94 struct buf *bp;
95{
96 int b_flags;
97 int error;
98 int total_size;
99 int total_resid;
100 int upl_offset;
101 upl_t upl;
102 struct buf *cbp;
103 struct buf *cbp_head;
104 struct buf *cbp_next;
105 struct buf *real_bp;
106 int commit_size;
107 int pg_offset;
108
109
110 cbp_head = (struct buf *)(bp->b_trans_head);
111
112 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
113 cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
114
115 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
116 /*
117 * all I/O requests that are part of this transaction
118 * have to complete before we can process it
119 */
120 if ( !(cbp->b_flags & B_DONE)) {
121
122 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
123 cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0);
124
125 return 0;
126 }
127 }
128 error = 0;
129 total_size = 0;
130 total_resid = 0;
131
132 cbp = cbp_head;
133 upl_offset = cbp->b_uploffset;
134 upl = cbp->b_pagelist;
135 b_flags = cbp->b_flags;
136 real_bp = cbp->b_real_bp;
137
138 while (cbp) {
139 if (cbp->b_vectorcount > 1)
140 _FREE(cbp->b_vectorlist, M_SEGMENT);
141
142 if ((cbp->b_flags & B_ERROR) && error == 0)
143 error = cbp->b_error;
144
145 total_resid += cbp->b_resid;
146 total_size += cbp->b_bcount;
147
148 cbp_next = cbp->b_trans_next;
149
150 free_io_buf(cbp);
151
152 cbp = cbp_next;
153 }
154 if ((b_flags & B_NEED_IODONE) && real_bp) {
155 if (error) {
156 real_bp->b_flags |= B_ERROR;
157 real_bp->b_error = error;
158 }
159 real_bp->b_resid = total_resid;
160
161 biodone(real_bp);
162 }
163 if (error == 0 && total_resid)
164 error = EIO;
165
166 if (b_flags & B_COMMIT_UPL) {
167 pg_offset = upl_offset & PAGE_MASK;
168 commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
169
170 if (error || (b_flags & B_NOCACHE)) {
171 int upl_abort_code;
172
173 if (b_flags & B_PAGEOUT)
174 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
175 else
176 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
177
178 kernel_upl_abort_range(upl, upl_offset - pg_offset, commit_size, upl_abort_code);
179
180 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
181 upl, upl_offset - pg_offset, commit_size,
182 0x80000000|upl_abort_code, 0);
183
184 } else {
185 int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;
186
187 if ( !(b_flags & B_PAGEOUT))
188 upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;
189 if (b_flags & B_AGE)
190 upl_commit_flags |= UPL_COMMIT_INACTIVATE;
191
192 kernel_upl_commit_range(upl, upl_offset - pg_offset,
193 commit_size, upl_commit_flags,
194 UPL_GET_INTERNAL_PAGE_LIST(upl),
195 MAX_UPL_TRANSFER);
196
197 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
198 upl, upl_offset - pg_offset, commit_size,
199 upl_commit_flags, 0);
200 }
201 } else
202 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
203 upl, upl_offset, 0, error, 0);
204
205 return (error);
206}
207
208
209static void
210cluster_zero(upl, upl_offset, size, flags, bp)
211 upl_t upl;
212 vm_offset_t upl_offset;
213 int size;
214 int flags;
215 struct buf *bp;
216{
217 vm_offset_t io_addr = 0;
218 kern_return_t kret;
219
220 if ( !(flags & CL_NOMAP)) {
221 kret = kernel_upl_map(kernel_map, upl, &io_addr);
222
223 if (kret != KERN_SUCCESS)
224 panic("cluster_zero: kernel_upl_map() failed with (%d)", kret);
225 if (io_addr == 0)
226 panic("cluster_zero: kernel_upl_map mapped 0");
227 } else
228 io_addr = (vm_offset_t)bp->b_data;
229 bzero((caddr_t)(io_addr + upl_offset), size);
230
231 if ( !(flags & CL_NOMAP)) {
232 kret = kernel_upl_unmap(kernel_map, upl);
233
234 if (kret != KERN_SUCCESS)
235 panic("cluster_zero: kernel_upl_unmap failed");
236 }
237}
238
239
/*
 * cluster_io
 *
 * Turn the byte range [f_offset, f_offset + size) of 'vp', whose pages are
 * described by 'upl' starting 'upl_offset' bytes in, into one or more bufs
 * and pass them to VOP_STRATEGY.
 *
 * Each pass of the main loop takes at most MAXPHYSIO bytes, calls VOP_CMAP
 * to obtain 'blkno' (and a possibly adjusted 'io_size'), and then either
 *   - zero-fills the extent when reading and blkno == -1 (a hole), or
 *   - builds a buf with one iovec per page and appends it to the current
 *     transaction (linked through b_trans_next, every member pointing at
 *     the first buf through b_trans_head).
 * A transaction is issued when it ends on a page boundary or when no bytes
 * remain.
 *
 * 'flags' is a mask of the CL_* bits defined at the top of this file.
 * 'real_bp', when non-NULL, is the caller's buf: it is recorded in the head
 * buf together with B_NEED_IODONE, and if request setup fails it is marked
 * B_ERROR and completed with biodone() here.
 *
 * Without CL_ASYNC this routine biowait()s on every buf of a transaction
 * and calls cluster_iodone() itself; with CL_ASYNC cluster_iodone is
 * installed as each buf's b_iodone.
 *
 * Returns 0 on success.  Otherwise returns the error of the most recent
 * failed synchronous transaction or, if there was none, the error that
 * stopped request setup.
 */
240static int
241cluster_io(vp, upl, upl_offset, f_offset, size, flags, real_bp)
242 struct vnode *vp;
243 upl_t upl;
244 vm_offset_t upl_offset;
245 off_t f_offset;
246 int size;
247 int flags;
248 struct buf *real_bp;
249{
250 struct buf *cbp;
251 struct iovec *iovp;
252 int io_flags;
253 int error = 0;
254 int retval = 0;
255 struct buf *cbp_head = 0;
256 struct buf *cbp_tail = 0;
257 upl_page_info_t *pl;
258 int pg_count;
259 int pg_offset;
260
	/* translate the CL_* request flags into the b_flags every buf will carry */
261 if (flags & CL_READ)
262 io_flags = (B_VECTORLIST | B_READ);
263 else
264 io_flags = (B_VECTORLIST | B_WRITEINPROG);
265
266 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
267
268 if (flags & CL_ASYNC)
269 io_flags |= (B_CALL | B_ASYNC);
270 if (flags & CL_AGE)
271 io_flags |= B_AGE;
272 if (flags & CL_DUMP)
273 io_flags |= B_NOCACHE;
274
275
276 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
277 (int)f_offset, size, upl_offset, flags, 0);
278
279 if ((flags & CL_READ) && ((upl_offset + size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
280 /*
281 * then we are going to end up
282 * with a page that we can't complete (the file size wasn't a multiple
283 * of PAGE_SIZE and we're trying to read to the end of the file
284 * so we'll go ahead and zero out the portion of the page we can't
285 * read in from the file
286 */
287 cluster_zero(upl, upl_offset + size, PAGE_SIZE - ((upl_offset + size) & PAGE_MASK), flags, real_bp);
288
289 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
290 upl_offset + size, PAGE_SIZE - ((upl_offset + size) & PAGE_MASK),
291 flags, real_bp, 0);
292 }
	/* main loop: each pass maps one extent and either zero-fills it or prepares a buf for it */
293 while (size) {
294 size_t io_size;
295 int vsize;
296 int i;
297 int pl_index;
298 int pg_resid;
299 int num_contig;
300 daddr_t lblkno;
301 daddr_t blkno;
302
	/* a single request never exceeds MAXPHYSIO; VOP_CMAP is handed &io_size and may change it */
303 if (size > MAXPHYSIO)
304 io_size = MAXPHYSIO;
305 else
306 io_size = size;
307
308 if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, &io_size, NULL)) {
309 if (error == EOPNOTSUPP)
310 panic("VOP_CMAP Unimplemented");
311 break;
312 }
313
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
315 (int)f_offset, (int)blkno, io_size, 0, 0);
316
	/*
	 * a write to an unmapped block (blkno == -1) is rejected, as is a
	 * zero-length extent, which would leave 'size' unchanged forever
	 */
317 if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {
318 error = EINVAL;
319 break;
320 }
321 lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);
322 /*
323 * we have now figured out how much I/O we can do - this is in 'io_size'
324 * pl_index represents the first page in the 'upl' that the I/O will occur for
325 * pg_offset is the starting point in the first page for the I/O
326 * pg_count is the number of full and partial pages that 'io_size' encompasses
327 */
328 pl_index = upl_offset / PAGE_SIZE;
329 pg_offset = upl_offset & PAGE_MASK;
330 pg_count = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;
331
332 if ((flags & CL_READ) && (long)blkno == -1) {
333 /*
334 * if we're reading and blkno == -1, then we've got a
335 * 'hole' in the file that we need to deal with by zeroing
336 * out the affected area in the upl
337 */
338 cluster_zero(upl, upl_offset, io_size, flags, real_bp);
339
340 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
341 upl_offset, io_size, flags, real_bp, 0);
342
	/*
	 * NOTE(review): io_size is a size_t, so this subtraction wraps to a
	 * very large value whenever io_size < pg_offset - confirm that case
	 * cannot occur, or that the result is what is intended
	 */
343 pg_count = (io_size - pg_offset) / PAGE_SIZE;
344
345 if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))
346 pg_count++;
347
	/* pages fully handled by the zero-fill are committed right away when CL_COMMIT is set */
348 if (pg_count) {
349 if (pg_offset)
350 pg_resid = PAGE_SIZE - pg_offset;
351 else
352 pg_resid = 0;
353 if (flags & CL_COMMIT)
354 kernel_upl_commit_range(upl,
355 upl_offset + pg_resid,
356 pg_count * PAGE_SIZE,
357 UPL_COMMIT_CLEAR_DIRTY
358 | UPL_COMMIT_FREE_ON_EMPTY,
359 pl, MAX_UPL_TRANSFER);
360 }
361 upl_offset += io_size;
362 f_offset += io_size;
363 size -= io_size;
364
	/* bufs queued before the hole are issued now if the zero-fill accounted for at least one page */
365 if (cbp_head && pg_count)
366 goto start_io;
367 continue;
368 } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
	/* the caller's buf still has b_blkno == b_lblkno (presumably not yet mapped): record blkno in it */
369 real_bp->b_blkno = blkno;
370 }
371 if (pg_count > 1) {
372 /*
373 * we need to allocate space for the vector list
374 */
375 iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
376 M_SEGMENT, M_NOWAIT);
377 if (iovp == (struct iovec *) 0) {
378 /*
379 * if the allocation fails, then throttle down to a single page
380 */
381 io_size = PAGE_SIZE - pg_offset;
382 pg_count = 1;
383 }
384 }
385 cbp = alloc_io_buf(vp);
386
387
388 if (pg_count == 1)
389 /*
390 * we use the io vector that's reserved in the buffer header
391 * this insures we can always issue an I/O even in a low memory
392 * condition that prevents the _MALLOC from succeeding... this
393 * is necessary to prevent deadlocks with the pager
394 */
395 iovp = (struct iovec *)(&cbp->b_vects[0]);
396
397 cbp->b_vectorlist = (void *)iovp;
398 cbp->b_vectorcount = pg_count;
399
	/*
	 * one iovec per page: iov_base comes from upl_phys_page() (presumably
	 * the page's physical address) and only the first page starts at a
	 * non-zero offset
	 */
400 for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {
401 int psize;
402
403 psize = PAGE_SIZE - pg_offset;
404
405 if (psize > vsize)
406 psize = vsize;
407
408 iovp->iov_len = psize;
409 iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);
410
411 if (iovp->iov_base == (caddr_t) 0) {
	/* no page behind this slot: release the buf being built (it is not on the chain yet) and fail */
412 if (pg_count > 1)
413 _FREE(cbp->b_vectorlist, M_SEGMENT);
414 free_io_buf(cbp);
415
416 error = EINVAL;
417 break;
418 }
419 iovp->iov_base += pg_offset;
420 pg_offset = 0;
421
422 if (flags & CL_PAGEOUT) {
423 int s;
424 struct buf *bp;
425
	/*
	 * a buf found by incore() for this logical block is marked
	 * B_BUSY | B_INVAL and released; finding one already busy panics
	 *
	 * NOTE(review): when a buf is found, splx(s) runs both before
	 * brelse() and again after the if - confirm the second call is
	 * intended
	 */
426 s = splbio();
427 if (bp = incore(vp, lblkno + i)) {
428 if (!ISSET(bp->b_flags, B_BUSY)) {
429 bremfree(bp);
430 SET(bp->b_flags, (B_BUSY | B_INVAL));
431 splx(s);
432 brelse(bp);
433 } else
434 panic("BUSY bp found in cluster_io");
435 }
436 splx(s);
437 }
438 vsize -= psize;
439 }
440 if (error)
441 break;
442
443 if (flags & CL_ASYNC)
444 cbp->b_iodone = (void *)cluster_iodone;
445 cbp->b_flags |= io_flags;
446
447 cbp->b_lblkno = lblkno;
448 cbp->b_blkno = blkno;
449 cbp->b_bcount = io_size;
450 cbp->b_pagelist = upl;
451 cbp->b_uploffset = upl_offset;
452 cbp->b_trans_next = (struct buf *)0;
453
454 if (flags & CL_READ)
455 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
456 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
457 else
458 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
459 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
460
	/* append the new buf to the transaction and point it at the head */
461 if (cbp_head) {
462 cbp_tail->b_trans_next = cbp;
463 cbp_tail = cbp;
464 } else {
465 cbp_head = cbp;
466 cbp_tail = cbp;
467 }
	/*
	 * NOTE(review): a cast expression is used as the assignment target
	 * here, which standard C does not allow - confirm the toolchain in
	 * use accepts it
	 */
468 (struct buf *)(cbp->b_trans_head) = cbp_head;
469
470 upl_offset += io_size;
471 f_offset += io_size;
472 size -= io_size;
473
474 if ( !(upl_offset & PAGE_MASK) || size == 0) {
475 /*
476 * if we have no more I/O to issue or
477 * the current I/O we've prepared fully
478 * completes the last page in this request
479 * or it's been completed via a zero-fill
480 * due to a 'hole' in the file
481 * then go ahead and issue the I/O
482 */
483start_io:
	/* the transaction-wide state that cluster_iodone() reads is kept on the head buf */
484 if (flags & CL_COMMIT)
485 cbp_head->b_flags |= B_COMMIT_UPL;
486 if (flags & CL_PAGEOUT)
487 cbp_head->b_flags |= B_PAGEOUT;
488
489 if (real_bp) {
490 cbp_head->b_flags |= B_NEED_IODONE;
491 cbp_head->b_real_bp = real_bp;
492 }
493
	/* b_trans_next is fetched before VOP_STRATEGY() is called on the buf */
494 for (cbp = cbp_head; cbp;) {
495 struct buf * cbp_next;
496
497 if (io_flags & B_WRITEINPROG)
498 cbp->b_vp->v_numoutput++;
499
500 cbp_next = cbp->b_trans_next;
501
502 (void) VOP_STRATEGY(cbp);
503 cbp = cbp_next;
504 }
	/* synchronous request: wait for every buf, then run the completion processing ourselves */
505 if ( !(flags & CL_ASYNC)) {
506 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)
507 biowait(cbp);
508
	/* an I/O error is remembered in retval but does not stop the remaining extents from being issued */
509 if (error = cluster_iodone(cbp_head)) {
510 retval = error;
511 error = 0;
512 }
513 }
514 cbp_head = (struct buf *)0;
515 cbp_tail = (struct buf *)0;
516 }
517 }
	/*
	 * request setup failed: free the bufs that were prepared but never
	 * issued, abort the part of the upl from the current upl_offset to
	 * the end of the request (when CL_COMMIT is set) and fail real_bp
	 */
518 if (error) {
519 for (cbp = cbp_head; cbp;) {
520 struct buf * cbp_next;
521
522 if (cbp->b_vectorcount > 1)
523 _FREE(cbp->b_vectorlist, M_SEGMENT);
524 cbp_next = cbp->b_trans_next;
525 free_io_buf(cbp);
526 cbp = cbp_next;
527
528 }
529 pg_offset = upl_offset & PAGE_MASK;
530 pg_count = (size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;
531
532 if (flags & CL_COMMIT) {
533 int upl_abort_code;
534
535 if (flags & CL_PAGEOUT)
536 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
537 else if (flags & CL_PAGEIN)
538 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
539 else
540 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
541
542 kernel_upl_abort_range(upl, upl_offset - pg_offset, pg_count * PAGE_SIZE, upl_abort_code);
543
544 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
545 upl, upl_offset - pg_offset, pg_count * PAGE_SIZE, error, 0);
546 }
547 if (real_bp) {
548 real_bp->b_flags |= B_ERROR;
549 real_bp->b_error = error;
550
551 biodone(real_bp);
552 }
	/* an error already recorded from a synchronous transaction takes precedence */
553 if (retval == 0)
554 retval = error;
555 }
556 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
557 (int)f_offset, size, upl_offset, retval, 0);
558
559 return (retval);
560}
561
562
563static int
564cluster_rd_prefetch(vp, object, f_offset, size, filesize, devblocksize)
565 struct vnode *vp;
566 void *object;
567 off_t f_offset;
568 u_int size;
569 off_t filesize;
570 int devblocksize;
571{
572 upl_t upl;
573 upl_page_info_t *pl;
574 int pages_in_upl;
575 int start_pg;
576 int last_pg;
577 int last_valid;
578 int io_size;
579
580
581 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
582 (int)f_offset, size, (int)filesize, 0, 0);
583
584 if (f_offset >= filesize) {
585 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
586 (int)f_offset, 0, 0, 0, 0);
587 return(0);
588 }
589 if (memory_object_page_op(object, (vm_offset_t)f_offset, 0, 0, 0) == KERN_SUCCESS) {
590 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
591 (int)f_offset, 0, 0, 0, 0);
592 return(0);
593 }
594 if (size > MAXPHYSIO)
595 size = MAXPHYSIO;
596 else
597 size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
598
599 if ((off_t)size > (filesize - f_offset))
600 size = ((filesize - f_offset) + (devblocksize - 1)) & ~(devblocksize - 1);
601
602 pages_in_upl = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;
603
604
605 vm_fault_list_request(object, (vm_object_offset_t)f_offset, pages_in_upl * PAGE_SIZE, &upl, NULL, 0,
606 UPL_CLEAN_IN_PLACE | UPL_NO_SYNC | UPL_SET_INTERNAL);
607 if (upl == (upl_t) 0)
608 return(0);
609
610 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
611
612 /*
613 * scan from the beginning of the upl looking for the first
614 * non-valid page.... this will become the first page in
615 * the request we're going to make to 'cluster_io'... if all
616 * of the pages are valid, we won't call through to 'cluster_io'
617 */
618 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
619 if (!upl_valid_page(pl, start_pg))
620 break;
621 }
622
623 /*
624 * scan from the starting invalid page looking for a valid
625 * page before the end of the upl is reached, if we
626 * find one, then it will be the last page of the request to
627 * 'cluster_io'
628 */
629 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
630 if (upl_valid_page(pl, last_pg))
631 break;
632 }
633
634 /*
635 * if we find any more free valid pages at the tail of the upl
636 * than update maxra accordingly....
637 */
638 for (last_valid = last_pg; last_valid < pages_in_upl; last_valid++) {
639 if (!upl_valid_page(pl, last_valid))
640 break;
641 }
642 if (start_pg < last_pg) {
643 vm_offset_t upl_offset;
644
645 /*
646 * we found a range of 'invalid' pages that must be filled
647 * 'size' has already been clipped to the LEOF
648 * make sure it's at least a multiple of the device block size
649 */
650 upl_offset = start_pg * PAGE_SIZE;
651 io_size = (last_pg - start_pg) * PAGE_SIZE;
652
653 if ((upl_offset + io_size) > size) {
654 io_size = size - upl_offset;
655
656 KERNEL_DEBUG(0xd001000, upl_offset, size, io_size, 0, 0);
657 }
658 cluster_io(vp, upl, upl_offset, f_offset + upl_offset, io_size,
659 CL_READ | CL_COMMIT | CL_ASYNC | CL_AGE, (struct buf *)0);
660 }
661 if (start_pg) {
662 /*
663 * start_pg of non-zero indicates we found some already valid pages
664 * at the beginning of the upl.... we need to release these without
665 * modifying there state
666 */
667 kernel_upl_abort_range(upl, 0, start_pg * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
668
669 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 50)) | DBG_FUNC_NONE,
670 upl, 0, start_pg * PAGE_SIZE, 0, 0);
671 }
672 if (last_pg < pages_in_upl) {
673 /*
674 * the set of pages that we issued an I/O for did not extend all the
675 * way to the end of the upl... so just release them without modifying
676 * there state
677 */
678 kernel_upl_abort_range(upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE,
679 UPL_ABORT_FREE_ON_EMPTY);
680
681 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 50)) | DBG_FUNC_NONE,
682 upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE, 0, 0);
683 }
684
685 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
686 (int)f_offset + (last_valid * PAGE_SIZE), 0, 0, 0, 0);
687
688 return(last_valid);
689}
690
691
692
693static void
694cluster_rd_ahead(vp, object, b_lblkno, e_lblkno, filesize, devblocksize)
695 struct vnode *vp;
696 void *object;
697 daddr_t b_lblkno;
698 daddr_t e_lblkno;
699 off_t filesize;
700 int devblocksize;
701{
702 daddr_t r_lblkno;
703 off_t f_offset;
704 int size_of_prefetch;
705
706
707 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
708 b_lblkno, e_lblkno, vp->v_lastr, 0, 0);
709
710 if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
711 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
712 vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);
713 return;
714 }
715
716 if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) && b_lblkno != (vp->v_maxra + 1))) {
717 vp->v_ralen = 0;
718 vp->v_maxra = 0;
719
720 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
721 vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);
722
723 return;
724 }
725 vp->v_ralen = vp->v_ralen ? min(MAXPHYSIO/PAGE_SIZE, vp->v_ralen << 1) : 1;
726
727 if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
728 vp->v_ralen = min(MAXPHYSIO/PAGE_SIZE, (e_lblkno + 1) - b_lblkno);
729
730 if (e_lblkno < vp->v_maxra) {
731 if ((vp->v_maxra - e_lblkno) > ((MAXPHYSIO/PAGE_SIZE) / 4)) {
732
733 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
734 vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);
735 return;
736 }
737 }
738 r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
739 f_offset = (off_t)r_lblkno * PAGE_SIZE_64;
740
741 size_of_prefetch = cluster_rd_prefetch(vp, object, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);
742
743 if (size_of_prefetch)
744 vp->v_maxra = r_lblkno + (size_of_prefetch - 1);
745
746 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
747 vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
748}
749
750
751cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
752 struct vnode *vp;
753 upl_t upl;
754 vm_offset_t upl_offset;
755 off_t f_offset;
756 int size;
757 off_t filesize;
758 int devblocksize;
759 int flags;
760{
761 int io_size;
762 int pg_size;
763 off_t max_size;
764 int local_flags = CL_PAGEOUT;
765
766 if ((flags & UPL_IOSYNC) == 0)
767 local_flags |= CL_ASYNC;
768 if ((flags & UPL_NOCOMMIT) == 0)
769 local_flags |= CL_COMMIT;
770
771 if (upl == (upl_t) 0)
772 panic("cluster_pageout: can't handle NULL upl yet\n");
773
774
775 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
776 (int)f_offset, size, (int)filesize, local_flags, 0);
777
778 /*
779 * If they didn't specify any I/O, then we are done...
780 * we can't issue an abort because we don't know how
781 * big the upl really is
782 */
783 if (size <= 0)
784 return (EINVAL);
785
786 if (vp->v_mount->mnt_flag & MNT_RDONLY) {
787 if (local_flags & CL_COMMIT)
788 kernel_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
789 return (EROFS);
790 }
791 /*
792 * can't page-in from a negative offset
793 * or if we're starting beyond the EOF
794 * or if the file offset isn't page aligned
795 * or the size requested isn't a multiple of PAGE_SIZE
796 */
797 if (f_offset < 0 || f_offset >= filesize ||
798 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
799 if (local_flags & CL_COMMIT)
800 kernel_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
801 return (EINVAL);
802 }
803 max_size = filesize - f_offset;
804
805 if (size < max_size)
806 io_size = size;
807 else
808 io_size = (max_size + (devblocksize - 1)) & ~(devblocksize - 1);
809
810 pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
811
812 if (size > pg_size) {
813 if (local_flags & CL_COMMIT)
814 kernel_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
815 UPL_ABORT_FREE_ON_EMPTY);
816 }
817
818 return (cluster_io(vp, upl, upl_offset, f_offset, io_size,
819 local_flags, (struct buf *)0));
820}
821
822
823cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
824 struct vnode *vp;
825 upl_t upl;
826 vm_offset_t upl_offset;
827 off_t f_offset;
828 int size;
829 off_t filesize;
830 int devblocksize;
831 int flags;
832{
833 u_int io_size;
834 int pg_size;
835 off_t max_size;
836 int retval;
837 int local_flags = 0;
838 void *object = 0;
839
840
841 /*
842 * If they didn't ask for any data, then we are done...
843 * we can't issue an abort because we don't know how
844 * big the upl really is
845 */
846 if (size <= 0)
847 return (EINVAL);
848
849 if ((flags & UPL_NOCOMMIT) == 0)
850 local_flags = CL_COMMIT;
851
852 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
853 (int)f_offset, size, (int)filesize, local_flags, 0);
854
855 /*
856 * can't page-in from a negative offset
857 * or if we're starting beyond the EOF
858 * or if the file offset isn't page aligned
859 * or the size requested isn't a multiple of PAGE_SIZE
860 */
861 if (f_offset < 0 || f_offset >= filesize ||
862 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
863 if (local_flags & CL_COMMIT)
864 kernel_upl_abort_range(upl, upl_offset, size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
865 return (EINVAL);
866 }
867 max_size = filesize - f_offset;
868
869 if (size < max_size)
870 io_size = size;
871 else
872 io_size = (max_size + (devblocksize - 1)) & ~(devblocksize - 1);
873
874 pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
875
876 if (upl == (upl_t) 0) {
877 object = ubc_getobject(vp, UBC_PAGINGOP|UBC_NOREACTIVATE);
878 if (object == (void *)NULL)
879 panic("cluster_pagein: ubc_getobject failed");
880
881 vm_fault_list_request(object, (vm_offset_t)f_offset, pg_size, &upl, NULL, 0,
882 UPL_CLEAN_IN_PLACE | UPL_NO_SYNC | UPL_SET_INTERNAL);
883 if (upl == (upl_t) 0)
884 return (EINVAL);
885
886 upl_offset = (vm_offset_t)0;
887 size = pg_size;
888 }
889 if (size > pg_size) {
890 if (local_flags & CL_COMMIT)
891 kernel_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
892 UPL_ABORT_FREE_ON_EMPTY);
893 }
894
895 retval = cluster_io(vp, upl, upl_offset, f_offset, io_size,
896 local_flags | CL_READ | CL_PAGEIN, (struct buf *)0);
897
898 if (retval == 0) {
899 int b_lblkno;
900 int e_lblkno;
901
902 b_lblkno = (int)(f_offset / PAGE_SIZE_64);
903 e_lblkno = (int)
904 ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);
905
906 if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF)) {
907 if (object == (void *)0) {
908 object = ubc_getobject(vp, UBC_PAGINGOP|UBC_NOREACTIVATE);
909 if (object == (void *)NULL)
910 panic("cluster_pagein: ubc_getobject failed");
911 }
912 /*
913 * we haven't read the last page in of the file yet
914 * so let's try to read ahead if we're in
915 * a sequential access pattern
916 */
917 cluster_rd_ahead(vp, object, b_lblkno, e_lblkno, filesize, devblocksize);
918 }
919 vp->v_lastr = e_lblkno;
920 }
921 return (retval);
922}
923
924
925cluster_bp(bp)
926 struct buf *bp;
927{
928 off_t f_offset;
929 int flags;
930
931 if (bp->b_pagelist == (upl_t) 0)
932 panic("cluster_bp: can't handle NULL upl yet\n");
933 if (bp->b_flags & B_READ)
934 flags = CL_ASYNC | CL_NOMAP | CL_READ;
935 else
936 flags = CL_ASYNC | CL_NOMAP;
937
938 f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);
939
940 return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, flags, bp));
941}
942
943
944cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
945 struct vnode *vp;
946 struct uio *uio;
947 off_t oldEOF;
948 off_t newEOF;
949 off_t headOff;
950 off_t tailOff;
951 int devblocksize;
952 int flags;
953{
954 void *object;
955 int prev_resid;
956 int clip_size;
957 off_t max_io_size;
958 struct iovec *iov;
959 int retval = 0;
960
961
962 object = ubc_getobject(vp, UBC_NOREACTIVATE);
963 if (object == (void *)NULL)
964 panic("cluster_write: ubc_getobject failed");
965
966 /*
967 * We set a threshhold of 4 pages to decide if the nocopy
968 * write loop is worth the trouble...
969 */
970
971 if ((!uio) || (uio->uio_resid < 4 * PAGE_SIZE) ||
972 (flags & IO_TAILZEROFILL) || (flags & IO_HEADZEROFILL) ||
973 (uio->uio_segflg != UIO_USERSPACE) || (!(vp->v_flag & VNOCACHE_DATA)))
974 {
975 retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
976 return(retval);
977 }
978
979 while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0)
980 {
981 /* we know we have a resid, so this is safe */
982 iov = uio->uio_iov;
983 while (iov->iov_len == 0) {
984 uio->uio_iov++;
985 uio->uio_iovcnt--;
986 iov = uio->uio_iov;
987 }
988
989 if (uio->uio_offset & PAGE_MASK_64)
990 {
991 /* Bring the file offset write up to a pagesize boundary */
992 clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
993 if (uio->uio_resid < clip_size)
994 clip_size = uio->uio_resid;
995 /*
996 * Fake the resid going into the cluster_write_x call
997 * and restore it on the way out.
998 */
999 prev_resid = uio->uio_resid;
1000 uio->uio_resid = clip_size;
1001 retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1002 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1003 }
1004 else if ((int)iov->iov_base & PAGE_MASK_64)
1005 {
1006 clip_size = iov->iov_len;
1007 prev_resid = uio->uio_resid;
1008 uio->uio_resid = clip_size;
1009 retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1010 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1011 }
1012 else
1013 {
1014 /*
1015 * If we come in here, we know the offset into
1016 * the file is on a pagesize boundary
1017 */
1018
1019 max_io_size = newEOF - uio->uio_offset;
1020 clip_size = uio->uio_resid;
1021 if (iov->iov_len < clip_size)
1022 clip_size = iov->iov_len;
1023 if (max_io_size < clip_size)
1024 clip_size = max_io_size;
1025
1026 if (clip_size < PAGE_SIZE)
1027 {
1028 /*
1029 * Take care of tail end of write in this vector
1030 */
1031 prev_resid = uio->uio_resid;
1032 uio->uio_resid = clip_size;
1033 retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1034 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1035 }
1036 else
1037 {
1038 /* round clip_size down to a multiple of pagesize */
1039 clip_size = clip_size & ~(PAGE_MASK);
1040 prev_resid = uio->uio_resid;
1041 uio->uio_resid = clip_size;
1042 retval = cluster_nocopy_write(object, vp, uio, newEOF, devblocksize, flags);
1043 if ((retval == 0) && uio->uio_resid)
1044 retval = cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1045 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1046 }
1047 } /* end else */
1048 } /* end while */
1049 return(retval);
1050}
1051
1052static
1053cluster_nocopy_write(object, vp, uio, newEOF, devblocksize, flags)
1054 void *object;
1055 struct vnode *vp;
1056 struct uio *uio;
1057 off_t newEOF;
1058 int devblocksize;
1059 int flags;
1060{
1061 upl_t upl;
1062 upl_page_info_t *pl;
1063 off_t upl_f_offset;
1064 vm_offset_t upl_offset;
1065 off_t max_io_size;
1066 int io_size;
1067 int upl_size;
1068 int upl_needed_size;
1069 int pages_in_pl;
1070 int upl_flags;
1071 kern_return_t kret;
1072 struct iovec *iov;
1073 int i;
1074 int force_data_sync;
1075 int error = 0;
1076
1077 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
1078 (int)uio->uio_offset, (int)uio->uio_resid,
1079 (int)newEOF, devblocksize, 0);
1080
1081 /*
1082 * When we enter this routine, we know
1083 * -- the offset into the file is on a pagesize boundary
1084 * -- the resid is a page multiple
1085 * -- the resid will not exceed iov_len
1086 */
1087
1088 iov = uio->uio_iov;
1089
1090 while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
1091
1092 io_size = uio->uio_resid;
1093 if (io_size > MAXPHYSIO)
1094 io_size = MAXPHYSIO;
1095
1096 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
1097 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
1098
1099 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
1100 (int)upl_offset, upl_needed_size, iov->iov_base, io_size, 0);
1101
1102 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
1103 {
1104 pages_in_pl = 0;
1105 upl_size = upl_needed_size;
1106 upl_flags = UPL_COPYOUT_FROM | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
1107
1108 kret = vm_map_get_upl(current_map(),
1109 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1110 &upl_size, &upl, &pl, &pages_in_pl, &upl_flags, force_data_sync);
1111
1112 pages_in_pl = upl_size / PAGE_SIZE;
1113
1114 if (kret != KERN_SUCCESS)
1115 {
1116 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1117 0, 0, 0, kret, 0);
1118
1119 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1120 (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);
1121
1122 /* cluster_nocopy_write: failed to get pagelist */
1123 /* do not return kret here */
1124 return(0);
1125 }
1126
1127 for(i=0; i < pages_in_pl; i++)
1128 {
1129 if (!upl_valid_page(pl, i))
1130 break;
1131 }
1132
1133 if (i == pages_in_pl)
1134 break;
1135
1136 kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1137 UPL_ABORT_FREE_ON_EMPTY);
1138 }
1139
1140 if (force_data_sync >= 3)
1141 {
1142 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1143 0, 0, 0, kret, 0);
1144
1145 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1146 (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);
1147 return(0);
1148 }
1149
1150 /*
1151 * Consider the possibility that upl_size wasn't satisfied.
1152 */
1153 if (upl_size != upl_needed_size)
1154 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
1155
1156 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1157 (int)upl_offset, upl_size, iov->iov_base, io_size, 0);
1158
1159 if (io_size == 0)
1160 {
1161 kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1162 UPL_ABORT_FREE_ON_EMPTY);
1163 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1164 (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);
1165
1166 return(0);
1167 }
1168
1169 /*
1170 * Now look for pages already in the cache
1171 * and throw them away.
1172 */
1173
1174 upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
1175 max_io_size = io_size;
1176
1177 while (max_io_size) {
1178
1179 /*
1180 * Flag UPL_POP_DUMP says if the page is found
1181 * in the page cache it must be thrown away.
1182 */
1183 memory_object_page_op(object, (vm_offset_t)upl_f_offset,
1184 UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
1185 0, 0);
1186 max_io_size -= PAGE_SIZE;
1187 upl_f_offset += PAGE_SIZE;
1188 }
1189
1190 /*
1191 * issue a synchronous write to cluster_io
1192 */
1193
1194 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
1195 (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);
1196
1197 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
1198 io_size, 0, (struct buf *)0);
1199
1200 if (error == 0) {
1201 /*
1202 * The cluster_io write completed successfully,
1203 * update the uio structure and commit.
1204 */
1205
1206 kernel_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1207 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY,
1208 pl, MAX_UPL_TRANSFER);
1209
1210 iov->iov_base += io_size;
1211 iov->iov_len -= io_size;
1212 uio->uio_resid -= io_size;
1213 uio->uio_offset += io_size;
1214 }
1215 else {
1216 kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1217 UPL_ABORT_FREE_ON_EMPTY);
1218 }
1219
1220 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
1221 (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
1222
1223 } /* end while */
1224
1225
1226 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1227 (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
1228
1229 return (error);
1230}
1231
1232static
1233cluster_write_x(object, vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
1234 void *object;
1235 struct vnode *vp;
1236 struct uio *uio;
1237 off_t oldEOF;
1238 off_t newEOF;
1239 off_t headOff;
1240 off_t tailOff;
1241 int devblocksize;
1242 int flags;
1243{
1244 upl_page_info_t *pl;
1245 upl_t upl;
1246 vm_offset_t upl_offset;
1247 int upl_size;
1248 off_t upl_f_offset;
1249 int pages_in_upl;
1250 int start_offset;
1251 int xfer_resid;
1252 int io_size;
1253 int io_size_before_rounding;
1254 int io_flags;
1255 vm_offset_t io_address;
1256 int io_offset;
1257 int bytes_to_zero;
1258 int bytes_to_move;
1259 kern_return_t kret;
1260 int retval = 0;
1261 int uio_resid;
1262 long long total_size;
1263 long long zero_cnt;
1264 off_t zero_off;
1265 long long zero_cnt1;
1266 off_t zero_off1;
1267 daddr_t start_blkno;
1268 daddr_t last_blkno;
1269
1270 if (uio) {
1271 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1272 (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);
1273
1274 uio_resid = uio->uio_resid;
1275 } else {
1276 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1277 0, 0, (int)oldEOF, (int)newEOF, 0);
1278
1279 uio_resid = 0;
1280 }
1281 zero_cnt = 0;
1282 zero_cnt1 = 0;
1283
1284 if (flags & IO_HEADZEROFILL) {
1285 /*
1286 * some filesystems (HFS is one) don't support unallocated holes within a file...
1287 * so we zero fill the intervening space between the old EOF and the offset
1288 * where the next chunk of real data begins.... ftruncate will also use this
1289 * routine to zero fill to the new EOF when growing a file... in this case, the
1290 * uio structure will not be provided
1291 */
1292 if (uio) {
1293 if (headOff < uio->uio_offset) {
1294 zero_cnt = uio->uio_offset - headOff;
1295 zero_off = headOff;
1296 }
1297 } else if (headOff < newEOF) {
1298 zero_cnt = newEOF - headOff;
1299 zero_off = headOff;
1300 }
1301 }
1302 if (flags & IO_TAILZEROFILL) {
1303 if (uio) {
1304 zero_off1 = uio->uio_offset + uio->uio_resid;
1305
1306 if (zero_off1 < tailOff)
1307 zero_cnt1 = tailOff - zero_off1;
1308 }
1309 }
1310 if (zero_cnt == 0 && uio == (struct uio *) 0)
1311 {
1312 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1313 retval, 0, 0, 0, 0);
1314 return (0);
1315 }
1316
1317 while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
1318 /*
1319 * for this iteration of the loop, figure out where our starting point is
1320 */
1321 if (zero_cnt) {
1322 start_offset = (int)(zero_off & PAGE_MASK_64);
1323 upl_f_offset = zero_off - start_offset;
1324 } else if (uio_resid) {
1325 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
1326 upl_f_offset = uio->uio_offset - start_offset;
1327 } else {
1328 start_offset = (int)(zero_off1 & PAGE_MASK_64);
1329 upl_f_offset = zero_off1 - start_offset;
1330 }
1331 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
1332 (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);
1333
1334 if (total_size > (long long)MAXPHYSIO)
1335 total_size = MAXPHYSIO;
1336
1337 /*
1338 * compute the size of the upl needed to encompass
1339 * the requested write... limit each call to cluster_io
1340 * to at most MAXPHYSIO, make sure to account for
1341 * a starting offset that's not page aligned
1342 */
1343 upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
1344
1345 if (upl_size > MAXPHYSIO)
1346 upl_size = MAXPHYSIO;
1347
1348 pages_in_upl = upl_size / PAGE_SIZE;
1349 io_size = upl_size - start_offset;
1350
1351 if ((long long)io_size > total_size)
1352 io_size = total_size;
1353
1354 start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
1355 last_blkno = start_blkno + pages_in_upl;
1356
1357 kret = vm_fault_list_request(object,
1358 (vm_object_offset_t)upl_f_offset, upl_size, &upl, NULL, 0,
1359 (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL));
1360
1361 if (kret != KERN_SUCCESS)
1362 panic("cluster_write: failed to get pagelist");
1363
1364 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1365
1366 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
1367 upl, (int)upl_f_offset, upl_size, start_offset, 0);
1368
1369
1370 if (start_offset && !upl_valid_page(pl, 0)) {
1371 int read_size;
1372
1373 /*
1374 * we're starting in the middle of the first page of the upl
1375 * and the page isn't currently valid, so we're going to have
1376 * to read it in first... this is a synchronous operation
1377 */
1378 read_size = PAGE_SIZE;
1379
1380 if ((upl_f_offset + read_size) > newEOF) {
1381 read_size = newEOF - upl_f_offset;
1382 read_size = (read_size + (devblocksize - 1)) & ~(devblocksize - 1);
1383 }
1384 retval = cluster_io(vp, upl, 0, upl_f_offset, read_size,
1385 CL_READ, (struct buf *)0);
1386 if (retval) {
1387 /*
1388 * we had an error during the read which causes us to abort
1389 * the current cluster_write request... before we do, we need
1390 * to release the rest of the pages in the upl without modifying
1391 * there state and mark the failed page in error
1392 */
1393 kernel_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
1394 kernel_upl_abort(upl, 0);
1395
1396 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1397 upl, 0, 0, retval, 0);
1398 break;
1399 }
1400 }
1401 if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
1402 /*
1403 * the last offset we're writing to in this upl does not end on a page
1404 * boundary... if it's not beyond the old EOF, then we'll also need to
1405 * pre-read this page in if it isn't already valid
1406 */
1407 upl_offset = upl_size - PAGE_SIZE;
1408
1409 if ((upl_f_offset + start_offset + io_size) < oldEOF &&
1410 !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {
1411 int read_size;
1412
1413 read_size = PAGE_SIZE;
1414
1415 if ((upl_f_offset + upl_offset + read_size) > newEOF) {
1416 read_size = newEOF - (upl_f_offset + upl_offset);
1417 read_size = (read_size + (devblocksize - 1)) & ~(devblocksize - 1);
1418 }
1419 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size,
1420 CL_READ, (struct buf *)0);
1421 if (retval) {
1422 /*
1423 * we had an error during the read which causes us to abort
1424 * the current cluster_write request... before we do, we need
1425 * to release the rest of the pages in the upl without modifying
1426 * there state and mark the failed page in error
1427 */
1428 kernel_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
1429 kernel_upl_abort(upl, 0);
1430
1431 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1432 upl, 0, 0, retval, 0);
1433 break;
1434 }
1435 }
1436 }
1437 if ((kret = kernel_upl_map(kernel_map, upl, &io_address)) != KERN_SUCCESS)
1438 panic("cluster_write: kernel_upl_map failed\n");
1439 xfer_resid = io_size;
1440 io_offset = start_offset;
1441
1442 while (zero_cnt && xfer_resid) {
1443
1444 if (zero_cnt < (long long)xfer_resid)
1445 bytes_to_zero = zero_cnt;
1446 else
1447 bytes_to_zero = xfer_resid;
1448
1449 if ( !(flags & IO_NOZEROVALID)) {
1450 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1451
1452 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1453 (int)upl_f_offset + io_offset, bytes_to_zero,
1454 (int)zero_cnt, xfer_resid, 0);
1455 } else {
1456 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
1457
1458 if ( !upl_valid_page(pl, (int)(zero_off / PAGE_SIZE_64))) {
1459 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1460
1461 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1462 (int)upl_f_offset + io_offset, bytes_to_zero,
1463 (int)zero_cnt, xfer_resid, 0);
1464 }
1465 }
1466 xfer_resid -= bytes_to_zero;
1467 zero_cnt -= bytes_to_zero;
1468 zero_off += bytes_to_zero;
1469 io_offset += bytes_to_zero;
1470 }
1471 if (xfer_resid && uio_resid) {
1472 bytes_to_move = min(uio_resid, xfer_resid);
1473
1474 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
1475 (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);
1476
1477 retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);
1478
1479 if (retval) {
1480 if ((kret = kernel_upl_unmap(kernel_map, upl)) != KERN_SUCCESS)
1481 panic("cluster_write: kernel_upl_unmap failed\n");
1482 kernel_upl_abort(upl, UPL_ABORT_DUMP_PAGES);
1483
1484 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1485 upl, 0, 0, retval, 0);
1486 } else {
1487 uio_resid -= bytes_to_move;
1488 xfer_resid -= bytes_to_move;
1489 io_offset += bytes_to_move;
1490 }
1491 }
1492 while (xfer_resid && zero_cnt1 && retval == 0) {
1493
1494 if (zero_cnt1 < (long long)xfer_resid)
1495 bytes_to_zero = zero_cnt1;
1496 else
1497 bytes_to_zero = xfer_resid;
1498
1499 if ( !(flags & IO_NOZEROVALID)) {
1500 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1501
1502 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1503 (int)upl_f_offset + io_offset,
1504 bytes_to_zero, (int)zero_cnt1, xfer_resid, 0);
1505 } else {
1506 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
1507 if ( !upl_valid_page(pl, (int)(zero_off1 / PAGE_SIZE_64))) {
1508 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1509
1510 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1511 (int)upl_f_offset + io_offset,
1512 bytes_to_zero, (int)zero_cnt1, xfer_resid, 0);
1513 }
1514 }
1515 xfer_resid -= bytes_to_zero;
1516 zero_cnt1 -= bytes_to_zero;
1517 zero_off1 += bytes_to_zero;
1518 io_offset += bytes_to_zero;
1519 }
1520
1521 if (retval == 0) {
1522 int must_push;
1523 int can_delay;
1524
1525 io_size += start_offset;
1526
1527 if ((upl_f_offset + io_size) == newEOF && io_size < upl_size) {
1528 /*
1529 * if we're extending the file with this write
1530 * we'll zero fill the rest of the page so that
1531 * if the file gets extended again in such a way as to leave a
1532 * hole starting at this EOF, we'll have zero's in the correct spot
1533 */
1534 bzero((caddr_t)(io_address + io_size), upl_size - io_size);
1535
1536 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1537 (int)upl_f_offset + io_size,
1538 upl_size - io_size, 0, 0, 0);
1539 }
1540 if ((kret = kernel_upl_unmap(kernel_map, upl)) != KERN_SUCCESS)
1541 panic("cluster_write: kernel_upl_unmap failed\n");
1542
1543 io_size_before_rounding = io_size;
1544
1545 if (io_size & (devblocksize - 1))
1546 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
1547
1548 must_push = 0;
1549 can_delay = 0;
1550
1551 if (vp->v_clen) {
1552 int newsize;
1553
1554 /*
1555 * we have an existing cluster... see if this write will extend it nicely
1556 */
1557 if (start_blkno >= vp->v_cstart) {
1558 if (last_blkno <= (vp->v_cstart + vp->v_clen)) {
1559 /*
1560 * we have a write that fits entirely
1561 * within the existing cluster limits
1562 */
1563 if (last_blkno >= vp->v_lastw) {
1564 /*
1565 * if we're extending the dirty region within the cluster
1566 * we need to update the cluster info... we check for blkno
1567 * equality because we may be extending the file with a
1568 * partial write.... this in turn changes our idea of how
1569 * much data to write out (v_ciosiz) for the last page
1570 */
1571 vp->v_lastw = last_blkno;
1572 newsize = io_size + ((start_blkno - vp->v_cstart) * PAGE_SIZE);
1573
1574 if (newsize > vp->v_ciosiz)
1575 vp->v_ciosiz = newsize;
1576 }
1577 can_delay = 1;
1578 goto finish_io;
1579 }
1580 if (start_blkno < (vp->v_cstart + vp->v_clen)) {
1581 /*
1582 * we have a write that starts in the middle of the current cluster
1583 * but extends beyond the cluster's limit
1584 * we'll clip the current cluster if we actually
1585 * overlap with the new write and then push it out
1586 * and start a new cluster with the current write
1587 */
1588 if (vp->v_lastw > start_blkno) {
1589 vp->v_lastw = start_blkno;
1590 vp->v_ciosiz = (vp->v_lastw - vp->v_cstart) * PAGE_SIZE;
1591 }
1592 }
1593 /*
1594 * we also get here for the case where the current write starts
1595 * beyond the limit of the existing cluster
1596 */
1597 must_push = 1;
1598 goto check_delay;
1599 }
1600 /*
1601 * the current write starts in front of the current cluster
1602 */
1603 if (last_blkno > vp->v_cstart) {
1604 /*
1605 * the current write extends into the existing cluster
1606 */
1607 if ((vp->v_lastw - start_blkno) > vp->v_clen) {
1608 /*
1609 * if we were to combine this write with the current cluster
1610 * we would exceed the cluster size limit....
1611 * clip the current cluster by moving the start position
1612 * to where the current write ends, and then push it
1613 */
1614 vp->v_ciosiz -= (last_blkno - vp->v_cstart) * PAGE_SIZE;
1615 vp->v_cstart = last_blkno;
1616
1617 /*
1618 * round up the io_size to the nearest page size
1619 * since we've coalesced with at least 1 pre-existing
1620 * page in the current cluster... this write may have ended in the
1621 * middle of the page which would cause io_size to give us an
1622 * inaccurate view of how much I/O we actually need to do
1623 */
1624 io_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
1625
1626 must_push = 1;
1627 goto check_delay;
1628 }
1629 /*
1630 * we can coalesce the current write with the existing cluster
1631 * adjust the cluster info to reflect this
1632 */
1633 if (last_blkno > vp->v_lastw) {
1634 /*
1635 * the current write completey overlaps
1636 * the existing cluster
1637 */
1638 vp->v_lastw = last_blkno;
1639 vp->v_ciosiz = io_size;
1640 } else {
1641 vp->v_ciosiz += (vp->v_cstart - start_blkno) * PAGE_SIZE;
1642
1643 if (io_size > vp->v_ciosiz)
1644 vp->v_ciosiz = io_size;
1645 }
1646 vp->v_cstart = start_blkno;
1647 can_delay = 1;
1648 goto finish_io;
1649 }
1650 /*
1651 * this I/O range is entirely in front of the current cluster
1652 * so we need to push the current cluster out before beginning
1653 * a new one
1654 */
1655 must_push = 1;
1656 }
1657check_delay:
1658 if (must_push)
1659 cluster_push(vp);
1660
1661 if (io_size_before_rounding < MAXPHYSIO && !(flags & IO_SYNC)) {
1662 vp->v_clen = MAXPHYSIO / PAGE_SIZE;
1663 vp->v_cstart = start_blkno;
1664 vp->v_lastw = last_blkno;
1665 vp->v_ciosiz = io_size;
1666
1667 can_delay = 1;
1668 }
1669finish_io:
1670 if (can_delay) {
1671 kernel_upl_commit_range(upl, 0, upl_size,
1672 UPL_COMMIT_SET_DIRTY
1673 | UPL_COMMIT_FREE_ON_EMPTY,
1674 pl, MAX_UPL_TRANSFER);
1675 continue;
1676 }
1677
1678 if ((flags & IO_SYNC) || (vp->v_numoutput > ASYNC_THROTTLE))
1679 io_flags = CL_COMMIT | CL_AGE;
1680 else
1681 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
1682
1683 if (vp->v_flag & VNOCACHE_DATA)
1684 io_flags |= CL_DUMP;
1685
1686 retval = cluster_io(vp, upl, 0, upl_f_offset, io_size,
1687 io_flags, (struct buf *)0);
1688 }
1689 }
1690 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1691 retval, 0, 0, 0, 0);
1692
1693 return (retval);
1694}
1695
1696cluster_read(vp, uio, filesize, devblocksize, flags)
1697 struct vnode *vp;
1698 struct uio *uio;
1699 off_t filesize;
1700 int devblocksize;
1701 int flags;
1702{
1703 void *object;
1704 int prev_resid;
1705 int clip_size;
1706 off_t max_io_size;
1707 struct iovec *iov;
1708 int retval = 0;
1709
1710 object = ubc_getobject(vp, UBC_NOREACTIVATE);
1711 if (object == (void *)NULL)
1712 panic("cluster_read: ubc_getobject failed");
1713
1714 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
1715 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
1716
1717 /*
1718 * We set a threshhold of 4 pages to decide if the nocopy
1719 * read loop is worth the trouble...
1720 */
1721
1722 if ((!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE)))
1723 || (uio->uio_resid < 4 * PAGE_SIZE))
1724 {
1725 retval = cluster_read_x(object, vp, uio, filesize, devblocksize, flags);
1726 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
1727 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
1728 return(retval);
1729
1730 }
1731
1732 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0)
1733 {
1734 /* we know we have a resid, so this is safe */
1735 iov = uio->uio_iov;
1736 while (iov->iov_len == 0) {
1737 uio->uio_iov++;
1738 uio->uio_iovcnt--;
1739 iov = uio->uio_iov;
1740 }
1741
1742 if (uio->uio_offset & PAGE_MASK_64)
1743 {
1744 /* Bring the file offset read up to a pagesize boundary */
1745 clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
1746 if (uio->uio_resid < clip_size)
1747 clip_size = uio->uio_resid;
1748 /*
1749 * Fake the resid going into the cluster_read_x call
1750 * and restore it on the way out.
1751 */
1752 prev_resid = uio->uio_resid;
1753 uio->uio_resid = clip_size;
1754 retval = cluster_read_x(object, vp, uio, filesize, devblocksize, flags);
1755 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1756 }
1757 else if ((int)iov->iov_base & PAGE_MASK_64)
1758 {
1759 clip_size = iov->iov_len;
1760 prev_resid = uio->uio_resid;
1761 uio->uio_resid = clip_size;
1762 retval = cluster_read_x(object, vp, uio, filesize, devblocksize, flags);
1763 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1764 }
1765 else
1766 {
1767 /*
1768 * If we come in here, we know the offset into
1769 * the file is on a pagesize boundary
1770 */
1771
1772 max_io_size = filesize - uio->uio_offset;
1773 clip_size = uio->uio_resid;
1774 if (iov->iov_len < clip_size)
1775 clip_size = iov->iov_len;
1776 if (max_io_size < clip_size)
1777 clip_size = (int)max_io_size;
1778
1779 if (clip_size < PAGE_SIZE)
1780 {
1781 /*
1782 * Take care of the tail end of the read in this vector.
1783 */
1784 prev_resid = uio->uio_resid;
1785 uio->uio_resid = clip_size;
1786 retval = cluster_read_x(object,vp, uio, filesize, devblocksize, flags);
1787 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1788 }
1789 else
1790 {
1791 /* round clip_size down to a multiple of pagesize */
1792 clip_size = clip_size & ~(PAGE_MASK);
1793 prev_resid = uio->uio_resid;
1794 uio->uio_resid = clip_size;
1795 retval = cluster_nocopy_read(object, vp, uio, filesize, devblocksize, flags);
1796 if ((retval==0) && uio->uio_resid)
1797 retval = cluster_read_x(object,vp, uio, filesize, devblocksize, flags);
1798 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1799 }
1800 } /* end else */
1801 } /* end while */
1802
1803 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
1804 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
1805
1806 return(retval);
1807}
1808
1809static
1810cluster_read_x(object, vp, uio, filesize, devblocksize, flags)
1811 void *object;
1812 struct vnode *vp;
1813 struct uio *uio;
1814 off_t filesize;
1815 int devblocksize;
1816 int flags;
1817{
1818 upl_page_info_t *pl;
1819 upl_t upl;
1820 vm_offset_t upl_offset;
1821 int upl_size;
1822 off_t upl_f_offset;
1823 int start_offset;
1824 int start_pg;
1825 int last_pg;
1826 int uio_last;
1827 int pages_in_upl;
1828 off_t max_size;
1829 int io_size;
1830 vm_offset_t io_address;
1831 kern_return_t kret;
1832 int segflg;
1833 int error = 0;
1834 int retval = 0;
1835 int b_lblkno;
1836 int e_lblkno;
1837
1838 b_lblkno = (int)(uio->uio_offset / PAGE_SIZE_64);
1839
1840 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
1841 /*
1842 * compute the size of the upl needed to encompass
1843 * the requested read... limit each call to cluster_io
1844 * to at most MAXPHYSIO, make sure to account for
1845 * a starting offset that's not page aligned
1846 */
1847 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
1848 upl_f_offset = uio->uio_offset - (off_t)start_offset;
1849 max_size = filesize - uio->uio_offset;
1850
1851 if (uio->uio_resid < max_size)
1852 io_size = uio->uio_resid;
1853 else
1854 io_size = max_size;
1855#ifdef ppc
1856 if (uio->uio_segflg == UIO_USERSPACE && !(vp->v_flag & VNOCACHE_DATA)) {
1857 segflg = uio->uio_segflg;
1858
1859 uio->uio_segflg = UIO_PHYS_USERSPACE;
1860
1861 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
1862 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
1863
1864 while (io_size && retval == 0) {
1865 int xsize;
1866 vm_offset_t paddr;
1867
1868 if (memory_object_page_op(object, (vm_offset_t)upl_f_offset, UPL_POP_SET | UPL_POP_BUSY,
1869 &paddr, 0) != KERN_SUCCESS)
1870 break;
1871
1872 xsize = PAGE_SIZE - start_offset;
1873
1874 if (xsize > io_size)
1875 xsize = io_size;
1876
1877 retval = uiomove((caddr_t)(paddr + start_offset), xsize, uio);
1878
1879 memory_object_page_op(object, (vm_offset_t)upl_f_offset, UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
1880
1881 io_size -= xsize;
1882 start_offset = (int)
1883 (uio->uio_offset & PAGE_MASK_64);
1884 upl_f_offset = uio->uio_offset - start_offset;
1885 }
1886 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
1887 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
1888
1889 uio->uio_segflg = segflg;
1890
1891 if (retval)
1892 break;
1893
1894 if (io_size == 0) {
1895 /*
1896 * we're already finished with this read request
1897 * let's see if we should do a read-ahead
1898 */
1899 e_lblkno = (int)
1900 ((uio->uio_offset - 1) / PAGE_SIZE_64);
1901
1902 if (!(vp->v_flag & VRAOFF))
1903 /*
1904 * let's try to read ahead if we're in
1905 * a sequential access pattern
1906 */
1907 cluster_rd_ahead(vp, object, b_lblkno, e_lblkno, filesize, devblocksize);
1908 vp->v_lastr = e_lblkno;
1909
1910 break;
1911 }
1912 max_size = filesize - uio->uio_offset;
1913 }
1914#endif
1915 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
1916 if (upl_size > MAXPHYSIO)
1917 upl_size = MAXPHYSIO;
1918 pages_in_upl = upl_size / PAGE_SIZE;
1919
1920 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START,
1921 upl, (int)upl_f_offset, upl_size, start_offset, 0);
1922
1923 kret = vm_fault_list_request(object,
1924 (vm_object_offset_t)upl_f_offset, upl_size, &upl, NULL, 0,
1925 (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL));
1926
1927 if (kret != KERN_SUCCESS)
1928 panic("cluster_read: failed to get pagelist");
1929
1930 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1931
1932
1933 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END,
1934 upl, (int)upl_f_offset, upl_size, start_offset, 0);
1935
1936 /*
1937 * scan from the beginning of the upl looking for the first
1938 * non-valid page.... this will become the first page in
1939 * the request we're going to make to 'cluster_io'... if all
1940 * of the pages are valid, we won't call through to 'cluster_io'
1941 */
1942 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
1943 if (!upl_valid_page(pl, start_pg))
1944 break;
1945 }
1946
1947 /*
1948 * scan from the starting invalid page looking for a valid
1949 * page before the end of the upl is reached, if we
1950 * find one, then it will be the last page of the request to
1951 * 'cluster_io'
1952 */
1953 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
1954 if (upl_valid_page(pl, last_pg))
1955 break;
1956 }
1957
1958 if (start_pg < last_pg) {
1959 /*
1960 * we found a range of 'invalid' pages that must be filled
1961 * if the last page in this range is the last page of the file
1962 * we may have to clip the size of it to keep from reading past
1963 * the end of the last physical block associated with the file
1964 */
1965 upl_offset = start_pg * PAGE_SIZE;
1966 io_size = (last_pg - start_pg) * PAGE_SIZE;
1967
1968 if ((upl_f_offset + upl_offset + io_size) > filesize) {
1969 io_size = filesize - (upl_f_offset + upl_offset);
1970 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
1971 }
1972 /*
1973 * issue a synchronous read to cluster_io
1974 */
1975
1976 error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
1977 io_size, CL_READ, (struct buf *)0);
1978 }
1979 if (error == 0) {
1980 /*
1981 * if the read completed successfully, or there was no I/O request
1982 * issued, than map the upl into kernel address space and
1983 * move the data into user land.... we'll first add on any 'valid'
1984 * pages that were present in the upl when we acquired it.
1985 */
1986 u_int val_size;
1987 u_int size_of_prefetch;
1988
1989 for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
1990 if (!upl_valid_page(pl, uio_last))
1991 break;
1992 }
1993 /*
1994 * compute size to transfer this round, if uio->uio_resid is
1995 * still non-zero after this uiomove, we'll loop around and
1996 * set up for another I/O.
1997 */
1998 val_size = (uio_last * PAGE_SIZE) - start_offset;
1999
2000 if (max_size < val_size)
2001 val_size = max_size;
2002
2003 if (uio->uio_resid < val_size)
2004 val_size = uio->uio_resid;
2005
2006 e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);
2007
2008 if (size_of_prefetch = (uio->uio_resid - val_size)) {
2009 /*
2010 * if there's still I/O left to do for this request, then issue a
2011 * pre-fetch I/O... the I/O wait time will overlap
2012 * with the copying of the data
2013 */
2014 cluster_rd_prefetch(vp, object, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);
2015 } else {
2016 if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
2017 /*
2018 * let's try to read ahead if we're in
2019 * a sequential access pattern
2020 */
2021 cluster_rd_ahead(vp, object, b_lblkno, e_lblkno, filesize, devblocksize);
2022 vp->v_lastr = e_lblkno;
2023 }
2024#ifdef ppc
2025 if (uio->uio_segflg == UIO_USERSPACE) {
2026 int offset;
2027
2028 segflg = uio->uio_segflg;
2029
2030 uio->uio_segflg = UIO_PHYS_USERSPACE;
2031
2032
2033 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2034 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2035
2036 offset = start_offset;
2037
2038 while (val_size && retval == 0) {
2039 int csize;
2040 int i;
2041 caddr_t paddr;
2042
2043 i = offset / PAGE_SIZE;
2044 csize = min(PAGE_SIZE - start_offset, val_size);
2045
2046 paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;
2047
2048 retval = uiomove(paddr, csize, uio);
2049
2050 val_size -= csize;
2051 offset += csize;
2052 start_offset = offset & PAGE_MASK;
2053 }
2054 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2055 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2056
2057 uio->uio_segflg = segflg;
2058 } else
2059#endif
2060 {
2061 if ((kret = kernel_upl_map(kernel_map, upl, &io_address)) != KERN_SUCCESS)
2062 panic("cluster_read: kernel_upl_map failed\n");
2063
2064 retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);
2065
2066 if ((kret = kernel_upl_unmap(kernel_map, upl)) != KERN_SUCCESS)
2067 panic("cluster_read: kernel_upl_unmap failed\n");
2068 }
2069 }
2070 if (start_pg < last_pg) {
2071 /*
2072 * compute the range of pages that we actually issued an I/O for
2073 * and either commit them as valid if the I/O succeeded
2074 * or abort them if the I/O failed
2075 */
2076 io_size = (last_pg - start_pg) * PAGE_SIZE;
2077
2078 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2079 upl, start_pg * PAGE_SIZE, io_size, error, 0);
2080
2081 if (error || (vp->v_flag & VNOCACHE_DATA))
2082 kernel_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
2083 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2084 else
2085 kernel_upl_commit_range(upl,
2086 start_pg * PAGE_SIZE, io_size,
2087 UPL_COMMIT_CLEAR_DIRTY
2088 | UPL_COMMIT_FREE_ON_EMPTY
2089 | UPL_COMMIT_INACTIVATE,
2090 pl, MAX_UPL_TRANSFER);
2091
2092 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2093 upl, start_pg * PAGE_SIZE, io_size, error, 0);
2094 }
2095 if ((last_pg - start_pg) < pages_in_upl) {
2096 int cur_pg;
2097 int commit_flags;
2098
2099 /*
2100 * the set of pages that we issued an I/O for did not encompass
2101 * the entire upl... so just release these without modifying
2102 * there state
2103 */
2104 if (error)
2105 kernel_upl_abort(upl, 0);
2106 else {
2107 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2108 upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);
2109
2110 if (start_pg) {
2111 /*
2112 * we found some already valid pages at the beginning of the upl
2113 * commit these back to the inactive list with reference cleared
2114 */
2115 for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
2116 commit_flags = UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE;
2117
2118 if (upl_dirty_page(pl, cur_pg))
2119 commit_flags |= UPL_COMMIT_SET_DIRTY;
2120
2121 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2122 kernel_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2123 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2124 else
2125 kernel_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2126 PAGE_SIZE, commit_flags, pl, MAX_UPL_TRANSFER);
2127 }
2128 }
2129 if (last_pg < uio_last) {
2130 /*
2131 * we found some already valid pages immediately after the pages we issued
2132 * I/O for, commit these back to the inactive list with reference cleared
2133 */
2134 for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
2135 commit_flags = UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE;
2136
2137 if (upl_dirty_page(pl, cur_pg))
2138 commit_flags |= UPL_COMMIT_SET_DIRTY;
2139
2140 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2141 kernel_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2142 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2143 else
2144 kernel_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2145 PAGE_SIZE, commit_flags, pl, MAX_UPL_TRANSFER);
2146 }
2147 }
2148 if (uio_last < pages_in_upl) {
2149 /*
2150 * there were some invalid pages beyond the valid pages that we didn't
2151 * issue an I/O for, just release them unchanged
2152 */
2153 kernel_upl_abort(upl, 0);
2154 }
2155
2156 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2157 upl, -1, -1, 0, 0);
2158 }
2159 }
2160 if (retval == 0)
2161 retval = error;
2162 }
2163
2164 return (retval);
2165}
2166
2167static
2168cluster_nocopy_read(object, vp, uio, filesize, devblocksize, flags)
2169 void *object;
2170 struct vnode *vp;
2171 struct uio *uio;
2172 off_t filesize;
2173 int devblocksize;
2174 int flags;
2175{
2176 upl_t upl;
2177 upl_page_info_t *pl;
2178 off_t upl_f_offset;
2179 vm_offset_t upl_offset;
2180 off_t start_upl_f_offset;
2181 off_t max_io_size;
2182 int io_size;
2183 int upl_size;
2184 int upl_needed_size;
2185 int pages_in_pl;
2186 vm_offset_t paddr;
2187 int upl_flags;
2188 kern_return_t kret;
2189 int segflg;
2190 struct iovec *iov;
2191 int i;
2192 int force_data_sync;
2193 int error = 0;
2194 int retval = 0;
2195
2196 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
2197 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
2198
2199 /*
2200 * When we enter this routine, we know
2201 * -- the offset into the file is on a pagesize boundary
2202 * -- the resid is a page multiple
2203 * -- the resid will not exceed iov_len
2204 */
2205
2206 iov = uio->uio_iov;
2207 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2208
2209 io_size = uio->uio_resid;
2210
2211 /*
2212 * We don't come into this routine unless
2213 * UIO_USERSPACE is set.
2214 */
2215 segflg = uio->uio_segflg;
2216
2217 uio->uio_segflg = UIO_PHYS_USERSPACE;
2218
2219 /*
2220 * First look for pages already in the cache
2221 * and move them to user space.
2222 */
2223 while (io_size && retval == 0) {
2224
2225 upl_f_offset = uio->uio_offset;
2226
2227 /*
2228 * If this call fails, it means the page is not
2229 * in the page cache.
2230 */
2231 if (memory_object_page_op(object, (vm_offset_t)upl_f_offset,
2232 UPL_POP_SET | UPL_POP_BUSY,
2233 &paddr, 0) != KERN_SUCCESS)
2234 break;
2235
2236 retval = uiomove((caddr_t)(paddr), PAGE_SIZE, uio);
2237
2238 memory_object_page_op(object, (vm_offset_t)upl_f_offset,
2239 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2240
2241 io_size -= PAGE_SIZE;
2242 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
2243 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2244 }
2245
2246 uio->uio_segflg = segflg;
2247
2248 if (retval)
2249 {
2250 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2251 (int)uio->uio_offset, uio->uio_resid, 2, retval, 0);
2252 return(retval);
2253 }
2254
2255 /* If we are already finished with this read, then return */
2256 if (io_size == 0)
2257 {
2258
2259 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2260 (int)uio->uio_offset, uio->uio_resid, 3, io_size, 0);
2261 return(0);
2262 }
2263
2264 max_io_size = io_size;
2265 if (max_io_size > MAXPHYSIO)
2266 max_io_size = MAXPHYSIO;
2267
2268 start_upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
2269 upl_f_offset = start_upl_f_offset;
2270 io_size = 0;
2271
2272 while(io_size < max_io_size)
2273 {
2274
2275 if(memory_object_page_op(object, (vm_offset_t)upl_f_offset,
2276 UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS)
2277 {
2278 memory_object_page_op(object, (vm_offset_t)upl_f_offset,
2279 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2280 break;
2281 }
2282
2283 /*
2284 * Build up the io request parameters.
2285 */
2286
2287 io_size += PAGE_SIZE;
2288 upl_f_offset += PAGE_SIZE;
2289 }
2290
2291 if (io_size == 0)
2292 return(retval);
2293
2294 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2295 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
2296
2297 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
2298 (int)upl_offset, upl_needed_size, iov->iov_base, io_size, 0);
2299
2300 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
2301 {
2302 pages_in_pl = 0;
2303 upl_size = upl_needed_size;
2304 upl_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2305
2306 kret = vm_map_get_upl(current_map(),
2307 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2308 &upl_size, &upl, &pl, &pages_in_pl, &upl_flags, force_data_sync);
2309
2310 pages_in_pl = upl_size / PAGE_SIZE;
2311
2312 if (kret != KERN_SUCCESS)
2313 {
2314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2315 (int)upl_offset, upl_size, io_size, kret, 0);
2316
2317 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2318 (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);
2319
2320 /* cluster_nocopy_read: failed to get pagelist */
2321 /* do not return kret here */
2322 return(retval);
2323 }
2324
2325 for(i=0; i < pages_in_pl; i++)
2326 {
2327 if (!upl_valid_page(pl, i))
2328 break;
2329 }
2330 if (i == pages_in_pl)
2331 break;
2332
2333 kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2334 UPL_ABORT_FREE_ON_EMPTY);
2335 }
2336
2337 if (force_data_sync >= 3)
2338 {
2339 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2340 (int)upl_offset, upl_size, io_size, kret, 0);
2341
2342 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2343 (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);
2344 return(retval);
2345 }
2346 /*
2347 * Consider the possibility that upl_size wasn't satisfied.
2348 */
2349 if (upl_size != upl_needed_size)
2350 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
2351
2352 if (io_size == 0)
2353 {
2354 kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2355 UPL_ABORT_FREE_ON_EMPTY);
2356 return(retval);
2357 }
2358
2359 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2360 (int)upl_offset, upl_size, io_size, kret, 0);
2361
2362 /*
2363 * issue a synchronous read to cluster_io
2364 */
2365
2366 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
2367 upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);
2368
2369 error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
2370 io_size, CL_READ| CL_NOZERO, (struct buf *)0);
2371
2372 if (error == 0) {
2373 /*
2374 * The cluster_io read completed successfully,
2375 * update the uio structure and commit.
2376 */
2377
2378 kernel_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2379 UPL_COMMIT_SET_DIRTY
2380 | UPL_COMMIT_FREE_ON_EMPTY,
2381 pl, MAX_UPL_TRANSFER);
2382
2383 iov->iov_base += io_size;
2384 iov->iov_len -= io_size;
2385 uio->uio_resid -= io_size;
2386 uio->uio_offset += io_size;
2387 }
2388 else {
2389 kernel_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2390 UPL_ABORT_FREE_ON_EMPTY);
2391 }
2392
2393 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
2394 upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
2395
2396 if (retval == 0)
2397 retval = error;
2398
2399 } /* end while */
2400
2401
2402 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2403 (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);
2404
2405 return (retval);
2406}
2407
2408
2409
2410/*
2411 * generate advisory I/O's in the largest chunks possible
2412 * the completed pages will be released into the VM cache
2413 */
2414advisory_read(vp, filesize, f_offset, resid, devblocksize)
2415 struct vnode *vp;
2416 off_t filesize;
2417 off_t f_offset;
2418 int resid;
2419 int devblocksize;
2420{
2421 void *object;
2422 upl_page_info_t *pl;
2423 upl_t upl;
2424 vm_offset_t upl_offset;
2425 int upl_size;
2426 off_t upl_f_offset;
2427 int start_offset;
2428 int start_pg;
2429 int last_pg;
2430 int pages_in_upl;
2431 off_t max_size;
2432 int io_size;
2433 kern_return_t kret;
2434 int retval = 0;
2435
2436
2437 if (!UBCINFOEXISTS(vp))
2438 return(EINVAL);
2439
2440 object = ubc_getobject(vp, UBC_NOREACTIVATE);
2441 if (object == (void *)NULL)
2442 panic("advisory_read: ubc_getobject failed");
2443
2444 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
2445 (int)f_offset, resid, (int)filesize, devblocksize, 0);
2446
2447 while (resid && f_offset < filesize && retval == 0) {
2448 /*
2449 * compute the size of the upl needed to encompass
2450 * the requested read... limit each call to cluster_io
2451 * to at most MAXPHYSIO, make sure to account for
2452 * a starting offset that's not page aligned
2453 */
2454 start_offset = (int)(f_offset & PAGE_MASK_64);
2455 upl_f_offset = f_offset - (off_t)start_offset;
2456 max_size = filesize - f_offset;
2457
2458 if (resid < max_size)
2459 io_size = resid;
2460 else
2461 io_size = max_size;
2462
2463 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2464 if (upl_size > MAXPHYSIO)
2465 upl_size = MAXPHYSIO;
2466 pages_in_upl = upl_size / PAGE_SIZE;
2467
2468 kret = vm_fault_list_request(object,
2469 (vm_object_offset_t)upl_f_offset, upl_size, &upl, NULL, 0,
2470 (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL));
2471
2472 if (kret != KERN_SUCCESS)
2473 panic("advisory_read: failed to get pagelist");
2474
2475 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2476
2477
2478 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
2479 upl, (int)upl_f_offset, upl_size, start_offset, 0);
2480
2481 /*
2482 * scan from the beginning of the upl looking for the first
2483 * non-valid page.... this will become the first page in
2484 * the request we're going to make to 'cluster_io'... if all
2485 * of the pages are valid, we won't call through to 'cluster_io'
2486 */
2487 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
2488 if (!upl_valid_page(pl, start_pg))
2489 break;
2490 }
2491
2492 /*
2493 * scan from the starting invalid page looking for a valid
2494 * page before the end of the upl is reached, if we
2495 * find one, then it will be the last page of the request to
2496 * 'cluster_io'
2497 */
2498 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2499 if (upl_valid_page(pl, last_pg))
2500 break;
2501 }
2502
2503 if (start_pg < last_pg) {
2504 /*
2505 * we found a range of 'invalid' pages that must be filled
2506 * if the last page in this range is the last page of the file
2507 * we may have to clip the size of it to keep from reading past
2508 * the end of the last physical block associated with the file
2509 */
2510 upl_offset = start_pg * PAGE_SIZE;
2511 io_size = (last_pg - start_pg) * PAGE_SIZE;
2512
2513 if ((upl_f_offset + upl_offset + io_size) > filesize) {
2514 io_size = filesize - (upl_f_offset + upl_offset);
2515 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
2516 }
2517 /*
2518 * issue an asynchronous read to cluster_io
2519 */
2520 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size,
2521 CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0);
2522 }
2523 if (start_pg) {
2524 /*
2525 * start_pg of non-zero indicates we found some already valid pages
2526 * at the beginning of the upl.... we need to release these without
2527 * modifying there state
2528 */
2529 kernel_upl_abort_range(upl, 0, start_pg * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
2530
2531 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 62)) | DBG_FUNC_NONE,
2532 upl, 0, start_pg * PAGE_SIZE, 0, 0);
2533 }
2534 if (last_pg < pages_in_upl) {
2535 /*
2536 * the set of pages that we issued an I/O for did not extend all the
2537 * way to the end of the upl... so just release them without modifying
2538 * there state
2539 */
2540 kernel_upl_abort_range(upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE,
2541 UPL_ABORT_FREE_ON_EMPTY);
2542
2543 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 63)) | DBG_FUNC_NONE,
2544 upl, last_pg * PAGE_SIZE,
2545 (pages_in_upl - last_pg) * PAGE_SIZE, 0, 0);
2546 }
2547 io_size = (last_pg * PAGE_SIZE) - start_offset;
2548
2549 if (io_size > resid)
2550 io_size = resid;
2551 f_offset += io_size;
2552 resid -= io_size;
2553 }
2554 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
2555 (int)f_offset, resid, retval, 0, 0);
2556
2557 return(retval);
2558}
2559
2560
2561cluster_push(vp)
2562 struct vnode *vp;
2563{
2564 void *object;
2565 upl_page_info_t *pl;
2566 upl_t upl;
2567 vm_offset_t upl_offset;
2568 int upl_size;
2569 off_t upl_f_offset;
2570 int pages_in_upl;
2571 int start_pg;
2572 int last_pg;
2573 int io_size;
2574 int io_flags;
2575 int size;
2576 kern_return_t kret;
2577
2578
2579 if (!UBCINFOEXISTS(vp))
2580 return(0);
2581
2582 if (vp->v_clen == 0 || (pages_in_upl = vp->v_lastw - vp->v_cstart) == 0)
2583 return (0);
2584 upl_size = pages_in_upl * PAGE_SIZE;
2585 upl_f_offset = ((off_t)vp->v_cstart) * PAGE_SIZE_64;
2586 size = vp->v_ciosiz;
2587 vp->v_clen = 0;
2588
2589 if (size > upl_size || (upl_size - size) > PAGE_SIZE)
2590 panic("cluster_push: v_ciosiz doesn't match size of cluster\n");
2591
2592 object = ubc_getobject(vp, UBC_NOREACTIVATE);
2593 if (object == (void *)NULL)
2594 panic("cluster_push: ubc_getobject failed");
2595
2596 kret = vm_fault_list_request(object,
2597 (vm_object_offset_t)upl_f_offset, upl_size, &upl, NULL, 0,
2598 (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL));
2599 if (kret != KERN_SUCCESS)
2600 panic("cluster_push: failed to get pagelist");
2601
2602 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2603
2604 last_pg = 0;
2605
2606 while (size) {
2607
2608 for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
2609 if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
2610 break;
2611 }
2612 if (start_pg > last_pg) {
2613 io_size = (start_pg - last_pg) * PAGE_SIZE;
2614
2615 kernel_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size, UPL_ABORT_FREE_ON_EMPTY);
2616
2617 if (io_size < size)
2618 size -= io_size;
2619 else
2620 break;
2621 }
2622 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2623 if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
2624 break;
2625 }
2626 upl_offset = start_pg * PAGE_SIZE;
2627
2628 io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);
2629
2630 if (vp->v_numoutput > ASYNC_THROTTLE)
2631 io_flags = CL_COMMIT | CL_AGE;
2632 else
2633 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
2634
2635 if (vp->v_flag & VNOCACHE_DATA)
2636 io_flags |= CL_DUMP;
2637
2638 cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, io_flags, (struct buf *)0);
2639
2640 size -= io_size;
2641 }
2642 return(1);
2643}