]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_cluster.c
763ecc5330cd79ad08f0ea67ece86c646c41c67a
[apple/xnu.git] / bsd / vfs / vfs_cluster.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)vfs_cluster.c 8.10 (Berkeley) 3/28/95
56 */
57
58 #include <sys/param.h>
59 #include <sys/proc.h>
60 #include <sys/buf.h>
61 #include <sys/vnode.h>
62 #include <sys/mount.h>
63 #include <sys/trace.h>
64 #include <sys/malloc.h>
65 #include <sys/resourcevar.h>
66 #include <libkern/libkern.h>
67
68 #include <sys/ubc.h>
69 #include <vm/vm_pageout.h>
70
71 #include <sys/kdebug.h>
72
73 #define CL_READ 0x01 /* read request: cluster_io tags bufs B_READ (otherwise B_WRITEINPROG) */
74 #define CL_ASYNC 0x02 /* don't wait: bufs get B_CALL|B_ASYNC and complete via cluster_iodone */
75 #define CL_COMMIT 0x04 /* commit/abort the upl range when the I/O finishes or fails (B_COMMIT_UPL) */
76 #define CL_NOMAP 0x08 /* cluster_zero uses bp->b_data instead of mapping the upl */
77 #define CL_PAGEOUT 0x10 /* pageout request: head buf gets B_PAGEOUT; in-core bufs are invalidated */
78 #define CL_AGE 0x20 /* bufs get B_AGE (commit adds UPL_COMMIT_INACTIVATE) */
79 #define CL_DUMP 0x40 /* bufs get B_NOCACHE (cluster_iodone aborts rather than commits the upl) */
80 #define CL_NOZERO 0x80 /* on read, skip zero-filling the tail of a trailing partial page */
81 #define CL_PAGEIN 0x100 /* pagein request: bufs get B_PGIN */
82 #define CL_DEV_MEMORY 0x200 /* cluster_io treats the request as one 'giant' physical page */
83
84 /*
85 * throttle the number of async writes that
86 * can be outstanding on a single vnode
87 * before we issue a synchronous write
88 */
89 #define ASYNC_THROTTLE 6 /* cluster_pageout sleeps at >= this; cluster_iodone wakes at <= this / 3 */
90
91 static int
92 cluster_iodone(bp)
93 struct buf *bp;
94 {
95 int b_flags;
96 int error;
97 int total_size;
98 int total_resid;
99 int upl_offset;
100 upl_t upl;
101 struct buf *cbp;
102 struct buf *cbp_head;
103 struct buf *cbp_next;
104 struct buf *real_bp;
105 struct vnode *vp;
106 int commit_size;
107 int pg_offset;
108
109
110 cbp_head = (struct buf *)(bp->b_trans_head);
111
112 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
113 cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
114
115 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
116 /*
117 * all I/O requests that are part of this transaction
118 * have to complete before we can process it
119 */
120 if ( !(cbp->b_flags & B_DONE)) {
121
122 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
123 cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0);
124
125 return 0;
126 }
127 }
128 error = 0;
129 total_size = 0;
130 total_resid = 0;
131
132 cbp = cbp_head;
133 upl_offset = cbp->b_uploffset;
134 upl = cbp->b_pagelist;
135 b_flags = cbp->b_flags;
136 real_bp = cbp->b_real_bp;
137 vp = cbp->b_vp;
138
139 while (cbp) {
140 if (cbp->b_vectorcount > 1)
141 _FREE(cbp->b_vectorlist, M_SEGMENT);
142
143 if ((cbp->b_flags & B_ERROR) && error == 0)
144 error = cbp->b_error;
145
146 total_resid += cbp->b_resid;
147 total_size += cbp->b_bcount;
148
149 cbp_next = cbp->b_trans_next;
150
151 free_io_buf(cbp);
152
153 cbp = cbp_next;
154 }
155 if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= (ASYNC_THROTTLE / 3))) {
156 vp->v_flag &= ~VTHROTTLED;
157 wakeup((caddr_t)&vp->v_numoutput);
158 }
159 if ((b_flags & B_NEED_IODONE) && real_bp) {
160 if (error) {
161 real_bp->b_flags |= B_ERROR;
162 real_bp->b_error = error;
163 }
164 real_bp->b_resid = total_resid;
165
166 biodone(real_bp);
167 }
168 if (error == 0 && total_resid)
169 error = EIO;
170
171 if (b_flags & B_COMMIT_UPL) {
172 pg_offset = upl_offset & PAGE_MASK;
173 commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
174
175 if (error || (b_flags & B_NOCACHE)) {
176 int upl_abort_code;
177
178 if (b_flags & B_PAGEOUT)
179 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
180 else if (b_flags & B_PGIN)
181 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
182 else
183 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
184
185 ubc_upl_abort_range(upl, upl_offset - pg_offset, commit_size,
186 upl_abort_code);
187
188 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
189 upl, upl_offset - pg_offset, commit_size,
190 0x80000000|upl_abort_code, 0);
191
192 } else {
193 int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;
194
195 if ( !(b_flags & B_PAGEOUT))
196 upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;
197 if (b_flags & B_AGE)
198 upl_commit_flags |= UPL_COMMIT_INACTIVATE;
199
200 ubc_upl_commit_range(upl, upl_offset - pg_offset, commit_size,
201 upl_commit_flags);
202
203 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
204 upl, upl_offset - pg_offset, commit_size,
205 upl_commit_flags, 0);
206 }
207 } else
208 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
209 upl, upl_offset, 0, error, 0);
210
211 return (error);
212 }
213
214
215 static void
216 cluster_zero(upl, upl_offset, size, flags, bp)
217 upl_t upl;
218 vm_offset_t upl_offset;
219 int size;
220 int flags;
221 struct buf *bp;
222 {
223 vm_offset_t io_addr = 0;
224 kern_return_t kret;
225
226 if ( !(flags & CL_NOMAP)) {
227 kret = ubc_upl_map(upl, &io_addr);
228
229 if (kret != KERN_SUCCESS)
230 panic("cluster_zero: ubc_upl_map() failed with (%d)", kret);
231 if (io_addr == 0)
232 panic("cluster_zero: ubc_upl_map() mapped 0");
233 } else
234 io_addr = (vm_offset_t)bp->b_data;
235 bzero((caddr_t)(io_addr + upl_offset), size);
236
237 if ( !(flags & CL_NOMAP)) {
238 kret = ubc_upl_unmap(upl);
239
240 if (kret != KERN_SUCCESS)
241 panic("cluster_zero: kernel_upl_unmap failed");
242 }
243 }
244
245 static int
246 cluster_io(vp, upl, upl_offset, f_offset, size, flags, real_bp)
247 struct vnode *vp;
248 upl_t upl;
249 vm_offset_t upl_offset;
250 off_t f_offset;
251 int size;
252 int flags;
253 struct buf *real_bp;
254 {
255 struct buf *cbp;
256 struct iovec *iovp;
257 int io_flags;
258 int error = 0;
259 int retval = 0;
260 struct buf *cbp_head = 0;
261 struct buf *cbp_tail = 0;
262 upl_page_info_t *pl;
263 int pg_count;
264 int pg_offset;
265 int max_iosize;
266 int max_vectors;
267 int priv;
268
269 if (flags & CL_READ) {
270 io_flags = (B_VECTORLIST | B_READ);
271
272 vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
273 } else {
274 io_flags = (B_VECTORLIST | B_WRITEINPROG);
275
276 vfs_io_attributes(vp, B_WRITE, &max_iosize, &max_vectors);
277 }
278 pl = ubc_upl_pageinfo(upl);
279
280 if (flags & CL_ASYNC)
281 io_flags |= (B_CALL | B_ASYNC);
282 if (flags & CL_AGE)
283 io_flags |= B_AGE;
284 if (flags & CL_DUMP)
285 io_flags |= B_NOCACHE;
286 if (flags & CL_PAGEIN)
287 io_flags |= B_PGIN;
288
289
290 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
291 (int)f_offset, size, upl_offset, flags, 0);
292
293 if ((flags & CL_READ) && ((upl_offset + size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
294 /*
295 * then we are going to end up
296 * with a page that we can't complete (the file size wasn't a multiple
297 * of PAGE_SIZE and we're trying to read to the end of the file
298 * so we'll go ahead and zero out the portion of the page we can't
299 * read in from the file
300 */
301 cluster_zero(upl, upl_offset + size, PAGE_SIZE - ((upl_offset + size) & PAGE_MASK), flags, real_bp);
302
303 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
304 upl_offset + size, PAGE_SIZE - ((upl_offset + size) & PAGE_MASK),
305 flags, real_bp, 0);
306 }
307 while (size) {
308 size_t io_size;
309 int vsize;
310 int i;
311 int pl_index;
312 int pg_resid;
313 int num_contig;
314 daddr_t lblkno;
315 daddr_t blkno;
316
317 if (size > max_iosize)
318 io_size = max_iosize;
319 else
320 io_size = size;
321
322 if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, &io_size, NULL)) {
323 if (error == EOPNOTSUPP)
324 panic("VOP_CMAP Unimplemented");
325 break;
326 }
327
328 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
329 (int)f_offset, (int)blkno, io_size, 0, 0);
330
331 if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {
332 if (flags & CL_PAGEOUT) {
333 error = EINVAL;
334 break;
335 };
336
337 /* Try paging out the page individually before
338 giving up entirely and dumping it (it could
339 be mapped in a "hole" and require allocation
340 before the I/O:
341 */
342 ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE_64, UPL_ABORT_FREE_ON_EMPTY);
343 if (ubc_pushdirty_range(vp, f_offset, PAGE_SIZE_64) == 0) {
344 error = EINVAL;
345 break;
346 };
347
348 upl_offset += PAGE_SIZE_64;
349 f_offset += PAGE_SIZE_64;
350 size -= PAGE_SIZE_64;
351 continue;
352 }
353 lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);
354 /*
355 * we have now figured out how much I/O we can do - this is in 'io_size'
356 * pl_index represents the first page in the 'upl' that the I/O will occur for
357 * pg_offset is the starting point in the first page for the I/O
358 * pg_count is the number of full and partial pages that 'io_size' encompasses
359 */
360 pl_index = upl_offset / PAGE_SIZE;
361 pg_offset = upl_offset & PAGE_MASK;
362 pg_count = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;
363
364 if (flags & CL_DEV_MEMORY) {
365 /*
366 * currently, can't deal with reading 'holes' in file
367 */
368 if ((long)blkno == -1) {
369 error = EINVAL;
370 break;
371 }
372 /*
373 * treat physical requests as one 'giant' page
374 */
375 pg_count = 1;
376 }
377 if ((flags & CL_READ) && (long)blkno == -1) {
378 /*
379 * if we're reading and blkno == -1, then we've got a
380 * 'hole' in the file that we need to deal with by zeroing
381 * out the affected area in the upl
382 */
383 cluster_zero(upl, upl_offset, io_size, flags, real_bp);
384
385 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
386 upl_offset, io_size, flags, real_bp, 0);
387
388 pg_count = (io_size - pg_offset) / PAGE_SIZE;
389
390 if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))
391 pg_count++;
392
393 if (pg_count) {
394 if (pg_offset)
395 pg_resid = PAGE_SIZE - pg_offset;
396 else
397 pg_resid = 0;
398 if (flags & CL_COMMIT)
399 ubc_upl_commit_range(upl,
400 upl_offset + pg_resid,
401 pg_count * PAGE_SIZE,
402 UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
403 }
404 upl_offset += io_size;
405 f_offset += io_size;
406 size -= io_size;
407
408 if (cbp_head && pg_count)
409 goto start_io;
410 continue;
411 } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
412 real_bp->b_blkno = blkno;
413 }
414
415 if (pg_count > 1) {
416 if (pg_count > max_vectors) {
417 io_size -= (pg_count - max_vectors) * PAGE_SIZE;
418
419 if (io_size < 0) {
420 io_size = PAGE_SIZE - pg_offset;
421 pg_count = 1;
422 } else
423 pg_count = max_vectors;
424 }
425 /*
426 * we need to allocate space for the vector list
427 */
428 if (pg_count > 1) {
429 iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
430 M_SEGMENT, M_NOWAIT);
431
432 if (iovp == (struct iovec *) 0) {
433 /*
434 * if the allocation fails, then throttle down to a single page
435 */
436 io_size = PAGE_SIZE - pg_offset;
437 pg_count = 1;
438 }
439 }
440 }
441
442 /* Throttle the speculative IO */
443 if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT))
444 priv = 0;
445 else
446 priv = 1;
447
448 cbp = alloc_io_buf(vp, priv);
449
450 if (pg_count == 1)
451 /*
452 * we use the io vector that's reserved in the buffer header
453 * this insures we can always issue an I/O even in a low memory
454 * condition that prevents the _MALLOC from succeeding... this
455 * is necessary to prevent deadlocks with the pager
456 */
457 iovp = (struct iovec *)(&cbp->b_vects[0]);
458
459 cbp->b_vectorlist = (void *)iovp;
460 cbp->b_vectorcount = pg_count;
461
462 if (flags & CL_DEV_MEMORY) {
463
464 iovp->iov_len = io_size;
465 iovp->iov_base = (caddr_t)upl_phys_page(pl, 0);
466
467 if (iovp->iov_base == (caddr_t) 0) {
468 free_io_buf(cbp);
469 error = EINVAL;
470 } else
471 iovp->iov_base += upl_offset;
472 } else {
473
474 for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {
475 int psize;
476
477 psize = PAGE_SIZE - pg_offset;
478
479 if (psize > vsize)
480 psize = vsize;
481
482 iovp->iov_len = psize;
483 iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);
484
485 if (iovp->iov_base == (caddr_t) 0) {
486 if (pg_count > 1)
487 _FREE(cbp->b_vectorlist, M_SEGMENT);
488 free_io_buf(cbp);
489
490 error = EINVAL;
491 break;
492 }
493 iovp->iov_base += pg_offset;
494 pg_offset = 0;
495
496 if (flags & CL_PAGEOUT) {
497 int s;
498 struct buf *bp;
499
500 s = splbio();
501 if (bp = incore(vp, lblkno + i)) {
502 if (!ISSET(bp->b_flags, B_BUSY)) {
503 bremfree(bp);
504 SET(bp->b_flags, (B_BUSY | B_INVAL));
505 splx(s);
506 brelse(bp);
507 } else
508 panic("BUSY bp found in cluster_io");
509 }
510 splx(s);
511 }
512 vsize -= psize;
513 }
514 }
515 if (error)
516 break;
517
518 if (flags & CL_ASYNC)
519 cbp->b_iodone = (void *)cluster_iodone;
520 cbp->b_flags |= io_flags;
521
522 cbp->b_lblkno = lblkno;
523 cbp->b_blkno = blkno;
524 cbp->b_bcount = io_size;
525 cbp->b_pagelist = upl;
526 cbp->b_uploffset = upl_offset;
527 cbp->b_trans_next = (struct buf *)0;
528
529 if (flags & CL_READ)
530 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
531 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
532 else
533 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
534 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
535
536 if (cbp_head) {
537 cbp_tail->b_trans_next = cbp;
538 cbp_tail = cbp;
539 } else {
540 cbp_head = cbp;
541 cbp_tail = cbp;
542 }
543 (struct buf *)(cbp->b_trans_head) = cbp_head;
544
545 upl_offset += io_size;
546 f_offset += io_size;
547 size -= io_size;
548
549 if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY)) || size == 0) {
550 /*
551 * if we have no more I/O to issue or
552 * the current I/O we've prepared fully
553 * completes the last page in this request
554 * or it's been completed via a zero-fill
555 * due to a 'hole' in the file
556 * then go ahead and issue the I/O
557 */
558 start_io:
559 if (flags & CL_COMMIT)
560 cbp_head->b_flags |= B_COMMIT_UPL;
561 if (flags & CL_PAGEOUT)
562 cbp_head->b_flags |= B_PAGEOUT;
563 if (flags & CL_PAGEIN)
564 cbp_head->b_flags |= B_PGIN;
565
566 if (real_bp) {
567 cbp_head->b_flags |= B_NEED_IODONE;
568 cbp_head->b_real_bp = real_bp;
569 }
570
571 for (cbp = cbp_head; cbp;) {
572 struct buf * cbp_next;
573
574 if (io_flags & B_WRITEINPROG)
575 cbp->b_vp->v_numoutput++;
576
577 cbp_next = cbp->b_trans_next;
578
579 (void) VOP_STRATEGY(cbp);
580 cbp = cbp_next;
581 }
582 if ( !(flags & CL_ASYNC)) {
583 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)
584 biowait(cbp);
585
586 if (error = cluster_iodone(cbp_head)) {
587 retval = error;
588 error = 0;
589 }
590 }
591 cbp_head = (struct buf *)0;
592 cbp_tail = (struct buf *)0;
593 }
594 }
595 if (error) {
596 int abort_size;
597
598 for (cbp = cbp_head; cbp;) {
599 struct buf * cbp_next;
600
601 if (cbp->b_vectorcount > 1)
602 _FREE(cbp->b_vectorlist, M_SEGMENT);
603 upl_offset -= cbp->b_bcount;
604 size += cbp->b_bcount;
605
606 cbp_next = cbp->b_trans_next;
607 free_io_buf(cbp);
608 cbp = cbp_next;
609 }
610 pg_offset = upl_offset & PAGE_MASK;
611 abort_size = ((size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
612
613 if (flags & CL_COMMIT) {
614 int upl_abort_code;
615
616 if (flags & CL_PAGEOUT)
617 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
618 else if (flags & CL_PAGEIN)
619 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
620 else
621 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
622
623 ubc_upl_abort_range(upl, upl_offset - pg_offset, abort_size,
624 upl_abort_code);
625
626 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
627 upl, upl_offset - pg_offset, abort_size, error, 0);
628 }
629 if (real_bp) {
630 real_bp->b_flags |= B_ERROR;
631 real_bp->b_error = error;
632
633 biodone(real_bp);
634 }
635 if (retval == 0)
636 retval = error;
637 }
638 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
639 (int)f_offset, size, upl_offset, retval, 0);
640
641 return (retval);
642 }
643
644
645 static int
646 cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize)
647 struct vnode *vp;
648 off_t f_offset;
649 u_int size;
650 off_t filesize;
651 int devblocksize;
652 {
653 upl_t upl;
654 upl_page_info_t *pl;
655 int pages_in_upl;
656 int start_pg;
657 int last_pg;
658 int last_valid;
659 int io_size;
660
661
662 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
663 (int)f_offset, size, (int)filesize, 0, 0);
664
665 if (f_offset >= filesize) {
666 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
667 (int)f_offset, 0, 0, 0, 0);
668 return(0);
669 }
670 if (ubc_page_op(vp, f_offset, 0, 0, 0) == KERN_SUCCESS) {
671 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
672 (int)f_offset, 0, 0, 0, 0);
673 return(0);
674 }
675 if (size > (MAX_UPL_TRANSFER * PAGE_SIZE))
676 size = MAX_UPL_TRANSFER * PAGE_SIZE;
677 else
678 size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
679
680 if ((off_t)size > (filesize - f_offset))
681 size = ((filesize - f_offset) + (devblocksize - 1)) & ~(devblocksize - 1);
682
683 pages_in_upl = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;
684
685 ubc_create_upl(vp,
686 f_offset,
687 pages_in_upl * PAGE_SIZE,
688 &upl,
689 &pl,
690 UPL_FLAGS_NONE);
691
692 if (upl == (upl_t) 0)
693 return(0);
694
695 /*
696 * scan from the beginning of the upl looking for the first
697 * non-valid page.... this will become the first page in
698 * the request we're going to make to 'cluster_io'... if all
699 * of the pages are valid, we won't call through to 'cluster_io'
700 */
701 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
702 if (!upl_valid_page(pl, start_pg))
703 break;
704 }
705
706 /*
707 * scan from the starting invalid page looking for a valid
708 * page before the end of the upl is reached, if we
709 * find one, then it will be the last page of the request to
710 * 'cluster_io'
711 */
712 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
713 if (upl_valid_page(pl, last_pg))
714 break;
715 }
716
717 /*
718 * if we find any more free valid pages at the tail of the upl
719 * than update maxra accordingly....
720 */
721 for (last_valid = last_pg; last_valid < pages_in_upl; last_valid++) {
722 if (!upl_valid_page(pl, last_valid))
723 break;
724 }
725 if (start_pg < last_pg) {
726 vm_offset_t upl_offset;
727
728 /*
729 * we found a range of 'invalid' pages that must be filled
730 * 'size' has already been clipped to the LEOF
731 * make sure it's at least a multiple of the device block size
732 */
733 upl_offset = start_pg * PAGE_SIZE;
734 io_size = (last_pg - start_pg) * PAGE_SIZE;
735
736 if ((upl_offset + io_size) > size) {
737 io_size = size - upl_offset;
738
739 KERNEL_DEBUG(0xd001000, upl_offset, size, io_size, 0, 0);
740 }
741 cluster_io(vp, upl, upl_offset, f_offset + upl_offset, io_size,
742 CL_READ | CL_COMMIT | CL_ASYNC | CL_AGE, (struct buf *)0);
743 }
744 if (start_pg) {
745 /*
746 * start_pg of non-zero indicates we found some already valid pages
747 * at the beginning of the upl.... we need to release these without
748 * modifying there state
749 */
750 ubc_upl_abort_range(upl, 0, start_pg * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
751
752 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 50)) | DBG_FUNC_NONE,
753 upl, 0, start_pg * PAGE_SIZE, 0, 0);
754 }
755 if (last_pg < pages_in_upl) {
756 /*
757 * the set of pages that we issued an I/O for did not extend all the
758 * way to the end of the upl... so just release them without modifying
759 * there state
760 */
761 ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE,
762 UPL_ABORT_FREE_ON_EMPTY);
763
764 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 50)) | DBG_FUNC_NONE,
765 upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE, 0, 0);
766 }
767
768 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
769 (int)f_offset + (last_valid * PAGE_SIZE), 0, 0, 0, 0);
770
771 return(last_valid);
772 }
773
774
775
776 static void
777 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize)
778 struct vnode *vp;
779 daddr_t b_lblkno;
780 daddr_t e_lblkno;
781 off_t filesize;
782 int devblocksize;
783 {
784 daddr_t r_lblkno;
785 off_t f_offset;
786 int size_of_prefetch;
787 int max_iosize;
788 int max_pages;
789
790 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
791 b_lblkno, e_lblkno, vp->v_lastr, 0, 0);
792
793 if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
794 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
795 vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);
796 return;
797 }
798
799 if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) && b_lblkno != (vp->v_maxra + 1))) {
800 vp->v_ralen = 0;
801 vp->v_maxra = 0;
802
803 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
804 vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);
805
806 return;
807 }
808 vfs_io_attributes(vp, B_READ, &max_iosize, &max_pages);
809
810 if ((max_iosize / PAGE_SIZE) < max_pages)
811 max_pages = max_iosize / PAGE_SIZE;
812 if (max_pages > MAX_UPL_TRANSFER)
813 max_pages = MAX_UPL_TRANSFER;
814
815 vp->v_ralen = vp->v_ralen ? min(max_pages, vp->v_ralen << 1) : 1;
816
817 if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
818 vp->v_ralen = min(max_pages, (e_lblkno + 1) - b_lblkno);
819
820 if (e_lblkno < vp->v_maxra) {
821 if ((vp->v_maxra - e_lblkno) > (max_pages / 4)) {
822
823 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
824 vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);
825 return;
826 }
827 }
828 r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
829 f_offset = (off_t)r_lblkno * PAGE_SIZE_64;
830
831 size_of_prefetch = cluster_rd_prefetch(vp, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);
832
833 if (size_of_prefetch)
834 vp->v_maxra = r_lblkno + (size_of_prefetch - 1);
835
836 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
837 vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
838 }
839
840
841 cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
842 struct vnode *vp;
843 upl_t upl;
844 vm_offset_t upl_offset;
845 off_t f_offset;
846 int size;
847 off_t filesize;
848 int devblocksize;
849 int flags;
850 {
851 int io_size;
852 int pg_size;
853 off_t max_size;
854 int local_flags = CL_PAGEOUT;
855
856 if ((flags & UPL_IOSYNC) == 0)
857 local_flags |= CL_ASYNC;
858 if ((flags & UPL_NOCOMMIT) == 0)
859 local_flags |= CL_COMMIT;
860
861 if (upl == (upl_t) 0)
862 panic("cluster_pageout: can't handle NULL upl yet\n");
863
864
865 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
866 (int)f_offset, size, (int)filesize, local_flags, 0);
867
868 /*
869 * If they didn't specify any I/O, then we are done...
870 * we can't issue an abort because we don't know how
871 * big the upl really is
872 */
873 if (size <= 0)
874 return (EINVAL);
875
876 if (vp->v_mount->mnt_flag & MNT_RDONLY) {
877 if (local_flags & CL_COMMIT)
878 ubc_upl_abort_range(upl, upl_offset, size,
879 UPL_ABORT_FREE_ON_EMPTY);
880 return (EROFS);
881 }
882 /*
883 * can't page-in from a negative offset
884 * or if we're starting beyond the EOF
885 * or if the file offset isn't page aligned
886 * or the size requested isn't a multiple of PAGE_SIZE
887 */
888 if (f_offset < 0 || f_offset >= filesize ||
889 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
890 if (local_flags & CL_COMMIT)
891 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
892 return (EINVAL);
893 }
894 max_size = filesize - f_offset;
895
896 if (size < max_size)
897 io_size = size;
898 else
899 io_size = (max_size + (devblocksize - 1)) & ~(devblocksize - 1);
900
901 pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
902
903 if (size > pg_size) {
904 if (local_flags & CL_COMMIT)
905 ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
906 UPL_ABORT_FREE_ON_EMPTY);
907 }
908 while (vp->v_numoutput >= ASYNC_THROTTLE) {
909 vp->v_flag |= VTHROTTLED;
910 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_pageout", 0);
911 }
912
913 return (cluster_io(vp, upl, upl_offset, f_offset, io_size,
914 local_flags, (struct buf *)0));
915 }
916
917
918 cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
919 struct vnode *vp;
920 upl_t upl;
921 vm_offset_t upl_offset;
922 off_t f_offset;
923 int size;
924 off_t filesize;
925 int devblocksize;
926 int flags;
927 {
928 u_int io_size;
929 int pg_size;
930 off_t max_size;
931 int retval;
932 int local_flags = 0;
933
934
935 /*
936 * If they didn't ask for any data, then we are done...
937 * we can't issue an abort because we don't know how
938 * big the upl really is
939 */
940 if (size <= 0)
941 return (EINVAL);
942
943 if ((flags & UPL_NOCOMMIT) == 0)
944 local_flags = CL_COMMIT;
945
946 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
947 (int)f_offset, size, (int)filesize, local_flags, 0);
948
949 /*
950 * can't page-in from a negative offset
951 * or if we're starting beyond the EOF
952 * or if the file offset isn't page aligned
953 * or the size requested isn't a multiple of PAGE_SIZE
954 */
955 if (f_offset < 0 || f_offset >= filesize ||
956 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
957 if (local_flags & CL_COMMIT)
958 ubc_upl_abort_range(upl, upl_offset, size,
959 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
960 return (EINVAL);
961 }
962 max_size = filesize - f_offset;
963
964 if (size < max_size)
965 io_size = size;
966 else
967 io_size = (max_size + (devblocksize - 1)) & ~(devblocksize - 1);
968
969 pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
970
971 if (upl == (upl_t) 0) {
972 ubc_create_upl( vp,
973 f_offset,
974 pg_size,
975 &upl,
976 NULL,
977 UPL_FLAGS_NONE);
978
979 if (upl == (upl_t) 0)
980 return (EINVAL);
981
982 upl_offset = (vm_offset_t)0;
983 size = pg_size;
984 }
985 if (size > pg_size) {
986 if (local_flags & CL_COMMIT)
987 ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
988 UPL_ABORT_FREE_ON_EMPTY);
989 }
990
991 retval = cluster_io(vp, upl, upl_offset, f_offset, io_size,
992 local_flags | CL_READ | CL_PAGEIN, (struct buf *)0);
993
994 if (retval == 0) {
995 int b_lblkno;
996 int e_lblkno;
997
998 b_lblkno = (int)(f_offset / PAGE_SIZE_64);
999 e_lblkno = (int)
1000 ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);
1001
1002 if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF)) {
1003 /*
1004 * we haven't read the last page in of the file yet
1005 * so let's try to read ahead if we're in
1006 * a sequential access pattern
1007 */
1008 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
1009 }
1010 vp->v_lastr = e_lblkno;
1011 }
1012 return (retval);
1013 }
1014
1015
1016 cluster_bp(bp)
1017 struct buf *bp;
1018 {
1019 off_t f_offset;
1020 int flags;
1021
1022 if (bp->b_pagelist == (upl_t) 0)
1023 panic("cluster_bp: can't handle NULL upl yet\n");
1024 if (bp->b_flags & B_READ)
1025 flags = CL_ASYNC | CL_NOMAP | CL_READ;
1026 else
1027 flags = CL_ASYNC | CL_NOMAP;
1028
1029 f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);
1030
1031 return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, flags, bp));
1032 }
1033
1034
1035 cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
1036 struct vnode *vp;
1037 struct uio *uio;
1038 off_t oldEOF;
1039 off_t newEOF;
1040 off_t headOff;
1041 off_t tailOff;
1042 int devblocksize;
1043 int flags;
1044 {
1045 int prev_resid;
1046 int clip_size;
1047 off_t max_io_size;
1048 struct iovec *iov;
1049 vm_offset_t upl_offset;
1050 int upl_size;
1051 int pages_in_pl;
1052 upl_page_info_t *pl;
1053 int upl_flags;
1054 upl_t upl;
1055 int retval = 0;
1056
1057
1058 if ((!uio) || (uio->uio_segflg != UIO_USERSPACE) || (!(vp->v_flag & VNOCACHE_DATA)))
1059 {
1060 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1061 return(retval);
1062 }
1063
1064 while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0)
1065 {
1066 /* we know we have a resid, so this is safe */
1067 iov = uio->uio_iov;
1068 while (iov->iov_len == 0) {
1069 uio->uio_iov++;
1070 uio->uio_iovcnt--;
1071 iov = uio->uio_iov;
1072 }
1073
1074 /*
1075 * We check every vector target and if it is physically
1076 * contiguous space, we skip the sanity checks.
1077 */
1078
1079 upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
1080 upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
1081 pages_in_pl = 0;
1082 upl_flags = UPL_QUERY_OBJECT_TYPE;
1083 if ((vm_map_get_upl(current_map(),
1084 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1085 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
1086 {
1087 /*
1088 * the user app must have passed in an invalid address
1089 */
1090 return (EFAULT);
1091 }
1092
1093 if (upl_flags & UPL_PHYS_CONTIG)
1094 {
1095 /*
1096 * since the interface to the IOKit below us uses physical block #'s and
1097 * block counts to specify the I/O, we can't handle anything that isn't
1098 * devblocksize aligned
1099 */
1100 if ((uio->uio_offset & (devblocksize - 1)) || (uio->uio_resid & (devblocksize - 1)))
1101 return(EINVAL);
1102
1103 if (flags & IO_HEADZEROFILL)
1104 {
1105 flags &= ~IO_HEADZEROFILL;
1106
1107 if (retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, devblocksize, IO_HEADZEROFILL))
1108 return(retval);
1109 }
1110
1111 retval = cluster_phys_write(vp, uio);
1112
1113 if (uio->uio_resid == 0 && (flags & IO_TAILZEROFILL))
1114 {
1115 retval = cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, devblocksize, IO_HEADZEROFILL);
1116 return(retval);
1117 }
1118 }
1119 else if ((uio->uio_resid < 4 * PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)))
1120 {
1121 /*
1122 * We set a threshhold of 4 pages to decide if the nocopy
1123 * write loop is worth the trouble...
1124 * we also come here if we're trying to zero the head and/or tail
1125 * of a partially written page, and the user source is not a physically contiguous region
1126 */
1127 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1128 return(retval);
1129 }
1130 else if (uio->uio_offset & PAGE_MASK_64)
1131 {
1132 /* Bring the file offset write up to a pagesize boundary */
1133 clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
1134 if (uio->uio_resid < clip_size)
1135 clip_size = uio->uio_resid;
1136 /*
1137 * Fake the resid going into the cluster_write_x call
1138 * and restore it on the way out.
1139 */
1140 prev_resid = uio->uio_resid;
1141 uio->uio_resid = clip_size;
1142 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1143 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1144 }
1145 else if ((int)iov->iov_base & PAGE_MASK_64)
1146 {
1147 clip_size = iov->iov_len;
1148 prev_resid = uio->uio_resid;
1149 uio->uio_resid = clip_size;
1150 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1151 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1152 }
1153 else
1154 {
1155 /*
1156 * If we come in here, we know the offset into
1157 * the file is on a pagesize boundary
1158 */
1159
1160 max_io_size = newEOF - uio->uio_offset;
1161 clip_size = uio->uio_resid;
1162 if (iov->iov_len < clip_size)
1163 clip_size = iov->iov_len;
1164 if (max_io_size < clip_size)
1165 clip_size = max_io_size;
1166
1167 if (clip_size < PAGE_SIZE)
1168 {
1169 /*
1170 * Take care of tail end of write in this vector
1171 */
1172 prev_resid = uio->uio_resid;
1173 uio->uio_resid = clip_size;
1174 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1175 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1176 }
1177 else
1178 {
1179 /* round clip_size down to a multiple of pagesize */
1180 clip_size = clip_size & ~(PAGE_MASK);
1181 prev_resid = uio->uio_resid;
1182 uio->uio_resid = clip_size;
1183 retval = cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags);
1184 if ((retval == 0) && uio->uio_resid)
1185 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1186 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1187 }
1188 } /* end else */
1189 } /* end while */
1190 return(retval);
1191 }
1192
1193 static
1194 cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags)
1195 struct vnode *vp;
1196 struct uio *uio;
1197 off_t newEOF;
1198 int devblocksize;
1199 int flags;
1200 {
1201 upl_t upl;
1202 upl_page_info_t *pl;
1203 off_t upl_f_offset;
1204 vm_offset_t upl_offset;
1205 off_t max_io_size;
1206 int io_size;
1207 int upl_size;
1208 int upl_needed_size;
1209 int pages_in_pl;
1210 int upl_flags;
1211 kern_return_t kret;
1212 struct iovec *iov;
1213 int i;
1214 int force_data_sync;
1215 int error = 0;
1216
1217 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
1218 (int)uio->uio_offset, (int)uio->uio_resid,
1219 (int)newEOF, devblocksize, 0);
1220
1221 /*
1222 * When we enter this routine, we know
1223 * -- the offset into the file is on a pagesize boundary
1224 * -- the resid is a page multiple
1225 * -- the resid will not exceed iov_len
1226 */
1227
1228 iov = uio->uio_iov;
1229
1230 while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
1231 io_size = uio->uio_resid;
1232
1233 if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1234 io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1235
1236 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
1237 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
1238
1239 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
1240 (int)upl_offset, upl_needed_size, iov->iov_base, io_size, 0);
1241
1242 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
1243 {
1244 pages_in_pl = 0;
1245 upl_size = upl_needed_size;
1246 upl_flags = UPL_COPYOUT_FROM | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
1247
1248 kret = vm_map_get_upl(current_map(),
1249 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1250 &upl_size,
1251 &upl,
1252 NULL,
1253 &pages_in_pl,
1254 &upl_flags,
1255 force_data_sync);
1256
1257 if (kret != KERN_SUCCESS)
1258 {
1259 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1260 0, 0, 0, kret, 0);
1261
1262 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1263 (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);
1264
1265 /* cluster_nocopy_write: failed to get pagelist */
1266 /* do not return kret here */
1267 return(0);
1268 }
1269
1270 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1271 pages_in_pl = upl_size / PAGE_SIZE;
1272
1273 for(i=0; i < pages_in_pl; i++)
1274 {
1275 if (!upl_valid_page(pl, i))
1276 break;
1277 }
1278
1279 if (i == pages_in_pl)
1280 break;
1281
1282 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1283 UPL_ABORT_FREE_ON_EMPTY);
1284 }
1285
1286 if (force_data_sync >= 3)
1287 {
1288 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1289 i, pages_in_pl, upl_size, kret, 0);
1290
1291 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1292 (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);
1293 return(0);
1294 }
1295
1296 /*
1297 * Consider the possibility that upl_size wasn't satisfied.
1298 */
1299 if (upl_size != upl_needed_size)
1300 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
1301
1302 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1303 (int)upl_offset, upl_size, iov->iov_base, io_size, 0);
1304
1305 if (io_size == 0)
1306 {
1307 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1308 UPL_ABORT_FREE_ON_EMPTY);
1309 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1310 (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);
1311
1312 return(0);
1313 }
1314
1315 /*
1316 * Now look for pages already in the cache
1317 * and throw them away.
1318 */
1319
1320 upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
1321 max_io_size = io_size;
1322
1323 while (max_io_size) {
1324
1325 /*
1326 * Flag UPL_POP_DUMP says if the page is found
1327 * in the page cache it must be thrown away.
1328 */
1329 ubc_page_op(vp,
1330 upl_f_offset,
1331 UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
1332 0, 0);
1333 max_io_size -= PAGE_SIZE;
1334 upl_f_offset += PAGE_SIZE;
1335 }
1336
1337 /*
1338 * issue a synchronous write to cluster_io
1339 */
1340
1341 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
1342 (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);
1343
1344 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
1345 io_size, 0, (struct buf *)0);
1346
1347 if (error == 0) {
1348 /*
1349 * The cluster_io write completed successfully,
1350 * update the uio structure and commit.
1351 */
1352
1353 ubc_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1354 UPL_COMMIT_FREE_ON_EMPTY);
1355
1356 iov->iov_base += io_size;
1357 iov->iov_len -= io_size;
1358 uio->uio_resid -= io_size;
1359 uio->uio_offset += io_size;
1360 }
1361 else {
1362 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1363 UPL_ABORT_FREE_ON_EMPTY);
1364 }
1365
1366 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
1367 (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
1368
1369 } /* end while */
1370
1371
1372 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1373 (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
1374
1375 return (error);
1376 }
1377
1378 static
1379 cluster_phys_write(vp, uio)
1380 struct vnode *vp;
1381 struct uio *uio;
1382 {
1383 upl_t upl;
1384 vm_offset_t upl_offset;
1385 int io_size;
1386 int upl_size;
1387 int upl_needed_size;
1388 int pages_in_pl;
1389 int upl_flags;
1390 kern_return_t kret;
1391 struct iovec *iov;
1392 int error = 0;
1393
1394 /*
1395 * When we enter this routine, we know
1396 * -- the resid will not exceed iov_len
1397 * -- the vector target address is physcially contiguous
1398 */
1399
1400 iov = uio->uio_iov;
1401 io_size = iov->iov_len;
1402 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
1403 upl_needed_size = upl_offset + io_size;
1404
1405 pages_in_pl = 0;
1406 upl_size = upl_needed_size;
1407 upl_flags = UPL_COPYOUT_FROM | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
1408
1409 kret = vm_map_get_upl(current_map(),
1410 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1411 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);
1412
1413 if (kret != KERN_SUCCESS)
1414 {
1415 /* cluster_phys_write: failed to get pagelist */
1416 /* note: return kret here */
1417 return(EINVAL);
1418 }
1419
1420 /*
1421 * Consider the possibility that upl_size wasn't satisfied.
1422 * This is a failure in the physical memory case.
1423 */
1424 if (upl_size < upl_needed_size)
1425 {
1426 kernel_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1427 return(EINVAL);
1428 }
1429
1430 /*
1431 * issue a synchronous write to cluster_io
1432 */
1433
1434 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
1435 io_size, CL_DEV_MEMORY, (struct buf *)0);
1436
1437 if (error == 0) {
1438 /*
1439 * The cluster_io write completed successfully,
1440 * update the uio structure and commit.
1441 */
1442
1443 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);
1444
1445 iov->iov_base += io_size;
1446 iov->iov_len -= io_size;
1447 uio->uio_resid -= io_size;
1448 uio->uio_offset += io_size;
1449 }
1450 else
1451 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1452
1453 return (error);
1454 }
1455
1456 static
1457 cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
1458 struct vnode *vp;
1459 struct uio *uio;
1460 off_t oldEOF;
1461 off_t newEOF;
1462 off_t headOff;
1463 off_t tailOff;
1464 int devblocksize;
1465 int flags;
1466 {
1467 upl_page_info_t *pl;
1468 upl_t upl;
1469 vm_offset_t upl_offset;
1470 int upl_size;
1471 off_t upl_f_offset;
1472 int pages_in_upl;
1473 int start_offset;
1474 int xfer_resid;
1475 int io_size;
1476 int io_size_before_rounding;
1477 int io_flags;
1478 vm_offset_t io_address;
1479 int io_offset;
1480 int bytes_to_zero;
1481 int bytes_to_move;
1482 kern_return_t kret;
1483 int retval = 0;
1484 int uio_resid;
1485 long long total_size;
1486 long long zero_cnt;
1487 off_t zero_off;
1488 long long zero_cnt1;
1489 off_t zero_off1;
1490 daddr_t start_blkno;
1491 daddr_t last_blkno;
1492
1493 if (uio) {
1494 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1495 (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);
1496
1497 uio_resid = uio->uio_resid;
1498 } else {
1499 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1500 0, 0, (int)oldEOF, (int)newEOF, 0);
1501
1502 uio_resid = 0;
1503 }
1504 zero_cnt = 0;
1505 zero_cnt1 = 0;
1506
1507 if (flags & IO_HEADZEROFILL) {
1508 /*
1509 * some filesystems (HFS is one) don't support unallocated holes within a file...
1510 * so we zero fill the intervening space between the old EOF and the offset
1511 * where the next chunk of real data begins.... ftruncate will also use this
1512 * routine to zero fill to the new EOF when growing a file... in this case, the
1513 * uio structure will not be provided
1514 */
1515 if (uio) {
1516 if (headOff < uio->uio_offset) {
1517 zero_cnt = uio->uio_offset - headOff;
1518 zero_off = headOff;
1519 }
1520 } else if (headOff < newEOF) {
1521 zero_cnt = newEOF - headOff;
1522 zero_off = headOff;
1523 }
1524 }
1525 if (flags & IO_TAILZEROFILL) {
1526 if (uio) {
1527 zero_off1 = uio->uio_offset + uio->uio_resid;
1528
1529 if (zero_off1 < tailOff)
1530 zero_cnt1 = tailOff - zero_off1;
1531 }
1532 }
1533 if (zero_cnt == 0 && uio == (struct uio *) 0)
1534 {
1535 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1536 retval, 0, 0, 0, 0);
1537 return (0);
1538 }
1539
1540 while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
1541 /*
1542 * for this iteration of the loop, figure out where our starting point is
1543 */
1544 if (zero_cnt) {
1545 start_offset = (int)(zero_off & PAGE_MASK_64);
1546 upl_f_offset = zero_off - start_offset;
1547 } else if (uio_resid) {
1548 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
1549 upl_f_offset = uio->uio_offset - start_offset;
1550 } else {
1551 start_offset = (int)(zero_off1 & PAGE_MASK_64);
1552 upl_f_offset = zero_off1 - start_offset;
1553 }
1554 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
1555 (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);
1556
1557 if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1558 total_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1559
1560 /*
1561 * compute the size of the upl needed to encompass
1562 * the requested write... limit each call to cluster_io
1563 * to the maximum UPL size... cluster_io will clip if
1564 * this exceeds the maximum io_size for the device,
1565 * make sure to account for
1566 * a starting offset that's not page aligned
1567 */
1568 upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
1569
1570 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1571 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1572
1573 pages_in_upl = upl_size / PAGE_SIZE;
1574 io_size = upl_size - start_offset;
1575
1576 if ((long long)io_size > total_size)
1577 io_size = total_size;
1578
1579 start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
1580 last_blkno = start_blkno + pages_in_upl;
1581
1582 kret = ubc_create_upl(vp,
1583 upl_f_offset,
1584 upl_size,
1585 &upl,
1586 &pl,
1587 UPL_FLAGS_NONE);
1588 if (kret != KERN_SUCCESS)
1589 panic("cluster_write: failed to get pagelist");
1590
1591 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
1592 upl, (int)upl_f_offset, upl_size, start_offset, 0);
1593
1594 if (start_offset && !upl_valid_page(pl, 0)) {
1595 int read_size;
1596
1597 /*
1598 * we're starting in the middle of the first page of the upl
1599 * and the page isn't currently valid, so we're going to have
1600 * to read it in first... this is a synchronous operation
1601 */
1602 read_size = PAGE_SIZE;
1603
1604 if ((upl_f_offset + read_size) > newEOF) {
1605 read_size = newEOF - upl_f_offset;
1606 read_size = (read_size + (devblocksize - 1)) & ~(devblocksize - 1);
1607 }
1608 retval = cluster_io(vp, upl, 0, upl_f_offset, read_size,
1609 CL_READ, (struct buf *)0);
1610 if (retval) {
1611 /*
1612 * we had an error during the read which causes us to abort
1613 * the current cluster_write request... before we do, we need
1614 * to release the rest of the pages in the upl without modifying
1615 * there state and mark the failed page in error
1616 */
1617 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
1618 ubc_upl_abort(upl, 0);
1619
1620 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1621 upl, 0, 0, retval, 0);
1622 break;
1623 }
1624 }
1625 if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
1626 /*
1627 * the last offset we're writing to in this upl does not end on a page
1628 * boundary... if it's not beyond the old EOF, then we'll also need to
1629 * pre-read this page in if it isn't already valid
1630 */
1631 upl_offset = upl_size - PAGE_SIZE;
1632
1633 if ((upl_f_offset + start_offset + io_size) < oldEOF &&
1634 !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {
1635 int read_size;
1636
1637 read_size = PAGE_SIZE;
1638
1639 if ((upl_f_offset + upl_offset + read_size) > newEOF) {
1640 read_size = newEOF - (upl_f_offset + upl_offset);
1641 read_size = (read_size + (devblocksize - 1)) & ~(devblocksize - 1);
1642 }
1643 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size,
1644 CL_READ, (struct buf *)0);
1645 if (retval) {
1646 /*
1647 * we had an error during the read which causes us to abort
1648 * the current cluster_write request... before we do, we
1649 * need to release the rest of the pages in the upl without
1650 * modifying there state and mark the failed page in error
1651 */
1652 ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE,
1653 UPL_ABORT_DUMP_PAGES);
1654 ubc_upl_abort(upl, 0);
1655
1656 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1657 upl, 0, 0, retval, 0);
1658 break;
1659 }
1660 }
1661 }
1662 if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
1663 panic("cluster_write: ubc_upl_map failed\n");
1664 xfer_resid = io_size;
1665 io_offset = start_offset;
1666
1667 while (zero_cnt && xfer_resid) {
1668
1669 if (zero_cnt < (long long)xfer_resid)
1670 bytes_to_zero = zero_cnt;
1671 else
1672 bytes_to_zero = xfer_resid;
1673
1674 if ( !(flags & IO_NOZEROVALID)) {
1675 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1676
1677 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1678 (int)upl_f_offset + io_offset, bytes_to_zero,
1679 (int)zero_cnt, xfer_resid, 0);
1680 } else {
1681 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
1682
1683 if ( !upl_valid_page(pl, (int)(zero_off / PAGE_SIZE_64))) {
1684 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1685
1686 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1687 (int)upl_f_offset + io_offset, bytes_to_zero,
1688 (int)zero_cnt, xfer_resid, 0);
1689 }
1690 }
1691 xfer_resid -= bytes_to_zero;
1692 zero_cnt -= bytes_to_zero;
1693 zero_off += bytes_to_zero;
1694 io_offset += bytes_to_zero;
1695 }
1696 if (xfer_resid && uio_resid) {
1697 bytes_to_move = min(uio_resid, xfer_resid);
1698
1699 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
1700 (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);
1701
1702 retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);
1703
1704 if (retval) {
1705 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
1706 panic("cluster_write: kernel_upl_unmap failed\n");
1707 ubc_upl_abort(upl, UPL_ABORT_DUMP_PAGES);
1708
1709 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1710 upl, 0, 0, retval, 0);
1711 } else {
1712 uio_resid -= bytes_to_move;
1713 xfer_resid -= bytes_to_move;
1714 io_offset += bytes_to_move;
1715 }
1716 }
1717 while (xfer_resid && zero_cnt1 && retval == 0) {
1718
1719 if (zero_cnt1 < (long long)xfer_resid)
1720 bytes_to_zero = zero_cnt1;
1721 else
1722 bytes_to_zero = xfer_resid;
1723
1724 if ( !(flags & IO_NOZEROVALID)) {
1725 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1726
1727 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1728 (int)upl_f_offset + io_offset,
1729 bytes_to_zero, (int)zero_cnt1, xfer_resid, 0);
1730 } else {
1731 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
1732 if ( !upl_valid_page(pl, (int)(zero_off1 / PAGE_SIZE_64))) {
1733 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1734
1735 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1736 (int)upl_f_offset + io_offset,
1737 bytes_to_zero, (int)zero_cnt1, xfer_resid, 0);
1738 }
1739 }
1740 xfer_resid -= bytes_to_zero;
1741 zero_cnt1 -= bytes_to_zero;
1742 zero_off1 += bytes_to_zero;
1743 io_offset += bytes_to_zero;
1744 }
1745
1746 if (retval == 0) {
1747 int must_push;
1748 int can_delay;
1749
1750 io_size += start_offset;
1751
1752 if ((upl_f_offset + io_size) == newEOF && io_size < upl_size) {
1753 /*
1754 * if we're extending the file with this write
1755 * we'll zero fill the rest of the page so that
1756 * if the file gets extended again in such a way as to leave a
1757 * hole starting at this EOF, we'll have zero's in the correct spot
1758 */
1759 bzero((caddr_t)(io_address + io_size), upl_size - io_size);
1760
1761 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1762 (int)upl_f_offset + io_size,
1763 upl_size - io_size, 0, 0, 0);
1764 }
1765 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
1766 panic("cluster_write: kernel_upl_unmap failed\n");
1767
1768 io_size_before_rounding = io_size;
1769
1770 if (io_size & (devblocksize - 1))
1771 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
1772
1773 must_push = 0;
1774 can_delay = 0;
1775
1776 if (vp->v_clen) {
1777 int newsize;
1778
1779 /*
1780 * we have an existing cluster... see if this write will extend it nicely
1781 */
1782 if (start_blkno >= vp->v_cstart) {
1783 if (last_blkno <= (vp->v_cstart + vp->v_clen)) {
1784 /*
1785 * we have a write that fits entirely
1786 * within the existing cluster limits
1787 */
1788 if (last_blkno >= vp->v_lastw) {
1789 /*
1790 * if we're extending the dirty region within the cluster
1791 * we need to update the cluster info... we check for blkno
1792 * equality because we may be extending the file with a
1793 * partial write.... this in turn changes our idea of how
1794 * much data to write out (v_ciosiz) for the last page
1795 */
1796 vp->v_lastw = last_blkno;
1797 newsize = io_size + ((start_blkno - vp->v_cstart) * PAGE_SIZE);
1798
1799 if (newsize > vp->v_ciosiz)
1800 vp->v_ciosiz = newsize;
1801 }
1802 can_delay = 1;
1803 goto finish_io;
1804 }
1805 if (start_blkno < (vp->v_cstart + vp->v_clen)) {
1806 /*
1807 * we have a write that starts in the middle of the current cluster
1808 * but extends beyond the cluster's limit
1809 * we'll clip the current cluster if we actually
1810 * overlap with the new write and then push it out
1811 * and start a new cluster with the current write
1812 */
1813 if (vp->v_lastw > start_blkno) {
1814 vp->v_lastw = start_blkno;
1815 vp->v_ciosiz = (vp->v_lastw - vp->v_cstart) * PAGE_SIZE;
1816 }
1817 }
1818 /*
1819 * we also get here for the case where the current write starts
1820 * beyond the limit of the existing cluster
1821 */
1822 must_push = 1;
1823 goto check_delay;
1824 }
1825 /*
1826 * the current write starts in front of the current cluster
1827 */
1828 if (last_blkno > vp->v_cstart) {
1829 /*
1830 * the current write extends into the existing cluster
1831 */
1832 if ((vp->v_lastw - start_blkno) > vp->v_clen) {
1833 /*
1834 * if we were to combine this write with the current cluster
1835 * we would exceed the cluster size limit....
1836 * clip the current cluster by moving the start position
1837 * to where the current write ends, and then push it
1838 */
1839 vp->v_ciosiz -= (last_blkno - vp->v_cstart) * PAGE_SIZE;
1840 vp->v_cstart = last_blkno;
1841
1842 /*
1843 * round up the io_size to the nearest page size
1844 * since we've coalesced with at least 1 pre-existing
1845 * page in the current cluster... this write may have ended in the
1846 * middle of the page which would cause io_size to give us an
1847 * inaccurate view of how much I/O we actually need to do
1848 */
1849 io_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
1850
1851 must_push = 1;
1852 goto check_delay;
1853 }
1854 /*
1855 * we can coalesce the current write with the existing cluster
1856 * adjust the cluster info to reflect this
1857 */
1858 if (last_blkno > vp->v_lastw) {
1859 /*
1860 * the current write completey overlaps
1861 * the existing cluster
1862 */
1863 vp->v_lastw = last_blkno;
1864 vp->v_ciosiz = io_size;
1865 } else {
1866 vp->v_ciosiz += (vp->v_cstart - start_blkno) * PAGE_SIZE;
1867
1868 if (io_size > vp->v_ciosiz)
1869 vp->v_ciosiz = io_size;
1870 }
1871 vp->v_cstart = start_blkno;
1872 can_delay = 1;
1873 goto finish_io;
1874 }
1875 /*
1876 * this I/O range is entirely in front of the current cluster
1877 * so we need to push the current cluster out before beginning
1878 * a new one
1879 */
1880 must_push = 1;
1881 }
1882 check_delay:
1883 if (must_push)
1884 cluster_push(vp);
1885
1886 if (io_size_before_rounding < (MAX_UPL_TRANSFER * PAGE_SIZE) && !(flags & IO_SYNC)) {
1887 vp->v_clen = MAX_UPL_TRANSFER;
1888 vp->v_cstart = start_blkno;
1889 vp->v_lastw = last_blkno;
1890 vp->v_ciosiz = io_size;
1891
1892 can_delay = 1;
1893 }
1894 finish_io:
1895 if (can_delay) {
1896 ubc_upl_commit_range(upl, 0, upl_size,
1897 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
1898 continue;
1899 }
1900 if (flags & IO_SYNC)
1901 io_flags = CL_COMMIT | CL_AGE;
1902 else
1903 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
1904
1905 if (vp->v_flag & VNOCACHE_DATA)
1906 io_flags |= CL_DUMP;
1907
1908 while (vp->v_numoutput >= ASYNC_THROTTLE) {
1909 vp->v_flag |= VTHROTTLED;
1910 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_write", 0);
1911 }
1912 retval = cluster_io(vp, upl, 0, upl_f_offset, io_size,
1913 io_flags, (struct buf *)0);
1914 }
1915 }
1916 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1917 retval, 0, 0, 0, 0);
1918
1919 return (retval);
1920 }
1921
1922 cluster_read(vp, uio, filesize, devblocksize, flags)
1923 struct vnode *vp;
1924 struct uio *uio;
1925 off_t filesize;
1926 int devblocksize;
1927 int flags;
1928 {
1929 int prev_resid;
1930 int clip_size;
1931 off_t max_io_size;
1932 struct iovec *iov;
1933 vm_offset_t upl_offset;
1934 int upl_size;
1935 int pages_in_pl;
1936 upl_page_info_t *pl;
1937 int upl_flags;
1938 upl_t upl;
1939 int retval = 0;
1940
1941 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
1942 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
1943
1944 /*
1945 * We set a threshhold of 4 pages to decide if the nocopy
1946 * read loop is worth the trouble...
1947 */
1948
1949 if (!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE)))
1950 {
1951 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
1952 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
1953 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
1954 return(retval);
1955 }
1956
1957 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0)
1958 {
1959 /* we know we have a resid, so this is safe */
1960 iov = uio->uio_iov;
1961 while (iov->iov_len == 0) {
1962 uio->uio_iov++;
1963 uio->uio_iovcnt--;
1964 iov = uio->uio_iov;
1965 }
1966
1967 /*
1968 * We check every vector target and if it is physically
1969 * contiguous space, we skip the sanity checks.
1970 */
1971
1972 upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
1973 upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
1974 pages_in_pl = 0;
1975 upl_flags = UPL_QUERY_OBJECT_TYPE;
1976 if((vm_map_get_upl(current_map(),
1977 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1978 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
1979 {
1980 /*
1981 * the user app must have passed in an invalid address
1982 */
1983 return (EFAULT);
1984 }
1985
1986 if (upl_flags & UPL_PHYS_CONTIG)
1987 {
1988 retval = cluster_phys_read(vp, uio, filesize);
1989 }
1990 else if (uio->uio_resid < 4 * PAGE_SIZE)
1991 {
1992 /*
1993 * We set a threshhold of 4 pages to decide if the nocopy
1994 * read loop is worth the trouble...
1995 */
1996 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
1997 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
1998 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
1999 return(retval);
2000 }
2001 else if (uio->uio_offset & PAGE_MASK_64)
2002 {
2003 /* Bring the file offset read up to a pagesize boundary */
2004 clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
2005 if (uio->uio_resid < clip_size)
2006 clip_size = uio->uio_resid;
2007 /*
2008 * Fake the resid going into the cluster_read_x call
2009 * and restore it on the way out.
2010 */
2011 prev_resid = uio->uio_resid;
2012 uio->uio_resid = clip_size;
2013 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2014 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2015 }
2016 else if ((int)iov->iov_base & PAGE_MASK_64)
2017 {
2018 clip_size = iov->iov_len;
2019 prev_resid = uio->uio_resid;
2020 uio->uio_resid = clip_size;
2021 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2022 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2023 }
2024 else
2025 {
2026 /*
2027 * If we come in here, we know the offset into
2028 * the file is on a pagesize boundary
2029 */
2030
2031 max_io_size = filesize - uio->uio_offset;
2032 clip_size = uio->uio_resid;
2033 if (iov->iov_len < clip_size)
2034 clip_size = iov->iov_len;
2035 if (max_io_size < clip_size)
2036 clip_size = (int)max_io_size;
2037
2038 if (clip_size < PAGE_SIZE)
2039 {
2040 /*
2041 * Take care of the tail end of the read in this vector.
2042 */
2043 prev_resid = uio->uio_resid;
2044 uio->uio_resid = clip_size;
2045 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2046 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2047 }
2048 else
2049 {
2050 /* round clip_size down to a multiple of pagesize */
2051 clip_size = clip_size & ~(PAGE_MASK);
2052 prev_resid = uio->uio_resid;
2053 uio->uio_resid = clip_size;
2054 retval = cluster_nocopy_read(vp, uio, filesize, devblocksize, flags);
2055 if ((retval==0) && uio->uio_resid)
2056 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2057 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2058 }
2059 } /* end else */
2060 } /* end while */
2061
2062 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
2063 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
2064
2065 return(retval);
2066 }
2067
2068 static
2069 cluster_read_x(vp, uio, filesize, devblocksize, flags)
2070 struct vnode *vp;
2071 struct uio *uio;
2072 off_t filesize;
2073 int devblocksize;
2074 int flags;
2075 {
2076 upl_page_info_t *pl;
2077 upl_t upl;
2078 vm_offset_t upl_offset;
2079 int upl_size;
2080 off_t upl_f_offset;
2081 int start_offset;
2082 int start_pg;
2083 int last_pg;
2084 int uio_last;
2085 int pages_in_upl;
2086 off_t max_size;
2087 int io_size;
2088 vm_offset_t io_address;
2089 kern_return_t kret;
2090 int segflg;
2091 int error = 0;
2092 int retval = 0;
2093 int b_lblkno;
2094 int e_lblkno;
2095
2096 b_lblkno = (int)(uio->uio_offset / PAGE_SIZE_64);
2097
2098 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2099 /*
2100 * compute the size of the upl needed to encompass
2101 * the requested read... limit each call to cluster_io
2102 * to the maximum UPL size... cluster_io will clip if
2103 * this exceeds the maximum io_size for the device,
2104 * make sure to account for
2105 * a starting offset that's not page aligned
2106 */
2107 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
2108 upl_f_offset = uio->uio_offset - (off_t)start_offset;
2109 max_size = filesize - uio->uio_offset;
2110
2111 if ((off_t)((unsigned int)uio->uio_resid) < max_size)
2112 io_size = uio->uio_resid;
2113 else
2114 io_size = max_size;
2115 #ifdef ppc
2116 if (uio->uio_segflg == UIO_USERSPACE && !(vp->v_flag & VNOCACHE_DATA)) {
2117 segflg = uio->uio_segflg;
2118
2119 uio->uio_segflg = UIO_PHYS_USERSPACE;
2120
2121 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2122 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2123
2124 while (io_size && retval == 0) {
2125 int xsize;
2126 vm_offset_t paddr;
2127
2128 if (ubc_page_op(vp,
2129 upl_f_offset,
2130 UPL_POP_SET | UPL_POP_BUSY,
2131 &paddr, 0) != KERN_SUCCESS)
2132 break;
2133
2134 xsize = PAGE_SIZE - start_offset;
2135
2136 if (xsize > io_size)
2137 xsize = io_size;
2138
2139 retval = uiomove((caddr_t)(paddr + start_offset), xsize, uio);
2140
2141 ubc_page_op(vp, upl_f_offset,
2142 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2143
2144 io_size -= xsize;
2145 start_offset = (int)
2146 (uio->uio_offset & PAGE_MASK_64);
2147 upl_f_offset = uio->uio_offset - start_offset;
2148 }
2149 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2150 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2151
2152 uio->uio_segflg = segflg;
2153
2154 if (retval)
2155 break;
2156
2157 if (io_size == 0) {
2158 /*
2159 * we're already finished with this read request
2160 * let's see if we should do a read-ahead
2161 */
2162 e_lblkno = (int)
2163 ((uio->uio_offset - 1) / PAGE_SIZE_64);
2164
2165 if (!(vp->v_flag & VRAOFF))
2166 /*
2167 * let's try to read ahead if we're in
2168 * a sequential access pattern
2169 */
2170 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2171 vp->v_lastr = e_lblkno;
2172
2173 break;
2174 }
2175 max_size = filesize - uio->uio_offset;
2176 }
2177 #endif
2178 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2179 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2180 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2181 pages_in_upl = upl_size / PAGE_SIZE;
2182
2183 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START,
2184 upl, (int)upl_f_offset, upl_size, start_offset, 0);
2185
2186 kret = ubc_create_upl(vp,
2187 upl_f_offset,
2188 upl_size,
2189 &upl,
2190 &pl,
2191 UPL_FLAGS_NONE);
2192 if (kret != KERN_SUCCESS)
2193 panic("cluster_read: failed to get pagelist");
2194
2195 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END,
2196 upl, (int)upl_f_offset, upl_size, start_offset, 0);
2197
2198 /*
2199 * scan from the beginning of the upl looking for the first
2200 * non-valid page.... this will become the first page in
2201 * the request we're going to make to 'cluster_io'... if all
2202 * of the pages are valid, we won't call through to 'cluster_io'
2203 */
2204 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
2205 if (!upl_valid_page(pl, start_pg))
2206 break;
2207 }
2208
2209 /*
2210 * scan from the starting invalid page looking for a valid
2211 * page before the end of the upl is reached, if we
2212 * find one, then it will be the last page of the request to
2213 * 'cluster_io'
2214 */
2215 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2216 if (upl_valid_page(pl, last_pg))
2217 break;
2218 }
2219
2220 if (start_pg < last_pg) {
2221 /*
2222 * we found a range of 'invalid' pages that must be filled
2223 * if the last page in this range is the last page of the file
2224 * we may have to clip the size of it to keep from reading past
2225 * the end of the last physical block associated with the file
2226 */
2227 upl_offset = start_pg * PAGE_SIZE;
2228 io_size = (last_pg - start_pg) * PAGE_SIZE;
2229
2230 if ((upl_f_offset + upl_offset + io_size) > filesize) {
2231 io_size = filesize - (upl_f_offset + upl_offset);
2232 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
2233 }
2234 /*
2235 * issue a synchronous read to cluster_io
2236 */
2237
2238 error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
2239 io_size, CL_READ, (struct buf *)0);
2240 }
2241 if (error == 0) {
2242 /*
2243 * if the read completed successfully, or there was no I/O request
2244 * issued, than map the upl into kernel address space and
2245 * move the data into user land.... we'll first add on any 'valid'
2246 * pages that were present in the upl when we acquired it.
2247 */
2248 u_int val_size;
2249 u_int size_of_prefetch;
2250
2251 for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
2252 if (!upl_valid_page(pl, uio_last))
2253 break;
2254 }
2255 /*
2256 * compute size to transfer this round, if uio->uio_resid is
2257 * still non-zero after this uiomove, we'll loop around and
2258 * set up for another I/O.
2259 */
2260 val_size = (uio_last * PAGE_SIZE) - start_offset;
2261
2262 if (max_size < val_size)
2263 val_size = max_size;
2264
2265 if (uio->uio_resid < val_size)
2266 val_size = uio->uio_resid;
2267
2268 e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);
2269
2270 if (size_of_prefetch = (uio->uio_resid - val_size)) {
2271 /*
2272 * if there's still I/O left to do for this request, then issue a
2273 * pre-fetch I/O... the I/O wait time will overlap
2274 * with the copying of the data
2275 */
2276 cluster_rd_prefetch(vp, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);
2277 } else {
2278 if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
2279 /*
2280 * let's try to read ahead if we're in
2281 * a sequential access pattern
2282 */
2283 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2284 vp->v_lastr = e_lblkno;
2285 }
2286 #ifdef ppc
2287 if (uio->uio_segflg == UIO_USERSPACE) {
2288 int offset;
2289
2290 segflg = uio->uio_segflg;
2291
2292 uio->uio_segflg = UIO_PHYS_USERSPACE;
2293
2294
2295 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2296 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2297
2298 offset = start_offset;
2299
2300 while (val_size && retval == 0) {
2301 int csize;
2302 int i;
2303 caddr_t paddr;
2304
2305 i = offset / PAGE_SIZE;
2306 csize = min(PAGE_SIZE - start_offset, val_size);
2307
2308 paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;
2309
2310 retval = uiomove(paddr, csize, uio);
2311
2312 val_size -= csize;
2313 offset += csize;
2314 start_offset = offset & PAGE_MASK;
2315 }
2316 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2317 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2318
2319 uio->uio_segflg = segflg;
2320 } else
2321 #endif
2322 {
2323 if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
2324 panic("cluster_read: ubc_upl_map() failed\n");
2325
2326 retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);
2327
2328 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
2329 panic("cluster_read: ubc_upl_unmap() failed\n");
2330 }
2331 }
2332 if (start_pg < last_pg) {
2333 /*
2334 * compute the range of pages that we actually issued an I/O for
2335 * and either commit them as valid if the I/O succeeded
2336 * or abort them if the I/O failed
2337 */
2338 io_size = (last_pg - start_pg) * PAGE_SIZE;
2339
2340 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2341 upl, start_pg * PAGE_SIZE, io_size, error, 0);
2342
2343 if (error || (vp->v_flag & VNOCACHE_DATA))
2344 ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
2345 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2346 else
2347 ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size,
2348 UPL_COMMIT_CLEAR_DIRTY
2349 | UPL_COMMIT_FREE_ON_EMPTY
2350 | UPL_COMMIT_INACTIVATE);
2351
2352 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2353 upl, start_pg * PAGE_SIZE, io_size, error, 0);
2354 }
2355 if ((last_pg - start_pg) < pages_in_upl) {
2356 int cur_pg;
2357 int commit_flags;
2358
2359 /*
2360 * the set of pages that we issued an I/O for did not encompass
2361 * the entire upl... so just release these without modifying
2362 * there state
2363 */
2364 if (error)
2365 ubc_upl_abort(upl, 0);
2366 else {
2367 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2368 upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);
2369
2370 if (start_pg) {
2371 /*
2372 * we found some already valid pages at the beginning of
2373 * the upl commit these back to the inactive list with
2374 * reference cleared
2375 */
2376 for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
2377 commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2378 | UPL_COMMIT_INACTIVATE;
2379
2380 if (upl_dirty_page(pl, cur_pg))
2381 commit_flags |= UPL_COMMIT_SET_DIRTY;
2382
2383 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2384 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2385 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2386 else
2387 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2388 PAGE_SIZE, commit_flags);
2389 }
2390 }
2391 if (last_pg < uio_last) {
2392 /*
2393 * we found some already valid pages immediately after the
2394 * pages we issued I/O for, commit these back to the
2395 * inactive list with reference cleared
2396 */
2397 for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
2398 commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2399 | UPL_COMMIT_INACTIVATE;
2400
2401 if (upl_dirty_page(pl, cur_pg))
2402 commit_flags |= UPL_COMMIT_SET_DIRTY;
2403
2404 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2405 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2406 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2407 else
2408 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2409 PAGE_SIZE, commit_flags);
2410 }
2411 }
2412 if (uio_last < pages_in_upl) {
2413 /*
2414 * there were some invalid pages beyond the valid pages
2415 * that we didn't issue an I/O for, just release them
2416 * unchanged
2417 */
2418 ubc_upl_abort(upl, 0);
2419 }
2420
2421 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2422 upl, -1, -1, 0, 0);
2423 }
2424 }
2425 if (retval == 0)
2426 retval = error;
2427 }
2428
2429 return (retval);
2430 }
2431
2432 static
2433 cluster_nocopy_read(vp, uio, filesize, devblocksize, flags)
2434 struct vnode *vp;
2435 struct uio *uio;
2436 off_t filesize;
2437 int devblocksize;
2438 int flags;
2439 {
2440 upl_t upl;
2441 upl_page_info_t *pl;
2442 off_t upl_f_offset;
2443 vm_offset_t upl_offset;
2444 off_t start_upl_f_offset;
2445 off_t max_io_size;
2446 int io_size;
2447 int upl_size;
2448 int upl_needed_size;
2449 int pages_in_pl;
2450 vm_offset_t paddr;
2451 int upl_flags;
2452 kern_return_t kret;
2453 int segflg;
2454 struct iovec *iov;
2455 int i;
2456 int force_data_sync;
2457 int error = 0;
2458 int retval = 0;
2459
2460 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
2461 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
2462
2463 /*
2464 * When we enter this routine, we know
2465 * -- the offset into the file is on a pagesize boundary
2466 * -- the resid is a page multiple
2467 * -- the resid will not exceed iov_len
2468 */
2469
2470 iov = uio->uio_iov;
2471 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2472
2473 max_io_size = filesize - uio->uio_offset;
2474
2475 if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
2476 io_size = max_io_size;
2477 else
2478 io_size = uio->uio_resid;
2479
2480 /*
2481 * We don't come into this routine unless
2482 * UIO_USERSPACE is set.
2483 */
2484 segflg = uio->uio_segflg;
2485
2486 uio->uio_segflg = UIO_PHYS_USERSPACE;
2487
2488 /*
2489 * First look for pages already in the cache
2490 * and move them to user space.
2491 */
2492 while (io_size && (retval == 0)) {
2493 upl_f_offset = uio->uio_offset;
2494
2495 /*
2496 * If this call fails, it means the page is not
2497 * in the page cache.
2498 */
2499 if (ubc_page_op(vp, upl_f_offset,
2500 UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
2501 break;
2502
2503 retval = uiomove((caddr_t)(paddr), PAGE_SIZE, uio);
2504
2505 ubc_page_op(vp, upl_f_offset,
2506 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2507
2508 io_size -= PAGE_SIZE;
2509 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
2510 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2511 }
2512
2513 uio->uio_segflg = segflg;
2514
2515 if (retval)
2516 {
2517 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2518 (int)uio->uio_offset, uio->uio_resid, 2, retval, 0);
2519 return(retval);
2520 }
2521
2522 /* If we are already finished with this read, then return */
2523 if (io_size == 0)
2524 {
2525
2526 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2527 (int)uio->uio_offset, uio->uio_resid, 3, io_size, 0);
2528 return(0);
2529 }
2530
2531 max_io_size = io_size;
2532 if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2533 max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2534
2535 start_upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
2536 upl_f_offset = start_upl_f_offset;
2537 io_size = 0;
2538
2539 while(io_size < max_io_size)
2540 {
2541
2542 if(ubc_page_op(vp, upl_f_offset,
2543 UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS)
2544 {
2545 ubc_page_op(vp, upl_f_offset,
2546 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2547 break;
2548 }
2549
2550 /*
2551 * Build up the io request parameters.
2552 */
2553
2554 io_size += PAGE_SIZE;
2555 upl_f_offset += PAGE_SIZE;
2556 }
2557
2558 if (io_size == 0)
2559 return(retval);
2560
2561 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2562 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
2563
2564 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
2565 (int)upl_offset, upl_needed_size, iov->iov_base, io_size, 0);
2566
2567 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
2568 {
2569 pages_in_pl = 0;
2570 upl_size = upl_needed_size;
2571 upl_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2572
2573 kret = vm_map_get_upl(current_map(),
2574 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2575 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);
2576
2577 if (kret != KERN_SUCCESS)
2578 {
2579 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2580 (int)upl_offset, upl_size, io_size, kret, 0);
2581
2582 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2583 (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);
2584
2585 /* cluster_nocopy_read: failed to get pagelist */
2586 /* do not return kret here */
2587 return(retval);
2588 }
2589
2590 pages_in_pl = upl_size / PAGE_SIZE;
2591 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2592
2593 for(i=0; i < pages_in_pl; i++)
2594 {
2595 if (!upl_valid_page(pl, i))
2596 break;
2597 }
2598 if (i == pages_in_pl)
2599 break;
2600
2601 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2602 UPL_ABORT_FREE_ON_EMPTY);
2603 }
2604
2605 if (force_data_sync >= 3)
2606 {
2607 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2608 (int)upl_offset, upl_size, io_size, kret, 0);
2609
2610 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2611 (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);
2612 return(retval);
2613 }
2614 /*
2615 * Consider the possibility that upl_size wasn't satisfied.
2616 */
2617 if (upl_size != upl_needed_size)
2618 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
2619
2620 if (io_size == 0)
2621 {
2622 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2623 UPL_ABORT_FREE_ON_EMPTY);
2624 return(retval);
2625 }
2626
2627 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2628 (int)upl_offset, upl_size, io_size, kret, 0);
2629
2630 /*
2631 * issue a synchronous read to cluster_io
2632 */
2633
2634 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
2635 upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);
2636
2637 error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
2638 io_size, CL_READ| CL_NOZERO, (struct buf *)0);
2639
2640 if (error == 0) {
2641 /*
2642 * The cluster_io read completed successfully,
2643 * update the uio structure and commit.
2644 */
2645
2646 ubc_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2647 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
2648
2649 iov->iov_base += io_size;
2650 iov->iov_len -= io_size;
2651 uio->uio_resid -= io_size;
2652 uio->uio_offset += io_size;
2653 }
2654 else {
2655 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2656 UPL_ABORT_FREE_ON_EMPTY);
2657 }
2658
2659 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
2660 upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
2661
2662 if (retval == 0)
2663 retval = error;
2664
2665 } /* end while */
2666
2667
2668 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2669 (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);
2670
2671 return (retval);
2672 }
2673
2674
2675 static
2676 cluster_phys_read(vp, uio, filesize)
2677 struct vnode *vp;
2678 struct uio *uio;
2679 off_t filesize;
2680 {
2681 upl_t upl;
2682 vm_offset_t upl_offset;
2683 off_t max_size;
2684 int io_size;
2685 int upl_size;
2686 int upl_needed_size;
2687 int pages_in_pl;
2688 int upl_flags;
2689 kern_return_t kret;
2690 struct iovec *iov;
2691 int error;
2692
2693 /*
2694 * When we enter this routine, we know
2695 * -- the resid will not exceed iov_len
2696 * -- the target address is physically contiguous
2697 */
2698
2699 iov = uio->uio_iov;
2700
2701 max_size = filesize - uio->uio_offset;
2702
2703 if (max_size < (off_t)((unsigned int)iov->iov_len))
2704 io_size = max_size;
2705 else
2706 io_size = iov->iov_len;
2707
2708 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2709 upl_needed_size = upl_offset + io_size;
2710
2711 pages_in_pl = 0;
2712 upl_size = upl_needed_size;
2713 upl_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2714
2715 kret = vm_map_get_upl(current_map(),
2716 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2717 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);
2718
2719 if (kret != KERN_SUCCESS)
2720 {
2721 /* cluster_phys_read: failed to get pagelist */
2722 return(EINVAL);
2723 }
2724
2725 /*
2726 * Consider the possibility that upl_size wasn't satisfied.
2727 */
2728 if (upl_size < upl_needed_size)
2729 {
2730 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2731 return(EINVAL);
2732 }
2733
2734 /*
2735 * issue a synchronous read to cluster_io
2736 */
2737
2738 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
2739 io_size, CL_READ| CL_NOZERO | CL_DEV_MEMORY, (struct buf *)0);
2740
2741 if (error == 0)
2742 {
2743 /*
2744 * The cluster_io read completed successfully,
2745 * update the uio structure and commit.
2746 */
2747
2748 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);
2749
2750 iov->iov_base += io_size;
2751 iov->iov_len -= io_size;
2752 uio->uio_resid -= io_size;
2753 uio->uio_offset += io_size;
2754 }
2755 else
2756 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2757
2758 return (error);
2759 }
2760
2761 /*
2762 * generate advisory I/O's in the largest chunks possible
2763 * the completed pages will be released into the VM cache
2764 */
2765 advisory_read(vp, filesize, f_offset, resid, devblocksize)
2766 struct vnode *vp;
2767 off_t filesize;
2768 off_t f_offset;
2769 int resid;
2770 int devblocksize;
2771 {
2772 upl_page_info_t *pl;
2773 upl_t upl;
2774 vm_offset_t upl_offset;
2775 int upl_size;
2776 off_t upl_f_offset;
2777 int start_offset;
2778 int start_pg;
2779 int last_pg;
2780 int pages_in_upl;
2781 off_t max_size;
2782 int io_size;
2783 kern_return_t kret;
2784 int retval = 0;
2785
2786
2787 if (!UBCINFOEXISTS(vp))
2788 return(EINVAL);
2789
2790 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
2791 (int)f_offset, resid, (int)filesize, devblocksize, 0);
2792
2793 while (resid && f_offset < filesize && retval == 0) {
2794 /*
2795 * compute the size of the upl needed to encompass
2796 * the requested read... limit each call to cluster_io
2797 * to the maximum UPL size... cluster_io will clip if
2798 * this exceeds the maximum io_size for the device,
2799 * make sure to account for
2800 * a starting offset that's not page aligned
2801 */
2802 start_offset = (int)(f_offset & PAGE_MASK_64);
2803 upl_f_offset = f_offset - (off_t)start_offset;
2804 max_size = filesize - f_offset;
2805
2806 if (resid < max_size)
2807 io_size = resid;
2808 else
2809 io_size = max_size;
2810
2811 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2812 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2813 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2814 pages_in_upl = upl_size / PAGE_SIZE;
2815
2816 kret = ubc_create_upl(vp,
2817 upl_f_offset,
2818 upl_size,
2819 &upl,
2820 &pl,
2821 UPL_FLAGS_NONE);
2822 if (kret != KERN_SUCCESS)
2823 panic("advisory_read: failed to get pagelist");
2824
2825
2826 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
2827 upl, (int)upl_f_offset, upl_size, start_offset, 0);
2828
2829 /*
2830 * scan from the beginning of the upl looking for the first
2831 * non-valid page.... this will become the first page in
2832 * the request we're going to make to 'cluster_io'... if all
2833 * of the pages are valid, we won't call through to 'cluster_io'
2834 */
2835 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
2836 if (!upl_valid_page(pl, start_pg))
2837 break;
2838 }
2839
2840 /*
2841 * scan from the starting invalid page looking for a valid
2842 * page before the end of the upl is reached, if we
2843 * find one, then it will be the last page of the request to
2844 * 'cluster_io'
2845 */
2846 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2847 if (upl_valid_page(pl, last_pg))
2848 break;
2849 }
2850
2851 if (start_pg < last_pg) {
2852 /*
2853 * we found a range of 'invalid' pages that must be filled
2854 * if the last page in this range is the last page of the file
2855 * we may have to clip the size of it to keep from reading past
2856 * the end of the last physical block associated with the file
2857 */
2858 upl_offset = start_pg * PAGE_SIZE;
2859 io_size = (last_pg - start_pg) * PAGE_SIZE;
2860
2861 if ((upl_f_offset + upl_offset + io_size) > filesize) {
2862 io_size = filesize - (upl_f_offset + upl_offset);
2863 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
2864 }
2865 /*
2866 * issue an asynchronous read to cluster_io
2867 */
2868 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size,
2869 CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0);
2870 }
2871 if (start_pg) {
2872 /*
2873 * start_pg of non-zero indicates we found some already valid pages
2874 * at the beginning of the upl.... we need to release these without
2875 * modifying there state
2876 */
2877 ubc_upl_abort_range(upl, 0, start_pg * PAGE_SIZE,
2878 UPL_ABORT_FREE_ON_EMPTY);
2879
2880 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 62)) | DBG_FUNC_NONE,
2881 upl, 0, start_pg * PAGE_SIZE, 0, 0);
2882 }
2883 if (last_pg < pages_in_upl) {
2884 /*
2885 * the set of pages that we issued an I/O for did not extend all the
2886 * way to the end of the upl..so just release them without modifying
2887 * there state
2888 */
2889 ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE,
2890 UPL_ABORT_FREE_ON_EMPTY);
2891
2892 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 63)) | DBG_FUNC_NONE,
2893 upl, last_pg * PAGE_SIZE,
2894 (pages_in_upl - last_pg) * PAGE_SIZE, 0, 0);
2895 }
2896 io_size = (last_pg * PAGE_SIZE) - start_offset;
2897
2898 if (io_size > resid)
2899 io_size = resid;
2900 f_offset += io_size;
2901 resid -= io_size;
2902 }
2903 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
2904 (int)f_offset, resid, retval, 0, 0);
2905
2906 return(retval);
2907 }
2908
2909
2910 cluster_push(vp)
2911 struct vnode *vp;
2912 {
2913 upl_page_info_t *pl;
2914 upl_t upl;
2915 vm_offset_t upl_offset;
2916 int upl_size;
2917 off_t upl_f_offset;
2918 int pages_in_upl;
2919 int start_pg;
2920 int last_pg;
2921 int io_size;
2922 int io_flags;
2923 int size;
2924 kern_return_t kret;
2925
2926
2927 if (!UBCINFOEXISTS(vp))
2928 return(0);
2929
2930 if (vp->v_clen == 0 || (pages_in_upl = vp->v_lastw - vp->v_cstart) == 0)
2931 return (0);
2932 upl_size = pages_in_upl * PAGE_SIZE;
2933 upl_f_offset = ((off_t)vp->v_cstart) * PAGE_SIZE_64;
2934 size = vp->v_ciosiz;
2935 vp->v_clen = 0;
2936
2937 if (size > upl_size || (upl_size - size) > PAGE_SIZE)
2938 panic("cluster_push: v_ciosiz doesn't match size of cluster\n");
2939
2940 kret = ubc_create_upl(vp,
2941 upl_f_offset,
2942 upl_size,
2943 &upl,
2944 &pl,
2945 UPL_FLAGS_NONE);
2946 if (kret != KERN_SUCCESS)
2947 panic("cluster_push: failed to get pagelist");
2948
2949 last_pg = 0;
2950
2951 while (size) {
2952
2953 for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
2954 if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
2955 break;
2956 }
2957 if (start_pg > last_pg) {
2958 io_size = (start_pg - last_pg) * PAGE_SIZE;
2959
2960 ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size,
2961 UPL_ABORT_FREE_ON_EMPTY);
2962
2963 if (io_size < size)
2964 size -= io_size;
2965 else
2966 break;
2967 }
2968 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2969 if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
2970 break;
2971 }
2972 upl_offset = start_pg * PAGE_SIZE;
2973
2974 io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);
2975
2976 if (vp->v_flag & VNOCACHE_DATA)
2977 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC | CL_DUMP;
2978 else
2979 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
2980
2981 while (vp->v_numoutput >= ASYNC_THROTTLE) {
2982 vp->v_flag |= VTHROTTLED;
2983 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_push", 0);
2984 }
2985 cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, io_flags, (struct buf *)0);
2986
2987 size -= io_size;
2988 }
2989 return(1);
2990 }