1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)vfs_cluster.c 8.10 (Berkeley) 3/28/95
56 */
57
58 #include <sys/param.h>
59 #include <sys/proc.h>
60 #include <sys/buf.h>
61 #include <sys/vnode.h>
62 #include <sys/mount.h>
63 #include <sys/trace.h>
64 #include <sys/malloc.h>
65 #include <sys/resourcevar.h>
66 #include <libkern/libkern.h>
67
68 #include <sys/ubc.h>
69 #include <vm/vm_pageout.h>
70
71 #include <sys/kdebug.h>
72
73 #define CL_READ 0x01
74 #define CL_ASYNC 0x02
75 #define CL_COMMIT 0x04
76 #define CL_NOMAP 0x08
77 #define CL_PAGEOUT 0x10
78 #define CL_AGE 0x20
79 #define CL_DUMP 0x40
80 #define CL_NOZERO 0x80
81 #define CL_PAGEIN 0x100
82 #define CL_DEV_MEMORY 0x200
83
84 /*
85 * throttle the number of async writes that
86 * can be outstanding on a single vnode
87 * before we issue a synchronous write
88 */
89 #define ASYNC_THROTTLE 9
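/*
 * note: writers that reach this limit sleep on &vp->v_numoutput
 * (see cluster_pageout and cluster_write_x below) and are woken
 * by cluster_iodone once the count drains to ASYNC_THROTTLE / 3
 */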
90
91 static int
92 cluster_iodone(bp)
93 struct buf *bp;
94 {
95 int b_flags;
96 int error;
97 int total_size;
98 int total_resid;
99 int upl_offset;
100 upl_t upl;
101 struct buf *cbp;
102 struct buf *cbp_head;
103 struct buf *cbp_next;
104 struct buf *real_bp;
105 struct vnode *vp;
106 int commit_size;
107 int pg_offset;
108
109
110 cbp_head = (struct buf *)(bp->b_trans_head);
111
112 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
113 cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
114
115 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
116 /*
117 * all I/O requests that are part of this transaction
118 * have to complete before we can process it
119 */
120 if ( !(cbp->b_flags & B_DONE)) {
121
122 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
123 cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0);
124
125 return 0;
126 }
127 }
128 error = 0;
129 total_size = 0;
130 total_resid = 0;
131
132 cbp = cbp_head;
133 upl_offset = cbp->b_uploffset;
134 upl = cbp->b_pagelist;
135 b_flags = cbp->b_flags;
136 real_bp = cbp->b_real_bp;
137 vp = cbp->b_vp;
138
139 while (cbp) {
140 if (cbp->b_vectorcount > 1)
141 _FREE(cbp->b_vectorlist, M_SEGMENT);
142
143 if ((cbp->b_flags & B_ERROR) && error == 0)
144 error = cbp->b_error;
145
146 total_resid += cbp->b_resid;
147 total_size += cbp->b_bcount;
148
149 cbp_next = cbp->b_trans_next;
150
151 free_io_buf(cbp);
152
153 cbp = cbp_next;
154 }
155 if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= (ASYNC_THROTTLE / 3))) {
156 vp->v_flag &= ~VTHROTTLED;
157 wakeup((caddr_t)&vp->v_numoutput);
158 }
159 if ((b_flags & B_NEED_IODONE) && real_bp) {
160 if (error) {
161 real_bp->b_flags |= B_ERROR;
162 real_bp->b_error = error;
163 }
164 real_bp->b_resid = total_resid;
165
166 biodone(real_bp);
167 }
168 if (error == 0 && total_resid)
169 error = EIO;
170
171 if (b_flags & B_COMMIT_UPL) {
172 pg_offset = upl_offset & PAGE_MASK;
173 commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
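/*
 * commit_size is (pg_offset + total_size) rounded up to whole
 * pages so the commit/abort below covers every page touched by
 * this transaction... e.g. with 4K pages, 0x1400 bytes starting
 * 0x200 bytes into a page rounds up to 0x2000 (two pages)
 */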
174
175 if (error || (b_flags & B_NOCACHE)) {
176 int upl_abort_code;
177
178 if (b_flags & B_PAGEOUT)
179 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
180 else if (b_flags & B_PGIN)
181 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
182 else
183 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
184
185 ubc_upl_abort_range(upl, upl_offset - pg_offset, commit_size,
186 upl_abort_code);
187
188 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
189 upl, upl_offset - pg_offset, commit_size,
190 0x80000000|upl_abort_code, 0);
191
192 } else {
193 int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;
194
195 if ( !(b_flags & B_PAGEOUT))
196 upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;
197 if (b_flags & B_AGE)
198 upl_commit_flags |= UPL_COMMIT_INACTIVATE;
199
200 ubc_upl_commit_range(upl, upl_offset - pg_offset, commit_size,
201 upl_commit_flags);
202
203 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
204 upl, upl_offset - pg_offset, commit_size,
205 upl_commit_flags, 0);
206 }
207 } else
208 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
209 upl, upl_offset, 0, error, 0);
210
211 return (error);
212 }
213
214
215 static void
216 cluster_zero(upl, upl_offset, size, flags, bp)
217 upl_t upl;
218 vm_offset_t upl_offset;
219 int size;
220 int flags;
221 struct buf *bp;
222 {
223 vm_offset_t io_addr = 0;
224 kern_return_t kret;
225
226 if ( !(flags & CL_NOMAP)) {
227 kret = ubc_upl_map(upl, &io_addr);
228
229 if (kret != KERN_SUCCESS)
230 panic("cluster_zero: ubc_upl_map() failed with (%d)", kret);
231 if (io_addr == 0)
232 panic("cluster_zero: ubc_upl_map() mapped 0");
233 } else
234 io_addr = (vm_offset_t)bp->b_data;
235 bzero((caddr_t)(io_addr + upl_offset), size);
236
237 if ( !(flags & CL_NOMAP)) {
238 kret = ubc_upl_unmap(upl);
239
240 if (kret != KERN_SUCCESS)
241 panic("cluster_zero: kernel_upl_unmap failed");
242 }
243 }
244
245 static int
246 cluster_io(vp, upl, upl_offset, f_offset, size, flags, real_bp)
247 struct vnode *vp;
248 upl_t upl;
249 vm_offset_t upl_offset;
250 off_t f_offset;
251 int size;
252 int flags;
253 struct buf *real_bp;
254 {
255 struct buf *cbp;
256 struct iovec *iovp;
257 int io_flags;
258 int error = 0;
259 int retval = 0;
260 struct buf *cbp_head = 0;
261 struct buf *cbp_tail = 0;
262 upl_page_info_t *pl;
263 int pg_count;
264 int pg_offset;
265 int max_iosize;
266 int max_vectors;
267 int priv;
268
269 if (flags & CL_READ) {
270 io_flags = (B_VECTORLIST | B_READ);
271
272 vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
273 } else {
274 io_flags = (B_VECTORLIST | B_WRITEINPROG);
275
276 vfs_io_attributes(vp, B_WRITE, &max_iosize, &max_vectors);
277 }
278 pl = ubc_upl_pageinfo(upl);
279
280 if (flags & CL_ASYNC)
281 io_flags |= (B_CALL | B_ASYNC);
282 if (flags & CL_AGE)
283 io_flags |= B_AGE;
284 if (flags & CL_DUMP)
285 io_flags |= B_NOCACHE;
286 if (flags & CL_PAGEIN)
287 io_flags |= B_PGIN;
288
289
290 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
291 (int)f_offset, size, upl_offset, flags, 0);
292
293 if ((flags & CL_READ) && ((upl_offset + size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
294 /*
295 * the I/O ends partway through a page, so we're going to end up
296 * with a page that we can't complete (the file size wasn't a
297 * multiple of PAGE_SIZE and we're trying to read to the end of
298 * the file)... go ahead and zero out the portion of the page
299 * we can't read in from the file
300 */
301 cluster_zero(upl, upl_offset + size, PAGE_SIZE - ((upl_offset + size) & PAGE_MASK), flags, real_bp);
302
303 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
304 upl_offset + size, PAGE_SIZE - ((upl_offset + size) & PAGE_MASK),
305 flags, real_bp, 0);
306 }
307 while (size) {
308 size_t io_size;
309 int vsize;
310 int i;
311 int pl_index;
312 int pg_resid;
313 int num_contig;
314 daddr_t lblkno;
315 daddr_t blkno;
316
317 if (size > max_iosize)
318 io_size = max_iosize;
319 else
320 io_size = size;
321
322 if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, &io_size, NULL)) {
323 if (error == EOPNOTSUPP)
324 panic("VOP_CMAP Unimplemented");
325 break;
326 }
327
328 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
329 (int)f_offset, (int)blkno, io_size, 0, 0);
330
331 if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {
332 if (flags & CL_PAGEOUT) {
333 error = EINVAL;
334 break;
335 };
336
337 /* Try paging out the page individually before
338 giving up entirely and dumping it (it could
339 be mapped in a "hole" and require allocation
340 before the I/O).
341 */
342 ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE_64, UPL_ABORT_FREE_ON_EMPTY);
343 if (ubc_pushdirty_range(vp, f_offset, PAGE_SIZE_64) == 0) {
344 error = EINVAL;
345 break;
346 };
347
348 upl_offset += PAGE_SIZE_64;
349 f_offset += PAGE_SIZE_64;
350 size -= PAGE_SIZE_64;
351 continue;
352 }
353 lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);
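/*
 * lblkno is f_offset expressed in page-sized logical blocks;
 * the CL_PAGEOUT path below uses lblkno + i with incore() to
 * find (and invalidate) any buffer cache blocks overlapping the
 * pages being pushed, which assumes page-sized buffers here
 */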
354 /*
355 * we have now figured out how much I/O we can do - this is in 'io_size'
356 * pl_index represents the first page in the 'upl' that the I/O will occur for
357 * pg_offset is the starting point in the first page for the I/O
358 * pg_count is the number of full and partial pages that 'io_size' encompasses
359 */
360 pl_index = upl_offset / PAGE_SIZE;
361 pg_offset = upl_offset & PAGE_MASK;
362 pg_count = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;
363
364 if (flags & CL_DEV_MEMORY) {
365 /*
366 * currently, can't deal with reading 'holes' in file
367 */
368 if ((long)blkno == -1) {
369 error = EINVAL;
370 break;
371 }
372 /*
373 * treat physical requests as one 'giant' page
374 */
375 pg_count = 1;
376 }
377 if ((flags & CL_READ) && (long)blkno == -1) {
378 /*
379 * if we're reading and blkno == -1, then we've got a
380 * 'hole' in the file that we need to deal with by zeroing
381 * out the affected area in the upl
382 */
383 cluster_zero(upl, upl_offset, io_size, flags, real_bp);
384
385 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
386 upl_offset, io_size, flags, real_bp, 0);
387
388 pg_count = (io_size - pg_offset) / PAGE_SIZE;
389
390 if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))
391 pg_count++;
392
393 if (pg_count) {
394 if (pg_offset)
395 pg_resid = PAGE_SIZE - pg_offset;
396 else
397 pg_resid = 0;
398 if (flags & CL_COMMIT)
399 ubc_upl_commit_range(upl,
400 upl_offset + pg_resid,
401 pg_count * PAGE_SIZE,
402 UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
403 }
404 upl_offset += io_size;
405 f_offset += io_size;
406 size -= io_size;
407
408 if (cbp_head && pg_count)
409 goto start_io;
410 continue;
411 } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
412 real_bp->b_blkno = blkno;
413 }
414
415 if (pg_count > 1) {
416 if (pg_count > max_vectors) {
417 io_size -= (pg_count - max_vectors) * PAGE_SIZE;
418
419 if (io_size < 0) {
420 io_size = PAGE_SIZE - pg_offset;
421 pg_count = 1;
422 } else
423 pg_count = max_vectors;
424 }
425 /*
426 * we need to allocate space for the vector list
427 */
428 if (pg_count > 1) {
429 iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
430 M_SEGMENT, M_NOWAIT);
431
432 if (iovp == (struct iovec *) 0) {
433 /*
434 * if the allocation fails, then throttle down to a single page
435 */
436 io_size = PAGE_SIZE - pg_offset;
437 pg_count = 1;
438 }
439 }
440 }
441
442 /* Throttle the speculative IO */
443 if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT))
444 priv = 0;
445 else
446 priv = 1;
447
448 cbp = alloc_io_buf(vp, priv);
449
450 if (pg_count == 1)
451 /*
452 * we use the io vector that's reserved in the buffer header
453 * this ensures we can always issue an I/O even in a low memory
454 * condition that prevents the _MALLOC from succeeding... this
455 * is necessary to prevent deadlocks with the pager
456 */
457 iovp = (struct iovec *)(&cbp->b_vects[0]);
458
459 cbp->b_vectorlist = (void *)iovp;
460 cbp->b_vectorcount = pg_count;
461
462 if (flags & CL_DEV_MEMORY) {
463
464 iovp->iov_len = io_size;
465 iovp->iov_base = (caddr_t)upl_phys_page(pl, 0);
466
467 if (iovp->iov_base == (caddr_t) 0) {
468 free_io_buf(cbp);
469 error = EINVAL;
470 } else
471 iovp->iov_base += upl_offset;
472 } else {
473
474 for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {
475 int psize;
476
477 psize = PAGE_SIZE - pg_offset;
478
479 if (psize > vsize)
480 psize = vsize;
481
482 iovp->iov_len = psize;
483 iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);
484
485 if (iovp->iov_base == (caddr_t) 0) {
486 if (pg_count > 1)
487 _FREE(cbp->b_vectorlist, M_SEGMENT);
488 free_io_buf(cbp);
489
490 error = EINVAL;
491 break;
492 }
493 iovp->iov_base += pg_offset;
494 pg_offset = 0;
495
496 if (flags & CL_PAGEOUT) {
497 int s;
498 struct buf *bp;
499
500 s = splbio();
501 if (bp = incore(vp, lblkno + i)) {
502 if (!ISSET(bp->b_flags, B_BUSY)) {
503 bremfree(bp);
504 SET(bp->b_flags, (B_BUSY | B_INVAL));
505 splx(s);
506 brelse(bp);
507 } else
508 panic("BUSY bp found in cluster_io");
509 }
510 splx(s);
511 }
512 vsize -= psize;
513 }
514 }
515 if (error)
516 break;
517
518 if (flags & CL_ASYNC)
519 cbp->b_iodone = (void *)cluster_iodone;
520 cbp->b_flags |= io_flags;
521
522 cbp->b_lblkno = lblkno;
523 cbp->b_blkno = blkno;
524 cbp->b_bcount = io_size;
525 cbp->b_pagelist = upl;
526 cbp->b_uploffset = upl_offset;
527 cbp->b_trans_next = (struct buf *)0;
528
529 if (flags & CL_READ)
530 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
531 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
532 else
533 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
534 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
535
536 if (cbp_head) {
537 cbp_tail->b_trans_next = cbp;
538 cbp_tail = cbp;
539 } else {
540 cbp_head = cbp;
541 cbp_tail = cbp;
542 }
543 (struct buf *)(cbp->b_trans_head) = cbp_head;
544
545 upl_offset += io_size;
546 f_offset += io_size;
547 size -= io_size;
548
549 if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY)) || size == 0) {
550 /*
551 * if we have no more I/O to issue or
552 * the current I/O we've prepared fully
553 * completes the last page in this request
554 * or it's been completed via a zero-fill
555 * due to a 'hole' in the file
556 * then go ahead and issue the I/O
557 */
558 start_io:
559 if (flags & CL_COMMIT)
560 cbp_head->b_flags |= B_COMMIT_UPL;
561 if (flags & CL_PAGEOUT)
562 cbp_head->b_flags |= B_PAGEOUT;
563 if (flags & CL_PAGEIN)
564 cbp_head->b_flags |= B_PGIN;
565
566 if (real_bp) {
567 cbp_head->b_flags |= B_NEED_IODONE;
568 cbp_head->b_real_bp = real_bp;
569 }
570
571 for (cbp = cbp_head; cbp;) {
572 struct buf * cbp_next;
573
574 if (io_flags & B_WRITEINPROG)
575 cbp->b_vp->v_numoutput++;
576
577 cbp_next = cbp->b_trans_next;
578
579 (void) VOP_STRATEGY(cbp);
580 cbp = cbp_next;
581 }
582 if ( !(flags & CL_ASYNC)) {
583 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)
584 biowait(cbp);
585
586 if (error = cluster_iodone(cbp_head)) {
587 retval = error;
588 error = 0;
589 }
590 }
591 cbp_head = (struct buf *)0;
592 cbp_tail = (struct buf *)0;
593 }
594 }
595 if (error) {
596 int abort_size;
597
598 for (cbp = cbp_head; cbp;) {
599 struct buf * cbp_next;
600
601 if (cbp->b_vectorcount > 1)
602 _FREE(cbp->b_vectorlist, M_SEGMENT);
603 upl_offset -= cbp->b_bcount;
604 size += cbp->b_bcount;
605
606 cbp_next = cbp->b_trans_next;
607 free_io_buf(cbp);
608 cbp = cbp_next;
609 }
610 pg_offset = upl_offset & PAGE_MASK;
611 abort_size = ((size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
612
613 if (flags & CL_COMMIT) {
614 int upl_abort_code;
615
616 if (flags & CL_PAGEOUT)
617 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
618 else if (flags & CL_PAGEIN)
619 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
620 else
621 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
622
623 ubc_upl_abort_range(upl, upl_offset - pg_offset, abort_size,
624 upl_abort_code);
625
626 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
627 upl, upl_offset - pg_offset, abort_size, error, 0);
628 }
629 if (real_bp) {
630 real_bp->b_flags |= B_ERROR;
631 real_bp->b_error = error;
632
633 biodone(real_bp);
634 }
635 if (retval == 0)
636 retval = error;
637 }
638 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
639 (int)f_offset, size, upl_offset, retval, 0);
640
641 return (retval);
642 }
643
644
645 static int
646 cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize)
647 struct vnode *vp;
648 off_t f_offset;
649 u_int size;
650 off_t filesize;
651 int devblocksize;
652 {
653 upl_t upl;
654 upl_page_info_t *pl;
655 int pages_in_upl;
656 int start_pg;
657 int last_pg;
658 int last_valid;
659 int io_size;
660
661
662 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
663 (int)f_offset, size, (int)filesize, 0, 0);
664
665 if (f_offset >= filesize) {
666 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
667 (int)f_offset, 0, 0, 0, 0);
668 return(0);
669 }
670 if (ubc_page_op(vp, f_offset, 0, 0, 0) == KERN_SUCCESS) {
671 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
672 (int)f_offset, 0, 0, 0, 0);
673 return(1);
674 }
675 if (size > (MAX_UPL_TRANSFER * PAGE_SIZE))
676 size = MAX_UPL_TRANSFER * PAGE_SIZE;
677 else
678 size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
679
680 if ((off_t)size > (filesize - f_offset))
681 size = ((filesize - f_offset) + (devblocksize - 1)) & ~(devblocksize - 1);
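/*
 * a prefetch past EOF is clipped to the bytes remaining in the
 * file, rounded up to a device block boundary so the driver is
 * never asked to transfer a partial device block
 */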
682
683 pages_in_upl = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;
684
685 ubc_create_upl(vp,
686 f_offset,
687 pages_in_upl * PAGE_SIZE,
688 &upl,
689 &pl,
690 UPL_FLAGS_NONE);
691
692 if (upl == (upl_t) 0)
693 return(0);
694
695 /*
696 * scan from the beginning of the upl looking for the first
697 * non-valid page.... this will become the first page in
698 * the request we're going to make to 'cluster_io'... if all
699 * of the pages are valid, we won't call through to 'cluster_io'
700 */
701 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
702 if (!upl_valid_page(pl, start_pg))
703 break;
704 }
705
706 /*
707 * scan from the starting invalid page looking for a valid
708 * page before the end of the upl is reached, if we
709 * find one, then it will be the last page of the request to
710 * 'cluster_io'
711 */
712 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
713 if (upl_valid_page(pl, last_pg))
714 break;
715 }
716
717 /*
718 * if we find any more valid pages at the tail of the upl
719 * then update maxra accordingly....
720 */
721 for (last_valid = last_pg; last_valid < pages_in_upl; last_valid++) {
722 if (!upl_valid_page(pl, last_valid))
723 break;
724 }
725 if (start_pg < last_pg) {
726 vm_offset_t upl_offset;
727
728 /*
729 * we found a range of 'invalid' pages that must be filled
730 * 'size' has already been clipped to the LEOF
731 * make sure it's at least a multiple of the device block size
732 */
733 upl_offset = start_pg * PAGE_SIZE;
734 io_size = (last_pg - start_pg) * PAGE_SIZE;
735
736 if ((upl_offset + io_size) > size) {
737 io_size = size - upl_offset;
738
739 KERNEL_DEBUG(0xd001000, upl_offset, size, io_size, 0, 0);
740 }
741 cluster_io(vp, upl, upl_offset, f_offset + upl_offset, io_size,
742 CL_READ | CL_COMMIT | CL_ASYNC | CL_AGE, (struct buf *)0);
743 }
744 if (start_pg) {
745 /*
746 * start_pg of non-zero indicates we found some already valid pages
747 * at the beginning of the upl.... we need to release these without
748 * modifying their state
749 */
750 ubc_upl_abort_range(upl, 0, start_pg * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
751
752 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 50)) | DBG_FUNC_NONE,
753 upl, 0, start_pg * PAGE_SIZE, 0, 0);
754 }
755 if (last_pg < pages_in_upl) {
756 /*
757 * the set of pages that we issued an I/O for did not extend all the
758 * way to the end of the upl... so just release them without modifying
759 * their state
760 */
761 ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE,
762 UPL_ABORT_FREE_ON_EMPTY);
763
764 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 50)) | DBG_FUNC_NONE,
765 upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE, 0, 0);
766 }
767
768 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
769 (int)f_offset + (last_valid * PAGE_SIZE), 0, 0, 0, 0);
770
771 return(last_valid);
772 }
773
774
775
776 static void
777 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize)
778 struct vnode *vp;
779 daddr_t b_lblkno;
780 daddr_t e_lblkno;
781 off_t filesize;
782 int devblocksize;
783 {
784 daddr_t r_lblkno;
785 off_t f_offset;
786 int size_of_prefetch;
787 int max_pages;
788
789 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
790 b_lblkno, e_lblkno, vp->v_lastr, 0, 0);
791
792 if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
793 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
794 vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);
795 return;
796 }
797
798 if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) && b_lblkno != (vp->v_maxra + 1))) {
799 vp->v_ralen = 0;
800 vp->v_maxra = 0;
801
802 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
803 vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);
804
805 return;
806 }
807 max_pages = MAX_UPL_TRANSFER;
808
809 vp->v_ralen = vp->v_ralen ? min(max_pages, vp->v_ralen << 1) : 1;
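/*
 * the read-ahead window starts at a single page and doubles on
 * each sequential access, capped at MAX_UPL_TRANSFER pages...
 * v_maxra remembers the last page already prefetched so new
 * read-ahead I/O is only issued beyond it (see r_lblkno below)
 */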
810
811 if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
812 vp->v_ralen = min(max_pages, (e_lblkno + 1) - b_lblkno);
813
814 if (e_lblkno < vp->v_maxra) {
815 if ((vp->v_maxra - e_lblkno) > (max_pages / 4)) {
816
817 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
818 vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);
819 return;
820 }
821 }
822 r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
823 f_offset = (off_t)r_lblkno * PAGE_SIZE_64;
824
825 size_of_prefetch = cluster_rd_prefetch(vp, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);
826
827 if (size_of_prefetch)
828 vp->v_maxra = r_lblkno + (size_of_prefetch - 1);
829
830 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
831 vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
832 }
833
834
835 cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
836 struct vnode *vp;
837 upl_t upl;
838 vm_offset_t upl_offset;
839 off_t f_offset;
840 int size;
841 off_t filesize;
842 int devblocksize;
843 int flags;
844 {
845 int io_size;
846 int pg_size;
847 off_t max_size;
848 int local_flags = CL_PAGEOUT;
849
850 if ((flags & UPL_IOSYNC) == 0)
851 local_flags |= CL_ASYNC;
852 if ((flags & UPL_NOCOMMIT) == 0)
853 local_flags |= CL_COMMIT;
854
855 if (upl == (upl_t) 0)
856 panic("cluster_pageout: can't handle NULL upl yet\n");
857
858
859 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
860 (int)f_offset, size, (int)filesize, local_flags, 0);
861
862 /*
863 * If they didn't specify any I/O, then we are done...
864 * we can't issue an abort because we don't know how
865 * big the upl really is
866 */
867 if (size <= 0)
868 return (EINVAL);
869
870 if (vp->v_mount->mnt_flag & MNT_RDONLY) {
871 if (local_flags & CL_COMMIT)
872 ubc_upl_abort_range(upl, upl_offset, size,
873 UPL_ABORT_FREE_ON_EMPTY);
874 return (EROFS);
875 }
876 /*
877 * can't page-out from a negative offset
878 * or if we're starting beyond the EOF
879 * or if the file offset isn't page aligned
880 * or the size requested isn't a multiple of PAGE_SIZE
881 */
882 if (f_offset < 0 || f_offset >= filesize ||
883 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
884 if (local_flags & CL_COMMIT)
885 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
886 return (EINVAL);
887 }
888 max_size = filesize - f_offset;
889
890 if (size < max_size)
891 io_size = size;
892 else
893 io_size = (max_size + (devblocksize - 1)) & ~(devblocksize - 1);
894
895 pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
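/*
 * pg_size is io_size rounded up to a page boundary; any portion
 * of the upl beyond pg_size lies beyond EOF and is simply
 * released below instead of being written
 */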
896
897 if (size > pg_size) {
898 if (local_flags & CL_COMMIT)
899 ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
900 UPL_ABORT_FREE_ON_EMPTY);
901 }
902 while (vp->v_numoutput >= ASYNC_THROTTLE) {
903 vp->v_flag |= VTHROTTLED;
904 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_pageout", 0);
905 }
906
907 return (cluster_io(vp, upl, upl_offset, f_offset, io_size,
908 local_flags, (struct buf *)0));
909 }
910
911
912 cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
913 struct vnode *vp;
914 upl_t upl;
915 vm_offset_t upl_offset;
916 off_t f_offset;
917 int size;
918 off_t filesize;
919 int devblocksize;
920 int flags;
921 {
922 u_int io_size;
923 int pg_size;
924 off_t max_size;
925 int retval;
926 int local_flags = 0;
927
928
929 /*
930 * If they didn't ask for any data, then we are done...
931 * we can't issue an abort because we don't know how
932 * big the upl really is
933 */
934 if (size <= 0)
935 return (EINVAL);
936
937 if ((flags & UPL_NOCOMMIT) == 0)
938 local_flags = CL_COMMIT;
939
940 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
941 (int)f_offset, size, (int)filesize, local_flags, 0);
942
943 /*
944 * can't page-in from a negative offset
945 * or if we're starting beyond the EOF
946 * or if the file offset isn't page aligned
947 * or the size requested isn't a multiple of PAGE_SIZE
948 */
949 if (f_offset < 0 || f_offset >= filesize ||
950 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
951 if (local_flags & CL_COMMIT)
952 ubc_upl_abort_range(upl, upl_offset, size,
953 UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
954 return (EINVAL);
955 }
956 max_size = filesize - f_offset;
957
958 if (size < max_size)
959 io_size = size;
960 else
961 io_size = (max_size + (devblocksize - 1)) & ~(devblocksize - 1);
962
963 pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
964
965 if (upl == (upl_t) 0) {
966 ubc_create_upl( vp,
967 f_offset,
968 pg_size,
969 &upl,
970 NULL,
971 UPL_FLAGS_NONE);
972
973 if (upl == (upl_t) 0)
974 return (EINVAL);
975
976 upl_offset = (vm_offset_t)0;
977 size = pg_size;
978 }
979 if (size > pg_size) {
980 if (local_flags & CL_COMMIT)
981 ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
982 UPL_ABORT_FREE_ON_EMPTY);
983 }
984
985 retval = cluster_io(vp, upl, upl_offset, f_offset, io_size,
986 local_flags | CL_READ | CL_PAGEIN, (struct buf *)0);
987
988 if (retval == 0) {
989 int b_lblkno;
990 int e_lblkno;
991
992 b_lblkno = (int)(f_offset / PAGE_SIZE_64);
993 e_lblkno = (int)
994 ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);
995
996 if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF)) {
997 /*
998 * we haven't read in the last page of the file yet
999 * so let's try to read ahead if we're in
1000 * a sequential access pattern
1001 */
1002 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
1003 }
1004 vp->v_lastr = e_lblkno;
1005 }
1006 return (retval);
1007 }
1008
1009
1010 cluster_bp(bp)
1011 struct buf *bp;
1012 {
1013 off_t f_offset;
1014 int flags;
1015
1016 if (bp->b_pagelist == (upl_t) 0)
1017 panic("cluster_bp: can't handle NULL upl yet\n");
1018 if (bp->b_flags & B_READ)
1019 flags = CL_ASYNC | CL_NOMAP | CL_READ;
1020 else
1021 flags = CL_ASYNC | CL_NOMAP;
1022
1023 f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);
1024
1025 return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, flags, bp));
1026 }
1027
1028
1029 cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
1030 struct vnode *vp;
1031 struct uio *uio;
1032 off_t oldEOF;
1033 off_t newEOF;
1034 off_t headOff;
1035 off_t tailOff;
1036 int devblocksize;
1037 int flags;
1038 {
1039 int prev_resid;
1040 int clip_size;
1041 off_t max_io_size;
1042 struct iovec *iov;
1043 vm_offset_t upl_offset;
1044 int upl_size;
1045 int pages_in_pl;
1046 upl_page_info_t *pl;
1047 int upl_flags;
1048 upl_t upl;
1049 int retval = 0;
1050
1051
1052 if ((!uio) || (uio->uio_segflg != UIO_USERSPACE) || (!(vp->v_flag & VNOCACHE_DATA)))
1053 {
1054 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1055 return(retval);
1056 }
1057
1058 while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0)
1059 {
1060 /* we know we have a resid, so this is safe */
1061 iov = uio->uio_iov;
1062 while (iov->iov_len == 0) {
1063 uio->uio_iov++;
1064 uio->uio_iovcnt--;
1065 iov = uio->uio_iov;
1066 }
1067
1068 /*
1069 * We check every vector target and if it is physically
1070 * contiguous space, we skip the sanity checks.
1071 */
1072
1073 upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
1074 upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
1075 pages_in_pl = 0;
1076 upl_flags = UPL_QUERY_OBJECT_TYPE;
1077 if ((vm_map_get_upl(current_map(),
1078 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1079 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
1080 {
1081 /*
1082 * the user app must have passed in an invalid address
1083 */
1084 return (EFAULT);
1085 }
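/*
 * the vm_map_get_upl() call above, issued with
 * UPL_QUERY_OBJECT_TYPE, is used here only as a query: it
 * validates the user address and reports via UPL_PHYS_CONTIG
 * whether the vector target is backed by physically contiguous
 * memory, which selects the cluster_phys_write path below
 */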
1086
1087 if (upl_flags & UPL_PHYS_CONTIG)
1088 {
1089 /*
1090 * since the interface to the IOKit below us uses physical block #'s and
1091 * block counts to specify the I/O, we can't handle anything that isn't
1092 * devblocksize aligned
1093 */
1094 if ((uio->uio_offset & (devblocksize - 1)) || (uio->uio_resid & (devblocksize - 1)))
1095 return(EINVAL);
1096
1097 if (flags & IO_HEADZEROFILL)
1098 {
1099 flags &= ~IO_HEADZEROFILL;
1100
1101 if (retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, devblocksize, IO_HEADZEROFILL))
1102 return(retval);
1103 }
1104
1105 retval = cluster_phys_write(vp, uio);
1106
1107 if (uio->uio_resid == 0 && (flags & IO_TAILZEROFILL))
1108 {
1109 retval = cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, devblocksize, IO_HEADZEROFILL);
1110 return(retval);
1111 }
1112 }
1113 else if ((uio->uio_resid < 4 * PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)))
1114 {
1115 /*
1116 * We set a threshold of 4 pages to decide if the nocopy
1117 * write loop is worth the trouble...
1118 * we also come here if we're trying to zero the head and/or tail
1119 * of a partially written page, and the user source is not a physically contiguous region
1120 */
1121 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1122 return(retval);
1123 }
1124 else if (uio->uio_offset & PAGE_MASK_64)
1125 {
1126 /* Bring the file offset write up to a pagesize boundary */
1127 clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
1128 if (uio->uio_resid < clip_size)
1129 clip_size = uio->uio_resid;
1130 /*
1131 * Fake the resid going into the cluster_write_x call
1132 * and restore it on the way out.
1133 */
1134 prev_resid = uio->uio_resid;
1135 uio->uio_resid = clip_size;
1136 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1137 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1138 }
1139 else if ((int)iov->iov_base & PAGE_MASK_64)
1140 {
1141 clip_size = iov->iov_len;
1142 prev_resid = uio->uio_resid;
1143 uio->uio_resid = clip_size;
1144 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1145 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1146 }
1147 else
1148 {
1149 /*
1150 * If we come in here, we know the offset into
1151 * the file is on a pagesize boundary
1152 */
1153
1154 max_io_size = newEOF - uio->uio_offset;
1155 clip_size = uio->uio_resid;
1156 if (iov->iov_len < clip_size)
1157 clip_size = iov->iov_len;
1158 if (max_io_size < clip_size)
1159 clip_size = max_io_size;
1160
1161 if (clip_size < PAGE_SIZE)
1162 {
1163 /*
1164 * Take care of tail end of write in this vector
1165 */
1166 prev_resid = uio->uio_resid;
1167 uio->uio_resid = clip_size;
1168 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1169 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1170 }
1171 else
1172 {
1173 /* round clip_size down to a multiple of pagesize */
1174 clip_size = clip_size & ~(PAGE_MASK);
1175 prev_resid = uio->uio_resid;
1176 uio->uio_resid = clip_size;
1177 retval = cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags);
1178 if ((retval == 0) && uio->uio_resid)
1179 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1180 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1181 }
1182 } /* end else */
1183 } /* end while */
1184 return(retval);
1185 }
1186
1187 static
1188 cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags)
1189 struct vnode *vp;
1190 struct uio *uio;
1191 off_t newEOF;
1192 int devblocksize;
1193 int flags;
1194 {
1195 upl_t upl;
1196 upl_page_info_t *pl;
1197 off_t upl_f_offset;
1198 vm_offset_t upl_offset;
1199 off_t max_io_size;
1200 int io_size;
1201 int upl_size;
1202 int upl_needed_size;
1203 int pages_in_pl;
1204 int upl_flags;
1205 kern_return_t kret;
1206 struct iovec *iov;
1207 int i;
1208 int force_data_sync;
1209 int error = 0;
1210
1211 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
1212 (int)uio->uio_offset, (int)uio->uio_resid,
1213 (int)newEOF, devblocksize, 0);
1214
1215 /*
1216 * When we enter this routine, we know
1217 * -- the offset into the file is on a pagesize boundary
1218 * -- the resid is a page multiple
1219 * -- the resid will not exceed iov_len
1220 */
1221
1222 iov = uio->uio_iov;
1223
1224 while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
1225 io_size = uio->uio_resid;
1226
1227 if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1228 io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1229
1230 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
1231 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
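/*
 * upl_offset is where the user buffer begins within its first
 * page, and upl_needed_size is that offset plus the transfer
 * length rounded up to whole pages... the span of user memory
 * that vm_map_get_upl() is asked to cover for this pass
 */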
1232
1233 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
1234 (int)upl_offset, upl_needed_size, iov->iov_base, io_size, 0);
1235
1236 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
1237 {
1238 pages_in_pl = 0;
1239 upl_size = upl_needed_size;
1240 upl_flags = UPL_COPYOUT_FROM | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
1241
1242 kret = vm_map_get_upl(current_map(),
1243 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1244 &upl_size,
1245 &upl,
1246 NULL,
1247 &pages_in_pl,
1248 &upl_flags,
1249 force_data_sync);
1250
1251 if (kret != KERN_SUCCESS)
1252 {
1253 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1254 0, 0, 0, kret, 0);
1255
1256 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1257 (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);
1258
1259 /* cluster_nocopy_write: failed to get pagelist */
1260 /* do not return kret here */
1261 return(0);
1262 }
1263
1264 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1265 pages_in_pl = upl_size / PAGE_SIZE;
1266
1267 for(i=0; i < pages_in_pl; i++)
1268 {
1269 if (!upl_valid_page(pl, i))
1270 break;
1271 }
1272
1273 if (i == pages_in_pl)
1274 break;
1275
1276 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1277 UPL_ABORT_FREE_ON_EMPTY);
1278 }
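/*
 * the loop above retries vm_map_get_upl() up to three times,
 * raising force_data_sync on each pass, until every page in the
 * returned pagelist is valid; a pass that comes back with any
 * invalid page is aborted and retried
 */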
1279
1280 if (force_data_sync >= 3)
1281 {
1282 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1283 i, pages_in_pl, upl_size, kret, 0);
1284
1285 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1286 (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);
1287 return(0);
1288 }
1289
1290 /*
1291 * Consider the possibility that upl_size wasn't satisfied.
1292 */
1293 if (upl_size != upl_needed_size)
1294 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
1295
1296 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1297 (int)upl_offset, upl_size, iov->iov_base, io_size, 0);
1298
1299 if (io_size == 0)
1300 {
1301 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1302 UPL_ABORT_FREE_ON_EMPTY);
1303 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1304 (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);
1305
1306 return(0);
1307 }
1308
1309 /*
1310 * Now look for pages already in the cache
1311 * and throw them away.
1312 */
1313
1314 upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
1315 max_io_size = io_size;
1316
1317 while (max_io_size) {
1318
1319 /*
1320 * Flag UPL_POP_DUMP says if the page is found
1321 * in the page cache it must be thrown away.
1322 */
1323 ubc_page_op(vp,
1324 upl_f_offset,
1325 UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
1326 0, 0);
1327 max_io_size -= PAGE_SIZE;
1328 upl_f_offset += PAGE_SIZE;
1329 }
1330
1331 /*
1332 * issue a synchronous write to cluster_io
1333 */
1334
1335 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
1336 (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);
1337
1338 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
1339 io_size, 0, (struct buf *)0);
1340
1341 if (error == 0) {
1342 /*
1343 * The cluster_io write completed successfully,
1344 * update the uio structure.
1345 */
1346 iov->iov_base += io_size;
1347 iov->iov_len -= io_size;
1348 uio->uio_resid -= io_size;
1349 uio->uio_offset += io_size;
1350 }
1351 /*
1352 * always 'commit' the I/O via the abort primitive whether the I/O
1353 * succeeded cleanly or not... this is necessary to ensure that
1354 * we preserve the state of the DIRTY flag on the pages used to
1355 * provide the data for the I/O... the state of this flag SHOULD
1356 * NOT be changed by a write
1357 */
1358 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1359 UPL_ABORT_FREE_ON_EMPTY);
1360
1361
1362 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
1363 (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
1364
1365 } /* end while */
1366
1367
1368 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1369 (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
1370
1371 return (error);
1372 }
1373
1374 static
1375 cluster_phys_write(vp, uio)
1376 struct vnode *vp;
1377 struct uio *uio;
1378 {
1379 upl_t upl;
1380 vm_offset_t upl_offset;
1381 int io_size;
1382 int upl_size;
1383 int upl_needed_size;
1384 int pages_in_pl;
1385 int upl_flags;
1386 kern_return_t kret;
1387 struct iovec *iov;
1388 int error = 0;
1389
1390 /*
1391 * When we enter this routine, we know
1392 * -- the resid will not exceed iov_len
1393 * -- the vector target address is physically contiguous
1394 */
1395
1396 iov = uio->uio_iov;
1397 io_size = iov->iov_len;
1398 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
1399 upl_needed_size = upl_offset + io_size;
1400
1401 pages_in_pl = 0;
1402 upl_size = upl_needed_size;
1403 upl_flags = UPL_COPYOUT_FROM | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
1404
1405 kret = vm_map_get_upl(current_map(),
1406 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1407 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);
1408
1409 if (kret != KERN_SUCCESS)
1410 {
1411 /* cluster_phys_write: failed to get pagelist */
1412 /* note: return kret here */
1413 return(EINVAL);
1414 }
1415
1416 /*
1417 * Consider the possibility that upl_size wasn't satisfied.
1418 * This is a failure in the physical memory case.
1419 */
1420 if (upl_size < upl_needed_size)
1421 {
1422 kernel_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1423 return(EINVAL);
1424 }
1425
1426 /*
1427 * issue a synchronous write to cluster_io
1428 */
1429
1430 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
1431 io_size, CL_DEV_MEMORY, (struct buf *)0);
1432
1433 if (error == 0) {
1434 /*
1435 * The cluster_io write completed successfully,
1436 * update the uio structure and commit.
1437 */
1438
1439 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);
1440
1441 iov->iov_base += io_size;
1442 iov->iov_len -= io_size;
1443 uio->uio_resid -= io_size;
1444 uio->uio_offset += io_size;
1445 }
1446 else
1447 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1448
1449 return (error);
1450 }
1451
1452 static
1453 cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
1454 struct vnode *vp;
1455 struct uio *uio;
1456 off_t oldEOF;
1457 off_t newEOF;
1458 off_t headOff;
1459 off_t tailOff;
1460 int devblocksize;
1461 int flags;
1462 {
1463 upl_page_info_t *pl;
1464 upl_t upl;
1465 vm_offset_t upl_offset;
1466 int upl_size;
1467 off_t upl_f_offset;
1468 int pages_in_upl;
1469 int start_offset;
1470 int xfer_resid;
1471 int io_size;
1472 int io_size_before_rounding;
1473 int io_flags;
1474 vm_offset_t io_address;
1475 int io_offset;
1476 int bytes_to_zero;
1477 int bytes_to_move;
1478 kern_return_t kret;
1479 int retval = 0;
1480 int uio_resid;
1481 long long total_size;
1482 long long zero_cnt;
1483 off_t zero_off;
1484 long long zero_cnt1;
1485 off_t zero_off1;
1486 daddr_t start_blkno;
1487 daddr_t last_blkno;
1488
1489 if (uio) {
1490 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1491 (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);
1492
1493 uio_resid = uio->uio_resid;
1494 } else {
1495 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1496 0, 0, (int)oldEOF, (int)newEOF, 0);
1497
1498 uio_resid = 0;
1499 }
1500 zero_cnt = 0;
1501 zero_cnt1 = 0;
1502
1503 if (flags & IO_HEADZEROFILL) {
1504 /*
1505 * some filesystems (HFS is one) don't support unallocated holes within a file...
1506 * so we zero fill the intervening space between the old EOF and the offset
1507 * where the next chunk of real data begins.... ftruncate will also use this
1508 * routine to zero fill to the new EOF when growing a file... in this case, the
1509 * uio structure will not be provided
1510 */
1511 if (uio) {
1512 if (headOff < uio->uio_offset) {
1513 zero_cnt = uio->uio_offset - headOff;
1514 zero_off = headOff;
1515 }
1516 } else if (headOff < newEOF) {
1517 zero_cnt = newEOF - headOff;
1518 zero_off = headOff;
1519 }
1520 }
1521 if (flags & IO_TAILZEROFILL) {
1522 if (uio) {
1523 zero_off1 = uio->uio_offset + uio->uio_resid;
1524
1525 if (zero_off1 < tailOff)
1526 zero_cnt1 = tailOff - zero_off1;
1527 }
1528 }
1529 if (zero_cnt == 0 && uio == (struct uio *) 0)
1530 {
1531 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1532 retval, 0, 0, 0, 0);
1533 return (0);
1534 }
1535
1536 while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
1537 /*
1538 * for this iteration of the loop, figure out where our starting point is
1539 */
1540 if (zero_cnt) {
1541 start_offset = (int)(zero_off & PAGE_MASK_64);
1542 upl_f_offset = zero_off - start_offset;
1543 } else if (uio_resid) {
1544 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
1545 upl_f_offset = uio->uio_offset - start_offset;
1546 } else {
1547 start_offset = (int)(zero_off1 & PAGE_MASK_64);
1548 upl_f_offset = zero_off1 - start_offset;
1549 }
1550 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
1551 (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);
1552
1553 if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1554 total_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1555
1556 /*
1557 * compute the size of the upl needed to encompass
1558 * the requested write... limit each call to cluster_io
1559 * to the maximum UPL size... cluster_io will clip if
1560 * this exceeds the maximum io_size for the device,
1561 * make sure to account for
1562 * a starting offset that's not page aligned
1563 */
1564 upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
1565
1566 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1567 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1568
1569 pages_in_upl = upl_size / PAGE_SIZE;
1570 io_size = upl_size - start_offset;
1571
1572 if ((long long)io_size > total_size)
1573 io_size = total_size;
1574
1575 start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
1576 last_blkno = start_blkno + pages_in_upl;
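/*
 * start_blkno and last_blkno are the page indices spanned by
 * this upl; the delayed-write logic further down compares them
 * against v_cstart, v_lastw and v_clen to decide whether this
 * write can be absorbed into the vnode's current dirty cluster
 * or whether that cluster must be pushed first
 */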
1577
1578 kret = ubc_create_upl(vp,
1579 upl_f_offset,
1580 upl_size,
1581 &upl,
1582 &pl,
1583 UPL_FLAGS_NONE);
1584 if (kret != KERN_SUCCESS)
1585 panic("cluster_write: failed to get pagelist");
1586
1587 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
1588 upl, (int)upl_f_offset, upl_size, start_offset, 0);
1589
1590 if (start_offset && !upl_valid_page(pl, 0)) {
1591 int read_size;
1592
1593 /*
1594 * we're starting in the middle of the first page of the upl
1595 * and the page isn't currently valid, so we're going to have
1596 * to read it in first... this is a synchronous operation
1597 */
1598 read_size = PAGE_SIZE;
1599
1600 if ((upl_f_offset + read_size) > newEOF) {
1601 read_size = newEOF - upl_f_offset;
1602 read_size = (read_size + (devblocksize - 1)) & ~(devblocksize - 1);
1603 }
1604 retval = cluster_io(vp, upl, 0, upl_f_offset, read_size,
1605 CL_READ, (struct buf *)0);
1606 if (retval) {
1607 /*
1608 * we had an error during the read which causes us to abort
1609 * the current cluster_write request... before we do, we need
1610 * to release the rest of the pages in the upl without modifying
1611 * their state and mark the failed page in error
1612 */
1613 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
1614 ubc_upl_abort(upl, 0);
1615
1616 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1617 upl, 0, 0, retval, 0);
1618 break;
1619 }
1620 }
1621 if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
1622 /*
1623 * the last offset we're writing to in this upl does not end on a page
1624 * boundary... if it's not beyond the old EOF, then we'll also need to
1625 * pre-read this page in if it isn't already valid
1626 */
1627 upl_offset = upl_size - PAGE_SIZE;
1628
1629 if ((upl_f_offset + start_offset + io_size) < oldEOF &&
1630 !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {
1631 int read_size;
1632
1633 read_size = PAGE_SIZE;
1634
1635 if ((upl_f_offset + upl_offset + read_size) > newEOF) {
1636 read_size = newEOF - (upl_f_offset + upl_offset);
1637 read_size = (read_size + (devblocksize - 1)) & ~(devblocksize - 1);
1638 }
1639 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size,
1640 CL_READ, (struct buf *)0);
1641 if (retval) {
1642 /*
1643 * we had an error during the read which causes us to abort
1644 * the current cluster_write request... before we do, we
1645 * need to release the rest of the pages in the upl without
1646 * modifying their state and mark the failed page in error
1647 */
1648 ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE,
1649 UPL_ABORT_DUMP_PAGES);
1650 ubc_upl_abort(upl, 0);
1651
1652 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1653 upl, 0, 0, retval, 0);
1654 break;
1655 }
1656 }
1657 }
1658 if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
1659 panic("cluster_write: ubc_upl_map failed\n");
1660 xfer_resid = io_size;
1661 io_offset = start_offset;
1662
1663 while (zero_cnt && xfer_resid) {
1664
1665 if (zero_cnt < (long long)xfer_resid)
1666 bytes_to_zero = zero_cnt;
1667 else
1668 bytes_to_zero = xfer_resid;
1669
1670 if ( !(flags & IO_NOZEROVALID)) {
1671 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1672
1673 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1674 (int)upl_f_offset + io_offset, bytes_to_zero,
1675 (int)zero_cnt, xfer_resid, 0);
1676 } else {
1677 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
1678
1679 if ( !upl_valid_page(pl, (int)(zero_off / PAGE_SIZE_64))) {
1680 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1681
1682 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1683 (int)upl_f_offset + io_offset, bytes_to_zero,
1684 (int)zero_cnt, xfer_resid, 0);
1685 }
1686 }
1687 xfer_resid -= bytes_to_zero;
1688 zero_cnt -= bytes_to_zero;
1689 zero_off += bytes_to_zero;
1690 io_offset += bytes_to_zero;
1691 }
1692 if (xfer_resid && uio_resid) {
1693 bytes_to_move = min(uio_resid, xfer_resid);
1694
1695 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
1696 (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);
1697
1698 retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);
1699
1700 if (retval) {
1701 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
1702 panic("cluster_write: kernel_upl_unmap failed\n");
1703 ubc_upl_abort(upl, UPL_ABORT_DUMP_PAGES);
1704
1705 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1706 upl, 0, 0, retval, 0);
1707 } else {
1708 uio_resid -= bytes_to_move;
1709 xfer_resid -= bytes_to_move;
1710 io_offset += bytes_to_move;
1711 }
1712 }
1713 while (xfer_resid && zero_cnt1 && retval == 0) {
1714
1715 if (zero_cnt1 < (long long)xfer_resid)
1716 bytes_to_zero = zero_cnt1;
1717 else
1718 bytes_to_zero = xfer_resid;
1719
1720 if ( !(flags & IO_NOZEROVALID)) {
1721 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1722
1723 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1724 (int)upl_f_offset + io_offset,
1725 bytes_to_zero, (int)zero_cnt1, xfer_resid, 0);
1726 } else {
1727 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
1728 if ( !upl_valid_page(pl, (int)(zero_off1 / PAGE_SIZE_64))) {
1729 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1730
1731 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1732 (int)upl_f_offset + io_offset,
1733 bytes_to_zero, (int)zero_cnt1, xfer_resid, 0);
1734 }
1735 }
1736 xfer_resid -= bytes_to_zero;
1737 zero_cnt1 -= bytes_to_zero;
1738 zero_off1 += bytes_to_zero;
1739 io_offset += bytes_to_zero;
1740 }
1741
1742 if (retval == 0) {
1743 int must_push;
1744 int can_delay;
1745
1746 io_size += start_offset;
1747
1748 if ((upl_f_offset + io_size) == newEOF && io_size < upl_size) {
1749 /*
1750 * if we're extending the file with this write
1751 * we'll zero fill the rest of the page so that
1752 * if the file gets extended again in such a way as to leave a
1753 * hole starting at this EOF, we'll have zeros in the correct spot
1754 */
1755 bzero((caddr_t)(io_address + io_size), upl_size - io_size);
1756
1757 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1758 (int)upl_f_offset + io_size,
1759 upl_size - io_size, 0, 0, 0);
1760 }
1761 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
1762 panic("cluster_write: kernel_upl_unmap failed\n");
1763
1764 io_size_before_rounding = io_size;
1765
1766 if (io_size & (devblocksize - 1))
1767 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
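/*
 * round the size we intend to push up to a device block
 * boundary; io_size_before_rounding is preserved so the test
 * below that decides whether to keep delaying writes for this
 * cluster still sees the unrounded size
 */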
1768
1769 must_push = 0;
1770 can_delay = 0;
1771
1772 if (vp->v_clen) {
1773 int newsize;
1774
1775 /*
1776 * we have an existing cluster... see if this write will extend it nicely
1777 */
1778 if (start_blkno >= vp->v_cstart) {
1779 if (last_blkno <= (vp->v_cstart + vp->v_clen)) {
1780 /*
1781 * we have a write that fits entirely
1782 * within the existing cluster limits
1783 */
1784 if (last_blkno >= vp->v_lastw) {
1785 /*
1786 * if we're extending the dirty region within the cluster
1787 * we need to update the cluster info... we check for blkno
1788 * equality because we may be extending the file with a
1789 * partial write.... this in turn changes our idea of how
1790 * much data to write out (v_ciosiz) for the last page
1791 */
1792 vp->v_lastw = last_blkno;
1793 newsize = io_size + ((start_blkno - vp->v_cstart) * PAGE_SIZE);
1794
1795 if (newsize > vp->v_ciosiz)
1796 vp->v_ciosiz = newsize;
1797 }
1798 can_delay = 1;
1799 goto finish_io;
1800 }
1801 if (start_blkno < (vp->v_cstart + vp->v_clen)) {
1802 /*
1803 * we have a write that starts in the middle of the current cluster
1804 * but extends beyond the cluster's limit
1805 * we'll clip the current cluster if we actually
1806 * overlap with the new write and then push it out
1807 * and start a new cluster with the current write
1808 */
1809 if (vp->v_lastw > start_blkno) {
1810 vp->v_lastw = start_blkno;
1811 vp->v_ciosiz = (vp->v_lastw - vp->v_cstart) * PAGE_SIZE;
1812 }
1813 }
1814 /*
1815 * we also get here for the case where the current write starts
1816 * beyond the limit of the existing cluster
1817 */
1818 must_push = 1;
1819 goto check_delay;
1820 }
1821 /*
1822 * the current write starts in front of the current cluster
1823 */
1824 if (last_blkno > vp->v_cstart) {
1825 /*
1826 * the current write extends into the existing cluster
1827 */
1828 if ((vp->v_lastw - start_blkno) > vp->v_clen) {
1829 /*
1830 * if we were to combine this write with the current cluster
1831 * we would exceed the cluster size limit....
1832 * clip the current cluster by moving the start position
1833 * to where the current write ends, and then push it
1834 */
1835 vp->v_ciosiz -= (last_blkno - vp->v_cstart) * PAGE_SIZE;
1836 vp->v_cstart = last_blkno;
1837
1838 /*
1839 * round up the io_size to the nearest page size
1840 * since we've coalesced with at least 1 pre-existing
1841 * page in the current cluster... this write may have ended in the
1842 * middle of the page which would cause io_size to give us an
1843 * inaccurate view of how much I/O we actually need to do
1844 */
1845 io_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
1846
1847 must_push = 1;
1848 goto check_delay;
1849 }
1850 /*
1851 * we can coalesce the current write with the existing cluster
1852 * adjust the cluster info to reflect this
1853 */
1854 if (last_blkno > vp->v_lastw) {
1855 /*
1856 * the current write completely overlaps
1857 * the existing cluster
1858 */
1859 vp->v_lastw = last_blkno;
1860 vp->v_ciosiz = io_size;
1861 } else {
1862 vp->v_ciosiz += (vp->v_cstart - start_blkno) * PAGE_SIZE;
1863
1864 if (io_size > vp->v_ciosiz)
1865 vp->v_ciosiz = io_size;
1866 }
1867 vp->v_cstart = start_blkno;
1868 can_delay = 1;
1869 goto finish_io;
1870 }
1871 /*
1872 * this I/O range is entirely in front of the current cluster
1873 * so we need to push the current cluster out before beginning
1874 * a new one
1875 */
1876 must_push = 1;
1877 }
1878 check_delay:
1879 if (must_push)
1880 cluster_push(vp);
1881
1882 if (io_size_before_rounding < (MAX_UPL_TRANSFER * PAGE_SIZE) && !(flags & IO_SYNC)) {
1883 vp->v_clen = MAX_UPL_TRANSFER;
1884 vp->v_cstart = start_blkno;
1885 vp->v_lastw = last_blkno;
1886 vp->v_ciosiz = io_size;
1887
1888 can_delay = 1;
1889 }
1890 finish_io:
1891 if (can_delay) {
1892 ubc_upl_commit_range(upl, 0, upl_size,
1893 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
1894 continue;
1895 }
1896 if (flags & IO_SYNC)
1897 io_flags = CL_COMMIT | CL_AGE;
1898 else
1899 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
1900
1901 if (vp->v_flag & VNOCACHE_DATA)
1902 io_flags |= CL_DUMP;
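/*
 * CL_DUMP turns into B_NOCACHE in cluster_io, so when the vnode
 * is marked VNOCACHE_DATA the pages are dumped from the cache by
 * cluster_iodone once the write completes rather than being
 * committed back as clean
 */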
1903
1904 while (vp->v_numoutput >= ASYNC_THROTTLE) {
1905 vp->v_flag |= VTHROTTLED;
1906 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_write", 0);
1907 }
1908 retval = cluster_io(vp, upl, 0, upl_f_offset, io_size,
1909 io_flags, (struct buf *)0);
1910 }
1911 }
1912 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1913 retval, 0, 0, 0, 0);
1914
1915 return (retval);
1916 }
1917
1918 cluster_read(vp, uio, filesize, devblocksize, flags)
1919 struct vnode *vp;
1920 struct uio *uio;
1921 off_t filesize;
1922 int devblocksize;
1923 int flags;
1924 {
1925 int prev_resid;
1926 int clip_size;
1927 off_t max_io_size;
1928 struct iovec *iov;
1929 vm_offset_t upl_offset;
1930 int upl_size;
1931 int pages_in_pl;
1932 upl_page_info_t *pl;
1933 int upl_flags;
1934 upl_t upl;
1935 int retval = 0;
1936
1937 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
1938 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
1939
1940 /*
1941 * We set a threshold of 4 pages to decide if the nocopy
1942 * read loop is worth the trouble...
1943 */
1944
1945 if (!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE)))
1946 {
1947 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
1948 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
1949 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
1950 return(retval);
1951 }
1952
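/*
 * for each iovec in the request, decide how to service it...
 * physically contiguous targets go to cluster_phys_read, small or
 * unaligned pieces fall back to the cached path (cluster_read_x),
 * and page aligned user buffers take the nocopy path
 * (cluster_nocopy_read) with any unaligned tail cleaned up afterwards
 */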
1953 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0)
1954 {
1955 /* we know we have a resid, so this is safe */
1956 iov = uio->uio_iov;
1957 while (iov->iov_len == 0) {
1958 uio->uio_iov++;
1959 uio->uio_iovcnt--;
1960 iov = uio->uio_iov;
1961 }
1962
1963 /*
1964 * We check every vector target and if it is physically
1965 * contiguous space, we skip the sanity checks.
1966 */
1967
1968 upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
1969 upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
1970 pages_in_pl = 0;
1971 upl_flags = UPL_QUERY_OBJECT_TYPE;
1972 if((vm_map_get_upl(current_map(),
1973 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1974 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
1975 {
1976 /*
1977 * the user app must have passed in an invalid address
1978 */
1979 return (EFAULT);
1980 }
1981
1982 if (upl_flags & UPL_PHYS_CONTIG)
1983 {
1984 retval = cluster_phys_read(vp, uio, filesize);
1985 }
1986 else if (uio->uio_resid < 4 * PAGE_SIZE)
1987 {
1988 /*
1989 * We set a threshold of 4 pages to decide if the nocopy
1990 * read loop is worth the trouble...
1991 */
1992 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
1993 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
1994 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
1995 return(retval);
1996 }
1997 else if (uio->uio_offset & PAGE_MASK_64)
1998 {
1999 /* Bring the file offset read up to a pagesize boundary */
2000 clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
2001 if (uio->uio_resid < clip_size)
2002 clip_size = uio->uio_resid;
2003 /*
2004 * Fake the resid going into the cluster_read_x call
2005 * and restore it on the way out.
2006 */
2007 prev_resid = uio->uio_resid;
2008 uio->uio_resid = clip_size;
2009 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2010 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2011 }
2012 else if ((int)iov->iov_base & PAGE_MASK_64)
2013 {
2014 clip_size = iov->iov_len;
2015 prev_resid = uio->uio_resid;
2016 uio->uio_resid = clip_size;
2017 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2018 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2019 }
2020 else
2021 {
2022 /*
2023 * If we come in here, we know the offset into
2024 * the file is on a pagesize boundary
2025 */
2026
2027 max_io_size = filesize - uio->uio_offset;
2028 clip_size = uio->uio_resid;
2029 if (iov->iov_len < clip_size)
2030 clip_size = iov->iov_len;
2031 if (max_io_size < clip_size)
2032 clip_size = (int)max_io_size;
2033
2034 if (clip_size < PAGE_SIZE)
2035 {
2036 /*
2037 * Take care of the tail end of the read in this vector.
2038 */
2039 prev_resid = uio->uio_resid;
2040 uio->uio_resid = clip_size;
2041 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2042 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2043 }
2044 else
2045 {
2046 /* round clip_size down to a multiple of pagesize */
2047 clip_size = clip_size & ~(PAGE_MASK);
2048 prev_resid = uio->uio_resid;
2049 uio->uio_resid = clip_size;
2050 retval = cluster_nocopy_read(vp, uio, filesize, devblocksize, flags);
2051 if ((retval==0) && uio->uio_resid)
2052 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2053 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2054 }
2055 } /* end else */
2056 } /* end while */
2057
2058 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
2059 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
2060
2061 return(retval);
2062 }
2063
2064 static
2065 cluster_read_x(vp, uio, filesize, devblocksize, flags)
2066 struct vnode *vp;
2067 struct uio *uio;
2068 off_t filesize;
2069 int devblocksize;
2070 int flags;
2071 {
2072 upl_page_info_t *pl;
2073 upl_t upl;
2074 vm_offset_t upl_offset;
2075 int upl_size;
2076 off_t upl_f_offset;
2077 int start_offset;
2078 int start_pg;
2079 int last_pg;
2080 int uio_last;
2081 int pages_in_upl;
2082 off_t max_size;
2083 int io_size;
2084 vm_offset_t io_address;
2085 kern_return_t kret;
2086 int segflg;
2087 int error = 0;
2088 int retval = 0;
2089 int b_lblkno;
2090 int e_lblkno;
2091
2092 b_lblkno = (int)(uio->uio_offset / PAGE_SIZE_64);
2093
2094 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2095 /*
2096 * compute the size of the upl needed to encompass
2097 * the requested read... limit each call to cluster_io
2098 * to the maximum UPL size... cluster_io will clip if
2099 * this exceeds the maximum io_size for the device...
2100 * make sure to account for
2101 * a starting offset that's not page aligned
2102 */
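/*
 * e.g. with 4K pages, a read at file offset 0x1800 yields
 * start_offset == 0x800 and upl_f_offset == 0x1000... the upl is
 * always built from a page aligned file offset
 */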
2103 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
2104 upl_f_offset = uio->uio_offset - (off_t)start_offset;
2105 max_size = filesize - uio->uio_offset;
2106
2107 if ((off_t)((unsigned int)uio->uio_resid) < max_size)
2108 io_size = uio->uio_resid;
2109 else
2110 io_size = max_size;
2111 #ifdef ppc
2112 if (uio->uio_segflg == UIO_USERSPACE && !(vp->v_flag & VNOCACHE_DATA)) {
2113 segflg = uio->uio_segflg;
2114
2115 uio->uio_segflg = UIO_PHYS_USERSPACE;
2116
2117 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2118 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2119
2120 while (io_size && retval == 0) {
2121 int xsize;
2122 vm_offset_t paddr;
2123
2124 if (ubc_page_op(vp,
2125 upl_f_offset,
2126 UPL_POP_SET | UPL_POP_BUSY,
2127 &paddr, 0) != KERN_SUCCESS)
2128 break;
2129
2130 xsize = PAGE_SIZE - start_offset;
2131
2132 if (xsize > io_size)
2133 xsize = io_size;
2134
2135 retval = uiomove((caddr_t)(paddr + start_offset), xsize, uio);
2136
2137 ubc_page_op(vp, upl_f_offset,
2138 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2139
2140 io_size -= xsize;
2141 start_offset = (int)
2142 (uio->uio_offset & PAGE_MASK_64);
2143 upl_f_offset = uio->uio_offset - start_offset;
2144 }
2145 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2146 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2147
2148 uio->uio_segflg = segflg;
2149
2150 if (retval)
2151 break;
2152
2153 if (io_size == 0) {
2154 /*
2155 * we're already finished with this read request
2156 * let's see if we should do a read-ahead
2157 */
2158 e_lblkno = (int)
2159 ((uio->uio_offset - 1) / PAGE_SIZE_64);
2160
2161 if (!(vp->v_flag & VRAOFF))
2162 /*
2163 * let's try to read ahead if we're in
2164 * a sequential access pattern
2165 */
2166 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2167 vp->v_lastr = e_lblkno;
2168
2169 break;
2170 }
2171 max_size = filesize - uio->uio_offset;
2172 }
2173 #endif
2174 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2175 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2176 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2177 pages_in_upl = upl_size / PAGE_SIZE;
2178
2179 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START,
2180 upl, (int)upl_f_offset, upl_size, start_offset, 0);
2181
2182 kret = ubc_create_upl(vp,
2183 upl_f_offset,
2184 upl_size,
2185 &upl,
2186 &pl,
2187 UPL_FLAGS_NONE);
2188 if (kret != KERN_SUCCESS)
2189 panic("cluster_read: failed to get pagelist");
2190
2191 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END,
2192 upl, (int)upl_f_offset, upl_size, start_offset, 0);
2193
2194 /*
2195 * scan from the beginning of the upl looking for the first
2196 * non-valid page.... this will become the first page in
2197 * the request we're going to make to 'cluster_io'... if all
2198 * of the pages are valid, we won't call through to 'cluster_io'
2199 */
2200 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
2201 if (!upl_valid_page(pl, start_pg))
2202 break;
2203 }
2204
2205 /*
2206 * scan from the starting invalid page looking for a valid
2207 * page before the end of the upl is reached, if we
2208 * find one, then it will be the last page of the request to
2209 * 'cluster_io'
2210 */
2211 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2212 if (upl_valid_page(pl, last_pg))
2213 break;
2214 }
2215
2216 if (start_pg < last_pg) {
2217 /*
2218 * we found a range of 'invalid' pages that must be filled...
2219 * if the last page in this range is the last page of the file,
2220 * we may have to clip the size of it to keep from reading past
2221 * the end of the last physical block associated with the file
2222 */
2223 upl_offset = start_pg * PAGE_SIZE;
2224 io_size = (last_pg - start_pg) * PAGE_SIZE;
2225
2226 if ((upl_f_offset + upl_offset + io_size) > filesize) {
2227 io_size = filesize - (upl_f_offset + upl_offset);
2228 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
2229 }
2230 /*
2231 * issue a synchronous read to cluster_io
2232 */
2233
2234 error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
2235 io_size, CL_READ, (struct buf *)0);
2236 }
2237 if (error == 0) {
2238 /*
2239 * if the read completed successfully, or there was no I/O request
2240 * issued, then map the upl into kernel address space and
2241 * move the data into user land.... we'll first add on any 'valid'
2242 * pages that were present in the upl when we acquired it.
2243 */
2244 u_int val_size;
2245 u_int size_of_prefetch;
2246
2247 for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
2248 if (!upl_valid_page(pl, uio_last))
2249 break;
2250 }
2251 /*
2252 * compute size to transfer this round, if uio->uio_resid is
2253 * still non-zero after this uiomove, we'll loop around and
2254 * set up for another I/O.
2255 */
2256 val_size = (uio_last * PAGE_SIZE) - start_offset;
2257
2258 if (max_size < val_size)
2259 val_size = max_size;
2260
2261 if (uio->uio_resid < val_size)
2262 val_size = uio->uio_resid;
2263
2264 e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);
2265
2266 if (size_of_prefetch = (uio->uio_resid - val_size)) {
2267 /*
2268 * if there's still I/O left to do for this request, then issue a
2269 * pre-fetch I/O... the I/O wait time will overlap
2270 * with the copying of the data
2271 */
2272 cluster_rd_prefetch(vp, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);
2273 } else {
2274 if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
2275 /*
2276 * let's try to read ahead if we're in
2277 * a sequential access pattern
2278 */
2279 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2280 vp->v_lastr = e_lblkno;
2281 }
2282 #ifdef ppc
2283 if (uio->uio_segflg == UIO_USERSPACE) {
2284 int offset;
2285
2286 segflg = uio->uio_segflg;
2287
2288 uio->uio_segflg = UIO_PHYS_USERSPACE;
2289
2290
2291 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2292 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2293
2294 offset = start_offset;
2295
2296 while (val_size && retval == 0) {
2297 int csize;
2298 int i;
2299 caddr_t paddr;
2300
2301 i = offset / PAGE_SIZE;
2302 csize = min(PAGE_SIZE - start_offset, val_size);
2303
2304 paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;
2305
2306 retval = uiomove(paddr, csize, uio);
2307
2308 val_size -= csize;
2309 offset += csize;
2310 start_offset = offset & PAGE_MASK;
2311 }
2312 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2313 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2314
2315 uio->uio_segflg = segflg;
2316 } else
2317 #endif
2318 {
2319 if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
2320 panic("cluster_read: ubc_upl_map() failed\n");
2321
2322 retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);
2323
2324 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
2325 panic("cluster_read: ubc_upl_unmap() failed\n");
2326 }
2327 }
2328 if (start_pg < last_pg) {
2329 /*
2330 * compute the range of pages that we actually issued an I/O for
2331 * and either commit them as valid if the I/O succeeded
2332 * or abort them if the I/O failed
2333 */
2334 io_size = (last_pg - start_pg) * PAGE_SIZE;
2335
2336 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2337 upl, start_pg * PAGE_SIZE, io_size, error, 0);
2338
2339 if (error || (vp->v_flag & VNOCACHE_DATA))
2340 ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
2341 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2342 else
2343 ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size,
2344 UPL_COMMIT_CLEAR_DIRTY
2345 | UPL_COMMIT_FREE_ON_EMPTY
2346 | UPL_COMMIT_INACTIVATE);
2347
2348 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2349 upl, start_pg * PAGE_SIZE, io_size, error, 0);
2350 }
2351 if ((last_pg - start_pg) < pages_in_upl) {
2352 int cur_pg;
2353 int commit_flags;
2354
2355 /*
2356 * the set of pages that we issued an I/O for did not encompass
2357 * the entire upl... so just release these without modifying
2358 * their state
2359 */
2360 if (error)
2361 ubc_upl_abort(upl, 0);
2362 else {
2363 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2364 upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);
2365
2366 if (start_pg) {
2367 /*
2368 * we found some already valid pages at the beginning of
2369 * the upl... commit these back to the inactive list with
2370 * reference cleared
2371 */
2372 for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
2373 commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2374 | UPL_COMMIT_INACTIVATE;
2375
2376 if (upl_dirty_page(pl, cur_pg))
2377 commit_flags |= UPL_COMMIT_SET_DIRTY;
2378
2379 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2380 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2381 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2382 else
2383 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2384 PAGE_SIZE, commit_flags);
2385 }
2386 }
2387 if (last_pg < uio_last) {
2388 /*
2389 * we found some already valid pages immediately after the
2390 * pages we issued I/O for... commit these back to the
2391 * inactive list with reference cleared
2392 */
2393 for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
2394 commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2395 | UPL_COMMIT_INACTIVATE;
2396
2397 if (upl_dirty_page(pl, cur_pg))
2398 commit_flags |= UPL_COMMIT_SET_DIRTY;
2399
2400 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2401 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2402 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2403 else
2404 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2405 PAGE_SIZE, commit_flags);
2406 }
2407 }
2408 if (uio_last < pages_in_upl) {
2409 /*
2410 * there were some invalid pages beyond the valid pages
2411 * that we didn't issue an I/O for, just release them
2412 * unchanged
2413 */
2414 ubc_upl_abort(upl, 0);
2415 }
2416
2417 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2418 upl, -1, -1, 0, 0);
2419 }
2420 }
2421 if (retval == 0)
2422 retval = error;
2423 }
2424
2425 return (retval);
2426 }
2427
2428 static
2429 cluster_nocopy_read(vp, uio, filesize, devblocksize, flags)
2430 struct vnode *vp;
2431 struct uio *uio;
2432 off_t filesize;
2433 int devblocksize;
2434 int flags;
2435 {
2436 upl_t upl;
2437 upl_page_info_t *pl;
2438 off_t upl_f_offset;
2439 vm_offset_t upl_offset;
2440 off_t start_upl_f_offset;
2441 off_t max_io_size;
2442 int io_size;
2443 int upl_size;
2444 int upl_needed_size;
2445 int pages_in_pl;
2446 vm_offset_t paddr;
2447 int upl_flags;
2448 kern_return_t kret;
2449 int segflg;
2450 struct iovec *iov;
2451 int i;
2452 int force_data_sync;
2453 int error = 0;
2454 int retval = 0;
2455
2456 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
2457 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
2458
2459 /*
2460 * When we enter this routine, we know
2461 * -- the offset into the file is on a pagesize boundary
2462 * -- the resid is a page multiple
2463 * -- the resid will not exceed iov_len
2464 */
2465
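/*
 * general plan for each pass of the loop below...
 * 1) copy any pages that are already resident in the cache straight
 *    to the user's buffer with ubc_page_op/uiomove
 * 2) measure the run of non-resident pages that follows
 * 3) grab a upl over the corresponding piece of the user's buffer
 *    with vm_map_get_upl and read directly into it via cluster_io,
 *    avoiding a copy through the page cache
 */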
2466 iov = uio->uio_iov;
2467 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2468
2469 max_io_size = filesize - uio->uio_offset;
2470
2471 if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
2472 io_size = max_io_size;
2473 else
2474 io_size = uio->uio_resid;
2475
2476 /*
2477 * We don't come into this routine unless
2478 * UIO_USERSPACE is set.
2479 */
2480 segflg = uio->uio_segflg;
2481
2482 uio->uio_segflg = UIO_PHYS_USERSPACE;
2483
2484 /*
2485 * First look for pages already in the cache
2486 * and move them to user space.
2487 */
2488 while (io_size && (retval == 0)) {
2489 upl_f_offset = uio->uio_offset;
2490
2491 /*
2492 * If this call fails, it means the page is not
2493 * in the page cache.
2494 */
2495 if (ubc_page_op(vp, upl_f_offset,
2496 UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
2497 break;
2498
2499 retval = uiomove((caddr_t)(paddr), PAGE_SIZE, uio);
2500
2501 ubc_page_op(vp, upl_f_offset,
2502 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2503
2504 io_size -= PAGE_SIZE;
2505 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
2506 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2507 }
2508
2509 uio->uio_segflg = segflg;
2510
2511 if (retval)
2512 {
2513 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2514 (int)uio->uio_offset, uio->uio_resid, 2, retval, 0);
2515 return(retval);
2516 }
2517
2518 /* If we are already finished with this read, then return */
2519 if (io_size == 0)
2520 {
2521
2522 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2523 (int)uio->uio_offset, uio->uio_resid, 3, io_size, 0);
2524 return(0);
2525 }
2526
2527 max_io_size = io_size;
2528 if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2529 max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2530
2531 start_upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
2532 upl_f_offset = start_upl_f_offset;
2533 io_size = 0;
2534
2535 while(io_size < max_io_size)
2536 {
2537
2538 if(ubc_page_op(vp, upl_f_offset,
2539 UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS)
2540 {
2541 ubc_page_op(vp, upl_f_offset,
2542 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2543 break;
2544 }
2545
2546 /*
2547 * Build up the io request parameters.
2548 */
2549
2550 io_size += PAGE_SIZE;
2551 upl_f_offset += PAGE_SIZE;
2552 }
2553
2554 if (io_size == 0)
2555 return(retval);
2556
2557 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2558 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
2559
2560 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
2561 (int)upl_offset, upl_needed_size, iov->iov_base, io_size, 0);
2562
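/*
 * try to get a upl that covers the user's buffer... if some of the
 * pages come back non-valid we abort the partial upl and retry with
 * a larger force_data_sync value (up to 3 attempts) before giving
 * up... presumably each retry asks the VM to do more work to bring
 * the backing pages into a usable state
 */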
2563 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
2564 {
2565 pages_in_pl = 0;
2566 upl_size = upl_needed_size;
2567 upl_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2568
2569 kret = vm_map_get_upl(current_map(),
2570 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2571 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);
2572
2573 if (kret != KERN_SUCCESS)
2574 {
2575 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2576 (int)upl_offset, upl_size, io_size, kret, 0);
2577
2578 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2579 (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);
2580
2581 /* cluster_nocopy_read: failed to get pagelist */
2582 /* do not return kret here */
2583 return(retval);
2584 }
2585
2586 pages_in_pl = upl_size / PAGE_SIZE;
2587 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2588
2589 for(i=0; i < pages_in_pl; i++)
2590 {
2591 if (!upl_valid_page(pl, i))
2592 break;
2593 }
2594 if (i == pages_in_pl)
2595 break;
2596
2597 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2598 UPL_ABORT_FREE_ON_EMPTY);
2599 }
2600
2601 if (force_data_sync >= 3)
2602 {
2603 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2604 (int)upl_offset, upl_size, io_size, kret, 0);
2605
2606 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2607 (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);
2608 return(retval);
2609 }
2610 /*
2611 * Consider the possibility that upl_size wasn't satisfied.
2612 */
2613 if (upl_size != upl_needed_size)
2614 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
2615
2616 if (io_size == 0)
2617 {
2618 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2619 UPL_ABORT_FREE_ON_EMPTY);
2620 return(retval);
2621 }
2622
2623 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2624 (int)upl_offset, upl_size, io_size, kret, 0);
2625
2626 /*
2627 * issue a synchronous read to cluster_io
2628 */
2629
2630 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
2631 upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);
2632
2633 error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
2634 io_size, CL_READ| CL_NOZERO, (struct buf *)0);
2635
2636 if (error == 0) {
2637 /*
2638 * The cluster_io read completed successfully,
2639 * update the uio structure and commit.
2640 */
2641
2642 ubc_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2643 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
2644
2645 iov->iov_base += io_size;
2646 iov->iov_len -= io_size;
2647 uio->uio_resid -= io_size;
2648 uio->uio_offset += io_size;
2649 }
2650 else {
2651 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2652 UPL_ABORT_FREE_ON_EMPTY);
2653 }
2654
2655 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
2656 upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
2657
2658 if (retval == 0)
2659 retval = error;
2660
2661 } /* end while */
2662
2663
2664 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2665 (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);
2666
2667 return (retval);
2668 }
2669
2670
2671 static
2672 cluster_phys_read(vp, uio, filesize)
2673 struct vnode *vp;
2674 struct uio *uio;
2675 off_t filesize;
2676 {
2677 upl_t upl;
2678 vm_offset_t upl_offset;
2679 off_t max_size;
2680 int io_size;
2681 int upl_size;
2682 int upl_needed_size;
2683 int pages_in_pl;
2684 int upl_flags;
2685 kern_return_t kret;
2686 struct iovec *iov;
2687 int error;
2688
2689 /*
2690 * When we enter this routine, we know
2691 * -- the resid will not exceed iov_len
2692 * -- the target address is physically contiguous
2693 */
2694
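/*
 * the target is a single physically contiguous buffer, so one
 * vm_map_get_upl over iov_base followed by one cluster_io with
 * CL_DEV_MEMORY is enough... no page level bookkeeping is needed here
 */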
2695 iov = uio->uio_iov;
2696
2697 max_size = filesize - uio->uio_offset;
2698
2699 if (max_size < (off_t)((unsigned int)iov->iov_len))
2700 io_size = max_size;
2701 else
2702 io_size = iov->iov_len;
2703
2704 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2705 upl_needed_size = upl_offset + io_size;
2706
2707 pages_in_pl = 0;
2708 upl_size = upl_needed_size;
2709 upl_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2710
2711 kret = vm_map_get_upl(current_map(),
2712 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2713 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);
2714
2715 if (kret != KERN_SUCCESS)
2716 {
2717 /* cluster_phys_read: failed to get pagelist */
2718 return(EINVAL);
2719 }
2720
2721 /*
2722 * Consider the possibility that upl_size wasn't satisfied.
2723 */
2724 if (upl_size < upl_needed_size)
2725 {
2726 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2727 return(EINVAL);
2728 }
2729
2730 /*
2731 * issue a synchronous read to cluster_io
2732 */
2733
2734 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
2735 io_size, CL_READ| CL_NOZERO | CL_DEV_MEMORY, (struct buf *)0);
2736
2737 if (error == 0)
2738 {
2739 /*
2740 * The cluster_io read completed successfully,
2741 * update the uio structure and commit.
2742 */
2743
2744 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);
2745
2746 iov->iov_base += io_size;
2747 iov->iov_len -= io_size;
2748 uio->uio_resid -= io_size;
2749 uio->uio_offset += io_size;
2750 }
2751 else
2752 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2753
2754 return (error);
2755 }
2756
2757 /*
2758 * generate advisory I/O's in the largest chunks possible
2759 * the completed pages will be released into the VM cache
2760 */
2761 advisory_read(vp, filesize, f_offset, resid, devblocksize)
2762 struct vnode *vp;
2763 off_t filesize;
2764 off_t f_offset;
2765 int resid;
2766 int devblocksize;
2767 {
2768 upl_page_info_t *pl;
2769 upl_t upl;
2770 vm_offset_t upl_offset;
2771 int upl_size;
2772 off_t upl_f_offset;
2773 int start_offset;
2774 int start_pg;
2775 int last_pg;
2776 int pages_in_upl;
2777 off_t max_size;
2778 int io_size;
2779 kern_return_t kret;
2780 int retval = 0;
2781
2782
2783 if (!UBCINFOEXISTS(vp))
2784 return(EINVAL);
2785
2786 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
2787 (int)f_offset, resid, (int)filesize, devblocksize, 0);
2788
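/*
 * work through the range one upl sized chunk at a time... within each
 * chunk only the first run of non-resident pages gets an I/O: it is
 * issued asynchronously (CL_ASYNC | CL_READ) with CL_COMMIT so that
 * completion releases those pages into the cache... pages that were
 * already valid, and anything beyond that first run, are simply
 * aborted back untouched and the next chunk starts there
 */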
2789 while (resid && f_offset < filesize && retval == 0) {
2790 /*
2791 * compute the size of the upl needed to encompass
2792 * the requested read... limit each call to cluster_io
2793 * to the maximum UPL size... cluster_io will clip if
2794 * this exceeds the maximum io_size for the device...
2795 * make sure to account for
2796 * a starting offset that's not page aligned
2797 */
2798 start_offset = (int)(f_offset & PAGE_MASK_64);
2799 upl_f_offset = f_offset - (off_t)start_offset;
2800 max_size = filesize - f_offset;
2801
2802 if (resid < max_size)
2803 io_size = resid;
2804 else
2805 io_size = max_size;
2806
2807 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2808 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2809 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2810 pages_in_upl = upl_size / PAGE_SIZE;
2811
2812 kret = ubc_create_upl(vp,
2813 upl_f_offset,
2814 upl_size,
2815 &upl,
2816 &pl,
2817 UPL_FLAGS_NONE);
2818 if (kret != KERN_SUCCESS)
2819 panic("advisory_read: failed to get pagelist");
2820
2821
2822 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
2823 upl, (int)upl_f_offset, upl_size, start_offset, 0);
2824
2825 /*
2826 * scan from the beginning of the upl looking for the first
2827 * non-valid page.... this will become the first page in
2828 * the request we're going to make to 'cluster_io'... if all
2829 * of the pages are valid, we won't call through to 'cluster_io'
2830 */
2831 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
2832 if (!upl_valid_page(pl, start_pg))
2833 break;
2834 }
2835
2836 /*
2837 * scan from the starting invalid page looking for a valid
2838 * page before the end of the upl is reached, if we
2839 * find one, then it will be the last page of the request to
2840 * 'cluster_io'
2841 */
2842 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2843 if (upl_valid_page(pl, last_pg))
2844 break;
2845 }
2846
2847 if (start_pg < last_pg) {
2848 /*
2849 * we found a range of 'invalid' pages that must be filled...
2850 * if the last page in this range is the last page of the file,
2851 * we may have to clip the size of it to keep from reading past
2852 * the end of the last physical block associated with the file
2853 */
2854 upl_offset = start_pg * PAGE_SIZE;
2855 io_size = (last_pg - start_pg) * PAGE_SIZE;
2856
2857 if ((upl_f_offset + upl_offset + io_size) > filesize) {
2858 io_size = filesize - (upl_f_offset + upl_offset);
2859 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
2860 }
2861 /*
2862 * issue an asynchronous read to cluster_io
2863 */
2864 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size,
2865 CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0);
2866 }
2867 if (start_pg) {
2868 /*
2869 * start_pg of non-zero indicates we found some already valid pages
2870 * at the beginning of the upl.... we need to release these without
2871 * modifying their state
2872 */
2873 ubc_upl_abort_range(upl, 0, start_pg * PAGE_SIZE,
2874 UPL_ABORT_FREE_ON_EMPTY);
2875
2876 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 62)) | DBG_FUNC_NONE,
2877 upl, 0, start_pg * PAGE_SIZE, 0, 0);
2878 }
2879 if (last_pg < pages_in_upl) {
2880 /*
2881 * the set of pages that we issued an I/O for did not extend all the
2882 * way to the end of the upl... so just release them without modifying
2883 * their state
2884 */
2885 ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, (pages_in_upl - last_pg) * PAGE_SIZE,
2886 UPL_ABORT_FREE_ON_EMPTY);
2887
2888 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 63)) | DBG_FUNC_NONE,
2889 upl, last_pg * PAGE_SIZE,
2890 (pages_in_upl - last_pg) * PAGE_SIZE, 0, 0);
2891 }
2892 io_size = (last_pg * PAGE_SIZE) - start_offset;
2893
2894 if (io_size > resid)
2895 io_size = resid;
2896 f_offset += io_size;
2897 resid -= io_size;
2898 }
2899 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
2900 (int)f_offset, resid, retval, 0, 0);
2901
2902 return(retval);
2903 }
2904
2905
2906 cluster_push(vp)
2907 struct vnode *vp;
2908 {
2909 upl_page_info_t *pl;
2910 upl_t upl;
2911 vm_offset_t upl_offset;
2912 int upl_size;
2913 off_t upl_f_offset;
2914 int pages_in_upl;
2915 int start_pg;
2916 int last_pg;
2917 int io_size;
2918 int io_flags;
2919 int size;
2920 kern_return_t kret;
2921
2922
2923 if (!UBCINFOEXISTS(vp))
2924 return(0);
2925
2926 if (vp->v_clen == 0 || (pages_in_upl = vp->v_lastw - vp->v_cstart) == 0)
2927 return (0);
2928 upl_size = pages_in_upl * PAGE_SIZE;
2929 upl_f_offset = ((off_t)vp->v_cstart) * PAGE_SIZE_64;
2930 size = vp->v_ciosiz;
2931 vp->v_clen = 0;
2932
2933 if (size > upl_size || (upl_size - size) > PAGE_SIZE)
2934 panic("cluster_push: v_ciosiz doesn't match size of cluster\n");
2935
2936 kret = ubc_create_upl(vp,
2937 upl_f_offset,
2938 upl_size,
2939 &upl,
2940 &pl,
2941 UPL_FLAGS_NONE);
2942 if (kret != KERN_SUCCESS)
2943 panic("cluster_push: failed to get pagelist");
2944
2945 last_pg = 0;
2946
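/*
 * walk the upl alternating between runs of pages we don't need to
 * write (not valid or not dirty... just aborted back) and runs of
 * dirty pages, each of which is handed to cluster_io as one
 * asynchronous write, throttled against v_numoutput
 */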
2947 while (size) {
2948
2949 for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
2950 if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
2951 break;
2952 }
2953 if (start_pg > last_pg) {
2954 io_size = (start_pg - last_pg) * PAGE_SIZE;
2955
2956 ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size,
2957 UPL_ABORT_FREE_ON_EMPTY);
2958
2959 if (io_size < size)
2960 size -= io_size;
2961 else
2962 break;
2963 }
2964 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2965 if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
2966 break;
2967 }
2968 upl_offset = start_pg * PAGE_SIZE;
2969
2970 io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);
2971
2972 if (vp->v_flag & VNOCACHE_DATA)
2973 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC | CL_DUMP;
2974 else
2975 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
2976
2977 while (vp->v_numoutput >= ASYNC_THROTTLE) {
2978 vp->v_flag |= VTHROTTLED;
2979 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_push", 0);
2980 }
2981 cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, io_flags, (struct buf *)0);
2982
2983 size -= io_size;
2984 }
2985 return(1);
2986 }