1
2 /*
3 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * The contents of this file constitute Original Code as defined in and
8 * are subject to the Apple Public Source License Version 1.1 (the
9 * "License"). You may not use this file except in compliance with the
10 * License. Please obtain a copy of the License at
11 * http://www.apple.com/publicsource and read it before using this file.
12 *
13 * This Original Code and all software distributed under the License are
14 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
18 * License for the specific language governing rights and limitations
19 * under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)vfs_cluster.c 8.10 (Berkeley) 3/28/95
57 */
58
59 #include <sys/param.h>
60 #include <sys/proc.h>
61 #include <sys/buf.h>
62 #include <sys/vnode.h>
63 #include <sys/mount.h>
64 #include <sys/trace.h>
65 #include <sys/malloc.h>
66 #include <sys/resourcevar.h>
67 #include <libkern/libkern.h>
68
69 #include <sys/ubc.h>
70 #include <vm/vm_pageout.h>
71
72 #include <sys/kdebug.h>
73
74 #define CL_READ 0x01
75 #define CL_ASYNC 0x02
76 #define CL_COMMIT 0x04
77 #define CL_PAGEOUT 0x10
78 #define CL_AGE 0x20
79 #define CL_DUMP 0x40
80 #define CL_NOZERO 0x80
81 #define CL_PAGEIN 0x100
82 #define CL_DEV_MEMORY 0x200
83
84 static void cluster_zero(upl_t upl, vm_offset_t upl_offset,
85 int size, struct buf *bp);
86 static int cluster_read_x(struct vnode *vp, struct uio *uio,
87 off_t filesize, int devblocksize, int flags);
88 static int cluster_write_x(struct vnode *vp, struct uio *uio,
89 off_t oldEOF, off_t newEOF, off_t headOff,
90 off_t tailOff, int devblocksize, int flags);
91 static int cluster_nocopy_read(struct vnode *vp, struct uio *uio,
92 off_t filesize, int devblocksize, int flags);
93 static int cluster_nocopy_write(struct vnode *vp, struct uio *uio,
94 off_t newEOF, int devblocksize, int flags);
95 static int cluster_phys_read(struct vnode *vp, struct uio *uio,
96 off_t filesize);
97 static int cluster_phys_write(struct vnode *vp, struct uio *uio);
98 static int cluster_push_x(struct vnode *vp, off_t EOF, daddr_t first, daddr_t last, int can_delay);
99 static int cluster_try_push(struct vnode *vp, off_t newEOF, int can_delay, int push_all);
100
101
102 /*
103 * throttle the number of async writes that
104 * can be outstanding on a single vnode
105 * before we issue a synchronous write
106 */
107 #define ASYNC_THROTTLE 9
108
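/*
 * cluster_iodone - completion routine for a chain of component buffers
 * that together make up one cluster transaction.  It returns immediately
 * unless every buffer in the chain is marked B_DONE; once the whole chain
 * has completed it accumulates the error and resid totals, frees the
 * vector lists and component buffers, wakes any writer throttled on
 * v_numoutput, zero-fills the tail of the last page for a read that ended
 * at a non-page-aligned EOF, finishes the original buffer (if any) via
 * biodone, and commits or aborts the associated UPL range as dictated by
 * the error state and the b_flags set up by cluster_io.
 */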
109 static int
110 cluster_iodone(bp)
111 struct buf *bp;
112 {
113 int b_flags;
114 int error;
115 int total_size;
116 int total_resid;
117 int upl_offset;
118 int zero_offset;
119 upl_t upl;
120 struct buf *cbp;
121 struct buf *cbp_head;
122 struct buf *cbp_next;
123 struct buf *real_bp;
124 struct vnode *vp;
125 int commit_size;
126 int pg_offset;
127
128
129 cbp_head = (struct buf *)(bp->b_trans_head);
130
131 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
132 (int)cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
133
134 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
135 /*
136 * all I/O requests that are part of this transaction
137 * have to complete before we can process it
138 */
139 if ( !(cbp->b_flags & B_DONE)) {
140
141 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
142 (int)cbp_head, (int)cbp, cbp->b_bcount, cbp->b_flags, 0);
143
144 return 0;
145 }
146 }
147 error = 0;
148 total_size = 0;
149 total_resid = 0;
150
151 cbp = cbp_head;
152 upl_offset = cbp->b_uploffset;
153 upl = cbp->b_pagelist;
154 b_flags = cbp->b_flags;
155 real_bp = cbp->b_real_bp;
156 vp = cbp->b_vp;
157 zero_offset= cbp->b_validend;
158
159 while (cbp) {
160 if (cbp->b_vectorcount > 1)
161 _FREE(cbp->b_vectorlist, M_SEGMENT);
162
163 if ((cbp->b_flags & B_ERROR) && error == 0)
164 error = cbp->b_error;
165
166 total_resid += cbp->b_resid;
167 total_size += cbp->b_bcount;
168
169 cbp_next = cbp->b_trans_next;
170
171 free_io_buf(cbp);
172
173 cbp = cbp_next;
174 }
175 if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= (ASYNC_THROTTLE / 3))) {
176 vp->v_flag &= ~VTHROTTLED;
177 wakeup((caddr_t)&vp->v_numoutput);
178 }
179 if (zero_offset)
180 cluster_zero(upl, zero_offset, PAGE_SIZE - (zero_offset & PAGE_MASK), real_bp);
181
182 if ((b_flags & B_NEED_IODONE) && real_bp) {
183 if (error) {
184 real_bp->b_flags |= B_ERROR;
185 real_bp->b_error = error;
186 }
187 real_bp->b_resid = total_resid;
188
189 biodone(real_bp);
190 }
191 if (error == 0 && total_resid)
192 error = EIO;
193
194 if (b_flags & B_COMMIT_UPL) {
195 pg_offset = upl_offset & PAGE_MASK;
196 commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
197
198 if (error || (b_flags & B_NOCACHE)) {
199 int upl_abort_code;
200
201 if ((b_flags & B_PAGEOUT) && (error != ENXIO)) /* transient error */
202 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
203 else if (b_flags & B_PGIN)
204 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
205 else
206 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
207
208 ubc_upl_abort_range(upl, upl_offset - pg_offset, commit_size,
209 upl_abort_code);
210
211 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
212 (int)upl, upl_offset - pg_offset, commit_size,
213 0x80000000|upl_abort_code, 0);
214
215 } else {
216 int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;
217
218 if ( !(b_flags & B_PAGEOUT))
219 upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;
220 if (b_flags & B_AGE)
221 upl_commit_flags |= UPL_COMMIT_INACTIVATE;
222
223 ubc_upl_commit_range(upl, upl_offset - pg_offset, commit_size,
224 upl_commit_flags);
225
226 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
227 (int)upl, upl_offset - pg_offset, commit_size,
228 upl_commit_flags, 0);
229 }
230 } else
231 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
232 (int)upl, upl_offset, 0, error, 0);
233
234 return (error);
235 }
236
237
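/*
 * cluster_zero - zero 'size' bytes starting at 'upl_offset' within the
 * given upl.  If the caller's buffer already has a kernel mapping
 * (bp->b_data), that mapping is used directly; otherwise the upl is
 * temporarily mapped with ubc_upl_map for the bzero and unmapped again.
 */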
238 static void
239 cluster_zero(upl, upl_offset, size, bp)
240 upl_t upl;
241 vm_offset_t upl_offset;
242 int size;
243 struct buf *bp;
244 {
245 vm_offset_t io_addr = 0;
246 int must_unmap = 0;
247 kern_return_t kret;
248
249 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
250 upl_offset, size, (int)bp, 0, 0);
251
252 if (bp == NULL || bp->b_data == NULL) {
253 kret = ubc_upl_map(upl, &io_addr);
254
255 if (kret != KERN_SUCCESS)
256 panic("cluster_zero: ubc_upl_map() failed with (%d)", kret);
257 if (io_addr == 0)
258 panic("cluster_zero: ubc_upl_map() mapped 0");
259
260 must_unmap = 1;
261 } else
262 io_addr = (vm_offset_t)bp->b_data;
263 bzero((caddr_t)(io_addr + upl_offset), size);
264
265 if (must_unmap) {
266 kret = ubc_upl_unmap(upl);
267
268 if (kret != KERN_SUCCESS)
269 panic("cluster_zero: kernel_upl_unmap failed");
270 }
271 }
272
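/*
 * cluster_io - work horse for the routines in this file.  The byte range
 * described by (upl, upl_offset, f_offset, non_rounded_size) is carved
 * into device-sized transfers: VOP_CMAP translates file offsets into
 * device block numbers, a chain of component buffers with per-page I/O
 * vectors is built (CL_DEV_MEMORY requests are treated as one physically
 * contiguous "page"), holes encountered on reads are zero-filled in the
 * upl, and the chains are issued through VOP_STRATEGY.  Unless CL_ASYNC
 * is set, the routine waits for the I/O and reports its status; on error
 * any unissued buffers are torn down and, if CL_COMMIT was requested,
 * the remaining upl range is aborted.
 */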
273 static int
274 cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, real_bp)
275 struct vnode *vp;
276 upl_t upl;
277 vm_offset_t upl_offset;
278 off_t f_offset;
279 int non_rounded_size;
280 int devblocksize;
281 int flags;
282 struct buf *real_bp;
283 {
284 struct buf *cbp;
285 struct iovec *iovp;
286 u_int size;
287 int io_flags;
288 int error = 0;
289 int retval = 0;
290 struct buf *cbp_head = 0;
291 struct buf *cbp_tail = 0;
292 upl_page_info_t *pl;
293 int buf_count = 0;
294 int pg_count;
295 int pg_offset;
296 u_int max_iosize;
297 u_int max_vectors;
298 int priv;
299 int zero_offset = 0;
300
301 if (flags & CL_READ) {
302 io_flags = (B_VECTORLIST | B_READ);
303
304 vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
305 } else {
306 io_flags = (B_VECTORLIST | B_WRITEINPROG);
307
308 vfs_io_attributes(vp, B_WRITE, &max_iosize, &max_vectors);
309 }
310 pl = ubc_upl_pageinfo(upl);
311
312 if (flags & CL_ASYNC)
313 io_flags |= (B_CALL | B_ASYNC);
314 if (flags & CL_AGE)
315 io_flags |= B_AGE;
316 if (flags & CL_DUMP)
317 io_flags |= B_NOCACHE;
318 if (flags & CL_PAGEIN)
319 io_flags |= B_PGIN;
320
321 if (devblocksize)
322 size = (non_rounded_size + (devblocksize - 1)) & ~(devblocksize - 1);
323 else
324 size = non_rounded_size;
325
326
327 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
328 (int)f_offset, size, upl_offset, flags, 0);
329
330 if ((flags & CL_READ) && ((upl_offset + non_rounded_size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
331 /*
332 * this is a read that extends to the end of a file whose size
333 * isn't a multiple of PAGE_SIZE, so we're going to end up with
334 * a page that we can't completely fill from the file... go ahead
335 * and zero out the portion of the page we can't read in from
336 * the file
337 */
338 zero_offset = upl_offset + non_rounded_size;
339 }
340 while (size) {
341 size_t io_size;
342 int vsize;
343 int i;
344 int pl_index;
345 int pg_resid;
346 int num_contig;
347 daddr_t lblkno;
348 daddr_t blkno;
349
350 if (size > max_iosize)
351 io_size = max_iosize;
352 else
353 io_size = size;
354
355 if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, &io_size, NULL)) {
356 if (error == EOPNOTSUPP)
357 panic("VOP_CMAP Unimplemented");
358 break;
359 }
360
361 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
362 (int)f_offset, (int)blkno, io_size, zero_offset, 0);
363
364 if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {
365 if (flags & CL_PAGEOUT) {
366 error = EINVAL;
367 break;
368 };
369
370 /* Try paging out the page individually before
371 giving up entirely and dumping it (it could
372 be mapped in a "hole" and require allocation
373 before the I/O can be issued)
374 */
375 ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE_64, UPL_ABORT_FREE_ON_EMPTY);
376 if (ubc_pushdirty_range(vp, f_offset, PAGE_SIZE_64) == 0) {
377 error = EINVAL;
378 break;
379 };
380
381 upl_offset += PAGE_SIZE_64;
382 f_offset += PAGE_SIZE_64;
383 size -= PAGE_SIZE_64;
384 continue;
385 }
386 lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);
387 /*
388 * we have now figured out how much I/O we can do - this is in 'io_size'
389 * pl_index represents the first page in the 'upl' that the I/O will occur for
390 * pg_offset is the starting point in the first page for the I/O
391 * pg_count is the number of full and partial pages that 'io_size' encompasses
392 */
393 pl_index = upl_offset / PAGE_SIZE;
394 pg_offset = upl_offset & PAGE_MASK;
395 pg_count = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;
396
397 if (flags & CL_DEV_MEMORY) {
398 /*
399 * currently, can't deal with reading 'holes' in file
400 */
401 if ((long)blkno == -1) {
402 error = EINVAL;
403 break;
404 }
405 /*
406 * treat physical requests as one 'giant' page
407 */
408 pg_count = 1;
409 }
410 if ((flags & CL_READ) && (long)blkno == -1) {
411 int bytes_to_zero;
412
413 /*
414 * if we're reading and blkno == -1, then we've got a
415 * 'hole' in the file that we need to deal with by zeroing
416 * out the affected area in the upl
417 */
418 if (zero_offset && io_size == size) {
419 /*
420 * if this upl contains the EOF and it is not a multiple of PAGE_SIZE
421 * then 'zero_offset' will be non-zero
422 * if the 'hole' returned by VOP_CMAP extends all the way to the eof
423 * (indicated by the io_size finishing off the I/O request for this UPL)
424 * then we're not going to issue an I/O for the
425 * last page in this upl... we need to zero both the hole and the tail
426 * of the page beyond the EOF, since the delayed zero-fill won't kick in
427 */
428 bytes_to_zero = (((upl_offset + io_size) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - upl_offset;
429
430 zero_offset = 0;
431 } else
432 bytes_to_zero = io_size;
433
434 cluster_zero(upl, upl_offset, bytes_to_zero, real_bp);
435
436 if (cbp_head)
437 /*
438 * if there is a current I/O chain pending
439 * then the first page of the group we just zero'd
440 * will be handled by the I/O completion if the zero
441 * fill started in the middle of the page
442 */
443 pg_count = (io_size - pg_offset) / PAGE_SIZE;
444 else {
445 /*
446 * no pending I/O to pick up that first page
447 * so, we have to make sure it gets committed
448 * here.
449 * set the pg_offset to 0 so that the upl_commit_range
450 * starts with this page
451 */
452 pg_count = (io_size + pg_offset) / PAGE_SIZE;
453 pg_offset = 0;
454 }
455 if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))
456 /*
457 * if we're done with the request for this UPL
458 * then we have to make sure to commit the last page
459 * even if we only partially zero-filled it
460 */
461 pg_count++;
462
463 if (pg_count) {
464 if (pg_offset)
465 pg_resid = PAGE_SIZE - pg_offset;
466 else
467 pg_resid = 0;
468
469 if (flags & CL_COMMIT)
470 ubc_upl_commit_range(upl,
471 (upl_offset + pg_resid) & ~PAGE_MASK,
472 pg_count * PAGE_SIZE,
473 UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
474 }
475 upl_offset += io_size;
476 f_offset += io_size;
477 size -= io_size;
478
479 if (cbp_head && pg_count)
480 goto start_io;
481 continue;
482
483 } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
484 real_bp->b_blkno = blkno;
485 }
486
487 if (pg_count > 1) {
488 if (pg_count > max_vectors) {
489 io_size -= (pg_count - max_vectors) * PAGE_SIZE;
490
491 if (io_size < 0) {
492 io_size = PAGE_SIZE - pg_offset;
493 pg_count = 1;
494 } else
495 pg_count = max_vectors;
496 }
497 /*
498 * we need to allocate space for the vector list
499 */
500 if (pg_count > 1) {
501 iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
502 M_SEGMENT, M_NOWAIT);
503
504 if (iovp == (struct iovec *) 0) {
505 /*
506 * if the allocation fails, then throttle down to a single page
507 */
508 io_size = PAGE_SIZE - pg_offset;
509 pg_count = 1;
510 }
511 }
512 }
513
514 /* Throttle the speculative IO */
515 if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT))
516 priv = 0;
517 else
518 priv = 1;
519
520 cbp = alloc_io_buf(vp, priv);
521
522 if (pg_count == 1)
523 /*
524 * we use the io vector that's reserved in the buffer header
525 * this ensures we can always issue an I/O even in a low memory
526 * condition that prevents the _MALLOC from succeeding... this
527 * is necessary to prevent deadlocks with the pager
528 */
529 iovp = (struct iovec *)(&cbp->b_vects[0]);
530
531 cbp->b_vectorlist = (void *)iovp;
532 cbp->b_vectorcount = pg_count;
533
534 if (flags & CL_DEV_MEMORY) {
535
536 iovp->iov_len = io_size;
537 iovp->iov_base = (caddr_t)upl_phys_page(pl, 0);
538
539 if (iovp->iov_base == (caddr_t) 0) {
540 free_io_buf(cbp);
541 error = EINVAL;
542 } else
543 iovp->iov_base += upl_offset;
544 } else {
545
546 for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {
547 int psize;
548
549 psize = PAGE_SIZE - pg_offset;
550
551 if (psize > vsize)
552 psize = vsize;
553
554 iovp->iov_len = psize;
555 iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);
556
557 if (iovp->iov_base == (caddr_t) 0) {
558 if (pg_count > 1)
559 _FREE(cbp->b_vectorlist, M_SEGMENT);
560 free_io_buf(cbp);
561
562 error = EINVAL;
563 break;
564 }
565 iovp->iov_base += pg_offset;
566 pg_offset = 0;
567
568 if (flags & CL_PAGEOUT) {
569 int s;
570 struct buf *bp;
571
572 s = splbio();
573 if (bp = incore(vp, lblkno + i)) {
574 if (!ISSET(bp->b_flags, B_BUSY)) {
575 bremfree(bp);
576 SET(bp->b_flags, (B_BUSY | B_INVAL));
577 splx(s);
578 brelse(bp);
579 } else
580 panic("BUSY bp found in cluster_io");
581 }
582 splx(s);
583 }
584 vsize -= psize;
585 }
586 }
587 if (error)
588 break;
589
590 if (flags & CL_ASYNC)
591 cbp->b_iodone = (void *)cluster_iodone;
592 cbp->b_flags |= io_flags;
593
594 cbp->b_lblkno = lblkno;
595 cbp->b_blkno = blkno;
596 cbp->b_bcount = io_size;
597 cbp->b_pagelist = upl;
598 cbp->b_uploffset = upl_offset;
599 cbp->b_trans_next = (struct buf *)0;
600
601 if (flags & CL_READ)
602 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
603 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
604 else
605 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
606 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
607
608 if (cbp_head) {
609 cbp_tail->b_trans_next = cbp;
610 cbp_tail = cbp;
611 } else {
612 cbp_head = cbp;
613 cbp_tail = cbp;
614 }
615 (struct buf *)(cbp->b_trans_head) = cbp_head;
616 buf_count++;
617
618 upl_offset += io_size;
619 f_offset += io_size;
620 size -= io_size;
621
622 if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY) && ((flags & CL_ASYNC) || buf_count > 8)) || size == 0) {
623 /*
624 * if we have no more I/O to issue, or
625 * the current I/O we've prepared fully
626 * completes the last page in this request
627 * and it's either an ASYNC request or
628 * we've already accumulated more than 8 I/O's into
629 * this transaction, and it's not an I/O directed to
630 * special DEVICE memory,
631 * then go ahead and issue the I/O
632 */
633 start_io:
634 if (flags & CL_COMMIT)
635 cbp_head->b_flags |= B_COMMIT_UPL;
636 if (flags & CL_PAGEOUT)
637 cbp_head->b_flags |= B_PAGEOUT;
638 if (flags & CL_PAGEIN)
639 cbp_head->b_flags |= B_PGIN;
640
641 if (real_bp) {
642 cbp_head->b_flags |= B_NEED_IODONE;
643 cbp_head->b_real_bp = real_bp;
644 } else
645 cbp_head->b_real_bp = (struct buf *)NULL;
646
647 if (size == 0) {
648 /*
649 * we're about to issue the last I/O for this upl
650 * if this was a read to the eof and the eof doesn't
651 * finish on a page boundary, then we need to zero-fill
652 * the rest of the page....
653 */
654 cbp_head->b_validend = zero_offset;
655 } else
656 cbp_head->b_validend = 0;
657
658 for (cbp = cbp_head; cbp;) {
659 struct buf * cbp_next;
660
661 if (io_flags & B_WRITEINPROG)
662 cbp->b_vp->v_numoutput++;
663
664 cbp_next = cbp->b_trans_next;
665
666 (void) VOP_STRATEGY(cbp);
667 cbp = cbp_next;
668 }
669 if ( !(flags & CL_ASYNC)) {
670 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)
671 biowait(cbp);
672
673 if (error = cluster_iodone(cbp_head)) {
674 if ((flags & CL_PAGEOUT) && (error == ENXIO))
675 retval = 0; /* drop the error */
676 else
677 retval = error;
678 error = 0;
679 }
680 }
681 cbp_head = (struct buf *)0;
682 cbp_tail = (struct buf *)0;
683
684 buf_count = 0;
685 }
686 }
687 if (error) {
688 int abort_size;
689
690 for (cbp = cbp_head; cbp;) {
691 struct buf * cbp_next;
692
693 if (cbp->b_vectorcount > 1)
694 _FREE(cbp->b_vectorlist, M_SEGMENT);
695 upl_offset -= cbp->b_bcount;
696 size += cbp->b_bcount;
697
698 cbp_next = cbp->b_trans_next;
699 free_io_buf(cbp);
700 cbp = cbp_next;
701 }
702 pg_offset = upl_offset & PAGE_MASK;
703 abort_size = ((size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
704
705 if (flags & CL_COMMIT) {
706 int upl_abort_code;
707
708 if ((flags & CL_PAGEOUT) && (error != ENXIO)) /* transient error */
709 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
710 else if (flags & CL_PAGEIN)
711 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
712 else
713 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
714
715 ubc_upl_abort_range(upl, upl_offset - pg_offset, abort_size,
716 upl_abort_code);
717
718 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
719 (int)upl, upl_offset - pg_offset, abort_size, error, 0);
720 }
721 if (real_bp) {
722 real_bp->b_flags |= B_ERROR;
723 real_bp->b_error = error;
724
725 biodone(real_bp);
726 }
727 if (retval == 0)
728 retval = error;
729 }
730 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
731 (int)f_offset, size, upl_offset, retval, 0);
732
733 return (retval);
734 }
735
736
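/*
 * cluster_rd_prefetch - issue an advisory read of up to
 * MAX_UPL_TRANSFER pages starting at f_offset, clipped to the end of
 * the file.  Pages at the front of the range that are already resident
 * (per ubc_page_op) are skipped; if anything remains, advisory_read
 * brings it in.  Returns the number of pages spanned by the (clipped)
 * request so the caller can advance its read-ahead window.
 */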
737 static int
738 cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize)
739 struct vnode *vp;
740 off_t f_offset;
741 u_int size;
742 off_t filesize;
743 int devblocksize;
744 {
745 int pages_to_fetch;
746 int skipped_pages;
747
748 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
749 (int)f_offset, size, (int)filesize, 0, 0);
750
751 if (f_offset >= filesize) {
752 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
753 (int)f_offset, 0, 0, 0, 0);
754 return(0);
755 }
756 if (size > (MAX_UPL_TRANSFER * PAGE_SIZE))
757 size = MAX_UPL_TRANSFER * PAGE_SIZE;
758 else
759 size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
760
761 if ((off_t)size > (filesize - f_offset))
762 size = filesize - f_offset;
763
764 pages_to_fetch = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;
765
766 for (skipped_pages = 0; skipped_pages < pages_to_fetch; skipped_pages++) {
767 if (ubc_page_op(vp, f_offset, 0, 0, 0) != KERN_SUCCESS)
768 break;
769 f_offset += PAGE_SIZE;
770 size -= PAGE_SIZE;
771 }
772 if (skipped_pages < pages_to_fetch)
773 advisory_read(vp, filesize, f_offset, size, devblocksize);
774
775 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
776 (int)f_offset + (pages_to_fetch * PAGE_SIZE), skipped_pages, 0, 1, 0);
777
778 return (pages_to_fetch);
779 }
780
781
782
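/*
 * cluster_rd_ahead - sequential read-ahead.  If the current read does
 * not follow on from v_lastr (or the previously prefetched range), the
 * read-ahead window v_ralen is reset and nothing is done.  Otherwise the
 * window is doubled (capped at MAX_UPL_TRANSFER pages) and, unless
 * enough pages beyond e_lblkno have already been prefetched,
 * cluster_rd_prefetch is called for the pages following
 * max(e_lblkno, v_maxra), with v_maxra updated to the last page covered.
 */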
783 static void
784 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize)
785 struct vnode *vp;
786 daddr_t b_lblkno;
787 daddr_t e_lblkno;
788 off_t filesize;
789 int devblocksize;
790 {
791 daddr_t r_lblkno;
792 off_t f_offset;
793 int size_of_prefetch;
794 int max_pages;
795
796 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
797 b_lblkno, e_lblkno, vp->v_lastr, 0, 0);
798
799 if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
800 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
801 vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);
802 return;
803 }
804
805 if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) &&
806 (b_lblkno != (vp->v_maxra + 1) || vp->v_ralen == 0))) {
807 vp->v_ralen = 0;
808 vp->v_maxra = 0;
809
810 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
811 vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);
812
813 return;
814 }
815 max_pages = MAX_UPL_TRANSFER;
816
817 vp->v_ralen = vp->v_ralen ? min(max_pages, vp->v_ralen << 1) : 1;
818
819 if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
820 vp->v_ralen = min(max_pages, (e_lblkno + 1) - b_lblkno);
821
822 if (e_lblkno < vp->v_maxra) {
823 if ((vp->v_maxra - e_lblkno) > max(max_pages / 16, 4)) {
824
825 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
826 vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);
827 return;
828 }
829 }
830 r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
831 f_offset = (off_t)r_lblkno * PAGE_SIZE_64;
832
833 if (f_offset < filesize) {
834 size_of_prefetch = cluster_rd_prefetch(vp, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);
835
836 if (size_of_prefetch)
837 vp->v_maxra = (r_lblkno + size_of_prefetch) - 1;
838 }
839 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
840 vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
841 }
842
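/*
 * cluster_pageout - exported pageout entry point (typically reached from
 * a filesystem's VOP_PAGEOUT).  The request is validated and clipped to
 * the current file size, any portion of the upl beyond that is aborted,
 * the caller is throttled once ASYNC_THROTTLE writes are outstanding on
 * the vnode, and the remainder is handed to cluster_io with CL_PAGEOUT
 * (asynchronously unless UPL_IOSYNC was specified).
 */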
843 int
844 cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
845 struct vnode *vp;
846 upl_t upl;
847 vm_offset_t upl_offset;
848 off_t f_offset;
849 int size;
850 off_t filesize;
851 int devblocksize;
852 int flags;
853 {
854 int io_size;
855 int pg_size;
856 off_t max_size;
857 int local_flags = CL_PAGEOUT;
858
859 if ((flags & UPL_IOSYNC) == 0)
860 local_flags |= CL_ASYNC;
861 if ((flags & UPL_NOCOMMIT) == 0)
862 local_flags |= CL_COMMIT;
863
864
865 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
866 (int)f_offset, size, (int)filesize, local_flags, 0);
867
868 /*
869 * If they didn't specify any I/O, then we are done...
870 * we can't issue an abort because we don't know how
871 * big the upl really is
872 */
873 if (size <= 0)
874 return (EINVAL);
875
876 if (vp->v_mount->mnt_flag & MNT_RDONLY) {
877 if (local_flags & CL_COMMIT)
878 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
879 return (EROFS);
880 }
881 /*
882 * can't page-out to a negative offset
883 * or if we're starting beyond the EOF
884 * or if the file offset isn't page aligned
885 * or the size requested isn't a multiple of PAGE_SIZE
886 */
887 if (f_offset < 0 || f_offset >= filesize ||
888 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
889 if (local_flags & CL_COMMIT)
890 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
891 return (EINVAL);
892 }
893 max_size = filesize - f_offset;
894
895 if (size < max_size)
896 io_size = size;
897 else
898 io_size = max_size;
899
900 pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
901
902 if (size > pg_size) {
903 if (local_flags & CL_COMMIT)
904 ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
905 UPL_ABORT_FREE_ON_EMPTY);
906 }
907 while (vp->v_numoutput >= ASYNC_THROTTLE) {
908 vp->v_flag |= VTHROTTLED;
909 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_pageout", 0);
910 }
911
912 return (cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
913 local_flags, (struct buf *)0));
914 }
915
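/*
 * cluster_pagein - exported pagein entry point (typically reached from a
 * filesystem's VOP_PAGEIN).  Alignment and bounds are validated, any part
 * of the upl beyond the EOF-clipped transfer is aborted with an error,
 * and the read is issued through cluster_io with CL_READ | CL_PAGEIN.
 * On success, sequential read-ahead is kicked off for single-page
 * requests (unless UPL_NORDAHEAD or VRAOFF is set) and v_lastr is
 * updated.
 */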
916 int
917 cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
918 struct vnode *vp;
919 upl_t upl;
920 vm_offset_t upl_offset;
921 off_t f_offset;
922 int size;
923 off_t filesize;
924 int devblocksize;
925 int flags;
926 {
927 u_int io_size;
928 int rounded_size;
929 off_t max_size;
930 int retval;
931 int local_flags = 0;
932
933 if (upl == NULL || size < 0)
934 panic("cluster_pagein: NULL upl passed in");
935
936 if ((flags & UPL_IOSYNC) == 0)
937 local_flags |= CL_ASYNC;
938 if ((flags & UPL_NOCOMMIT) == 0)
939 local_flags |= CL_COMMIT;
940
941
942 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
943 (int)f_offset, size, (int)filesize, local_flags, 0);
944
945 /*
946 * can't page-in from a negative offset
947 * or if we're starting beyond the EOF
948 * or if the file offset isn't page aligned
949 * or the size requested isn't a multiple of PAGE_SIZE
950 */
951 if (f_offset < 0 || f_offset >= filesize ||
952 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK) || (upl_offset & PAGE_MASK)) {
953 if (local_flags & CL_COMMIT)
954 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
955 return (EINVAL);
956 }
957 max_size = filesize - f_offset;
958
959 if (size < max_size)
960 io_size = size;
961 else
962 io_size = max_size;
963
964 rounded_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
965
966 if (size > rounded_size && (local_flags & CL_COMMIT))
967 ubc_upl_abort_range(upl, upl_offset + rounded_size,
968 size - (upl_offset + rounded_size), UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
969
970 retval = cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
971 local_flags | CL_READ | CL_PAGEIN, (struct buf *)0);
972
973 if (retval == 0) {
974 int b_lblkno;
975 int e_lblkno;
976
977 b_lblkno = (int)(f_offset / PAGE_SIZE_64);
978 e_lblkno = (int)
979 ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);
980
981 if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF) && rounded_size == PAGE_SIZE) {
982 /*
983 * we haven't read the last page of the file in yet
984 * so let's try to read ahead if we're in
985 * a sequential access pattern
986 */
987 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
988 }
989 vp->v_lastr = e_lblkno;
990 }
991 return (retval);
992 }
993
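/*
 * cluster_bp - route a conventional struct buf that already carries a
 * upl (b_pagelist) through cluster_io as an asynchronous transfer,
 * using ubc_blktooff to convert its logical block number into a file
 * offset.
 */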
994 int
995 cluster_bp(bp)
996 struct buf *bp;
997 {
998 off_t f_offset;
999 int flags;
1000
1001 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 19)) | DBG_FUNC_START,
1002 (int)bp, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
1003
1004 if (bp->b_pagelist == (upl_t) 0)
1005 panic("cluster_bp: can't handle NULL upl yet\n");
1006 if (bp->b_flags & B_READ)
1007 flags = CL_ASYNC | CL_READ;
1008 else
1009 flags = CL_ASYNC;
1010
1011 f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);
1012
1013 return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, 0, flags, bp));
1014 }
1015
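/*
 * cluster_write - top level write entry point.  When caching is in
 * effect (VNOCACHE_DATA clear) or the uio is not from user space, the
 * whole request goes through the buffered path, cluster_write_x.
 * Otherwise each iovec is examined: physically contiguous targets are
 * written via cluster_phys_write, small requests and head/tail
 * zero-fill cases fall back to cluster_write_x, and large page-aligned
 * transfers take the direct path, cluster_nocopy_write.  uio_resid is
 * temporarily clipped around each helper call so that it only sees its
 * portion of the request.
 */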
1016 int
1017 cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
1018 struct vnode *vp;
1019 struct uio *uio;
1020 off_t oldEOF;
1021 off_t newEOF;
1022 off_t headOff;
1023 off_t tailOff;
1024 int devblocksize;
1025 int flags;
1026 {
1027 int prev_resid;
1028 int clip_size;
1029 off_t max_io_size;
1030 struct iovec *iov;
1031 vm_offset_t upl_offset;
1032 int upl_size;
1033 int pages_in_pl;
1034 upl_page_info_t *pl;
1035 int upl_flags;
1036 upl_t upl;
1037 int retval = 0;
1038
1039
1040 if ((!uio) || (uio->uio_segflg != UIO_USERSPACE) || (!(vp->v_flag & VNOCACHE_DATA)))
1041 {
1042 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1043 return(retval);
1044 }
1045
1046 while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0)
1047 {
1048 /* we know we have a resid, so this is safe */
1049 iov = uio->uio_iov;
1050 while (iov->iov_len == 0) {
1051 uio->uio_iov++;
1052 uio->uio_iovcnt--;
1053 iov = uio->uio_iov;
1054 }
1055
1056 /*
1057 * We check every vector target and if it is physically
1058 * contiguous space, we skip the sanity checks.
1059 */
1060
1061 upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
1062 upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
1063 pages_in_pl = 0;
1064 upl_flags = UPL_QUERY_OBJECT_TYPE;
1065 if ((vm_map_get_upl(current_map(),
1066 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1067 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
1068 {
1069 /*
1070 * the user app must have passed in an invalid address
1071 */
1072 return (EFAULT);
1073 }
1074
1075 if (upl_flags & UPL_PHYS_CONTIG)
1076 {
1077 /*
1078 * since the interface to the IOKit below us uses physical block #'s and
1079 * block counts to specify the I/O, we can't handle anything that isn't
1080 * devblocksize aligned
1081 */
1082 if ((uio->uio_offset & (devblocksize - 1)) || (uio->uio_resid & (devblocksize - 1)))
1083 return(EINVAL);
1084
1085 if (flags & IO_HEADZEROFILL)
1086 {
1087 flags &= ~IO_HEADZEROFILL;
1088
1089 if (retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, devblocksize, IO_HEADZEROFILL))
1090 return(retval);
1091 }
1092
1093 retval = cluster_phys_write(vp, uio);
1094
1095 if (uio->uio_resid == 0 && (flags & IO_TAILZEROFILL))
1096 {
1097 retval = cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, devblocksize, IO_HEADZEROFILL);
1098 return(retval);
1099 }
1100 }
1101 else if ((uio->uio_resid < 4 * PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)))
1102 {
1103 /*
1104 * We set a threshold of 4 pages to decide if the nocopy
1105 * write loop is worth the trouble...
1106 * we also come here if we're trying to zero the head and/or tail
1107 * of a partially written page, and the user source is not a physically contiguous region
1108 */
1109 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1110 return(retval);
1111 }
1112 else if (uio->uio_offset & PAGE_MASK_64)
1113 {
1114 /* Bring the file offset write up to a pagesize boundary */
1115 clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
1116 if (uio->uio_resid < clip_size)
1117 clip_size = uio->uio_resid;
1118 /*
1119 * Fake the resid going into the cluster_write_x call
1120 * and restore it on the way out.
1121 */
1122 prev_resid = uio->uio_resid;
1123 uio->uio_resid = clip_size;
1124 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1125 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1126 }
1127 else if ((int)iov->iov_base & PAGE_MASK_64)
1128 {
1129 clip_size = iov->iov_len;
1130 prev_resid = uio->uio_resid;
1131 uio->uio_resid = clip_size;
1132 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1133 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1134 }
1135 else
1136 {
1137 /*
1138 * If we come in here, we know the offset into
1139 * the file is on a pagesize boundary
1140 */
1141
1142 max_io_size = newEOF - uio->uio_offset;
1143 clip_size = uio->uio_resid;
1144 if (iov->iov_len < clip_size)
1145 clip_size = iov->iov_len;
1146 if (max_io_size < clip_size)
1147 clip_size = max_io_size;
1148
1149 if (clip_size < PAGE_SIZE)
1150 {
1151 /*
1152 * Take care of tail end of write in this vector
1153 */
1154 prev_resid = uio->uio_resid;
1155 uio->uio_resid = clip_size;
1156 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1157 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1158 }
1159 else
1160 {
1161 /* round clip_size down to a multiple of pagesize */
1162 clip_size = clip_size & ~(PAGE_MASK);
1163 prev_resid = uio->uio_resid;
1164 uio->uio_resid = clip_size;
1165 retval = cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags);
1166 if ((retval == 0) && uio->uio_resid)
1167 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1168 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1169 }
1170 } /* end else */
1171 } /* end while */
1172 return(retval);
1173 }
1174
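/*
 * cluster_nocopy_write - direct (uncached) write path.  The user buffer
 * is wired down with vm_map_get_upl (retrying with increasing
 * force_data_sync), any pages for the target file range that are already
 * in the cache are dumped via ubc_page_op, and a synchronous cluster_io
 * is issued straight from the user pages.  The upl is then released with
 * an abort rather than a commit so that the dirty state of the source
 * pages is left untouched by the write.
 */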
1175 static int
1176 cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags)
1177 struct vnode *vp;
1178 struct uio *uio;
1179 off_t newEOF;
1180 int devblocksize;
1181 int flags;
1182 {
1183 upl_t upl;
1184 upl_page_info_t *pl;
1185 off_t upl_f_offset;
1186 vm_offset_t upl_offset;
1187 off_t max_io_size;
1188 int io_size;
1189 int upl_size;
1190 int upl_needed_size;
1191 int pages_in_pl;
1192 int upl_flags;
1193 kern_return_t kret;
1194 struct iovec *iov;
1195 int i;
1196 int force_data_sync;
1197 int error = 0;
1198
1199 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
1200 (int)uio->uio_offset, (int)uio->uio_resid,
1201 (int)newEOF, devblocksize, 0);
1202
1203 /*
1204 * When we enter this routine, we know
1205 * -- the offset into the file is on a pagesize boundary
1206 * -- the resid is a page multiple
1207 * -- the resid will not exceed iov_len
1208 */
1209
1210 iov = uio->uio_iov;
1211
1212 while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
1213 io_size = uio->uio_resid;
1214
1215 if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1216 io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1217
1218 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
1219 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
1220
1221 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
1222 (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);
1223
1224 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
1225 {
1226 pages_in_pl = 0;
1227 upl_size = upl_needed_size;
1228 upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
1229 UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
1230
1231 kret = vm_map_get_upl(current_map(),
1232 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1233 &upl_size,
1234 &upl,
1235 NULL,
1236 &pages_in_pl,
1237 &upl_flags,
1238 force_data_sync);
1239
1240 if (kret != KERN_SUCCESS)
1241 {
1242 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1243 0, 0, 0, kret, 0);
1244
1245 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1246 (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);
1247
1248 /* cluster_nocopy_write: failed to get pagelist */
1249 /* do not return kret here */
1250 return(0);
1251 }
1252
1253 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1254 pages_in_pl = upl_size / PAGE_SIZE;
1255
1256 for(i=0; i < pages_in_pl; i++)
1257 {
1258 if (!upl_valid_page(pl, i))
1259 break;
1260 }
1261
1262 if (i == pages_in_pl)
1263 break;
1264
1265 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1266 UPL_ABORT_FREE_ON_EMPTY);
1267 }
1268
1269 if (force_data_sync >= 3)
1270 {
1271 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1272 i, pages_in_pl, upl_size, kret, 0);
1273
1274 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1275 (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);
1276 return(0);
1277 }
1278
1279 /*
1280 * Consider the possibility that upl_size wasn't satisfied.
1281 */
1282 if (upl_size != upl_needed_size)
1283 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
1284
1285 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1286 (int)upl_offset, upl_size, (int)iov->iov_base, io_size, 0);
1287
1288 if (io_size == 0)
1289 {
1290 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1291 UPL_ABORT_FREE_ON_EMPTY);
1292 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1293 (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);
1294
1295 return(0);
1296 }
1297
1298 /*
1299 * Now look for pages already in the cache
1300 * and throw them away.
1301 */
1302
1303 upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
1304 max_io_size = io_size;
1305
1306 while (max_io_size) {
1307
1308 /*
1309 * Flag UPL_POP_DUMP says if the page is found
1310 * in the page cache it must be thrown away.
1311 */
1312 ubc_page_op(vp,
1313 upl_f_offset,
1314 UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
1315 0, 0);
1316 max_io_size -= PAGE_SIZE;
1317 upl_f_offset += PAGE_SIZE;
1318 }
1319
1320 /*
1321 * issue a synchronous write to cluster_io
1322 */
1323
1324 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
1325 (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);
1326
1327 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
1328 io_size, devblocksize, 0, (struct buf *)0);
1329
1330 if (error == 0) {
1331 /*
1332 * The cluster_io write completed successfully,
1333 * update the uio structure.
1334 */
1335 iov->iov_base += io_size;
1336 iov->iov_len -= io_size;
1337 uio->uio_resid -= io_size;
1338 uio->uio_offset += io_size;
1339 }
1340 /*
1341 * always 'commit' the I/O via the abort primitive whether the I/O
1342 * succeeded cleanly or not... this is necessary to ensure that
1343 * we preserve the state of the DIRTY flag on the pages used to
1344 * provide the data for the I/O... the state of this flag SHOULD
1345 * NOT be changed by a write
1346 */
1347 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1348 UPL_ABORT_FREE_ON_EMPTY);
1349
1350
1351 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
1352 (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
1353
1354 } /* end while */
1355
1356
1357 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1358 (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
1359
1360 return (error);
1361 }
1362
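/*
 * cluster_phys_write - write path for an iovec whose target is
 * physically contiguous memory: a upl is built for the user buffer and
 * a single synchronous CL_DEV_MEMORY transfer is issued through
 * cluster_io, committing the upl on success and aborting it on failure.
 */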
1363 static int
1364 cluster_phys_write(vp, uio)
1365 struct vnode *vp;
1366 struct uio *uio;
1367 {
1368 upl_t upl;
1369 vm_offset_t upl_offset;
1370 int io_size;
1371 int upl_size;
1372 int upl_needed_size;
1373 int pages_in_pl;
1374 int upl_flags;
1375 kern_return_t kret;
1376 struct iovec *iov;
1377 int error = 0;
1378
1379 /*
1380 * When we enter this routine, we know
1381 * -- the resid will not exceed iov_len
1382 * -- the vector target address is physically contiguous
1383 */
1384
1385 iov = uio->uio_iov;
1386 io_size = iov->iov_len;
1387 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
1388 upl_needed_size = upl_offset + io_size;
1389
1390 pages_in_pl = 0;
1391 upl_size = upl_needed_size;
1392 upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
1393 UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
1394
1395 kret = vm_map_get_upl(current_map(),
1396 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1397 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);
1398
1399 if (kret != KERN_SUCCESS)
1400 {
1401 /* cluster_phys_write: failed to get pagelist */
1402 /* note: return kret here */
1403 return(EINVAL);
1404 }
1405
1406 /*
1407 * Consider the possibility that upl_size wasn't satisfied.
1408 * This is a failure in the physical memory case.
1409 */
1410 if (upl_size < upl_needed_size)
1411 {
1412 kernel_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1413 return(EINVAL);
1414 }
1415
1416 /*
1417 * issue a synchronous write to cluster_io
1418 */
1419
1420 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
1421 io_size, 0, CL_DEV_MEMORY, (struct buf *)0);
1422
1423 if (error == 0) {
1424 /*
1425 * The cluster_io write completed successfully,
1426 * update the uio structure and commit.
1427 */
1428
1429 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);
1430
1431 iov->iov_base += io_size;
1432 iov->iov_len -= io_size;
1433 uio->uio_resid -= io_size;
1434 uio->uio_offset += io_size;
1435 }
1436 else
1437 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1438
1439 return (error);
1440 }
1441
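/*
 * cluster_write_x - buffered write path.  For each chunk a upl is
 * created over the affected file pages, any page that will only be
 * partially overwritten and is not already valid in memory is pre-read,
 * the upl is mapped and the user data is copied in (along with any
 * IO_HEADZEROFILL/IO_TAILZEROFILL zeroing), and then the I/O is either
 * issued immediately (IO_SYNC) or the dirty page range is recorded in
 * the vnode's cluster list (v_clusters) for a later push, falling back
 * to the VHASDIRTY mechanism when no cluster can be extended or pushed.
 */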
1442 static int
1443 cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
1444 struct vnode *vp;
1445 struct uio *uio;
1446 off_t oldEOF;
1447 off_t newEOF;
1448 off_t headOff;
1449 off_t tailOff;
1450 int devblocksize;
1451 int flags;
1452 {
1453 upl_page_info_t *pl;
1454 upl_t upl;
1455 vm_offset_t upl_offset;
1456 int upl_size;
1457 off_t upl_f_offset;
1458 int pages_in_upl;
1459 int start_offset;
1460 int xfer_resid;
1461 int io_size;
1462 int io_flags;
1463 vm_offset_t io_address;
1464 int io_offset;
1465 int bytes_to_zero;
1466 int bytes_to_move;
1467 kern_return_t kret;
1468 int retval = 0;
1469 int uio_resid;
1470 long long total_size;
1471 long long zero_cnt;
1472 off_t zero_off;
1473 long long zero_cnt1;
1474 off_t zero_off1;
1475 daddr_t start_blkno;
1476 daddr_t last_blkno;
1477
1478 if (uio) {
1479 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1480 (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);
1481
1482 uio_resid = uio->uio_resid;
1483 } else {
1484 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1485 0, 0, (int)oldEOF, (int)newEOF, 0);
1486
1487 uio_resid = 0;
1488 }
1489 zero_cnt = 0;
1490 zero_cnt1 = 0;
1491
1492 if (flags & IO_HEADZEROFILL) {
1493 /*
1494 * some filesystems (HFS is one) don't support unallocated holes within a file...
1495 * so we zero fill the intervening space between the old EOF and the offset
1496 * where the next chunk of real data begins.... ftruncate will also use this
1497 * routine to zero fill to the new EOF when growing a file... in this case, the
1498 * uio structure will not be provided
1499 */
1500 if (uio) {
1501 if (headOff < uio->uio_offset) {
1502 zero_cnt = uio->uio_offset - headOff;
1503 zero_off = headOff;
1504 }
1505 } else if (headOff < newEOF) {
1506 zero_cnt = newEOF - headOff;
1507 zero_off = headOff;
1508 }
1509 }
1510 if (flags & IO_TAILZEROFILL) {
1511 if (uio) {
1512 zero_off1 = uio->uio_offset + uio->uio_resid;
1513
1514 if (zero_off1 < tailOff)
1515 zero_cnt1 = tailOff - zero_off1;
1516 }
1517 }
1518 if (zero_cnt == 0 && uio == (struct uio *) 0)
1519 {
1520 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1521 retval, 0, 0, 0, 0);
1522 return (0);
1523 }
1524
1525 while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
1526 /*
1527 * for this iteration of the loop, figure out where our starting point is
1528 */
1529 if (zero_cnt) {
1530 start_offset = (int)(zero_off & PAGE_MASK_64);
1531 upl_f_offset = zero_off - start_offset;
1532 } else if (uio_resid) {
1533 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
1534 upl_f_offset = uio->uio_offset - start_offset;
1535 } else {
1536 start_offset = (int)(zero_off1 & PAGE_MASK_64);
1537 upl_f_offset = zero_off1 - start_offset;
1538 }
1539 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
1540 (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);
1541
1542 if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1543 total_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1544
1545 /*
1546 * compute the size of the upl needed to encompass
1547 * the requested write... limit each call to cluster_io
1548 * to the maximum UPL size... cluster_io will clip if
1549 * this exceeds the maximum io_size for the device...
1550 * make sure to account for
1551 * a starting offset that's not page aligned
1552 */
1553 upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
1554
1555 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1556 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1557
1558 pages_in_upl = upl_size / PAGE_SIZE;
1559 io_size = upl_size - start_offset;
1560
1561 if ((long long)io_size > total_size)
1562 io_size = total_size;
1563
1564 start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
1565 last_blkno = start_blkno + pages_in_upl;
1566
1567 kret = ubc_create_upl(vp,
1568 upl_f_offset,
1569 upl_size,
1570 &upl,
1571 &pl,
1572 UPL_FLAGS_NONE);
1573 if (kret != KERN_SUCCESS)
1574 panic("cluster_write: failed to get pagelist");
1575
1576 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
1577 (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
1578
1579 if (start_offset && !upl_valid_page(pl, 0)) {
1580 int read_size;
1581
1582 /*
1583 * we're starting in the middle of the first page of the upl
1584 * and the page isn't currently valid, so we're going to have
1585 * to read it in first... this is a synchronous operation
1586 */
1587 read_size = PAGE_SIZE;
1588
1589 if ((upl_f_offset + read_size) > newEOF)
1590 read_size = newEOF - upl_f_offset;
1591
1592 retval = cluster_io(vp, upl, 0, upl_f_offset, read_size, devblocksize,
1593 CL_READ, (struct buf *)0);
1594 if (retval) {
1595 /*
1596 * we had an error during the read which causes us to abort
1597 * the current cluster_write request... before we do, we need
1598 * to release the rest of the pages in the upl without modifying
1599 * their state and mark the failed page in error
1600 */
1601 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
1602 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1603
1604 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1605 (int)upl, 0, 0, retval, 0);
1606 break;
1607 }
1608 }
1609 if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
1610 /*
1611 * the last offset we're writing to in this upl does not end on a page
1612 * boundary... if it's not beyond the old EOF, then we'll also need to
1613 * pre-read this page in if it isn't already valid
1614 */
1615 upl_offset = upl_size - PAGE_SIZE;
1616
1617 if ((upl_f_offset + start_offset + io_size) < oldEOF &&
1618 !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {
1619 int read_size;
1620
1621 read_size = PAGE_SIZE;
1622
1623 if ((upl_f_offset + upl_offset + read_size) > newEOF)
1624 read_size = newEOF - (upl_f_offset + upl_offset);
1625
1626 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size, devblocksize,
1627 CL_READ, (struct buf *)0);
1628 if (retval) {
1629 /*
1630 * we had an error during the read which causes us to abort
1631 * the current cluster_write request... before we do, we
1632 * need to release the rest of the pages in the upl without
1633 * modifying their state and mark the failed page in error
1634 */
1635 ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
1636 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1637
1638 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1639 (int)upl, 0, 0, retval, 0);
1640 break;
1641 }
1642 }
1643 }
1644 if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
1645 panic("cluster_write: ubc_upl_map failed\n");
1646 xfer_resid = io_size;
1647 io_offset = start_offset;
1648
1649 while (zero_cnt && xfer_resid) {
1650
1651 if (zero_cnt < (long long)xfer_resid)
1652 bytes_to_zero = zero_cnt;
1653 else
1654 bytes_to_zero = xfer_resid;
1655
1656 if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
1657 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1658
1659 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1660 (int)upl_f_offset + io_offset, bytes_to_zero,
1661 (int)io_offset, xfer_resid, 0);
1662 } else {
1663 int zero_pg_index;
1664
1665 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
1666 zero_pg_index = (int)((zero_off - upl_f_offset) / PAGE_SIZE_64);
1667
1668 if ( !upl_valid_page(pl, zero_pg_index)) {
1669 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1670
1671 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1672 (int)upl_f_offset + io_offset, bytes_to_zero,
1673 (int)io_offset, xfer_resid, 0);
1674
1675 } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
1676 !upl_dirty_page(pl, zero_pg_index)) {
1677 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1678
1679 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1680 (int)upl_f_offset + io_offset, bytes_to_zero,
1681 (int)io_offset, xfer_resid, 0);
1682 }
1683 }
1684 xfer_resid -= bytes_to_zero;
1685 zero_cnt -= bytes_to_zero;
1686 zero_off += bytes_to_zero;
1687 io_offset += bytes_to_zero;
1688 }
1689 if (xfer_resid && uio_resid) {
1690 bytes_to_move = min(uio_resid, xfer_resid);
1691
1692 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
1693 (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);
1694
1695 retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);
1696
1697
1698 if (retval) {
1699 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
1700 panic("cluster_write: kernel_upl_unmap failed\n");
1701
1702 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
1703
1704 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1705 (int)upl, 0, 0, retval, 0);
1706 } else {
1707 uio_resid -= bytes_to_move;
1708 xfer_resid -= bytes_to_move;
1709 io_offset += bytes_to_move;
1710 }
1711 }
1712 while (xfer_resid && zero_cnt1 && retval == 0) {
1713
1714 if (zero_cnt1 < (long long)xfer_resid)
1715 bytes_to_zero = zero_cnt1;
1716 else
1717 bytes_to_zero = xfer_resid;
1718
1719 if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
1720 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1721
1722 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1723 (int)upl_f_offset + io_offset,
1724 bytes_to_zero, (int)io_offset, xfer_resid, 0);
1725 } else {
1726 int zero_pg_index;
1727
1728 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
1729 zero_pg_index = (int)((zero_off1 - upl_f_offset) / PAGE_SIZE_64);
1730
1731 if ( !upl_valid_page(pl, zero_pg_index)) {
1732 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1733
1734 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1735 (int)upl_f_offset + io_offset,
1736 bytes_to_zero, (int)io_offset, xfer_resid, 0);
1737
1738 } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
1739 !upl_dirty_page(pl, zero_pg_index)) {
1740 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1741
1742 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1743 (int)upl_f_offset + io_offset,
1744 bytes_to_zero, (int)io_offset, xfer_resid, 0);
1745 }
1746 }
1747 xfer_resid -= bytes_to_zero;
1748 zero_cnt1 -= bytes_to_zero;
1749 zero_off1 += bytes_to_zero;
1750 io_offset += bytes_to_zero;
1751 }
1752
1753 if (retval == 0) {
1754 int cl_index;
1755 int can_delay;
1756
1757 io_size += start_offset;
1758
1759 if ((upl_f_offset + io_size) >= newEOF && io_size < upl_size) {
1760 /*
1761 * if we're extending the file with this write
1762 * we'll zero fill the rest of the page so that
1763 * if the file gets extended again in such a way as to leave a
1764 * hole starting at this EOF, we'll have zeros in the correct spot
1765 */
1766 bzero((caddr_t)(io_address + io_size), upl_size - io_size);
1767
1768 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1769 (int)upl_f_offset + io_size,
1770 upl_size - io_size, 0, 0, 0);
1771 }
1772 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
1773 panic("cluster_write: kernel_upl_unmap failed\n");
1774
1775 if (flags & IO_SYNC)
1776 /*
1777 * if the IO_SYNC flag is set then we need to
1778 * bypass any clusters and immediately issue
1779 * the I/O
1780 */
1781 goto issue_io;
1782
1783 if (vp->v_clen == 0)
1784 /*
1785 * no clusters currently present
1786 */
1787 goto start_new_cluster;
1788
1789 /*
1790 * keep track of the overall dirty page
1791 * range we've developed
1792 * in case we have to fall back to the
1793 * VHASDIRTY method of flushing
1794 */
1795 if (vp->v_flag & VHASDIRTY)
1796 goto delay_io;
1797
1798 for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
1799 /*
1800 * we have an existing cluster... see if this write will extend it nicely
1801 */
1802 if (start_blkno >= vp->v_clusters[cl_index].start_pg) {
1803 /*
1804 * the current write starts at or after the current cluster
1805 */
1806 if (last_blkno <= (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
1807 /*
1808 * we have a write that fits entirely
1809 * within the existing cluster limits
1810 */
1811 if (last_blkno > vp->v_clusters[cl_index].last_pg)
1812 /*
1813 * update our idea of where the cluster ends
1814 */
1815 vp->v_clusters[cl_index].last_pg = last_blkno;
1816 break;
1817 }
1818 if (start_blkno < (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
1819 /*
1820 * we have a write that starts in the middle of the current cluster
1821 * but extends beyond the cluster's limit
1822 * we'll clip the current cluster if we actually
1823 * overlap with the new write
1824 * and start a new cluster with the current write
1825 */
1826 if (vp->v_clusters[cl_index].last_pg > start_blkno)
1827 vp->v_clusters[cl_index].last_pg = start_blkno;
1828 }
1829 /*
1830 * we also get here for the case where the current write starts
1831 * beyond the limit of the existing cluster
1832 *
1833 * in either case, we'll check the remaining clusters before
1834 * starting a new one
1835 */
1836 } else {
1837 /*
1838 * the current write starts in front of the current cluster
1839 */
1840 if ((vp->v_clusters[cl_index].last_pg - start_blkno) <= MAX_UPL_TRANSFER) {
1841 /*
1842 * we can just merge the old cluster
1843 * with the new request and leave it
1844 * in the cache
1845 */
1846 vp->v_clusters[cl_index].start_pg = start_blkno;
1847
1848 if (last_blkno > vp->v_clusters[cl_index].last_pg) {
1849 /*
1850 * the current write completely
1851 * envelops the existing cluster
1852 */
1853 vp->v_clusters[cl_index].last_pg = last_blkno;
1854 }
1855 break;
1856 }
1857
1858 /*
1859 * if we were to combine this write with the current cluster
1860 * we would exceed the cluster size limit.... so,
1861 * let's see if there's any overlap of the new I/O with
1862 * the existing cluster...
1863 *
1864 */
1865 if (last_blkno > vp->v_clusters[cl_index].start_pg)
1866 /*
1867 * the current write extends into the existing cluster
1868 * clip the current cluster by moving the start position
1869 * to where the current write ends
1870 */
1871 vp->v_clusters[cl_index].start_pg = last_blkno;
1872 /*
1873 * if we get here, there was no way to merge
1874 * the new I/O with this cluster and
1875 * keep it under our maximum cluster length
1876 * we'll check the remaining clusters before starting a new one
1877 */
1878 }
1879 }
1880 if (cl_index < vp->v_clen)
1881 /*
1882 * we found an existing cluster that we
1883 * could merge this I/O into
1884 */
1885 goto delay_io;
1886
1887 if (vp->v_clen < MAX_CLUSTERS && !(vp->v_flag & VNOCACHE_DATA))
1888 /*
1889 * we didn't find an existing cluster to
1890 * merge into, but there's room to start
1891 * a new one
1892 */
1893 goto start_new_cluster;
1894
1895 /*
1896 * no existing cluster to merge with and no
1897 * room to start a new one... we'll try
1898 * pushing the existing ones... if none of
1899 * them are able to be pushed, we'll have
1900 * to fall back on the VHASDIRTY mechanism
1901 * cluster_try_push will set v_clen to the
1902 * number of remaining clusters if it is
1903 * unable to push all of them
1904 */
1905 if (vp->v_flag & VNOCACHE_DATA)
1906 can_delay = 0;
1907 else
1908 can_delay = 1;
1909
1910 if (cluster_try_push(vp, newEOF, can_delay, 0) == 0) {
1911 vp->v_flag |= VHASDIRTY;
1912 goto delay_io;
1913 }
1914 start_new_cluster:
1915 if (vp->v_clen == 0) {
1916 vp->v_ciosiz = devblocksize;
1917 vp->v_cstart = start_blkno;
1918 vp->v_lastw = last_blkno;
1919 }
1920 vp->v_clusters[vp->v_clen].start_pg = start_blkno;
1921 vp->v_clusters[vp->v_clen].last_pg = last_blkno;
1922 vp->v_clen++;
1923 delay_io:
1924 /*
1925 * make sure we keep v_cstart and v_lastw up to
1926 * date in case we have to fall back on the
1927 * VHASDIRTY mechanism (or we've already entered it)
1928 */
1929 if (start_blkno < vp->v_cstart)
1930 vp->v_cstart = start_blkno;
1931 if (last_blkno > vp->v_lastw)
1932 vp->v_lastw = last_blkno;
1933
1934 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
1935 continue;
1936 issue_io:
1937 /*
1938 * in order to maintain some semblance of coherency with mapped writes
1939 * we need to write the cluster back out as a multiple of the PAGESIZE
1940 * unless the cluster encompasses the last page of the file... in this
1941 * case we'll round out to the nearest device block boundary
1942 */
1943 io_size = upl_size;
1944
1945 if ((upl_f_offset + io_size) > newEOF) {
1946 io_size = newEOF - upl_f_offset;
1947 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
1948 }
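/*
 * e.g. (illustrative numbers only): with a 512 byte devblocksize and
 * newEOF just 3 bytes past upl_f_offset, io_size is rounded up from 3
 * to 512 so the tail of the file is still written out in whole
 * device blocks
 */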
1949
1950 if (flags & IO_SYNC)
1951 io_flags = CL_COMMIT | CL_AGE;
1952 else
1953 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
1954
1955 if (vp->v_flag & VNOCACHE_DATA)
1956 io_flags |= CL_DUMP;
1957
1958 while (vp->v_numoutput >= ASYNC_THROTTLE) {
1959 vp->v_flag |= VTHROTTLED;
1960 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_write", 0);
1961 }
1962 retval = cluster_io(vp, upl, 0, upl_f_offset, io_size, devblocksize,
1963 io_flags, (struct buf *)0);
1964 }
1965 }
1966 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1967 retval, 0, 0, 0, 0);
1968
1969 return (retval);
1970 }
1971
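/*
 * cluster_read is the top level read entry point in this file.
 * Cached reads (and anything not coming from user space) go straight
 * to cluster_read_x.  Uncached (VNOCACHE_DATA) reads from user space
 * are carved up per iovec: physically contiguous targets go to
 * cluster_phys_read, small or unaligned pieces go to cluster_read_x,
 * and the page-aligned bulk of the transfer goes to
 * cluster_nocopy_read... uio_resid is clipped around each call so the
 * helpers only see the piece they're meant to handle
 */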
1972 int
1973 cluster_read(vp, uio, filesize, devblocksize, flags)
1974 struct vnode *vp;
1975 struct uio *uio;
1976 off_t filesize;
1977 int devblocksize;
1978 int flags;
1979 {
1980 int prev_resid;
1981 int clip_size;
1982 off_t max_io_size;
1983 struct iovec *iov;
1984 vm_offset_t upl_offset;
1985 int upl_size;
1986 int pages_in_pl;
1987 upl_page_info_t *pl;
1988 int upl_flags;
1989 upl_t upl;
1990 int retval = 0;
1991
1992 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
1993 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
1994
1995 /*
1996 * Only uncached (VNOCACHE_DATA) reads from user space take the
1997 * nocopy read loop below... everything else goes to cluster_read_x
1998 */
1999
2000 if (!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE)))
2001 {
2002 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2003 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
2004 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
2005 return(retval);
2006 }
2007
2008 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0)
2009 {
2010 /* we know we have a resid, so this is safe */
2011 iov = uio->uio_iov;
2012 while (iov->iov_len == 0) {
2013 uio->uio_iov++;
2014 uio->uio_iovcnt--;
2015 iov = uio->uio_iov;
2016 }
2017
2018 /*
2019 * We check every vector target and if it is physically
2020 * contiguous space, we skip the sanity checks.
2021 */
2022
2023 upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
2024 upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
2025 pages_in_pl = 0;
2026 upl_flags = UPL_QUERY_OBJECT_TYPE;
2027 if((vm_map_get_upl(current_map(),
2028 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2029 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
2030 {
2031 /*
2032 * the user app must have passed in an invalid address
2033 */
2034 return (EFAULT);
2035 }
2036
2037 if (upl_flags & UPL_PHYS_CONTIG)
2038 {
2039 retval = cluster_phys_read(vp, uio, filesize);
2040 }
2041 else if (uio->uio_resid < 4 * PAGE_SIZE)
2042 {
2043 /*
2044 * We set a threshold of 4 pages to decide if the nocopy
2045 * read loop is worth the trouble...
2046 */
2047 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2048 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
2049 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
2050 return(retval);
2051 }
2052 else if (uio->uio_offset & PAGE_MASK_64)
2053 {
2054 /* Bring the file offset read up to a pagesize boundary */
2055 clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
2056 if (uio->uio_resid < clip_size)
2057 clip_size = uio->uio_resid;
2058 /*
2059 * Fake the resid going into the cluster_read_x call
2060 * and restore it on the way out.
2061 */
2062 prev_resid = uio->uio_resid;
2063 uio->uio_resid = clip_size;
2064 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2065 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2066 }
2067 else if ((int)iov->iov_base & PAGE_MASK_64)
2068 {
2069 clip_size = iov->iov_len;
2070 prev_resid = uio->uio_resid;
2071 uio->uio_resid = clip_size;
2072 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2073 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2074 }
2075 else
2076 {
2077 /*
2078 * If we come in here, we know the offset into
2079 * the file is on a pagesize boundary
2080 */
2081
2082 max_io_size = filesize - uio->uio_offset;
2083 clip_size = uio->uio_resid;
2084 if (iov->iov_len < clip_size)
2085 clip_size = iov->iov_len;
2086 if (max_io_size < clip_size)
2087 clip_size = (int)max_io_size;
2088
2089 if (clip_size < PAGE_SIZE)
2090 {
2091 /*
2092 * Take care of the tail end of the read in this vector.
2093 */
2094 prev_resid = uio->uio_resid;
2095 uio->uio_resid = clip_size;
2096 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2097 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2098 }
2099 else
2100 {
2101 /* round clip_size down to a multiple of pagesize */
2102 clip_size = clip_size & ~(PAGE_MASK);
2103 prev_resid = uio->uio_resid;
2104 uio->uio_resid = clip_size;
2105 retval = cluster_nocopy_read(vp, uio, filesize, devblocksize, flags);
2106 if ((retval==0) && uio->uio_resid)
2107 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2108 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2109 }
2110 } /* end else */
2111 } /* end while */
2112
2113 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
2114 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
2115
2116 return(retval);
2117 }
2118
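/*
 * cluster_read_x is the buffered (page cache) read path.  For cached
 * user space requests it first copies out any pages that are already
 * resident via ubc_page_op()/uiomove(), then builds a UPL over the
 * remainder, issues a synchronous cluster_io for the run of invalid
 * pages, copies the data to the caller, and starts read-ahead
 * (cluster_rd_ahead / cluster_rd_prefetch) when the access pattern
 * warrants it... pages brought in are committed back to the cache,
 * or dumped when VNOCACHE_DATA is set
 */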
2119 static int
2120 cluster_read_x(vp, uio, filesize, devblocksize, flags)
2121 struct vnode *vp;
2122 struct uio *uio;
2123 off_t filesize;
2124 int devblocksize;
2125 int flags;
2126 {
2127 upl_page_info_t *pl;
2128 upl_t upl;
2129 vm_offset_t upl_offset;
2130 int upl_size;
2131 off_t upl_f_offset;
2132 int start_offset;
2133 int start_pg;
2134 int last_pg;
2135 int uio_last;
2136 int pages_in_upl;
2137 off_t max_size;
2138 int io_size;
2139 vm_offset_t io_address;
2140 kern_return_t kret;
2141 int segflg;
2142 int error = 0;
2143 int retval = 0;
2144 int b_lblkno;
2145 int e_lblkno;
2146
2147 b_lblkno = (int)(uio->uio_offset / PAGE_SIZE_64);
2148
2149 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2150 /*
2151 * compute the size of the upl needed to encompass
2152 * the requested read... limit each call to cluster_io
2153 * to the maximum UPL size... cluster_io will clip if
2154 * this exceeds the maximum io_size for the device...
2155 * also make sure to account for
2156 * a starting offset that's not page aligned
2157 */
2158 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
2159 upl_f_offset = uio->uio_offset - (off_t)start_offset;
2160 max_size = filesize - uio->uio_offset;
2161
2162 if ((off_t)((unsigned int)uio->uio_resid) < max_size)
2163 io_size = uio->uio_resid;
2164 else
2165 io_size = max_size;
2166
2167 if (uio->uio_segflg == UIO_USERSPACE && !(vp->v_flag & VNOCACHE_DATA)) {
2168 segflg = uio->uio_segflg;
2169
2170 uio->uio_segflg = UIO_PHYS_USERSPACE;
2171
2172 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2173 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2174
2175 while (io_size && retval == 0) {
2176 int xsize;
2177 vm_offset_t paddr;
2178
2179 if (ubc_page_op(vp,
2180 upl_f_offset,
2181 UPL_POP_SET | UPL_POP_BUSY,
2182 &paddr, 0) != KERN_SUCCESS)
2183 break;
2184
2185 xsize = PAGE_SIZE - start_offset;
2186
2187 if (xsize > io_size)
2188 xsize = io_size;
2189
2190 retval = uiomove((caddr_t)(paddr + start_offset), xsize, uio);
2191
2192 ubc_page_op(vp, upl_f_offset,
2193 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2194
2195 io_size -= xsize;
2196 start_offset = (int)
2197 (uio->uio_offset & PAGE_MASK_64);
2198 upl_f_offset = uio->uio_offset - start_offset;
2199 }
2200 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2201 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2202
2203 uio->uio_segflg = segflg;
2204
2205 if (retval)
2206 break;
2207
2208 if (io_size == 0) {
2209 /*
2210 * we're already finished with this read request
2211 * let's see if we should do a read-ahead
2212 */
2213 e_lblkno = (int)
2214 ((uio->uio_offset - 1) / PAGE_SIZE_64);
2215
2216 if (!(vp->v_flag & VRAOFF))
2217 /*
2218 * let's try to read ahead if we're in
2219 * a sequential access pattern
2220 */
2221 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2222 vp->v_lastr = e_lblkno;
2223
2224 break;
2225 }
2226 max_size = filesize - uio->uio_offset;
2227 }
2228 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2229 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2230 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2231 pages_in_upl = upl_size / PAGE_SIZE;
2232
2233 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START,
2234 (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
2235
2236 kret = ubc_create_upl(vp,
2237 upl_f_offset,
2238 upl_size,
2239 &upl,
2240 &pl,
2241 UPL_FLAGS_NONE);
2242 if (kret != KERN_SUCCESS)
2243 panic("cluster_read: failed to get pagelist");
2244
2245 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END,
2246 (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
2247
2248 /*
2249 * scan from the beginning of the upl looking for the first
2250 * non-valid page.... this will become the first page in
2251 * the request we're going to make to 'cluster_io'... if all
2252 * of the pages are valid, we won't call through to 'cluster_io'
2253 */
2254 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
2255 if (!upl_valid_page(pl, start_pg))
2256 break;
2257 }
2258
2259 /*
2260 * scan from the starting invalid page looking for a valid
2261 * page before the end of the upl is reached, if we
2262 * find one, then it will be the last page of the request to
2263 * 'cluster_io'
2264 */
2265 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2266 if (upl_valid_page(pl, last_pg))
2267 break;
2268 }
2269
2270 if (start_pg < last_pg) {
2271 /*
2272 * we found a range of 'invalid' pages that must be filled
2273 * if the last page in this range is the last page of the file
2274 * we may have to clip the size of it to keep from reading past
2275 * the end of the last physical block associated with the file
2276 */
2277 upl_offset = start_pg * PAGE_SIZE;
2278 io_size = (last_pg - start_pg) * PAGE_SIZE;
2279
2280 if ((upl_f_offset + upl_offset + io_size) > filesize)
2281 io_size = filesize - (upl_f_offset + upl_offset);
2282
2283 /*
2284 * issue a synchronous read to cluster_io
2285 */
2286
2287 error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
2288 io_size, devblocksize, CL_READ, (struct buf *)0);
2289 }
2290 if (error == 0) {
2291 /*
2292 * if the read completed successfully, or there was no I/O request
2293 * issued, then map the upl into kernel address space and
2294 * move the data into user land.... we'll first add on any 'valid'
2295 * pages that were present in the upl when we acquired it.
2296 */
2297 u_int val_size;
2298 u_int size_of_prefetch;
2299
2300 for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
2301 if (!upl_valid_page(pl, uio_last))
2302 break;
2303 }
2304 /*
2305 * compute size to transfer this round, if uio->uio_resid is
2306 * still non-zero after this uiomove, we'll loop around and
2307 * set up for another I/O.
2308 */
2309 val_size = (uio_last * PAGE_SIZE) - start_offset;
2310
2311 if (max_size < val_size)
2312 val_size = max_size;
2313
2314 if (uio->uio_resid < val_size)
2315 val_size = uio->uio_resid;
2316
2317 e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);
2318
2319 if (size_of_prefetch = (uio->uio_resid - val_size)) {
2320 /*
2321 * if there's still I/O left to do for this request, then issue a
2322 * pre-fetch I/O... the I/O wait time will overlap
2323 * with the copying of the data
2324 */
2325 cluster_rd_prefetch(vp, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);
2326 } else {
2327 if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
2328 /*
2329 * let's try to read ahead if we're in
2330 * a sequential access pattern
2331 */
2332 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2333 vp->v_lastr = e_lblkno;
2334 }
2335 if (uio->uio_segflg == UIO_USERSPACE) {
2336 int offset;
2337
2338 segflg = uio->uio_segflg;
2339
2340 uio->uio_segflg = UIO_PHYS_USERSPACE;
2341
2342
2343 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2344 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2345
2346 offset = start_offset;
2347
2348 while (val_size && retval == 0) {
2349 int csize;
2350 int i;
2351 caddr_t paddr;
2352
2353 i = offset / PAGE_SIZE;
2354 csize = min(PAGE_SIZE - start_offset, val_size);
2355
2356 paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;
2357
2358 retval = uiomove(paddr, csize, uio);
2359
2360 val_size -= csize;
2361 offset += csize;
2362 start_offset = offset & PAGE_MASK;
2363 }
2364 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2365 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2366
2367 uio->uio_segflg = segflg;
2368 }
2369 else
2370 {
2371 if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
2372 panic("cluster_read: ubc_upl_map() failed\n");
2373
2374 retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);
2375
2376 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
2377 panic("cluster_read: ubc_upl_unmap() failed\n");
2378 }
2379 }
2380 if (start_pg < last_pg) {
2381 /*
2382 * compute the range of pages that we actually issued an I/O for
2383 * and either commit them as valid if the I/O succeeded
2384 * or abort them if the I/O failed
2385 */
2386 io_size = (last_pg - start_pg) * PAGE_SIZE;
2387
2388 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2389 (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);
2390
2391 if (error || (vp->v_flag & VNOCACHE_DATA))
2392 ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
2393 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2394 else
2395 ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size,
2396 UPL_COMMIT_CLEAR_DIRTY
2397 | UPL_COMMIT_FREE_ON_EMPTY
2398 | UPL_COMMIT_INACTIVATE);
2399
2400 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2401 (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);
2402 }
2403 if ((last_pg - start_pg) < pages_in_upl) {
2404 int cur_pg;
2405 int commit_flags;
2406
2407 /*
2408 * the set of pages that we issued an I/O for did not encompass
2409 * the entire upl... so just release these without modifying
2410 * their state
2411 */
2412 if (error)
2413 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2414 else {
2415 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2416 (int)upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);
2417
2418 if (start_pg) {
2419 /*
2420 * we found some already valid pages at the beginning of
2421 * the upl... commit these back to the inactive list with
2422 * reference cleared
2423 */
2424 for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
2425 commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2426 | UPL_COMMIT_INACTIVATE;
2427
2428 if (upl_dirty_page(pl, cur_pg))
2429 commit_flags |= UPL_COMMIT_SET_DIRTY;
2430
2431 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2432 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2433 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2434 else
2435 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2436 PAGE_SIZE, commit_flags);
2437 }
2438 }
2439 if (last_pg < uio_last) {
2440 /*
2441 * we found some already valid pages immediately after the
2442 * pages we issued I/O for, commit these back to the
2443 * inactive list with reference cleared
2444 */
2445 for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
2446 commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2447 | UPL_COMMIT_INACTIVATE;
2448
2449 if (upl_dirty_page(pl, cur_pg))
2450 commit_flags |= UPL_COMMIT_SET_DIRTY;
2451
2452 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2453 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2454 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2455 else
2456 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2457 PAGE_SIZE, commit_flags);
2458 }
2459 }
2460 if (uio_last < pages_in_upl) {
2461 /*
2462 * there were some invalid pages beyond the valid pages
2463 * that we didn't issue an I/O for, just release them
2464 * unchanged
2465 */
2466 ubc_upl_abort_range(upl, uio_last * PAGE_SIZE,
2467 (pages_in_upl - uio_last) * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
2468 }
2469
2470 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2471 (int)upl, -1, -1, 0, 0);
2472 }
2473 }
2474 if (retval == 0)
2475 retval = error;
2476 }
2477
2478 return (retval);
2479 }
2480
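/*
 * cluster_nocopy_read handles the uncached, page aligned portion of a
 * user space read.  Pages already in the cache are copied out with
 * ubc_page_op()/uiomove()... for the run of pages that aren't cached,
 * the user's buffer itself is wired via vm_map_get_upl() and
 * cluster_io reads the file data directly into it with
 * CL_READ | CL_NOZERO
 */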
2481 static int
2482 cluster_nocopy_read(vp, uio, filesize, devblocksize, flags)
2483 struct vnode *vp;
2484 struct uio *uio;
2485 off_t filesize;
2486 int devblocksize;
2487 int flags;
2488 {
2489 upl_t upl;
2490 upl_page_info_t *pl;
2491 off_t upl_f_offset;
2492 vm_offset_t upl_offset;
2493 off_t start_upl_f_offset;
2494 off_t max_io_size;
2495 int io_size;
2496 int upl_size;
2497 int upl_needed_size;
2498 int pages_in_pl;
2499 vm_offset_t paddr;
2500 int upl_flags;
2501 kern_return_t kret;
2502 int segflg;
2503 struct iovec *iov;
2504 int i;
2505 int force_data_sync;
2506 int error = 0;
2507 int retval = 0;
2508
2509 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
2510 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
2511
2512 /*
2513 * When we enter this routine, we know
2514 * -- the offset into the file is on a pagesize boundary
2515 * -- the resid is a page multiple
2516 * -- the resid will not exceed iov_len
2517 */
2518
2519 iov = uio->uio_iov;
2520 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2521
2522 max_io_size = filesize - uio->uio_offset;
2523
2524 if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
2525 io_size = max_io_size;
2526 else
2527 io_size = uio->uio_resid;
2528
2529 /*
2530 * We don't come into this routine unless
2531 * UIO_USERSPACE is set.
2532 */
2533 segflg = uio->uio_segflg;
2534
2535 uio->uio_segflg = UIO_PHYS_USERSPACE;
2536
2537 /*
2538 * First look for pages already in the cache
2539 * and move them to user space.
2540 */
2541 while (io_size && (retval == 0)) {
2542 upl_f_offset = uio->uio_offset;
2543
2544 /*
2545 * If this call fails, it means the page is not
2546 * in the page cache.
2547 */
2548 if (ubc_page_op(vp, upl_f_offset,
2549 UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
2550 break;
2551
2552 retval = uiomove((caddr_t)(paddr), PAGE_SIZE, uio);
2553
2554 ubc_page_op(vp, upl_f_offset,
2555 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2556
2557 io_size -= PAGE_SIZE;
2558 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
2559 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2560 }
2561
2562 uio->uio_segflg = segflg;
2563
2564 if (retval)
2565 {
2566 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2567 (int)uio->uio_offset, uio->uio_resid, 2, retval, 0);
2568 return(retval);
2569 }
2570
2571 /* If we are already finished with this read, then return */
2572 if (io_size == 0)
2573 {
2574
2575 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2576 (int)uio->uio_offset, uio->uio_resid, 3, io_size, 0);
2577 return(0);
2578 }
2579
2580 max_io_size = io_size;
2581 if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2582 max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2583
2584 start_upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
2585 upl_f_offset = start_upl_f_offset;
2586 io_size = 0;
2587
2588 while(io_size < max_io_size)
2589 {
2590
2591 if(ubc_page_op(vp, upl_f_offset,
2592 UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS)
2593 {
2594 ubc_page_op(vp, upl_f_offset,
2595 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2596 break;
2597 }
2598
2599 /*
2600 * Build up the io request parameters.
2601 */
2602
2603 io_size += PAGE_SIZE;
2604 upl_f_offset += PAGE_SIZE;
2605 }
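/*
 * at this point io_size covers the run of consecutive pages, starting
 * at start_upl_f_offset, that were not found in the cache above...
 * these are the pages we'll read directly into the user's buffer
 */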
2606
2607 if (io_size == 0)
2608 return(retval);
2609
2610 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2611 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
2612
2613 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
2614 (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);
2615
2616 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
2617 {
2618 pages_in_pl = 0;
2619 upl_size = upl_needed_size;
2620 upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2621
2622 kret = vm_map_get_upl(current_map(),
2623 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2624 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);
2625
2626 if (kret != KERN_SUCCESS)
2627 {
2628 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2629 (int)upl_offset, upl_size, io_size, kret, 0);
2630
2631 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2632 (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);
2633
2634 /* cluster_nocopy_read: failed to get pagelist */
2635 /* do not return kret here */
2636 return(retval);
2637 }
2638
2639 pages_in_pl = upl_size / PAGE_SIZE;
2640 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2641
2642 for(i=0; i < pages_in_pl; i++)
2643 {
2644 if (!upl_valid_page(pl, i))
2645 break;
2646 }
2647 if (i == pages_in_pl)
2648 break;
2649
2650 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2651 UPL_ABORT_FREE_ON_EMPTY);
2652 }
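/*
 * vm_map_get_upl() is retried up to three times with an increasing
 * force_data_sync until the page list it returns is entirely valid...
 * if that still fails we bail out of the nocopy path and let the
 * caller fall back to cluster_read_x for the rest of the request
 */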
2653
2654 if (force_data_sync >= 3)
2655 {
2656 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2657 (int)upl_offset, upl_size, io_size, kret, 0);
2658
2659 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2660 (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);
2661 return(retval);
2662 }
2663 /*
2664 * Consider the possibility that upl_size wasn't satisfied.
2665 */
2666 if (upl_size != upl_needed_size)
2667 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
2668
2669 if (io_size == 0)
2670 {
2671 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2672 UPL_ABORT_FREE_ON_EMPTY);
2673 return(retval);
2674 }
2675
2676 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2677 (int)upl_offset, upl_size, io_size, kret, 0);
2678
2679 /*
2680 * issue a synchronous read to cluster_io
2681 */
2682
2683 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
2684 (int)upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);
2685
2686 error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
2687 io_size, devblocksize, CL_READ| CL_NOZERO, (struct buf *)0);
2688
2689 if (error == 0) {
2690 /*
2691 * The cluster_io read completed successfully,
2692 * update the uio structure and commit.
2693 */
2694
2695 ubc_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2696 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
2697
2698 iov->iov_base += io_size;
2699 iov->iov_len -= io_size;
2700 uio->uio_resid -= io_size;
2701 uio->uio_offset += io_size;
2702 }
2703 else {
2704 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2705 UPL_ABORT_FREE_ON_EMPTY);
2706 }
2707
2708 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
2709 (int)upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
2710
2711 if (retval == 0)
2712 retval = error;
2713
2714 } /* end while */
2715
2716
2717 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2718 (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);
2719
2720 return (retval);
2721 }
2722
2723
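/*
 * cluster_phys_read handles a read whose target buffer the caller has
 * already determined to be physically contiguous.  The buffer is wired
 * with vm_map_get_upl(), the transfer is clipped to what remains of
 * both the iovec and the file, and a single synchronous cluster_io is
 * issued with CL_READ | CL_NOZERO | CL_DEV_MEMORY
 */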
2724 static int
2725 cluster_phys_read(vp, uio, filesize)
2726 struct vnode *vp;
2727 struct uio *uio;
2728 off_t filesize;
2729 {
2730 upl_t upl;
2731 vm_offset_t upl_offset;
2732 off_t max_size;
2733 int io_size;
2734 int upl_size;
2735 int upl_needed_size;
2736 int pages_in_pl;
2737 int upl_flags;
2738 kern_return_t kret;
2739 struct iovec *iov;
2740 int error;
2741
2742 /*
2743 * When we enter this routine, we know
2744 * -- the resid will not exceed iov_len
2745 * -- the target address is physically contiguous
2746 */
2747
2748 iov = uio->uio_iov;
2749
2750 max_size = filesize - uio->uio_offset;
2751
2752 if (max_size < (off_t)((unsigned int)iov->iov_len))
2753 io_size = max_size;
2754 else
2755 io_size = iov->iov_len;
2756
2757 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2758 upl_needed_size = upl_offset + io_size;
2759
2760 pages_in_pl = 0;
2761 upl_size = upl_needed_size;
2762 upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2763
2764 kret = vm_map_get_upl(current_map(),
2765 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2766 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);
2767
2768 if (kret != KERN_SUCCESS)
2769 {
2770 /* cluster_phys_read: failed to get pagelist */
2771 return(EINVAL);
2772 }
2773
2774 /*
2775 * Consider the possibility that upl_size wasn't satisfied.
2776 */
2777 if (upl_size < upl_needed_size)
2778 {
2779 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2780 return(EINVAL);
2781 }
2782
2783 /*
2784 * issue a synchronous read to cluster_io
2785 */
2786
2787 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
2788 io_size, 0, CL_READ| CL_NOZERO | CL_DEV_MEMORY, (struct buf *)0);
2789
2790 if (error == 0)
2791 {
2792 /*
2793 * The cluster_io read completed successfully,
2794 * update the uio structure and commit.
2795 */
2796
2797 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);
2798
2799 iov->iov_base += io_size;
2800 iov->iov_len -= io_size;
2801 uio->uio_resid -= io_size;
2802 uio->uio_offset += io_size;
2803 }
2804 else
2805 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2806
2807 return (error);
2808 }
2809
2810 /*
2811 * generate advisory I/O's in the largest chunks possible
2812 * the completed pages will be released into the VM cache
2813 */
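/*
 * A caller typically hands in the byte range it expects to need soon,
 * e.g. (hypothetical values, not taken from this file):
 *
 *	error = advisory_read(vp, filesize, f_offset, resid, devblocksize);
 *
 * only pages that are absent from the cache generate I/O (the UPL is
 * created with UPL_RET_ONLY_ABSENT), and those reads are issued
 * asynchronously with CL_COMMIT | CL_AGE so the pages land in the VM
 * cache and are released as each I/O completes
 */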
2814 int
2815 advisory_read(vp, filesize, f_offset, resid, devblocksize)
2816 struct vnode *vp;
2817 off_t filesize;
2818 off_t f_offset;
2819 int resid;
2820 int devblocksize;
2821 {
2822 upl_page_info_t *pl;
2823 upl_t upl;
2824 vm_offset_t upl_offset;
2825 int upl_size;
2826 off_t upl_f_offset;
2827 int start_offset;
2828 int start_pg;
2829 int last_pg;
2830 int pages_in_upl;
2831 off_t max_size;
2832 int io_size;
2833 kern_return_t kret;
2834 int retval = 0;
2835 int issued_io;
2836
2837 if (!UBCINFOEXISTS(vp))
2838 return(EINVAL);
2839
2840 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
2841 (int)f_offset, resid, (int)filesize, devblocksize, 0);
2842
2843 while (resid && f_offset < filesize && retval == 0) {
2844 /*
2845 * compute the size of the upl needed to encompass
2846 * the requested read... limit each call to cluster_io
2847 * to the maximum UPL size... cluster_io will clip if
2848 * this exceeds the maximum io_size for the device...
2849 * also make sure to account for
2850 * a starting offset that's not page aligned
2851 */
2852 start_offset = (int)(f_offset & PAGE_MASK_64);
2853 upl_f_offset = f_offset - (off_t)start_offset;
2854 max_size = filesize - f_offset;
2855
2856 if (resid < max_size)
2857 io_size = resid;
2858 else
2859 io_size = max_size;
2860
2861 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2862 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2863 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2864 pages_in_upl = upl_size / PAGE_SIZE;
2865
2866 kret = ubc_create_upl(vp,
2867 upl_f_offset,
2868 upl_size,
2869 &upl,
2870 &pl,
2871 UPL_RET_ONLY_ABSENT);
2872 if (kret != KERN_SUCCESS)
2873 return(retval);
2874 issued_io = 0;
2875
2876 /*
2877 * before we start marching forward, we must make sure we end on
2878 * a present page, otherwise we will be working with a freed
2879 * upl
2880 */
2881 for (last_pg = pages_in_upl - 1; last_pg >= 0; last_pg--) {
2882 if (upl_page_present(pl, last_pg))
2883 break;
2884 }
2885 pages_in_upl = last_pg + 1;
2886
2887
2888 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
2889 (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
2890
2891
2892 for (last_pg = 0; last_pg < pages_in_upl; ) {
2893 /*
2894 * scan from the beginning of the upl looking for the first
2895 * page that is present.... this will become the first page in
2896 * the request we're going to make to 'cluster_io'... if all
2897 * of the pages are absent, we won't call through to 'cluster_io'
2898 */
2899 for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
2900 if (upl_page_present(pl, start_pg))
2901 break;
2902 }
2903
2904 /*
2905 * scan from the starting present page looking for an absent
2906 * page before the end of the upl is reached, if we
2907 * find one, then it will terminate the range of pages being
2908 * presented to 'cluster_io'
2909 */
2910 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2911 if (!upl_page_present(pl, last_pg))
2912 break;
2913 }
2914
2915 if (last_pg > start_pg) {
2916 /*
2917 * we found a range of pages that must be filled
2918 * if the last page in this range is the last page of the file
2919 * we may have to clip the size of it to keep from reading past
2920 * the end of the last physical block associated with the file
2921 */
2922 upl_offset = start_pg * PAGE_SIZE;
2923 io_size = (last_pg - start_pg) * PAGE_SIZE;
2924
2925 if ((upl_f_offset + upl_offset + io_size) > filesize)
2926 io_size = filesize - (upl_f_offset + upl_offset);
2927
2928 /*
2929 * issue an asynchronous read to cluster_io
2930 */
2931 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, devblocksize,
2932 CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0);
2933
2934 issued_io = 1;
2935 }
2936 }
2937 if (issued_io == 0)
2938 ubc_upl_abort(upl, 0);
2939
2940 io_size = upl_size - start_offset;
2941
2942 if (io_size > resid)
2943 io_size = resid;
2944 f_offset += io_size;
2945 resid -= io_size;
2946 }
2947
2948 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
2949 (int)f_offset, resid, retval, 0, 0);
2950
2951 return(retval);
2952 }
2953
2954
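/*
 * cluster_push flushes a vnode's delayed-write state.  If the vnode
 * has fallen back to the VHASDIRTY mechanism, the entire dirty range
 * [v_cstart, v_lastw) is swept in MAX_UPL_TRANSFER sized chunks via
 * cluster_push_x... otherwise cluster_try_push is asked to push all
 * of the recorded clusters
 */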
2955 int
2956 cluster_push(vp)
2957 struct vnode *vp;
2958 {
2959 int retval;
2960
2961 if (!UBCINFOEXISTS(vp) || vp->v_clen == 0) {
2962 vp->v_flag &= ~VHASDIRTY;
2963 return(0);
2964 }
2965
2966 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_START,
2967 vp->v_flag & VHASDIRTY, vp->v_clen, 0, 0, 0);
2968
2969 if (vp->v_flag & VHASDIRTY) {
2970 daddr_t start_pg;
2971 daddr_t last_pg;
2972 daddr_t end_pg;
2973
2974 start_pg = vp->v_cstart;
2975 end_pg = vp->v_lastw;
2976
2977 vp->v_flag &= ~VHASDIRTY;
2978 vp->v_clen = 0;
2979
2980 while (start_pg < end_pg) {
2981 last_pg = start_pg + MAX_UPL_TRANSFER;
2982
2983 if (last_pg > end_pg)
2984 last_pg = end_pg;
2985
2986 cluster_push_x(vp, ubc_getsize(vp), start_pg, last_pg, 0);
2987
2988 start_pg = last_pg;
2989 }
2990 return (1);
2991 }
2992 retval = cluster_try_push(vp, ubc_getsize(vp), 0, 1);
2993
2994 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_END,
2995 vp->v_flag & VHASDIRTY, vp->v_clen, retval, 0, 0);
2996
2997 return (retval);
2998 }
2999
3000
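/*
 * cluster_try_push takes a sorted snapshot of the vnode's clusters,
 * clears v_clen so new clusters can form, and tries to push each one
 * through cluster_push_x (which may decline when can_delay is set).
 * Clusters that weren't pushed are merged back into vp->v_clusters,
 * or, if they no longer fit, the vnode falls back to the VHASDIRTY
 * mechanism... the return value is the number of free cluster slots
 * (MAX_CLUSTERS - vp->v_clen)
 */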
3001 static int
3002 cluster_try_push(vp, EOF, can_delay, push_all)
3003 struct vnode *vp;
3004 off_t EOF;
3005 int can_delay;
3006 int push_all;
3007 {
3008 int cl_index;
3009 int cl_index1;
3010 int min_index;
3011 int cl_len;
3012 int cl_total;
3013 int cl_pushed;
3014 struct v_cluster l_clusters[MAX_CLUSTERS];
3015
3016 /*
3017 * make a local 'sorted' copy of the clusters
3018 * and clear vp->v_clen so that new clusters can
3019 * be developed
3020 */
3021 for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
3022 for (min_index = -1, cl_index1 = 0; cl_index1 < vp->v_clen; cl_index1++) {
3023 if (vp->v_clusters[cl_index1].start_pg == vp->v_clusters[cl_index1].last_pg)
3024 continue;
3025 if (min_index == -1)
3026 min_index = cl_index1;
3027 else if (vp->v_clusters[cl_index1].start_pg < vp->v_clusters[min_index].start_pg)
3028 min_index = cl_index1;
3029 }
3030 if (min_index == -1)
3031 break;
3032 l_clusters[cl_index].start_pg = vp->v_clusters[min_index].start_pg;
3033 l_clusters[cl_index].last_pg = vp->v_clusters[min_index].last_pg;
3034
3035 vp->v_clusters[min_index].start_pg = vp->v_clusters[min_index].last_pg;
3036 }
3037 cl_len = cl_index;
3038 vp->v_clen = 0;
3039
3040 for (cl_pushed = 0, cl_index = 0; cl_index < cl_len; cl_index++) {
3041 /*
3042 * try to push each cluster in turn... cluster_push_x may not
3043 * push the cluster if can_delay is TRUE and the cluster doesn't
3044 * meet the criteria for an immediate push
3045 */
3046 if (cluster_push_x(vp, EOF, l_clusters[cl_index].start_pg, l_clusters[cl_index].last_pg, can_delay)) {
3047 l_clusters[cl_index].start_pg = 0;
3048 l_clusters[cl_index].last_pg = 0;
3049
3050 cl_pushed++;
3051
3052 if (push_all == 0)
3053 break;
3054 }
3055 }
3056 if (cl_len > cl_pushed) {
3057 /*
3058 * we didn't push all of the clusters, so
3059 * lets try to merge them back in to the vnode
3060 */
3061 if ((MAX_CLUSTERS - vp->v_clen) < (cl_len - cl_pushed)) {
3062 /*
3063 * we picked up some new clusters while we were trying to
3064 * push the old ones (I don't think this can happen because
3065 * I'm holding the lock, but just in case)... the sum of the
3066 * leftovers plus the new cluster count exceeds our ability
3067 * to represent them, so fall back to the VHASDIRTY mechanism
3068 */
3069 for (cl_index = 0; cl_index < cl_len; cl_index++) {
3070 if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
3071 continue;
3072
3073 if (l_clusters[cl_index].start_pg < vp->v_cstart)
3074 vp->v_cstart = l_clusters[cl_index].start_pg;
3075 if (l_clusters[cl_index].last_pg > vp->v_lastw)
3076 vp->v_lastw = l_clusters[cl_index].last_pg;
3077 }
3078 vp->v_flag |= VHASDIRTY;
3079 } else {
3080 /*
3081 * we've got room to merge the leftovers back in
3082 * just append them starting at the next 'hole'
3083 * represented by vp->v_clen
3084 */
3085 for (cl_index = 0, cl_index1 = vp->v_clen; cl_index < cl_len; cl_index++) {
3086 if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
3087 continue;
3088
3089 vp->v_clusters[cl_index1].start_pg = l_clusters[cl_index].start_pg;
3090 vp->v_clusters[cl_index1].last_pg = l_clusters[cl_index].last_pg;
3091
3092 if (cl_index1 == 0) {
3093 vp->v_cstart = l_clusters[cl_index].start_pg;
3094 vp->v_lastw = l_clusters[cl_index].last_pg;
3095 } else {
3096 if (l_clusters[cl_index].start_pg < vp->v_cstart)
3097 vp->v_cstart = l_clusters[cl_index].start_pg;
3098 if (l_clusters[cl_index].last_pg > vp->v_lastw)
3099 vp->v_lastw = l_clusters[cl_index].last_pg;
3100 }
3101 cl_index1++;
3102 }
3103 /*
3104 * update the cluster count
3105 */
3106 vp->v_clen = cl_index1;
3107 }
3108 }
3109 return(MAX_CLUSTERS - vp->v_clen);
3110 }
3111
3112
3113
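/*
 * cluster_push_x writes out the dirty pages of a single cluster
 * covering pages [first, last).  When can_delay is set, small clusters
 * and clusters that are less than half dirty are left alone (return 0).
 * Otherwise the UPL is walked... clean runs are simply released and
 * each dirty run is handed to cluster_io as a throttled asynchronous
 * write... a return of 1 means the cluster no longer needs to be
 * tracked
 */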
3114 static int
3115 cluster_push_x(vp, EOF, first, last, can_delay)
3116 struct vnode *vp;
3117 off_t EOF;
3118 daddr_t first;
3119 daddr_t last;
3120 int can_delay;
3121 {
3122 upl_page_info_t *pl;
3123 upl_t upl;
3124 vm_offset_t upl_offset;
3125 int upl_size;
3126 off_t upl_f_offset;
3127 int pages_in_upl;
3128 int start_pg;
3129 int last_pg;
3130 int io_size;
3131 int io_flags;
3132 int size;
3133 kern_return_t kret;
3134
3135
3136 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_START,
3137 vp->v_clen, first, last, EOF, 0);
3138
3139 if ((pages_in_upl = last - first) == 0) {
3140 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 0, 0, 0, 0);
3141
3142 return (1);
3143 }
3144 upl_size = pages_in_upl * PAGE_SIZE;
3145 upl_f_offset = ((off_t)first) * PAGE_SIZE_64;
3146
3147 if (upl_f_offset + upl_size >= EOF) {
3148
3149 if (upl_f_offset >= EOF) {
3150 /*
3151 * must have truncated the file and missed
3152 * clearing a dangling cluster (i.e. it's completely
3153 * beyond the new EOF)
3154 */
3155 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 1, 0, 0, 0);
3156
3157 return(1);
3158 }
3159 size = EOF - upl_f_offset;
3160
3161 upl_size = (size + (PAGE_SIZE - 1) ) & ~(PAGE_SIZE - 1);
3162 pages_in_upl = upl_size / PAGE_SIZE;
3163 } else {
3164 if (can_delay && (pages_in_upl < (MAX_UPL_TRANSFER - (MAX_UPL_TRANSFER / 2))))
3165 return(0);
3166 size = upl_size;
3167 }
3168 kret = ubc_create_upl(vp,
3169 upl_f_offset,
3170 upl_size,
3171 &upl,
3172 &pl,
3173 UPL_RET_ONLY_DIRTY);
3174 if (kret != KERN_SUCCESS)
3175 panic("cluster_push: failed to get pagelist");
3176
3177 if (can_delay) {
3178 int num_of_dirty;
3179
3180 for (num_of_dirty = 0, start_pg = 0; start_pg < pages_in_upl; start_pg++) {
3181 if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
3182 num_of_dirty++;
3183 }
3184 if (num_of_dirty < pages_in_upl / 2) {
3185 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
3186
3187 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 0, 2, num_of_dirty, (pages_in_upl / 2), 0);
3188
3189 return(0);
3190 }
3191 }
3192 last_pg = 0;
3193
3194 while (size) {
3195
3196 for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
3197 if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
3198 break;
3199 }
3200 if (start_pg > last_pg) {
3201 io_size = (start_pg - last_pg) * PAGE_SIZE;
3202
3203 ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size,
3204 UPL_ABORT_FREE_ON_EMPTY);
3205
3206 if (io_size < size)
3207 size -= io_size;
3208 else
3209 break;
3210 }
3211 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
3212 if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
3213 break;
3214 }
3215 upl_offset = start_pg * PAGE_SIZE;
3216
3217 io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);
3218
3219 if (vp->v_flag & VNOCACHE_DATA)
3220 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC | CL_DUMP;
3221 else
3222 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
3223
3224 while (vp->v_numoutput >= ASYNC_THROTTLE) {
3225 vp->v_flag |= VTHROTTLED;
3226 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_push", 0);
3227 }
3228 cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, vp->v_ciosiz, io_flags, (struct buf *)0);
3229
3230 size -= io_size;
3231 }
3232 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 3, 0, 0, 0);
3233
3234 return(1);
3235 }