1
2 /*
3 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * The contents of this file constitute Original Code as defined in and
8 * are subject to the Apple Public Source License Version 1.1 (the
9 * "License"). You may not use this file except in compliance with the
10 * License. Please obtain a copy of the License at
11 * http://www.apple.com/publicsource and read it before using this file.
12 *
13 * This Original Code and all software distributed under the License are
14 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
18 * License for the specific language governing rights and limitations
19 * under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)vfs_cluster.c 8.10 (Berkeley) 3/28/95
57 */
58
59 #include <sys/param.h>
60 #include <sys/proc.h>
61 #include <sys/buf.h>
62 #include <sys/vnode.h>
63 #include <sys/mount.h>
64 #include <sys/trace.h>
65 #include <sys/malloc.h>
66 #include <sys/resourcevar.h>
67 #include <libkern/libkern.h>
68
69 #include <sys/ubc.h>
70 #include <vm/vm_pageout.h>
71
72 #include <sys/kdebug.h>
73
74 #define CL_READ 0x01
75 #define CL_ASYNC 0x02
76 #define CL_COMMIT 0x04
77 #define CL_PAGEOUT 0x10
78 #define CL_AGE 0x20
79 #define CL_DUMP 0x40
80 #define CL_NOZERO 0x80
81 #define CL_PAGEIN 0x100
82 #define CL_DEV_MEMORY 0x200
83
84 static void cluster_zero(upl_t upl, vm_offset_t upl_offset,
85 int size, struct buf *bp);
86 static int cluster_read_x(struct vnode *vp, struct uio *uio,
87 off_t filesize, int devblocksize, int flags);
88 static int cluster_write_x(struct vnode *vp, struct uio *uio,
89 off_t oldEOF, off_t newEOF, off_t headOff,
90 off_t tailOff, int devblocksize, int flags);
91 static int cluster_nocopy_read(struct vnode *vp, struct uio *uio,
92 off_t filesize, int devblocksize, int flags);
93 static int cluster_nocopy_write(struct vnode *vp, struct uio *uio,
94 off_t newEOF, int devblocksize, int flags);
95 static int cluster_phys_read(struct vnode *vp, struct uio *uio,
96 off_t filesize);
97 static int cluster_phys_write(struct vnode *vp, struct uio *uio, off_t newEOF);
98 static int cluster_push_x(struct vnode *vp, off_t EOF, daddr_t first, daddr_t last, int can_delay);
99 static int cluster_try_push(struct vnode *vp, off_t newEOF, int can_delay, int push_all);
100
101
102 /*
103 * throttle the number of async writes that
104 * can be outstanding on a single vnode
105 * before we issue a synchronous write
106 */
107 #define ASYNC_THROTTLE 9
108
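/*
 * cluster_iodone:
 * completion handler for the chain of component buffers issued by
 * cluster_io.  Each component points at the transaction head through
 * b_trans_head; until every buffer in the chain has B_DONE set we just
 * return.  When the last one completes we accumulate the error and
 * residual counts, free the io bufs and any malloc'd vector lists,
 * wake up writers throttled on v_numoutput, zero-fill the tail of the
 * last page if b_validend was set, pass completion on to the original
 * buffer when B_NEED_IODONE is set, and commit or abort the UPL range
 * based on the outcome and the B_COMMIT_UPL/B_NOCACHE/B_PAGEOUT flags.
 */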
109 static int
110 cluster_iodone(bp)
111 struct buf *bp;
112 {
113 int b_flags;
114 int error;
115 int total_size;
116 int total_resid;
117 int upl_offset;
118 int zero_offset;
119 upl_t upl;
120 struct buf *cbp;
121 struct buf *cbp_head;
122 struct buf *cbp_next;
123 struct buf *real_bp;
124 struct vnode *vp;
125 int commit_size;
126 int pg_offset;
127
128
129 cbp_head = (struct buf *)(bp->b_trans_head);
130
131 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START,
132 (int)cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
133
134 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
135 /*
136 * all I/O requests that are part of this transaction
137 * have to complete before we can process it
138 */
139 if ( !(cbp->b_flags & B_DONE)) {
140
141 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
142 (int)cbp_head, (int)cbp, cbp->b_bcount, cbp->b_flags, 0);
143
144 return 0;
145 }
146 }
147 error = 0;
148 total_size = 0;
149 total_resid = 0;
150
151 cbp = cbp_head;
152 upl_offset = cbp->b_uploffset;
153 upl = cbp->b_pagelist;
154 b_flags = cbp->b_flags;
155 real_bp = cbp->b_real_bp;
156 vp = cbp->b_vp;
157 zero_offset= cbp->b_validend;
158
159 while (cbp) {
160 if (cbp->b_vectorcount > 1)
161 _FREE(cbp->b_vectorlist, M_SEGMENT);
162
163 if ((cbp->b_flags & B_ERROR) && error == 0)
164 error = cbp->b_error;
165
166 total_resid += cbp->b_resid;
167 total_size += cbp->b_bcount;
168
169 cbp_next = cbp->b_trans_next;
170
171 free_io_buf(cbp);
172
173 cbp = cbp_next;
174 }
175 if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= (ASYNC_THROTTLE / 3))) {
176 vp->v_flag &= ~VTHROTTLED;
177 wakeup((caddr_t)&vp->v_numoutput);
178 }
179 if (zero_offset)
180 cluster_zero(upl, zero_offset, PAGE_SIZE - (zero_offset & PAGE_MASK), real_bp);
181
182 if ((b_flags & B_NEED_IODONE) && real_bp) {
183 if (error) {
184 real_bp->b_flags |= B_ERROR;
185 real_bp->b_error = error;
186 }
187 real_bp->b_resid = total_resid;
188
189 biodone(real_bp);
190 }
191 if (error == 0 && total_resid)
192 error = EIO;
193
194 if (b_flags & B_COMMIT_UPL) {
195 pg_offset = upl_offset & PAGE_MASK;
196 commit_size = (((pg_offset + total_size) + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
197
198 if (error || (b_flags & B_NOCACHE)) {
199 int upl_abort_code;
200
201 if ((b_flags & B_PAGEOUT) && (error != ENXIO)) /* transient error */
202 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
203 else if (b_flags & B_PGIN)
204 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
205 else
206 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
207
208 ubc_upl_abort_range(upl, upl_offset - pg_offset, commit_size,
209 upl_abort_code);
210
211 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
212 (int)upl, upl_offset - pg_offset, commit_size,
213 0x80000000|upl_abort_code, 0);
214
215 } else {
216 int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY;
217
218 if ( !(b_flags & B_PAGEOUT))
219 upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY;
220 if (b_flags & B_AGE)
221 upl_commit_flags |= UPL_COMMIT_INACTIVATE;
222
223 ubc_upl_commit_range(upl, upl_offset - pg_offset, commit_size,
224 upl_commit_flags);
225
226 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
227 (int)upl, upl_offset - pg_offset, commit_size,
228 upl_commit_flags, 0);
229 }
230 } else
231 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
232 (int)upl, upl_offset, 0, error, 0);
233
234 return (error);
235 }
236
237
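/*
 * cluster_zero:
 * zero 'size' bytes of the upl starting at 'upl_offset'... if the
 * caller's buffer doesn't already provide a kernel mapping (bp is NULL
 * or b_data is NULL), temporarily map the upl with ubc_upl_map, bzero
 * the range, and unmap it again.
 */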
238 static void
239 cluster_zero(upl, upl_offset, size, bp)
240 upl_t upl;
241 vm_offset_t upl_offset;
242 int size;
243 struct buf *bp;
244 {
245 vm_offset_t io_addr = 0;
246 int must_unmap = 0;
247 kern_return_t kret;
248
249 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_NONE,
250 upl_offset, size, (int)bp, 0, 0);
251
252 if (bp == NULL || bp->b_data == NULL) {
253 kret = ubc_upl_map(upl, &io_addr);
254
255 if (kret != KERN_SUCCESS)
256 panic("cluster_zero: ubc_upl_map() failed with (%d)", kret);
257 if (io_addr == 0)
258 panic("cluster_zero: ubc_upl_map() mapped 0");
259
260 must_unmap = 1;
261 } else
262 io_addr = (vm_offset_t)bp->b_data;
263 bzero((caddr_t)(io_addr + upl_offset), size);
264
265 if (must_unmap) {
266 kret = ubc_upl_unmap(upl);
267
268 if (kret != KERN_SUCCESS)
269 panic("cluster_zero: kernel_upl_unmap failed");
270 }
271 }
272
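/*
 * cluster_io:
 * core routine that turns a (upl, upl_offset, f_offset, size) request
 * into device I/O.  The size is rounded up to devblocksize, VOP_CMAP is
 * used to translate file offsets into runs of device blocks, and each
 * run is described by a chain of io bufs whose per-page iovec lists are
 * bounded by the max_iosize/max_vectors reported by vfs_io_attributes.
 * Holes are zero-filled on reads and pushed individually on writes,
 * CL_DEV_MEMORY requests are treated as a single physically contiguous
 * "page", and the buffers are linked into a transaction headed by
 * cbp_head before being handed to VOP_STRATEGY.  Synchronous callers
 * biowait for the chain and collect status through cluster_iodone; on
 * error the unissued portion of the upl is committed or aborted
 * according to the CL_* flags passed in.
 */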
273 static int
274 cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, real_bp)
275 struct vnode *vp;
276 upl_t upl;
277 vm_offset_t upl_offset;
278 off_t f_offset;
279 int non_rounded_size;
280 int devblocksize;
281 int flags;
282 struct buf *real_bp;
283 {
284 struct buf *cbp;
285 struct iovec *iovp;
286 u_int size;
287 int io_flags;
288 int error = 0;
289 int retval = 0;
290 struct buf *cbp_head = 0;
291 struct buf *cbp_tail = 0;
292 upl_page_info_t *pl;
293 int buf_count = 0;
294 int pg_count;
295 int pg_offset;
296 u_int max_iosize;
297 u_int max_vectors;
298 int priv;
299 int zero_offset = 0;
300
301 if (flags & CL_READ) {
302 io_flags = (B_VECTORLIST | B_READ);
303
304 vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
305 } else {
306 io_flags = (B_VECTORLIST | B_WRITEINPROG);
307
308 vfs_io_attributes(vp, B_WRITE, &max_iosize, &max_vectors);
309 }
310 pl = ubc_upl_pageinfo(upl);
311
312 if (flags & CL_ASYNC)
313 io_flags |= (B_CALL | B_ASYNC);
314 if (flags & CL_AGE)
315 io_flags |= B_AGE;
316 if (flags & CL_DUMP)
317 io_flags |= B_NOCACHE;
318 if (flags & CL_PAGEIN)
319 io_flags |= B_PGIN;
320
321 if (devblocksize)
322 size = (non_rounded_size + (devblocksize - 1)) & ~(devblocksize - 1);
323 else
324 size = non_rounded_size;
325
326
327 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START,
328 (int)f_offset, size, upl_offset, flags, 0);
329
330 if ((flags & CL_READ) && ((upl_offset + non_rounded_size) & PAGE_MASK) && (!(flags & CL_NOZERO))) {
 331 /*
 332 * we're reading up to the end of a file whose size isn't a
 333 * multiple of PAGE_SIZE, so we're going to end up with a page
 334 * that we can't completely fill from the file...
 335 * go ahead and zero out the portion of the page we can't
 336 * read in from the file
 337 */
338 zero_offset = upl_offset + non_rounded_size;
339 }
340 while (size) {
341 size_t io_size;
342 int vsize;
343 int i;
344 int pl_index;
345 int pg_resid;
346 int num_contig;
347 daddr_t lblkno;
348 daddr_t blkno;
349
350 if (size > max_iosize)
351 io_size = max_iosize;
352 else
353 io_size = size;
354
355 if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, &io_size, NULL)) {
356 if (error == EOPNOTSUPP)
357 panic("VOP_CMAP Unimplemented");
358 break;
359 }
360
361 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE,
362 (int)f_offset, (int)blkno, io_size, zero_offset, 0);
363
364 if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) {
365 if (flags & CL_PAGEOUT) {
366 error = EINVAL;
367 break;
368 };
369
370 /* Try paging out the page individually before
371 giving up entirely and dumping it (it could
372 be mapped in a "hole" and require allocation
 373 before the I/O):
374 */
375 ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE_64, UPL_ABORT_FREE_ON_EMPTY);
376 if (ubc_pushdirty_range(vp, f_offset, PAGE_SIZE_64) == 0) {
377 error = EINVAL;
378 break;
379 };
380
381 upl_offset += PAGE_SIZE_64;
382 f_offset += PAGE_SIZE_64;
383 size -= PAGE_SIZE_64;
384 continue;
385 }
386 lblkno = (daddr_t)(f_offset / PAGE_SIZE_64);
387 /*
388 * we have now figured out how much I/O we can do - this is in 'io_size'
389 * pl_index represents the first page in the 'upl' that the I/O will occur for
390 * pg_offset is the starting point in the first page for the I/O
391 * pg_count is the number of full and partial pages that 'io_size' encompasses
392 */
393 pl_index = upl_offset / PAGE_SIZE;
394 pg_offset = upl_offset & PAGE_MASK;
395 pg_count = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE;
396
397 if (flags & CL_DEV_MEMORY) {
398 /*
399 * currently, can't deal with reading 'holes' in file
400 */
401 if ((long)blkno == -1) {
402 error = EINVAL;
403 break;
404 }
405 /*
406 * treat physical requests as one 'giant' page
407 */
408 pg_count = 1;
409 }
410 if ((flags & CL_READ) && (long)blkno == -1) {
411 int bytes_to_zero;
412
413 /*
414 * if we're reading and blkno == -1, then we've got a
415 * 'hole' in the file that we need to deal with by zeroing
416 * out the affected area in the upl
417 */
418 if (zero_offset && io_size == size) {
419 /*
420 * if this upl contains the EOF and it is not a multiple of PAGE_SIZE
 421 * then 'zero_offset' will be non-zero
 422 * if the 'hole' returned by VOP_CMAP extends all the way to the eof
 423 * (indicated by the io_size finishing off the I/O request for this UPL)
 424 * then we're not going to issue an I/O for the
425 * last page in this upl... we need to zero both the hole and the tail
426 * of the page beyond the EOF, since the delayed zero-fill won't kick in
427 */
428 bytes_to_zero = (((upl_offset + io_size) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - upl_offset;
429
430 zero_offset = 0;
431 } else
432 bytes_to_zero = io_size;
433
434 cluster_zero(upl, upl_offset, bytes_to_zero, real_bp);
435
436 if (cbp_head)
437 /*
438 * if there is a current I/O chain pending
439 * then the first page of the group we just zero'd
440 * will be handled by the I/O completion if the zero
441 * fill started in the middle of the page
442 */
443 pg_count = (io_size - pg_offset) / PAGE_SIZE;
444 else {
445 /*
446 * no pending I/O to pick up that first page
447 * so, we have to make sure it gets committed
448 * here.
449 * set the pg_offset to 0 so that the upl_commit_range
450 * starts with this page
451 */
452 pg_count = (io_size + pg_offset) / PAGE_SIZE;
453 pg_offset = 0;
454 }
455 if (io_size == size && ((upl_offset + io_size) & PAGE_MASK))
456 /*
457 * if we're done with the request for this UPL
458 * then we have to make sure to commit the last page
459 * even if we only partially zero-filled it
460 */
461 pg_count++;
462
463 if (pg_count) {
464 if (pg_offset)
465 pg_resid = PAGE_SIZE - pg_offset;
466 else
467 pg_resid = 0;
468
469 if (flags & CL_COMMIT)
470 ubc_upl_commit_range(upl,
471 (upl_offset + pg_resid) & ~PAGE_MASK,
472 pg_count * PAGE_SIZE,
473 UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
474 }
475 upl_offset += io_size;
476 f_offset += io_size;
477 size -= io_size;
478
479 if (cbp_head && pg_count)
480 goto start_io;
481 continue;
482
483 } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) {
484 real_bp->b_blkno = blkno;
485 }
486
487 if (pg_count > 1) {
488 if (pg_count > max_vectors) {
489 io_size -= (pg_count - max_vectors) * PAGE_SIZE;
490
491 if (io_size < 0) {
492 io_size = PAGE_SIZE - pg_offset;
493 pg_count = 1;
494 } else
495 pg_count = max_vectors;
496 }
497 /*
498 * we need to allocate space for the vector list
499 */
500 if (pg_count > 1) {
501 iovp = (struct iovec *)_MALLOC(sizeof(struct iovec) * pg_count,
502 M_SEGMENT, M_NOWAIT);
503
504 if (iovp == (struct iovec *) 0) {
505 /*
506 * if the allocation fails, then throttle down to a single page
507 */
508 io_size = PAGE_SIZE - pg_offset;
509 pg_count = 1;
510 }
511 }
512 }
513
514 /* Throttle the speculative IO */
515 if ((flags & CL_ASYNC) && !(flags & CL_PAGEOUT))
516 priv = 0;
517 else
518 priv = 1;
519
520 cbp = alloc_io_buf(vp, priv);
521
522 if (pg_count == 1)
523 /*
524 * we use the io vector that's reserved in the buffer header
 525 * this ensures we can always issue an I/O even in a low memory
526 * condition that prevents the _MALLOC from succeeding... this
527 * is necessary to prevent deadlocks with the pager
528 */
529 iovp = (struct iovec *)(&cbp->b_vects[0]);
530
531 cbp->b_vectorlist = (void *)iovp;
532 cbp->b_vectorcount = pg_count;
533
534 if (flags & CL_DEV_MEMORY) {
535
536 iovp->iov_len = io_size;
537 iovp->iov_base = (caddr_t)upl_phys_page(pl, 0);
538
539 if (iovp->iov_base == (caddr_t) 0) {
540 free_io_buf(cbp);
541 error = EINVAL;
542 } else
543 iovp->iov_base += upl_offset;
544 } else {
545
546 for (i = 0, vsize = io_size; i < pg_count; i++, iovp++) {
547 int psize;
548
549 psize = PAGE_SIZE - pg_offset;
550
551 if (psize > vsize)
552 psize = vsize;
553
554 iovp->iov_len = psize;
555 iovp->iov_base = (caddr_t)upl_phys_page(pl, pl_index + i);
556
557 if (iovp->iov_base == (caddr_t) 0) {
558 if (pg_count > 1)
559 _FREE(cbp->b_vectorlist, M_SEGMENT);
560 free_io_buf(cbp);
561
562 error = EINVAL;
563 break;
564 }
565 iovp->iov_base += pg_offset;
566 pg_offset = 0;
567
568 if (flags & CL_PAGEOUT) {
569 int s;
570 struct buf *bp;
571
572 s = splbio();
573 if (bp = incore(vp, lblkno + i)) {
574 if (!ISSET(bp->b_flags, B_BUSY)) {
575 bremfree(bp);
576 SET(bp->b_flags, (B_BUSY | B_INVAL));
577 splx(s);
578 brelse(bp);
579 } else
580 panic("BUSY bp found in cluster_io");
581 }
582 splx(s);
583 }
584 vsize -= psize;
585 }
586 }
587 if (error)
588 break;
589
590 if (flags & CL_ASYNC)
591 cbp->b_iodone = (void *)cluster_iodone;
592 cbp->b_flags |= io_flags;
593
594 cbp->b_lblkno = lblkno;
595 cbp->b_blkno = blkno;
596 cbp->b_bcount = io_size;
597 cbp->b_pagelist = upl;
598 cbp->b_uploffset = upl_offset;
599 cbp->b_trans_next = (struct buf *)0;
600
601 if (flags & CL_READ)
602 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE,
603 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
604 else
605 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE,
606 cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0);
607
608 if (cbp_head) {
609 cbp_tail->b_trans_next = cbp;
610 cbp_tail = cbp;
611 } else {
612 cbp_head = cbp;
613 cbp_tail = cbp;
614 }
615 (struct buf *)(cbp->b_trans_head) = cbp_head;
616 buf_count++;
617
618 upl_offset += io_size;
619 f_offset += io_size;
620 size -= io_size;
621
622 if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY) && ((flags & CL_ASYNC) || buf_count > 8)) || size == 0) {
623 /*
624 * if we have no more I/O to issue or
625 * the current I/O we've prepared fully
626 * completes the last page in this request
627 * and it's either an ASYNC request or
628 * we've already accumulated more than 8 I/O's into
629 * this transaction and it's not an I/O directed to
630 * special DEVICE memory
631 * then go ahead and issue the I/O
632 */
633 start_io:
634 if (flags & CL_COMMIT)
635 cbp_head->b_flags |= B_COMMIT_UPL;
636 if (flags & CL_PAGEOUT)
637 cbp_head->b_flags |= B_PAGEOUT;
638 if (flags & CL_PAGEIN)
639 cbp_head->b_flags |= B_PGIN;
640
641 if (real_bp) {
642 cbp_head->b_flags |= B_NEED_IODONE;
643 cbp_head->b_real_bp = real_bp;
644 } else
645 cbp_head->b_real_bp = (struct buf *)NULL;
646
647 if (size == 0) {
648 /*
649 * we're about to issue the last I/O for this upl
650 * if this was a read to the eof and the eof doesn't
 651 * finish on a page boundary, then we need to zero-fill
652 * the rest of the page....
653 */
654 cbp_head->b_validend = zero_offset;
655 } else
656 cbp_head->b_validend = 0;
657
658 for (cbp = cbp_head; cbp;) {
659 struct buf * cbp_next;
660
661 if (io_flags & B_WRITEINPROG)
662 cbp->b_vp->v_numoutput++;
663
664 cbp_next = cbp->b_trans_next;
665
666 (void) VOP_STRATEGY(cbp);
667 cbp = cbp_next;
668 }
669 if ( !(flags & CL_ASYNC)) {
670 for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)
671 biowait(cbp);
672
673 if (error = cluster_iodone(cbp_head)) {
674 if ((flags & CL_PAGEOUT) && (error == ENXIO))
675 retval = 0; /* drop the error */
676 else
677 retval = error;
678 error = 0;
679 }
680 }
681 cbp_head = (struct buf *)0;
682 cbp_tail = (struct buf *)0;
683
684 buf_count = 0;
685 }
686 }
687 if (error) {
688 int abort_size;
689
690 for (cbp = cbp_head; cbp;) {
691 struct buf * cbp_next;
692
693 if (cbp->b_vectorcount > 1)
694 _FREE(cbp->b_vectorlist, M_SEGMENT);
695 upl_offset -= cbp->b_bcount;
696 size += cbp->b_bcount;
697
698 cbp_next = cbp->b_trans_next;
699 free_io_buf(cbp);
700 cbp = cbp_next;
701 }
702 pg_offset = upl_offset & PAGE_MASK;
703 abort_size = ((size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE) * PAGE_SIZE;
704
705 if (flags & CL_COMMIT) {
706 int upl_abort_code;
707
708 if ((flags & CL_PAGEOUT) && (error != ENXIO)) /* transient error */
709 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY;
710 else if (flags & CL_PAGEIN)
711 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR;
712 else
713 upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES;
714
715 ubc_upl_abort_range(upl, upl_offset - pg_offset, abort_size,
716 upl_abort_code);
717
718 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE,
719 (int)upl, upl_offset - pg_offset, abort_size, error, 0);
720 }
721 if (real_bp) {
722 real_bp->b_flags |= B_ERROR;
723 real_bp->b_error = error;
724
725 biodone(real_bp);
726 }
727 if (retval == 0)
728 retval = error;
729 }
730 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_END,
731 (int)f_offset, size, upl_offset, retval, 0);
732
733 return (retval);
734 }
735
736
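/*
 * cluster_rd_prefetch:
 * issue an advisory read of up to MAX_UPL_TRANSFER pages starting at
 * f_offset, clipped to the end of the file.  Pages at the front of the
 * range that are already resident (ubc_page_op succeeds) are skipped.
 * Returns the number of pages the prefetch request spanned so the
 * caller can advance its read-ahead window.
 */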
737 static int
738 cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize)
739 struct vnode *vp;
740 off_t f_offset;
741 u_int size;
742 off_t filesize;
743 int devblocksize;
744 {
745 int pages_to_fetch;
746 int skipped_pages;
747
748 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_START,
749 (int)f_offset, size, (int)filesize, 0, 0);
750
751 if (f_offset >= filesize) {
752 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
753 (int)f_offset, 0, 0, 0, 0);
754 return(0);
755 }
756 if (size > (MAX_UPL_TRANSFER * PAGE_SIZE))
757 size = MAX_UPL_TRANSFER * PAGE_SIZE;
758 else
759 size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
760
761 if ((off_t)size > (filesize - f_offset))
762 size = filesize - f_offset;
763
764 pages_to_fetch = (size + (PAGE_SIZE - 1)) / PAGE_SIZE;
765
766 for (skipped_pages = 0; skipped_pages < pages_to_fetch; skipped_pages++) {
767 if (ubc_page_op(vp, f_offset, 0, 0, 0) != KERN_SUCCESS)
768 break;
769 f_offset += PAGE_SIZE;
770 size -= PAGE_SIZE;
771 }
772 if (skipped_pages < pages_to_fetch)
773 advisory_read(vp, filesize, f_offset, size, devblocksize);
774
775 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END,
776 (int)f_offset + (pages_to_fetch * PAGE_SIZE), skipped_pages, 0, 1, 0);
777
778 return (pages_to_fetch);
779 }
780
781
782
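/*
 * cluster_rd_ahead:
 * sequential read-ahead heuristic driven by v_lastr, v_maxra and
 * v_ralen on the vnode.  If the current read doesn't follow the last
 * one the window is reset; otherwise v_ralen is doubled (capped at
 * MAX_UPL_TRANSFER) and, unless enough read-ahead is already pending
 * beyond e_lblkno, a prefetch is started just past the larger of
 * e_lblkno and v_maxra via cluster_rd_prefetch.
 */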
783 static void
784 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize)
785 struct vnode *vp;
786 daddr_t b_lblkno;
787 daddr_t e_lblkno;
788 off_t filesize;
789 int devblocksize;
790 {
791 daddr_t r_lblkno;
792 off_t f_offset;
793 int size_of_prefetch;
794 int max_pages;
795
796 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START,
797 b_lblkno, e_lblkno, vp->v_lastr, 0, 0);
798
799 if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) {
800 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
801 vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0);
802 return;
803 }
804
805 if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) &&
806 (b_lblkno != (vp->v_maxra + 1) || vp->v_ralen == 0))) {
807 vp->v_ralen = 0;
808 vp->v_maxra = 0;
809
810 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
811 vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0);
812
813 return;
814 }
815 max_pages = MAX_UPL_TRANSFER;
816
817 vp->v_ralen = vp->v_ralen ? min(max_pages, vp->v_ralen << 1) : 1;
818
819 if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen)
820 vp->v_ralen = min(max_pages, (e_lblkno + 1) - b_lblkno);
821
822 if (e_lblkno < vp->v_maxra) {
823 if ((vp->v_maxra - e_lblkno) > max(max_pages / 16, 4)) {
824
825 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
826 vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0);
827 return;
828 }
829 }
830 r_lblkno = max(e_lblkno, vp->v_maxra) + 1;
831 f_offset = (off_t)r_lblkno * PAGE_SIZE_64;
832
833 if (f_offset < filesize) {
834 size_of_prefetch = cluster_rd_prefetch(vp, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize);
835
836 if (size_of_prefetch)
837 vp->v_maxra = (r_lblkno + size_of_prefetch) - 1;
838 }
839 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
840 vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0);
841 }
842
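/*
 * cluster_pageout:
 * helper typically called from a filesystem's VOP_PAGEOUT.  Validates
 * the request (page aligned, non-negative, within the file, writable
 * mount), clips it to the EOF, aborts any portion of the upl beyond
 * that, throttles against ASYNC_THROTTLE outstanding writes on the
 * vnode, and issues the rest through cluster_io with CL_PAGEOUT.
 */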
843 int
844 cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
845 struct vnode *vp;
846 upl_t upl;
847 vm_offset_t upl_offset;
848 off_t f_offset;
849 int size;
850 off_t filesize;
851 int devblocksize;
852 int flags;
853 {
854 int io_size;
855 int pg_size;
856 off_t max_size;
857 int local_flags = CL_PAGEOUT;
858
859 if ((flags & UPL_IOSYNC) == 0)
860 local_flags |= CL_ASYNC;
861 if ((flags & UPL_NOCOMMIT) == 0)
862 local_flags |= CL_COMMIT;
863
864
865 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
866 (int)f_offset, size, (int)filesize, local_flags, 0);
867
868 /*
869 * If they didn't specify any I/O, then we are done...
870 * we can't issue an abort because we don't know how
871 * big the upl really is
872 */
873 if (size <= 0)
874 return (EINVAL);
875
876 if (vp->v_mount->mnt_flag & MNT_RDONLY) {
877 if (local_flags & CL_COMMIT)
878 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
879 return (EROFS);
880 }
881 /*
 882 * can't page-out from a negative offset
883 * or if we're starting beyond the EOF
884 * or if the file offset isn't page aligned
885 * or the size requested isn't a multiple of PAGE_SIZE
886 */
887 if (f_offset < 0 || f_offset >= filesize ||
888 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) {
889 if (local_flags & CL_COMMIT)
890 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
891 return (EINVAL);
892 }
893 max_size = filesize - f_offset;
894
895 if (size < max_size)
896 io_size = size;
897 else
898 io_size = max_size;
899
900 pg_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
901
902 if (size > pg_size) {
903 if (local_flags & CL_COMMIT)
904 ubc_upl_abort_range(upl, upl_offset + pg_size, size - pg_size,
905 UPL_ABORT_FREE_ON_EMPTY);
906 }
907 while (vp->v_numoutput >= ASYNC_THROTTLE) {
908 vp->v_flag |= VTHROTTLED;
909 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_pageout", 0);
910 }
911
912 return (cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
913 local_flags, (struct buf *)0));
914 }
915
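/*
 * cluster_pagein:
 * helper typically called from a filesystem's VOP_PAGEIN.  Performs the
 * same sanity checks on the read side, clips the transfer to the EOF,
 * aborts the part of the upl past the rounded transfer size, issues the
 * read through cluster_io with CL_READ | CL_PAGEIN, and on success
 * feeds the sequential read-ahead logic in cluster_rd_ahead.
 */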
916 int
917 cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags)
918 struct vnode *vp;
919 upl_t upl;
920 vm_offset_t upl_offset;
921 off_t f_offset;
922 int size;
923 off_t filesize;
924 int devblocksize;
925 int flags;
926 {
927 u_int io_size;
928 int rounded_size;
929 off_t max_size;
930 int retval;
931 int local_flags = 0;
932
933 if (upl == NULL || size < 0)
934 panic("cluster_pagein: NULL upl passed in");
935
936 if ((flags & UPL_IOSYNC) == 0)
937 local_flags |= CL_ASYNC;
938 if ((flags & UPL_NOCOMMIT) == 0)
939 local_flags |= CL_COMMIT;
940
941
942 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
943 (int)f_offset, size, (int)filesize, local_flags, 0);
944
945 /*
946 * can't page-in from a negative offset
947 * or if we're starting beyond the EOF
948 * or if the file offset isn't page aligned
949 * or the size requested isn't a multiple of PAGE_SIZE
950 */
951 if (f_offset < 0 || f_offset >= filesize ||
952 (f_offset & PAGE_MASK_64) || (size & PAGE_MASK) || (upl_offset & PAGE_MASK)) {
953 if (local_flags & CL_COMMIT)
954 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
955 return (EINVAL);
956 }
957 max_size = filesize - f_offset;
958
959 if (size < max_size)
960 io_size = size;
961 else
962 io_size = max_size;
963
964 rounded_size = (io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
965
966 if (size > rounded_size && (local_flags & CL_COMMIT))
967 ubc_upl_abort_range(upl, upl_offset + rounded_size,
968 size - (upl_offset + rounded_size), UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
969
970 retval = cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize,
971 local_flags | CL_READ | CL_PAGEIN, (struct buf *)0);
972
973 if (retval == 0) {
974 int b_lblkno;
975 int e_lblkno;
976
977 b_lblkno = (int)(f_offset / PAGE_SIZE_64);
978 e_lblkno = (int)
979 ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64);
980
981 if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF) && rounded_size == PAGE_SIZE) {
982 /*
 983 * we haven't read the last page of the file yet
984 * so let's try to read ahead if we're in
985 * a sequential access pattern
986 */
987 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
988 }
989 vp->v_lastr = e_lblkno;
990 }
991 return (retval);
992 }
993
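/*
 * cluster_bp:
 * adapt a conventional struct buf that already carries a upl in
 * b_pagelist onto cluster_io, converting its logical block number to a
 * file offset with ubc_blktooff and issuing it asynchronously.
 */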
994 int
995 cluster_bp(bp)
996 struct buf *bp;
997 {
998 off_t f_offset;
999 int flags;
1000
1001 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 19)) | DBG_FUNC_START,
1002 (int)bp, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
1003
1004 if (bp->b_pagelist == (upl_t) 0)
1005 panic("cluster_bp: can't handle NULL upl yet\n");
1006 if (bp->b_flags & B_READ)
1007 flags = CL_ASYNC | CL_READ;
1008 else
1009 flags = CL_ASYNC;
1010
1011 f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno);
1012
1013 return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, 0, flags, bp));
1014 }
1015
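/*
 * cluster_write:
 * top level write entry point.  Unless the vnode is marked
 * VNOCACHE_DATA and the uio comes from user space, everything goes
 * through the buffered path in cluster_write_x.  Otherwise each iovec
 * is examined: physically contiguous user buffers go to
 * cluster_phys_write, small or misaligned pieces (and head/tail
 * zero-fill work) go to cluster_write_x, and page-aligned bulk data is
 * written directly from the user's pages by cluster_nocopy_write.
 *
 * Illustrative call from a filesystem's VOP_WRITE (the 'filesize' and
 * 'devBlockSize' names are placeholders, not taken from this file):
 *
 *	error = cluster_write(vp, uio, oldsize, filesize,
 *	                      (off_t)0, (off_t)0, devBlockSize, ioflag);
 */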
1016 int
1017 cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
1018 struct vnode *vp;
1019 struct uio *uio;
1020 off_t oldEOF;
1021 off_t newEOF;
1022 off_t headOff;
1023 off_t tailOff;
1024 int devblocksize;
1025 int flags;
1026 {
1027 int prev_resid;
1028 int clip_size;
1029 off_t max_io_size;
1030 struct iovec *iov;
1031 vm_offset_t upl_offset;
1032 int upl_size;
1033 int pages_in_pl;
1034 upl_page_info_t *pl;
1035 int upl_flags;
1036 upl_t upl;
1037 int retval = 0;
1038
1039
1040 if ((!uio) || (uio->uio_segflg != UIO_USERSPACE) || (!(vp->v_flag & VNOCACHE_DATA)))
1041 {
1042 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1043 return(retval);
1044 }
1045
1046 while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0)
1047 {
1048 /* we know we have a resid, so this is safe */
1049 iov = uio->uio_iov;
1050 while (iov->iov_len == 0) {
1051 uio->uio_iov++;
1052 uio->uio_iovcnt--;
1053 iov = uio->uio_iov;
1054 }
1055
1056 /*
1057 * We check every vector target and if it is physically
1058 * contiguous space, we skip the sanity checks.
1059 */
1060
1061 upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
1062 upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
1063 pages_in_pl = 0;
1064 upl_flags = UPL_QUERY_OBJECT_TYPE;
1065 if ((vm_map_get_upl(current_map(),
1066 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1067 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
1068 {
1069 /*
1070 * the user app must have passed in an invalid address
1071 */
1072 return (EFAULT);
1073 }
1074
1075 if (upl_flags & UPL_PHYS_CONTIG)
1076 {
1077 /*
1078 * since the interface to the IOKit below us uses physical block #'s and
1079 * block counts to specify the I/O, we can't handle anything that isn't
1080 * devblocksize aligned
1081 */
1082 if ((uio->uio_offset & (devblocksize - 1)) || (uio->uio_resid & (devblocksize - 1)))
1083 return(EINVAL);
1084
1085 if (flags & IO_HEADZEROFILL)
1086 {
1087 flags &= ~IO_HEADZEROFILL;
1088
1089 if (retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, devblocksize, IO_HEADZEROFILL))
1090 return(retval);
1091 }
1092
1093 retval = cluster_phys_write(vp, uio, newEOF);
1094
1095 if (uio->uio_resid == 0 && (flags & IO_TAILZEROFILL))
1096 {
1097 retval = cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, devblocksize, IO_HEADZEROFILL);
1098 return(retval);
1099 }
1100 }
1101 else if ((uio->uio_resid < 4 * PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)))
1102 {
1103 /*
 1104 * We set a threshold of 4 pages to decide if the nocopy
1105 * write loop is worth the trouble...
1106 * we also come here if we're trying to zero the head and/or tail
1107 * of a partially written page, and the user source is not a physically contiguous region
1108 */
1109 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1110 return(retval);
1111 }
1112 else if (uio->uio_offset & PAGE_MASK_64)
1113 {
1114 /* Bring the file offset write up to a pagesize boundary */
1115 clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64));
1116 if (uio->uio_resid < clip_size)
1117 clip_size = uio->uio_resid;
1118 /*
1119 * Fake the resid going into the cluster_write_x call
1120 * and restore it on the way out.
1121 */
1122 prev_resid = uio->uio_resid;
1123 uio->uio_resid = clip_size;
1124 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1125 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1126 }
1127 else if ((int)iov->iov_base & PAGE_MASK_64)
1128 {
1129 clip_size = iov->iov_len;
1130 prev_resid = uio->uio_resid;
1131 uio->uio_resid = clip_size;
1132 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1133 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1134 }
1135 else
1136 {
1137 /*
1138 * If we come in here, we know the offset into
1139 * the file is on a pagesize boundary
1140 */
1141
1142 max_io_size = newEOF - uio->uio_offset;
1143 clip_size = uio->uio_resid;
1144 if (iov->iov_len < clip_size)
1145 clip_size = iov->iov_len;
1146 if (max_io_size < clip_size)
1147 clip_size = max_io_size;
1148
1149 if (clip_size < PAGE_SIZE)
1150 {
1151 /*
1152 * Take care of tail end of write in this vector
1153 */
1154 prev_resid = uio->uio_resid;
1155 uio->uio_resid = clip_size;
1156 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1157 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1158 }
1159 else
1160 {
1161 /* round clip_size down to a multiple of pagesize */
1162 clip_size = clip_size & ~(PAGE_MASK);
1163 prev_resid = uio->uio_resid;
1164 uio->uio_resid = clip_size;
1165 retval = cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags);
1166 if ((retval == 0) && uio->uio_resid)
1167 retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags);
1168 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
1169 }
1170 } /* end else */
1171 } /* end while */
1172 return(retval);
1173 }
1174
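/*
 * cluster_nocopy_write:
 * direct (uncached) write path.  Any delayed-write clusters are pushed
 * first, then for each chunk of up to MAX_UPL_TRANSFER pages the user's
 * pages are wired with vm_map_get_upl (retrying with increasing
 * force_data_sync), any matching pages already in the cache are thrown
 * out with ubc_page_op(... UPL_POP_DUMP ...), and a synchronous
 * cluster_io is issued straight from the user's buffer.  The upl is
 * released with an abort rather than a commit so the dirty state of the
 * source pages is left untouched by the write.
 */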
1175 static int
1176 cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags)
1177 struct vnode *vp;
1178 struct uio *uio;
1179 off_t newEOF;
1180 int devblocksize;
1181 int flags;
1182 {
1183 upl_t upl;
1184 upl_page_info_t *pl;
1185 off_t upl_f_offset;
1186 vm_offset_t upl_offset;
1187 off_t max_io_size;
1188 int io_size;
1189 int upl_size;
1190 int upl_needed_size;
1191 int pages_in_pl;
1192 int upl_flags;
1193 kern_return_t kret;
1194 struct iovec *iov;
1195 int i;
1196 int force_data_sync;
1197 int error = 0;
1198
1199 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START,
1200 (int)uio->uio_offset, (int)uio->uio_resid,
1201 (int)newEOF, devblocksize, 0);
1202
1203 /*
1204 * When we enter this routine, we know
1205 * -- the offset into the file is on a pagesize boundary
1206 * -- the resid is a page multiple
1207 * -- the resid will not exceed iov_len
1208 */
1209 cluster_try_push(vp, newEOF, 0, 1);
1210
1211 iov = uio->uio_iov;
1212
1213 while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) {
1214 io_size = uio->uio_resid;
1215
1216 if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1217 io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1218
1219 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
1220 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
1221
1222 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START,
1223 (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);
1224
1225 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
1226 {
1227 pages_in_pl = 0;
1228 upl_size = upl_needed_size;
1229 upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
1230 UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
1231
1232 kret = vm_map_get_upl(current_map(),
1233 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1234 &upl_size,
1235 &upl,
1236 NULL,
1237 &pages_in_pl,
1238 &upl_flags,
1239 force_data_sync);
1240
1241 if (kret != KERN_SUCCESS)
1242 {
1243 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1244 0, 0, 0, kret, 0);
1245
1246 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1247 (int)uio->uio_offset, (int)uio->uio_resid, kret, 1, 0);
1248
1249 /* cluster_nocopy_write: failed to get pagelist */
1250 /* do not return kret here */
1251 return(0);
1252 }
1253
1254 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1255 pages_in_pl = upl_size / PAGE_SIZE;
1256
1257 for(i=0; i < pages_in_pl; i++)
1258 {
1259 if (!upl_valid_page(pl, i))
1260 break;
1261 }
1262
1263 if (i == pages_in_pl)
1264 break;
1265
1266 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1267 UPL_ABORT_FREE_ON_EMPTY);
1268 }
1269
1270 if (force_data_sync >= 3)
1271 {
1272 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1273 i, pages_in_pl, upl_size, kret, 0);
1274
1275 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1276 (int)uio->uio_offset, (int)uio->uio_resid, kret, 2, 0);
1277 return(0);
1278 }
1279
1280 /*
1281 * Consider the possibility that upl_size wasn't satisfied.
1282 */
1283 if (upl_size != upl_needed_size)
1284 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
1285
1286 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_END,
1287 (int)upl_offset, upl_size, (int)iov->iov_base, io_size, 0);
1288
1289 if (io_size == 0)
1290 {
1291 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1292 UPL_ABORT_FREE_ON_EMPTY);
1293 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1294 (int)uio->uio_offset, uio->uio_resid, 0, 3, 0);
1295
1296 return(0);
1297 }
1298
1299 /*
1300 * Now look for pages already in the cache
1301 * and throw them away.
1302 */
1303
1304 upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
1305 max_io_size = io_size;
1306
1307 while (max_io_size) {
1308
1309 /*
1310 * Flag UPL_POP_DUMP says if the page is found
1311 * in the page cache it must be thrown away.
1312 */
1313 ubc_page_op(vp,
1314 upl_f_offset,
1315 UPL_POP_SET | UPL_POP_BUSY | UPL_POP_DUMP,
1316 0, 0);
1317 max_io_size -= PAGE_SIZE;
1318 upl_f_offset += PAGE_SIZE;
1319 }
1320
1321 /*
1322 * issue a synchronous write to cluster_io
1323 */
1324
1325 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START,
1326 (int)upl_offset, (int)uio->uio_offset, io_size, 0, 0);
1327
1328 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
1329 io_size, devblocksize, 0, (struct buf *)0);
1330
1331 if (error == 0) {
1332 /*
1333 * The cluster_io write completed successfully,
1334 * update the uio structure.
1335 */
1336 iov->iov_base += io_size;
1337 iov->iov_len -= io_size;
1338 uio->uio_resid -= io_size;
1339 uio->uio_offset += io_size;
1340 }
1341 /*
1342 * always 'commit' the I/O via the abort primitive whether the I/O
 1343 * succeeded cleanly or not... this is necessary to ensure that
1344 * we preserve the state of the DIRTY flag on the pages used to
1345 * provide the data for the I/O... the state of this flag SHOULD
1346 * NOT be changed by a write
1347 */
1348 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
1349 UPL_ABORT_FREE_ON_EMPTY);
1350
1351
1352 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END,
1353 (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
1354
1355 } /* end while */
1356
1357
1358 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_END,
1359 (int)uio->uio_offset, (int)uio->uio_resid, error, 4, 0);
1360
1361 return (error);
1362 }
1363
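/*
 * cluster_phys_write:
 * write path for a vector whose target address is physically
 * contiguous.  The user buffer is wired with vm_map_get_upl and handed
 * to cluster_io as a single CL_DEV_MEMORY request, i.e. one 'giant'
 * page, then committed or aborted depending on the outcome.
 */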
1364 static int
1365 cluster_phys_write(vp, uio, newEOF)
1366 struct vnode *vp;
1367 struct uio *uio;
1368 off_t newEOF;
1369 {
1370 upl_t upl;
1371 vm_offset_t upl_offset;
1372 int io_size;
1373 int upl_size;
1374 int upl_needed_size;
1375 int pages_in_pl;
1376 int upl_flags;
1377 kern_return_t kret;
1378 struct iovec *iov;
1379 int error = 0;
1380
1381 /*
1382 * When we enter this routine, we know
1383 * -- the resid will not exceed iov_len
 1384 * -- the vector target address is physically contiguous
1385 */
1386 cluster_try_push(vp, newEOF, 0, 1);
1387
1388 iov = uio->uio_iov;
1389 io_size = iov->iov_len;
1390 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
1391 upl_needed_size = upl_offset + io_size;
1392
1393 pages_in_pl = 0;
1394 upl_size = upl_needed_size;
1395 upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC |
1396 UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
1397
1398 kret = vm_map_get_upl(current_map(),
1399 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
1400 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);
1401
1402 if (kret != KERN_SUCCESS)
1403 {
1404 /* cluster_phys_write: failed to get pagelist */
1405 /* note: return kret here */
1406 return(EINVAL);
1407 }
1408
1409 /*
1410 * Consider the possibility that upl_size wasn't satisfied.
1411 * This is a failure in the physical memory case.
1412 */
1413 if (upl_size < upl_needed_size)
1414 {
1415 kernel_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1416 return(EINVAL);
1417 }
1418
1419 /*
1420 * issue a synchronous write to cluster_io
1421 */
1422
1423 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
1424 io_size, 0, CL_DEV_MEMORY, (struct buf *)0);
1425
1426 if (error == 0) {
1427 /*
1428 * The cluster_io write completed successfully,
1429 * update the uio structure and commit.
1430 */
1431
1432 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);
1433
1434 iov->iov_base += io_size;
1435 iov->iov_len -= io_size;
1436 uio->uio_resid -= io_size;
1437 uio->uio_offset += io_size;
1438 }
1439 else
1440 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1441
1442 return (error);
1443 }
1444
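/*
 * cluster_write_x:
 * buffered write path.  For each iteration a upl of up to
 * MAX_UPL_TRANSFER pages is created over the affected range of the
 * file, edge pages that are partially valid are pre-read, any requested
 * head/tail ranges are zero-filled, and the user data is copied in with
 * uiomove.  The dirty pages are then either gathered into the vnode's
 * delayed-write clusters (pushing existing clusters or falling back to
 * the VHASDIRTY mechanism when the cluster table is full) or, for
 * IO_SYNC and VNOCACHE_DATA callers, written out immediately through
 * cluster_io.
 */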
1445 static int
1446 cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)
1447 struct vnode *vp;
1448 struct uio *uio;
1449 off_t oldEOF;
1450 off_t newEOF;
1451 off_t headOff;
1452 off_t tailOff;
1453 int devblocksize;
1454 int flags;
1455 {
1456 upl_page_info_t *pl;
1457 upl_t upl;
1458 vm_offset_t upl_offset;
1459 int upl_size;
1460 off_t upl_f_offset;
1461 int pages_in_upl;
1462 int start_offset;
1463 int xfer_resid;
1464 int io_size;
1465 int io_flags;
1466 vm_offset_t io_address;
1467 int io_offset;
1468 int bytes_to_zero;
1469 int bytes_to_move;
1470 kern_return_t kret;
1471 int retval = 0;
1472 int uio_resid;
1473 long long total_size;
1474 long long zero_cnt;
1475 off_t zero_off;
1476 long long zero_cnt1;
1477 off_t zero_off1;
1478 daddr_t start_blkno;
1479 daddr_t last_blkno;
1480
1481 if (uio) {
1482 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1483 (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0);
1484
1485 uio_resid = uio->uio_resid;
1486 } else {
1487 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START,
1488 0, 0, (int)oldEOF, (int)newEOF, 0);
1489
1490 uio_resid = 0;
1491 }
1492 zero_cnt = 0;
1493 zero_cnt1 = 0;
1494
1495 if (flags & IO_HEADZEROFILL) {
1496 /*
1497 * some filesystems (HFS is one) don't support unallocated holes within a file...
1498 * so we zero fill the intervening space between the old EOF and the offset
1499 * where the next chunk of real data begins.... ftruncate will also use this
1500 * routine to zero fill to the new EOF when growing a file... in this case, the
1501 * uio structure will not be provided
1502 */
1503 if (uio) {
1504 if (headOff < uio->uio_offset) {
1505 zero_cnt = uio->uio_offset - headOff;
1506 zero_off = headOff;
1507 }
1508 } else if (headOff < newEOF) {
1509 zero_cnt = newEOF - headOff;
1510 zero_off = headOff;
1511 }
1512 }
1513 if (flags & IO_TAILZEROFILL) {
1514 if (uio) {
1515 zero_off1 = uio->uio_offset + uio->uio_resid;
1516
1517 if (zero_off1 < tailOff)
1518 zero_cnt1 = tailOff - zero_off1;
1519 }
1520 }
1521 if (zero_cnt == 0 && uio == (struct uio *) 0)
1522 {
1523 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1524 retval, 0, 0, 0, 0);
1525 return (0);
1526 }
1527
1528 while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) {
1529 /*
1530 * for this iteration of the loop, figure out where our starting point is
1531 */
1532 if (zero_cnt) {
1533 start_offset = (int)(zero_off & PAGE_MASK_64);
1534 upl_f_offset = zero_off - start_offset;
1535 } else if (uio_resid) {
1536 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
1537 upl_f_offset = uio->uio_offset - start_offset;
1538 } else {
1539 start_offset = (int)(zero_off1 & PAGE_MASK_64);
1540 upl_f_offset = zero_off1 - start_offset;
1541 }
1542 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 46)) | DBG_FUNC_NONE,
1543 (int)zero_off, (int)zero_cnt, (int)zero_off1, (int)zero_cnt1, 0);
1544
1545 if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1546 total_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1547
1548 /*
1549 * compute the size of the upl needed to encompass
1550 * the requested write... limit each call to cluster_io
1551 * to the maximum UPL size... cluster_io will clip if
1552 * this exceeds the maximum io_size for the device,
1553 * make sure to account for
1554 * a starting offset that's not page aligned
1555 */
1556 upl_size = (start_offset + total_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
1557
1558 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
1559 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
1560
1561 pages_in_upl = upl_size / PAGE_SIZE;
1562 io_size = upl_size - start_offset;
1563
1564 if ((long long)io_size > total_size)
1565 io_size = total_size;
1566
1567 start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64);
1568 last_blkno = start_blkno + pages_in_upl;
1569
1570 kret = ubc_create_upl(vp,
1571 upl_f_offset,
1572 upl_size,
1573 &upl,
1574 &pl,
1575 UPL_FLAGS_NONE);
1576 if (kret != KERN_SUCCESS)
1577 panic("cluster_write: failed to get pagelist");
1578
1579 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_NONE,
1580 (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
1581
1582 if (start_offset && !upl_valid_page(pl, 0)) {
1583 int read_size;
1584
1585 /*
1586 * we're starting in the middle of the first page of the upl
1587 * and the page isn't currently valid, so we're going to have
1588 * to read it in first... this is a synchronous operation
1589 */
1590 read_size = PAGE_SIZE;
1591
1592 if ((upl_f_offset + read_size) > newEOF)
1593 read_size = newEOF - upl_f_offset;
1594
1595 retval = cluster_io(vp, upl, 0, upl_f_offset, read_size, devblocksize,
1596 CL_READ, (struct buf *)0);
1597 if (retval) {
1598 /*
1599 * we had an error during the read which causes us to abort
1600 * the current cluster_write request... before we do, we need
1601 * to release the rest of the pages in the upl without modifying
 1602 * their state and mark the failed page in error
1603 */
1604 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
1605 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1606
1607 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1608 (int)upl, 0, 0, retval, 0);
1609 break;
1610 }
1611 }
1612 if ((start_offset == 0 || upl_size > PAGE_SIZE) && ((start_offset + io_size) & PAGE_MASK)) {
1613 /*
1614 * the last offset we're writing to in this upl does not end on a page
1615 * boundary... if it's not beyond the old EOF, then we'll also need to
1616 * pre-read this page in if it isn't already valid
1617 */
1618 upl_offset = upl_size - PAGE_SIZE;
1619
1620 if ((upl_f_offset + start_offset + io_size) < oldEOF &&
1621 !upl_valid_page(pl, upl_offset / PAGE_SIZE)) {
1622 int read_size;
1623
1624 read_size = PAGE_SIZE;
1625
1626 if ((upl_f_offset + upl_offset + read_size) > newEOF)
1627 read_size = newEOF - (upl_f_offset + upl_offset);
1628
1629 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size, devblocksize,
1630 CL_READ, (struct buf *)0);
1631 if (retval) {
1632 /*
1633 * we had an error during the read which causes us to abort
1634 * the current cluster_write request... before we do, we
1635 * need to release the rest of the pages in the upl without
 1636 * modifying their state and mark the failed page in error
1637 */
1638 ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
1639 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
1640
1641 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1642 (int)upl, 0, 0, retval, 0);
1643 break;
1644 }
1645 }
1646 }
1647 if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
1648 panic("cluster_write: ubc_upl_map failed\n");
1649 xfer_resid = io_size;
1650 io_offset = start_offset;
1651
1652 while (zero_cnt && xfer_resid) {
1653
1654 if (zero_cnt < (long long)xfer_resid)
1655 bytes_to_zero = zero_cnt;
1656 else
1657 bytes_to_zero = xfer_resid;
1658
1659 if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
1660 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1661
1662 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1663 (int)upl_f_offset + io_offset, bytes_to_zero,
1664 (int)io_offset, xfer_resid, 0);
1665 } else {
1666 int zero_pg_index;
1667
1668 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64));
1669 zero_pg_index = (int)((zero_off - upl_f_offset) / PAGE_SIZE_64);
1670
1671 if ( !upl_valid_page(pl, zero_pg_index)) {
1672 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1673
1674 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1675 (int)upl_f_offset + io_offset, bytes_to_zero,
1676 (int)io_offset, xfer_resid, 0);
1677
1678 } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
1679 !upl_dirty_page(pl, zero_pg_index)) {
1680 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1681
1682 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1683 (int)upl_f_offset + io_offset, bytes_to_zero,
1684 (int)io_offset, xfer_resid, 0);
1685 }
1686 }
1687 xfer_resid -= bytes_to_zero;
1688 zero_cnt -= bytes_to_zero;
1689 zero_off += bytes_to_zero;
1690 io_offset += bytes_to_zero;
1691 }
1692 if (xfer_resid && uio_resid) {
1693 bytes_to_move = min(uio_resid, xfer_resid);
1694
1695 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 42)) | DBG_FUNC_NONE,
1696 (int)uio->uio_offset, bytes_to_move, uio_resid, xfer_resid, 0);
1697
1698 retval = uiomove((caddr_t)(io_address + io_offset), bytes_to_move, uio);
1699
1700
1701 if (retval) {
1702 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
1703 panic("cluster_write: kernel_upl_unmap failed\n");
1704
1705 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
1706
1707 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE,
1708 (int)upl, 0, 0, retval, 0);
1709 } else {
1710 uio_resid -= bytes_to_move;
1711 xfer_resid -= bytes_to_move;
1712 io_offset += bytes_to_move;
1713 }
1714 }
1715 while (xfer_resid && zero_cnt1 && retval == 0) {
1716
1717 if (zero_cnt1 < (long long)xfer_resid)
1718 bytes_to_zero = zero_cnt1;
1719 else
1720 bytes_to_zero = xfer_resid;
1721
1722 if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) {
1723 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1724
1725 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1726 (int)upl_f_offset + io_offset,
1727 bytes_to_zero, (int)io_offset, xfer_resid, 0);
1728 } else {
1729 int zero_pg_index;
1730
1731 bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64));
1732 zero_pg_index = (int)((zero_off1 - upl_f_offset) / PAGE_SIZE_64);
1733
1734 if ( !upl_valid_page(pl, zero_pg_index)) {
1735 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1736
1737 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1738 (int)upl_f_offset + io_offset,
1739 bytes_to_zero, (int)io_offset, xfer_resid, 0);
1740
1741 } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY &&
1742 !upl_dirty_page(pl, zero_pg_index)) {
1743 bzero((caddr_t)(io_address + io_offset), bytes_to_zero);
1744
1745 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1746 (int)upl_f_offset + io_offset,
1747 bytes_to_zero, (int)io_offset, xfer_resid, 0);
1748 }
1749 }
1750 xfer_resid -= bytes_to_zero;
1751 zero_cnt1 -= bytes_to_zero;
1752 zero_off1 += bytes_to_zero;
1753 io_offset += bytes_to_zero;
1754 }
1755
1756 if (retval == 0) {
1757 int cl_index;
1758 int can_delay;
1759
1760 io_size += start_offset;
1761
1762 if ((upl_f_offset + io_size) >= newEOF && io_size < upl_size) {
1763 /*
1764 * if we're extending the file with this write
1765 * we'll zero fill the rest of the page so that
1766 * if the file gets extended again in such a way as to leave a
1767 * hole starting at this EOF, we'll have zero's in the correct spot
1768 */
1769 bzero((caddr_t)(io_address + io_size), upl_size - io_size);
1770
1771 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 43)) | DBG_FUNC_NONE,
1772 (int)upl_f_offset + io_size,
1773 upl_size - io_size, 0, 0, 0);
1774 }
1775 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
1776 panic("cluster_write: kernel_upl_unmap failed\n");
1777
1778 if (flags & IO_SYNC)
1779 /*
 1780 * if the IO_SYNC flag is set then we need to
1781 * bypass any clusters and immediately issue
1782 * the I/O
1783 */
1784 goto issue_io;
1785
1786 if (vp->v_clen == 0)
1787 /*
1788 * no clusters currently present
1789 */
1790 goto start_new_cluster;
1791
1792 /*
1793 * keep track of the overall dirty page
1794 * range we've developed
1795 * in case we have to fall back to the
1796 * VHASDIRTY method of flushing
1797 */
1798 if (vp->v_flag & VHASDIRTY)
1799 goto delay_io;
1800
1801 for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
1802 /*
1803 * we have an existing cluster... see if this write will extend it nicely
1804 */
1805 if (start_blkno >= vp->v_clusters[cl_index].start_pg) {
1806 /*
1807 * the current write starts at or after the current cluster
1808 */
1809 if (last_blkno <= (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
1810 /*
1811 * we have a write that fits entirely
1812 * within the existing cluster limits
1813 */
1814 if (last_blkno > vp->v_clusters[cl_index].last_pg)
1815 /*
1816 * update our idea of where the cluster ends
1817 */
1818 vp->v_clusters[cl_index].last_pg = last_blkno;
1819 break;
1820 }
1821 if (start_blkno < (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) {
1822 /*
1823 * we have a write that starts in the middle of the current cluster
1824 * but extends beyond the cluster's limit
1825 * we'll clip the current cluster if we actually
1826 * overlap with the new write
1827 * and start a new cluster with the current write
1828 */
1829 if (vp->v_clusters[cl_index].last_pg > start_blkno)
1830 vp->v_clusters[cl_index].last_pg = start_blkno;
1831 }
1832 /*
1833 * we also get here for the case where the current write starts
1834 * beyond the limit of the existing cluster
1835 *
1836 * in either case, we'll check the remaining clusters before
1837 * starting a new one
1838 */
1839 } else {
1840 /*
1841 * the current write starts in front of the current cluster
1842 */
1843 if ((vp->v_clusters[cl_index].last_pg - start_blkno) <= MAX_UPL_TRANSFER) {
1844 /*
1845 * we can just merge the old cluster
1846 * with the new request and leave it
1847 * in the cache
1848 */
1849 vp->v_clusters[cl_index].start_pg = start_blkno;
1850
1851 if (last_blkno > vp->v_clusters[cl_index].last_pg) {
1852 /*
1853 * the current write completely
1854 * envelops the existing cluster
1855 */
1856 vp->v_clusters[cl_index].last_pg = last_blkno;
1857 }
1858 break;
1859 }
1860
1861 /*
1862 * if we were to combine this write with the current cluster
1863 * we would exceed the cluster size limit.... so,
1864 * let's see if there's any overlap of the new I/O with
1865 * the existing cluster...
1866 *
1867 */
1868 if (last_blkno > vp->v_clusters[cl_index].start_pg)
1869 /*
1870 * the current write extends into the existing cluster
1871 * clip the current cluster by moving the start position
1872 * to where the current write ends
1873 */
1874 vp->v_clusters[cl_index].start_pg = last_blkno;
1875 /*
1876 * if we get here, there was no way to merge
1877 * the new I/O with this cluster and
1878 * keep it under our maximum cluster length
1879 * we'll check the remaining clusters before starting a new one
1880 */
1881 }
1882 }
1883 if (cl_index < vp->v_clen)
1884 /*
1885 * we found an existing cluster that we
 1886 * could merge this I/O into
1887 */
1888 goto delay_io;
1889
1890 if (vp->v_clen < MAX_CLUSTERS && !(vp->v_flag & VNOCACHE_DATA))
1891 /*
1892 * we didn't find an existing cluster to
1893 * merge into, but there's room to start
1894 * a new one
1895 */
1896 goto start_new_cluster;
1897
1898 /*
1899 * no existing cluster to merge with and no
1900 * room to start a new one... we'll try
1901 * pushing the existing ones... if none of
1902 * them are able to be pushed, we'll have
1903 * to fall back on the VHASDIRTY mechanism
1904 * cluster_try_push will set v_clen to the
1905 * number of remaining clusters if it is
1906 * unable to push all of them
1907 */
1908 if (vp->v_flag & VNOCACHE_DATA)
1909 can_delay = 0;
1910 else
1911 can_delay = 1;
1912
1913 if (cluster_try_push(vp, newEOF, 0, 0) == 0) {
1914 vp->v_flag |= VHASDIRTY;
1915 goto delay_io;
1916 }
1917 start_new_cluster:
1918 if (vp->v_clen == 0) {
1919 vp->v_ciosiz = devblocksize;
1920 vp->v_cstart = start_blkno;
1921 vp->v_lastw = last_blkno;
1922 }
1923 vp->v_clusters[vp->v_clen].start_pg = start_blkno;
1924 vp->v_clusters[vp->v_clen].last_pg = last_blkno;
1925 vp->v_clen++;
1926 delay_io:
1927 /*
1928 * make sure we keep v_cstart and v_lastw up to
1929 * date in case we have to fall back on the
1930 * VHASDIRTY mechanism (or we've already entered it)
1931 */
1932 if (start_blkno < vp->v_cstart)
1933 vp->v_cstart = start_blkno;
1934 if (last_blkno > vp->v_lastw)
1935 vp->v_lastw = last_blkno;
1936
1937 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
1938 continue;
1939 issue_io:
1940 /*
1941 * in order to maintain some semblance of coherency with mapped writes
1942 * we need to write the cluster back out as a multiple of the PAGESIZE
1943 * unless the cluster encompasses the last page of the file... in this
1944 * case we'll round out to the nearest device block boundary
1945 */
1946 io_size = upl_size;
1947
1948 if ((upl_f_offset + io_size) > newEOF) {
1949 io_size = newEOF - upl_f_offset;
1950 io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1);
1951 }
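/*
 * the mask arithmetic above rounds io_size up to a devblocksize
 * multiple... e.g. (purely illustrative numbers) with a devblocksize
 * of 512, an io_size of 1000 becomes (1000 + 511) & ~511 == 1024
 */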
1952
1953 if (flags & IO_SYNC)
1954 io_flags = CL_COMMIT | CL_AGE;
1955 else
1956 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
1957
1958 if (vp->v_flag & VNOCACHE_DATA)
1959 io_flags |= CL_DUMP;
1960
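/*
 * simple write throttle... if this vnode already has ASYNC_THROTTLE
 * or more async writes in flight, sleep on &vp->v_numoutput until an
 * outstanding write completes and issues the corresponding wakeup
 * (the completion side is not shown in this excerpt)
 */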
1961 while (vp->v_numoutput >= ASYNC_THROTTLE) {
1962 vp->v_flag |= VTHROTTLED;
1963 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_write", 0);
1964 }
1965 retval = cluster_io(vp, upl, 0, upl_f_offset, io_size, devblocksize,
1966 io_flags, (struct buf *)0);
1967 }
1968 }
1969 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END,
1970 retval, 0, 0, 0, 0);
1971
1972 return (retval);
1973 }
1974
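/*
 * cluster_read is the top level entry point for a clustered read...
 * it routes each iovec to one of three paths:
 *	- cluster_phys_read    if the target buffer is physically contiguous
 *	- cluster_nocopy_read  for page aligned, uncached (VNOCACHE_DATA)
 *	                       user space reads of at least 4 pages
 *	- cluster_read_x       for everything else (the normal cached path)
 *
 * a minimal usage sketch, assuming a filesystem's read vnop (the names
 * fp and devBlockSize are illustrative, not taken from this file):
 *
 *	error = cluster_read(vp, uio, (off_t)fp->ff_size, devBlockSize, 0);
 */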
1975 int
1976 cluster_read(vp, uio, filesize, devblocksize, flags)
1977 struct vnode *vp;
1978 struct uio *uio;
1979 off_t filesize;
1980 int devblocksize;
1981 int flags;
1982 {
1983 int prev_resid;
1984 int clip_size;
1985 off_t max_io_size;
1986 struct iovec *iov;
1987 vm_offset_t upl_offset;
1988 int upl_size;
1989 int pages_in_pl;
1990 upl_page_info_t *pl;
1991 int upl_flags;
1992 upl_t upl;
1993 int retval = 0;
1994
1995 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
1996 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
1997
1998 /*
1999 * the nocopy read path is only considered for uncached
2000 * (VNOCACHE_DATA) reads issued from user space buffers
2001 */
2002
2003 if (!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE)))
2004 {
2005 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2006 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
2007 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
2008 return(retval);
2009 }
2010
2011 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0)
2012 {
2013 /* we know we have a resid, so this is safe */
2014 iov = uio->uio_iov;
2015 while (iov->iov_len == 0) {
2016 uio->uio_iov++;
2017 uio->uio_iovcnt--;
2018 iov = uio->uio_iov;
2019 }
2020
2021 /*
2022 * We check every vector target and if it is physically
2023 * contiguous space, we skip the sanity checks.
2024 */
2025
2026 upl_offset = (vm_offset_t)iov->iov_base & ~PAGE_MASK;
2027 upl_size = (upl_offset + PAGE_SIZE +(PAGE_SIZE -1)) & ~PAGE_MASK;
2028 pages_in_pl = 0;
2029 upl_flags = UPL_QUERY_OBJECT_TYPE;
2030 if((vm_map_get_upl(current_map(),
2031 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2032 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0)) != KERN_SUCCESS)
2033 {
2034 /*
2035 * the user app must have passed in an invalid address
2036 */
2037 return (EFAULT);
2038 }
2039
2040 if (upl_flags & UPL_PHYS_CONTIG)
2041 {
2042 retval = cluster_phys_read(vp, uio, filesize);
2043 }
2044 else if (uio->uio_resid < 4 * PAGE_SIZE)
2045 {
2046 /*
2047 * We set a threshold of 4 pages to decide if the nocopy
2048 * read loop is worth the trouble...
2049 */
2050 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2051 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
2052 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
2053 return(retval);
2054 }
2055 else if (uio->uio_offset & PAGE_MASK_64)
2056 {
2057 /* Bring the file offset read up to a pagesize boundary */
2058 clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64));
2059 if (uio->uio_resid < clip_size)
2060 clip_size = uio->uio_resid;
2061 /*
2062 * Fake the resid going into the cluster_read_x call
2063 * and restore it on the way out.
2064 */
2065 prev_resid = uio->uio_resid;
2066 uio->uio_resid = clip_size;
2067 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2068 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2069 }
2070 else if ((int)iov->iov_base & PAGE_MASK_64)
2071 {
2072 clip_size = iov->iov_len;
2073 prev_resid = uio->uio_resid;
2074 uio->uio_resid = clip_size;
2075 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2076 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2077 }
2078 else
2079 {
2080 /*
2081 * If we come in here, we know the offset into
2082 * the file is on a pagesize boundary
2083 */
2084
2085 max_io_size = filesize - uio->uio_offset;
2086 clip_size = uio->uio_resid;
2087 if (iov->iov_len < clip_size)
2088 clip_size = iov->iov_len;
2089 if (max_io_size < clip_size)
2090 clip_size = (int)max_io_size;
2091
2092 if (clip_size < PAGE_SIZE)
2093 {
2094 /*
2095 * Take care of the tail end of the read in this vector.
2096 */
2097 prev_resid = uio->uio_resid;
2098 uio->uio_resid = clip_size;
2099 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2100 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2101 }
2102 else
2103 {
2104 /* round clip_size down to a multiple of pagesize */
2105 clip_size = clip_size & ~(PAGE_MASK);
2106 prev_resid = uio->uio_resid;
2107 uio->uio_resid = clip_size;
2108 retval = cluster_nocopy_read(vp, uio, filesize, devblocksize, flags);
2109 if ((retval==0) && uio->uio_resid)
2110 retval = cluster_read_x(vp, uio, filesize, devblocksize, flags);
2111 uio->uio_resid = prev_resid - (clip_size - uio->uio_resid);
2112 }
2113 } /* end else */
2114 } /* end while */
2115
2116 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
2117 (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0);
2118
2119 return(retval);
2120 }
2121
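/*
 * cluster_read_x handles the normal, cached read path... in outline
 * (descriptive summary only, the code below is authoritative):
 *
 *	1. if the data is already resident, copy it straight to user
 *	   space with ubc_page_op/uiomove and consider a read-ahead
 *	2. otherwise create a upl covering the request, find the run
 *	   of non-valid pages and fill it with a synchronous cluster_io
 *	3. copy the upl contents out to the user buffer, issue any
 *	   prefetch/read-ahead, then commit or abort the upl pages
 */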
2122 static int
2123 cluster_read_x(vp, uio, filesize, devblocksize, flags)
2124 struct vnode *vp;
2125 struct uio *uio;
2126 off_t filesize;
2127 int devblocksize;
2128 int flags;
2129 {
2130 upl_page_info_t *pl;
2131 upl_t upl;
2132 vm_offset_t upl_offset;
2133 int upl_size;
2134 off_t upl_f_offset;
2135 int start_offset;
2136 int start_pg;
2137 int last_pg;
2138 int uio_last;
2139 int pages_in_upl;
2140 off_t max_size;
2141 int io_size;
2142 vm_offset_t io_address;
2143 kern_return_t kret;
2144 int segflg;
2145 int error = 0;
2146 int retval = 0;
2147 int b_lblkno;
2148 int e_lblkno;
2149
2150 b_lblkno = (int)(uio->uio_offset / PAGE_SIZE_64);
2151
2152 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2153 /*
2154 * compute the size of the upl needed to encompass
2155 * the requested read... limit each call to cluster_io
2156 * to the maximum UPL size... cluster_io will clip if
2157 * this exceeds the maximum io_size for the device...
2158 * also make sure to account for a starting offset
2159 * that's not page aligned
2160 */
2161 start_offset = (int)(uio->uio_offset & PAGE_MASK_64);
2162 upl_f_offset = uio->uio_offset - (off_t)start_offset;
2163 max_size = filesize - uio->uio_offset;
2164
2165 if ((off_t)((unsigned int)uio->uio_resid) < max_size)
2166 io_size = uio->uio_resid;
2167 else
2168 io_size = max_size;
2169
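/*
 * fast path for cached user space reads... as long as the pages are
 * already resident we grab each one with ubc_page_op (marking it
 * busy), uiomove the data directly from its physical address, and
 * then release it, without ever building a upl
 */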
2170 if (uio->uio_segflg == UIO_USERSPACE && !(vp->v_flag & VNOCACHE_DATA)) {
2171 segflg = uio->uio_segflg;
2172
2173 uio->uio_segflg = UIO_PHYS_USERSPACE;
2174
2175 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2176 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2177
2178 while (io_size && retval == 0) {
2179 int xsize;
2180 vm_offset_t paddr;
2181
2182 if (ubc_page_op(vp,
2183 upl_f_offset,
2184 UPL_POP_SET | UPL_POP_BUSY,
2185 &paddr, 0) != KERN_SUCCESS)
2186 break;
2187
2188 xsize = PAGE_SIZE - start_offset;
2189
2190 if (xsize > io_size)
2191 xsize = io_size;
2192
2193 retval = uiomove((caddr_t)(paddr + start_offset), xsize, uio);
2194
2195 ubc_page_op(vp, upl_f_offset,
2196 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2197
2198 io_size -= xsize;
2199 start_offset = (int)
2200 (uio->uio_offset & PAGE_MASK_64);
2201 upl_f_offset = uio->uio_offset - start_offset;
2202 }
2203 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2204 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2205
2206 uio->uio_segflg = segflg;
2207
2208 if (retval)
2209 break;
2210
2211 if (io_size == 0) {
2212 /*
2213 * we're already finished with this read request
2214 * let's see if we should do a read-ahead
2215 */
2216 e_lblkno = (int)
2217 ((uio->uio_offset - 1) / PAGE_SIZE_64);
2218
2219 if (!(vp->v_flag & VRAOFF))
2220 /*
2221 * let's try to read ahead if we're in
2222 * a sequential access pattern
2223 */
2224 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2225 vp->v_lastr = e_lblkno;
2226
2227 break;
2228 }
2229 max_size = filesize - uio->uio_offset;
2230 }
2231 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2232 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2233 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2234 pages_in_upl = upl_size / PAGE_SIZE;
2235
2236 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START,
2237 (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
2238
2239 kret = ubc_create_upl(vp,
2240 upl_f_offset,
2241 upl_size,
2242 &upl,
2243 &pl,
2244 UPL_FLAGS_NONE);
2245 if (kret != KERN_SUCCESS)
2246 panic("cluster_read: failed to get pagelist");
2247
2248 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END,
2249 (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
2250
2251 /*
2252 * scan from the beginning of the upl looking for the first
2253 * non-valid page.... this will become the first page in
2254 * the request we're going to make to 'cluster_io'... if all
2255 * of the pages are valid, we won't call through to 'cluster_io'
2256 */
2257 for (start_pg = 0; start_pg < pages_in_upl; start_pg++) {
2258 if (!upl_valid_page(pl, start_pg))
2259 break;
2260 }
2261
2262 /*
2263 * scan from the starting invalid page looking for a valid
2264 * page before the end of the upl is reached, if we
2265 * find one, then it will be the last page of the request to
2266 * 'cluster_io'
2267 */
2268 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2269 if (upl_valid_page(pl, last_pg))
2270 break;
2271 }
2272
2273 if (start_pg < last_pg) {
2274 /*
2275 * we found a range of 'invalid' pages that must be filled
2276 * if the last page in this range is the last page of the file
2277 * we may have to clip the size of it to keep from reading past
2278 * the end of the last physical block associated with the file
2279 */
2280 upl_offset = start_pg * PAGE_SIZE;
2281 io_size = (last_pg - start_pg) * PAGE_SIZE;
2282
2283 if ((upl_f_offset + upl_offset + io_size) > filesize)
2284 io_size = filesize - (upl_f_offset + upl_offset);
2285
2286 /*
2287 * issue a synchronous read to cluster_io
2288 */
2289
2290 error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
2291 io_size, devblocksize, CL_READ, (struct buf *)0);
2292 }
2293 if (error == 0) {
2294 /*
2295 * if the read completed successfully, or there was no I/O request
2296 * issued, then map the upl into kernel address space and
2297 * move the data into user land.... we'll first add on any 'valid'
2298 * pages that were present in the upl when we acquired it.
2299 */
2300 u_int val_size;
2301 u_int size_of_prefetch;
2302
2303 for (uio_last = last_pg; uio_last < pages_in_upl; uio_last++) {
2304 if (!upl_valid_page(pl, uio_last))
2305 break;
2306 }
2307 /*
2308 * compute size to transfer this round, if uio->uio_resid is
2309 * still non-zero after this uiomove, we'll loop around and
2310 * set up for another I/O.
2311 */
2312 val_size = (uio_last * PAGE_SIZE) - start_offset;
2313
2314 if (max_size < val_size)
2315 val_size = max_size;
2316
2317 if (uio->uio_resid < val_size)
2318 val_size = uio->uio_resid;
2319
2320 e_lblkno = (int)((uio->uio_offset + ((off_t)val_size - 1)) / PAGE_SIZE_64);
2321
2322 if (size_of_prefetch = (uio->uio_resid - val_size)) {
2323 /*
2324 * if there's still I/O left to do for this request, then issue a
2325 * pre-fetch I/O... the I/O wait time will overlap
2326 * with the copying of the data
2327 */
2328 cluster_rd_prefetch(vp, uio->uio_offset + val_size, size_of_prefetch, filesize, devblocksize);
2329 } else {
2330 if (!(vp->v_flag & VRAOFF) && !(vp->v_flag & VNOCACHE_DATA))
2331 /*
2332 * let's try to read ahead if we're in
2333 * a sequential access pattern
2334 */
2335 cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize);
2336 vp->v_lastr = e_lblkno;
2337 }
2338 if (uio->uio_segflg == UIO_USERSPACE) {
2339 int offset;
2340
2341 segflg = uio->uio_segflg;
2342
2343 uio->uio_segflg = UIO_PHYS_USERSPACE;
2344
2345
2346 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
2347 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2348
2349 offset = start_offset;
2350
2351 while (val_size && retval == 0) {
2352 int csize;
2353 int i;
2354 caddr_t paddr;
2355
2356 i = offset / PAGE_SIZE;
2357 csize = min(PAGE_SIZE - start_offset, val_size);
2358
2359 paddr = (caddr_t)upl_phys_page(pl, i) + start_offset;
2360
2361 retval = uiomove(paddr, csize, uio);
2362
2363 val_size -= csize;
2364 offset += csize;
2365 start_offset = offset & PAGE_MASK;
2366 }
2367 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END,
2368 (int)uio->uio_offset, val_size, uio->uio_resid, 0, 0);
2369
2370 uio->uio_segflg = segflg;
2371 }
2372 else
2373 {
2374 if ((kret = ubc_upl_map(upl, &io_address)) != KERN_SUCCESS)
2375 panic("cluster_read: ubc_upl_map() failed\n");
2376
2377 retval = uiomove((caddr_t)(io_address + start_offset), val_size, uio);
2378
2379 if ((kret = ubc_upl_unmap(upl)) != KERN_SUCCESS)
2380 panic("cluster_read: ubc_upl_unmap() failed\n");
2381 }
2382 }
2383 if (start_pg < last_pg) {
2384 /*
2385 * compute the range of pages that we actually issued an I/O for
2386 * and either commit them as valid if the I/O succeeded
2387 * or abort them if the I/O failed
2388 */
2389 io_size = (last_pg - start_pg) * PAGE_SIZE;
2390
2391 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2392 (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);
2393
2394 if (error || (vp->v_flag & VNOCACHE_DATA))
2395 ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size,
2396 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2397 else
2398 ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size,
2399 UPL_COMMIT_CLEAR_DIRTY
2400 | UPL_COMMIT_FREE_ON_EMPTY
2401 | UPL_COMMIT_INACTIVATE);
2402
2403 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2404 (int)upl, start_pg * PAGE_SIZE, io_size, error, 0);
2405 }
2406 if ((last_pg - start_pg) < pages_in_upl) {
2407 int cur_pg;
2408 int commit_flags;
2409
2410 /*
2411 * the set of pages that we issued an I/O for did not encompass
2412 * the entire upl... so just release these without modifying
2413 * their state
2414 */
2415 if (error)
2416 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2417 else {
2418 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START,
2419 (int)upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0);
2420
2421 if (start_pg) {
2422 /*
2423 * we found some already valid pages at the beginning of
2424 * the upl... commit these back to the inactive list with
2425 * reference cleared
2426 */
2427 for (cur_pg = 0; cur_pg < start_pg; cur_pg++) {
2428 commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2429 | UPL_COMMIT_INACTIVATE;
2430
2431 if (upl_dirty_page(pl, cur_pg))
2432 commit_flags |= UPL_COMMIT_SET_DIRTY;
2433
2434 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2435 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2436 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2437 else
2438 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2439 PAGE_SIZE, commit_flags);
2440 }
2441 }
2442 if (last_pg < uio_last) {
2443 /*
2444 * we found some already valid pages immediately after the
2445 * pages we issued I/O for, commit these back to the
2446 * inactive list with reference cleared
2447 */
2448 for (cur_pg = last_pg; cur_pg < uio_last; cur_pg++) {
2449 commit_flags = UPL_COMMIT_FREE_ON_EMPTY
2450 | UPL_COMMIT_INACTIVATE;
2451
2452 if (upl_dirty_page(pl, cur_pg))
2453 commit_flags |= UPL_COMMIT_SET_DIRTY;
2454
2455 if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA))
2456 ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE,
2457 UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY);
2458 else
2459 ubc_upl_commit_range(upl, cur_pg * PAGE_SIZE,
2460 PAGE_SIZE, commit_flags);
2461 }
2462 }
2463 if (uio_last < pages_in_upl) {
2464 /*
2465 * there were some invalid pages beyond the valid pages
2466 * that we didn't issue an I/O for, just release them
2467 * unchanged
2468 */
2469 ubc_upl_abort_range(upl, uio_last * PAGE_SIZE,
2470 (pages_in_upl - uio_last) * PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
2471 }
2472
2473 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END,
2474 (int)upl, -1, -1, 0, 0);
2475 }
2476 }
2477 if (retval == 0)
2478 retval = error;
2479 }
2480
2481 return (retval);
2482 }
2483
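/*
 * cluster_nocopy_read reads directly into the user's buffer, avoiding
 * the copy through the page cache... rough outline (descriptive only):
 *
 *	1. copy out any pages that are already in the cache
 *	2. size an I/O over the following run of absent pages
 *	3. wire the user buffer with vm_map_get_upl (retrying with a
 *	   stronger force_data_sync up to 3 times)
 *	4. issue a synchronous cluster_io with CL_READ | CL_NOZERO and
 *	   commit or abort the upl based on the result
 */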
2484 static int
2485 cluster_nocopy_read(vp, uio, filesize, devblocksize, flags)
2486 struct vnode *vp;
2487 struct uio *uio;
2488 off_t filesize;
2489 int devblocksize;
2490 int flags;
2491 {
2492 upl_t upl;
2493 upl_page_info_t *pl;
2494 off_t upl_f_offset;
2495 vm_offset_t upl_offset;
2496 off_t start_upl_f_offset;
2497 off_t max_io_size;
2498 int io_size;
2499 int upl_size;
2500 int upl_needed_size;
2501 int pages_in_pl;
2502 vm_offset_t paddr;
2503 int upl_flags;
2504 kern_return_t kret;
2505 int segflg;
2506 struct iovec *iov;
2507 int i;
2508 int force_data_sync;
2509 int error = 0;
2510 int retval = 0;
2511
2512 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START,
2513 (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0);
2514
2515 /*
2516 * When we enter this routine, we know
2517 * -- the offset into the file is on a pagesize boundary
2518 * -- the resid is a page multiple
2519 * -- the resid will not exceed iov_len
2520 */
2521
2522 iov = uio->uio_iov;
2523 while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) {
2524
2525 max_io_size = filesize - uio->uio_offset;
2526
2527 if (max_io_size < (off_t)((unsigned int)uio->uio_resid))
2528 io_size = max_io_size;
2529 else
2530 io_size = uio->uio_resid;
2531
2532 /*
2533 * We don't come into this routine unless
2534 * UIO_USERSPACE is set.
2535 */
2536 segflg = uio->uio_segflg;
2537
2538 uio->uio_segflg = UIO_PHYS_USERSPACE;
2539
2540 /*
2541 * First look for pages already in the cache
2542 * and move them to user space.
2543 */
2544 while (io_size && (retval == 0)) {
2545 upl_f_offset = uio->uio_offset;
2546
2547 /*
2548 * If this call fails, it means the page is not
2549 * in the page cache.
2550 */
2551 if (ubc_page_op(vp, upl_f_offset,
2552 UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) != KERN_SUCCESS)
2553 break;
2554
2555 retval = uiomove((caddr_t)(paddr), PAGE_SIZE, uio);
2556
2557 ubc_page_op(vp, upl_f_offset,
2558 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2559
2560 io_size -= PAGE_SIZE;
2561 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 71)) | DBG_FUNC_NONE,
2562 (int)uio->uio_offset, io_size, uio->uio_resid, 0, 0);
2563 }
2564
2565 uio->uio_segflg = segflg;
2566
2567 if (retval)
2568 {
2569 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2570 (int)uio->uio_offset, uio->uio_resid, 2, retval, 0);
2571 return(retval);
2572 }
2573
2574 /* If we are already finished with this read, then return */
2575 if (io_size == 0)
2576 {
2577
2578 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2579 (int)uio->uio_offset, uio->uio_resid, 3, io_size, 0);
2580 return(0);
2581 }
2582
2583 max_io_size = io_size;
2584 if (max_io_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2585 max_io_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2586
2587 start_upl_f_offset = uio->uio_offset; /* this is page aligned in the file */
2588 upl_f_offset = start_upl_f_offset;
2589 io_size = 0;
2590
2591 while(io_size < max_io_size)
2592 {
2593
2594 if(ubc_page_op(vp, upl_f_offset,
2595 UPL_POP_SET | UPL_POP_BUSY, &paddr, 0) == KERN_SUCCESS)
2596 {
2597 ubc_page_op(vp, upl_f_offset,
2598 UPL_POP_CLR | UPL_POP_BUSY, 0, 0);
2599 break;
2600 }
2601
2602 /*
2603 * Build up the io request parameters.
2604 */
2605
2606 io_size += PAGE_SIZE;
2607 upl_f_offset += PAGE_SIZE;
2608 }
2609
2610 if (io_size == 0)
2611 return(retval);
2612
2613 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2614 upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK;
2615
2616 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START,
2617 (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0);
2618
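/*
 * wire down the user buffer pages for the upcoming I/O... if any of
 * the returned pages are not valid, retry with a progressively larger
 * force_data_sync value (up to 3 attempts) before giving up
 */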
2619 for (force_data_sync = 0; force_data_sync < 3; force_data_sync++)
2620 {
2621 pages_in_pl = 0;
2622 upl_size = upl_needed_size;
2623 upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2624
2625 kret = vm_map_get_upl(current_map(),
2626 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2627 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync);
2628
2629 if (kret != KERN_SUCCESS)
2630 {
2631 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2632 (int)upl_offset, upl_size, io_size, kret, 0);
2633
2634 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2635 (int)uio->uio_offset, uio->uio_resid, 4, retval, 0);
2636
2637 /* cluster_nocopy_read: failed to get pagelist */
2638 /* do not return kret here */
2639 return(retval);
2640 }
2641
2642 pages_in_pl = upl_size / PAGE_SIZE;
2643 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2644
2645 for(i=0; i < pages_in_pl; i++)
2646 {
2647 if (!upl_valid_page(pl, i))
2648 break;
2649 }
2650 if (i == pages_in_pl)
2651 break;
2652
2653 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2654 UPL_ABORT_FREE_ON_EMPTY);
2655 }
2656
2657 if (force_data_sync >= 3)
2658 {
2659 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2660 (int)upl_offset, upl_size, io_size, kret, 0);
2661
2662 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2663 (int)uio->uio_offset, uio->uio_resid, 5, retval, 0);
2664 return(retval);
2665 }
2666 /*
2667 * Consider the possibility that upl_size wasn't satisfied.
2668 */
2669 if (upl_size != upl_needed_size)
2670 io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK;
2671
2672 if (io_size == 0)
2673 {
2674 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2675 UPL_ABORT_FREE_ON_EMPTY);
2676 return(retval);
2677 }
2678
2679 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END,
2680 (int)upl_offset, upl_size, io_size, kret, 0);
2681
2682 /*
2683 * issue a synchronous read to cluster_io
2684 */
2685
2686 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START,
2687 (int)upl, (int)upl_offset, (int)start_upl_f_offset, io_size, 0);
2688
2689 error = cluster_io(vp, upl, upl_offset, start_upl_f_offset,
2690 io_size, devblocksize, CL_READ| CL_NOZERO, (struct buf *)0);
2691
2692 if (error == 0) {
2693 /*
2694 * The cluster_io read completed successfully,
2695 * update the uio structure and commit.
2696 */
2697
2698 ubc_upl_commit_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2699 UPL_COMMIT_SET_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
2700
2701 iov->iov_base += io_size;
2702 iov->iov_len -= io_size;
2703 uio->uio_resid -= io_size;
2704 uio->uio_offset += io_size;
2705 }
2706 else {
2707 ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size,
2708 UPL_ABORT_FREE_ON_EMPTY);
2709 }
2710
2711 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END,
2712 (int)upl, (int)uio->uio_offset, (int)uio->uio_resid, error, 0);
2713
2714 if (retval == 0)
2715 retval = error;
2716
2717 } /* end while */
2718
2719
2720 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END,
2721 (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0);
2722
2723 return (retval);
2724 }
2725
2726
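/*
 * cluster_phys_read handles iovec targets that cluster_read found to
 * be physically contiguous (UPL_PHYS_CONTIG)... the whole request is
 * wired with a single vm_map_get_upl call and handed to cluster_io
 * as one CL_READ | CL_NOZERO | CL_DEV_MEMORY transfer
 */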
2727 static int
2728 cluster_phys_read(vp, uio, filesize)
2729 struct vnode *vp;
2730 struct uio *uio;
2731 off_t filesize;
2732 {
2733 upl_t upl;
2734 vm_offset_t upl_offset;
2735 off_t max_size;
2736 int io_size;
2737 int upl_size;
2738 int upl_needed_size;
2739 int pages_in_pl;
2740 int upl_flags;
2741 kern_return_t kret;
2742 struct iovec *iov;
2743 int error;
2744
2745 /*
2746 * When we enter this routine, we know
2747 * -- the resid will not exceed iov_len
2748 * -- the target address is physically contiguous
2749 */
2750
2751 iov = uio->uio_iov;
2752
2753 max_size = filesize - uio->uio_offset;
2754
2755 if (max_size < (off_t)((unsigned int)iov->iov_len))
2756 io_size = max_size;
2757 else
2758 io_size = iov->iov_len;
2759
2760 upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK_64;
2761 upl_needed_size = upl_offset + io_size;
2762
2763 pages_in_pl = 0;
2764 upl_size = upl_needed_size;
2765 upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL;
2766
2767 kret = vm_map_get_upl(current_map(),
2768 (vm_offset_t)iov->iov_base & ~PAGE_MASK,
2769 &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0);
2770
2771 if (kret != KERN_SUCCESS)
2772 {
2773 /* cluster_phys_read: failed to get pagelist */
2774 return(EINVAL);
2775 }
2776
2777 /*
2778 * Consider the possibility that upl_size wasn't satisfied.
2779 */
2780 if (upl_size < upl_needed_size)
2781 {
2782 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2783 return(EINVAL);
2784 }
2785
2786 /*
2787 * issue a synchronous read to cluster_io
2788 */
2789
2790 error = cluster_io(vp, upl, upl_offset, uio->uio_offset,
2791 io_size, 0, CL_READ| CL_NOZERO | CL_DEV_MEMORY, (struct buf *)0);
2792
2793 if (error == 0)
2794 {
2795 /*
2796 * The cluster_io read completed successfully,
2797 * update the uio structure and commit.
2798 */
2799
2800 ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);
2801
2802 iov->iov_base += io_size;
2803 iov->iov_len -= io_size;
2804 uio->uio_resid -= io_size;
2805 uio->uio_offset += io_size;
2806 }
2807 else
2808 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
2809
2810 return (error);
2811 }
2812
2813 /*
2814 * generate advisory I/O's in the largest chunks possible
2815 * the completed pages will be released into the VM cache
2816 */
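/*
 * a minimal usage sketch (illustrative only... the caller and its
 * variables are assumptions, not taken from this file): a filesystem
 * that wants to warm the cache ahead of an expected sequential read
 * might issue
 *
 *	(void) advisory_read(vp, filesize, f_offset, resid, devBlockSize);
 */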
2817 int
2818 advisory_read(vp, filesize, f_offset, resid, devblocksize)
2819 struct vnode *vp;
2820 off_t filesize;
2821 off_t f_offset;
2822 int resid;
2823 int devblocksize;
2824 {
2825 upl_page_info_t *pl;
2826 upl_t upl;
2827 vm_offset_t upl_offset;
2828 int upl_size;
2829 off_t upl_f_offset;
2830 int start_offset;
2831 int start_pg;
2832 int last_pg;
2833 int pages_in_upl;
2834 off_t max_size;
2835 int io_size;
2836 kern_return_t kret;
2837 int retval = 0;
2838 int issued_io;
2839
2840 if (!UBCINFOEXISTS(vp))
2841 return(EINVAL);
2842
2843 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START,
2844 (int)f_offset, resid, (int)filesize, devblocksize, 0);
2845
2846 while (resid && f_offset < filesize && retval == 0) {
2847 /*
2848 * compute the size of the upl needed to encompass
2849 * the requested read... limit each call to cluster_io
2850 * to the maximum UPL size... cluster_io will clip if
2851 * this exceeds the maximum io_size for the device...
2852 * also make sure to account for a starting offset
2853 * that's not page aligned
2854 */
2855 start_offset = (int)(f_offset & PAGE_MASK_64);
2856 upl_f_offset = f_offset - (off_t)start_offset;
2857 max_size = filesize - f_offset;
2858
2859 if (resid < max_size)
2860 io_size = resid;
2861 else
2862 io_size = max_size;
2863
2864 upl_size = (start_offset + io_size + (PAGE_SIZE - 1)) & ~PAGE_MASK;
2865 if (upl_size > (MAX_UPL_TRANSFER * PAGE_SIZE))
2866 upl_size = MAX_UPL_TRANSFER * PAGE_SIZE;
2867 pages_in_upl = upl_size / PAGE_SIZE;
2868
2869 kret = ubc_create_upl(vp,
2870 upl_f_offset,
2871 upl_size,
2872 &upl,
2873 &pl,
2874 UPL_RET_ONLY_ABSENT);
2875 if (kret != KERN_SUCCESS)
2876 return(retval);
2877 issued_io = 0;
2878
2879 /*
2880 * before we start marching forward, we must make sure we end on
2881 * a present page, otherwise we will be working with a freed
2882 * upl
2883 */
2884 for (last_pg = pages_in_upl - 1; last_pg >= 0; last_pg--) {
2885 if (upl_page_present(pl, last_pg))
2886 break;
2887 }
2888 pages_in_upl = last_pg + 1;
2889
2890
2891 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_NONE,
2892 (int)upl, (int)upl_f_offset, upl_size, start_offset, 0);
2893
2894
2895 for (last_pg = 0; last_pg < pages_in_upl; ) {
2896 /*
2897 * scan from the beginning of the upl looking for the first
2898 * page that is present.... this will become the first page in
2899 * the request we're going to make to 'cluster_io'... if all
2900 * of the pages are absent, we won't call through to 'cluster_io'
2901 */
2902 for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
2903 if (upl_page_present(pl, start_pg))
2904 break;
2905 }
2906
2907 /*
2908 * scan from the starting present page looking for an absent
2909 * page before the end of the upl is reached, if we
2910 * find one, then it will terminate the range of pages being
2911 * presented to 'cluster_io'
2912 */
2913 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
2914 if (!upl_page_present(pl, last_pg))
2915 break;
2916 }
2917
2918 if (last_pg > start_pg) {
2919 /*
2920 * we found a range of pages that must be filled
2921 * if the last page in this range is the last page of the file
2922 * we may have to clip the size of it to keep from reading past
2923 * the end of the last physical block associated with the file
2924 */
2925 upl_offset = start_pg * PAGE_SIZE;
2926 io_size = (last_pg - start_pg) * PAGE_SIZE;
2927
2928 if ((upl_f_offset + upl_offset + io_size) > filesize)
2929 io_size = filesize - (upl_f_offset + upl_offset);
2930
2931 /*
2932 * issue an asynchronous read to cluster_io
2933 */
2934 retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, devblocksize,
2935 CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0);
2936
2937 issued_io = 1;
2938 }
2939 }
2940 if (issued_io == 0)
2941 ubc_upl_abort(upl, 0);
2942
2943 io_size = upl_size - start_offset;
2944
2945 if (io_size > resid)
2946 io_size = resid;
2947 f_offset += io_size;
2948 resid -= io_size;
2949 }
2950
2951 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_END,
2952 (int)f_offset, resid, retval, 0, 0);
2953
2954 return(retval);
2955 }
2956
2957
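/*
 * cluster_push flushes the delayed-write state held on the vnode...
 * if the vnode has fallen back to the VHASDIRTY mechanism the whole
 * dirty range (v_cstart .. v_lastw) is swept out in MAX_UPL_TRANSFER
 * sized chunks... otherwise cluster_try_push is asked to push every
 * recorded cluster... returns 0 when there is nothing to push
 */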
2958 int
2959 cluster_push(vp)
2960 struct vnode *vp;
2961 {
2962 int retval;
2963
2964 if (!UBCINFOEXISTS(vp) || vp->v_clen == 0) {
2965 vp->v_flag &= ~VHASDIRTY;
2966 return(0);
2967 }
2968
2969 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_START,
2970 vp->v_flag & VHASDIRTY, vp->v_clen, 0, 0, 0);
2971
2972 if (vp->v_flag & VHASDIRTY) {
2973 daddr_t start_pg;
2974 daddr_t last_pg;
2975 daddr_t end_pg;
2976
2977 start_pg = vp->v_cstart;
2978 end_pg = vp->v_lastw;
2979
2980 vp->v_flag &= ~VHASDIRTY;
2981 vp->v_clen = 0;
2982
2983 while (start_pg < end_pg) {
2984 last_pg = start_pg + MAX_UPL_TRANSFER;
2985
2986 if (last_pg > end_pg)
2987 last_pg = end_pg;
2988
2989 cluster_push_x(vp, ubc_getsize(vp), start_pg, last_pg, 0);
2990
2991 start_pg = last_pg;
2992 }
2993 return (1);
2994 }
2995 retval = cluster_try_push(vp, ubc_getsize(vp), 0, 1);
2996
2997 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_END,
2998 vp->v_flag & VHASDIRTY, vp->v_clen, retval, 0, 0);
2999
3000 return (retval);
3001 }
3002
3003
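/*
 * cluster_try_push makes a local copy of the vnode's clusters, sorted
 * by starting page, clears v_clen so new clusters can be built while
 * we work, and then pushes each cluster via cluster_push_x... any
 * clusters that could not be pushed are merged back into the vnode
 * (falling back to VHASDIRTY if they no longer fit)... the return
 * value is the number of free cluster slots, i.e. MAX_CLUSTERS - v_clen
 */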
3004 static int
3005 cluster_try_push(vp, EOF, can_delay, push_all)
3006 struct vnode *vp;
3007 off_t EOF;
3008 int can_delay;
3009 int push_all;
3010 {
3011 int cl_index;
3012 int cl_index1;
3013 int min_index;
3014 int cl_len;
3015 int cl_total;
3016 int cl_pushed;
3017 struct v_cluster l_clusters[MAX_CLUSTERS];
3018
3019 /*
3020 * make a local 'sorted' copy of the clusters
3021 * and clear vp->v_clen so that new clusters can
3022 * be developed
3023 */
3024 for (cl_index = 0; cl_index < vp->v_clen; cl_index++) {
3025 for (min_index = -1, cl_index1 = 0; cl_index1 < vp->v_clen; cl_index1++) {
3026 if (vp->v_clusters[cl_index1].start_pg == vp->v_clusters[cl_index1].last_pg)
3027 continue;
3028 if (min_index == -1)
3029 min_index = cl_index1;
3030 else if (vp->v_clusters[cl_index1].start_pg < vp->v_clusters[min_index].start_pg)
3031 min_index = cl_index1;
3032 }
3033 if (min_index == -1)
3034 break;
3035 l_clusters[cl_index].start_pg = vp->v_clusters[min_index].start_pg;
3036 l_clusters[cl_index].last_pg = vp->v_clusters[min_index].last_pg;
3037
3038 vp->v_clusters[min_index].start_pg = vp->v_clusters[min_index].last_pg;
3039 }
3040 cl_len = cl_index;
3041 vp->v_clen = 0;
3042
3043 for (cl_pushed = 0, cl_index = 0; cl_index < cl_len; cl_index++) {
3044 /*
3045 * try to push each cluster in turn... cluster_push_x may not
3046 * push the cluster if can_delay is TRUE and the cluster doesn't
3047 * meet the criteria for an immediate push
3048 */
3049 if (cluster_push_x(vp, EOF, l_clusters[cl_index].start_pg, l_clusters[cl_index].last_pg, can_delay)) {
3050 l_clusters[cl_index].start_pg = 0;
3051 l_clusters[cl_index].last_pg = 0;
3052
3053 cl_pushed++;
3054
3055 if (push_all == 0)
3056 break;
3057 }
3058 }
3059 if (cl_len > cl_pushed) {
3060 /*
3061 * we didn't push all of the clusters, so
3062 * lets try to merge them back in to the vnode
3063 */
3064 if ((MAX_CLUSTERS - vp->v_clen) < (cl_len - cl_pushed)) {
3065 /*
3066 * we picked up some new clusters while we were trying to
3067 * push the old ones (I don't think this can happen because
3068 * I'm holding the lock, but just in case)... the sum of the
3069 * leftovers plus the new cluster count exceeds our ability
3070 * to represent them, so fall back to the VHASDIRTY mechanism
3071 */
3072 for (cl_index = 0; cl_index < cl_len; cl_index++) {
3073 if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
3074 continue;
3075
3076 if (l_clusters[cl_index].start_pg < vp->v_cstart)
3077 vp->v_cstart = l_clusters[cl_index].start_pg;
3078 if (l_clusters[cl_index].last_pg > vp->v_lastw)
3079 vp->v_lastw = l_clusters[cl_index].last_pg;
3080 }
3081 vp->v_flag |= VHASDIRTY;
3082 } else {
3083 /*
3084 * we've got room to merge the leftovers back in
3085 * just append them starting at the next 'hole'
3086 * represented by vp->v_clen
3087 */
3088 for (cl_index = 0, cl_index1 = vp->v_clen; cl_index < cl_len; cl_index++) {
3089 if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg)
3090 continue;
3091
3092 vp->v_clusters[cl_index1].start_pg = l_clusters[cl_index].start_pg;
3093 vp->v_clusters[cl_index1].last_pg = l_clusters[cl_index].last_pg;
3094
3095 if (cl_index1 == 0) {
3096 vp->v_cstart = l_clusters[cl_index].start_pg;
3097 vp->v_lastw = l_clusters[cl_index].last_pg;
3098 } else {
3099 if (l_clusters[cl_index].start_pg < vp->v_cstart)
3100 vp->v_cstart = l_clusters[cl_index].start_pg;
3101 if (l_clusters[cl_index].last_pg > vp->v_lastw)
3102 vp->v_lastw = l_clusters[cl_index].last_pg;
3103 }
3104 cl_index1++;
3105 }
3106 /*
3107 * update the cluster count
3108 */
3109 vp->v_clen = cl_index1;
3110 }
3111 }
3112 return(MAX_CLUSTERS - vp->v_clen);
3113 }
3114
3115
3116
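/*
 * cluster_push_x pushes a single cluster of dirty pages, described by
 * the page range [first, last), out through cluster_io... if can_delay
 * is set and the cluster is still small (or not dirty enough) the push
 * is declined and 0 is returned... otherwise the dirty runs in the upl
 * are written asynchronously and 1 is returned
 */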
3117 static int
3118 cluster_push_x(vp, EOF, first, last, can_delay)
3119 struct vnode *vp;
3120 off_t EOF;
3121 daddr_t first;
3122 daddr_t last;
3123 int can_delay;
3124 {
3125 upl_page_info_t *pl;
3126 upl_t upl;
3127 vm_offset_t upl_offset;
3128 int upl_size;
3129 off_t upl_f_offset;
3130 int pages_in_upl;
3131 int start_pg;
3132 int last_pg;
3133 int io_size;
3134 int io_flags;
3135 int size;
3136 kern_return_t kret;
3137
3138
3139 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_START,
3140 vp->v_clen, first, last, EOF, 0);
3141
3142 if ((pages_in_upl = last - first) == 0) {
3143 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 0, 0, 0, 0);
3144
3145 return (1);
3146 }
3147 upl_size = pages_in_upl * PAGE_SIZE;
3148 upl_f_offset = ((off_t)first) * PAGE_SIZE_64;
3149
3150 if (upl_f_offset + upl_size >= EOF) {
3151
3152 if (upl_f_offset >= EOF) {
3153 /*
3154 * must have truncated the file and missed
3155 * clearing a dangling cluster (i.e. it's completely
3156 * beyond the new EOF)
3157 */
3158 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 1, 0, 0, 0);
3159
3160 return(1);
3161 }
3162 size = EOF - upl_f_offset;
3163
3164 upl_size = (size + (PAGE_SIZE - 1) ) & ~(PAGE_SIZE - 1);
3165 pages_in_upl = upl_size / PAGE_SIZE;
3166 } else {
3167 if (can_delay && (pages_in_upl < (MAX_UPL_TRANSFER - (MAX_UPL_TRANSFER / 2))))
3168 return(0);
3169 size = upl_size;
3170 }
3171 kret = ubc_create_upl(vp,
3172 upl_f_offset,
3173 upl_size,
3174 &upl,
3175 &pl,
3176 UPL_RET_ONLY_DIRTY);
3177 if (kret != KERN_SUCCESS)
3178 panic("cluster_push: failed to get pagelist");
3179
3180 if (can_delay) {
3181 int num_of_dirty;
3182
3183 for (num_of_dirty = 0, start_pg = 0; start_pg < pages_in_upl; start_pg++) {
3184 if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
3185 num_of_dirty++;
3186 }
3187 if (num_of_dirty < pages_in_upl / 2) {
3188 ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
3189
3190 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 0, 2, num_of_dirty, (pages_in_upl / 2), 0);
3191
3192 return(0);
3193 }
3194 }
3195 last_pg = 0;
3196
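/*
 * walk the upl, releasing runs of clean pages with ubc_upl_abort_range
 * and handing each run of valid, dirty pages to cluster_io as an
 * asynchronous write (throttled on the number of writes in flight)
 */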
3197 while (size) {
3198
3199 for (start_pg = last_pg; start_pg < pages_in_upl; start_pg++) {
3200 if (upl_valid_page(pl, start_pg) && upl_dirty_page(pl, start_pg))
3201 break;
3202 }
3203 if (start_pg > last_pg) {
3204 io_size = (start_pg - last_pg) * PAGE_SIZE;
3205
3206 ubc_upl_abort_range(upl, last_pg * PAGE_SIZE, io_size,
3207 UPL_ABORT_FREE_ON_EMPTY);
3208
3209 if (io_size < size)
3210 size -= io_size;
3211 else
3212 break;
3213 }
3214 for (last_pg = start_pg; last_pg < pages_in_upl; last_pg++) {
3215 if (!upl_valid_page(pl, last_pg) || !upl_dirty_page(pl, last_pg))
3216 break;
3217 }
3218 upl_offset = start_pg * PAGE_SIZE;
3219
3220 io_size = min(size, (last_pg - start_pg) * PAGE_SIZE);
3221
3222 if (vp->v_flag & VNOCACHE_DATA)
3223 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC | CL_DUMP;
3224 else
3225 io_flags = CL_COMMIT | CL_AGE | CL_ASYNC;
3226
3227 while (vp->v_numoutput >= ASYNC_THROTTLE) {
3228 vp->v_flag |= VTHROTTLED;
3229 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_push", 0);
3230 }
3231 cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, vp->v_ciosiz, io_flags, (struct buf *)0);
3232
3233 size -= io_size;
3234 }
3235 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 3, 0, 0, 0);
3236
3237 return(1);
3238 }