]> git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ufs/ufs_readwrite.c
9aa3b92cd80e6093841a6208865bf0d35af28c52
[apple/xnu.git] / bsd / ufs / ufs / ufs_readwrite.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*-
24 * Copyright (c) 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
56 */
57
58 #include <sys/buf_internal.h>
59 #include <sys/uio_internal.h>
60
61
/*
 * Compatibility shims: this file is written against a small set of
 * macros so the same readwrite template can be retargeted (historically
 * shared between FFS and LFS).  Here they simply map to the FFS names.
 */
#define BLKSIZE(a, b, c) blksize(a, b, c)
#define FS struct fs
#define I_FS i_fs
65
66
67
68 /*
69 * Vnode op for reading.
70 */
71 /* ARGSUSED */
72 ffs_read(ap)
73 struct vnop_read_args /* {
74 struct vnode *a_vp;
75 struct uio *a_uio;
76 int a_ioflag;
77 vfs_context_t a_context;
78 } */ *ap;
79 {
80 return(ffs_read_internal(ap->a_vp, ap->a_uio, ap->a_ioflag));
81 }
82
83
/*
 * Guts of the FFS read path, shared by the VNOP_READ wrapper.
 *
 * vp     - vnode being read (VREG, VDIR, or VLNK)
 * uio    - describes the request: starting offset, residual count, and
 *          the destination for uiomove()
 * ioflag - IO_* flags from the caller (not consulted on either path here)
 *
 * Returns 0 or an errno (EINVAL for a negative offset, EFBIG past the
 * filesystem's maximum file size, or whatever the I/O layer reports).
 * Sets IN_ACCESS on the inode before returning.
 *
 * Two transfer strategies:
 *  - if the vnode is backed by the unified buffer cache (UBCINFOEXISTS),
 *    the whole request is handed to cluster_read();
 *  - otherwise the transfer is done one filesystem block at a time via
 *    the buffer cache (buf_bread/buf_breadn).
 */
int
ffs_read_internal(vnode_t vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	FS *fs;
	buf_t bp = (struct buf *)0;
	ufs_daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error;
	u_short mode;
#if REV_ENDIAN_FS
	int rev_endian=0;
#endif /* REV_ENDIAN_FS */

	ip = VTOI(vp);
	mode = ip->i_mode;

#if REV_ENDIAN_FS
	/* Set when the volume was mounted with byte-swapped (opposite
	 * endianness) metadata; directory blocks then need in-core swapping. */
	rev_endian=(vp->v_mount->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

#if DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: invalid uio_rw = %x", uio->uio_rw);

	if (vp->v_type == VLNK) {
		/* Short symlinks live in the inode itself and must not
		 * reach this read path. */
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink = %d", ip->i_size);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: invalid v_type = %x", vp->v_type);
#endif
	fs = ip->I_FS;
	/* Validate the starting offset before touching any blocks. */
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);

	if (UBCINFOEXISTS(vp)) {
		/* UBC-backed vnode: let the cluster layer do the whole
		 * transfer, bounded by the current EOF. */
		error = cluster_read(vp, uio, (off_t)ip->i_size, 0);
	} else {
		/* Block-at-a-time path through the buffer cache.  bp is
		 * reset to NULL each iteration; a non-NULL bp after the
		 * loop means we broke out holding a buffer. */
		for (error = 0, bp = NULL; uio_resid(uio) > 0;
			bp = NULL) {
			char *buf_data;

			/* Stop at EOF. */
			if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
				break;
			lbn = lblkno(fs, uio->uio_offset);
			nextlbn = lbn + 1;
			size = BLKSIZE(fs, ip, lbn);
			blkoffset = blkoff(fs, uio->uio_offset);
			/* Clamp the transfer to: rest of this block, the
			 * caller's residual, and the bytes left in the file. */
			xfersize = fs->fs_bsize - blkoffset;
			// LP64todo - fix this
			if (uio_resid(uio) < xfersize)
				xfersize = uio_resid(uio);
			if (bytesinfile < xfersize)
				xfersize = bytesinfile;

			if (lblktosize(fs, nextlbn) >= ip->i_size)
				/* Last block of the file: plain read, no
				 * read-ahead possible. */
				error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), size, NOCRED, &bp);
			else if (lbn - 1 == ip->i_lastr && !(vp->v_flag & VRAOFF)) {
				/* Sequential access detected (previous read
				 * hit the preceding block) and read-ahead is
				 * not disabled: read this block and start the
				 * next one asynchronously. */
				int nextsize = BLKSIZE(fs, ip, nextlbn);
				error = (int)buf_breadn(vp, (daddr64_t)((unsigned)lbn),
					size, &nextlbn, &nextsize, 1, NOCRED, &bp);
			} else
				error = (int)buf_bread(vp, lbn, size, NOCRED, &bp);
			if (error)
				break;
			/* Remember the last block read for the sequential
			 * read-ahead heuristic above. */
			ip->i_lastr = lbn;

			/*
			 * We should only get non-zero buffer resid when an I/O error
			 * has occurred, which should cause us to break above.
			 * However, if the short read did not cause an error,
			 * then we want to ensure that we do not uiomove bad
			 * or uninitialized data.
			 */
			size -= buf_resid(bp);
			if (size < xfersize) {
				if (size == 0)
					break;
				xfersize = size;
			}
			buf_data = (char *)buf_dataptr(bp);
#if REV_ENDIAN_FS
			/* Directory blocks are kept in on-disk (reversed)
			 * order in the cache; swap to host order before
			 * copying out... */
			if (rev_endian && S_ISDIR(mode)) {
				byte_swap_dir_block_in(buf_data + blkoffset, xfersize);
			}
#endif /* REV_ENDIAN_FS */
			if (error =
				uiomove(buf_data + blkoffset, (int)xfersize, uio)) {
#if REV_ENDIAN_FS
				/* ...and swap back before releasing the buffer
				 * on the error path so the cached copy stays
				 * in on-disk order. */
				if (rev_endian && S_ISDIR(mode)) {
					byte_swap_dir_block_in(buf_data + blkoffset, xfersize);
				}
#endif /* REV_ENDIAN_FS */
				break;
			}

#if REV_ENDIAN_FS
			/* Success path: restore on-disk order in the cache. */
			if (rev_endian && S_ISDIR(mode)) {
				byte_swap_dir_out(buf_data + blkoffset, xfersize);
			}
#endif /* REV_ENDIAN_FS */
			/* Fully consumed block of a regular file (or read up
			 * to EOF): age the buffer so it is recycled sooner. */
			if (S_ISREG(mode) && (xfersize + blkoffset == fs->fs_bsize ||
				uio->uio_offset == ip->i_size))
				buf_markaged(bp);
			buf_brelse(bp);
		}
	}
	/* Release any buffer still held after an early break. */
	if (bp != NULL)
		buf_brelse(bp);
	ip->i_flag |= IN_ACCESS;
	return (error);
}
199
200 /*
201 * Vnode op for writing.
202 */
203 ffs_write(ap)
204 struct vnop_write_args /* {
205 struct vnode *a_vp;
206 struct uio *a_uio;
207 int a_ioflag;
208 vfs_context_t a_context;
209 } */ *ap;
210 {
211 return(ffs_write_internal(ap->a_vp, ap->a_uio, ap->a_ioflag, vfs_context_ucred(ap->a_context)));
212 }
213
214
215 ffs_write_internal(vnode_t vp, struct uio *uio, int ioflag, ucred_t cred)
216 {
217 buf_t bp;
218 proc_t p;
219 struct inode *ip;
220 FS *fs;
221 ufs_daddr_t lbn;
222 off_t osize;
223 int blkoffset, flags, resid, rsd, size, xfersize;
224 int save_error=0, save_size=0;
225 int blkalloc = 0;
226 int error = 0;
227 int file_extended = 0;
228 int doingdirectory = 0;
229
230 #if REV_ENDIAN_FS
231 int rev_endian=0;
232 #endif /* REV_ENDIAN_FS */
233
234 ip = VTOI(vp);
235 #if REV_ENDIAN_FS
236 rev_endian=(vp->v_mount->mnt_flag & MNT_REVEND);
237 #endif /* REV_ENDIAN_FS */
238
239 #if DIAGNOSTIC
240 if (uio->uio_rw != UIO_WRITE)
241 panic("ffs_write: uio_rw = %x\n", uio->uio_rw);
242 #endif
243
244 switch (vp->v_type) {
245 case VREG:
246 if (ioflag & IO_APPEND)
247 uio->uio_offset = ip->i_size;
248 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
249 return (EPERM);
250 /* FALLTHROUGH */
251 case VLNK:
252 break;
253 case VDIR:
254 doingdirectory = 1;
255 if ((ioflag & IO_SYNC) == 0)
256 panic("ffs_write: nonsync dir write");
257 break;
258 default:
259 panic("ffs_write: invalid v_type=%x", vp->v_type);
260 }
261
262 fs = ip->I_FS;
263 if (uio->uio_offset < 0 ||
264 (u_int64_t)uio->uio_offset + uio_resid(uio) > fs->fs_maxfilesize)
265 return (EFBIG);
266 if (uio_resid(uio) == 0)
267 return (0);
268
269 // LP64todo - fix this
270 resid = uio_resid(uio);
271 osize = ip->i_size;
272 flags = 0;
273 if ((ioflag & IO_SYNC) && !((vp)->v_mount->mnt_flag & MNT_ASYNC))
274 flags = B_SYNC;
275
276 if (UBCINFOEXISTS(vp)) {
277 off_t filesize;
278 off_t endofwrite;
279 off_t local_offset;
280 off_t head_offset;
281 int local_flags;
282 int first_block;
283 int fboff;
284 int fblk;
285 int loopcount;
286
287 // LP64todo - fix this
288 endofwrite = uio->uio_offset + uio_resid(uio);
289
290 if (endofwrite > ip->i_size) {
291 filesize = endofwrite;
292 file_extended = 1;
293 } else
294 filesize = ip->i_size;
295
296 head_offset = ip->i_size;
297
298 /* Go ahead and allocate the block that are going to be written */
299 // LP64todo - fix this
300 rsd = uio_resid(uio);
301 local_offset = uio->uio_offset;
302 local_flags = 0;
303 if ((ioflag & IO_SYNC) && !((vp)->v_mount->mnt_flag & MNT_ASYNC))
304 local_flags = B_SYNC;
305 local_flags |= B_NOBUFF;
306
307 first_block = 1;
308 fboff = 0;
309 fblk = 0;
310 loopcount = 0;
311
312 for (error = 0; rsd > 0;) {
313 blkalloc = 0;
314 lbn = lblkno(fs, local_offset);
315 blkoffset = blkoff(fs, local_offset);
316 xfersize = fs->fs_bsize - blkoffset;
317 if (first_block)
318 fboff = blkoffset;
319 if (rsd < xfersize)
320 xfersize = rsd;
321 if (fs->fs_bsize > xfersize)
322 local_flags |= B_CLRBUF;
323 else
324 local_flags &= ~B_CLRBUF;
325
326 /* Allocate block without reading into a buf */
327 error = ffs_balloc(ip,
328 lbn, blkoffset + xfersize, cred,
329 &bp, local_flags, &blkalloc);
330 if (error)
331 break;
332 if (first_block) {
333 fblk = blkalloc;
334 first_block = 0;
335 }
336 loopcount++;
337
338 rsd -= xfersize;
339 local_offset += (off_t)xfersize;
340 if (local_offset > ip->i_size)
341 ip->i_size = local_offset;
342 }
343
344 if(error) {
345 save_error = error;
346 save_size = rsd;
347 uio_setresid(uio, (uio_resid(uio) - rsd));
348 if (file_extended)
349 filesize -= rsd;
350 }
351
352 flags = ioflag & IO_SYNC ? IO_SYNC : 0;
353 /* flags |= IO_NOZEROVALID; */
354
355 if((error == 0) && fblk && fboff) {
356 if( fblk > fs->fs_bsize)
357 panic("ffs_balloc : allocated more than bsize(head)");
358 /* We need to zero out the head */
359 head_offset = uio->uio_offset - (off_t)fboff ;
360 flags |= IO_HEADZEROFILL;
361 /* flags &= ~IO_NOZEROVALID; */
362 }
363
364 if((error == 0) && blkalloc && ((blkalloc - xfersize) > 0)) {
365 /* We need to zero out the tail */
366 if( blkalloc > fs->fs_bsize)
367 panic("ffs_balloc : allocated more than bsize(tail)");
368 local_offset += (blkalloc - xfersize);
369 if (loopcount == 1) {
370 /* blkalloc is same as fblk; so no need to check again*/
371 local_offset -= fboff;
372 }
373 flags |= IO_TAILZEROFILL;
374 /* Freshly allocated block; bzero even if
375 * find a page
376 */
377 /* flags &= ~IO_NOZEROVALID; */
378 }
379 /*
380 * if the write starts beyond the current EOF then
381 * we we'll zero fill from the current EOF to where the write begins
382 */
383
384 error = cluster_write(vp, uio, osize, filesize, head_offset, local_offset, flags);
385
386 if (uio->uio_offset > osize) {
387 if (error && ((ioflag & IO_UNIT)==0))
388 (void)ffs_truncate_internal(vp, uio->uio_offset, ioflag & IO_SYNC, cred);
389 ip->i_size = uio->uio_offset;
390 ubc_setsize(vp, (off_t)ip->i_size);
391 }
392 if(save_error) {
393 uio_setresid(uio, (uio_resid(uio) + save_size));
394 if(!error)
395 error = save_error;
396 }
397 ip->i_flag |= IN_CHANGE | IN_UPDATE;
398 } else {
399 flags = 0;
400 if ((ioflag & IO_SYNC) && !((vp)->v_mount->mnt_flag & MNT_ASYNC))
401 flags = B_SYNC;
402
403 for (error = 0; uio_resid(uio) > 0;) {
404 char *buf_data;
405
406 lbn = lblkno(fs, uio->uio_offset);
407 blkoffset = blkoff(fs, uio->uio_offset);
408 xfersize = fs->fs_bsize - blkoffset;
409 if (uio_resid(uio) < xfersize)
410 // LP64todo - fix this
411 xfersize = uio_resid(uio);
412
413 if (fs->fs_bsize > xfersize)
414 flags |= B_CLRBUF;
415 else
416 flags &= ~B_CLRBUF;
417
418 error = ffs_balloc(ip, lbn, blkoffset + xfersize, cred, &bp, flags, 0);
419 if (error)
420 break;
421 if (uio->uio_offset + xfersize > ip->i_size) {
422 ip->i_size = uio->uio_offset + xfersize;
423 ubc_setsize(vp, (u_long)ip->i_size);
424 }
425
426 size = BLKSIZE(fs, ip, lbn) - buf_resid(bp);
427 if (size < xfersize)
428 xfersize = size;
429
430 buf_data = (char *)buf_dataptr(bp);
431
432 error = uiomove(buf_data + blkoffset, (int)xfersize, uio);
433 #if REV_ENDIAN_FS
434 if (rev_endian && S_ISDIR(ip->i_mode)) {
435 byte_swap_dir_out(buf_data + blkoffset, xfersize);
436 }
437 #endif /* REV_ENDIAN_FS */
438 if (doingdirectory == 0 && (ioflag & IO_SYNC))
439 (void)buf_bwrite(bp);
440 else if (xfersize + blkoffset == fs->fs_bsize) {
441 buf_markaged(bp);
442 buf_bdwrite(bp);
443 }
444 else
445 buf_bdwrite(bp);
446 if (error || xfersize == 0)
447 break;
448 ip->i_flag |= IN_CHANGE | IN_UPDATE;
449 }
450 }
451 /*
452 * If we successfully wrote any data, and we are not the superuser
453 * we clear the setuid and setgid bits as a precaution against
454 * tampering.
455 */
456 if (resid > uio_resid(uio) && cred && suser(cred, NULL))
457 ip->i_mode &= ~(ISUID | ISGID);
458 if (resid > uio_resid(uio))
459 VN_KNOTE(vp, NOTE_WRITE | (file_extended ? NOTE_EXTEND : 0));
460 if (error) {
461 if (ioflag & IO_UNIT) {
462 (void)ffs_truncate_internal(vp, osize, ioflag & IO_SYNC, cred);
463 // LP64todo - fix this
464 uio->uio_offset -= resid - uio_resid(uio);
465 uio_setresid(uio, resid);
466 }
467 } else if (resid > uio_resid(uio) && (ioflag & IO_SYNC)) {
468 struct timeval tv;
469
470 microtime(&tv);
471 error = ffs_update(vp, &tv, &tv, 1);
472 }
473 return (error);
474 }
475
476 /*
477 * Vnode op for pagein.
478 * Similar to ffs_read()
479 */
480 /* ARGSUSED */
481 ffs_pagein(ap)
482 struct vnop_pagein_args /* {
483 struct vnode *a_vp,
484 upl_t a_pl,
485 vm_offset_t a_pl_offset,
486 off_t a_f_offset,
487 size_t a_size,
488 int a_flags
489 vfs_context_t a_context;
490 } */ *ap;
491 {
492 register struct vnode *vp = ap->a_vp;
493 upl_t pl = ap->a_pl;
494 size_t size= ap->a_size;
495 off_t f_offset = ap->a_f_offset;
496 vm_offset_t pl_offset = ap->a_pl_offset;
497 int flags = ap->a_flags;
498 register struct inode *ip;
499 int error;
500
501 ip = VTOI(vp);
502
503 /* check pageins for reg file only and ubc info is present*/
504 if (UBCINVALID(vp))
505 panic("ffs_pagein: Not a VREG: vp=%x", vp);
506 if (UBCINFOMISSING(vp))
507 panic("ffs_pagein: No mapping: vp=%x", vp);
508
509 #if DIAGNOSTIC
510 if (vp->v_type == VLNK) {
511 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
512 panic("%s: short symlink", "ffs_pagein");
513 } else if (vp->v_type != VREG && vp->v_type != VDIR)
514 panic("%s: type %d", "ffs_pagein", vp->v_type);
515 #endif
516
517 error = cluster_pagein(vp, pl, pl_offset, f_offset, size, (off_t)ip->i_size, flags);
518
519 /* ip->i_flag |= IN_ACCESS; */
520 return (error);
521 }
522
523 /*
524 * Vnode op for pageout.
525 * Similar to ffs_write()
526 * make sure the buf is not in hash queue when you return
527 */
528 ffs_pageout(ap)
529 struct vnop_pageout_args /* {
530 struct vnode *a_vp,
531 upl_t a_pl,
532 vm_offset_t a_pl_offset,
533 off_t a_f_offset,
534 size_t a_size,
535 int a_flags
536 vfs_context_t a_context;
537 } */ *ap;
538 {
539 register struct vnode *vp = ap->a_vp;
540 upl_t pl = ap->a_pl;
541 size_t size= ap->a_size;
542 off_t f_offset = ap->a_f_offset;
543 vm_offset_t pl_offset = ap->a_pl_offset;
544 int flags = ap->a_flags;
545 register struct inode *ip;
546 register FS *fs;
547 int error ;
548 size_t xfer_size = 0;
549 int local_flags=0;
550 off_t local_offset;
551 int resid, blkoffset;
552 size_t xsize, lsize;
553 daddr_t lbn;
554 int save_error =0, save_size=0;
555 vm_offset_t lupl_offset;
556 int nocommit = flags & UPL_NOCOMMIT;
557 int devBlockSize = 0;
558 struct buf *bp;
559
560 ip = VTOI(vp);
561
562 /* check pageouts for reg file only and ubc info is present*/
563 if (UBCINVALID(vp))
564 panic("ffs_pageout: Not a VREG: vp=%x", vp);
565 if (UBCINFOMISSING(vp))
566 panic("ffs_pageout: No mapping: vp=%x", vp);
567
568 if (vp->v_mount->mnt_flag & MNT_RDONLY) {
569 if (!nocommit)
570 ubc_upl_abort_range(pl, pl_offset, size,
571 UPL_ABORT_FREE_ON_EMPTY);
572 return (EROFS);
573 }
574 fs = ip->I_FS;
575
576 if (f_offset < 0 || f_offset >= ip->i_size) {
577 if (!nocommit)
578 ubc_upl_abort_range(pl, pl_offset, size,
579 UPL_ABORT_FREE_ON_EMPTY);
580 return (EINVAL);
581 }
582
583 /*
584 * once we enable multi-page pageouts we will
585 * need to make sure we abort any pages in the upl
586 * that we don't issue an I/O for
587 */
588 if (f_offset + size > ip->i_size)
589 xfer_size = ip->i_size - f_offset;
590 else
591 xfer_size = size;
592
593 devBlockSize = vfs_devblocksize(vnode_mount(vp));
594
595 if (xfer_size & (PAGE_SIZE - 1)) {
596 /* if not a multiple of page size
597 * then round up to be a multiple
598 * the physical disk block size
599 */
600 xfer_size = (xfer_size + (devBlockSize - 1)) & ~(devBlockSize - 1);
601 }
602
603 /*
604 * once the block allocation is moved to ufs_blockmap
605 * we can remove all the size and offset checks above
606 * cluster_pageout does all of this now
607 * we need to continue to do it here so as not to
608 * allocate blocks that aren't going to be used because
609 * of a bogus parameter being passed in
610 */
611 local_flags = 0;
612 resid = xfer_size;
613 local_offset = f_offset;
614 for (error = 0; resid > 0;) {
615 lbn = lblkno(fs, local_offset);
616 blkoffset = blkoff(fs, local_offset);
617 xsize = fs->fs_bsize - blkoffset;
618 if (resid < xsize)
619 xsize = resid;
620 /* Allocate block without reading into a buf */
621 error = ffs_blkalloc(ip,
622 lbn, blkoffset + xsize, vfs_context_ucred(ap->a_context),
623 local_flags);
624 if (error)
625 break;
626 resid -= xsize;
627 local_offset += (off_t)xsize;
628 }
629
630 if (error) {
631 save_size = resid;
632 save_error = error;
633 xfer_size -= save_size;
634 }
635
636
637 error = cluster_pageout(vp, pl, pl_offset, f_offset, round_page_32(xfer_size), ip->i_size, flags);
638
639 if(save_error) {
640 lupl_offset = size - save_size;
641 resid = round_page_32(save_size);
642 if (!nocommit)
643 ubc_upl_abort_range(pl, lupl_offset, resid,
644 UPL_ABORT_FREE_ON_EMPTY);
645 if(!error)
646 error= save_error;
647 }
648 return (error);
649 }