git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ufs/ufs_readwrite.c
f1b21eda6625216ebcbbfff925388fdf4b99da59
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
28 * The Regents of the University of California. All rights reserved.
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
58 * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
61 #define BLKSIZE(a, b, c) blksize(a, b, c)
65 #define PGRD_S "ffs_pgrd"
67 #define PGWR_S "ffs_pgwr"
70 * Vnode op for reading.
74 struct vop_read_args
/* {
81 register struct vnode
*vp
;
82 register struct inode
*ip
;
83 register struct uio
*uio
;
85 struct buf
*bp
= (struct buf
*)0;
86 ufs_daddr_t lbn
, nextlbn
;
88 long size
, xfersize
, blkoffset
;
94 #endif /* REV_ENDIAN_FS */
102 rev_endian
=(vp
->v_mount
->mnt_flag
& MNT_REVEND
);
103 #endif /* REV_ENDIAN_FS */
106 if (uio
->uio_rw
!= UIO_READ
)
107 panic("ffs_read: invalid uio_rw = %x", uio
->uio_rw
);
109 if (vp
->v_type
== VLNK
) {
110 if ((int)ip
->i_size
< vp
->v_mount
->mnt_maxsymlinklen
)
111 panic("ffs_read: short symlink = %d", ip
->i_size
);
112 } else if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
)
113 panic("ffs_read: invalid v_type = %x", vp
->v_type
);
116 if (uio
->uio_offset
< 0)
118 if (uio
->uio_offset
> fs
->fs_maxfilesize
)
121 VOP_DEVBLOCKSIZE(ip
->i_devvp
, &devBlockSize
);
123 if (UBCISVALID(vp
)) {
124 error
= cluster_read(vp
, uio
, (off_t
)ip
->i_size
,
127 for (error
= 0, bp
= NULL
; uio
->uio_resid
> 0;
129 if ((bytesinfile
= ip
->i_size
- uio
->uio_offset
) <= 0)
131 lbn
= lblkno(fs
, uio
->uio_offset
);
133 size
= BLKSIZE(fs
, ip
, lbn
);
134 blkoffset
= blkoff(fs
, uio
->uio_offset
);
135 xfersize
= fs
->fs_bsize
- blkoffset
;
136 if (uio
->uio_resid
< xfersize
)
137 xfersize
= uio
->uio_resid
;
138 if (bytesinfile
< xfersize
)
139 xfersize
= bytesinfile
;
141 if (lblktosize(fs
, nextlbn
) >= ip
->i_size
)
142 error
= bread(vp
, lbn
, size
, NOCRED
, &bp
);
143 else if (lbn
- 1 == vp
->v_lastr
&& !(vp
->v_flag
& VRAOFF
)) {
144 int nextsize
= BLKSIZE(fs
, ip
, nextlbn
);
145 error
= breadn(vp
, lbn
,
146 size
, &nextlbn
, &nextsize
, 1, NOCRED
, &bp
);
148 error
= bread(vp
, lbn
, size
, NOCRED
, &bp
);
154 * We should only get non-zero b_resid when an I/O error
155 * has occurred, which should cause us to break above.
156 * However, if the short read did not cause an error,
157 * then we want to ensure that we do not uiomove bad
158 * or uninitialized data.
161 if (size
< xfersize
) {
167 if (rev_endian
&& S_ISDIR(mode
)) {
168 byte_swap_dir_block_in((char *)bp
->b_data
+ blkoffset
, xfersize
);
170 #endif /* REV_ENDIAN_FS */
172 uiomove((char *)bp
->b_data
+ blkoffset
, (int)xfersize
, uio
)) {
174 if (rev_endian
&& S_ISDIR(mode
)) {
175 byte_swap_dir_block_in((char *)bp
->b_data
+ blkoffset
, xfersize
);
177 #endif /* REV_ENDIAN_FS */
182 if (rev_endian
&& S_ISDIR(mode
)) {
183 byte_swap_dir_out((char *)bp
->b_data
+ blkoffset
, xfersize
);
185 #endif /* REV_ENDIAN_FS */
186 if (S_ISREG(mode
) && (xfersize
+ blkoffset
== fs
->fs_bsize
||
187 uio
->uio_offset
== ip
->i_size
))
188 bp
->b_flags
|= B_AGE
;
194 ip
->i_flag
|= IN_ACCESS
;
199 * Vnode op for writing.
202 struct vop_write_args
/* {
206 struct ucred *a_cred;
209 register struct vnode
*vp
;
210 register struct uio
*uio
;
211 register struct inode
*ip
;
217 int blkoffset
, flags
, ioflag
, resid
, rsd
, size
, xfersize
;
219 int save_error
=0, save_size
=0;
225 #endif /* REV_ENDIAN_FS */
227 ioflag
= ap
->a_ioflag
;
232 rev_endian
=(vp
->v_mount
->mnt_flag
& MNT_REVEND
);
233 #endif /* REV_ENDIAN_FS */
236 if (uio
->uio_rw
!= UIO_WRITE
)
237 panic("ffs_write: uio_rw = %x\n", uio
->uio_rw
);
240 switch (vp
->v_type
) {
242 if (ioflag
& IO_APPEND
)
243 uio
->uio_offset
= ip
->i_size
;
244 if ((ip
->i_flags
& APPEND
) && uio
->uio_offset
!= ip
->i_size
)
250 if ((ioflag
& IO_SYNC
) == 0)
251 panic("ffs_write: nonsync dir write");
254 panic("ffs_write: invalid v_type=%x", vp
->v_type
);
258 if (uio
->uio_offset
< 0 ||
259 (u_int64_t
)uio
->uio_offset
+ uio
->uio_resid
> fs
->fs_maxfilesize
)
261 if (uio
->uio_resid
== 0)
264 VOP_DEVBLOCKSIZE(ip
->i_devvp
, &devBlockSize
);
267 * Maybe this should be above the vnode op call, but so long as
268 * file servers have no limits, I don't think it matters.
271 if (vp
->v_type
== VREG
&& p
&&
272 uio
->uio_offset
+ uio
->uio_resid
>
273 p
->p_rlimit
[RLIMIT_FSIZE
].rlim_cur
) {
278 resid
= uio
->uio_resid
;
280 flags
= ioflag
& IO_SYNC
? B_SYNC
: 0;
282 if (UBCISVALID(vp
)) {
292 int file_extended
= 0;
294 endofwrite
= uio
->uio_offset
+ uio
->uio_resid
;
296 if (endofwrite
> ip
->i_size
) {
297 filesize
= endofwrite
;
300 filesize
= ip
->i_size
;
302 head_offset
= ip
->i_size
;
304 /* Go ahead and allocate the blocks that are going to be written */
305 rsd
= uio
->uio_resid
;
306 local_offset
= uio
->uio_offset
;
307 local_flags
= ioflag
& IO_SYNC
? B_SYNC
: 0;
308 local_flags
|= B_NOBUFF
;
315 for (error
= 0; rsd
> 0;) {
317 lbn
= lblkno(fs
, local_offset
);
318 blkoffset
= blkoff(fs
, local_offset
);
319 xfersize
= fs
->fs_bsize
- blkoffset
;
324 if (fs
->fs_bsize
> xfersize
)
325 local_flags
|= B_CLRBUF
;
327 local_flags
&= ~B_CLRBUF
;
329 /* Allocate block without reading into a buf */
330 error
= ffs_balloc(ip
,
331 lbn
, blkoffset
+ xfersize
, ap
->a_cred
,
332 &bp
, local_flags
, &blkalloc
);
342 local_offset
+= (off_t
)xfersize
;
343 if (local_offset
> ip
->i_size
)
344 ip
->i_size
= local_offset
;
350 uio
->uio_resid
-= rsd
;
355 flags
= ioflag
& IO_SYNC
? IO_SYNC
: 0;
356 /* flags |= IO_NOZEROVALID; */
358 if((error
== 0) && fblk
&& fboff
) {
359 if( fblk
> fs
->fs_bsize
)
360 panic("ffs_balloc : allocated more than bsize(head)");
361 /* We need to zero out the head */
362 head_offset
= uio
->uio_offset
- (off_t
)fboff
;
363 flags
|= IO_HEADZEROFILL
;
364 /* flags &= ~IO_NOZEROVALID; */
367 if((error
== 0) && blkalloc
&& ((blkalloc
- xfersize
) > 0)) {
368 /* We need to zero out the tail */
369 if( blkalloc
> fs
->fs_bsize
)
370 panic("ffs_balloc : allocated more than bsize(tail)");
371 local_offset
+= (blkalloc
- xfersize
);
372 if (loopcount
== 1) {
373 /* blkalloc is same as fblk; so no need to check again*/
374 local_offset
-= fboff
;
376 flags
|= IO_TAILZEROFILL
;
377 /* Freshly allocated block; bzero even if
380 /* flags &= ~IO_NOZEROVALID; */
383 * if the write starts beyond the current EOF then
384 * we'll zero fill from the current EOF to where the write begins
387 error
= cluster_write(vp
, uio
, osize
, filesize
, head_offset
, local_offset
, devBlockSize
, flags
);
389 if (uio
->uio_offset
> osize
) {
390 if (error
&& ((ioflag
& IO_UNIT
)==0))
391 (void)VOP_TRUNCATE(vp
, uio
->uio_offset
,
392 ioflag
& IO_SYNC
, ap
->a_cred
, uio
->uio_procp
);
393 ip
->i_size
= uio
->uio_offset
;
394 ubc_setsize(vp
, (off_t
)ip
->i_size
);
397 uio
->uio_resid
+= save_size
;
401 ip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
403 flags
= ioflag
& IO_SYNC
? B_SYNC
: 0;
405 for (error
= 0; uio
->uio_resid
> 0;) {
406 lbn
= lblkno(fs
, uio
->uio_offset
);
407 blkoffset
= blkoff(fs
, uio
->uio_offset
);
408 xfersize
= fs
->fs_bsize
- blkoffset
;
409 if (uio
->uio_resid
< xfersize
)
410 xfersize
= uio
->uio_resid
;
412 if (fs
->fs_bsize
> xfersize
)
417 error
= ffs_balloc(ip
,
418 lbn
, blkoffset
+ xfersize
, ap
->a_cred
, &bp
, flags
, 0);
421 if (uio
->uio_offset
+ xfersize
> ip
->i_size
) {
422 ip
->i_size
= uio
->uio_offset
+ xfersize
;
425 ubc_setsize(vp
, (u_long
)ip
->i_size
); /* XXX check errors */
428 size
= BLKSIZE(fs
, ip
, lbn
) - bp
->b_resid
;
433 uiomove((char *)bp
->b_data
+ blkoffset
, (int)xfersize
, uio
);
435 if (rev_endian
&& S_ISDIR(ip
->i_mode
)) {
436 byte_swap_dir_out((char *)bp
->b_data
+ blkoffset
, xfersize
);
438 #endif /* REV_ENDIAN_FS */
439 if (ioflag
& IO_SYNC
)
441 else if (xfersize
+ blkoffset
== fs
->fs_bsize
) {
442 bp
->b_flags
|= B_AGE
;
447 if (error
|| xfersize
== 0)
449 ip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
453 * If we successfully wrote any data, and we are not the superuser,
454 * we clear the setuid and setgid bits as a precaution against
455 * tampering.
457 if (resid
> uio
->uio_resid
&& ap
->a_cred
&& ap
->a_cred
->cr_uid
!= 0)
458 ip
->i_mode
&= ~(ISUID
| ISGID
);
460 if (ioflag
& IO_UNIT
) {
461 (void)VOP_TRUNCATE(vp
, osize
,
462 ioflag
& IO_SYNC
, ap
->a_cred
, uio
->uio_procp
);
463 uio
->uio_offset
-= resid
- uio
->uio_resid
;
464 uio
->uio_resid
= resid
;
466 } else if (resid
> uio
->uio_resid
&& (ioflag
& IO_SYNC
))
467 error
= VOP_UPDATE(vp
, &time
, &time
, 1);
472 * Vnode op for page read.
476 struct vop_pgrd_args
/* {
479 struct ucred *a_cred;
483 #warning ufs_readwrite PGRD need to implement
489 * Vnode op for page write.
493 struct vop_pgwr_args
/* {
496 struct ucred *a_cred;
497 memory_object_t a_pager;
498 vm_offset_t a_offset;
502 #warning ufs_readwrite PGWR need to implement
508 * Vnode op for pagein.
509 * Similar to ffs_read()
513 struct vop_pagein_args
/* {
516 vm_offset_t a_pl_offset,
519 struct ucred *a_cred,
523 register struct vnode
*vp
= ap
->a_vp
;
525 size_t size
= ap
->a_size
;
526 off_t f_offset
= ap
->a_f_offset
;
527 vm_offset_t pl_offset
= ap
->a_pl_offset
;
528 int flags
= ap
->a_flags
;
529 register struct inode
*ip
;
535 /* check pageins for reg file only and ubc info is present*/
537 panic("ffs_pagein: Not a VREG: vp=%x", vp
);
538 if (UBCINFOMISSING(vp
))
539 panic("ffs_pagein: No mapping: vp=%x", vp
);
542 if (vp
->v_type
== VLNK
) {
543 if ((int)ip
->i_size
< vp
->v_mount
->mnt_maxsymlinklen
)
544 panic("%s: short symlink", "ffs_pagein");
545 } else if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
)
546 panic("%s: type %d", "ffs_pagein", vp
->v_type
);
549 VOP_DEVBLOCKSIZE(ip
->i_devvp
, &devBlockSize
);
551 error
= cluster_pagein(vp
, pl
, pl_offset
, f_offset
, size
,
552 (off_t
)ip
->i_size
, devBlockSize
, flags
);
553 /* ip->i_flag |= IN_ACCESS; */
558 * Vnode op for pageout.
559 * Similar to ffs_write()
560 * make sure the buf is not in hash queue when you return
563 struct vop_pageout_args
/* {
566 vm_offset_t a_pl_offset,
569 struct ucred *a_cred,
573 register struct vnode
*vp
= ap
->a_vp
;
575 size_t size
= ap
->a_size
;
576 off_t f_offset
= ap
->a_f_offset
;
577 vm_offset_t pl_offset
= ap
->a_pl_offset
;
578 int flags
= ap
->a_flags
;
579 register struct inode
*ip
;
583 size_t xfer_size
= 0;
586 int resid
, blkoffset
;
589 int save_error
=0, save_size
=0;
590 vm_offset_t lupl_offset
;
591 int nocommit
= flags
& UPL_NOCOMMIT
;
596 /* check pageouts for reg file only and ubc info is present*/
598 panic("ffs_pageout: Not a VREG: vp=%x", vp
);
599 if (UBCINFOMISSING(vp
))
600 panic("ffs_pageout: No mapping: vp=%x", vp
);
602 if (vp
->v_mount
->mnt_flag
& MNT_RDONLY
) {
604 ubc_upl_abort_range(pl
, pl_offset
, size
,
605 UPL_ABORT_FREE_ON_EMPTY
);
610 if (f_offset
< 0 || f_offset
>= ip
->i_size
) {
612 ubc_upl_abort_range(pl
, pl_offset
, size
,
613 UPL_ABORT_FREE_ON_EMPTY
);
618 * once we enable multi-page pageouts we will
619 * need to make sure we abort any pages in the upl
620 * that we don't issue an I/O for
622 if (f_offset
+ size
> ip
->i_size
)
623 xfer_size
= ip
->i_size
- f_offset
;
627 VOP_DEVBLOCKSIZE(ip
->i_devvp
, &devBlockSize
);
629 if (xfer_size
& (PAGE_SIZE
- 1)) {
630 /* if not a multiple of page size
631 * then round up to be a multiple
632 * of the physical disk block size
634 xfer_size
= (xfer_size
+ (devBlockSize
- 1)) & ~(devBlockSize
- 1);
638 * Once the block allocation is moved to ufs_cmap
639 * we can remove all the size and offset checks above;
640 * cluster_pageout does all of this now.
641 * We need to continue to do it here so as not to
642 * allocate blocks that aren't going to be used because
643 * of a bogus parameter being passed in.
647 local_offset
= f_offset
;
648 for (error
= 0; resid
> 0;) {
649 lbn
= lblkno(fs
, local_offset
);
650 blkoffset
= blkoff(fs
, local_offset
);
651 xsize
= fs
->fs_bsize
- blkoffset
;
654 /* Allocate block without reading into a buf */
655 error
= ffs_blkalloc(ip
,
656 lbn
, blkoffset
+ xsize
, ap
->a_cred
,
661 local_offset
+= (off_t
)xsize
;
667 xfer_size
-= save_size
;
671 error
= cluster_pageout(vp
, pl
, pl_offset
, f_offset
, round_page_32(xfer_size
), ip
->i_size
, devBlockSize
, flags
);
674 lupl_offset
= size
- save_size
;
675 resid
= round_page_32(save_size
);
677 ubc_upl_abort_range(pl
, lupl_offset
, resid
,
678 UPL_ABORT_FREE_ON_EMPTY
);