]>
git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ufs/ufs_readwrite.c
42cd1380119bd6acea688ca379701db6f396746c
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
28 * The Regents of the University of California. All rights reserved.
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
61 #define BLKSIZE(a, b, c) blksize(a, b, c)
65 #define PGRD_S "ffs_pgrd"
67 #define PGWR_S "ffs_pgwr"
70 * Vnode op for reading.
74 struct vop_read_args
/* {
81 register struct vnode
*vp
;
82 register struct inode
*ip
;
83 register struct uio
*uio
;
85 struct buf
*bp
= (struct buf
*)0;
86 ufs_daddr_t lbn
, nextlbn
;
88 long size
, xfersize
, blkoffset
;
94 #endif /* REV_ENDIAN_FS */
102 rev_endian
=(vp
->v_mount
->mnt_flag
& MNT_REVEND
);
103 #endif /* REV_ENDIAN_FS */
106 if (uio
->uio_rw
!= UIO_READ
)
107 panic("ffs_read: invalid uio_rw = %x", uio
->uio_rw
);
109 if (vp
->v_type
== VLNK
) {
110 if ((int)ip
->i_size
< vp
->v_mount
->mnt_maxsymlinklen
)
111 panic("ffs_read: short symlink = %d", ip
->i_size
);
112 } else if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
)
113 panic("ffs_read: invalid v_type = %x", vp
->v_type
);
116 if (uio
->uio_offset
< 0)
118 if (uio
->uio_offset
> fs
->fs_maxfilesize
)
121 VOP_DEVBLOCKSIZE(ip
->i_devvp
, &devBlockSize
);
123 if (UBCISVALID(vp
)) {
124 error
= cluster_read(vp
, uio
, (off_t
)ip
->i_size
,
127 for (error
= 0, bp
= NULL
; uio
->uio_resid
> 0;
129 if ((bytesinfile
= ip
->i_size
- uio
->uio_offset
) <= 0)
131 lbn
= lblkno(fs
, uio
->uio_offset
);
133 size
= BLKSIZE(fs
, ip
, lbn
);
134 blkoffset
= blkoff(fs
, uio
->uio_offset
);
135 xfersize
= fs
->fs_bsize
- blkoffset
;
136 if (uio
->uio_resid
< xfersize
)
137 xfersize
= uio
->uio_resid
;
138 if (bytesinfile
< xfersize
)
139 xfersize
= bytesinfile
;
141 if (lblktosize(fs
, nextlbn
) >= ip
->i_size
)
142 error
= bread(vp
, lbn
, size
, NOCRED
, &bp
);
143 else if (lbn
- 1 == vp
->v_lastr
&& !(vp
->v_flag
& VRAOFF
)) {
144 int nextsize
= BLKSIZE(fs
, ip
, nextlbn
);
145 error
= breadn(vp
, lbn
,
146 size
, &nextlbn
, &nextsize
, 1, NOCRED
, &bp
);
148 error
= bread(vp
, lbn
, size
, NOCRED
, &bp
);
154 * We should only get non-zero b_resid when an I/O error
155 * has occurred, which should cause us to break above.
156 * However, if the short read did not cause an error,
157 * then we want to ensure that we do not uiomove bad
158 * or uninitialized data.
161 if (size
< xfersize
) {
167 if (rev_endian
&& S_ISDIR(mode
)) {
168 byte_swap_dir_block_in((char *)bp
->b_data
+ blkoffset
, xfersize
);
170 #endif /* REV_ENDIAN_FS */
172 uiomove((char *)bp
->b_data
+ blkoffset
, (int)xfersize
, uio
)) {
174 if (rev_endian
&& S_ISDIR(mode
)) {
175 byte_swap_dir_block_in((char *)bp
->b_data
+ blkoffset
, xfersize
);
177 #endif /* REV_ENDIAN_FS */
182 if (rev_endian
&& S_ISDIR(mode
)) {
183 byte_swap_dir_out((char *)bp
->b_data
+ blkoffset
, xfersize
);
185 #endif /* REV_ENDIAN_FS */
186 if (S_ISREG(mode
) && (xfersize
+ blkoffset
== fs
->fs_bsize
||
187 uio
->uio_offset
== ip
->i_size
))
188 bp
->b_flags
|= B_AGE
;
194 ip
->i_flag
|= IN_ACCESS
;
199 * Vnode op for writing.
202 struct vop_write_args
/* {
206 struct ucred *a_cred;
209 register struct vnode
*vp
;
210 register struct uio
*uio
;
211 register struct inode
*ip
;
217 int blkoffset
, flags
, ioflag
, resid
, rsd
, size
, xfersize
;
219 int save_error
=0, save_size
=0;
222 int file_extended
= 0;
223 int doingdirectory
= 0;
227 #endif /* REV_ENDIAN_FS */
229 ioflag
= ap
->a_ioflag
;
234 rev_endian
=(vp
->v_mount
->mnt_flag
& MNT_REVEND
);
235 #endif /* REV_ENDIAN_FS */
238 if (uio
->uio_rw
!= UIO_WRITE
)
239 panic("ffs_write: uio_rw = %x\n", uio
->uio_rw
);
242 switch (vp
->v_type
) {
244 if (ioflag
& IO_APPEND
)
245 uio
->uio_offset
= ip
->i_size
;
246 if ((ip
->i_flags
& APPEND
) && uio
->uio_offset
!= ip
->i_size
)
253 if ((ioflag
& IO_SYNC
) == 0)
254 panic("ffs_write: nonsync dir write");
257 panic("ffs_write: invalid v_type=%x", vp
->v_type
);
261 if (uio
->uio_offset
< 0 ||
262 (u_int64_t
)uio
->uio_offset
+ uio
->uio_resid
> fs
->fs_maxfilesize
)
264 if (uio
->uio_resid
== 0)
267 VOP_DEVBLOCKSIZE(ip
->i_devvp
, &devBlockSize
);
270 * Maybe this should be above the vnode op call, but so long as
271 * file servers have no limits, I don't think it matters.
274 if (vp
->v_type
== VREG
&& p
&&
275 uio
->uio_offset
+ uio
->uio_resid
>
276 p
->p_rlimit
[RLIMIT_FSIZE
].rlim_cur
) {
281 resid
= uio
->uio_resid
;
284 if ((ioflag
& IO_SYNC
) && !((vp
)->v_mount
->mnt_flag
& MNT_ASYNC
))
287 if (UBCISVALID(vp
)) {
298 endofwrite
= uio
->uio_offset
+ uio
->uio_resid
;
300 if (endofwrite
> ip
->i_size
) {
301 filesize
= endofwrite
;
304 filesize
= ip
->i_size
;
306 head_offset
= ip
->i_size
;
308 /* Go ahead and allocate the blocks that are going to be written */
309 rsd
= uio
->uio_resid
;
310 local_offset
= uio
->uio_offset
;
312 if ((ioflag
& IO_SYNC
) && !((vp
)->v_mount
->mnt_flag
& MNT_ASYNC
))
313 local_flags
= B_SYNC
;
314 local_flags
|= B_NOBUFF
;
321 for (error
= 0; rsd
> 0;) {
323 lbn
= lblkno(fs
, local_offset
);
324 blkoffset
= blkoff(fs
, local_offset
);
325 xfersize
= fs
->fs_bsize
- blkoffset
;
330 if (fs
->fs_bsize
> xfersize
)
331 local_flags
|= B_CLRBUF
;
333 local_flags
&= ~B_CLRBUF
;
335 /* Allocate block without reading into a buf */
336 error
= ffs_balloc(ip
,
337 lbn
, blkoffset
+ xfersize
, ap
->a_cred
,
338 &bp
, local_flags
, &blkalloc
);
348 local_offset
+= (off_t
)xfersize
;
349 if (local_offset
> ip
->i_size
)
350 ip
->i_size
= local_offset
;
356 uio
->uio_resid
-= rsd
;
361 flags
= ioflag
& IO_SYNC
? IO_SYNC
: 0;
362 /* flags |= IO_NOZEROVALID; */
364 if((error
== 0) && fblk
&& fboff
) {
365 if( fblk
> fs
->fs_bsize
)
366 panic("ffs_balloc : allocated more than bsize(head)");
367 /* We need to zero out the head */
368 head_offset
= uio
->uio_offset
- (off_t
)fboff
;
369 flags
|= IO_HEADZEROFILL
;
370 /* flags &= ~IO_NOZEROVALID; */
373 if((error
== 0) && blkalloc
&& ((blkalloc
- xfersize
) > 0)) {
374 /* We need to zero out the tail */
375 if( blkalloc
> fs
->fs_bsize
)
376 panic("ffs_balloc : allocated more than bsize(tail)");
377 local_offset
+= (blkalloc
- xfersize
);
378 if (loopcount
== 1) {
379 /* blkalloc is same as fblk; so no need to check again*/
380 local_offset
-= fboff
;
382 flags
|= IO_TAILZEROFILL
;
383 /* Freshly allocated block; bzero even if
386 /* flags &= ~IO_NOZEROVALID; */
389 * if the write starts beyond the current EOF then
390 * we'll zero fill from the current EOF to where the write begins
393 error
= cluster_write(vp
, uio
, osize
, filesize
, head_offset
, local_offset
, devBlockSize
, flags
);
395 if (uio
->uio_offset
> osize
) {
396 if (error
&& ((ioflag
& IO_UNIT
)==0))
397 (void)VOP_TRUNCATE(vp
, uio
->uio_offset
,
398 ioflag
& IO_SYNC
, ap
->a_cred
, uio
->uio_procp
);
399 ip
->i_size
= uio
->uio_offset
;
400 ubc_setsize(vp
, (off_t
)ip
->i_size
);
403 uio
->uio_resid
+= save_size
;
407 ip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
410 if ((ioflag
& IO_SYNC
) && !((vp
)->v_mount
->mnt_flag
& MNT_ASYNC
))
413 for (error
= 0; uio
->uio_resid
> 0;) {
414 lbn
= lblkno(fs
, uio
->uio_offset
);
415 blkoffset
= blkoff(fs
, uio
->uio_offset
);
416 xfersize
= fs
->fs_bsize
- blkoffset
;
417 if (uio
->uio_resid
< xfersize
)
418 xfersize
= uio
->uio_resid
;
420 if (fs
->fs_bsize
> xfersize
)
425 error
= ffs_balloc(ip
,
426 lbn
, blkoffset
+ xfersize
, ap
->a_cred
, &bp
, flags
, 0);
429 if (uio
->uio_offset
+ xfersize
> ip
->i_size
) {
430 ip
->i_size
= uio
->uio_offset
+ xfersize
;
433 ubc_setsize(vp
, (u_long
)ip
->i_size
); /* XXX check errors */
436 size
= BLKSIZE(fs
, ip
, lbn
) - bp
->b_resid
;
441 uiomove((char *)bp
->b_data
+ blkoffset
, (int)xfersize
, uio
);
443 if (rev_endian
&& S_ISDIR(ip
->i_mode
)) {
444 byte_swap_dir_out((char *)bp
->b_data
+ blkoffset
, xfersize
);
446 #endif /* REV_ENDIAN_FS */
447 if (doingdirectory
== 0 && (ioflag
& IO_SYNC
))
449 else if (xfersize
+ blkoffset
== fs
->fs_bsize
) {
450 bp
->b_flags
|= B_AGE
;
455 if (error
|| xfersize
== 0)
457 ip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
461 * If we successfully wrote any data, and we are not the superuser
462 * we clear the setuid and setgid bits as a precaution against
465 if (resid
> uio
->uio_resid
&& ap
->a_cred
&& ap
->a_cred
->cr_uid
!= 0)
466 ip
->i_mode
&= ~(ISUID
| ISGID
);
467 if (resid
> uio
->uio_resid
)
468 VN_KNOTE(vp
, NOTE_WRITE
| (file_extended
? NOTE_EXTEND
: 0));
470 if (ioflag
& IO_UNIT
) {
471 (void)VOP_TRUNCATE(vp
, osize
,
472 ioflag
& IO_SYNC
, ap
->a_cred
, uio
->uio_procp
);
473 uio
->uio_offset
-= resid
- uio
->uio_resid
;
474 uio
->uio_resid
= resid
;
476 } else if (resid
> uio
->uio_resid
&& (ioflag
& IO_SYNC
))
477 error
= VOP_UPDATE(vp
, (struct timeval
*)&time
,
478 (struct timeval
*)&time
, 1);
483 * Vnode op for pagein.
484 * Similar to ffs_read()
488 struct vop_pagein_args
/* {
491 vm_offset_t a_pl_offset,
494 struct ucred *a_cred,
498 register struct vnode
*vp
= ap
->a_vp
;
500 size_t size
= ap
->a_size
;
501 off_t f_offset
= ap
->a_f_offset
;
502 vm_offset_t pl_offset
= ap
->a_pl_offset
;
503 int flags
= ap
->a_flags
;
504 register struct inode
*ip
;
510 /* pageins are allowed only for regular files, and ubc info must be present */
512 panic("ffs_pagein: Not a VREG: vp=%x", vp
);
513 if (UBCINFOMISSING(vp
))
514 panic("ffs_pagein: No mapping: vp=%x", vp
);
517 if (vp
->v_type
== VLNK
) {
518 if ((int)ip
->i_size
< vp
->v_mount
->mnt_maxsymlinklen
)
519 panic("%s: short symlink", "ffs_pagein");
520 } else if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
)
521 panic("%s: type %d", "ffs_pagein", vp
->v_type
);
524 VOP_DEVBLOCKSIZE(ip
->i_devvp
, &devBlockSize
);
526 error
= cluster_pagein(vp
, pl
, pl_offset
, f_offset
, size
,
527 (off_t
)ip
->i_size
, devBlockSize
, flags
);
528 /* ip->i_flag |= IN_ACCESS; */
533 * Vnode op for pageout.
534 * Similar to ffs_write()
535 * make sure the buf is not in hash queue when you return
538 struct vop_pageout_args
/* {
541 vm_offset_t a_pl_offset,
544 struct ucred *a_cred,
548 register struct vnode
*vp
= ap
->a_vp
;
550 size_t size
= ap
->a_size
;
551 off_t f_offset
= ap
->a_f_offset
;
552 vm_offset_t pl_offset
= ap
->a_pl_offset
;
553 int flags
= ap
->a_flags
;
554 register struct inode
*ip
;
558 size_t xfer_size
= 0;
561 int resid
, blkoffset
;
564 int save_error
=0, save_size
=0;
565 vm_offset_t lupl_offset
;
566 int nocommit
= flags
& UPL_NOCOMMIT
;
571 /* pageouts are allowed only for regular files, and ubc info must be present */
573 panic("ffs_pageout: Not a VREG: vp=%x", vp
);
574 if (UBCINFOMISSING(vp
))
575 panic("ffs_pageout: No mapping: vp=%x", vp
);
577 if (vp
->v_mount
->mnt_flag
& MNT_RDONLY
) {
579 ubc_upl_abort_range(pl
, pl_offset
, size
,
580 UPL_ABORT_FREE_ON_EMPTY
);
585 if (f_offset
< 0 || f_offset
>= ip
->i_size
) {
587 ubc_upl_abort_range(pl
, pl_offset
, size
,
588 UPL_ABORT_FREE_ON_EMPTY
);
593 * once we enable multi-page pageouts we will
594 * need to make sure we abort any pages in the upl
595 * that we don't issue an I/O for
597 if (f_offset
+ size
> ip
->i_size
)
598 xfer_size
= ip
->i_size
- f_offset
;
602 VOP_DEVBLOCKSIZE(ip
->i_devvp
, &devBlockSize
);
604 if (xfer_size
& (PAGE_SIZE
- 1)) {
605 /* if not a multiple of page size
606 * then round up to be a multiple
607 * of the physical disk block size
609 xfer_size
= (xfer_size
+ (devBlockSize
- 1)) & ~(devBlockSize
- 1);
613 * once the block allocation is moved to ufs_cmap
614 * we can remove all the size and offset checks above
615 * cluster_pageout does all of this now
616 * we need to continue to do it here so as not to
617 * allocate blocks that aren't going to be used because
618 * of a bogus parameter being passed in
622 local_offset
= f_offset
;
623 for (error
= 0; resid
> 0;) {
624 lbn
= lblkno(fs
, local_offset
);
625 blkoffset
= blkoff(fs
, local_offset
);
626 xsize
= fs
->fs_bsize
- blkoffset
;
629 /* Allocate block without reading into a buf */
630 error
= ffs_blkalloc(ip
,
631 lbn
, blkoffset
+ xsize
, ap
->a_cred
,
636 local_offset
+= (off_t
)xsize
;
642 xfer_size
-= save_size
;
646 error
= cluster_pageout(vp
, pl
, pl_offset
, f_offset
, round_page_32(xfer_size
), ip
->i_size
, devBlockSize
, flags
);
649 lupl_offset
= size
- save_size
;
650 resid
= round_page_32(save_size
);
652 ubc_upl_abort_range(pl
, lupl_offset
, resid
,
653 UPL_ABORT_FREE_ON_EMPTY
);