]> git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ffs/ffs_inode.c
xnu-1228.tar.gz
[apple/xnu.git] / bsd / ufs / ffs / ffs_inode.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95
62 */
63
64 #include <rev_endian_fs.h>
65 #include <vm/vm_pager.h>
66
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/mount_internal.h>
70 #include <sys/proc_internal.h> /* for accessing p_stats */
71 #include <sys/file.h>
72 #include <sys/buf_internal.h>
73 #include <sys/vnode_internal.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
76 #include <sys/trace.h>
77 #include <sys/resourcevar.h>
78 #include <sys/ubc.h>
79 #include <sys/quota.h>
80
81 #include <sys/vm.h>
82
83 #include <ufs/ufs/quota.h>
84 #include <ufs/ufs/inode.h>
85 #include <ufs/ufs/ufsmount.h>
86 #include <ufs/ufs/ufs_extern.h>
87
88 #include <ufs/ffs/fs.h>
89 #include <ufs/ffs/ffs_extern.h>
90
91 #if REV_ENDIAN_FS
92 #include <ufs/ufs/ufs_byte_order.h>
93 #include <libkern/OSByteOrder.h>
94 #endif /* REV_ENDIAN_FS */
95 #include <libkern/OSAtomic.h>
96
97 static int ffs_indirtrunc(struct inode *, ufs_daddr_t, ufs_daddr_t,
98 ufs_daddr_t, int, long *);
99
100 /*
101 * Update the access, modified, and inode change times as specified by the
102 * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is
103 * used to specify that the inode needs to be updated but that the times have
104 * already been set. The access and modified times are taken from the second
105 * and third parameters; the inode change time is always taken from the current
106 * time. If waitfor is set, then wait for the disk write of the inode to
107 * complete.
108 */
109 int
110 ffs_update(struct vnode *vp, struct timeval *access, struct timeval *modify, int waitfor)
111 {
112 register struct fs *fs;
113 struct buf *bp;
114 struct inode *ip;
115 struct timeval tv;
116 errno_t error;
117 #if REV_ENDIAN_FS
118 struct mount *mp=(vp)->v_mount;
119 int rev_endian=(mp->mnt_flag & MNT_REVEND);
120 #endif /* REV_ENDIAN_FS */
121
122 ip = VTOI(vp);
123 if (vp->v_mount->mnt_flag & MNT_RDONLY) {
124 ip->i_flag &=
125 ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
126 return (0);
127 }
128 if ((ip->i_flag &
129 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)
130 return (0);
131 if (ip->i_flag & IN_ACCESS)
132 ip->i_atime = access->tv_sec;
133 if (ip->i_flag & IN_UPDATE) {
134 ip->i_mtime = modify->tv_sec;
135 ip->i_modrev++;
136 }
137 if (ip->i_flag & IN_CHANGE) {
138 microtime(&tv);
139 ip->i_ctime = tv.tv_sec;
140 }
141 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
142 fs = ip->i_fs;
143 /*
144 * Ensure that uid and gid are correct. This is a temporary
145 * fix until fsck has been changed to do the update.
146 */
147 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
148 ip->i_din.di_ouid = ip->i_uid; /* XXX */
149 ip->i_din.di_ogid = ip->i_gid; /* XXX */
150 } /* XXX */
151 if (error = buf_bread(ip->i_devvp,
152 (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
153 (int)fs->fs_bsize, NOCRED, &bp)) {
154 buf_brelse(bp);
155 return ((int)error);
156 }
157 #if REV_ENDIAN_FS
158 if (rev_endian)
159 byte_swap_inode_out(ip, ((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number)));
160 else {
161 #endif /* REV_ENDIAN_FS */
162 *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number)) = ip->i_din;
163 #if REV_ENDIAN_FS
164 }
165 #endif /* REV_ENDIAN_FS */
166
167 if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
168 return ((int)buf_bwrite(bp));
169 else {
170 buf_bdwrite(bp);
171 return (0);
172 }
173 }
174
175
176 #define SINGLE 0 /* index of single indirect block */
177 #define DOUBLE 1 /* index of double indirect block */
178 #define TRIPLE 2 /* index of triple indirect block */
179
180 int
181 ffs_truncate_internal(vnode_t ovp, off_t length, int flags, ucred_t cred)
182 {
183 struct inode *oip;
184 struct fs *fs;
185 ufs_daddr_t lastblock;
186 ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
187 ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
188 buf_t bp;
189 int offset, size, level, i;
190 long count, nblocks, vflags, blocksreleased = 0;
191 struct timeval tv;
192 int aflags, error, allerror;
193 off_t osize;
194 int devBlockSize=0;
195 #if QUOTA
196 int64_t change; /* in bytes */
197 #endif /* QUOTA */
198
199 if (length < 0)
200 return (EINVAL);
201
202 oip = VTOI(ovp);
203 fs = oip->i_fs;
204
205 if (length > fs->fs_maxfilesize)
206 return (EFBIG);
207
208 microtime(&tv);
209 if (ovp->v_type == VLNK &&
210 oip->i_size < ovp->v_mount->mnt_maxsymlinklen) {
211 #if DIAGNOSTIC
212 if (length != 0)
213 panic("ffs_truncate: partial truncate of symlink");
214 #endif
215 bzero((char *)&oip->i_shortlink, (u_int)oip->i_size);
216 oip->i_size = 0;
217 oip->i_flag |= IN_CHANGE | IN_UPDATE;
218 return (ffs_update(ovp, &tv, &tv, 1));
219 }
220
221 if (oip->i_size == length) {
222 oip->i_flag |= IN_CHANGE | IN_UPDATE;
223 return (ffs_update(ovp, &tv, &tv, 0));
224 }
225 #if QUOTA
226 if (error = getinoquota(oip))
227 return (error);
228 #endif
229 osize = oip->i_size;
230
231 /*
232 * Lengthen the size of the file. We must ensure that the
233 * last byte of the file is allocated. Since the smallest
234 * value of osize is 0, length will be at least 1.
235 */
236 if (osize < length) {
237 offset = blkoff(fs, length - 1);
238 lbn = lblkno(fs, length - 1);
239 aflags = B_CLRBUF;
240 if (flags & IO_SYNC)
241 aflags |= B_SYNC;
242 if (error = ffs_balloc(oip, lbn, offset + 1, cred, &bp, aflags, 0))
243 return (error);
244 oip->i_size = length;
245
246 if (UBCINFOEXISTS(ovp)) {
247 buf_markinvalid(bp);
248 buf_bwrite(bp);
249 ubc_setsize(ovp, (off_t)length);
250 } else {
251 if (aflags & B_SYNC)
252 buf_bwrite(bp);
253 else
254 buf_bawrite(bp);
255 }
256 oip->i_flag |= IN_CHANGE | IN_UPDATE;
257 return (ffs_update(ovp, &tv, &tv, 1));
258 }
259 /*
260 * Shorten the size of the file. If the file is not being
261 * truncated to a block boundry, the contents of the
262 * partial block following the end of the file must be
263 * zero'ed in case it ever become accessable again because
264 * of subsequent file growth.
265 */
266 if (UBCINFOEXISTS(ovp))
267 ubc_setsize(ovp, (off_t)length);
268
269 vflags = ((length > 0) ? BUF_WRITE_DATA : 0) | BUF_SKIP_META;
270
271 if (vflags & BUF_WRITE_DATA)
272 ffs_fsync_internal(ovp, MNT_WAIT);
273 allerror = buf_invalidateblks(ovp, vflags, 0, 0);
274
275 offset = blkoff(fs, length);
276 if (offset == 0) {
277 oip->i_size = length;
278 } else {
279 lbn = lblkno(fs, length);
280 aflags = B_CLRBUF;
281 if (flags & IO_SYNC)
282 aflags |= B_SYNC;
283 if (error = ffs_balloc(oip, lbn, offset, cred, &bp, aflags, 0))
284 return (error);
285 oip->i_size = length;
286 size = blksize(fs, oip, lbn);
287 bzero((char *)buf_dataptr(bp) + offset, (u_int)(size - offset));
288 allocbuf(bp, size);
289 if (UBCINFOEXISTS(ovp)) {
290 buf_markinvalid(bp);
291 buf_bwrite(bp);
292 } else {
293 if (aflags & B_SYNC)
294 buf_bwrite(bp);
295 else
296 buf_bawrite(bp);
297 }
298 }
299 /*
300 * Calculate index into inode's block list of
301 * last direct and indirect blocks (if any)
302 * which we want to keep. Lastblock is -1 when
303 * the file is truncated to 0.
304 */
305 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
306 lastiblock[SINGLE] = lastblock - NDADDR;
307 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
308 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
309
310 devBlockSize = vfs_devblocksize(vnode_mount(ovp));
311 nblocks = btodb(fs->fs_bsize, devBlockSize);
312
313 /*
314 * Update file and block pointers on disk before we start freeing
315 * blocks. If we crash before free'ing blocks below, the blocks
316 * will be returned to the free list. lastiblock values are also
317 * normalized to -1 for calls to ffs_indirtrunc below.
318 */
319 bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks);
320 for (level = TRIPLE; level >= SINGLE; level--)
321 if (lastiblock[level] < 0) {
322 oip->i_ib[level] = 0;
323 lastiblock[level] = -1;
324 }
325 for (i = NDADDR - 1; i > lastblock; i--)
326 oip->i_db[i] = 0;
327 oip->i_flag |= IN_CHANGE | IN_UPDATE;
328 if (error = ffs_update(ovp, &tv, &tv, MNT_WAIT))
329 allerror = error;
330 /*
331 * Having written the new inode to disk, save its new configuration
332 * and put back the old block pointers long enough to process them.
333 * Note that we save the new block configuration so we can check it
334 * when we are done.
335 */
336 bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks);
337 bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks);
338 oip->i_size = osize;
339
340 vflags = ((length > 0) ? BUF_WRITE_DATA : 0) | BUF_SKIP_META;
341
342 if (vflags & BUF_WRITE_DATA)
343 ffs_fsync_internal(ovp, MNT_WAIT);
344 allerror = buf_invalidateblks(ovp, vflags, 0, 0);
345
346 /*
347 * Indirect blocks first.
348 */
349 indir_lbn[SINGLE] = -NDADDR;
350 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
351 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
352 for (level = TRIPLE; level >= SINGLE; level--) {
353 bn = oip->i_ib[level];
354 if (bn != 0) {
355 error = ffs_indirtrunc(oip, indir_lbn[level],
356 fsbtodb(fs, bn), lastiblock[level], level, &count);
357 if (error)
358 allerror = error;
359 blocksreleased += count;
360 if (lastiblock[level] < 0) {
361 oip->i_ib[level] = 0;
362 ffs_blkfree(oip, bn, fs->fs_bsize);
363 blocksreleased += nblocks;
364 }
365 }
366 if (lastiblock[level] >= 0)
367 goto done;
368 }
369
370 /*
371 * All whole direct blocks or frags.
372 */
373 for (i = NDADDR - 1; i > lastblock; i--) {
374 register long bsize;
375
376 bn = oip->i_db[i];
377 if (bn == 0)
378 continue;
379 oip->i_db[i] = 0;
380 bsize = blksize(fs, oip, i);
381 ffs_blkfree(oip, bn, bsize);
382 blocksreleased += btodb(bsize, devBlockSize);
383 }
384 if (lastblock < 0)
385 goto done;
386
387 /*
388 * Finally, look for a change in size of the
389 * last direct block; release any frags.
390 */
391 bn = oip->i_db[lastblock];
392 if (bn != 0) {
393 long oldspace, newspace;
394
395 /*
396 * Calculate amount of space we're giving
397 * back as old block size minus new block size.
398 */
399 oldspace = blksize(fs, oip, lastblock);
400 oip->i_size = length;
401 newspace = blksize(fs, oip, lastblock);
402 if (newspace == 0)
403 panic("itrunc: newspace");
404 if (oldspace - newspace > 0) {
405 /*
406 * Block number of space to be free'd is
407 * the old block # plus the number of frags
408 * required for the storage we're keeping.
409 */
410 bn += numfrags(fs, newspace);
411 ffs_blkfree(oip, bn, oldspace - newspace);
412 blocksreleased += btodb(oldspace - newspace, devBlockSize);
413 }
414 }
415 done:
416 #if DIAGNOSTIC
417 for (level = SINGLE; level <= TRIPLE; level++)
418 if (newblks[NDADDR + level] != oip->i_ib[level])
419 panic("itrunc1");
420 for (i = 0; i < NDADDR; i++)
421 if (newblks[i] != oip->i_db[i])
422 panic("itrunc2");
423 if (length == 0 &&
424 (vnode_hasdirtyblks(ovp) || vnode_hascleanblks(ovp)))
425 panic("itrunc3");
426 #endif /* DIAGNOSTIC */
427 /*
428 * Put back the real size.
429 */
430 oip->i_size = length;
431 oip->i_blocks -= blocksreleased;
432 if (oip->i_blocks < 0) /* sanity */
433 oip->i_blocks = 0;
434 oip->i_flag |= IN_CHANGE;
435 #if QUOTA
436 change = dbtob((int64_t)blocksreleased,devBlockSize);
437 (void) chkdq(oip, -change, NOCRED, 0);
438 #endif
439 return (allerror);
440 }
441
442 /*
443 * Release blocks associated with the inode ip and stored in the indirect
444 * block bn. Blocks are free'd in LIFO order up to (but not including)
445 * lastbn. If level is greater than SINGLE, the block is an indirect block
446 * and recursive calls to indirtrunc must be used to cleanse other indirect
447 * blocks.
448 *
449 * NB: triple indirect blocks are untested.
450 */
451 static int
452 ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
453 register struct inode *ip;
454 ufs_daddr_t lbn, lastbn;
455 ufs_daddr_t dbn;
456 int level;
457 long *countp;
458 {
459 register int i;
460 struct buf *bp;
461 struct buf *tbp;
462 register struct fs *fs = ip->i_fs;
463 register ufs_daddr_t *bap;
464 struct vnode *vp=ITOV(ip);
465 ufs_daddr_t *copy, nb, nlbn, last;
466 long blkcount, factor;
467 int nblocks, blocksreleased = 0;
468 errno_t error = 0, allerror = 0;
469 int devBlockSize=0;
470 struct mount *mp=vp->v_mount;
471 #if REV_ENDIAN_FS
472 int rev_endian=(mp->mnt_flag & MNT_REVEND);
473 #endif /* REV_ENDIAN_FS */
474
475 /*
476 * Calculate index in current block of last
477 * block to be kept. -1 indicates the entire
478 * block so we need not calculate the index.
479 */
480 factor = 1;
481 for (i = SINGLE; i < level; i++)
482 factor *= NINDIR(fs);
483 last = lastbn;
484 if (lastbn > 0)
485 last /= factor;
486
487 devBlockSize = vfs_devblocksize(mp);
488 nblocks = btodb(fs->fs_bsize, devBlockSize);
489
490 /* Doing a MALLOC here is asking for trouble. We can still
491 * deadlock on pagerfile lock, in case we are running
492 * low on memory and block in MALLOC
493 */
494
495 tbp = buf_geteblk(fs->fs_bsize);
496 copy = (ufs_daddr_t *)buf_dataptr(tbp);
497
498 /*
499 * Get buffer of block pointers, zero those entries corresponding
500 * to blocks to be free'd, and update on disk copy first. Since
501 * double(triple) indirect before single(double) indirect, calls
502 * to bmap on these blocks will fail. However, we already have
503 * the on disk address, so we have to set the blkno field
504 * explicitly instead of letting buf_bread do everything for us.
505 */
506
507 vp = ITOV(ip);
508 bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, 0, 0, BLK_META);
509
510 if (buf_valid(bp)) {
511 /* Braces must be here in case trace evaluates to nothing. */
512 trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn);
513 } else {
514 trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn);
515 OSIncrementAtomic(&current_proc()->p_stats->p_ru.ru_inblock); /* pay for read */
516 buf_setflags(bp, B_READ);
517 if (buf_count(bp) > buf_size(bp))
518 panic("ffs_indirtrunc: bad buffer size");
519 buf_setblkno(bp, (daddr64_t)((unsigned)dbn));
520 VNOP_STRATEGY(bp);
521 error = buf_biowait(bp);
522 }
523 if (error) {
524 buf_brelse(bp);
525 *countp = 0;
526 buf_brelse(tbp);
527 return ((int)error);
528 }
529
530 bap = (ufs_daddr_t *)buf_dataptr(bp);
531 bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
532 bzero((caddr_t)&bap[last + 1],
533 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t));
534 if (last == -1)
535 buf_markinvalid(bp);
536 if (last != -1 && (vp)->v_mount->mnt_flag & MNT_ASYNC) {
537 error = 0;
538 buf_bdwrite(bp);
539 } else {
540 error = buf_bwrite(bp);
541 if (error)
542 allerror = error;
543 }
544 bap = copy;
545
546 /*
547 * Recursively free totally unused blocks.
548 */
549 for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
550 i--, nlbn += factor) {
551 #if REV_ENDIAN_FS
552 if (rev_endian)
553 nb = OSSwapInt32(bap[i]);
554 else {
555 #endif /* REV_ENDIAN_FS */
556 nb = bap[i];
557 #if REV_ENDIAN_FS
558 }
559 #endif /* REV_ENDIAN_FS */
560 if (nb == 0)
561 continue;
562 if (level > SINGLE) {
563 if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
564 (ufs_daddr_t)-1, level - 1, &blkcount))
565 allerror = error;
566 blocksreleased += blkcount;
567 }
568 ffs_blkfree(ip, nb, fs->fs_bsize);
569 blocksreleased += nblocks;
570 }
571
572 /*
573 * Recursively free last partial block.
574 */
575 if (level > SINGLE && lastbn >= 0) {
576 last = lastbn % factor;
577 #if REV_ENDIAN_FS
578 if (rev_endian)
579 nb = OSSwapInt32(bap[i]);
580 else {
581 #endif /* REV_ENDIAN_FS */
582 nb = bap[i];
583 #if REV_ENDIAN_FS
584 }
585 #endif /* REV_ENDIAN_FS */
586 if (nb != 0) {
587 if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
588 last, level - 1, &blkcount))
589 allerror = error;
590 blocksreleased += blkcount;
591 }
592 }
593 buf_brelse(tbp);
594 *countp = blocksreleased;
595 return ((int)allerror);
596 }
597