]> git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ffs/ffs_vfsops.c
6614aa7406d83d5b9202281b68f66d870a3c1afc
[apple/xnu.git] / bsd / ufs / ffs / ffs_vfsops.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1989, 1991, 1993, 1994
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
57 */
58
59 #include <rev_endian_fs.h>
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/namei.h>
63 #include <sys/proc.h>
64 #include <sys/kauth.h>
65 #include <sys/kernel.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/socket.h>
68 #include <sys/mount_internal.h>
69 #include <sys/mount.h>
70 #include <sys/buf.h>
71 #include <sys/mbuf.h>
72 #include <sys/file.h>
73 #include <sys/disk.h>
74 #include <sys/ioctl.h>
75 #include <sys/errno.h>
76 #include <sys/malloc.h>
77 #include <sys/ubc.h>
78 #include <sys/quota.h>
79
80 #include <miscfs/specfs/specdev.h>
81
82 #include <ufs/ufs/quota.h>
83 #include <ufs/ufs/ufsmount.h>
84 #include <ufs/ufs/inode.h>
85 #include <ufs/ufs/ufs_extern.h>
86
87 #include <ufs/ffs/fs.h>
88 #include <ufs/ffs/ffs_extern.h>
89 #if REV_ENDIAN_FS
90 #include <ufs/ufs/ufs_byte_order.h>
91 #include <architecture/byte_order.h>
92 #endif /* REV_ENDIAN_FS */
93
94 int ffs_sbupdate(struct ufsmount *, int);
95
/*
 * Operations vector wiring the FFS entry points into the VFS layer.
 * Initializer order must match the slot order of struct vfsops.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_vfs_getattr,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
	ffs_sysctl,
	ffs_vfs_setattr,
	{0}	/* remaining slots unimplemented */
};
112
113 extern u_long nextgennumber;
114
/*
 * Helper union and macros for setting the high or low 32-bit half of a
 * 64-bit quantity without doing 64-bit arithmetic.  _QUAD_HIGHWORD /
 * _QUAD_LOWWORD select the correct array slot for the host endianness.
 */
union _qcvt {
	int64_t qcvt;		/* the 64-bit value */
	int32_t val[2];		/* same storage viewed as two 32-bit words */
};
#define SETHIGH(q, h) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
}
#define SETLOW(q, l) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
}
131
132 /*
133 * Called by main() when ufs is going to be mounted as root.
134 */
135 int
136 ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
137 {
138 struct proc *p = current_proc(); /* XXX */
139 int error;
140
141 /* Set asynchronous flag by default */
142 vfs_setflags(mp, MNT_ASYNC);
143
144 if (error = ffs_mountfs(rvp, mp, context))
145 return (error);
146
147 (void)ffs_statfs(mp, vfs_statfs(mp), NULL);
148
149 return (0);
150 }
151
/*
 * VFS Operations.
 *
 * mount system call
 *
 * Handles both a fresh mount and an MNT_UPDATE of an existing mount:
 * a read-write -> read-only downgrade (flush and close writers), a
 * reload after fsck (MNT_RELOAD), and a read-only -> read-write
 * upgrade (MNTK_WANTRDWR).  Returns 0 on success or an errno value.
 */
int
ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct ufsmount *ump;
	register struct fs *fs;
	u_int size;
	int error = 0, flags;
	mode_t accessmode;
	int ronly;
	int reload = 0;

	/*
	 * If updating, check whether changing from read-write to
	 * read-only; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/*
			 * Flush any dirty data.
			 */
			VFS_SYNC(mp, MNT_WAIT, context);
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (error = ffs_flushfiles(mp, flags, p))
				return (error);
			fs->fs_clean = 1;
			fs->fs_ronly = 1;
			if (error = ffs_sbupdate(ump, MNT_WAIT)) {
				/* superblock write failed: undo the downgrade */
				fs->fs_clean = 0;
				fs->fs_ronly = 0;
				return (error);
			}
		}
		/* save fs_ronly to later use */
		ronly = fs->fs_ronly;
		if ((mp->mnt_flag & MNT_RELOAD) || ronly)
			reload = 1;
		if ((reload) &&
		    (error = ffs_reload(mp, vfs_context_ucred(context), p)))
			return (error);
		/* replace the ronly after load */
		fs->fs_ronly = ronly;
		/*
		 * Do not update the file system if the user was in singleuser
		 * and then tries to mount -uw without fscking
		 */
		if (!fs->fs_clean && ronly) {
			printf("WARNING: trying to mount a dirty file system\n");
			if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) {
				printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",fs->fs_fsmnt);
				/*
				 * Reset the readonly bit as reload might have
				 * modified this bit
				 */
				fs->fs_ronly = 1;
				return(EPERM);
			}
		}

		/* read-only -> read-write upgrade requested by the VFS layer */
		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			fs->fs_ronly = 0;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT);
		}
		if (devvp == 0) {
			return(0);
		}
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0)
		error = ffs_mountfs(devvp, mp, context);
	else {
		/* updating: the device must match the one originally mounted */
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* needs translation */
	}
	if (error) {
		return (error);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	/* record the mount point path in the in-core superblock */
	bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt));
	strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1);
	(void)ffs_statfs(mp, &mp->mnt_vfsstat, p);
	return (0);
}
249
250
/*
 * Argument bundle passed through vnode_iterate() to
 * ffs_reload_callback() for each vnode hung off the mount.
 */
struct ffs_reload_cargs {
	struct vnode *devvp;	/* device vnode to read inode blocks from */
	kauth_cred_t cred;	/* caller's credential (not read by the callback) */
	struct fs *fs;		/* in-core superblock of the mount */
	struct proc *p;		/* calling process (not read by the callback) */
	int error;		/* out: first buf_bread error, 0 if none */
#if REV_ENDIAN_FS
	int rev_endian;		/* nonzero if on-disk data is opposite-endian */
#endif /* REV_ENDIAN_FS */
};
261
262
/*
 * vnode_iterate() callback used by ffs_reload() (step 6 of the reload):
 * invalidate the vnode's cached buffers and re-read its on-disk inode
 * into the in-core inode.  Returns VNODE_RETURNED to continue the
 * iteration, or VNODE_RETURNED_DONE (with args->error set) to stop
 * after a read failure.
 */
static int
ffs_reload_callback(struct vnode *vp, void *cargs)
{
	struct inode *ip;
	struct buf *bp;
	struct fs *fs;
	struct ffs_reload_cargs *args;

	args = (struct ffs_reload_cargs *)cargs;

	/*
	 * flush all the buffers associated with this node
	 */
	if (buf_invalidateblks(vp, 0, 0, 0))
		panic("ffs_reload: dirty2");

	/*
	 * Step 6: re-read inode data
	 */
	ip = VTOI(vp);
	fs = args->fs;

	/* read the filesystem block that holds this vnode's on-disk inode */
	if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);

		return (VNODE_RETURNED_DONE);
	}

#if REV_ENDIAN_FS
	if (args->rev_endian) {
		/* byte-swap the dinode while copying it into the in-core inode */
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) +
		    ino_to_fsbo(fs, ip->i_number)), ip);
	} else {
#endif /* REV_ENDIAN_FS */
	/* native endianness: plain structure copy of the dinode */
	ip->i_din = *((struct dinode *)buf_dataptr(bp) +
	    ino_to_fsbo(fs, ip->i_number));
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */

	buf_brelse(bp);

	return (VNODE_RETURNED);
}
308
309
310 /*
311 * Reload all incore data for a filesystem (used after running fsck on
312 * the root filesystem and finding things to fix). The filesystem must
313 * be mounted read-only.
314 *
315 * Things to do to update the mount:
316 * 1) invalidate all cached meta-data.
317 * 2) re-read superblock from disk.
318 * 3) re-read summary information from disk.
319 * 4) invalidate all inactive vnodes.
320 * 5) invalidate all cached file data.
321 * 6) re-read inode data for all active vnodes.
322 */
323 ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p)
324 {
325 register struct vnode *devvp;
326 void *space;
327 struct buf *bp;
328 struct fs *fs, *newfs;
329 int i, blks, size, error;
330 u_int64_t maxfilesize; /* XXX */
331 int32_t *lp;
332 struct ffs_reload_cargs args;
333 #if REV_ENDIAN_FS
334 int rev_endian = (mountp->mnt_flag & MNT_REVEND);
335 #endif /* REV_ENDIAN_FS */
336
337 if ((mountp->mnt_flag & MNT_RDONLY) == 0)
338 return (EINVAL);
339 /*
340 * Step 1: invalidate all cached meta-data.
341 */
342 devvp = VFSTOUFS(mountp)->um_devvp;
343 if (buf_invalidateblks(devvp, 0, 0, 0))
344 panic("ffs_reload: dirty1");
345 /*
346 * Step 2: re-read superblock from disk.
347 */
348 size = vfs_devblocksize(mountp);
349
350 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) {
351 buf_brelse(bp);
352 return (error);
353 }
354 newfs = (struct fs *)buf_dataptr(bp);
355 #if REV_ENDIAN_FS
356 if (rev_endian) {
357 byte_swap_sbin(newfs);
358 }
359 #endif /* REV_ENDIAN_FS */
360 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
361 newfs->fs_bsize < sizeof(struct fs)) {
362 #if REV_ENDIAN_FS
363 if (rev_endian)
364 byte_swap_sbout(newfs);
365 #endif /* REV_ENDIAN_FS */
366
367 buf_brelse(bp);
368 return (EIO); /* XXX needs translation */
369 }
370 fs = VFSTOUFS(mountp)->um_fs;
371 /*
372 * Copy pointer fields back into superblock before copying in XXX
373 * new superblock. These should really be in the ufsmount. XXX
374 * Note that important parameters (eg fs_ncg) are unchanged.
375 */
376 newfs->fs_csp = fs->fs_csp;
377 newfs->fs_maxcluster = fs->fs_maxcluster;
378 newfs->fs_contigdirs = fs->fs_contigdirs;
379 bcopy(newfs, fs, (u_int)fs->fs_sbsize);
380 if (fs->fs_sbsize < SBSIZE)
381 buf_markinvalid(bp);
382 #if REV_ENDIAN_FS
383 if (rev_endian)
384 byte_swap_sbout(newfs);
385 #endif /* REV_ENDIAN_FS */
386 buf_brelse(bp);
387 mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
388 ffs_oldfscompat(fs);
389 maxfilesize = 0x100000000ULL; /* 4GB */
390 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
391 fs->fs_maxfilesize = maxfilesize; /* XXX */
392 /*
393 * Step 3: re-read summary information from disk.
394 */
395 blks = howmany(fs->fs_cssize, fs->fs_fsize);
396 space = fs->fs_csp;
397 for (i = 0; i < blks; i += fs->fs_frag) {
398 size = fs->fs_bsize;
399 if (i + fs->fs_frag > blks)
400 size = (blks - i) * fs->fs_fsize;
401 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size,
402 NOCRED, &bp)) {
403 buf_brelse(bp);
404 return (error);
405 }
406 #if REV_ENDIAN_FS
407 if (rev_endian) {
408 /* csum swaps */
409 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
410 }
411 #endif /* REV_ENDIAN_FS */
412 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
413 #if REV_ENDIAN_FS
414 if (rev_endian) {
415 /* csum swaps */
416 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
417 }
418 #endif /* REV_ENDIAN_FS */
419 space = (char *) space + size;
420 buf_brelse(bp);
421 }
422 /*
423 * We no longer know anything about clusters per cylinder group.
424 */
425 if (fs->fs_contigsumsize > 0) {
426 lp = fs->fs_maxcluster;
427 for (i = 0; i < fs->fs_ncg; i++)
428 *lp++ = fs->fs_contigsumsize;
429 }
430 #if REV_ENDIAN_FS
431 args.rev_endian = rev_endian;
432 #endif /* REV_ENDIAN_FS */
433 args.devvp = devvp;
434 args.cred = cred;
435 args.fs = fs;
436 args.p = p;
437 args.error = 0;
438 /*
439 * ffs_reload_callback will be called for each vnode
440 * hung off of this mount point that can't be recycled...
441 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
442 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
443 * properly referenced and unreferenced around the callback
444 */
445 vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args);
446
447 return (args.error);
448 }
449
/*
 * Common code for mount and mountroot
 *
 * Validates the superblock (trying a byte-swapped interpretation when
 * REV_ENDIAN_FS is built in), enforces mount restrictions (block size,
 * fragment size, dirty non-root filesystems), sets the device block
 * size, checks cylinder group 0 for a known overlap corruption, then
 * builds the in-core ufsmount: private superblock copy, summary area,
 * cluster table, and contigdirs array.  Returns 0 on success or an
 * errno; on failure all allocations are released at 'out'.
 */
int
ffs_mountfs(devvp, mp, context)
	struct vnode *devvp;
	struct mount *mp;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	dev_t dev;
	struct buf *cgbp;
	struct cg *cgp;
	int32_t clustersumoff;
	void *space;
	int error, i, blks, ronly;
	u_int32_t size;
	int32_t *lp;
	kauth_cred_t cred;
	u_int64_t maxfilesize;					/* XXX */
	u_int dbsize = DEV_BSIZE;
#if REV_ENDIAN_FS
	int rev_endian=0;
#endif /* REV_ENDIAN_FS */
	dev = devvp->v_rdev;
	cred = vfs_context_ucred(context);

	ronly = vfs_isrdonly(mp);
	bp = NULL;
	ump = NULL;

	/* Advisory locking should be handled at the VFS layer */
	vfs_setlocklocal(mp);

	/* Obtain the actual device block size */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) {
		error = ENXIO;
		goto out;
	}

	/* Read the superblock off the device */
	if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)),
				   SBSIZE, cred, &bp))
		goto out;
	fs = (struct fs *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	/*
	 * If the superblock doesn't validate as-is, try interpreting it
	 * as byte-swapped (a filesystem written on an opposite-endian
	 * machine); remember the result in rev_endian.
	 */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
		int magic = fs->fs_magic;

		byte_swap_ints(&magic, 1);
		if (magic != FS_MAGIC) {
			error = EINVAL;
			goto out;
		}
		byte_swap_sbin(fs);
		if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
		    fs->fs_bsize < sizeof(struct fs)) {
			byte_swap_sbout(fs);
			error = EINVAL;	/* XXX needs translation */
			goto out;
		}
		rev_endian=1;
	}
#endif /* REV_ENDIAN_FS */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EINVAL;	/* XXX needs translation */
		goto out;
	}


	/*
	 * Buffer cache does not handle multiple pages in a buf when
	 * invalidating incore buffer in pageout. There are no locks
	 * in the pageout path.  So there is a danger of loosing data when
	 * block allocation happens at the same time a pageout of buddy
	 * page occurs. incore() returns buf with both
	 * pages, this leads vnode-pageout to incorrectly flush of entire.
	 * buf. Till the low level ffs code is modified to deal with these
	 * do not mount any FS more than 4K size.
	 */
	/*
	 * Can't mount filesystems with a fragment size less than DIRBLKSIZ
	 */
	/*
	 * Don't mount dirty filesystems, except for the root filesystem
	 */
	if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) ||
	    ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = ENOTSUP;
		goto out;
	}

	/* Let's figure out the devblock size the file system is with */
	/* the device block size = fragment size / number of sectors per frag */

	dbsize = fs->fs_fsize / NSPF(fs);
	if(dbsize <= 0 ) {
		kprintf("device blocksize computaion failed\n");
	} else {
		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize,
		    FWRITE, context) != 0) {
			kprintf("failed to set device blocksize\n");
		}
		/* force the specfs to reread blocksize from size() */
		set_fsblocksize(devvp);
	}

	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EROFS;          /* needs translation */
		goto out;
	}

	/* If we are not mounting read only, then check for overlap
	 * condition in cylinder group's free block map.
	 * If overlap exists, then force this into a read only mount
	 * to avoid further corruption. PR#2216969
	 */
	if (ronly == 0){
		if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))),
					    (int)fs->fs_cgsize, NOCRED, &cgbp)) {
			buf_brelse(cgbp);
			goto out;
		}
		cgp = (struct cg *)buf_dataptr(cgbp);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgin(cgp,fs);
#endif /* REV_ENDIAN_FS */
		if (!cg_chkmagic(cgp)){
			/*
			 * NOTE(review): 'error' is not assigned on this
			 * failure path, so the function returns the prior
			 * value (0 from the successful buf_bread above) —
			 * confirm whether a nonzero errno was intended here.
			 */
#if REV_ENDIAN_FS
			if (rev_endian)
				byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
			buf_brelse(cgbp);
			goto out;
		}
		if (cgp->cg_clustersumoff != 0) {
			/* Check for overlap */
			clustersumoff = cgp->cg_freeoff +
				howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY);
			clustersumoff = roundup(clustersumoff, sizeof(long));
			if (cgp->cg_clustersumoff < clustersumoff) {
				/* Overlap exists */
				mp->mnt_flag |= MNT_RDONLY;
				ronly = 1;
			}
		}
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(cgbp);
	}

	/* Build the in-core mount state: ufsmount plus a private superblock copy */
	ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK);
	bzero((caddr_t)ump, sizeof *ump);
	ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT,
	    M_WAITOK);
	bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		buf_markinvalid(bp);
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	fs->fs_ronly = ronly;
	/*
	 * Read the cylinder-group summary area into one allocation that
	 * also holds the cluster table and the contigdirs array.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
					   size, cred, &bp)) {
			_FREE(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy((char *)buf_dataptr(bp), space, (u_int)size);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_ints((int *) space, size / sizeof(int));
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		buf_brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		/* initialize per-cg max cluster counts to the optimistic maximum */
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	space = (u_int8_t *)space + size;
	bzero(fs->fs_contigdirs, size);
	/* XXX Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	/* XXX End of compatibility */
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
	mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
	/* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */
	mp->mnt_maxsymlinklen = 60;
#if REV_ENDIAN_FS
	if (rev_endian)
		mp->mnt_flag |= MNT_REVEND;
#endif /* REV_ENDIAN_FS */
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		dqfileinit(&ump->um_qfiles[i]);
	ffs_oldfscompat(fs);
	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
	maxfilesize = 0x100000000ULL;	/* 4GB */
#if 0
	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
#endif /* 0 */
	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
		fs->fs_maxfilesize = maxfilesize;		/* XXX */
	if (ronly == 0) {
		/* mark the fs dirty while mounted read-write */
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT);
	}
	return (0);
out:
	if (bp)
		buf_brelse(bp);
	if (ump) {
		_FREE(ump->um_fs, M_UFSMNT);
		_FREE(ump, M_UFSMNT);
	}
	return (error);
}
715
716 /*
717 * Sanity checks for old file systems.
718 *
719 * XXX - goes away some day.
720 */
721 ffs_oldfscompat(fs)
722 struct fs *fs;
723 {
724 int i;
725
726 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */
727 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */
728 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
729 fs->fs_nrpos = 8; /* XXX */
730 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
731 u_int64_t sizepb = fs->fs_bsize; /* XXX */
732 /* XXX */
733 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
734 for (i = 0; i < NIADDR; i++) { /* XXX */
735 sizepb *= NINDIR(fs); /* XXX */
736 fs->fs_maxfilesize += sizepb; /* XXX */
737 } /* XXX */
738 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */
739 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */
740 } /* XXX */
741 return (0);
742 }
743
/*
 * unmount system call
 *
 * Flushes all files (forcibly when MNT_FORCE is set), marks the
 * filesystem clean on disk if it was mounted read-write, and frees
 * the in-core summary area, superblock copy, and ufsmount.
 */
int
ffs_unmount(mp, mntflags, context)
	struct mount *mp;
	int mntflags;
	vfs_context_t context;
{
	struct proc *p = vfs_context_proc(context);
	register struct ufsmount *ump;
	register struct fs *fs;
	int error, flags;
	int force;

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}
	/* a forced unmount proceeds even if the flush fails */
	if ( (error = ffs_flushfiles(mp, flags, p)) && !force )
		return (error);
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;

	if (fs->fs_ronly == 0) {
		/* mark clean so fsck isn't required on the next mount */
		fs->fs_clean = 1;
		if (error = ffs_sbupdate(ump, MNT_WAIT)) {
			fs->fs_clean = 0;
#ifdef notyet
		/* we can atleast cleanup ; as the media could be WP */
		/* & during mount, we do not check for write failures */
		/* FIXME LATER : the Correct fix would be to have */
		/* mount detect the WP media and downgrade to readonly mount */
		/* For now, here it is */
			return (error);
#endif /* notyet */
		}
	}
	/* release all in-core state for the mount */
	_FREE(fs->fs_csp, M_UFSMNT);
	_FREE(fs, M_UFSMNT);
	_FREE(ump, M_UFSMNT);

	return (0);
}
790
791 /*
792 * Flush out all the files in a filesystem.
793 */
794 ffs_flushfiles(mp, flags, p)
795 register struct mount *mp;
796 int flags;
797 struct proc *p;
798 {
799 register struct ufsmount *ump;
800 int i, error;
801
802 ump = VFSTOUFS(mp);
803
804 #if QUOTA
805 /*
806 * NOTE: The open quota files have an indirect reference
807 * on the root directory vnode. We must account for this
808 * extra reference when doing the intial vflush.
809 */
810 if (mp->mnt_flag & MNT_QUOTA) {
811 struct vnode *rootvp = NULLVP;
812 int quotafilecnt = 0;
813
814 /* Find out how many quota files we have open. */
815 for (i = 0; i < MAXQUOTAS; i++) {
816 if (ump->um_qfiles[i].qf_vp != NULLVP)
817 ++quotafilecnt;
818 }
819
820 /*
821 * Check if the root vnode is in our inode hash
822 * (so we can skip over it).
823 */
824 rootvp = ufs_ihashget(ump->um_dev, ROOTINO);
825
826 error = vflush(mp, rootvp, SKIPSYSTEM|flags);
827
828 if (rootvp) {
829 /*
830 * See if there are additional references on the
831 * root vp besides the ones obtained from the open
832 * quota files and the hfs_chashget call above.
833 */
834 if ((error == 0) &&
835 (rootvp->v_usecount > (1 + quotafilecnt))) {
836 error = EBUSY; /* root dir is still open */
837 }
838 vnode_put(rootvp);
839 }
840 if (error && (flags & FORCECLOSE) == 0)
841 return (error);
842
843 for (i = 0; i < MAXQUOTAS; i++) {
844 if (ump->um_qfiles[i].qf_vp == NULLVP)
845 continue;
846 quotaoff(mp, i);
847 }
848 /*
849 * Here we fall through to vflush again to ensure
850 * that we have gotten rid of all the system vnodes.
851 */
852 }
853 #endif
854 error = vflush(mp, NULLVP, SKIPSWAP|flags);
855 error = vflush(mp, NULLVP, flags);
856 return (error);
857 }
858
859 /*
860 * Get file system statistics.
861 */
862 int
863 ffs_statfs(mp, sbp, context)
864 struct mount *mp;
865 register struct vfsstatfs *sbp;
866 vfs_context_t context;
867 {
868 register struct ufsmount *ump;
869 register struct fs *fs;
870
871 ump = VFSTOUFS(mp);
872 fs = ump->um_fs;
873 if (fs->fs_magic != FS_MAGIC)
874 panic("ffs_statfs");
875 sbp->f_bsize = fs->fs_fsize;
876 sbp->f_iosize = fs->fs_bsize;
877 sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize);
878 sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag +
879 fs->fs_cstotal.cs_nffree));
880 sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree));
881 sbp->f_files = (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO));
882 sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree);
883 return (0);
884 }
885
/*
 * Get volume attributes (VFS_GETATTR entry point).
 *
 * Reports space/inode statistics from the superblock, the fsid, the
 * volume name (read from the UFS label block on the device), and the
 * static capability/attribute sets.  Returns 0, or an errno from the
 * label read.
 */
int
ffs_vfs_getattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct fs *fs;
	kauth_cred_t cred;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	char *offset;
	int bs, error, length;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	cred = vfs_context_ucred(context);

	/* space/inode statistics straight from the in-core superblock */
	VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize);
	VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize);
	VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize));
	VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long)
	    (fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree)));
	VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs,
	    fs->fs_minfree)));
	VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long)
	    (fs->fs_ncg * fs->fs_ipg - ROOTINO)));
	VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long)
	    fs->fs_cstotal.cs_nifree));

	if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
		fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
		fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
		VFSATTR_SET_SUPPORTED(fsap, f_fsid);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		/* the volume name lives in the UFS label block on the device */
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);

		if (error = (int)buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;

		if (ufs_label_check(ulp)) {
			length = ulp->ul_namelen;
#if REV_ENDIAN_FS
			if (mp->mnt_flag & MNT_REVEND)
				length = NXSwapShort(length);
#endif
			if (length > 0 && length <= UFS_MAX_LABEL_NAME) {
				/*
				 * NOTE(review): when length equals
				 * UFS_MAX_LABEL_NAME the second store below
				 * writes at index UFS_MAX_LABEL_NAME — this
				 * assumes f_vol_name is larger than the label
				 * name limit; confirm the buffer size.
				 */
				bcopy(ulp->ul_name, fsap->f_vol_name, length);
				fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0';
				fsap->f_vol_name[length] = '\0';
			}
		}

		buf_brelse(bp);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		/* capabilities this FFS implementation actually provides */
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS ;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES]
		    = VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1]
		    = 0;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2]
		    = 0;

		/* Capabilities we know about: */
		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_PERSISTENTOBJECTIDS |
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_JOURNAL |
		    VOL_CAP_FMT_JOURNAL_ACTIVE |
		    VOL_CAP_FMT_NO_ROOT_TIMES |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_ZERO_RUNS |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS |
		    VOL_CAP_FMT_2TB_FILESIZE;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
		    VOL_CAP_INT_SEARCHFS |
		    VOL_CAP_INT_ATTRLIST |
		    VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_READDIRATTR |
		    VOL_CAP_INT_EXCHANGEDATA |
		    VOL_CAP_INT_COPYFILE |
		    VOL_CAP_INT_ALLOCATE |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK ;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		/* only volume-level attributes are supported */
		fsap->f_attributes.validattr.commonattr = 0;
		fsap->f_attributes.validattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.validattr.dirattr = 0;
		fsap->f_attributes.validattr.fileattr = 0;
		fsap->f_attributes.validattr.forkattr = 0;

		fsap->f_attributes.nativeattr.commonattr = 0;
		fsap->f_attributes.nativeattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.nativeattr.dirattr = 0;
		fsap->f_attributes.nativeattr.fileattr = 0;
		fsap->f_attributes.nativeattr.forkattr = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}

	return (0);
}
1032
1033
/*
 * Set volume attributes (VFS_SETATTR entry point).
 *
 * Only the volume name (f_vol_name) is settable; it is written into
 * the UFS label block on the device and the label checksum is
 * recomputed.  Returns 0, or an errno from the label read.
 */
int
ffs_vfs_setattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	kauth_cred_t cred;
	char *offset;
	int bs, error;


	ump = VFSTOUFS(mp);
	cred = vfs_context_ucred(context);

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		/* read the label block that holds the volume name */
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);
		if (error = buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		/* Validate the label structure; init if not valid */
		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;
		if (!ufs_label_check(ulp))
			ufs_label_init(ulp);

		/* Copy new name over existing name */
		/*
		 * NOTE(review): the new name's length is not validated
		 * against UFS_MAX_LABEL_NAME before the bcopy below, and on
		 * MNT_REVEND mounts ul_namelen is byte-swapped before being
		 * used as the copy length/index — confirm callers bound
		 * f_vol_name appropriately.
		 */
		ulp->ul_namelen = strlen(fsap->f_vol_name);
#if REV_ENDIAN_FS
		if (mp->mnt_flag & MNT_REVEND)
			ulp->ul_namelen = NXSwapShort(ulp->ul_namelen);
#endif
		bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen);
		ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0';
		ulp->ul_name[ulp->ul_namelen] = '\0';

		/* Update the checksum */
		ulp->ul_checksum = 0;
		ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp));

		/* Write the label back to disk */
		buf_bwrite(bp);
		bp = NULL;

		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	return (0);
}
/*
 * Argument package handed to ffs_sync_callback() through vnode_iterate():
 * carries the caller's context and wait mode down to each vnode and the
 * first fsync error back up to ffs_sync().
 */
struct ffs_sync_cargs {
	vfs_context_t context;	/* caller's VFS context, passed to VNOP_FSYNC */
	int waitfor;		/* wait mode forwarded to VNOP_FSYNC */
	int error;		/* last error reported by a callback, else 0 */
};
1104
1105
1106 static int
1107 ffs_sync_callback(struct vnode *vp, void *cargs)
1108 {
1109 struct inode *ip;
1110 struct ffs_sync_cargs *args;
1111 int error;
1112
1113 args = (struct ffs_sync_cargs *)cargs;
1114
1115 ip = VTOI(vp);
1116
1117 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) {
1118 error = VNOP_FSYNC(vp, args->waitfor, args->context);
1119
1120 if (error)
1121 args->error = error;
1122
1123 }
1124 return (VNODE_RETURNED);
1125 }
1126
1127 /*
1128 * Go through the disk queues to initiate sandbagged IO;
1129 * go through the inodes to write those that have been modified;
1130 * initiate the writing of the super block if it has been modified.
1131 *
1132 * Note: we are always called with the filesystem marked `MPBUSY'.
1133 */
1134 int
1135 ffs_sync(mp, waitfor, context)
1136 struct mount *mp;
1137 int waitfor;
1138 vfs_context_t context;
1139 {
1140 struct vnode *nvp, *vp;
1141 struct ufsmount *ump = VFSTOUFS(mp);
1142 struct fs *fs;
1143 struct timeval tv;
1144 int error, allerror = 0;
1145 struct ffs_sync_cargs args;
1146
1147 fs = ump->um_fs;
1148 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1149 printf("fs = %s\n", fs->fs_fsmnt);
1150 panic("update: rofs mod");
1151 }
1152 /*
1153 * Write back each (modified) inode.
1154 */
1155 args.context = context;
1156 args.waitfor = waitfor;
1157 args.error = 0;
1158 /*
1159 * ffs_sync_callback will be called for each vnode
1160 * hung off of this mount point... the vnode will be
1161 * properly referenced and unreferenced around the callback
1162 */
1163 vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args);
1164
1165 if (args.error)
1166 allerror = args.error;
1167
1168 /*
1169 * Force stale file system control information to be flushed.
1170 */
1171 if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context))
1172 allerror = error;
1173 #if QUOTA
1174 qsync(mp);
1175 #endif
1176 /*
1177 * Write back modified superblock.
1178 */
1179 if (fs->fs_fmod != 0) {
1180 fs->fs_fmod = 0;
1181 microtime(&tv);
1182 fs->fs_time = tv.tv_sec;
1183 if (error = ffs_sbupdate(ump, waitfor))
1184 allerror = error;
1185 }
1186 return (allerror);
1187 }
1188
/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk. If it is in core, wait for the lock bit to clear, then
 * return the inode locked. Detection and handling of mount points must be
 * done by the calling routine.
 */
int
ffs_vget(mp, ino, vpp, context)
	mount_t mp;
	ino64_t ino;
	vnode_t *vpp;
	vfs_context_t context;
{
	/*
	 * Thin wrapper: narrow the 64-bit VFS inode number to ino_t and
	 * delegate to ffs_vget_internal() with no parent/name hints and
	 * no file-handle (NFS) semantics.
	 */
	return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0));
}
1204
1205
/*
 * Core inode lookup: return in *vpp an iocounted vnode for inode `ino'
 * on mount `mp', either from the inode hash or by reading the dinode
 * from disk and creating a fresh vnode for it.
 *
 * dvp/cnp are optional parent/name hints used for name-cache entry;
 * mode, when non-zero, supplies the file type instead of the on-disk
 * i_mode (presumably for a create in progress — confirm with callers).
 * fhwanted marks an NFS file-handle lookup, which must see ESTALE
 * rather than ENOENT for an unallocated inode.
 *
 * Returns 0 with *vpp set, or an error with *vpp left NULL.
 */
int
ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted)
	mount_t mp;
	ino_t ino;
	vnode_t *vpp;
	vnode_t dvp;
	struct componentname *cnp;
	int mode;
	int fhwanted;
{
	struct proc *p = current_proc();	/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct vnode_fsparam vfsp;
	struct timeval tv;
	enum vtype vtype;
	dev_t dev;
	int i, type, error = 0;

	*vpp = NULL;
	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
#if 0
	/* Check for unmount in progress */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		return (EPERM);
	}
#endif
	/*
	 * Allocate a new inode... do it before we check the
	 * cache, because the MALLOC_ZONE may block
	 */
	type = M_FFSNODE;
	MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);

	/*
	 * check in the inode hash
	 */
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		/*
		 * found it... get rid of the allocation
		 * that we didn't need and return
		 * the 'found' vnode
		 */
		FREE_ZONE(ip, sizeof(struct inode), type);
		vp = *vpp;
		return (0);
	}
	/* Not cached: start from a zeroed inode and fill it from disk */
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * lock the inode
	 */
//	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
//	lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p);

	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#if QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/*
	 * Mark the inode as being set up; lookups that find it in the
	 * hash while IN_ALLOC is set will wait and be woken below.
	 */
	SET(ip->i_flag, IN_ALLOC);
	/*
	 * Put it onto its hash chain locked so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	/* Read in the disk contents for the inode, copy into the inode. */
	if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);
		goto errout;
	}
#if REV_ENDIAN_FS
	/* Opposite-endian mounts swap the dinode as it is copied in */
	if (mp->mnt_flag & MNT_REVEND) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip);
	} else {
		ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
	}
#else
	ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);

	/* mode == 0 means "use the on-disk mode"; otherwise the caller
	 * supplies the file type */
	if (mode == 0)
		vtype = IFTOVT(ip->i_mode);
	else
		vtype = IFTOVT(mode);

	if (vtype == VNON) {
		if (fhwanted) {
			/* NFS is in play */
			error = ESTALE;
			goto errout;
		} else {
			error = ENOENT;
			goto errout;
		}
	}

	/* Describe the new vnode for vnode_create() */
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "ufs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = ip;
	vfsp.vnfs_cnp = cnp;

	if (mode == 0)
		vfsp.vnfs_filesize = ip->i_din.di_size;
	else
		vfsp.vnfs_filesize = 0;

	/* Select the vnode operations vector by file type */
	if (vtype == VFIFO )
		vfsp.vnfs_vops = FFS_FIFOOPS;
	else if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_vops = ffs_specop_p;
	else
		vfsp.vnfs_vops = ffs_vnodeop_p;

	if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_rdev = ip->i_rdev;
	else
		vfsp.vnfs_rdev = 0;

	/* Only enter the name cache when the lookup asked for it */
	if (dvp && cnp && (cnp->cn_flags & MAKEENTRY))
		vfsp.vnfs_flags = 0;
	else
		vfsp.vnfs_flags = VNFS_NOCACHE;

	/*
	 * Tag root directory
	 */
	vfsp.vnfs_markroot = (ip->i_number == ROOTINO);
	vfsp.vnfs_marksystem = 0;

	if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)))
		goto errout;

	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	ip->i_vnode = vp;

	vnode_ref(ip->i_devvp);
	vnode_addfsref(vp);
	vnode_settag(vp, VT_UFS);

	/*
	 * Initialize modrev times
	 */
	microtime(&tv);
	SETHIGH(ip->i_modrev, tv.tv_sec);
	SETLOW(ip->i_modrev, tv.tv_usec * 4294);

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)tv.tv_sec)
			nextgennumber = tv.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */
	*vpp = vp;

	/* Setup complete: clear the in-allocation flag and wake waiters */
	CLR(ip->i_flag, IN_ALLOC);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);

	return (0);

errout:
	/* Undo the hash insertion, wake anyone blocked on this inode,
	 * and release the zone allocation */
	ufs_ihashrem(ip);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);
	FREE_ZONE(ip, sizeof(struct inode), type);

	return (error);
}
1405
1406 /*
1407 * File handle to vnode
1408 *
1409 * Have to be really careful about stale file handles:
1410 * - check that the inode number is valid
1411 * - call vget to get the locked inode
1412 * - check for an unallocated inode (i_mode == 0)
1413 */
1414 int
1415 ffs_fhtovp(mp, fhlen, fhp, vpp, context)
1416 register struct mount *mp;
1417 int fhlen;
1418 unsigned char *fhp;
1419 struct vnode **vpp;
1420 vfs_context_t context;
1421 {
1422 register struct ufid *ufhp;
1423 register struct inode *ip;
1424 struct vnode *nvp;
1425 struct fs *fs;
1426 int error;
1427
1428 if (fhlen < (int)sizeof(struct ufid))
1429 return (EINVAL);
1430 ufhp = (struct ufid *)fhp;
1431 fs = VFSTOUFS(mp)->um_fs;
1432 if (ufhp->ufid_ino < ROOTINO ||
1433 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1434 return (ESTALE);
1435 error = ffs_vget_internal(mp, ufhp->ufid_ino, &nvp, NULL, NULL, 0, 1);
1436 if (error) {
1437 *vpp = NULLVP;
1438 return (error);
1439 }
1440 ip = VTOI(nvp);
1441 if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen) {
1442 vnode_put(nvp);
1443 *vpp = NULLVP;
1444 return (ESTALE);
1445 }
1446 *vpp = nvp;
1447 return (0);
1448 }
1449
1450 /*
1451 * Vnode pointer to File handle
1452 */
1453 /* ARGSUSED */
1454 int
1455 ffs_vptofh(vp, fhlenp, fhp, context)
1456 struct vnode *vp;
1457 int *fhlenp;
1458 unsigned char *fhp;
1459 vfs_context_t context;
1460 {
1461 register struct inode *ip;
1462 register struct ufid *ufhp;
1463
1464 if (*fhlenp < (int)sizeof(struct ufid))
1465 return (EOVERFLOW);
1466 ip = VTOI(vp);
1467 ufhp = (struct ufid *)fhp;
1468 ufhp->ufid_ino = ip->i_number;
1469 ufhp->ufid_gen = ip->i_gen;
1470 *fhlenp = sizeof(struct ufid);
1471 return (0);
1472 }
1473
/*
 * Initialize the filesystem; just use ufs_init.
 */
int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	/* FFS has no setup of its own; defer to the shared UFS code. */
	return (ufs_init(vfsp));
}
1484
1485 /*
1486 * fast filesystem related variables.
1487 */
1488 ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
1489 user_addr_t newp, size_t newlen, vfs_context_t context)
1490 {
1491 extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
1492
1493 /* all sysctl names at this level are terminal */
1494 if (namelen != 1)
1495 return (ENOTDIR); /* overloaded */
1496
1497 switch (name[0]) {
1498 case FFS_CLUSTERREAD:
1499 return (sysctl_int(oldp, oldlenp, newp, newlen,
1500 &doclusterread));
1501 case FFS_CLUSTERWRITE:
1502 return (sysctl_int(oldp, oldlenp, newp, newlen,
1503 &doclusterwrite));
1504 case FFS_REALLOCBLKS:
1505 return (sysctl_int(oldp, oldlenp, newp, newlen,
1506 &doreallocblks));
1507 case FFS_ASYNCFREE:
1508 return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
1509 default:
1510 return (ENOTSUP);
1511 }
1512 /* NOTREACHED */
1513 }
1514
/*
 * Write a superblock and associated information back to disk.
 *
 * First flushes the cylinder-group summary (csum) blocks, then the
 * superblock itself, applying old-format and opposite-endian
 * compatibility fixups to the on-disk copy only.  If any summary
 * write fails, the superblock is NOT written, so it is not recorded
 * as clean on disk.  Returns 0 or the last write error.
 */
int
ffs_sbupdate(mp, waitfor)
	struct ufsmount *mp;
	int waitfor;
{
	register struct fs *dfs, *fs = mp->um_fs;
	register struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;
	int devBlockSize=0;
#if REV_ENDIAN_FS
	int rev_endian=(mp->um_mountp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		/* Last chunk may be shorter than a full block */
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
			size, 0, 0, BLK_META);
		bcopy(space, (char *)buf_dataptr(bp), (u_int)size);
#if REV_ENDIAN_FS
		/* Summary data is stored byte-swapped on opposite-endian mounts */
		if (rev_endian) {
			byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
		}
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		/* Async write unless the caller asked to wait for I/O */
		if (waitfor != MNT_WAIT)
			buf_bawrite(bp);
		else if (error = (int)buf_bwrite(bp))
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror)
		return (allerror);
	devBlockSize = vfs_devblocksize(mp->um_mountp);

	bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META);
	bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize);
	/* Restore compatibility to old file systems.		   XXX */
	dfs = (struct fs *)buf_dataptr(bp);			/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
		dfs->fs_nrpos = -1;				/* XXX */
#if REV_ENDIAN_FS
	/*
	 * Swapping bytes here ; so that in case
	 * of inode format < FS_44INODEFMT appropriate
	 * fields get moved
	 */
	if (rev_endian) {
		byte_swap_sbout((struct fs *)buf_dataptr(bp));
	}
#endif /* REV_ENDIAN_FS */
	/*
	 * For pre-4.4 inode formats, rotate the five words starting at
	 * fs_qbmask in the on-disk copy back into their old positions.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		int32_t *lp, tmp;			/* XXX */
							/* XXX */
		lp = (int32_t *)&dfs->fs_qbmask;	/* XXX */
		tmp = lp[4];				/* XXX */
		for (i = 4; i > 0; i--)			/* XXX */
			lp[i] = lp[i-1];		/* XXX */
		lp[0] = tmp;				/* XXX */
	}						/* XXX */
#if REV_ENDIAN_FS
	/* Note that dfs is already swapped so swap the filesize
	 * before writing
	 */
	if (rev_endian) {
		dfs->fs_maxfilesize = NXSwapLongLong(mp->um_savedmaxfilesize);	/* XXX */
	} else {
#endif /* REV_ENDIAN_FS */
		dfs->fs_maxfilesize = mp->um_savedmaxfilesize;	/* XXX */
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else if (error = (int)buf_bwrite(bp))
		allerror = error;

	return (allerror);
}