]> git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ffs/ffs_vfsops.c
xnu-792.13.8.tar.gz
[apple/xnu.git] / bsd / ufs / ffs / ffs_vfsops.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
31 /*
32 * Copyright (c) 1989, 1991, 1993, 1994
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
64 */
65
66 #include <rev_endian_fs.h>
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/namei.h>
70 #include <sys/proc.h>
71 #include <sys/kauth.h>
72 #include <sys/kernel.h>
73 #include <sys/vnode_internal.h>
74 #include <sys/socket.h>
75 #include <sys/mount_internal.h>
76 #include <sys/mount.h>
77 #include <sys/buf.h>
78 #include <sys/mbuf.h>
79 #include <sys/file.h>
80 #include <sys/disk.h>
81 #include <sys/ioctl.h>
82 #include <sys/errno.h>
83 #include <sys/malloc.h>
84 #include <sys/ubc.h>
85 #include <sys/quota.h>
86
87 #include <miscfs/specfs/specdev.h>
88
89 #include <ufs/ufs/quota.h>
90 #include <ufs/ufs/ufsmount.h>
91 #include <ufs/ufs/inode.h>
92 #include <ufs/ufs/ufs_extern.h>
93
94 #include <ufs/ffs/fs.h>
95 #include <ufs/ffs/ffs_extern.h>
96 #if REV_ENDIAN_FS
97 #include <ufs/ufs/ufs_byte_order.h>
98 #include <libkern/OSByteOrder.h>
99 #endif /* REV_ENDIAN_FS */
100
101 int ffs_sbupdate(struct ufsmount *, int);
102
/*
 * VFS operations vector for UFS/FFS.  Slot order must match the layout
 * of struct vfsops; unused trailing slots are zero-filled.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,		/* mount */
	ufs_start,		/* start */
	ffs_unmount,		/* unmount */
	ufs_root,		/* root */
	ufs_quotactl,		/* quotactl */
	ffs_vfs_getattr,	/* getattr */
	ffs_sync,		/* sync */
	ffs_vget,		/* vget */
	ffs_fhtovp,		/* fhtovp */
	ffs_vptofh,		/* vptofh */
	ffs_init,		/* init */
	ffs_sysctl,		/* sysctl */
	ffs_vfs_setattr,	/* setattr */
	{0}			/* reserved */
};
119
/* Global inode generation-number seed (defined elsewhere). */
extern u_long nextgennumber;

/*
 * Overlay used to access the two 32-bit halves of a 64-bit quantity
 * without shift/mask arithmetic.  _QUAD_HIGHWORD/_QUAD_LOWWORD select
 * the correct array slot for the host's byte order.
 */
union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};
/* Replace the high-order 32 bits of q with h. */
#define SETHIGH(q, h) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
}
/* Replace the low-order 32 bits of q with l. */
#define SETLOW(q, l) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
}
138
139 /*
140 * Called by main() when ufs is going to be mounted as root.
141 */
142 int
143 ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
144 {
145 struct proc *p = current_proc(); /* XXX */
146 int error;
147
148 /* Set asynchronous flag by default */
149 vfs_setflags(mp, MNT_ASYNC);
150
151 if (error = ffs_mountfs(rvp, mp, context))
152 return (error);
153
154 (void)ffs_statfs(mp, vfs_statfs(mp), NULL);
155
156 return (0);
157 }
158
/*
 * VFS Operations.
 *
 * mount system call
 *
 * Handles both a fresh mount (via ffs_mountfs) and MNT_UPDATE remounts:
 * downgrading read-write -> read-only (flush, mark clean, push the
 * superblock), reloading in-core data after fsck, and upgrading
 * read-only -> read-write.  Finally records the mount-point path in the
 * superblock and refreshes the cached statistics.
 */
int
ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct ufsmount *ump;
	register struct fs *fs;
	u_int size;		/* XXX unused */
	int error = 0, flags;
	mode_t accessmode;	/* XXX unused */
	int ronly;
	int reload = 0;

	/*
	 * If updating, check whether changing from read-write to
	 * read-only; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/*
			 * Flush any dirty data.
			 */
			VFS_SYNC(mp, MNT_WAIT, context);
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (error = ffs_flushfiles(mp, flags, p))
				return (error);
			/* Mark clean and push the superblock before going r/o. */
			fs->fs_clean = 1;
			fs->fs_ronly = 1;
			if (error = ffs_sbupdate(ump, MNT_WAIT)) {
				/* Superblock write failed: undo clean/ro marks. */
				fs->fs_clean = 0;
				fs->fs_ronly = 0;
				return (error);
			}
		}
		/* save fs_ronly to later use */
		ronly = fs->fs_ronly;
		if ((mp->mnt_flag & MNT_RELOAD) || ronly)
			reload = 1;
		if ((reload) &&
		    (error = ffs_reload(mp, vfs_context_ucred(context), p)))
			return (error);
		/* replace the ronly after load */
		fs->fs_ronly = ronly;
		/*
		 * Do not update the file system if the user was in singleuser
		 * and then tries to mount -uw without fscking
		 */
		if (!fs->fs_clean && ronly) {
			printf("WARNING: trying to mount a dirty file system\n");
			if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) {
				printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",fs->fs_fsmnt);
				/*
				 * Reset the readonly bit as reload might have
				 * modified this bit
				 */
				fs->fs_ronly = 1;
				return(EPERM);
			}
		}

		/* Upgrade r/o -> r/w: mark dirty until a clean unmount. */
		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			fs->fs_ronly = 0;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT);
		}
		/* Update without a device: nothing more to do. */
		if (devvp == 0) {
			return(0);
		}
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0)
		error = ffs_mountfs(devvp, mp, context);
	else {
		/* Update with a device: it must be the same device. */
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* needs translation */
	}
	if (error) {
		return (error);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	/* Record the mount-point path in the superblock (NUL padded). */
	bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt));
	strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1);
	(void)ffs_statfs(mp, &mp->mnt_vfsstat, p);
	return (0);
}
256
257
/*
 * Context passed to ffs_reload_callback() through vnode_iterate().
 */
struct ffs_reload_cargs {
	struct vnode *devvp;	/* device vnode inode blocks are read from */
	kauth_cred_t cred;	/* caller credential (callback reads with NOCRED) */
	struct fs *fs;		/* in-core superblock of the fs being reloaded */
	struct proc *p;		/* calling process (not used by the callback) */
	int error;		/* set to the first buf_bread error, else 0 */
#if REV_ENDIAN_FS
	int rev_endian;		/* non-zero if on-disk data is byte-swapped */
#endif /* REV_ENDIAN_FS */
};
268
269
/*
 * Per-vnode worker for ffs_reload() (invoked via vnode_iterate):
 * invalidates the vnode's buffers and re-reads its on-disk inode into
 * the in-core inode (step 6 of the reload sequence).
 *
 * Returns VNODE_RETURNED_DONE (stops the iteration, with args->error
 * set) if the inode block cannot be read, otherwise VNODE_RETURNED.
 */
static int
ffs_reload_callback(struct vnode *vp, void *cargs)
{
	struct inode *ip;
	struct buf *bp;
	struct fs *fs;
	struct ffs_reload_cargs *args;

	args = (struct ffs_reload_cargs *)cargs;

	/*
	 * flush all the buffers associated with this node
	 */
	if (buf_invalidateblks(vp, 0, 0, 0))
		panic("ffs_reload: dirty2");

	/*
	 * Step 6: re-read inode data
	 */
	ip = VTOI(vp);
	fs = args->fs;

	if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);

		return (VNODE_RETURNED_DONE);
	}

#if REV_ENDIAN_FS
	/* Swap the on-disk dinode into host order while copying it in. */
	if (args->rev_endian) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) +
		    ino_to_fsbo(fs, ip->i_number)), ip);
	} else {
#endif /* REV_ENDIAN_FS */
	/* Copy the dinode for this inode out of the block's dinode array. */
	ip->i_din = *((struct dinode *)buf_dataptr(bp) +
	    ino_to_fsbo(fs, ip->i_number));
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */

	buf_brelse(bp);

	return (VNODE_RETURNED);
}
315
316
317 /*
318 * Reload all incore data for a filesystem (used after running fsck on
319 * the root filesystem and finding things to fix). The filesystem must
320 * be mounted read-only.
321 *
322 * Things to do to update the mount:
323 * 1) invalidate all cached meta-data.
324 * 2) re-read superblock from disk.
325 * 3) re-read summary information from disk.
326 * 4) invalidate all inactive vnodes.
327 * 5) invalidate all cached file data.
328 * 6) re-read inode data for all active vnodes.
329 */
330 ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p)
331 {
332 register struct vnode *devvp;
333 void *space;
334 struct buf *bp;
335 struct fs *fs, *newfs;
336 int i, blks, size, error;
337 u_int64_t maxfilesize; /* XXX */
338 int32_t *lp;
339 struct ffs_reload_cargs args;
340 #if REV_ENDIAN_FS
341 int rev_endian = (mountp->mnt_flag & MNT_REVEND);
342 #endif /* REV_ENDIAN_FS */
343
344 if ((mountp->mnt_flag & MNT_RDONLY) == 0)
345 return (EINVAL);
346 /*
347 * Step 1: invalidate all cached meta-data.
348 */
349 devvp = VFSTOUFS(mountp)->um_devvp;
350 if (buf_invalidateblks(devvp, 0, 0, 0))
351 panic("ffs_reload: dirty1");
352 /*
353 * Step 2: re-read superblock from disk.
354 */
355 size = vfs_devblocksize(mountp);
356
357 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) {
358 buf_brelse(bp);
359 return (error);
360 }
361 newfs = (struct fs *)buf_dataptr(bp);
362 #if REV_ENDIAN_FS
363 if (rev_endian) {
364 byte_swap_sbin(newfs);
365 }
366 #endif /* REV_ENDIAN_FS */
367 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
368 newfs->fs_bsize < sizeof(struct fs)) {
369 #if REV_ENDIAN_FS
370 if (rev_endian)
371 byte_swap_sbout(newfs);
372 #endif /* REV_ENDIAN_FS */
373
374 buf_brelse(bp);
375 return (EIO); /* XXX needs translation */
376 }
377 fs = VFSTOUFS(mountp)->um_fs;
378 /*
379 * Copy pointer fields back into superblock before copying in XXX
380 * new superblock. These should really be in the ufsmount. XXX
381 * Note that important parameters (eg fs_ncg) are unchanged.
382 */
383 newfs->fs_csp = fs->fs_csp;
384 newfs->fs_maxcluster = fs->fs_maxcluster;
385 newfs->fs_contigdirs = fs->fs_contigdirs;
386 bcopy(newfs, fs, (u_int)fs->fs_sbsize);
387 if (fs->fs_sbsize < SBSIZE)
388 buf_markinvalid(bp);
389 #if REV_ENDIAN_FS
390 if (rev_endian)
391 byte_swap_sbout(newfs);
392 #endif /* REV_ENDIAN_FS */
393 buf_brelse(bp);
394 mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
395 ffs_oldfscompat(fs);
396 maxfilesize = 0x100000000ULL; /* 4GB */
397 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
398 fs->fs_maxfilesize = maxfilesize; /* XXX */
399 /*
400 * Step 3: re-read summary information from disk.
401 */
402 blks = howmany(fs->fs_cssize, fs->fs_fsize);
403 space = fs->fs_csp;
404 for (i = 0; i < blks; i += fs->fs_frag) {
405 size = fs->fs_bsize;
406 if (i + fs->fs_frag > blks)
407 size = (blks - i) * fs->fs_fsize;
408 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size,
409 NOCRED, &bp)) {
410 buf_brelse(bp);
411 return (error);
412 }
413 #if REV_ENDIAN_FS
414 if (rev_endian) {
415 /* csum swaps */
416 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
417 }
418 #endif /* REV_ENDIAN_FS */
419 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
420 #if REV_ENDIAN_FS
421 if (rev_endian) {
422 /* csum swaps */
423 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
424 }
425 #endif /* REV_ENDIAN_FS */
426 space = (char *) space + size;
427 buf_brelse(bp);
428 }
429 /*
430 * We no longer know anything about clusters per cylinder group.
431 */
432 if (fs->fs_contigsumsize > 0) {
433 lp = fs->fs_maxcluster;
434 for (i = 0; i < fs->fs_ncg; i++)
435 *lp++ = fs->fs_contigsumsize;
436 }
437 #if REV_ENDIAN_FS
438 args.rev_endian = rev_endian;
439 #endif /* REV_ENDIAN_FS */
440 args.devvp = devvp;
441 args.cred = cred;
442 args.fs = fs;
443 args.p = p;
444 args.error = 0;
445 /*
446 * ffs_reload_callback will be called for each vnode
447 * hung off of this mount point that can't be recycled...
448 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
449 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
450 * properly referenced and unreferenced around the callback
451 */
452 vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args);
453
454 return (args.error);
455 }
456
/*
 * Common code for mount and mountroot
 *
 * Reads and validates the superblock from "devvp" (accepting an
 * opposite-endian filesystem when REV_ENDIAN_FS is configured),
 * allocates and fills the in-core ufsmount/fs structures, reads the
 * cylinder-group summary area, and attaches everything to "mp".  On
 * failure, releases any held buffer and frees the partial ufsmount.
 */
int
ffs_mountfs(devvp, mp, context)
	struct vnode *devvp;
	struct mount *mp;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	dev_t dev;
	struct buf *cgbp;
	struct cg *cgp;
	int32_t clustersumoff;
	void *space;
	int error, i, blks, ronly;
	u_int32_t size;
	int32_t *lp;
	kauth_cred_t cred;
	u_int64_t maxfilesize;					/* XXX */
	u_int dbsize = DEV_BSIZE;
#if REV_ENDIAN_FS
	int rev_endian=0;
#endif /* REV_ENDIAN_FS */
	dev = devvp->v_rdev;
	cred = vfs_context_ucred(context);

	ronly = vfs_isrdonly(mp);
	bp = NULL;
	ump = NULL;

	/* Advisory locking should be handled at the VFS layer */
	vfs_setlocklocal(mp);

	/* Obtain the actual device block size */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) {
		error = ENXIO;
		goto out;
	}

	/* Read the superblock: SBSIZE bytes at device block SBOFF/size. */
	if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)),
	    SBSIZE, cred, &bp))
		goto out;
	fs = (struct fs *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	/*
	 * Native-order sanity checks failed: try interpreting the
	 * superblock as byte-swapped, accepting it only if it passes the
	 * same checks after byte_swap_sbin() converts it in place.
	 */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
		int magic = fs->fs_magic;

		byte_swap_ints(&magic, 1);
		if (magic != FS_MAGIC) {
			error = EINVAL;
			goto out;
		}
		byte_swap_sbin(fs);
		if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
		    fs->fs_bsize < sizeof(struct fs)) {
			byte_swap_sbout(fs);
			error = EINVAL;		/* XXX needs translation */
			goto out;
		}
		rev_endian=1;
	}
#endif /* REV_ENDIAN_FS */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}


	/*
	 * Buffer cache does not handle multiple pages in a buf when
	 * invalidating incore buffer in pageout. There are no locks
	 * in the pageout path.  So there is a danger of loosing data when
	 * block allocation happens at the same time a pageout of buddy
	 * page occurs. incore() returns buf with both
	 * pages, this leads vnode-pageout to incorrectly flush of entire.
	 * buf. Till the low level ffs code is modified to deal with these
	 * do not mount any FS more than 4K size.
	 */
	/*
	 * Can't mount filesystems with a fragment size less than DIRBLKSIZ
	 */
	/*
	 * Don't mount dirty filesystems, except for the root filesystem
	 */
	if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) ||
	    ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = ENOTSUP;
		goto out;
	}

	/* Let's figure out the devblock size the file system is with */
	/* the device block size = fragment size / number of sectors per frag */

	dbsize = fs->fs_fsize / NSPF(fs);
	if(dbsize <= 0 ) {
		kprintf("device blocksize computaion failed\n");
	} else {
		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize,
		    FWRITE, context) != 0) {
			kprintf("failed to set device blocksize\n");
		}
		/* force the specfs to reread blocksize from size() */
		set_fsblocksize(devvp);
	}

	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EROFS;          /* needs translation */
		goto out;
	}

	/* If we are not mounting read only, then check for overlap
	 * condition in cylinder group's free block map.
	 * If overlap exists, then force this into a read only mount
	 * to avoid further corruption. PR#2216969
	 */
	if (ronly == 0){
		if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))),
		    (int)fs->fs_cgsize, NOCRED, &cgbp)) {
			buf_brelse(cgbp);
			goto out;
		}
		cgp = (struct cg *)buf_dataptr(cgbp);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgin(cgp,fs);
#endif /* REV_ENDIAN_FS */
		if (!cg_chkmagic(cgp)){
			/*
			 * NOTE(review): "error" still holds 0 from the
			 * successful buf_bread above, so this goto out
			 * returns 0 without attaching the mount — looks
			 * like a latent bug; confirm intended behavior.
			 */
#if REV_ENDIAN_FS
			if (rev_endian)
				byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
			buf_brelse(cgbp);
			goto out;
		}
		if (cgp->cg_clustersumoff != 0) {
			/* Check for overlap */
			clustersumoff = cgp->cg_freeoff +
				howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY);
			clustersumoff = roundup(clustersumoff, sizeof(long));
			if (cgp->cg_clustersumoff < clustersumoff) {
				/* Overlap exists */
				mp->mnt_flag |= MNT_RDONLY;
				ronly = 1;
			}
		}
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(cgbp);
	}

	/*
	 * Build the in-core mount state.  The superblock copy kept in
	 * ump->um_fs is taken from the (already host-order) buffer.
	 */
	ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK);
	bzero((caddr_t)ump, sizeof *ump);
	ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT,
	    M_WAITOK);
	bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		buf_markinvalid(bp);
#if REV_ENDIAN_FS
	/* Restore the buffer copy to disk order before releasing it. */
	if (rev_endian)
		byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	fs->fs_ronly = ronly;
	/*
	 * One allocation holds the csum area, the per-cg maxcluster
	 * array (if clustering), and the per-cg contigdirs bytes.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
		    size, cred, &bp)) {
			_FREE(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy((char *)buf_dataptr(bp), space, (u_int)size);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_ints((int *) space, size / sizeof(int));
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		buf_brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	space = (u_int8_t *)space + size;
	bzero(fs->fs_contigdirs, size);
	/* XXX Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	/* XXX End of compatibility */
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
	mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
	/* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */
	mp->mnt_maxsymlinklen = 60;
#if REV_ENDIAN_FS
	if (rev_endian)
		mp->mnt_flag |= MNT_REVEND;
#endif /* REV_ENDIAN_FS */
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		dqfileinit(&ump->um_qfiles[i]);
	ffs_oldfscompat(fs);
	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
	maxfilesize = 0x100000000ULL;	/* 4GB */
#if 0
	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
#endif /* 0 */
	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
		fs->fs_maxfilesize = maxfilesize;		/* XXX */
	if (ronly == 0) {
		/* Writable mount: mark dirty until a clean unmount. */
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT);
	}
	return (0);
out:
	if (bp)
		buf_brelse(bp);
	if (ump) {
		_FREE(ump->um_fs, M_UFSMNT);
		_FREE(ump, M_UFSMNT);
	}
	return (error);
}
722
723 /*
724 * Sanity checks for old file systems.
725 *
726 * XXX - goes away some day.
727 */
728 ffs_oldfscompat(fs)
729 struct fs *fs;
730 {
731 int i;
732
733 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */
734 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */
735 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
736 fs->fs_nrpos = 8; /* XXX */
737 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
738 u_int64_t sizepb = fs->fs_bsize; /* XXX */
739 /* XXX */
740 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
741 for (i = 0; i < NIADDR; i++) { /* XXX */
742 sizepb *= NINDIR(fs); /* XXX */
743 fs->fs_maxfilesize += sizepb; /* XXX */
744 } /* XXX */
745 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */
746 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */
747 } /* XXX */
748 return (0);
749 }
750
/*
 * unmount system call
 *
 * Flushes all files (forcibly when MNT_FORCE is set), marks the on-disk
 * superblock clean if the mount was writable, then frees the summary
 * area, the in-core superblock, and the ufsmount.
 */
int
ffs_unmount(mp, mntflags, context)
	struct mount *mp;
	int mntflags;
	vfs_context_t context;
{
	struct proc *p = vfs_context_proc(context);
	register struct ufsmount *ump;
	register struct fs *fs;
	int error, flags;
	int force;

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}
	/* A flush failure aborts the unmount unless it is forced. */
	if ( (error = ffs_flushfiles(mp, flags, p)) && !force )
		return (error);
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;

	if (fs->fs_ronly == 0) {
		/* Writable mount: record a clean shutdown on disk. */
		fs->fs_clean = 1;
		if (error = ffs_sbupdate(ump, MNT_WAIT)) {
			/* Superblock write failed; proceed anyway (see below). */
			fs->fs_clean = 0;
#ifdef notyet
		/* we can atleast cleanup ; as the media could be WP */
		/* & during mount, we do not check for write failures */
		/* FIXME LATER : the Correct fix would be to have */
		/* mount detect the WP media and downgrade to readonly mount */
		/* For now, here it is */
			return (error);
#endif /* notyet */
		}
	}
	/* Tear down the in-core state built by ffs_mountfs(). */
	_FREE(fs->fs_csp, M_UFSMNT);
	_FREE(fs, M_UFSMNT);
	_FREE(ump, M_UFSMNT);

	return (0);
}
797
798 /*
799 * Flush out all the files in a filesystem.
800 */
801 ffs_flushfiles(mp, flags, p)
802 register struct mount *mp;
803 int flags;
804 struct proc *p;
805 {
806 register struct ufsmount *ump;
807 int i, error;
808
809 ump = VFSTOUFS(mp);
810
811 #if QUOTA
812 /*
813 * NOTE: The open quota files have an indirect reference
814 * on the root directory vnode. We must account for this
815 * extra reference when doing the intial vflush.
816 */
817 if (mp->mnt_flag & MNT_QUOTA) {
818 struct vnode *rootvp = NULLVP;
819 int quotafilecnt = 0;
820
821 /* Find out how many quota files we have open. */
822 for (i = 0; i < MAXQUOTAS; i++) {
823 if (ump->um_qfiles[i].qf_vp != NULLVP)
824 ++quotafilecnt;
825 }
826
827 /*
828 * Check if the root vnode is in our inode hash
829 * (so we can skip over it).
830 */
831 rootvp = ufs_ihashget(ump->um_dev, ROOTINO);
832
833 error = vflush(mp, rootvp, SKIPSYSTEM|flags);
834
835 if (rootvp) {
836 /*
837 * See if there are additional references on the
838 * root vp besides the ones obtained from the open
839 * quota files and the hfs_chashget call above.
840 */
841 if ((error == 0) &&
842 (rootvp->v_usecount > (1 + quotafilecnt))) {
843 error = EBUSY; /* root dir is still open */
844 }
845 vnode_put(rootvp);
846 }
847 if (error && (flags & FORCECLOSE) == 0)
848 return (error);
849
850 for (i = 0; i < MAXQUOTAS; i++) {
851 if (ump->um_qfiles[i].qf_vp == NULLVP)
852 continue;
853 quotaoff(mp, i);
854 }
855 /*
856 * Here we fall through to vflush again to ensure
857 * that we have gotten rid of all the system vnodes.
858 */
859 }
860 #endif
861 error = vflush(mp, NULLVP, SKIPSWAP|flags);
862 error = vflush(mp, NULLVP, flags);
863 return (error);
864 }
865
866 /*
867 * Get file system statistics.
868 */
869 int
870 ffs_statfs(mp, sbp, context)
871 struct mount *mp;
872 register struct vfsstatfs *sbp;
873 vfs_context_t context;
874 {
875 register struct ufsmount *ump;
876 register struct fs *fs;
877
878 ump = VFSTOUFS(mp);
879 fs = ump->um_fs;
880 if (fs->fs_magic != FS_MAGIC)
881 panic("ffs_statfs");
882 sbp->f_bsize = fs->fs_fsize;
883 sbp->f_iosize = fs->fs_bsize;
884 sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize);
885 sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag +
886 fs->fs_cstotal.cs_nffree));
887 sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree));
888 sbp->f_files = (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO));
889 sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree);
890 return (0);
891 }
892
/*
 * Get file system attributes (VFS getattr entry point).
 *
 * Returns block/inode statistics from the in-core superblock, the fsid,
 * the volume name read from the on-disk UFS label, the volume
 * capability flags, and the supported attribute sets.
 */
int
ffs_vfs_getattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct fs *fs;
	kauth_cred_t cred;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	char *offset;
	int bs, error, length;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	cred = vfs_context_ucred(context);

	/* Statistics straight from the superblock (same values as ffs_statfs). */
	VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize);
	VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize);
	VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize));
	VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long)
	    (fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree)));
	VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs,
	    fs->fs_minfree)));
	VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long)
	    (fs->fs_ncg * fs->fs_ipg - ROOTINO)));
	VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long)
	    fs->fs_cstotal.cs_nifree));

	if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
		fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
		fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
		VFSATTR_SET_SUPPORTED(fsap, f_fsid);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		/* Read the device sector(s) holding the UFS volume label. */
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);

		if (error = (int)buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		/* Locate the label within the (possibly larger) sector read. */
		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;

		/* Only report a name if the label validates. */
		if (ufs_label_check(ulp)) {
			length = ulp->ul_namelen;
#if REV_ENDIAN_FS
			/* ul_namelen is stored in on-disk byte order. */
			if (mp->mnt_flag & MNT_REVEND)
				length = OSSwapInt16(length);
#endif
			if (length > 0 && length <= UFS_MAX_LABEL_NAME) {
				bcopy(ulp->ul_name, fsap->f_vol_name, length);
				fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0';
				fsap->f_vol_name[length] = '\0';
			}
		}

		buf_brelse(bp);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		/* Capabilities this filesystem actually implements: */
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS ;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES]
		    = VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1]
		    = 0;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2]
		    = 0;

		/* Capabilities we know about: */
		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_PERSISTENTOBJECTIDS |
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_JOURNAL |
		    VOL_CAP_FMT_JOURNAL_ACTIVE |
		    VOL_CAP_FMT_NO_ROOT_TIMES |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_ZERO_RUNS |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS |
		    VOL_CAP_FMT_2TB_FILESIZE;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
		    VOL_CAP_INT_SEARCHFS |
		    VOL_CAP_INT_ATTRLIST |
		    VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_READDIRATTR |
		    VOL_CAP_INT_EXCHANGEDATA |
		    VOL_CAP_INT_COPYFILE |
		    VOL_CAP_INT_ALLOCATE |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK ;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		/* Only volume name/capabilities/attributes are queryable. */
		fsap->f_attributes.validattr.commonattr = 0;
		fsap->f_attributes.validattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.validattr.dirattr = 0;
		fsap->f_attributes.validattr.fileattr = 0;
		fsap->f_attributes.validattr.forkattr = 0;

		fsap->f_attributes.nativeattr.commonattr = 0;
		fsap->f_attributes.nativeattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.nativeattr.dirattr = 0;
		fsap->f_attributes.nativeattr.fileattr = 0;
		fsap->f_attributes.nativeattr.forkattr = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}

	return (0);
}
1039
1040
1041 int
1042 ffs_vfs_setattr(mp, fsap, context)
1043 struct mount *mp;
1044 struct vfs_attr *fsap;
1045 vfs_context_t context;
1046 {
1047 struct ufsmount *ump;
1048 struct vnode *devvp;
1049 struct buf *bp;
1050 struct ufslabel *ulp;
1051 kauth_cred_t cred;
1052 char *offset;
1053 int bs, error;
1054
1055
1056 ump = VFSTOUFS(mp);
1057 cred = vfs_context_ucred(context);
1058
1059 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
1060 devvp = ump->um_devvp;
1061 bs = vfs_devblocksize(mp);
1062 if (error = buf_meta_bread(devvp,
1063 (daddr64_t)(UFS_LABEL_OFFSET / bs),
1064 MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
1065 if (bp)
1066 buf_brelse(bp);
1067 return (error);
1068 }
1069
1070 /*
1071 * Since the disklabel is read directly by older user space
1072 * code, make sure this buffer won't remain in the cache when
1073 * we release it.
1074 */
1075 buf_setflags(bp, B_NOCACHE);
1076
1077 /* Validate the label structure; init if not valid */
1078 offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
1079 ulp = (struct ufslabel *)offset;
1080 if (!ufs_label_check(ulp))
1081 ufs_label_init(ulp);
1082
1083 /* Copy new name over existing name */
1084 ulp->ul_namelen = strlen(fsap->f_vol_name);
1085 bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen);
1086 ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0';
1087 ulp->ul_name[ulp->ul_namelen] = '\0';
1088
1089 #if REV_ENDIAN_FS
1090 if (mp->mnt_flag & MNT_REVEND)
1091 ulp->ul_namelen = OSSwapInt16(ulp->ul_namelen);
1092 #endif
1093
1094 /* Update the checksum */
1095 ulp->ul_checksum = 0;
1096 ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp));
1097
1098 /* Write the label back to disk */
1099 buf_bwrite(bp);
1100 bp = NULL;
1101
1102 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
1103 }
1104
1105 return (0);
1106 }
/*
 * Argument/result bundle threaded through vnode_iterate() to
 * ffs_sync_callback() during ffs_sync().
 */
struct ffs_sync_cargs {
	vfs_context_t context;	/* caller's VFS context, passed to VNOP_FSYNC */
	int waitfor;		/* sync wait mode handed to VNOP_FSYNC */
	int error;		/* out: last VNOP_FSYNC error observed */
};
1112
1113
1114 static int
1115 ffs_sync_callback(struct vnode *vp, void *cargs)
1116 {
1117 struct inode *ip;
1118 struct ffs_sync_cargs *args;
1119 int error;
1120
1121 args = (struct ffs_sync_cargs *)cargs;
1122
1123 ip = VTOI(vp);
1124
1125 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) {
1126 error = VNOP_FSYNC(vp, args->waitfor, args->context);
1127
1128 if (error)
1129 args->error = error;
1130
1131 }
1132 return (VNODE_RETURNED);
1133 }
1134
1135 /*
1136 * Go through the disk queues to initiate sandbagged IO;
1137 * go through the inodes to write those that have been modified;
1138 * initiate the writing of the super block if it has been modified.
1139 *
1140 * Note: we are always called with the filesystem marked `MPBUSY'.
1141 */
1142 int
1143 ffs_sync(mp, waitfor, context)
1144 struct mount *mp;
1145 int waitfor;
1146 vfs_context_t context;
1147 {
1148 struct vnode *nvp, *vp;
1149 struct ufsmount *ump = VFSTOUFS(mp);
1150 struct fs *fs;
1151 struct timeval tv;
1152 int error, allerror = 0;
1153 struct ffs_sync_cargs args;
1154
1155 fs = ump->um_fs;
1156 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1157 printf("fs = %s\n", fs->fs_fsmnt);
1158 panic("update: rofs mod");
1159 }
1160 /*
1161 * Write back each (modified) inode.
1162 */
1163 args.context = context;
1164 args.waitfor = waitfor;
1165 args.error = 0;
1166 /*
1167 * ffs_sync_callback will be called for each vnode
1168 * hung off of this mount point... the vnode will be
1169 * properly referenced and unreferenced around the callback
1170 */
1171 vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args);
1172
1173 if (args.error)
1174 allerror = args.error;
1175
1176 /*
1177 * Force stale file system control information to be flushed.
1178 */
1179 if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context))
1180 allerror = error;
1181 #if QUOTA
1182 qsync(mp);
1183 #endif
1184 /*
1185 * Write back modified superblock.
1186 */
1187 if (fs->fs_fmod != 0) {
1188 fs->fs_fmod = 0;
1189 microtime(&tv);
1190 fs->fs_time = tv.tv_sec;
1191 if (error = ffs_sbupdate(ump, waitfor))
1192 allerror = error;
1193 }
1194 return (allerror);
1195 }
1196
1197 /*
1198 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1199 * in from disk. If it is in core, wait for the lock bit to clear, then
1200 * return the inode locked. Detection and handling of mount points must be
1201 * done by the calling routine.
1202 */
1203 int
1204 ffs_vget(mp, ino, vpp, context)
1205 mount_t mp;
1206 ino64_t ino;
1207 vnode_t *vpp;
1208 vfs_context_t context;
1209 {
1210 return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0));
1211 }
1212
1213
/*
 * Common back end for ffs_vget() and file-handle lookups: return the
 * incore vnode for (mp, ino), or read the on-disk dinode and build a
 * fresh vnode for it.
 *
 * dvp/cnp are optional lookup hints used for name-cache entry and
 * vnode creation; mode, when non-zero, overrides the on-disk i_mode
 * for determining the vnode type; fhwanted is non-zero for NFS
 * file-handle lookups, in which case a free (VNON) inode yields
 * ESTALE rather than ENOENT.
 *
 * Returns 0 with *vpp set on success, or an errno with *vpp NULL.
 */
int
ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted)
	mount_t mp;
	ino_t ino;
	vnode_t *vpp;
	vnode_t dvp;
	struct componentname *cnp;
	int mode;
	int fhwanted;
{
	struct proc *p = current_proc();	/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct vnode_fsparam vfsp;
	struct timeval tv;
	enum vtype vtype;
	dev_t dev;
	int i, type, error = 0;

	*vpp = NULL;
	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
#if 0
	/* Check for unmount in progress */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		return (EPERM);
	}
#endif
	/*
	 * Allocate a new inode... do it before we check the
	 * cache, because the MALLOC_ZONE may block
	 */
	type = M_FFSNODE;
	MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);

	/*
	 * check in the inode hash
	 */
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		/*
		 * found it... get rid of the allocation
		 * that we didn't need and return
		 * the 'found' vnode
		 */
		FREE_ZONE(ip, sizeof(struct inode), type);
		vp = *vpp;
		return (0);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * lock the inode
	 */
//	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
//	lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p);

	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#if QUOTA
	/* No quota structures attached yet; resolved lazily elsewhere. */
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/* Mark construction in progress; racing lookups block on IN_ALLOC
	 * and are woken via the IN_WALLOC wakeups below. */
	SET(ip->i_flag, IN_ALLOC);
	/*
	 * Put it onto its hash chain locked so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	/* Read in the disk contents for the inode, copy into the inode. */
	if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);
		goto errout;
	}
#if REV_ENDIAN_FS
	/* Opposite-endian filesystems are byte-swapped as the dinode is
	 * copied in; otherwise a straight structure copy suffices. */
	if (mp->mnt_flag & MNT_REVEND) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip);
	} else {
		ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
	}
#else
	ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);

	/* mode == 0 means "use the file type recorded on disk". */
	if (mode == 0)
		vtype = IFTOVT(ip->i_mode);
	else
		vtype = IFTOVT(mode);

	/* VNON: the inode is unallocated on disk. */
	if (vtype == VNON) {
		if (fhwanted) {
			/* NFS is in play */
			error = ESTALE;
			goto errout;
		} else {
			error = ENOENT;
			goto errout;
		}
	}

	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "ufs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = ip;
	vfsp.vnfs_cnp = cnp;

	if (mode == 0)
		vfsp.vnfs_filesize = ip->i_din.di_size;
	else
		vfsp.vnfs_filesize = 0;

	/* Select the vnode operations vector by file type. */
	if (vtype == VFIFO )
		vfsp.vnfs_vops = FFS_FIFOOPS;
	else if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_vops = ffs_specop_p;
	else
		vfsp.vnfs_vops = ffs_vnodeop_p;

	if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_rdev = ip->i_rdev;
	else
		vfsp.vnfs_rdev = 0;

	/* Only enter the name cache when the lookup asked for it. */
	if (dvp && cnp && (cnp->cn_flags & MAKEENTRY))
		vfsp.vnfs_flags = 0;
	else
		vfsp.vnfs_flags = VNFS_NOCACHE;

	/*
	 * Tag root directory
	 */
	vfsp.vnfs_markroot = (ip->i_number == ROOTINO);
	vfsp.vnfs_marksystem = 0;

	if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)))
		goto errout;

	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	ip->i_vnode = vp;

	vnode_ref(ip->i_devvp);
	vnode_addfsref(vp);
	vnode_settag(vp, VT_UFS);

	/*
	 * Initialize modrev times
	 */
	microtime(&tv);
	SETHIGH(ip->i_modrev, tv.tv_sec);
	SETLOW(ip->i_modrev, tv.tv_usec * 4294);

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)tv.tv_sec)
			nextgennumber = tv.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */
	*vpp = vp;

	/* Construction complete; wake any lookups blocked on this inode. */
	CLR(ip->i_flag, IN_ALLOC);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);

	return (0);

errout:
	/* Undo the hash insertion and free the half-built inode. */
	ufs_ihashrem(ip);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);
	FREE_ZONE(ip, sizeof(struct inode), type);

	return (error);
}
1413
1414 /*
1415 * File handle to vnode
1416 *
1417 * Have to be really careful about stale file handles:
1418 * - check that the inode number is valid
1419 * - call vget to get the locked inode
1420 * - check for an unallocated inode (i_mode == 0)
1421 */
1422 int
1423 ffs_fhtovp(mp, fhlen, fhp, vpp, context)
1424 register struct mount *mp;
1425 int fhlen;
1426 unsigned char *fhp;
1427 struct vnode **vpp;
1428 vfs_context_t context;
1429 {
1430 register struct ufid *ufhp;
1431 register struct inode *ip;
1432 struct vnode *nvp;
1433 struct fs *fs;
1434 int error;
1435 ino_t ino;
1436
1437 if (fhlen < (int)sizeof(struct ufid))
1438 return (EINVAL);
1439 ufhp = (struct ufid *)fhp;
1440 fs = VFSTOUFS(mp)->um_fs;
1441 ino = ntohl(ufhp->ufid_ino);
1442 if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
1443 return (ESTALE);
1444 error = ffs_vget_internal(mp, ino, &nvp, NULL, NULL, 0, 1);
1445 if (error) {
1446 *vpp = NULLVP;
1447 return (error);
1448 }
1449 ip = VTOI(nvp);
1450 if (ip->i_mode == 0 || ip->i_gen != ntohl(ufhp->ufid_gen)) {
1451 vnode_put(nvp);
1452 *vpp = NULLVP;
1453 return (ESTALE);
1454 }
1455 *vpp = nvp;
1456 return (0);
1457 }
1458
1459 /*
1460 * Vnode pointer to File handle
1461 */
1462 /* ARGSUSED */
1463 int
1464 ffs_vptofh(vp, fhlenp, fhp, context)
1465 struct vnode *vp;
1466 int *fhlenp;
1467 unsigned char *fhp;
1468 vfs_context_t context;
1469 {
1470 register struct inode *ip;
1471 register struct ufid *ufhp;
1472
1473 if (*fhlenp < (int)sizeof(struct ufid))
1474 return (EOVERFLOW);
1475 ip = VTOI(vp);
1476 ufhp = (struct ufid *)fhp;
1477 ufhp->ufid_ino = htonl(ip->i_number);
1478 ufhp->ufid_gen = htonl(ip->i_gen);
1479 *fhlenp = sizeof(struct ufid);
1480 return (0);
1481 }
1482
/*
 * Initialize the filesystem; FFS has no setup of its own, so defer
 * entirely to the generic UFS initialization.
 */
int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int result;

	result = ufs_init(vfsp);
	return (result);
}
1493
1494 /*
1495 * fast filesystem related variables.
1496 */
1497 ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
1498 user_addr_t newp, size_t newlen, vfs_context_t context)
1499 {
1500 extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
1501
1502 /* all sysctl names at this level are terminal */
1503 if (namelen != 1)
1504 return (ENOTDIR); /* overloaded */
1505
1506 switch (name[0]) {
1507 case FFS_CLUSTERREAD:
1508 return (sysctl_int(oldp, oldlenp, newp, newlen,
1509 &doclusterread));
1510 case FFS_CLUSTERWRITE:
1511 return (sysctl_int(oldp, oldlenp, newp, newlen,
1512 &doclusterwrite));
1513 case FFS_REALLOCBLKS:
1514 return (sysctl_int(oldp, oldlenp, newp, newlen,
1515 &doreallocblks));
1516 case FFS_ASYNCFREE:
1517 return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
1518 default:
1519 return (ENOTSUP);
1520 }
1521 /* NOTREACHED */
1522 }
1523
/*
 * Write a superblock and associated information back to disk.
 *
 * First flushes the cylinder-group summary (csum) blocks, then the
 * superblock itself — but only if the summary writes all succeeded,
 * so a failed update never leaves a superblock that claims to be
 * clean.  waitfor selects synchronous (MNT_WAIT) vs. async writes.
 * Returns 0 or the last write error.
 */
int
ffs_sbupdate(mp, waitfor)
	struct ufsmount *mp;
	int waitfor;
{
	register struct fs *dfs, *fs = mp->um_fs;
	register struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;
	int devBlockSize=0;
#if REV_ENDIAN_FS
	int rev_endian=(mp->um_mountp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		/* The final chunk may be shorter than a full fs block. */
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
			size, 0, 0, BLK_META);
		bcopy(space, (char *)buf_dataptr(bp), (u_int)size);
#if REV_ENDIAN_FS
		/* Summary data is stored opposite-endian on MNT_REVEND mounts. */
		if (rev_endian) {
			byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
		}
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		/* MNT_WAIT: synchronous write, collect errors; else async. */
		if (waitfor != MNT_WAIT)
			buf_bawrite(bp);
		else if (error = (int)buf_bwrite(bp))
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror)
		return (allerror);
	devBlockSize = vfs_devblocksize(mp->um_mountp);

	bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META);
	bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize);
	/* Restore compatibility to old file systems. XXX */
	dfs = (struct fs *)buf_dataptr(bp);	/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)	/* XXX */
		dfs->fs_nrpos = -1;	/* XXX */
#if REV_ENDIAN_FS
	/*
	 * Swapping bytes here ; so that in case
	 * of inode format < FS_44INODEFMT appropriate
	 * fields get moved
	 */
	if (rev_endian) {
		byte_swap_sbout((struct fs *)buf_dataptr(bp));
	}
#endif /* REV_ENDIAN_FS */
	/* Pre-4.4 inode format: rotate the fields at fs_qbmask to the
	 * old on-disk layout before writing.  XXX */
	if (fs->fs_inodefmt < FS_44INODEFMT) {	/* XXX */
		int32_t *lp, tmp;	/* XXX */
			/* XXX */
		lp = (int32_t *)&dfs->fs_qbmask;	/* XXX */
		tmp = lp[4];	/* XXX */
		for (i = 4; i > 0; i--)	/* XXX */
			lp[i] = lp[i-1];	/* XXX */
		lp[0] = tmp;	/* XXX */
	}	/* XXX */
#if REV_ENDIAN_FS
	/* Note that dfs is already swapped so swap the filesize
	 * before writing
	 */
	if (rev_endian) {
		dfs->fs_maxfilesize = OSSwapInt64(mp->um_savedmaxfilesize);	/* XXX */
	} else {
#endif /* REV_ENDIAN_FS */
		dfs->fs_maxfilesize = mp->um_savedmaxfilesize;	/* XXX */
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else if (error = (int)buf_bwrite(bp))
		allerror = error;

	return (allerror);
}