apple/xnu: bsd/ufs/ffs/ffs_vfsops.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1991, 1993, 1994
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
56 */
57
58 #include <rev_endian_fs.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/namei.h>
62 #include <sys/proc.h>
63 #include <sys/kauth.h>
64 #include <sys/kernel.h>
65 #include <sys/vnode_internal.h>
66 #include <sys/socket.h>
67 #include <sys/mount_internal.h>
68 #include <sys/mount.h>
69 #include <sys/buf.h>
70 #include <sys/mbuf.h>
71 #include <sys/file.h>
72 #include <sys/disk.h>
73 #include <sys/ioctl.h>
74 #include <sys/errno.h>
75 #include <sys/malloc.h>
76 #include <sys/ubc.h>
77 #include <sys/quota.h>
78
79 #include <miscfs/specfs/specdev.h>
80
81 #include <ufs/ufs/quota.h>
82 #include <ufs/ufs/ufsmount.h>
83 #include <ufs/ufs/inode.h>
84 #include <ufs/ufs/ufs_extern.h>
85
86 #include <ufs/ffs/fs.h>
87 #include <ufs/ffs/ffs_extern.h>
88 #if REV_ENDIAN_FS
89 #include <ufs/ufs/ufs_byte_order.h>
90 #include <architecture/byte_order.h>
91 #endif /* REV_ENDIAN_FS */
92
93 int ffs_sbupdate(struct ufsmount *, int);
94
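/*
 * VFS operations vector for the UFS/FFS filesystem; the ffs_* entries are
 * implemented in this file, the ufs_* entries in the generic UFS code.
 */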
95 struct vfsops ufs_vfsops = {
96 ffs_mount,
97 ufs_start,
98 ffs_unmount,
99 ufs_root,
100 ufs_quotactl,
101 ffs_vfs_getattr,
102 ffs_sync,
103 ffs_vget,
104 ffs_fhtovp,
105 ffs_vptofh,
106 ffs_init,
107 ffs_sysctl,
108 ffs_vfs_setattr,
109 {0}
110 };
111
112 extern u_long nextgennumber;
113
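/*
 * Helpers for setting the high and low 32-bit words of a 64-bit value;
 * used below to seed an inode's i_modrev from a struct timeval.
 */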
114 union _qcvt {
115 int64_t qcvt;
116 int32_t val[2];
117 };
118 #define SETHIGH(q, h) { \
119 union _qcvt tmp; \
120 tmp.qcvt = (q); \
121 tmp.val[_QUAD_HIGHWORD] = (h); \
122 (q) = tmp.qcvt; \
123 }
124 #define SETLOW(q, l) { \
125 union _qcvt tmp; \
126 tmp.qcvt = (q); \
127 tmp.val[_QUAD_LOWWORD] = (l); \
128 (q) = tmp.qcvt; \
129 }
130
131 /*
132 * Called by main() when ufs is going to be mounted as root.
133 */
134 int
135 ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
136 {
137 struct proc *p = current_proc(); /* XXX */
138 int error;
139
140 /* Set asynchronous flag by default */
141 vfs_setflags(mp, MNT_ASYNC);
142
143 if (error = ffs_mountfs(rvp, mp, context))
144 return (error);
145
146 (void)ffs_statfs(mp, vfs_statfs(mp), NULL);
147
148 return (0);
149 }
150
151 /*
152 * VFS Operations.
153 *
154 * mount system call
155 */
156 int
157 ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context)
158 {
159 struct proc *p = vfs_context_proc(context);
160 struct ufsmount *ump;
161 register struct fs *fs;
162 u_int size;
163 int error = 0, flags;
164 mode_t accessmode;
165 int ronly;
166 int reload = 0;
167
168 /*
169 * If updating, check whether changing from read-write to
170 * read-only; if there is no device name, that's all we do.
171 */
172 if (mp->mnt_flag & MNT_UPDATE) {
173 ump = VFSTOUFS(mp);
174 fs = ump->um_fs;
175 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
176 /*
177 * Flush any dirty data.
178 */
179 VFS_SYNC(mp, MNT_WAIT, context);
180 /*
181 * Check for and optionally get rid of files open
182 * for writing.
183 */
184 flags = WRITECLOSE;
185 if (mp->mnt_flag & MNT_FORCE)
186 flags |= FORCECLOSE;
187 if (error = ffs_flushfiles(mp, flags, p))
188 return (error);
189 fs->fs_clean = 1;
190 fs->fs_ronly = 1;
191 if (error = ffs_sbupdate(ump, MNT_WAIT)) {
192 fs->fs_clean = 0;
193 fs->fs_ronly = 0;
194 return (error);
195 }
196 }
197 /* save fs_ronly to later use */
198 ronly = fs->fs_ronly;
199 if ((mp->mnt_flag & MNT_RELOAD) || ronly)
200 reload = 1;
201 if ((reload) &&
202 (error = ffs_reload(mp, vfs_context_ucred(context), p)))
203 return (error);
204 /* replace the ronly after load */
205 fs->fs_ronly = ronly;
206 /*
207 * Do not update the file system if the user was in singleuser
208 * and then tries to mount -uw without fscking
209 */
210 if (!fs->fs_clean && ronly) {
211 printf("WARNING: trying to mount a dirty file system\n");
212 if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) {
213 printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",fs->fs_fsmnt);
214 /*
215 * Reset the readonly bit as reload might have
216 * modified this bit
217 */
218 fs->fs_ronly = 1;
219 return(EPERM);
220 }
221 }
222
223 if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
224 fs->fs_ronly = 0;
225 fs->fs_clean = 0;
226 (void) ffs_sbupdate(ump, MNT_WAIT);
227 }
228 if (devvp == 0) {
229 return(0);
230 }
231 }
232 if ((mp->mnt_flag & MNT_UPDATE) == 0)
233 error = ffs_mountfs(devvp, mp, context);
234 else {
235 if (devvp != ump->um_devvp)
236 error = EINVAL; /* needs translation */
237 }
238 if (error) {
239 return (error);
240 }
241 ump = VFSTOUFS(mp);
242 fs = ump->um_fs;
243 bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt));
244 strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1);
245 (void)ffs_statfs(mp, &mp->mnt_vfsstat, p);
246 return (0);
247 }
248
249
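/* Arguments passed through vnode_iterate() to ffs_reload_callback() */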
250 struct ffs_reload_cargs {
251 struct vnode *devvp;
252 kauth_cred_t cred;
253 struct fs *fs;
254 struct proc *p;
255 int error;
256 #if REV_ENDIAN_FS
257 int rev_endian;
258 #endif /* REV_ENDIAN_FS */
259 };
260
261
262 static int
263 ffs_reload_callback(struct vnode *vp, void *cargs)
264 {
265 struct inode *ip;
266 struct buf *bp;
267 struct fs *fs;
268 struct ffs_reload_cargs *args;
269
270 args = (struct ffs_reload_cargs *)cargs;
271
272 /*
273 * flush all the buffers associated with this node
274 */
275 if (buf_invalidateblks(vp, 0, 0, 0))
276 panic("ffs_reload: dirty2");
277
278 /*
279 * Step 6: re-read inode data
280 */
281 ip = VTOI(vp);
282 fs = args->fs;
283
284 if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
285 (int)fs->fs_bsize, NOCRED, &bp)) {
286 buf_brelse(bp);
287
288 return (VNODE_RETURNED_DONE);
289 }
290
291 #if REV_ENDIAN_FS
292 if (args->rev_endian) {
293 byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) +
294 ino_to_fsbo(fs, ip->i_number)), ip);
295 } else {
296 #endif /* REV_ENDIAN_FS */
297 ip->i_din = *((struct dinode *)buf_dataptr(bp) +
298 ino_to_fsbo(fs, ip->i_number));
299 #if REV_ENDIAN_FS
300 }
301 #endif /* REV_ENDIAN_FS */
302
303 buf_brelse(bp);
304
305 return (VNODE_RETURNED);
306 }
307
308
309 /*
310 * Reload all incore data for a filesystem (used after running fsck on
311 * the root filesystem and finding things to fix). The filesystem must
312 * be mounted read-only.
313 *
314 * Things to do to update the mount:
315 * 1) invalidate all cached meta-data.
316 * 2) re-read superblock from disk.
317 * 3) re-read summary information from disk.
318 * 4) invalidate all inactive vnodes.
319 * 5) invalidate all cached file data.
320 * 6) re-read inode data for all active vnodes.
321 */
322 int ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p)
323 {
324 register struct vnode *devvp;
325 void *space;
326 struct buf *bp;
327 struct fs *fs, *newfs;
328 int i, blks, size, error;
329 u_int64_t maxfilesize; /* XXX */
330 int32_t *lp;
331 struct ffs_reload_cargs args;
332 #if REV_ENDIAN_FS
333 int rev_endian = (mountp->mnt_flag & MNT_REVEND);
334 #endif /* REV_ENDIAN_FS */
335
336 if ((mountp->mnt_flag & MNT_RDONLY) == 0)
337 return (EINVAL);
338 /*
339 * Step 1: invalidate all cached meta-data.
340 */
341 devvp = VFSTOUFS(mountp)->um_devvp;
342 if (buf_invalidateblks(devvp, 0, 0, 0))
343 panic("ffs_reload: dirty1");
344 /*
345 * Step 2: re-read superblock from disk.
346 */
347 size = vfs_devblocksize(mountp);
348
349 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) {
350 buf_brelse(bp);
351 return (error);
352 }
353 newfs = (struct fs *)buf_dataptr(bp);
354 #if REV_ENDIAN_FS
355 if (rev_endian) {
356 error = byte_swap_sbin(newfs);
357 if (error) {
358 buf_brelse(bp);
359 return (error);
360 }
361 }
362 #endif /* REV_ENDIAN_FS */
363 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
364 newfs->fs_bsize < sizeof(struct fs)) {
365 #if REV_ENDIAN_FS
366 if (rev_endian)
367 byte_swap_sbout(newfs);
368 #endif /* REV_ENDIAN_FS */
369
370 buf_brelse(bp);
371 return (EIO); /* XXX needs translation */
372 }
373 fs = VFSTOUFS(mountp)->um_fs;
374 /*
375 * Copy pointer fields back into superblock before copying in XXX
376 * new superblock. These should really be in the ufsmount. XXX
377 * Note that important parameters (eg fs_ncg) are unchanged.
378 */
379 newfs->fs_csp = fs->fs_csp;
380 newfs->fs_maxcluster = fs->fs_maxcluster;
381 newfs->fs_contigdirs = fs->fs_contigdirs;
382 bcopy(newfs, fs, (u_int)fs->fs_sbsize);
383 if (fs->fs_sbsize < SBSIZE)
384 buf_markinvalid(bp);
385 #if REV_ENDIAN_FS
386 if (rev_endian)
387 byte_swap_sbout(newfs);
388 #endif /* REV_ENDIAN_FS */
389 buf_brelse(bp);
390 mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
391 ffs_oldfscompat(fs);
392 maxfilesize = 0x100000000ULL; /* 4GB */
393 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
394 fs->fs_maxfilesize = maxfilesize; /* XXX */
395 /*
396 * Step 3: re-read summary information from disk.
397 */
398 blks = howmany(fs->fs_cssize, fs->fs_fsize);
399 space = fs->fs_csp;
400 for (i = 0; i < blks; i += fs->fs_frag) {
401 size = fs->fs_bsize;
402 if (i + fs->fs_frag > blks)
403 size = (blks - i) * fs->fs_fsize;
404 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size,
405 NOCRED, &bp)) {
406 buf_brelse(bp);
407 return (error);
408 }
409 #if REV_ENDIAN_FS
410 if (rev_endian) {
411 /* csum swaps */
412 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
413 }
414 #endif /* REV_ENDIAN_FS */
415 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
416 #if REV_ENDIAN_FS
417 if (rev_endian) {
418 /* csum swaps */
419 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
420 }
421 #endif /* REV_ENDIAN_FS */
422 space = (char *) space + size;
423 buf_brelse(bp);
424 }
425 /*
426 * We no longer know anything about clusters per cylinder group.
427 */
428 if (fs->fs_contigsumsize > 0) {
429 lp = fs->fs_maxcluster;
430 for (i = 0; i < fs->fs_ncg; i++)
431 *lp++ = fs->fs_contigsumsize;
432 }
433 #if REV_ENDIAN_FS
434 args.rev_endian = rev_endian;
435 #endif /* REV_ENDIAN_FS */
436 args.devvp = devvp;
437 args.cred = cred;
438 args.fs = fs;
439 args.p = p;
440 args.error = 0;
441 /*
442 * ffs_reload_callback will be called for each vnode
443 * hung off of this mount point that can't be recycled...
444 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
445 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
446 * properly referenced and unreferenced around the callback
447 */
448 vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args);
449
450 return (args.error);
451 }
452
453 /*
454 * Common code for mount and mountroot
455 */
456 int
457 ffs_mountfs(devvp, mp, context)
458 struct vnode *devvp;
459 struct mount *mp;
460 vfs_context_t context;
461 {
462 struct ufsmount *ump;
463 struct buf *bp;
464 struct fs *fs;
465 dev_t dev;
466 struct buf *cgbp;
467 struct cg *cgp;
468 int32_t clustersumoff;
469 void *space;
470 int error, i, blks, ronly;
471 u_int32_t size;
472 int32_t *lp;
473 kauth_cred_t cred;
474 u_int64_t maxfilesize; /* XXX */
475 u_int dbsize = DEV_BSIZE;
476 #if REV_ENDIAN_FS
477 int rev_endian=0;
478 #endif /* REV_ENDIAN_FS */
479 dev = devvp->v_rdev;
480 cred = vfs_context_ucred(context);
481
482 ronly = vfs_isrdonly(mp);
483 bp = NULL;
484 ump = NULL;
485
486 /* Advisory locking should be handled at the VFS layer */
487 vfs_setlocklocal(mp);
488
489 /* Obtain the actual device block size */
490 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) {
491 error = ENXIO;
492 goto out;
493 }
494
495 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)),
496 SBSIZE, cred, &bp))
497 goto out;
498 fs = (struct fs *)buf_dataptr(bp);
499 #if REV_ENDIAN_FS
500 if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
501 fs->fs_bsize < sizeof(struct fs)) {
502 int magic = fs->fs_magic;
503
504 byte_swap_ints(&magic, 1);
505 if (magic != FS_MAGIC) {
506 error = EINVAL;
507 goto out;
508 }
509 if (error = byte_swap_sbin(fs))
510 goto out;
511
512 if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
513 fs->fs_bsize < sizeof(struct fs)) {
514 byte_swap_sbout(fs);
515 error = EINVAL; /* XXX needs translation */
516 goto out;
517 }
518 rev_endian=1;
519 }
520 #endif /* REV_ENDIAN_FS */
521 if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
522 fs->fs_bsize < sizeof(struct fs)) {
523 #if REV_ENDIAN_FS
524 if (rev_endian)
525 byte_swap_sbout(fs);
526 #endif /* REV_ENDIAN_FS */
527 error = EINVAL; /* XXX needs translation */
528 goto out;
529 }
530
531 if (fs->fs_sbsize < 0 || fs->fs_sbsize > SBSIZE) {
532 error = EINVAL;
533 goto out;
534 }
535
540 /*
541 * The buffer cache does not handle multiple pages in a buf when
542 * invalidating an incore buffer in pageout. There are no locks
543 * in the pageout path, so there is a danger of losing data when
544 * block allocation happens at the same time as a pageout of a
545 * buddy page. incore() returns a buf covering both pages, which
546 * leads vnode-pageout to incorrectly flush the entire buf. Until
547 * the low-level ffs code is modified to deal with this, do not
548 * mount any filesystem whose block size is larger than 4K.
549 */
550 /*
551 * Can't mount filesystems with a fragment size less than DIRBLKSIZ
552 */
553 /*
554 * Don't mount dirty filesystems, except for the root filesystem
555 */
556 if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) ||
557 ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) {
558 #if REV_ENDIAN_FS
559 if (rev_endian)
560 byte_swap_sbout(fs);
561 #endif /* REV_ENDIAN_FS */
562 error = ENOTSUP;
563 goto out;
564 }
565
566 /* Figure out the device block size the file system expects: */
567 /* device block size = fragment size / number of sectors per fragment */
568
569 dbsize = fs->fs_fsize / NSPF(fs);
570 if (dbsize <= 0) {
571 kprintf("device blocksize computation failed\n");
572 } else {
573 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize,
574 FWRITE, context) != 0) {
575 kprintf("failed to set device blocksize\n");
576 }
577 /* force the specfs to reread blocksize from size() */
578 set_fsblocksize(devvp);
579 }
580
581 /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
582 if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
583 #if REV_ENDIAN_FS
584 if (rev_endian)
585 byte_swap_sbout(fs);
586 #endif /* REV_ENDIAN_FS */
587 error = EROFS; /* needs translation */
588 goto out;
589 }
590
591 /* If we are not mounting read only, then check for overlap
592 * condition in cylinder group's free block map.
593 * If overlap exists, then force this into a read only mount
594 * to avoid further corruption. PR#2216969
595 */
596 if (ronly == 0){
597 if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))),
598 (int)fs->fs_cgsize, NOCRED, &cgbp)) {
599 buf_brelse(cgbp);
600 goto out;
601 }
602 cgp = (struct cg *)buf_dataptr(cgbp);
603 #if REV_ENDIAN_FS
604 if (rev_endian)
605 byte_swap_cgin(cgp,fs);
606 #endif /* REV_ENDIAN_FS */
607 if (!cg_chkmagic(cgp)){
608 #if REV_ENDIAN_FS
609 if (rev_endian)
610 byte_swap_cgout(cgp,fs);
611 #endif /* REV_ENDIAN_FS */
612 buf_brelse(cgbp);
613 goto out;
614 }
615 if (cgp->cg_clustersumoff != 0) {
616 /* Check for overlap */
617 clustersumoff = cgp->cg_freeoff +
618 howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY);
619 clustersumoff = roundup(clustersumoff, sizeof(long));
620 if (cgp->cg_clustersumoff < clustersumoff) {
621 /* Overlap exists */
622 mp->mnt_flag |= MNT_RDONLY;
623 ronly = 1;
624 }
625 }
626 #if REV_ENDIAN_FS
627 if (rev_endian)
628 byte_swap_cgout(cgp,fs);
629 #endif /* REV_ENDIAN_FS */
630 buf_brelse(cgbp);
631 }
632
633 ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK);
634 bzero((caddr_t)ump, sizeof *ump);
635 ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT,
636 M_WAITOK);
637 bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize);
638 if (fs->fs_sbsize < SBSIZE)
639 buf_markinvalid(bp);
640 #if REV_ENDIAN_FS
641 if (rev_endian)
642 byte_swap_sbout(fs);
643 #endif /* REV_ENDIAN_FS */
644 buf_brelse(bp);
645 bp = NULL;
646 fs = ump->um_fs;
647 fs->fs_ronly = ronly;
648 if (fs->fs_cssize < 1 || fs->fs_fsize < 1 || fs->fs_ncg < 1) {
649 error = EINVAL;
650 goto out;
651 }
652 if (fs->fs_frag < 1 || fs->fs_frag > MAXFRAG) {
653 error = EINVAL;
654 goto out;
655 }
656
657 size = fs->fs_cssize;
658 blks = howmany(size, fs->fs_fsize);
659 if (fs->fs_contigsumsize > 0) {
660 if (fs->fs_ncg > INT_MAX / sizeof(int32_t) || size > INT_MAX - fs->fs_ncg * sizeof(int32_t)) {
661 error = EINVAL;
662 goto out;
663 }
664 size += fs->fs_ncg * sizeof(int32_t);
665 }
666 if (fs->fs_ncg > INT_MAX / sizeof(u_int8_t) || size > INT_MAX - fs->fs_ncg * sizeof(u_int8_t)) {
667 error = EINVAL;
668 goto out;
669 }
670 size += fs->fs_ncg * sizeof(u_int8_t);
671 space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK);
672 fs->fs_csp = space;
673 for (i = 0; i < blks; i += fs->fs_frag) {
674 size = fs->fs_bsize;
675 if (i + fs->fs_frag > blks)
676 size = (blks - i) * fs->fs_fsize;
677 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
678 size, cred, &bp)) {
679 _FREE(fs->fs_csp, M_UFSMNT);
680 goto out;
681 }
682 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
683 #if REV_ENDIAN_FS
684 if (rev_endian)
685 byte_swap_ints((int *) space, size / sizeof(int));
686 #endif /* REV_ENDIAN_FS */
687 space = (char *)space + size;
688 buf_brelse(bp);
689 bp = NULL;
690 }
691 if (fs->fs_contigsumsize > 0) {
692 fs->fs_maxcluster = lp = space;
693 for (i = 0; i < fs->fs_ncg; i++)
694 *lp++ = fs->fs_contigsumsize;
695 space = lp;
696 }
697 size = fs->fs_ncg * sizeof(u_int8_t);
698 fs->fs_contigdirs = (u_int8_t *)space;
699 space = (u_int8_t *)space + size;
700 bzero(fs->fs_contigdirs, size);
701 /* XXX Compatibility for old filesystems */
702 if (fs->fs_avgfilesize <= 0)
703 fs->fs_avgfilesize = AVFILESIZ;
704 if (fs->fs_avgfpdir <= 0)
705 fs->fs_avgfpdir = AFPDIR;
706 /* XXX End of compatibility */
707 mp->mnt_data = (qaddr_t)ump;
708 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
709 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
710 /* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */
711 mp->mnt_maxsymlinklen = 60;
712 #if REV_ENDIAN_FS
713 if (rev_endian)
714 mp->mnt_flag |= MNT_REVEND;
715 #endif /* REV_ENDIAN_FS */
716 ump->um_mountp = mp;
717 ump->um_dev = dev;
718 ump->um_devvp = devvp;
719 ump->um_nindir = fs->fs_nindir;
720 ump->um_bptrtodb = fs->fs_fsbtodb;
721 ump->um_seqinc = fs->fs_frag;
722 for (i = 0; i < MAXQUOTAS; i++)
723 dqfileinit(&ump->um_qfiles[i]);
724 ffs_oldfscompat(fs);
725 ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */
726 maxfilesize = 0x100000000ULL; /* 4GB */
727 #if 0
728 maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */
729 #endif /* 0 */
730 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
731 fs->fs_maxfilesize = maxfilesize; /* XXX */
732 if (ronly == 0) {
733 fs->fs_clean = 0;
734 (void) ffs_sbupdate(ump, MNT_WAIT);
735 }
736 return (0);
737 out:
738 if (bp)
739 buf_brelse(bp);
740 if (ump) {
741 _FREE(ump->um_fs, M_UFSMNT);
742 _FREE(ump, M_UFSMNT);
743 }
744 return (error);
745 }
746
747 /*
748 * Sanity checks for old file systems.
749 *
750 * XXX - goes away some day.
751 */
752 int ffs_oldfscompat(fs)
753 struct fs *fs;
754 {
755 int i;
756
757 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */
758 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */
759 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
760 fs->fs_nrpos = 8; /* XXX */
761 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
762 u_int64_t sizepb = fs->fs_bsize; /* XXX */
763 /* XXX */
764 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
765 for (i = 0; i < NIADDR; i++) { /* XXX */
766 sizepb *= NINDIR(fs); /* XXX */
767 fs->fs_maxfilesize += sizepb; /* XXX */
768 } /* XXX */
769 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */
770 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */
771 } /* XXX */
772 return (0);
773 }
774
775 /*
776 * unmount system call
777 */
778 int
779 ffs_unmount(mp, mntflags, context)
780 struct mount *mp;
781 int mntflags;
782 vfs_context_t context;
783 {
784 struct proc *p = vfs_context_proc(context);
785 register struct ufsmount *ump;
786 register struct fs *fs;
787 int error, flags;
788 int force;
789
790 flags = 0;
791 force = 0;
792 if (mntflags & MNT_FORCE) {
793 flags |= FORCECLOSE;
794 force = 1;
795 }
796 if ( (error = ffs_flushfiles(mp, flags, p)) && !force )
797 return (error);
798 ump = VFSTOUFS(mp);
799 fs = ump->um_fs;
800
801 if (fs->fs_ronly == 0) {
802 fs->fs_clean = 1;
803 if (error = ffs_sbupdate(ump, MNT_WAIT)) {
804 fs->fs_clean = 0;
805 #ifdef notyet
806 /* We can at least clean up, since the media could be write-protected */
807 /* and, during mount, we do not check for write failures. */
808 /* FIXME LATER: the correct fix would be to have mount detect the */
809 /* write-protected media and downgrade to a read-only mount. */
810 /* For now, here it is. */
811 return (error);
812 #endif /* notyet */
813 }
814 }
815 _FREE(fs->fs_csp, M_UFSMNT);
816 _FREE(fs, M_UFSMNT);
817 _FREE(ump, M_UFSMNT);
818
819 return (0);
820 }
821
822 /*
823 * Flush out all the files in a filesystem.
824 */
825 int ffs_flushfiles(mp, flags, p)
826 register struct mount *mp;
827 int flags;
828 struct proc *p;
829 {
830 register struct ufsmount *ump;
831 int i, error;
832
833 ump = VFSTOUFS(mp);
834
835 #if QUOTA
836 /*
837 * NOTE: The open quota files have an indirect reference
838 * on the root directory vnode. We must account for this
839 * extra reference when doing the initial vflush.
840 */
841 if (mp->mnt_flag & MNT_QUOTA) {
842 struct vnode *rootvp = NULLVP;
843 int quotafilecnt = 0;
844
845 /* Find out how many quota files we have open. */
846 for (i = 0; i < MAXQUOTAS; i++) {
847 if (ump->um_qfiles[i].qf_vp != NULLVP)
848 ++quotafilecnt;
849 }
850
851 /*
852 * Check if the root vnode is in our inode hash
853 * (so we can skip over it).
854 */
855 rootvp = ufs_ihashget(ump->um_dev, ROOTINO);
856
857 error = vflush(mp, rootvp, SKIPSYSTEM|flags);
858
859 if (rootvp) {
860 /*
861 * See if there are additional references on the
862 * root vp besides the ones obtained from the open
863 * quota files and the ufs_ihashget call above.
864 */
865 if ((error == 0) &&
866 (rootvp->v_usecount > (1 + quotafilecnt))) {
867 error = EBUSY; /* root dir is still open */
868 }
869 vnode_put(rootvp);
870 }
871 if (error && (flags & FORCECLOSE) == 0)
872 return (error);
873
874 for (i = 0; i < MAXQUOTAS; i++) {
875 if (ump->um_qfiles[i].qf_vp == NULLVP)
876 continue;
877 quotaoff(mp, i);
878 }
879 /*
880 * Here we fall through to vflush again to ensure
881 * that we have gotten rid of all the system vnodes.
882 */
883 }
884 #endif
885 error = vflush(mp, NULLVP, SKIPSWAP|flags);
886 error = vflush(mp, NULLVP, flags);
887 return (error);
888 }
889
890 /*
891 * Get file system statistics.
892 */
893 int
894 ffs_statfs(mp, sbp, context)
895 struct mount *mp;
896 register struct vfsstatfs *sbp;
897 vfs_context_t context;
898 {
899 register struct ufsmount *ump;
900 register struct fs *fs;
901
902 ump = VFSTOUFS(mp);
903 fs = ump->um_fs;
904 if (fs->fs_magic != FS_MAGIC)
905 panic("ffs_statfs");
906 sbp->f_bsize = fs->fs_fsize;
907 sbp->f_iosize = fs->fs_bsize;
908 sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize);
909 sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag +
910 fs->fs_cstotal.cs_nffree));
911 sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree));
912 sbp->f_files = (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO));
913 sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree);
914 return (0);
915 }
916
917 int
918 ffs_vfs_getattr(mp, fsap, context)
919 struct mount *mp;
920 struct vfs_attr *fsap;
921 vfs_context_t context;
922 {
923 struct ufsmount *ump;
924 struct fs *fs;
925 kauth_cred_t cred;
926 struct vnode *devvp;
927 struct buf *bp;
928 struct ufslabel *ulp;
929 char *offset;
930 int bs, error, length;
931
932 ump = VFSTOUFS(mp);
933 fs = ump->um_fs;
934 cred = vfs_context_ucred(context);
935
936 VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize);
937 VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize);
938 VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize));
939 VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long)
940 (fs->fs_cstotal.cs_nbfree * fs->fs_frag +
941 fs->fs_cstotal.cs_nffree)));
942 VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs,
943 fs->fs_minfree)));
944 VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long)
945 (fs->fs_ncg * fs->fs_ipg - ROOTINO)));
946 VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long)
947 fs->fs_cstotal.cs_nifree));
948
949 if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
950 fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
951 fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
952 VFSATTR_SET_SUPPORTED(fsap, f_fsid);
953 }
954
955 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
956 devvp = ump->um_devvp;
957 bs = vfs_devblocksize(mp);
958
959 if (error = (int)buf_meta_bread(devvp,
960 (daddr64_t)(UFS_LABEL_OFFSET / bs),
961 MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
962 if (bp)
963 buf_brelse(bp);
964 return (error);
965 }
966
967 /*
968 * Since the disklabel is read directly by older user space
969 * code, make sure this buffer won't remain in the cache when
970 * we release it.
971 */
972 buf_setflags(bp, B_NOCACHE);
973
974 offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
975 ulp = (struct ufslabel *)offset;
976
977 if (ufs_label_check(ulp)) {
978 length = ulp->ul_namelen;
979 #if REV_ENDIAN_FS
980 if (mp->mnt_flag & MNT_REVEND)
981 length = NXSwapShort(length);
982 #endif
983 if (length > 0 && length <= UFS_MAX_LABEL_NAME) {
984 bcopy(ulp->ul_name, fsap->f_vol_name, length);
985 fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0';
986 fsap->f_vol_name[length] = '\0';
987 }
988 }
989
990 buf_brelse(bp);
991 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
992 }
993
994 if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
995 fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
996 VOL_CAP_FMT_SYMBOLICLINKS |
997 VOL_CAP_FMT_HARDLINKS |
998 VOL_CAP_FMT_SPARSE_FILES |
999 VOL_CAP_FMT_CASE_SENSITIVE |
1000 VOL_CAP_FMT_CASE_PRESERVING |
1001 VOL_CAP_FMT_FAST_STATFS ;
1002 fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES]
1003 = VOL_CAP_INT_NFSEXPORT |
1004 VOL_CAP_INT_VOL_RENAME |
1005 VOL_CAP_INT_ADVLOCK |
1006 VOL_CAP_INT_FLOCK;
1007 fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1]
1008 = 0;
1009 fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2]
1010 = 0;
1011
1012 /* Capabilities we know about: */
1013 fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
1014 VOL_CAP_FMT_PERSISTENTOBJECTIDS |
1015 VOL_CAP_FMT_SYMBOLICLINKS |
1016 VOL_CAP_FMT_HARDLINKS |
1017 VOL_CAP_FMT_JOURNAL |
1018 VOL_CAP_FMT_JOURNAL_ACTIVE |
1019 VOL_CAP_FMT_NO_ROOT_TIMES |
1020 VOL_CAP_FMT_SPARSE_FILES |
1021 VOL_CAP_FMT_ZERO_RUNS |
1022 VOL_CAP_FMT_CASE_SENSITIVE |
1023 VOL_CAP_FMT_CASE_PRESERVING |
1024 VOL_CAP_FMT_FAST_STATFS |
1025 VOL_CAP_FMT_2TB_FILESIZE;
1026 fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
1027 VOL_CAP_INT_SEARCHFS |
1028 VOL_CAP_INT_ATTRLIST |
1029 VOL_CAP_INT_NFSEXPORT |
1030 VOL_CAP_INT_READDIRATTR |
1031 VOL_CAP_INT_EXCHANGEDATA |
1032 VOL_CAP_INT_COPYFILE |
1033 VOL_CAP_INT_ALLOCATE |
1034 VOL_CAP_INT_VOL_RENAME |
1035 VOL_CAP_INT_ADVLOCK |
1036 VOL_CAP_INT_FLOCK ;
1037 fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
1038 fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;
1039
1040 VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
1041 }
1042
1043 if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
1044 fsap->f_attributes.validattr.commonattr = 0;
1045 fsap->f_attributes.validattr.volattr =
1046 ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
1047 fsap->f_attributes.validattr.dirattr = 0;
1048 fsap->f_attributes.validattr.fileattr = 0;
1049 fsap->f_attributes.validattr.forkattr = 0;
1050
1051 fsap->f_attributes.nativeattr.commonattr = 0;
1052 fsap->f_attributes.nativeattr.volattr =
1053 ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
1054 fsap->f_attributes.nativeattr.dirattr = 0;
1055 fsap->f_attributes.nativeattr.fileattr = 0;
1056 fsap->f_attributes.nativeattr.forkattr = 0;
1057
1058 VFSATTR_SET_SUPPORTED(fsap, f_attributes);
1059 }
1060
1061 return (0);
1062 }
1063
1064
1065 int
1066 ffs_vfs_setattr(mp, fsap, context)
1067 struct mount *mp;
1068 struct vfs_attr *fsap;
1069 vfs_context_t context;
1070 {
1071 struct ufsmount *ump;
1072 struct vnode *devvp;
1073 struct buf *bp;
1074 struct ufslabel *ulp;
1075 kauth_cred_t cred;
1076 char *offset;
1077 int bs, error;
1078
1079
1080 ump = VFSTOUFS(mp);
1081 cred = vfs_context_ucred(context);
1082
1083 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
1084 devvp = ump->um_devvp;
1085 bs = vfs_devblocksize(mp);
1086 if (error = buf_meta_bread(devvp,
1087 (daddr64_t)(UFS_LABEL_OFFSET / bs),
1088 MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
1089 if (bp)
1090 buf_brelse(bp);
1091 return (error);
1092 }
1093
1094 /*
1095 * Since the disklabel is read directly by older user space
1096 * code, make sure this buffer won't remain in the cache when
1097 * we release it.
1098 */
1099 buf_setflags(bp, B_NOCACHE);
1100
1101 /* Validate the label structure; init if not valid */
1102 offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
1103 ulp = (struct ufslabel *)offset;
1104 if (!ufs_label_check(ulp))
1105 ufs_label_init(ulp);
1106
1107 /* Copy new name over existing name */
1108 ulp->ul_namelen = strlen(fsap->f_vol_name);
1109 #if REV_ENDIAN_FS
1110 if (mp->mnt_flag & MNT_REVEND)
1111 ulp->ul_namelen = NXSwapShort(ulp->ul_namelen);
1112 #endif
1113 bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen);
1114 ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0';
1115 ulp->ul_name[ulp->ul_namelen] = '\0';
1116
1117 /* Update the checksum */
1118 ulp->ul_checksum = 0;
1119 ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp));
1120
1121 /* Write the label back to disk */
1122 buf_bwrite(bp);
1123 bp = NULL;
1124
1125 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
1126 }
1127
1128 return (0);
1129 }
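/* Arguments passed through vnode_iterate() to ffs_sync_callback() */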
1130 struct ffs_sync_cargs {
1131 vfs_context_t context;
1132 int waitfor;
1133 int error;
1134 };
1135
1136
1137 static int
1138 ffs_sync_callback(struct vnode *vp, void *cargs)
1139 {
1140 struct inode *ip;
1141 struct ffs_sync_cargs *args;
1142 int error;
1143
1144 args = (struct ffs_sync_cargs *)cargs;
1145
1146 ip = VTOI(vp);
1147
1148 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) {
1149 error = VNOP_FSYNC(vp, args->waitfor, args->context);
1150
1151 if (error)
1152 args->error = error;
1153
1154 }
1155 return (VNODE_RETURNED);
1156 }
1157
1158 /*
1159 * Go through the disk queues to initiate sandbagged IO;
1160 * go through the inodes to write those that have been modified;
1161 * initiate the writing of the super block if it has been modified.
1162 *
1163 * Note: we are always called with the filesystem marked `MPBUSY'.
1164 */
1165 int
1166 ffs_sync(mp, waitfor, context)
1167 struct mount *mp;
1168 int waitfor;
1169 vfs_context_t context;
1170 {
1171 struct vnode *nvp, *vp;
1172 struct ufsmount *ump = VFSTOUFS(mp);
1173 struct fs *fs;
1174 struct timeval tv;
1175 int error, allerror = 0;
1176 struct ffs_sync_cargs args;
1177
1178 fs = ump->um_fs;
1179 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1180 printf("fs = %s\n", fs->fs_fsmnt);
1181 panic("update: rofs mod");
1182 }
1183 /*
1184 * Write back each (modified) inode.
1185 */
1186 args.context = context;
1187 args.waitfor = waitfor;
1188 args.error = 0;
1189 /*
1190 * ffs_sync_callback will be called for each vnode
1191 * hung off of this mount point... the vnode will be
1192 * properly referenced and unreferenced around the callback
1193 */
1194 vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args);
1195
1196 if (args.error)
1197 allerror = args.error;
1198
1199 /*
1200 * Force stale file system control information to be flushed.
1201 */
1202 if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context))
1203 allerror = error;
1204 #if QUOTA
1205 qsync(mp);
1206 #endif
1207 /*
1208 * Write back modified superblock.
1209 */
1210 if (fs->fs_fmod != 0) {
1211 fs->fs_fmod = 0;
1212 microtime(&tv);
1213 fs->fs_time = tv.tv_sec;
1214 if (error = ffs_sbupdate(ump, waitfor))
1215 allerror = error;
1216 }
1217 return (allerror);
1218 }
1219
1220 /*
1221 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1222 * in from disk. If it is in core, wait for the lock bit to clear, then
1223 * return the inode locked. Detection and handling of mount points must be
1224 * done by the calling routine.
1225 */
1226 int
1227 ffs_vget(mp, ino, vpp, context)
1228 mount_t mp;
1229 ino64_t ino;
1230 vnode_t *vpp;
1231 vfs_context_t context;
1232 {
1233 return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0));
1234 }
1235
1236
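/*
 * Core of ffs_vget: look up or create the vnode for an inode number.
 * 'dvp' and 'cnp' describe the directory entry (for the name cache),
 * 'mode' is a hint for the vnode type when the caller already knows it,
 * and 'fhwanted' is set for file handle (NFS) lookups, in which case an
 * unallocated inode returns ESTALE rather than ENOENT.
 */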
1237 int
1238 ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted)
1239 mount_t mp;
1240 ino_t ino;
1241 vnode_t *vpp;
1242 vnode_t dvp;
1243 struct componentname *cnp;
1244 int mode;
1245 int fhwanted;
1246 {
1247 struct proc *p = current_proc(); /* XXX */
1248 struct fs *fs;
1249 struct inode *ip;
1250 struct ufsmount *ump;
1251 struct buf *bp;
1252 struct vnode *vp;
1253 struct vnode_fsparam vfsp;
1254 struct timeval tv;
1255 enum vtype vtype;
1256 dev_t dev;
1257 int i, type, error = 0;
1258
1259 *vpp = NULL;
1260 ump = VFSTOUFS(mp);
1261 dev = ump->um_dev;
1262 #if 0
1263 /* Check for unmount in progress */
1264 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1265 return (EPERM);
1266 }
1267 #endif
1268 /*
1269 * Allocate a new inode... do it before we check the
1270 * cache, because the MALLOC_ZONE may block
1271 */
1272 type = M_FFSNODE;
1273 MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
1274
1275 /*
1276 * check in the inode hash
1277 */
1278 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
1279 /*
1280 * found it... get rid of the allocation
1281 * that we didn't need and return
1282 * the 'found' vnode
1283 */
1284 FREE_ZONE(ip, sizeof(struct inode), type);
1285 vp = *vpp;
1286 return (0);
1287 }
1288 bzero((caddr_t)ip, sizeof(struct inode));
1289 /*
1290 * lock the inode
1291 */
1292 // lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
1293 // lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p);
1294
1295 ip->i_fs = fs = ump->um_fs;
1296 ip->i_dev = dev;
1297 ip->i_number = ino;
1298 #if QUOTA
1299 for (i = 0; i < MAXQUOTAS; i++)
1300 ip->i_dquot[i] = NODQUOT;
1301 #endif
1302 SET(ip->i_flag, IN_ALLOC);
1303 /*
1304 * Put it onto its hash chain locked so that other requests for
1305 * this inode will block if they arrive while we are sleeping waiting
1306 * for old data structures to be purged or for the contents of the
1307 * disk portion of this inode to be read.
1308 */
1309 ufs_ihashins(ip);
1310
1311 /* Read in the disk contents for the inode, copy into the inode. */
1312 if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))),
1313 (int)fs->fs_bsize, NOCRED, &bp)) {
1314 buf_brelse(bp);
1315 goto errout;
1316 }
1317 #if REV_ENDIAN_FS
1318 if (mp->mnt_flag & MNT_REVEND) {
1319 byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip);
1320 } else {
1321 ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
1322 }
1323 #else
1324 ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
1325 #endif /* REV_ENDIAN_FS */
1326 buf_brelse(bp);
1327
1328 if (mode == 0)
1329 vtype = IFTOVT(ip->i_mode);
1330 else
1331 vtype = IFTOVT(mode);
1332
1333 if (vtype == VNON) {
1334 if (fhwanted) {
1335 /* NFS is in play */
1336 error = ESTALE;
1337 goto errout;
1338 } else {
1339 error = ENOENT;
1340 goto errout;
1341 }
1342 }
1343
1344 vfsp.vnfs_mp = mp;
1345 vfsp.vnfs_vtype = vtype;
1346 vfsp.vnfs_str = "ufs";
1347 vfsp.vnfs_dvp = dvp;
1348 vfsp.vnfs_fsnode = ip;
1349 vfsp.vnfs_cnp = cnp;
1350
1351 if (mode == 0)
1352 vfsp.vnfs_filesize = ip->i_din.di_size;
1353 else
1354 vfsp.vnfs_filesize = 0;
1355
1356 if (vtype == VFIFO )
1357 vfsp.vnfs_vops = FFS_FIFOOPS;
1358 else if (vtype == VBLK || vtype == VCHR)
1359 vfsp.vnfs_vops = ffs_specop_p;
1360 else
1361 vfsp.vnfs_vops = ffs_vnodeop_p;
1362
1363 if (vtype == VBLK || vtype == VCHR)
1364 vfsp.vnfs_rdev = ip->i_rdev;
1365 else
1366 vfsp.vnfs_rdev = 0;
1367
1368 if (dvp && cnp && (cnp->cn_flags & MAKEENTRY))
1369 vfsp.vnfs_flags = 0;
1370 else
1371 vfsp.vnfs_flags = VNFS_NOCACHE;
1372
1373 /*
1374 * Tag root directory
1375 */
1376 vfsp.vnfs_markroot = (ip->i_number == ROOTINO);
1377 vfsp.vnfs_marksystem = 0;
1378
1379 if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)))
1380 goto errout;
1381
1382 /*
1383 * Finish inode initialization now that aliasing has been resolved.
1384 */
1385 ip->i_devvp = ump->um_devvp;
1386 ip->i_vnode = vp;
1387
1388 vnode_ref(ip->i_devvp);
1389 vnode_addfsref(vp);
1390 vnode_settag(vp, VT_UFS);
1391
1392 /*
1393 * Initialize modrev times
1394 */
1395 microtime(&tv);
1396 SETHIGH(ip->i_modrev, tv.tv_sec);
1397 SETLOW(ip->i_modrev, tv.tv_usec * 4294);
1398
1399 /*
1400 * Set up a generation number for this inode if it does not
1401 * already have one. This should only happen on old filesystems.
1402 */
1403 if (ip->i_gen == 0) {
1404 if (++nextgennumber < (u_long)tv.tv_sec)
1405 nextgennumber = tv.tv_sec;
1406 ip->i_gen = nextgennumber;
1407 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1408 ip->i_flag |= IN_MODIFIED;
1409 }
1410 /*
1411 * Ensure that uid and gid are correct. This is a temporary
1412 * fix until fsck has been changed to do the update.
1413 */
1414 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
1415 ip->i_uid = ip->i_din.di_ouid; /* XXX */
1416 ip->i_gid = ip->i_din.di_ogid; /* XXX */
1417 } /* XXX */
1418 *vpp = vp;
1419
1420 CLR(ip->i_flag, IN_ALLOC);
1421
1422 if (ISSET(ip->i_flag, IN_WALLOC))
1423 wakeup(ip);
1424
1425 return (0);
1426
1427 errout:
1428 ufs_ihashrem(ip);
1429
1430 if (ISSET(ip->i_flag, IN_WALLOC))
1431 wakeup(ip);
1432 FREE_ZONE(ip, sizeof(struct inode), type);
1433
1434 return (error);
1435 }
1436
1437 /*
1438 * File handle to vnode
1439 *
1440 * Have to be really careful about stale file handles:
1441 * - check that the inode number is valid
1442 * - call vget to get the locked inode
1443 * - check for an unallocated inode (i_mode == 0)
1444 */
1445 int
1446 ffs_fhtovp(mp, fhlen, fhp, vpp, context)
1447 register struct mount *mp;
1448 int fhlen;
1449 unsigned char *fhp;
1450 struct vnode **vpp;
1451 vfs_context_t context;
1452 {
1453 register struct ufid *ufhp;
1454 register struct inode *ip;
1455 struct vnode *nvp;
1456 struct fs *fs;
1457 int error;
1458
1459 if (fhlen < (int)sizeof(struct ufid))
1460 return (EINVAL);
1461 ufhp = (struct ufid *)fhp;
1462 fs = VFSTOUFS(mp)->um_fs;
1463 if (ufhp->ufid_ino < ROOTINO ||
1464 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1465 return (ESTALE);
1466 error = ffs_vget_internal(mp, ufhp->ufid_ino, &nvp, NULL, NULL, 0, 1);
1467 if (error) {
1468 *vpp = NULLVP;
1469 return (error);
1470 }
1471 ip = VTOI(nvp);
1472 if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen) {
1473 vnode_put(nvp);
1474 *vpp = NULLVP;
1475 return (ESTALE);
1476 }
1477 *vpp = nvp;
1478 return (0);
1479 }
1480
1481 /*
1482 * Vnode pointer to File handle
1483 */
1484 /* ARGSUSED */
1485 int
1486 ffs_vptofh(vp, fhlenp, fhp, context)
1487 struct vnode *vp;
1488 int *fhlenp;
1489 unsigned char *fhp;
1490 vfs_context_t context;
1491 {
1492 register struct inode *ip;
1493 register struct ufid *ufhp;
1494
1495 if (*fhlenp < (int)sizeof(struct ufid))
1496 return (EOVERFLOW);
1497 ip = VTOI(vp);
1498 ufhp = (struct ufid *)fhp;
1499 ufhp->ufid_ino = ip->i_number;
1500 ufhp->ufid_gen = ip->i_gen;
1501 *fhlenp = sizeof(struct ufid);
1502 return (0);
1503 }
1504
1505 /*
1506 * Initialize the filesystem; just use ufs_init.
1507 */
1508 int
1509 ffs_init(vfsp)
1510 struct vfsconf *vfsp;
1511 {
1512
1513 return (ufs_init(vfsp));
1514 }
1515
1516 /*
1517 * fast filesystem related variables.
1518 */
1519 int ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
1520 user_addr_t newp, size_t newlen, vfs_context_t context)
1521 {
1522 extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
1523
1524 /* all sysctl names at this level are terminal */
1525 if (namelen != 1)
1526 return (ENOTDIR); /* overloaded */
1527
1528 switch (name[0]) {
1529 case FFS_CLUSTERREAD:
1530 return (sysctl_int(oldp, oldlenp, newp, newlen,
1531 &doclusterread));
1532 case FFS_CLUSTERWRITE:
1533 return (sysctl_int(oldp, oldlenp, newp, newlen,
1534 &doclusterwrite));
1535 case FFS_REALLOCBLKS:
1536 return (sysctl_int(oldp, oldlenp, newp, newlen,
1537 &doreallocblks));
1538 case FFS_ASYNCFREE:
1539 return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
1540 default:
1541 return (ENOTSUP);
1542 }
1543 /* NOTREACHED */
1544 }
1545
1546 /*
1547 * Write a superblock and associated information back to disk.
1548 */
1549 int
1550 ffs_sbupdate(mp, waitfor)
1551 struct ufsmount *mp;
1552 int waitfor;
1553 {
1554 register struct fs *dfs, *fs = mp->um_fs;
1555 register struct buf *bp;
1556 int blks;
1557 void *space;
1558 int i, size, error, allerror = 0;
1559 int devBlockSize=0;
1560 #if REV_ENDIAN_FS
1561 int rev_endian=(mp->um_mountp->mnt_flag & MNT_REVEND);
1562 #endif /* REV_ENDIAN_FS */
1563
1564 /*
1565 * First write back the summary information.
1566 */
1567 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1568 space = fs->fs_csp;
1569 for (i = 0; i < blks; i += fs->fs_frag) {
1570 size = fs->fs_bsize;
1571 if (i + fs->fs_frag > blks)
1572 size = (blks - i) * fs->fs_fsize;
1573 bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
1574 size, 0, 0, BLK_META);
1575 bcopy(space, (char *)buf_dataptr(bp), (u_int)size);
1576 #if REV_ENDIAN_FS
1577 if (rev_endian) {
1578 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
1579 }
1580 #endif /* REV_ENDIAN_FS */
1581 space = (char *)space + size;
1582 if (waitfor != MNT_WAIT)
1583 buf_bawrite(bp);
1584 else if (error = (int)buf_bwrite(bp))
1585 allerror = error;
1586 }
1587 /*
1588 * Now write back the superblock itself. If any errors occurred
1589 * up to this point, then fail so that the superblock avoids
1590 * being written out as clean.
1591 */
1592 if (allerror)
1593 return (allerror);
1594 devBlockSize = vfs_devblocksize(mp->um_mountp);
1595
1596 bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META);
1597 bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize);
1598 /* Restore compatibility to old file systems. XXX */
1599 dfs = (struct fs *)buf_dataptr(bp); /* XXX */
1600 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
1601 dfs->fs_nrpos = -1; /* XXX */
1602 #if REV_ENDIAN_FS
1603 /*
1604 * Swap bytes here so that, in the case of an
1605 * inode format < FS_44INODEFMT, the appropriate
1606 * fields below get moved correctly
1607 */
1608 if (rev_endian) {
1609 byte_swap_sbout((struct fs *)buf_dataptr(bp));
1610 }
1611 #endif /* REV_ENDIAN_FS */
1612 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
1613 int32_t *lp, tmp; /* XXX */
1614 /* XXX */
1615 lp = (int32_t *)&dfs->fs_qbmask; /* XXX */
1616 tmp = lp[4]; /* XXX */
1617 for (i = 4; i > 0; i--) /* XXX */
1618 lp[i] = lp[i-1]; /* XXX */
1619 lp[0] = tmp; /* XXX */
1620 } /* XXX */
1621 #if REV_ENDIAN_FS
1622 /* Note that dfs is already swapped so swap the filesize
1623 * before writing
1624 */
1625 if (rev_endian) {
1626 dfs->fs_maxfilesize = NXSwapLongLong(mp->um_savedmaxfilesize); /* XXX */
1627 } else {
1628 #endif /* REV_ENDIAN_FS */
1629 dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */
1630 #if REV_ENDIAN_FS
1631 }
1632 #endif /* REV_ENDIAN_FS */
1633 if (waitfor != MNT_WAIT)
1634 buf_bawrite(bp);
1635 else if (error = (int)buf_bwrite(bp))
1636 allerror = error;
1637
1638 return (allerror);
1639 }