]> git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ffs/ffs_vfsops.c
xnu-792.6.61.tar.gz
[apple/xnu.git] / bsd / ufs / ffs / ffs_vfsops.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1991, 1993, 1994
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
56 */
57
58 #include <rev_endian_fs.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/namei.h>
62 #include <sys/proc.h>
63 #include <sys/kauth.h>
64 #include <sys/kernel.h>
65 #include <sys/vnode_internal.h>
66 #include <sys/socket.h>
67 #include <sys/mount_internal.h>
68 #include <sys/mount.h>
69 #include <sys/buf.h>
70 #include <sys/mbuf.h>
71 #include <sys/file.h>
72 #include <sys/disk.h>
73 #include <sys/ioctl.h>
74 #include <sys/errno.h>
75 #include <sys/malloc.h>
76 #include <sys/ubc.h>
77 #include <sys/quota.h>
78
79 #include <miscfs/specfs/specdev.h>
80
81 #include <ufs/ufs/quota.h>
82 #include <ufs/ufs/ufsmount.h>
83 #include <ufs/ufs/inode.h>
84 #include <ufs/ufs/ufs_extern.h>
85
86 #include <ufs/ffs/fs.h>
87 #include <ufs/ffs/ffs_extern.h>
88 #if REV_ENDIAN_FS
89 #include <ufs/ufs/ufs_byte_order.h>
90 #include <architecture/byte_order.h>
91 #endif /* REV_ENDIAN_FS */
92
93 int ffs_sbupdate(struct ufsmount *, int);
94
/*
 * VFS operations vector for UFS/FFS.  Entry order is positional and
 * must match the struct vfsops layout used by the VFS layer.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,		/* mount */
	ufs_start,		/* start */
	ffs_unmount,		/* unmount */
	ufs_root,		/* root */
	ufs_quotactl,		/* quotactl */
	ffs_vfs_getattr,	/* getattr */
	ffs_sync,		/* sync */
	ffs_vget,		/* vget */
	ffs_fhtovp,		/* fhtovp */
	ffs_vptofh,		/* vptofh */
	ffs_init,		/* init */
	ffs_sysctl,		/* sysctl */
	ffs_vfs_setattr,	/* setattr */
	{0}			/* remaining slots unused */
};
111
112 extern u_long nextgennumber;
113
/*
 * Helper union for splitting a 64-bit quantity into its two 32-bit
 * halves (and reassembling it) without 64-bit shift arithmetic.
 */
union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};
/*
 * SETHIGH/SETLOW overwrite the high/low 32 bits of the 64-bit lvalue
 * 'q' with 'h'/'l'.  Wrapped in do { } while (0) so each macro expands
 * to a single statement and is safe in unbraced if/else bodies
 * (CERT C PRE10-C); the original bare-brace form could mis-parse there.
 */
#define SETHIGH(q, h) do { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
} while (0)
#define SETLOW(q, l) do { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
} while (0)
130
131 /*
132 * Called by main() when ufs is going to be mounted as root.
133 */
134 int
135 ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
136 {
137 struct proc *p = current_proc(); /* XXX */
138 int error;
139
140 /* Set asynchronous flag by default */
141 vfs_setflags(mp, MNT_ASYNC);
142
143 if (error = ffs_mountfs(rvp, mp, context))
144 return (error);
145
146 (void)ffs_statfs(mp, vfs_statfs(mp), NULL);
147
148 return (0);
149 }
150
151 /*
152 * VFS Operations.
153 *
154 * mount system call
155 */
int
ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct ufsmount *ump;
	register struct fs *fs;
	u_int size;		/* NOTE(review): unused in this function */
	int error = 0, flags;
	mode_t accessmode;	/* NOTE(review): unused in this function */
	int ronly;
	int reload = 0;

	/*
	 * If updating, check whether changing from read-write to
	 * read-only; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/*
			 * Flush any dirty data.
			 */
			VFS_SYNC(mp, MNT_WAIT, context);
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (error = ffs_flushfiles(mp, flags, p))
				return (error);
			/*
			 * Mark the fs clean and push the superblock out
			 * before the mount officially becomes read-only;
			 * back both bits out if the write fails.
			 */
			fs->fs_clean = 1;
			fs->fs_ronly = 1;
			if (error = ffs_sbupdate(ump, MNT_WAIT)) {
				fs->fs_clean = 0;
				fs->fs_ronly = 0;
				return (error);
			}
		}
		/* save fs_ronly to later use */
		ronly = fs->fs_ronly;
		if ((mp->mnt_flag & MNT_RELOAD) || ronly)
			reload = 1;
		if ((reload) &&
		    (error = ffs_reload(mp, vfs_context_ucred(context), p)))
			return (error);
		/* replace the ronly after load */
		fs->fs_ronly = ronly;
		/*
		 * Do not update the file system if the user was in singleuser
		 * and then tries to mount -uw without fscking
		 */
		if (!fs->fs_clean && ronly) {
			printf("WARNING: trying to mount a dirty file system\n");
			if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) {
				printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",fs->fs_fsmnt);
				/*
				 * Reset the readonly bit as reload might have
				 * modified this bit
				 */
				fs->fs_ronly = 1;
				return(EPERM);
			}
		}

		/* Upgrade read-only -> read-write: dirty the fs on disk. */
		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			fs->fs_ronly = 0;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT);
		}
		/* Update with no device vnode: nothing more to do. */
		if (devvp == 0) {
			return(0);
		}
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0)
		error = ffs_mountfs(devvp, mp, context);
	else {
		/* Updating: the device must be the one we mounted from. */
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* needs translation */
	}
	if (error) {
		return (error);
	}
	/* Record the mount point name in the superblock and refresh stats. */
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt));
	strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1);
	(void)ffs_statfs(mp, &mp->mnt_vfsstat, p);
	return (0);
}
248
249
/*
 * Context handed to ffs_reload_callback() through vnode_iterate().
 */
struct ffs_reload_cargs {
	struct vnode *devvp;	/* device vnode inodes are re-read from */
	kauth_cred_t cred;	/* caller's credential (not used by callback) */
	struct fs *fs;		/* in-core superblock */
	struct proc *p;		/* requesting process (not used by callback) */
	int error;		/* set by callback on read failure; else 0 */
#if REV_ENDIAN_FS
	int rev_endian;		/* non-zero if fs is opposite-endian */
#endif /* REV_ENDIAN_FS */
};
260
261
/*
 * vnode_iterate() callback for ffs_reload(): invalidate the vnode's
 * cached buffers, then re-read its on-disk dinode into the in-core
 * inode (step 6 of the reload sequence).  Returns VNODE_RETURNED to
 * continue iterating, or VNODE_RETURNED_DONE on a read error, with
 * the error reported through args->error.
 */
static int
ffs_reload_callback(struct vnode *vp, void *cargs)
{
	struct inode *ip;
	struct buf *bp;
	struct fs *fs;
	struct ffs_reload_cargs *args;

	args = (struct ffs_reload_cargs *)cargs;

	/*
	 * flush all the buffers associated with this node
	 */
	if (buf_invalidateblks(vp, 0, 0, 0))
		panic("ffs_reload: dirty2");

	/*
	 * Step 6: re-read inode data
	 */
	ip = VTOI(vp);
	fs = args->fs;

	if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);

		return (VNODE_RETURNED_DONE);
	}

#if REV_ENDIAN_FS
	if (args->rev_endian) {
		/* Opposite-endian fs: byte-swap the dinode while copying. */
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) +
		    ino_to_fsbo(fs, ip->i_number)), ip);
	} else {
#endif /* REV_ENDIAN_FS */
	/* Native endianness: straight structure copy from the buffer. */
	ip->i_din = *((struct dinode *)buf_dataptr(bp) +
	    ino_to_fsbo(fs, ip->i_number));
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */

	buf_brelse(bp);

	return (VNODE_RETURNED);
}
307
308
309 /*
310 * Reload all incore data for a filesystem (used after running fsck on
311 * the root filesystem and finding things to fix). The filesystem must
312 * be mounted read-only.
313 *
314 * Things to do to update the mount:
315 * 1) invalidate all cached meta-data.
316 * 2) re-read superblock from disk.
317 * 3) re-read summary information from disk.
318 * 4) invalidate all inactive vnodes.
319 * 5) invalidate all cached file data.
320 * 6) re-read inode data for all active vnodes.
321 */
322 ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p)
323 {
324 register struct vnode *devvp;
325 void *space;
326 struct buf *bp;
327 struct fs *fs, *newfs;
328 int i, blks, size, error;
329 u_int64_t maxfilesize; /* XXX */
330 int32_t *lp;
331 struct ffs_reload_cargs args;
332 #if REV_ENDIAN_FS
333 int rev_endian = (mountp->mnt_flag & MNT_REVEND);
334 #endif /* REV_ENDIAN_FS */
335
336 if ((mountp->mnt_flag & MNT_RDONLY) == 0)
337 return (EINVAL);
338 /*
339 * Step 1: invalidate all cached meta-data.
340 */
341 devvp = VFSTOUFS(mountp)->um_devvp;
342 if (buf_invalidateblks(devvp, 0, 0, 0))
343 panic("ffs_reload: dirty1");
344 /*
345 * Step 2: re-read superblock from disk.
346 */
347 size = vfs_devblocksize(mountp);
348
349 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) {
350 buf_brelse(bp);
351 return (error);
352 }
353 newfs = (struct fs *)buf_dataptr(bp);
354 #if REV_ENDIAN_FS
355 if (rev_endian) {
356 byte_swap_sbin(newfs);
357 }
358 #endif /* REV_ENDIAN_FS */
359 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
360 newfs->fs_bsize < sizeof(struct fs)) {
361 #if REV_ENDIAN_FS
362 if (rev_endian)
363 byte_swap_sbout(newfs);
364 #endif /* REV_ENDIAN_FS */
365
366 buf_brelse(bp);
367 return (EIO); /* XXX needs translation */
368 }
369 fs = VFSTOUFS(mountp)->um_fs;
370 /*
371 * Copy pointer fields back into superblock before copying in XXX
372 * new superblock. These should really be in the ufsmount. XXX
373 * Note that important parameters (eg fs_ncg) are unchanged.
374 */
375 newfs->fs_csp = fs->fs_csp;
376 newfs->fs_maxcluster = fs->fs_maxcluster;
377 newfs->fs_contigdirs = fs->fs_contigdirs;
378 bcopy(newfs, fs, (u_int)fs->fs_sbsize);
379 if (fs->fs_sbsize < SBSIZE)
380 buf_markinvalid(bp);
381 #if REV_ENDIAN_FS
382 if (rev_endian)
383 byte_swap_sbout(newfs);
384 #endif /* REV_ENDIAN_FS */
385 buf_brelse(bp);
386 mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
387 ffs_oldfscompat(fs);
388 maxfilesize = 0x100000000ULL; /* 4GB */
389 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
390 fs->fs_maxfilesize = maxfilesize; /* XXX */
391 /*
392 * Step 3: re-read summary information from disk.
393 */
394 blks = howmany(fs->fs_cssize, fs->fs_fsize);
395 space = fs->fs_csp;
396 for (i = 0; i < blks; i += fs->fs_frag) {
397 size = fs->fs_bsize;
398 if (i + fs->fs_frag > blks)
399 size = (blks - i) * fs->fs_fsize;
400 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size,
401 NOCRED, &bp)) {
402 buf_brelse(bp);
403 return (error);
404 }
405 #if REV_ENDIAN_FS
406 if (rev_endian) {
407 /* csum swaps */
408 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
409 }
410 #endif /* REV_ENDIAN_FS */
411 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
412 #if REV_ENDIAN_FS
413 if (rev_endian) {
414 /* csum swaps */
415 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
416 }
417 #endif /* REV_ENDIAN_FS */
418 space = (char *) space + size;
419 buf_brelse(bp);
420 }
421 /*
422 * We no longer know anything about clusters per cylinder group.
423 */
424 if (fs->fs_contigsumsize > 0) {
425 lp = fs->fs_maxcluster;
426 for (i = 0; i < fs->fs_ncg; i++)
427 *lp++ = fs->fs_contigsumsize;
428 }
429 #if REV_ENDIAN_FS
430 args.rev_endian = rev_endian;
431 #endif /* REV_ENDIAN_FS */
432 args.devvp = devvp;
433 args.cred = cred;
434 args.fs = fs;
435 args.p = p;
436 args.error = 0;
437 /*
438 * ffs_reload_callback will be called for each vnode
439 * hung off of this mount point that can't be recycled...
440 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
441 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
442 * properly referenced and unreferenced around the callback
443 */
444 vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args);
445
446 return (args.error);
447 }
448
449 /*
450 * Common code for mount and mountroot
451 */
int
ffs_mountfs(devvp, mp, context)
	struct vnode *devvp;
	struct mount *mp;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	dev_t dev;
	struct buf *cgbp;
	struct cg *cgp;
	int32_t clustersumoff;
	void *space;
	int error, i, blks, ronly;
	u_int32_t size;
	int32_t *lp;
	kauth_cred_t cred;
	u_int64_t maxfilesize;					/* XXX */
	u_int dbsize = DEV_BSIZE;
#if REV_ENDIAN_FS
	int rev_endian=0;
#endif /* REV_ENDIAN_FS */
	dev = devvp->v_rdev;
	cred = vfs_context_ucred(context);

	ronly = vfs_isrdonly(mp);
	bp = NULL;
	ump = NULL;

	/* Advisory locking should be handled at the VFS layer */
	vfs_setlocklocal(mp);

	/* Obtain the actual device block size */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) {
		error = ENXIO;
		goto out;
	}

	/* Read the superblock off the device. */
	if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)),
	    SBSIZE, cred, &bp))
		goto out;
	fs = (struct fs *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	/*
	 * If the native-endian sanity check fails, see whether the
	 * superblock is valid with its bytes swapped (opposite-endian fs).
	 */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
		int magic = fs->fs_magic;

		byte_swap_ints(&magic, 1);
		if (magic != FS_MAGIC) {
			error = EINVAL;
			goto out;
		}
		byte_swap_sbin(fs);
		if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
		    fs->fs_bsize < sizeof(struct fs)) {
			byte_swap_sbout(fs);
			error = EINVAL;	/* XXX needs translation */
			goto out;
		}
		rev_endian=1;
	}
#endif /* REV_ENDIAN_FS */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EINVAL;	/* XXX needs translation */
		goto out;
	}


	/*
	 * Buffer cache does not handle multiple pages in a buf when
	 * invalidating incore buffer in pageout. There are no locks
	 * in the pageout path.  So there is a danger of losing data when
	 * block allocation happens at the same time a pageout of buddy
	 * page occurs. incore() returns buf with both
	 * pages, this leads vnode-pageout to incorrectly flush of entire.
	 * buf. Till the low level ffs code is modified to deal with these
	 * do not mount any FS more than 4K size.
	 */
	/*
	 * Can't mount filesystems with a fragment size less than DIRBLKSIZ
	 */
	/*
	 * Don't mount dirty filesystems, except for the root filesystem
	 */
	if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) ||
	    ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = ENOTSUP;
		goto out;
	}

	/* Let's figure out the devblock size the file system is with */
	/* the device block size = fragment size / number of sectors per frag */

	dbsize = fs->fs_fsize / NSPF(fs);
	if(dbsize <= 0 ) {
		kprintf("device blocksize computaion failed\n");
	} else {
		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize,
		    FWRITE, context) != 0) {
			kprintf("failed to set device blocksize\n");
		}
		/* force the specfs to reread blocksize from size() */
		set_fsblocksize(devvp);
	}

	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EROFS;	/* needs translation */
		goto out;
	}

	/* If we are not mounting read only, then check for overlap
	 * condition in cylinder group's free block map.
	 * If overlap exists, then force this into a read only mount
	 * to avoid further corruption. PR#2216969
	 */
	if (ronly == 0){
		if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))),
		    (int)fs->fs_cgsize, NOCRED, &cgbp)) {
			buf_brelse(cgbp);
			goto out;
		}
		cgp = (struct cg *)buf_dataptr(cgbp);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgin(cgp,fs);
#endif /* REV_ENDIAN_FS */
		if (!cg_chkmagic(cgp)){
			/*
			 * NOTE(review): 'error' is still 0 here (the
			 * buf_bread above succeeded), so this failure
			 * path appears to return success from 'out:' —
			 * confirm whether an explicit error (e.g. EINVAL)
			 * was intended.
			 */
#if REV_ENDIAN_FS
			if (rev_endian)
				byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
			buf_brelse(cgbp);
			goto out;
		}
		if (cgp->cg_clustersumoff != 0) {
			/* Check for overlap */
			clustersumoff = cgp->cg_freeoff +
			    howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY);
			clustersumoff = roundup(clustersumoff, sizeof(long));
			if (cgp->cg_clustersumoff < clustersumoff) {
				/* Overlap exists */
				mp->mnt_flag |= MNT_RDONLY;
				ronly = 1;
			}
		}
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(cgbp);
	}

	/* Allocate the ufsmount and a private, native-endian superblock. */
	ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK);
	bzero((caddr_t)ump, sizeof *ump);
	ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT,
	    M_WAITOK);
	bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		buf_markinvalid(bp);
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	fs->fs_ronly = ronly;
	/*
	 * Size the summary area: csum blocks, plus per-cg cluster
	 * counts when clustering is enabled, plus per-cg contigdirs.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
		    size, cred, &bp)) {
			_FREE(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy((char *)buf_dataptr(bp), space, (u_int)size);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_ints((int *) space, size / sizeof(int));
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		buf_brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	space = (u_int8_t *)space + size;
	bzero(fs->fs_contigdirs, size);
	/* XXX Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	/* XXX End of compatibility */
	/* Wire the ufsmount into the mount structure. */
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
	mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
	/* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */
	mp->mnt_maxsymlinklen = 60;
#if REV_ENDIAN_FS
	if (rev_endian)
		mp->mnt_flag |= MNT_REVEND;
#endif /* REV_ENDIAN_FS */
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		dqfileinit(&ump->um_qfiles[i]);
	ffs_oldfscompat(fs);
	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
	maxfilesize = 0x100000000ULL;	/* 4GB */
#if 0
	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
#endif /* 0 */
	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
		fs->fs_maxfilesize = maxfilesize;		/* XXX */
	/* Read-write mount: mark the fs dirty until cleanly unmounted. */
	if (ronly == 0) {
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT);
	}
	return (0);
out:
	/* Common error exit: release superblock buffer and allocations. */
	if (bp)
		buf_brelse(bp);
	if (ump) {
		_FREE(ump->um_fs, M_UFSMNT);
		_FREE(ump, M_UFSMNT);
	}
	return (error);
}
714
715 /*
716 * Sanity checks for old file systems.
717 *
718 * XXX - goes away some day.
719 */
720 ffs_oldfscompat(fs)
721 struct fs *fs;
722 {
723 int i;
724
725 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */
726 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */
727 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
728 fs->fs_nrpos = 8; /* XXX */
729 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
730 u_int64_t sizepb = fs->fs_bsize; /* XXX */
731 /* XXX */
732 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
733 for (i = 0; i < NIADDR; i++) { /* XXX */
734 sizepb *= NINDIR(fs); /* XXX */
735 fs->fs_maxfilesize += sizepb; /* XXX */
736 } /* XXX */
737 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */
738 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */
739 } /* XXX */
740 return (0);
741 }
742
743 /*
744 * unmount system call
745 */
int
ffs_unmount(mp, mntflags, context)
	struct mount *mp;
	int mntflags;
	vfs_context_t context;
{
	struct proc *p = vfs_context_proc(context);
	register struct ufsmount *ump;
	register struct fs *fs;
	int error, flags;
	int force;

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}
	/* Flush all files; a forced unmount proceeds despite errors. */
	if ( (error = ffs_flushfiles(mp, flags, p)) && !force )
		return (error);
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;

	/* Read-write mount: mark the fs clean and push the superblock. */
	if (fs->fs_ronly == 0) {
		fs->fs_clean = 1;
		if (error = ffs_sbupdate(ump, MNT_WAIT)) {
			fs->fs_clean = 0;
#ifdef notyet
			/* we can atleast cleanup ; as the media could be WP */
			/* & during mount, we do not check for write failures */
			/* FIXME LATER : the Correct fix would be to have */
			/* mount detect the WP media and downgrade to readonly mount */
			/* For now, here it is */
			return (error);
#endif /* notyet */
		}
	}
	/* Release summary info, the in-core superblock, and the ufsmount. */
	_FREE(fs->fs_csp, M_UFSMNT);
	_FREE(fs, M_UFSMNT);
	_FREE(ump, M_UFSMNT);

	return (0);
}
789
790 /*
791 * Flush out all the files in a filesystem.
792 */
793 ffs_flushfiles(mp, flags, p)
794 register struct mount *mp;
795 int flags;
796 struct proc *p;
797 {
798 register struct ufsmount *ump;
799 int i, error;
800
801 ump = VFSTOUFS(mp);
802
803 #if QUOTA
804 /*
805 * NOTE: The open quota files have an indirect reference
806 * on the root directory vnode. We must account for this
807 * extra reference when doing the intial vflush.
808 */
809 if (mp->mnt_flag & MNT_QUOTA) {
810 struct vnode *rootvp = NULLVP;
811 int quotafilecnt = 0;
812
813 /* Find out how many quota files we have open. */
814 for (i = 0; i < MAXQUOTAS; i++) {
815 if (ump->um_qfiles[i].qf_vp != NULLVP)
816 ++quotafilecnt;
817 }
818
819 /*
820 * Check if the root vnode is in our inode hash
821 * (so we can skip over it).
822 */
823 rootvp = ufs_ihashget(ump->um_dev, ROOTINO);
824
825 error = vflush(mp, rootvp, SKIPSYSTEM|flags);
826
827 if (rootvp) {
828 /*
829 * See if there are additional references on the
830 * root vp besides the ones obtained from the open
831 * quota files and the hfs_chashget call above.
832 */
833 if ((error == 0) &&
834 (rootvp->v_usecount > (1 + quotafilecnt))) {
835 error = EBUSY; /* root dir is still open */
836 }
837 vnode_put(rootvp);
838 }
839 if (error && (flags & FORCECLOSE) == 0)
840 return (error);
841
842 for (i = 0; i < MAXQUOTAS; i++) {
843 if (ump->um_qfiles[i].qf_vp == NULLVP)
844 continue;
845 quotaoff(mp, i);
846 }
847 /*
848 * Here we fall through to vflush again to ensure
849 * that we have gotten rid of all the system vnodes.
850 */
851 }
852 #endif
853 error = vflush(mp, NULLVP, SKIPSWAP|flags);
854 error = vflush(mp, NULLVP, flags);
855 return (error);
856 }
857
858 /*
859 * Get file system statistics.
860 */
861 int
862 ffs_statfs(mp, sbp, context)
863 struct mount *mp;
864 register struct vfsstatfs *sbp;
865 vfs_context_t context;
866 {
867 register struct ufsmount *ump;
868 register struct fs *fs;
869
870 ump = VFSTOUFS(mp);
871 fs = ump->um_fs;
872 if (fs->fs_magic != FS_MAGIC)
873 panic("ffs_statfs");
874 sbp->f_bsize = fs->fs_fsize;
875 sbp->f_iosize = fs->fs_bsize;
876 sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize);
877 sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag +
878 fs->fs_cstotal.cs_nffree));
879 sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree));
880 sbp->f_files = (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO));
881 sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree);
882 return (0);
883 }
884
int
ffs_vfs_getattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct fs *fs;
	kauth_cred_t cred;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	char *offset;
	int bs, error, length;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	cred = vfs_context_ucred(context);

	/* Basic space/inode statistics straight from the superblock. */
	VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize);
	VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize);
	VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize));
	VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long)
	    (fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree)));
	VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs,
	    fs->fs_minfree)));
	VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long)
	    (fs->fs_ncg * fs->fs_ipg - ROOTINO)));
	VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long)
	    fs->fs_cstotal.cs_nifree));

	if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
		fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
		fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
		VFSATTR_SET_SUPPORTED(fsap, f_fsid);
	}

	/* Volume name lives in the on-disk UFS label; read it directly. */
	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);

		if (error = (int)buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;

		if (ufs_label_check(ulp)) {
			length = ulp->ul_namelen;
#if REV_ENDIAN_FS
			/* Label fields are stored in on-disk byte order. */
			if (mp->mnt_flag & MNT_REVEND)
				length = NXSwapShort(length);
#endif
			if (length > 0 && length <= UFS_MAX_LABEL_NAME) {
				bcopy(ulp->ul_name, fsap->f_vol_name, length);
				fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0';
				/*
				 * NOTE(review): when length == UFS_MAX_LABEL_NAME
				 * this writes one byte past index MAX-1; confirm
				 * f_vol_name is larger than UFS_MAX_LABEL_NAME.
				 */
				fsap->f_vol_name[length] = '\0';
			}
		}

		buf_brelse(bp);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	/* Static capability mask for UFS/FFS volumes. */
	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS ;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES]
		    = VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1]
		    = 0;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2]
		    = 0;

		/* Capabilities we know about: */
		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_PERSISTENTOBJECTIDS |
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_JOURNAL |
		    VOL_CAP_FMT_JOURNAL_ACTIVE |
		    VOL_CAP_FMT_NO_ROOT_TIMES |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_ZERO_RUNS |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS |
		    VOL_CAP_FMT_2TB_FILESIZE;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
		    VOL_CAP_INT_SEARCHFS |
		    VOL_CAP_INT_ATTRLIST |
		    VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_READDIRATTR |
		    VOL_CAP_INT_EXCHANGEDATA |
		    VOL_CAP_INT_COPYFILE |
		    VOL_CAP_INT_ALLOCATE |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK ;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}

	/* Only volume-level attributes are supported via getattrlist. */
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		fsap->f_attributes.validattr.commonattr = 0;
		fsap->f_attributes.validattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.validattr.dirattr = 0;
		fsap->f_attributes.validattr.fileattr = 0;
		fsap->f_attributes.validattr.forkattr = 0;

		fsap->f_attributes.nativeattr.commonattr = 0;
		fsap->f_attributes.nativeattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.nativeattr.dirattr = 0;
		fsap->f_attributes.nativeattr.fileattr = 0;
		fsap->f_attributes.nativeattr.forkattr = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}

	return (0);
}
1031
1032
1033 int
1034 ffs_vfs_setattr(mp, fsap, context)
1035 struct mount *mp;
1036 struct vfs_attr *fsap;
1037 vfs_context_t context;
1038 {
1039 struct ufsmount *ump;
1040 struct vnode *devvp;
1041 struct buf *bp;
1042 struct ufslabel *ulp;
1043 kauth_cred_t cred;
1044 char *offset;
1045 int bs, error;
1046
1047
1048 ump = VFSTOUFS(mp);
1049 cred = vfs_context_ucred(context);
1050
1051 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
1052 devvp = ump->um_devvp;
1053 bs = vfs_devblocksize(mp);
1054 if (error = buf_meta_bread(devvp,
1055 (daddr64_t)(UFS_LABEL_OFFSET / bs),
1056 MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
1057 if (bp)
1058 buf_brelse(bp);
1059 return (error);
1060 }
1061
1062 /*
1063 * Since the disklabel is read directly by older user space
1064 * code, make sure this buffer won't remain in the cache when
1065 * we release it.
1066 */
1067 buf_setflags(bp, B_NOCACHE);
1068
1069 /* Validate the label structure; init if not valid */
1070 offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
1071 ulp = (struct ufslabel *)offset;
1072 if (!ufs_label_check(ulp))
1073 ufs_label_init(ulp);
1074
1075 /* Copy new name over existing name */
1076 ulp->ul_namelen = strlen(fsap->f_vol_name);
1077 #if REV_ENDIAN_FS
1078 if (mp->mnt_flag & MNT_REVEND)
1079 ulp->ul_namelen = NXSwapShort(ulp->ul_namelen);
1080 #endif
1081 bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen);
1082 ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0';
1083 ulp->ul_name[ulp->ul_namelen] = '\0';
1084
1085 /* Update the checksum */
1086 ulp->ul_checksum = 0;
1087 ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp));
1088
1089 /* Write the label back to disk */
1090 buf_bwrite(bp);
1091 bp = NULL;
1092
1093 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
1094 }
1095
1096 return (0);
1097 }
/*
 * Argument bundle passed from ffs_sync() to ffs_sync_callback()
 * through vnode_iterate()'s opaque callback-argument pointer.
 */
struct ffs_sync_cargs {
	vfs_context_t context;	/* caller's VFS context, handed to VNOP_FSYNC */
	int waitfor;		/* sync mode forwarded to VNOP_FSYNC */
	int error;		/* latest VNOP_FSYNC error seen (0 if none) */
};
1103
1104
1105 static int
1106 ffs_sync_callback(struct vnode *vp, void *cargs)
1107 {
1108 struct inode *ip;
1109 struct ffs_sync_cargs *args;
1110 int error;
1111
1112 args = (struct ffs_sync_cargs *)cargs;
1113
1114 ip = VTOI(vp);
1115
1116 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) {
1117 error = VNOP_FSYNC(vp, args->waitfor, args->context);
1118
1119 if (error)
1120 args->error = error;
1121
1122 }
1123 return (VNODE_RETURNED);
1124 }
1125
1126 /*
1127 * Go through the disk queues to initiate sandbagged IO;
1128 * go through the inodes to write those that have been modified;
1129 * initiate the writing of the super block if it has been modified.
1130 *
1131 * Note: we are always called with the filesystem marked `MPBUSY'.
1132 */
1133 int
1134 ffs_sync(mp, waitfor, context)
1135 struct mount *mp;
1136 int waitfor;
1137 vfs_context_t context;
1138 {
1139 struct vnode *nvp, *vp;
1140 struct ufsmount *ump = VFSTOUFS(mp);
1141 struct fs *fs;
1142 struct timeval tv;
1143 int error, allerror = 0;
1144 struct ffs_sync_cargs args;
1145
1146 fs = ump->um_fs;
1147 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1148 printf("fs = %s\n", fs->fs_fsmnt);
1149 panic("update: rofs mod");
1150 }
1151 /*
1152 * Write back each (modified) inode.
1153 */
1154 args.context = context;
1155 args.waitfor = waitfor;
1156 args.error = 0;
1157 /*
1158 * ffs_sync_callback will be called for each vnode
1159 * hung off of this mount point... the vnode will be
1160 * properly referenced and unreferenced around the callback
1161 */
1162 vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args);
1163
1164 if (args.error)
1165 allerror = args.error;
1166
1167 /*
1168 * Force stale file system control information to be flushed.
1169 */
1170 if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context))
1171 allerror = error;
1172 #if QUOTA
1173 qsync(mp);
1174 #endif
1175 /*
1176 * Write back modified superblock.
1177 */
1178 if (fs->fs_fmod != 0) {
1179 fs->fs_fmod = 0;
1180 microtime(&tv);
1181 fs->fs_time = tv.tv_sec;
1182 if (error = ffs_sbupdate(ump, waitfor))
1183 allerror = error;
1184 }
1185 return (allerror);
1186 }
1187
1188 /*
1189 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1190 * in from disk. If it is in core, wait for the lock bit to clear, then
1191 * return the inode locked. Detection and handling of mount points must be
1192 * done by the calling routine.
1193 */
1194 int
1195 ffs_vget(mp, ino, vpp, context)
1196 mount_t mp;
1197 ino64_t ino;
1198 vnode_t *vpp;
1199 vfs_context_t context;
1200 {
1201 return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0));
1202 }
1203
1204
/*
 * Core lookup/creation of an in-core inode and its vnode.
 *
 * Returns (in *vpp) a vnode for inode `ino' on mount `mp'.  If the
 * inode is already in the inode hash, that vnode is returned at once;
 * otherwise a fresh inode is allocated, inserted into the hash marked
 * IN_ALLOC (so concurrent requests for the same inode block), filled
 * from the on-disk dinode, and a new vnode is created for it.
 *
 * dvp/cnp:  optional parent vnode and component name from a lookup or
 *           create path; when both are present with MAKEENTRY set, the
 *           VNFS_NOCACHE flag is cleared (presumably allowing the new
 *           vnode into the name cache — confirm against vnode_create).
 * mode:     if non-zero, the mode bits used to derive the vnode type
 *           (for newly created inodes whose on-disk i_mode is not yet
 *           valid); zero means derive the type from i_mode.
 * fhwanted: non-zero when called for NFS file-handle translation; an
 *           unallocated inode then yields ESTALE instead of ENOENT.
 *
 * Returns 0 with *vpp set, or an errno with *vpp left NULL.
 */
int
ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted)
	mount_t mp;
	ino_t ino;
	vnode_t *vpp;
	vnode_t dvp;
	struct componentname *cnp;
	int mode;
	int fhwanted;
{
	struct proc *p = current_proc();	/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct vnode_fsparam vfsp;
	struct timeval tv;
	enum vtype vtype;
	dev_t dev;
	int i, type, error = 0;

	*vpp = NULL;
	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
#if 0
	/* Check for unmount in progress */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		return (EPERM);
	}
#endif
	/*
	 * Allocate a new inode... do it before we check the
	 * cache, because the MALLOC_ZONE may block
	 */
	type = M_FFSNODE;
	MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);

	/*
	 * check in the inode hash
	 */
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		/*
		 * found it... get rid of the allocation
		 * that we didn't need and return
		 * the 'found' vnode
		 */
		FREE_ZONE(ip, sizeof(struct inode), type);
		vp = *vpp;	/* NOTE(review): vp is otherwise unused here */
		return (0);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * lock the inode
	 */
	// lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
	// lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p);

	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#if QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/* IN_ALLOC marks the inode as under construction; see errout. */
	SET(ip->i_flag, IN_ALLOC);
	/*
	 * Put it onto its hash chain locked so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	/* Read in the disk contents for the inode, copy into the inode. */
	if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))),
				(int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);
		goto errout;
	}
#if REV_ENDIAN_FS
	/* Opposite-endian volumes need the dinode swapped on the way in. */
	if (mp->mnt_flag & MNT_REVEND) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip);
	} else {
		ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
	}
#else
	ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);

	/* Derive the vnode type from the on-disk mode or the caller's hint. */
	if (mode == 0)
		vtype = IFTOVT(ip->i_mode);
	else
		vtype = IFTOVT(mode);

	if (vtype == VNON) {
		if (fhwanted) {
			/* NFS is in play */
			error = ESTALE;
			goto errout;
		} else {
			error = ENOENT;
			goto errout;
		}
	}

	/* Build the vnode_create parameter block. */
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "ufs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = ip;
	vfsp.vnfs_cnp = cnp;

	/* A freshly created inode (mode != 0) starts with zero length. */
	if (mode == 0)
		vfsp.vnfs_filesize = ip->i_din.di_size;
	else
		vfsp.vnfs_filesize = 0;

	/* Select the vnode operations vector by type. */
	if (vtype == VFIFO )
		vfsp.vnfs_vops = FFS_FIFOOPS;
	else if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_vops = ffs_specop_p;
	else
		vfsp.vnfs_vops = ffs_vnodeop_p;

	if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_rdev = ip->i_rdev;
	else
		vfsp.vnfs_rdev = 0;

	if (dvp && cnp && (cnp->cn_flags & MAKEENTRY))
		vfsp.vnfs_flags = 0;
	else
		vfsp.vnfs_flags = VNFS_NOCACHE;

	/*
	 * Tag root directory
	 */
	vfsp.vnfs_markroot = (ip->i_number == ROOTINO);
	vfsp.vnfs_marksystem = 0;

	if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)))
		goto errout;

	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	ip->i_vnode = vp;

	vnode_ref(ip->i_devvp);
	vnode_addfsref(vp);
	vnode_settag(vp, VT_UFS);

	/*
	 * Initialize modrev times
	 */
	microtime(&tv);
	SETHIGH(ip->i_modrev, tv.tv_sec);
	SETLOW(ip->i_modrev, tv.tv_usec * 4294);

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)tv.tv_sec)
			nextgennumber = tv.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */
	*vpp = vp;

	/* Construction done: clear IN_ALLOC and wake anyone waiting on it. */
	CLR(ip->i_flag, IN_ALLOC);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);

	return (0);

errout:
	/*
	 * Failure after the hash insert: remove the half-built inode from
	 * the hash, wake any waiters blocked on IN_ALLOC, and free it.
	 */
	ufs_ihashrem(ip);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);
	FREE_ZONE(ip, sizeof(struct inode), type);

	return (error);
}
1404
1405 /*
1406 * File handle to vnode
1407 *
1408 * Have to be really careful about stale file handles:
1409 * - check that the inode number is valid
1410 * - call vget to get the locked inode
1411 * - check for an unallocated inode (i_mode == 0)
1412 */
1413 int
1414 ffs_fhtovp(mp, fhlen, fhp, vpp, context)
1415 register struct mount *mp;
1416 int fhlen;
1417 unsigned char *fhp;
1418 struct vnode **vpp;
1419 vfs_context_t context;
1420 {
1421 register struct ufid *ufhp;
1422 register struct inode *ip;
1423 struct vnode *nvp;
1424 struct fs *fs;
1425 int error;
1426
1427 if (fhlen < (int)sizeof(struct ufid))
1428 return (EINVAL);
1429 ufhp = (struct ufid *)fhp;
1430 fs = VFSTOUFS(mp)->um_fs;
1431 if (ufhp->ufid_ino < ROOTINO ||
1432 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1433 return (ESTALE);
1434 error = ffs_vget_internal(mp, ufhp->ufid_ino, &nvp, NULL, NULL, 0, 1);
1435 if (error) {
1436 *vpp = NULLVP;
1437 return (error);
1438 }
1439 ip = VTOI(nvp);
1440 if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen) {
1441 vnode_put(nvp);
1442 *vpp = NULLVP;
1443 return (ESTALE);
1444 }
1445 *vpp = nvp;
1446 return (0);
1447 }
1448
1449 /*
1450 * Vnode pointer to File handle
1451 */
1452 /* ARGSUSED */
1453 int
1454 ffs_vptofh(vp, fhlenp, fhp, context)
1455 struct vnode *vp;
1456 int *fhlenp;
1457 unsigned char *fhp;
1458 vfs_context_t context;
1459 {
1460 register struct inode *ip;
1461 register struct ufid *ufhp;
1462
1463 if (*fhlenp < (int)sizeof(struct ufid))
1464 return (EOVERFLOW);
1465 ip = VTOI(vp);
1466 ufhp = (struct ufid *)fhp;
1467 ufhp->ufid_ino = ip->i_number;
1468 ufhp->ufid_gen = ip->i_gen;
1469 *fhlenp = sizeof(struct ufid);
1470 return (0);
1471 }
1472
/*
 * Filesystem module initialization; FFS simply defers to ufs_init().
 */
int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	return (ufs_init(vfsp));
}
1483
1484 /*
1485 * fast filesystem related variables.
1486 */
1487 ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
1488 user_addr_t newp, size_t newlen, vfs_context_t context)
1489 {
1490 extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
1491
1492 /* all sysctl names at this level are terminal */
1493 if (namelen != 1)
1494 return (ENOTDIR); /* overloaded */
1495
1496 switch (name[0]) {
1497 case FFS_CLUSTERREAD:
1498 return (sysctl_int(oldp, oldlenp, newp, newlen,
1499 &doclusterread));
1500 case FFS_CLUSTERWRITE:
1501 return (sysctl_int(oldp, oldlenp, newp, newlen,
1502 &doclusterwrite));
1503 case FFS_REALLOCBLKS:
1504 return (sysctl_int(oldp, oldlenp, newp, newlen,
1505 &doreallocblks));
1506 case FFS_ASYNCFREE:
1507 return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
1508 default:
1509 return (ENOTSUP);
1510 }
1511 /* NOTREACHED */
1512 }
1513
/*
 * Write a superblock and associated information back to disk.
 *
 * First flushes the cylinder-group summary blocks, then the superblock
 * itself; if any summary write failed, the superblock write is skipped
 * so the filesystem is never marked clean over bad summary data.
 * With waitfor == MNT_WAIT the writes are synchronous and errors are
 * collected; otherwise they are issued asynchronously.
 *
 * Returns 0 on success or the last write error encountered.
 */
int
ffs_sbupdate(mp, waitfor)
	struct ufsmount *mp;
	int waitfor;
{
	register struct fs *dfs, *fs = mp->um_fs;
	register struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;
	int devBlockSize=0;
#if REV_ENDIAN_FS
	int rev_endian=(mp->um_mountp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		/* Full filesystem block per pass, short block at the tail. */
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
			size, 0, 0, BLK_META);
		bcopy(space, (char *)buf_dataptr(bp), (u_int)size);
#if REV_ENDIAN_FS
		/* Summary data is stored opposite-endian on MNT_REVEND. */
		if (rev_endian) {
			byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
		}
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		if (waitfor != MNT_WAIT)
			buf_bawrite(bp);
		else if (error = (int)buf_bwrite(bp))
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror)
		return (allerror);
	devBlockSize = vfs_devblocksize(mp->um_mountp);

	bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META);
	bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize);
	/* Restore compatibility to old file systems.		   XXX */
	dfs = (struct fs *)buf_dataptr(bp);			/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
		dfs->fs_nrpos = -1;				/* XXX */
#if REV_ENDIAN_FS
	/*
	 * Swapping bytes here ; so that in case
	 * of inode format < FS_44INODEFMT appropriate
	 * fields get moved
	 */
	if (rev_endian) {
		byte_swap_sbout((struct fs *)buf_dataptr(bp));
	}
#endif /* REV_ENDIAN_FS */
	/*
	 * Pre-4.4 inode formats keep the fs_qbmask group of fields in a
	 * rotated order on disk; rotate the five words to match.	XXX
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
		int32_t *lp, tmp;				/* XXX */
								/* XXX */
		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
		tmp = lp[4];					/* XXX */
		for (i = 4; i > 0; i--)				/* XXX */
			lp[i] = lp[i-1];			/* XXX */
		lp[0] = tmp;					/* XXX */
	}							/* XXX */
#if REV_ENDIAN_FS
	/* Note that dfs is already swapped so swap the filesize
	 * before writing
	 */
	if (rev_endian) {
		dfs->fs_maxfilesize = NXSwapLongLong(mp->um_savedmaxfilesize);	/* XXX */
	} else {
#endif /* REV_ENDIAN_FS */
		dfs->fs_maxfilesize = mp->um_savedmaxfilesize;	/* XXX */
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else if (error = (int)buf_bwrite(bp))
		allerror = error;

	return (allerror);
}